diff --git i/examples/FasterRCNN/basemodel.py w/examples/FasterRCNN/basemodel.py
index 0c9eb72..3706d0e 100644
--- i/examples/FasterRCNN/basemodel.py
+++ w/examples/FasterRCNN/basemodel.py
@@ -44,7 +44,7 @@ def image_preprocess(image, bgr=True):
             std = std[::-1]
         image_mean = tf.constant(mean, dtype=tf.float32)
         image_std = tf.constant(std, dtype=tf.float32)
-        image = (image - image_mean) / image_std
+        image = (image - image_mean)  # std division disabled: / image_std
         return image
 
 
@@ -72,12 +72,18 @@ def resnet_shortcut(l, n_out, stride, activation=tf.identity):
 
 def resnet_bottleneck(l, ch_out, stride):
     l, shortcut = l, l
-    l = Conv2D('conv1', l, ch_out, 1, activation=BNReLU)
     if stride == 2:
-        l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
-        l = Conv2D('conv2', l, ch_out, 3, strides=2, activation=BNReLU, padding='VALID')
+        l = l[:, :, :-1, :-1]
+        l = Conv2D('conv1', l, ch_out, 1, strides=2, activation=BNReLU)
     else:
-        l = Conv2D('conv2', l, ch_out, 3, strides=stride, activation=BNReLU)
+        l = Conv2D('conv1', l, ch_out, 1, strides=stride, activation=BNReLU)
+    l = Conv2D('conv2', l, ch_out, 3, strides=1, activation=BNReLU)
+    #l = Conv2D('conv1', l, ch_out, 1, activation=BNReLU)
+    #if stride == 2:
+    #    l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
+    #    l = Conv2D('conv2', l, ch_out, 3, strides=2, activation=BNReLU, padding='VALID')
+    #else:
+    #    l = Conv2D('conv2', l, ch_out, 3, strides=stride, activation=BNReLU)
     l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True))
     return l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
 
@@ -96,7 +102,7 @@ def resnet_group(l, name, block_func, features, count, stride):
 def pretrained_resnet_conv4(image, num_blocks, freeze_c2=True):
     assert len(num_blocks) == 3
     with resnet_argscope():
-        l = tf.pad(image, [[0, 0], [0, 0], [2, 3], [2, 3]])
+        l = tf.pad(image, [[0, 0], [0, 0], [3, 2], [3, 2]])
         l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
         l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
         l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
diff --git i/examples/FasterRCNN/config.py w/examples/FasterRCNN/config.py
index 2e908a5..a3292e3 100644
--- i/examples/FasterRCNN/config.py
+++ w/examples/FasterRCNN/config.py
@@ -17,15 +17,15 @@ CLASS_NAMES = []  # NUM_CLASS strings. Will be populated later by coco loader
 # basemodel ----------------------
 RESNET_NUM_BLOCK = [3, 4, 6, 3]     # for resnet50
 # RESNET_NUM_BLOCK = [3, 4, 23, 3]     # for resnet101
-FREEZE_AFFINE = False   # do not train affine parameters inside BN
+FREEZE_AFFINE = True  # do not train affine parameters inside BN
 
 # schedule -----------------------
 BASE_LR = 1e-2
 WARMUP = 1000    # in steps
 STEPS_PER_EPOCH = 500
-LR_SCHEDULE = [150000, 230000, 280000]
-# LR_SCHEDULE = [120000, 160000, 180000]    # "1x" schedule in detectron
-# LR_SCHEDULE = [240000, 320000, 360000]    # "2x" schedule in detectron
+# LR_SCHEDULE = [150000, 230000, 280000]
+# LR_SCHEDULE = [120000, 160000, 180000]    # "1x" schedule in detectron
+LR_SCHEDULE = [240000, 320000, 360000]    # "2x" schedule in detectron
 
 # image resolution --------------------
 SHORT_EDGE_SIZE = 800
@@ -55,7 +55,7 @@ TRAIN_POST_NMS_TOPK = 2000
 CROWD_OVERLAP_THRES = 0.7
 
 # fastrcnn training ---------------------
-FASTRCNN_BATCH_PER_IM = 256
+FASTRCNN_BATCH_PER_IM = 512
 FASTRCNN_BBOX_REG_WEIGHTS = np.array([10, 10, 5, 5], dtype='float32')
 FASTRCNN_FG_THRESH = 0.5
 # fg ratio in a ROI batch