参考:http://chr10003566.github.io/2019/12/03/mmdetection(2)/
part1 测试mmdetection(通过读取一张图片,显示效果)
demo.py
from mmdet.apis import init_detector, inference_detector, show_result_pyplot
import mmcv

# Smoke test for an mmdetection install: load a pretrained detector, run it on
# one image and display the detections.

# Model config and the matching pretrained weights (absolute local paths).
config_file = '/home/ming/work/mmdetection/configs/faster_rcnn_r50_fpn_1x.py'
checkpoint_file = '/home/ming/work/mmdetection/checkpoints/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth'

# Build the detector from config + checkpoint on the first CUDA device.
model = init_detector(config_file, checkpoint_file, device='cuda:0')

# Run inference on a single image given by path (relative to the working
# directory), then plot the result with the model's class names.
img = 'demo.jpg'
result = inference_detector(model, img)
show_result_pyplot(img, result, model.CLASSES)
init_detector 三个输入参数:
config_file:检测模型的配置文件,一般位于 mmdetection/configs/ 中; checkpoint_file:即训练好的权重,在 mmdetection 的 model zoo 中可以下载; device:分配到的设备对象(例如 'cuda:0'); 返回:model(检测模型)
inference_detector 两个输入参数:
model:init_detector 返回的 model; imgs:图像的路径; 返回:图像测试的结果
show_result_pyplot 三个输入参数: img:图像的路径(字符串); result:inference_detector 的返回值; model.CLASSES:训练图像的类别名称
part2 训练与配置文件
# Detector definition: a 'FasterRCNN' model described as nested sub-configs for
# the backbone, neck, RPN head, RoI feature extractor and bbox head.
model = dict(
    type='FasterRCNN',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch'),
    neck=dict(
        type='FPN',
        # One entry per backbone output listed in `out_indices`.
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        # Five strides, matching the neck's `num_outs=5`.
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[0.0, 0.0, 0.0, 0.0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        # Same value as the RoI layer's `out_size` above.
        roi_feat_size=7,
        # NOTE(review): presumably the dataset categories plus one background
        # class - confirm for the mmdetection version in use.
        num_classes=81,
        target_means=[0.0, 0.0, 0.0, 0.0],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False,
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)))
# Training settings, grouped into three sections: `rpn`, `rpn_proposal` and
# `rcnn`. The `rpn` and `rcnn` sections each pair an assigner with a sampler.
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
# Test settings, with one section for `rpn` and one for `rcnn`.
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05,
        nms=dict(type='nms', iou_thr=0.5),
        max_per_img=100))
# Dataset basics shared by the pipelines and the `data` dict that follow.
dataset_type = 'CocoDataset'
# Relative path, so it is resolved against the current working directory.
data_root = 'data/coco/'
# Per-channel mean/std handed to the 'Normalize' transform in both pipelines.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)
# Ordered list of transform configs for the training split (wired in through
# `data['train']['pipeline']`). `img_norm_cfg` is unpacked into 'Normalize'.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Ordered list of transform configs used by both the `val` and `test` splits
# of `data`. The per-image transforms are nested inside 'MultiScaleFlipAug',
# here with a single scale and `flip=False`.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ]),
]
# Dataset wiring: loader settings plus one dataset config per split. Every
# path is built by prefixing `data_root`.
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline),
    # `test` points at the same val2017 annotations and images as `val`.
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline))
# Evaluation, optimizer and learning-rate schedule settings.
evaluation = dict(interval=1, metric='bbox')
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    # NOTE(review): presumably epoch indices at which the 'step' policy
    # lowers the learning rate - confirm against the mmdetection docs.
    step=[8, 11])
# Checkpointing, logging and general runtime settings.
checkpoint_config = dict(interval=1)
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
    ])
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
# Relative path, so it is resolved against the current working directory.
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]