简单进行实现,主要是数据处理,训练过程不复杂. 这里基于:
COCO 数据集格式参见:Dataset - COCO Dataset 数据特点.
DeepFashion 数据集格式参见:Dataset - DeepFashion 服装数据集.
主要利用 DeepFashion 服装图片的 bbox 标注数据,这里是三类的 bbox:上身服装、下身服装、全身服装,其 ID 分别为 1,2,3.
将数据转换为 COCO 格式,实现如下:
#!/usr/bin/env python
# --*-- coding: utf-8 --*--
import numpy as np
import json
import matplotlib.pyplot as plt
import cv2
from random import shuffle
import os
# Inclusive id ranges of the 50 fine-grained category ids, grouped by the
# coarse garment type each range maps to.
_CATEGORY_ID_RANGES = (
    ('upper', 1, 20),
    ('lower', 21, 36),
    ('full', 37, 50),
)

# Fine-grained category id -> coarse garment type name.
categories_dict = {
    category_id: garment_type
    for garment_type, first_id, last_id in _CATEGORY_ID_RANGES
    for category_id in range(first_id, last_id + 1)
}

# Coarse garment type name -> class id used in the generated annotations.
categories3_dict = dict(upper=1, lower=2, full=3)
def to_coco(shuffle_idx, images, bboxs, imagesdir):
    """Convert a subset of DeepFashion bbox annotations into a COCO-style dict.

    Args:
        shuffle_idx: Sequence of integer row indices selecting which entries
            of ``images`` / ``bboxs`` to convert. Each index is also used as
            the COCO image id and annotation id.
        images: Annotation lines whose first whitespace-separated field is
            the image name and whose last field is the fine-grained category
            id (a key of ``categories_dict``).
        bboxs: Annotation lines, row-aligned with ``images``, whose first
            field is the image name and whose last four fields are the
            integer corners ``x1 y1 x2 y2``.
        imagesdir: Dataset root as a string prefix (must end with a path
            separator); each image is read from
            ``imagesdir + 'Img/' + image_name``.

    Returns:
        A dict with the keys ``images``, ``annotations``, ``info``,
        ``licenses`` and ``categories``.

    Raises:
        AssertionError: If row ``idx`` of ``images`` and ``bboxs`` name
            different image files.
        IOError: If an image file cannot be read.
        KeyError: If a category id is not present in ``categories_dict``.
    """
    coco_dict = {u'images': [], u'annotations': []}
    total = len(shuffle_idx)

    for count, idx in enumerate(shuffle_idx):
        print('--- %d --- %d' % (count, total))

        # split() without an argument copes with repeated spaces, tabs and
        # the trailing newline, unlike split(' ').
        image_fields = images[idx].split()
        bbox_fields = bboxs[idx].split()
        assert bbox_fields[0] == image_fields[0], (
            'row %d: image list and bbox list disagree (%r vs %r)'
            % (idx, image_fields[0], bbox_fields[0]))

        image_name = image_fields[0]
        # int() instead of eval(): the value comes from a data file and must
        # never be executed as code.
        category_id = int(image_fields[-1])

        # Only the image size is needed, so a single-channel read suffices.
        image_file = imagesdir + 'Img/' + image_name
        img = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError('cannot read image: %s' % image_file)
        height, width = img.shape

        x1, y1, x2, y2 = map(int, bbox_fields[-4:])

        # Text form of the name on both Python 2 (byte str) and Python 3.
        if isinstance(image_name, bytes):
            image_name_text = image_name.decode('utf-8')
        else:
            image_name_text = image_name

        coco_dict[u'images'].append({
            u'date_captured': u'2018-02-06 14:02:52',
            u'file_name': image_name_text,
            u'height': height,
            u'id': idx,
            u'license': 1,
            u'url': image_name_text,
            u'width': width,
        })

        box_width = x2 - x1
        box_height = y2 - y1
        coco_dict[u'annotations'].append({
            # No segmentation masks are available; this is a placeholder
            # value only.
            u'segmentation': [[312.29, 562.89]],
            u'area': box_width * box_height,
            u'iscrowd': 0,
            u'image_id': idx,
            u'bbox': [x1, y1, box_width, box_height],  # [x, y, width, height]
            u'category_id': categories3_dict[categories_dict[category_id]],
            u'id': idx,
        })

    coco_dict[u'info'] = {
        u'contributor': u'DeepFashion',
        u'date_created': u'2018-03-08 18:03:52.357475',
        u'description': u'This is COCO Dataset version of DeepFashoion.',
        u'url': u'http://blog.csdn.net/zziahgf',
        u'version': u'1.0',
        u'year': 2018,
    }
    coco_dict[u'licenses'] = [{
        u'id': 1,
        u'name': u'Attribution-NonCommercial-ShareAlike License',
        u'url': u'http://creativecommons.org/licenses/by-nc-sa/2.0/',
    }]
    coco_dict[u'categories'] = [
        {u'id': 1, u'name': u'upper', u'supercategory': u'upper'},
        {u'id': 2, u'name': u'lower', u'supercategory': u'lower'},
        {u'id': 3, u'name': u'full', u'supercategory': u'full'},
    ]
    return coco_dict
if __name__ == '__main__':
    # Script entry point: read the DeepFashion image/bbox lists, split them
    # randomly into train and val, and write one COCO-format JSON per split.
    print('Convert DeepFashion Data to COCO Format...')
    deepfashion_root = '/data/DeepFashion/CategoryandAttributePredictionBenchmark/'
    category_imgfile = deepfashion_root + 'Anno/list_category_img.txt'
    category_bboxfile = deepfashion_root + 'Anno/list_bbox.txt'

    # The first two lines of each list are skipped (presumably a count line
    # and a column header -- confirm against the dataset files).
    with open(category_imgfile) as f:
        images = f.readlines()[2:]
    with open(category_bboxfile) as f:
        bboxs = f.readlines()[2:]
    assert len(images) == len(bboxs)
    print('Num of Deepfashion Category Images:  %d' % len(images))

    # Simple random split. Both slices use the same boundary so that the
    # train and val sets are disjoint.
    num_train = 200000
    shuffle_idx = list(range(len(images)))  # list(): shuffle() needs a mutable sequence
    shuffle(shuffle_idx)
    coco_dict_train = to_coco(shuffle_idx[:num_train], images, bboxs, deepfashion_root)
    coco_dict_val = to_coco(shuffle_idx[num_train:], images, bboxs, deepfashion_root)

    # NOTE: the JSON files are written to the current working directory,
    # while the dataset catalog entries shown later in this file point at
    # <deepfashion_root>/Anno/ -- move the files there or adjust the catalog.
    with open('coco_deepfashion3_train.json', 'w') as f:
        f.write(json.dumps(coco_dict_train))
    with open('coco_deepfashion3_val.json', 'w') as f:
        f.write(json.dumps(coco_dict_val))
    print('Done.')
目标检测任务,需要修改的部分有:
##
# Root directory of the DeepFashion benchmark used by the entries below.
DEEPFASHION_DIR = '/data/DeepFashion/CategoryandAttributePredictionBenchmark'

# Available datasets
# NOTE(review): this is an excerpt showing only the two DeepFashion entries;
# presumably they are added alongside the catalog's existing entries -- confirm.
# IM_DIR and ANN_FN are key constants defined elsewhere in the catalog module.
DATASETS = {
    'coco_deepfashion3_train': {
        # Image root directory for the training split.
        IM_DIR:
            DEEPFASHION_DIR + '/Img',
        # COCO-format annotation file for the training split.
        ANN_FN:
            DEEPFASHION_DIR + '/Anno/coco_deepfashion3_train.json',
    },
    'coco_deepfashion3_val': {
        # Image root directory for the validation split.
        IM_DIR:
            DEEPFASHION_DIR + '/Img',
        # COCO-format annotation file for the validation split.
        ANN_FN:
            DEEPFASHION_DIR + '/Anno/coco_deepfashion3_val.json',
    },
}
MODEL:
  TYPE: generalized_rcnn
  CONV_BODY: FPN.add_fpn_ResNet50_conv5_body
  NUM_CLASSES: 4  # three garment classes + one background class
  FASTER_RCNN: True  # Faster R-CNN mode
NUM_GPUS: 1  # single GPU
SOLVER:  # optimizer settings
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  BASE_LR: 0.0025
  GAMMA: 0.1
  MAX_ITER: 60000
  STEPS: [0, 30000, 40000]
  # Equivalent schedules with...
  # 1 GPU:
  #   BASE_LR: 0.0025
  #   MAX_ITER: 60000
  #   STEPS: [0, 30000, 40000]
  # 2 GPUs:
  #   BASE_LR: 0.005
  #   MAX_ITER: 30000
  #   STEPS: [0, 15000, 20000]
  # 4 GPUs:
  #   BASE_LR: 0.01
  #   MAX_ITER: 15000
  #   STEPS: [0, 7500, 10000]
  # 8 GPUs:
  #   BASE_LR: 0.02
  #   MAX_ITER: 7500
  #   STEPS: [0, 3750, 5000]
FPN:  # use FPN
  FPN_ON: True
  MULTILEVEL_ROIS: True
  MULTILEVEL_RPN: True
FAST_RCNN:
  ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head
  ROI_XFORM_METHOD: RoIAlign  # RoI alignment method
  ROI_XFORM_RESOLUTION: 7  # 7x7
  ROI_XFORM_SAMPLING_RATIO: 2
TRAIN:  # training
  WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl  # weights to fine-tune from (ResNet50)
  DATASETS: ('coco_deepfashion3_train',)  # training dataset: the COCO-format data generated earlier
  SCALES: (500,)
  MAX_SIZE: 833
  BATCH_SIZE_PER_IM: 256
  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
TEST:
  DATASETS: ('coco_deepfashion3_val',)  # val dataset
  SCALES: (500,)
  MAX_SIZE: 833
  NMS: 0.5
  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
  RPN_POST_NMS_TOP_N: 1000
OUTPUT_DIR: .
python tools/train_net.py --cfg ./configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml OUTPUT_DIR ./detectron-output
log 信息:
......
json_stats: {"accuracy_cls": 0.000000, "eta": "3 days, 22:43:41", "iter": 0, "loss": 3.729450, "loss_bbox": 0.008297, "loss_cls": 2.932485, "loss_rpn_bbox_fpn2": 0.000000, "loss_rpn_bbox_fpn3": 0.000000, "loss_rpn_bbox_fpn4": 0.000000, "loss_rpn_bbox_fpn5": 0.106726, "loss_rpn_bbox_fpn6": 0.000000, "loss_rpn_cls_fpn2": 0.477785, "loss_rpn_cls_fpn3": 0.126733, "loss_rpn_cls_fpn4": 0.011390, "loss_rpn_cls_fpn5": 0.066034, "loss_rpn_cls_fpn6": 0.000000, "lr": 0.000833, "mb_qsize": 64, "mem": 2703, "time": 5.683689}
json_stats: {"accuracy_cls": 0.968750, "eta": "8:04:13", "iter": 20, "loss": 0.718310, "loss_bbox": 0.074937, "loss_cls": 0.219320, "loss_rpn_bbox_fpn2": 0.000000, "loss_rpn_bbox_fpn3": 0.000000, "loss_rpn_bbox_fpn4": 0.000000, "loss_rpn_bbox_fpn5": 0.028209, "loss_rpn_bbox_fpn6": 0.000000, "loss_rpn_cls_fpn2": 0.174210, "loss_rpn_cls_fpn3": 0.083049, "loss_rpn_cls_fpn4": 0.019438, "loss_rpn_cls_fpn5": 0.031202, "loss_rpn_cls_fpn6": 0.000000, "lr": 0.000900, "mb_qsize": 64, "mem": 3259, "time": 0.484392}
......
infer_simple.py 修改处:
# Class-index -> name mapping obtained from
# dummy_datasets.get_deepfashion3_dataset().
dummy_coco_dataset = dummy_datasets.get_deepfashion3_dataset()
其中,需要修改 lib/datasets/dummy_datasets.py:
def get_deepfashion3_dataset():
    """Return a stand-in dataset object that carries only a ``classes`` field.

    ``classes`` maps each integer class index to its display name, with
    index 0 being ``'__background__'`` followed by the three garment classes.
    """
    label_names = ('__background__', 'Upper', 'Lower', 'Full')
    dataset = AttrDict()
    dataset.classes = dict(enumerate(label_names))
    return dataset
然后进行测试:
python infer_simple.py \
    --cfg ./configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml \
    --output-dir ./detectron-visualizations \
    --image-ext jpg \
    --wts ./detectron-output/train/coco_deepfashion3_train/generalized_rcnn/model_final.pkl \
    ./test_images_dir