前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >YOLOv3_02_yolov3.py

YOLOv3_02_yolov3.py

原创
作者头像
代号404
修改2019-09-09 11:26:51
5010
修改2019-09-09 11:26:51
举报
文章被收录于专栏:Deep Learning 笔记

# File name : dataset.py

# Author : YunYang1994

学习了yolov3 在 tensorflow2.0 的实现,加深理解

代码语言:javascript
复制
import numpy as np
import tensorflow as tf
import core.utils as utils  #工具代码
import core.common as common #网络结构
import core.backbone as backbone #网络结构
from core.config import cfg   #配置文件
代码语言:javascript
复制
def YOLOv3(input_layer):
    """Build the YOLOv3 detection head on top of the Darknet-53 backbone.

    input_layer: input image tensor (e.g. shape [416, 416, 3]).
    Returns [conv_sbbox, conv_mbbox, conv_lbbox] — raw prediction maps for
    the small-, medium- and large-object scales respectively.
    """
    # Backbone; route_1 / route_2 are the skip connections reused below.
    # Their channel depths are 256 / 512, conv is the deepest map (1024).
    route_1, route_2, conv = backbone.darknet53(input_layer)

    # --- large-object branch (coarsest grid) ---
    for spec in ((1, 1, 1024, 512), (3, 3, 512, 1024),
                 (1, 1, 1024, 512), (3, 3, 512, 1024),
                 (1, 1, 1024, 512)):
        conv = common.convolutional(conv, spec)

    conv_lobj_branch = common.convolutional(conv, (3, 3, 512, 1024))
    # Raw output head: no activation, no batch norm; 3 anchors x (5 + classes).
    conv_lbbox = common.convolutional(conv_lobj_branch, (1, 1, 1024, 3*(NUM_CLASS + 5)), activate=False, bn=False)

    # --- upsample and fuse with route_2 for the medium-object branch ---
    conv = common.convolutional(conv, (1, 1, 512, 256))
    conv = common.upsample(conv)
    conv = tf.concat([conv, route_2], axis=-1)

    for spec in ((1, 1, 768, 256), (3, 3, 256, 512),
                 (1, 1, 512, 256), (3, 3, 256, 512),
                 (1, 1, 512, 256)):
        conv = common.convolutional(conv, spec)

    conv_mobj_branch = common.convolutional(conv, (3, 3, 256, 512))
    conv_mbbox = common.convolutional(conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)

    # --- upsample and fuse with route_1 for the small-object branch ---
    conv = common.convolutional(conv, (1, 1, 256, 128))
    conv = common.upsample(conv)
    conv = tf.concat([conv, route_1], axis=-1)

    for spec in ((1, 1, 384, 128), (3, 3, 128, 256),
                 (1, 1, 256, 128), (3, 3, 128, 256),
                 (1, 1, 256, 128)):
        conv = common.convolutional(conv, spec)

    conv_sobj_branch = common.convolutional(conv, (3, 3, 128, 256))
    conv_sbbox = common.convolutional(conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS + 5)), activate=False, bn=False)

    return [conv_sbbox, conv_mbbox, conv_lbbox]

输入经过缩放的标准化图片,输出feature_map

conv_sbbox, conv_mbbox, conv_lbbox = [52,52,45],[26,26,45],[13,13,45],其中 NUM_CLASS=10(45 = 3×(NUM_CLASS+5);conv_sbbox 对应最细的 52×52 网格,用于检测小目标,conv_lbbox 对应 13×13 网格,用于检测大目标)

代码语言:javascript
复制
def decode(conv_output, i=0):
    """Transform one raw YOLO feature map into absolute-coordinate predictions.

    conv_output: raw head output, shape [batch, S, S, 3*(5 + NUM_CLASS)].
    i: scale index (0/1/2) selecting STRIDES[i] and ANCHORS[i].
    Returns a tensor of shape [batch, S, S, 3, 5 + NUM_CLASS] containing
    (x, y, w, h, objectness, class probabilities), with x/y/w/h expressed
    in input-image pixels.
    """
    shape = tf.shape(conv_output)
    n_batch, grid_size = shape[0], shape[1]

    # Expose the per-anchor axis: [batch, S, S, 3, 5 + NUM_CLASS].
    conv_output = tf.reshape(conv_output, (n_batch, grid_size, grid_size, 3, 5 + NUM_CLASS))

    raw_xy = conv_output[:, :, :, :, 0:2]    # cell-relative center offsets
    raw_wh = conv_output[:, :, :, :, 2:4]    # log-space size factors
    raw_conf = conv_output[:, :, :, :, 4:5]  # objectness logit
    raw_prob = conv_output[:, :, :, :, 5:]   # class logits

    # Build an [S, S, 2] grid of integer cell coordinates (x = column, y = row).
    rows = tf.tile(tf.range(grid_size, dtype=tf.int32)[:, tf.newaxis], [1, grid_size])
    cols = tf.tile(tf.range(grid_size, dtype=tf.int32)[tf.newaxis, :], [grid_size, 1])
    grid = tf.concat([cols[:, :, tf.newaxis], rows[:, :, tf.newaxis]], axis=-1)

    # Broadcast the grid to [batch, S, S, 3, 2] so each anchor sees its cell origin.
    grid = tf.tile(grid[tf.newaxis, :, :, tf.newaxis, :], [n_batch, 1, 1, 3, 1])
    grid = tf.cast(grid, tf.float32)

    # Cell-relative predictions -> absolute pixel coordinates on the input image.
    pred_xy = (tf.sigmoid(raw_xy) + grid) * STRIDES[i]
    pred_wh = (tf.exp(raw_wh) * ANCHORS[i]) * STRIDES[i]
    pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)

    pred_conf = tf.sigmoid(raw_conf)
    pred_prob = tf.sigmoid(raw_prob)

    return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)

最终输出转换后的feature_map,画网格的部分可以单独拉出来做下简单的修改,多跑几遍加深理解。

代码语言:javascript
复制
def bbox_iou(boxes1, boxes2):
    """Element-wise IoU (intersection over union) of two box tensors.

    Boxes are (..., 4) float tensors in (cx, cy, w, h) format — center
    coordinates plus width/height. Shapes must be broadcastable.
    Returns IoU with the last box axis reduced away. Degenerate pairs whose
    union area is zero yield 0 instead of NaN/Inf.
    """
    boxes1_area = boxes1[..., 2] * boxes1[..., 3]  # w * h
    boxes2_area = boxes2[..., 2] * boxes2[..., 3]

    # (cx, cy, w, h) -> (x_min, y_min, x_max, y_max)
    boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                        boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
    boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                        boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)

    left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])

    # Clamp at 0 so disjoint boxes produce zero intersection, not negative.
    inter_section = tf.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area

    # Fix: guard the division — zero-area unions previously produced NaN/Inf,
    # which then poisons reduce_max / the loss. divide_no_nan returns 0 there
    # and is identical to plain division everywhere else.
    return tf.math.divide_no_nan(inter_area, union_area)
代码语言:javascript
复制
def bbox_giou(boxes1, boxes2):
    """Element-wise GIoU (generalized IoU) of two box tensors.

    Boxes are (..., 4) float tensors in (cx, cy, w, h) format. Unlike plain
    IoU, GIoU stays informative (negative) for disjoint boxes, so gradients
    still point toward overlap. Degenerate divisions yield 0 instead of NaN.
    """
    # (cx, cy, w, h) -> (x_min, y_min, x_max, y_max)
    boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                        boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
    boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                        boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)

    # Re-order corners so min really is the top-left even if w/h were negative.
    boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]),
                        tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1)
    boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]),
                        tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1)

    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])

    inter_section = tf.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area
    # Fix: guarded division — zero-area unions previously produced NaN/Inf.
    iou = tf.math.divide_no_nan(inter_area, union_area)

    # Smallest axis-aligned box enclosing both inputs.
    enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2])
    enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
    enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0)
    enclose_area = enclose[..., 0] * enclose[..., 1]

    # GIoU = IoU - (enclosing area not covered by the union) / enclosing area.
    # Fix: same NaN guard for a zero-area enclosing box.
    giou = iou - tf.math.divide_no_nan(enclose_area - union_area, enclose_area)

    return giou

用giou替代iou,最终结果也会有少许提升。

代码语言:javascript
复制
def compute_loss(pred, conv, label, bboxes, i=0):
    """YOLOv3 loss for one output scale.

    pred   : decoded feature map in absolute coordinates (output of decode()).
    conv   : raw feature map from YOLOv3() for the same scale.
    label  : dense per-cell targets; assumed [batch, S, S, 3, 5 + NUM_CLASS]
             with (x, y, w, h, objectness, one-hot class) — TODO confirm
             against the dataset pipeline, which is not visible here.
    bboxes : ground-truth boxes for the batch; assumed [batch, max_boxes, 4].
    i      : scale index (0/1/2) selecting STRIDES[i].
    Returns (giou_loss, conf_loss, prob_loss), each a scalar: summed over
    grid/anchor/channel axes, averaged over the batch.
    """
    conv_shape  = tf.shape(conv)
    batch_size  = conv_shape[0]
    output_size = conv_shape[1]
    input_size  = STRIDES[i] * output_size  # network input resolution (e.g. 416)
    conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
    # [batch, S, S, 3, 5 + NUM_CLASS]: 3 anchors per cell, 5 = x,y,w,h,conf
    conv_raw_conf = conv[:, :, :, :, 4:5]  # objectness logits (pre-sigmoid)
    conv_raw_prob = conv[:, :, :, :, 5:]   # class logits (pre-sigmoid)

    pred_xywh     = pred[:, :, :, :, 0:4]  # decoded box in image pixels
    pred_conf     = pred[:, :, :, :, 4:5]  # decoded objectness (post-sigmoid)

    label_xywh    = label[:, :, :, :, 0:4]  # target box
    respond_bbox  = label[:, :, :, :, 4:5]  # 1 where an object is assigned to this anchor, else 0
    label_prob    = label[:, :, :, :, 5:]   # target class distribution

    giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)  # restore the reduced last axis
    input_size = tf.cast(input_size, tf.float32)

    # Weight small boxes more heavily: scale falls from 2 toward 1 as the
    # target box area approaches the full image area.
    bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
    giou_loss = respond_bbox * bbox_loss_scale * (1- giou)

    # IoU of every predicted box against every ground-truth box, via
    # broadcasting: [b,S,S,3,1,4] vs [b,1,1,1,max_boxes,4].
    iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
    max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)  # best ground-truth match per anchor

    # Background mask: anchor has no assigned object AND its best IoU is
    # below the threshold (anchors above it are ignored, penalized by neither term).
    respond_bgd = (1.0 - respond_bbox) * tf.cast( max_iou < IOU_LOSS_THRESH, tf.float32 )

    # Focal-style modulation: confident correct predictions contribute less.
    conf_focal = tf.pow(respond_bbox - pred_conf, 2)

    # Object and background terms share labels=respond_bbox: it is 1 on
    # object anchors and 0 on background anchors, so each term sees the
    # correct target under its own mask.
    conf_loss = conf_focal * (
            respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
            +
            respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
    )

    # Classification loss only on anchors responsible for an object.
    prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob)

    # Sum over spatial/anchor/channel axes, then average over the batch.
    giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1,2,3,4]))
    conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1,2,3,4]))
    prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1,2,3,4]))

    return giou_loss, conf_loss, prob_loss

YOLOv3采用端到端的训练方式,损失函数的代码相对比较直观,分别为位置损失、置信度损失和类别损失。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
相关产品与服务
图像识别
腾讯云图像识别基于深度学习等人工智能技术,提供车辆,物体及场景等检测和识别服务, 已上线产品子功能包含车辆识别,商品识别,宠物识别,文件封识别等,更多功能接口敬请期待。
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档