# 教程 | 从零开始PyTorch项目：YOLO v3目标检测实现（下）

1. 本教程 1-3 部分

2. 了解 PyTorch 基本工作方式，包括使用 nn.Module、nn.Sequential 和 torch.nn.parameter 类创建自定义架构的方式

3. NumPy 基本知识

4. OpenCV 基本知识

`def write_results(prediction, confidence, num_classes, nms_conf = 0.4):`

```    conf_mask = (prediction[:,:,4] > confidence).float().unsqueeze(2)

```      box_corner = prediction.new(prediction.shape)
box_corner[:,:,0] = (prediction[:,:,0] - prediction[:,:,2]/2)
box_corner[:,:,1] = (prediction[:,:,1] - prediction[:,:,3]/2)
box_corner[:,:,2] = (prediction[:,:,0] + prediction[:,:,2]/2)
box_corner[:,:,3] = (prediction[:,:,1] + prediction[:,:,3]/2)
prediction[:,:,:4] = box_corner[:,:,:4]```

```          max_conf, max_conf_score = torch.max(image_pred[:,5:5+ num_classes], 1)
max_conf = max_conf.float().unsqueeze(1)
max_conf_score = max_conf_score.float().unsqueeze(1)
seq = (image_pred[:,:5], max_conf, max_conf_score)
image_pred = torch.cat(seq, 1)```

```        non_zero_ind =  (torch.nonzero(image_pred[:,4]))
try:
image_pred_ = image_pred[non_zero_ind.squeeze(),:].view(-1,7)
except:
continue

#For PyTorch 0.4 compatibility
#Since the above code with not raise exception for no detection
#as scalars are supported in PyTorch 0.4
if image_pred_.shape[0] == 0:
continue ```

```            #Get the various classes detected in the image
img_classes = unique(image_pred_[:,-1]) # -1 index holds the class index```

```  def unique(tensor):
tensor_np = tensor.cpu().numpy()
unique_np = np.unique(tensor_np)
unique_tensor = torch.from_numpy(unique_np)

tensor_res = tensor.new(unique_tensor.shape)
tensor_res.copy_(unique_tensor)
return tensor_res```

```  for i in range(idx):
#Get the IOUs of all boxes that come after the one we are looking at
#in the loop
try:
ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:])
except ValueError:
break

except IndexError:
break

#Zero out all the detections that have IoU > treshhold

#Remove the non-zero entries
non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze()
image_pred_class = image_pred_class[non_zero_ind].view(-1,7)```

` ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:])`

```#Zero out all the detections that have IoU > treshhold

#Remove the non-zero entries
non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze()
image_pred_class = image_pred_class[non_zero_ind]         ```

```def bbox_iou(box1, box2):
"""
Returns the IoU of two bounding boxes

"""
#Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], box2[:,1], box2[:,2], box2[:,3]

#get the corrdinates of the intersection rectangle
inter_rect_x1 =  torch.max(b1_x1, b2_x1)
inter_rect_y1 =  torch.max(b1_y1, b2_y1)
inter_rect_x2 =  torch.min(b1_x2, b2_x2)
inter_rect_y2 =  torch.min(b1_y2, b2_y2)

#Intersection area
inter_area = (inter_rect_x2 - inter_rect_x1 + 1)*(inter_rect_y2 - inter_rect_y1 + 1)

#Union Area
b1_area = (b1_x2 - b1_x1 + 1)*(b1_y2 - b1_y1 + 1)
b2_area = (b2_x2 - b2_x1 + 1)*(b2_y2 - b2_y1 + 1)

iou = inter_area / (b1_area + b2_area - inter_area)

return iou```

write_results 函数输出一个形状为 Dx8 的张量；其中 D 是所有图像中的「真实」检测结果，每个都用一行表示。每一个检测结果都有 8 个属性，即：该检测结果所属的 batch 中图像的索引、4 个角的坐标、objectness 分数、有最大置信度的类别的分数、该类别的索引。

```            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
#Repeat the batch_id for as many detections of the class cls in the image
seq = batch_ind, image_pred_class

if not write:
output = torch.cat(seq,1)
write = True
else:
out = torch.cat(seq,1)
output = torch.cat((output,out))```

```    try:
return output
except:
return 0```

```   from __future__ import division
import time
import torch
import torch.nn as nn
import numpy as np
import cv2
from util import *
import argparse
import os
import os.path as osp
from darknet import Darknet
import pickle as pkl
import pandas as pd
import random```

```  def arg_parse():
"""
Parse arguements to the detect module

"""

parser = argparse.ArgumentParser(description='YOLO v3 Detection Module')

parser.add_argument("--images", dest = 'images', help =
"Image / Directory containing images to perform detection upon",
default = "imgs", type = str)
parser.add_argument("--det", dest = 'det', help =
"Image / Directory to store detections to",
default = "det", type = str)
parser.add_argument("--bs", dest = "bs", help = "Batch size", default = 1)
parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.5)
parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)
parser.add_argument("--cfg", dest = 'cfgfile', help =
"Config file",
default = "cfg/yolov3.cfg", type = str)
parser.add_argument("--weights", dest = 'weightsfile', help =
"weightsfile",
default = "yolov3.weights", type = str)
parser.add_argument("--reso", dest = 'reso', help =
"Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
default = "416", type = str)

return parser.parse_args()

args = arg_parse()
images = args.images
batch_size = int(args.bs)
confidence = float(args.confidence)
nms_thesh = float(args.nms_thresh)
start = 0
CUDA = torch.cuda.is_available()```

```  mkdir data
cd data
wget https://raw.githubusercontent.com/ayooshkathuria/YOLO_v3_tutorial_from_scratch/master/data/coco.names```

```num_classes = 80    #For COCO

```def load_classes(namesfile):
fp = open(namesfile, "r")
return names```

```#Set up the neural network
model = Darknet(args.cfgfile)

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0
assert inp_dim > 32

#If there's a GPU availible, put the model on GPU
if CUDA:
model.cuda()

#Set the model in evaluation mode
model.eval()```

```read_dir = time.time()
#Detection phase
try:
imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images)]
imlist = []
imlist.append(osp.join(osp.realpath('.'), images))
except FileNotFoundError:
print ("No file or directory with the name {}".format(images))
exit()```

```if not os.path.exists(args.det):
os.makedirs(args.det)```

```load_batch = time.time()

OpenCV 会将图像载入为 numpy 数组，颜色通道的顺序为 BGR。PyTorch 的图像输入格式是（batch x 通道 x 高度 x 宽度），其通道顺序为 RGB。因此，我们在 util.py 中写了一个函数 prep_image 来将 numpy 数组转换成 PyTorch 的输入格式。

```def prep_image(img, inp_dim):
"""
Prepare image for inputting to the neural network.

Returns a Variable
"""

img = cv2.resize(img, (inp_dim, inp_dim))
img = img[:,:,::-1].transpose((2,0,1)).copy()
img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)
return img```

```#PyTorch Variables for images
im_batches = list(map(prep_image, loaded_ims, [inp_dim for x in range(len(imlist))]))

#List containing dimensions of original images
im_dim_list = [(x.shape[1], x.shape[0]) for x in loaded_ims]
im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)

if CUDA:
im_dim_list = im_dim_list.cuda()```

```leftover = 0
if (len(im_dim_list) % batch_size):
leftover = 1

if batch_size != 1:
num_batches = len(imlist) // batch_size + leftover
im_batches = [torch.cat((im_batches[i*batch_size : min((i +  1)*batch_size,
len(im_batches))]))  for i in range(num_batches)]  ```

```write = 0
start_det_loop = time.time()
for i, batch in enumerate(im_batches):
start = time.time()
if CUDA:
batch = batch.cuda()

prediction = model(Variable(batch, volatile = True), CUDA)

prediction = write_results(prediction, confidence, num_classes, nms_conf = nms_thesh)

end = time.time()

if type(prediction) == int:

for im_num, image in enumerate(imlist[i*batch_size: min((i +  1)*batch_size, len(imlist))]):
im_id = i*batch_size + im_num
print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start)/batch_size))
print("{0:20s} {1:s}".format("Objects Detected:", ""))
print("----------------------------------------------------------")
continue

prediction[:,0] += i*batch_size    #transform the atribute from index in batch to index in imlist

if not write:                      #If we have't initialised output
output = prediction
write = 1
else:
output = torch.cat((output,prediction))

for im_num, image in enumerate(imlist[i*batch_size: min((i +  1)*batch_size, len(imlist))]):
im_id = i*batch_size + im_num
objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start)/batch_size))
print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
print("----------------------------------------------------------")

if CUDA:
torch.cuda.synchronize()       ```

torch.cuda.synchronize 这一行是为了确保 CUDA 核与 CPU 同步。否则，一旦 GPU 工作排队了并且 GPU 工作还远未完成，那么 CUDA 核就将控制返回给 CPU（异步调用）。如果 end = time.time() 在 GPU 工作实际完成前就 print 了，那么这可能会导致时间错误。

```try:
output
except NameError:
exit()```

```output_recast = time.time()
output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))

im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())/inp_dim
output[:,1:5] *= im_dim_list```

```class_load = time.time()

```draw = time.time()

def write(x, results, color):
c1 = tuple(x[1:3].int())
c2 = tuple(x[3:5].int())
img = results[int(x[0])]
cls = int(x[-1])
label = "{0}".format(classes[cls])
cv2.rectangle(img, c1, c2,color, 1)
t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
cv2.rectangle(img, c1, c2,color, -1)
cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1);
return img```

`list(map(lambda x: write(x, loaded_ims), output))`

`det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format(args.det,x.split("/")[-1]))`

```list(map(cv2.imwrite, det_names, loaded_ims))
end = time.time()```

```print("SUMMARY")
print("----------------------------------------------------------")
print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
print()
print("{:25s}: {:2.3f}".format("Detection (" + str(len(imlist)) +  " images)", output_recast - start_det_loop))
print("{:25s}: {:2.3f}".format("Output Processing", class_load - output_recast))
print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
print("{:25s}: {:2.3f}".format("Average time_per_img", (end - load_batch)/len(imlist)))
print("----------------------------------------------------------")

torch.cuda.empty_cache()```

`python detect.py --images dog-cycle-car.png --det det`

```Loading network.....
dog-cycle-car.png    predicted in  2.456 seconds
Objects Detected:    bicycle truck dog
----------------------------------------------------------
SUMMARY
----------------------------------------------------------
Task                     : Time Taken (in seconds)

Detection (1 images)     : 2.457
Output Processing        : 0.002
Drawing Boxes            : 0.076
Average time_per_img     : 2.657
----------------------------------------------------------```

```frames = 0
start = time.time()

while cap.isOpened():

if ret:
img = prep_image(frame, inp_dim)
#        cv2.imshow("a", frame)
im_dim = frame.shape[1], frame.shape[0]
im_dim = torch.FloatTensor(im_dim).repeat(1,2)

if CUDA:
im_dim = im_dim.cuda()
img = img.cuda()

output = model(Variable(img, volatile = True), CUDA)
output = write_results(output, confidence, num_classes, nms_conf = nms_thesh)

if type(output) == int:
frames += 1
print("FPS of the video is {:5.4f}".format( frames / (time.time() - start)))
cv2.imshow("frame", frame)
key = cv2.waitKey(1)
if key & 0xFF == ord('q'):
break
continue
output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))

im_dim = im_dim.repeat(output.size(0), 1)/inp_dim
output[:,1:5] *= im_dim

list(map(lambda x: write(x, frame), output))

cv2.imshow("frame", frame)
key = cv2.waitKey(1)
if key & 0xFF == ord('q'):
break
frames += 1
print(time.time() - start)
print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
else:
break     ```

PyTorch 教程：http://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html

Python ArgParse：https://docs.python.org/3/library/argparse.html

222 篇文章93 人订阅

0 条评论

## 相关文章

36960

3K60

1.5K90

369100

16820

9820

39060

59130

### caffe示例实现之4在MNIST手写数字数据集上训练与测试LeNet

http://blog.csdn.net/liumaolincycle/article/details/47336921

12810

24650