前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >人脸检测——FaceBoxes之解读与效果展示

人脸检测——FaceBoxes之解读与效果展示

作者头像
MachineLP
发布2019-05-26 15:32:33
7750
发布2019-05-26 15:32:33
举报
文章被收录于专栏:小鹏的专栏

本章对其论文进行简单解读及其效果的演示:

文章链接:《FaceBoxes: A CPU Real-time Face Detector with High Accuracy》

总体的流程图如下所示:

本质上并不复杂,熟悉fasterRcnn,或者熟悉rpn就可以很快搞清楚。

faceBox相当于在fasterRcnn的基础上只保留了rpn部分,去掉了roi pooling,并且采用fpn,以及对anchor进行了改进,还有一点是主干网络(不算什么亮点了)。

论文中的贡献有三个部分: (分别对应:主干网络、fpn、anchor)

(1)Rapidly Digested Convolutional Layers(RDCL)

(2)Multiple Scale Convolutional Layers(MSCL)

(3)Anchor densification strategy

为了anchor密度均衡,可以对密度不足的anchor以中心进行偏移加倍,如下图所示:

训练完成后测试效果:

后续会继续更新以及代码实现......

(1)数据准备与生成:(包含数据读取、扩增、anchor生成、所需训练数据格式)

dataset.py

代码语言:python
复制
#encoding:utf-8

import torch
import math
import itertools
import cv2
import numpy as np

class DataEncoder:
	"""Default-box (anchor) generator plus target encoder/decoder for FaceBoxes.

	``self.default_boxes`` is a float tensor of shape [21824, 4] holding the
	anchors as normalised (cx, cy, w, h), laid out layer after layer:
	32*32*21 boxes for the first layer, then 16*16 and 8*8 boxes.
	"""

	def __init__(self):
		"""Compute the default boxes and store them in ``self.default_boxes``."""
		# Side length of the training image.
		scale = 1024.
		# Tiling interval of the anchors on each layer, relative to the image size.
		steps = [s / scale for s in (32, 64, 128)]
		# Base anchor size of each layer (Inception3, Conv3_2, Conv4_2), normalised.
		# Original note: changing 32 to 64 gives more positive anchor/label matches.
		sizes = [s / scale for s in (32, 256, 512)]
		# Multipliers applied to the base size; every anchor stays square.
		aspect_ratios = ((1,2,4), (1,), (1,))
		# Feature-map side length of each detection layer.
		feature_map_sizes = (32, 16, 8)
		# Centre offsets (in eighths of the anchor size) used to densify the
		# first layer; one offset list per entry of aspect_ratios[0].
		# Use [[0],[0],[0]] to disable densification.
		density = [[-3,-1,1,3],[-1,1],[0]]

		boxes = []
		for i, fmsize in enumerate(feature_map_sizes):
			s = sizes[i]
			for h, w in itertools.product(range(fmsize), repeat=2):
				# Anchor centre for this feature-map cell.
				cx = (w + 0.5)*steps[i]
				cy = (h + 0.5)*steps[i]
				for j, ar in enumerate(aspect_ratios[i]):
					if i == 0:
						# First layer only: shifted copies of each anchor.
						for dx, dy in itertools.product(density[j], repeat=2):
							boxes.append((cx+dx/8.*s*ar, cy+dy/8.*s*ar, s*ar, s*ar))
					else:
						boxes.append((cx, cy, s*ar, s*ar))

		self.default_boxes = torch.Tensor(boxes)

	def test_iou(self):
		"""Print the IoU of one box against two identical half-overlapping boxes."""
		box1 = torch.Tensor([0,0,10,10])
		box1 = box1[None,:]
		box2 = torch.Tensor([[5,0,15,10],[5,0,15,10]])
		print('iou', self.iou(box1, box2))

	def iou(self, box1, box2):
		'''Compute the intersection over union of two sets of boxes, each box is [x1,y1,x2,y2].

		Args:
		  box1: (tensor) bounding boxes, sized [N,4].
		  box2: (tensor) bounding boxes, sized [M,4].

		Return:
		  (tensor) iou, sized [N,M].
		'''
		# Broadcast [N,1,2] against [1,M,2] to get every pairwise corner.
		lt = torch.max(box1[:, None, :2], box2[None, :, :2])  # left top, [N,M,2]
		rb = torch.min(box1[:, None, 2:], box2[None, :, 2:])  # right bottom, [N,M,2]

		wh = (rb - lt).clamp(min=0)  # disjoint pairs get zero extent
		inter = wh[:,:,0] * wh[:,:,1]  # [N,M]

		area1 = (box1[:,2]-box1[:,0]) * (box1[:,3]-box1[:,1])  # [N,]
		area2 = (box2[:,2]-box2[:,0]) * (box2[:,3]-box2[:,1])  # [M,]

		return inter / (area1[:, None] + area2[None, :] - inter)

	def _draw_anchor_centers(self, im, conf, start, stop, w, h, neg_radius):
		"""Mark the centres of default boxes ``start`` .. ``stop-1`` on ``im``.

		Positive anchors (``conf != 0``) get a green circle of radius 4, the
		others a red circle of radius ``neg_radius``.
		"""
		to_pixels = self.default_boxes.new_tensor([w, h])
		centers = (self.default_boxes[start:stop, :2] * to_pixels).long().tolist()
		positive = (conf[start:stop] != 0).tolist()
		for (cx, cy), is_positive in zip(centers, positive):
			if is_positive:
				cv2.circle(im, (cx, cy), 4, (0,255,0))
			else:
				cv2.circle(im, (cx, cy), neg_radius, (0,0,255))

	def test_encode(self, boxes, img, label):
		"""Debug helper: encode ``boxes`` and write anchor visualisations to disk.

		Args:
		  boxes: (tensor) ground-truth boxes [num_obj,4], normalised (x1,y1,x2,y2).
		  img: image array for OpenCV; the ground-truth boxes are drawn on it in place.
		  label: (tensor) class label per box, forwarded to ``encode``.

		Writes ``test_encoder_0.jpg``, ``test_encoder_1.jpg`` and
		``test_encoder_2.jpg`` (one per detection layer) to the working directory.
		"""
		loc, conf = self.encode(boxes, label)
		print('conf', type(conf), conf.size(), conf.long().sum())
		print('loc', loc)
		# Image arrays are indexed (rows, cols, ...), i.e. height comes first.
		h, w = img.shape[:2]
		for box in boxes:
			cv2.rectangle(img, (int(box[0]*w),int(box[1]*h)), (int(box[2]*w), int(box[3]*h)), (0,255,0))

		print(type(conf))
		# Indices of all anchors that were matched to a face.
		for i in conf.nonzero().reshape(-1).tolist():
			print(i)

		# Anchor index ranges of the three layers (see __init__ for the layout).
		layer0_end = 32*32*21
		layer1_end = layer0_end + 16*16
		# (start, stop, radius for negatives, anchors whose size is outlined)
		layers = (
			(0, layer0_end, 1, (0, 16, 20)),
			(layer0_end, layer1_end, 2, (layer0_end,)),
			(layer1_end, len(self.default_boxes), 2, (layer1_end,)),
		)
		for n, (start, stop, neg_radius, samples) in enumerate(layers):
			im = img.copy()
			self._draw_anchor_centers(im, conf, start, stop, w, h, neg_radius)
			for idx in samples:
				# Outline the anchor size from the top-left corner of the image.
				box = self.default_boxes[idx]
				cv2.rectangle(im, (0,0), (int(box[2]*w), int(box[3]*h)), (0,255,0))
			cv2.imwrite('test_encoder_%d.jpg' % n, im)

	def encode(self,boxes,classes,threshold=0.35):
		'''Match ground-truth boxes to the default boxes and build training targets.

		Args:
		  boxes: (tensor) ground-truth boxes [num_obj,4], normalised (x1,y1,x2,y2).
		  classes: (tensor) class label of each box, sized [num_obj,].
		  threshold: anchors whose best IoU is below this value become background.

		Return:
		  loc: (tensor) regression targets, sized [num_default_boxes,4].
		  conf: (tensor) class target per default box, sized [num_default_boxes,];
		    0 means background.

		Raises:
		  ValueError: if a box yields a non-finite size target (for example a
		    zero-width label), which would otherwise produce an inf/NaN loss.
		'''
		boxes_org = boxes
		default_boxes = self.default_boxes  # [num_default_boxes,4] as (cx,cy,w,h)
		num_default_boxes = default_boxes.size(0)
		num_obj = boxes.size(0)  # number of faces in the image

		if num_obj == 0:
			# No faces: every default box is background.
			return (default_boxes.new_zeros((num_default_boxes, 4)),
					classes.new_zeros((num_default_boxes,)))

		# IoU between every ground-truth box and every default box (as corners).
		iou = self.iou(
			boxes,
			torch.cat([default_boxes[:,:2] - default_boxes[:,2:]/2,
						default_boxes[:,:2] + default_boxes[:,2:]/2], 1))
		# Best default box for each ground-truth box, whatever the IoU value.
		max_iou, max_iou_index = iou.max(1)
		# Best ground-truth box for each default box.
		iou, max_index = iou.max(0)
		max_index = max_index.view(-1)
		iou = iou.view(-1)

		# Make sure each object's best default box points back at that object.
		max_index[max_iou_index] = torch.arange(
			num_obj, dtype=max_index.dtype, device=max_index.device)

		# Expand the labels to one ground-truth box per default box.
		boxes = boxes[max_index]  # [num_default_boxes,4]
		variances = [0.1, 0.2]
		# Centre offset relative to the default box, scaled by its size.
		cxcy = (boxes[:,:2] + boxes[:,2:])/2 - default_boxes[:,:2]
		cxcy /= variances[0] * default_boxes[:,2:]
		# Log-ratio of the box size to the default box size.
		wh = (boxes[:,2:] - boxes[:,:2]) / default_boxes[:,2:]
		wh = torch.log(wh) / variances[1]
		# Zero-width boxes give -inf here, negative-width boxes give NaN.
		invalid = (wh.abs() > 10000) | torch.isnan(wh)
		if bool(invalid.any()):
			raise ValueError(
				'inf error: non-finite size target; org_boxes %s, max_iou %s, max_iou_index %s'
				% (boxes_org, max_iou, max_iou_index))
		loc = torch.cat([cxcy, wh], 1)  # [num_default_boxes,4]
		conf = classes[max_index]  # [num_default_boxes,]
		conf[iou < threshold] = 0  # low IoU -> background
		# Original note: forcing each object's best default box to be positive
		# is what lets zero-width WiderFace labels through (max(1) must pick a
		# default box for every object) and leads to inf loc loss; the dataset
		# labels need cleaning.
		conf[max_iou_index] = 1

		return loc,conf

	def nms(self,bboxes,scores,threshold=0.5):
		'''Greedy non-maximum suppression.

		Args:
		  bboxes: (tensor) boxes as (x1,y1,x2,y2), sized [N,4].
		  scores: (tensor) score of each box, sized [N,].
		  threshold: boxes overlapping a kept box with IoU above this are dropped.

		Return:
		  (LongTensor) indices of the kept boxes, highest score first.
		'''
		x1 = bboxes[:,0]
		y1 = bboxes[:,1]
		x2 = bboxes[:,2]
		y2 = bboxes[:,3]
		areas = (x2-x1) * (y2-y1)

		_,order = scores.reshape(-1).sort(0,descending=True)
		keep = []
		while order.numel() > 0:
			i = order[0].item()
			keep.append(i)

			if order.numel() == 1:
				break

			rest = order[1:]
			# Intersection of the kept box with every remaining box.
			xx1 = x1[rest].clamp(min=x1[i].item())
			yy1 = y1[rest].clamp(min=y1[i].item())
			xx2 = x2[rest].clamp(max=x2[i].item())
			yy2 = y2[rest].clamp(max=y2[i].item())

			w = (xx2-xx1).clamp(min=0)
			h = (yy2-yy1).clamp(min=0)
			inter = w*h

			ovr = inter / (areas[i] + areas[rest] - inter)
			# reshape(-1) keeps this 1-D even when a single box survives;
			# a bare squeeze() would yield a 0-dim tensor that cannot be indexed.
			ids = (ovr<=threshold).nonzero().reshape(-1)
			if ids.numel() == 0:
				break
			order = rest[ids]
		return torch.tensor(keep, dtype=torch.long)

	def decode(self,loc,conf):
		'''Turn predicted loc/conf back into face boxes (the inverse of ``encode``).

		Args:
		  loc: (tensor) predicted offsets, sized [num_default_boxes,4].
		  conf: (tensor) predicted scores, sized [num_default_boxes,2];
		    column 0 is background. The tensor is not modified.

		Return:
		  boxes: (tensor) kept boxes as normalised (x1,y1,x2,y2).
		  labels: (tensor) label of each kept box.
		  max_conf: (tensor) score of each kept box.
		'''
		variances = [0.1, 0.2]
		cxcy = loc[:,:2] * variances[0] * self.default_boxes[:,2:] + self.default_boxes[:,:2]
		wh  = torch.exp(loc[:,2:] * variances[1]) * self.default_boxes[:,2:]
		boxes = torch.cat([cxcy-wh/2,cxcy+wh/2],1)  # [num_default_boxes,4]

		# Work on a copy so the caller's scores are left untouched.
		conf = conf.clone()
		# A fixed background score acts as the detection threshold.
		conf[:,0] = 0.4

		max_conf, labels = conf.max(1)
		if labels.long().sum().item() == 0:
			# Nothing beat the threshold: promote the top entry of each column
			# so that the result is never empty.
			sconf, slabel = conf.max(0)
			max_conf[slabel[0:5]] = sconf[0:5]
			labels[slabel[0:5]] = 1

		ids = labels.nonzero().squeeze(1)

		keep = self.nms(boxes[ids],max_conf[ids])
		# The returned boxes are normalised coordinates.
		return boxes[ids][keep], labels[ids][keep], max_conf[ids][keep]

if __name__ == '__main__':
	# Smoke test: encode two boxes given in pixels of a 1024-pixel image,
	# both labelled as class 1.
	encoder = DataEncoder()
	pixel_boxes = torch.Tensor([[-8,-8,24,24],[400,400,500,500]])
	face_labels = torch.Tensor([1,1])
	encoder.encode(pixel_boxes/1024, face_labels)

encoderl.py

代码语言:python
复制
#encoding:utf-8

import torch
import math
import itertools
import cv2
import numpy as np

class DataEncoder:
	"""Default-box (anchor) generator plus target encoder/decoder for FaceBoxes.

	``self.default_boxes`` is a float tensor of shape [21824, 4] holding the
	anchors as normalised (cx, cy, w, h), laid out layer after layer:
	32*32*21 boxes for the first layer, then 16*16 and 8*8 boxes.
	"""

	def __init__(self):
		"""Compute the default boxes and store them in ``self.default_boxes``."""
		# Side length of the training image.
		scale = 1024.
		# Tiling interval of the anchors on each layer, relative to the image size.
		steps = [s / scale for s in (32, 64, 128)]
		# Base anchor size of each layer (Inception3, Conv3_2, Conv4_2), normalised.
		# Original note: changing 32 to 64 gives more positive anchor/label matches.
		sizes = [s / scale for s in (32, 256, 512)]
		# Multipliers applied to the base size; every anchor stays square.
		aspect_ratios = ((1,2,4), (1,), (1,))
		# Feature-map side length of each detection layer.
		feature_map_sizes = (32, 16, 8)
		# Centre offsets (in eighths of the anchor size) used to densify the
		# first layer; one offset list per entry of aspect_ratios[0].
		# Use [[0],[0],[0]] to disable densification.
		density = [[-3,-1,1,3],[-1,1],[0]]

		boxes = []
		for i, fmsize in enumerate(feature_map_sizes):
			s = sizes[i]
			for h, w in itertools.product(range(fmsize), repeat=2):
				# Anchor centre for this feature-map cell.
				cx = (w + 0.5)*steps[i]
				cy = (h + 0.5)*steps[i]
				for j, ar in enumerate(aspect_ratios[i]):
					if i == 0:
						# First layer only: shifted copies of each anchor.
						for dx, dy in itertools.product(density[j], repeat=2):
							boxes.append((cx+dx/8.*s*ar, cy+dy/8.*s*ar, s*ar, s*ar))
					else:
						boxes.append((cx, cy, s*ar, s*ar))

		self.default_boxes = torch.Tensor(boxes)

	def test_iou(self):
		"""Print the IoU of one box against two identical half-overlapping boxes."""
		box1 = torch.Tensor([0,0,10,10])
		box1 = box1[None,:]
		box2 = torch.Tensor([[5,0,15,10],[5,0,15,10]])
		print('iou', self.iou(box1, box2))

	def iou(self, box1, box2):
		'''Compute the intersection over union of two sets of boxes, each box is [x1,y1,x2,y2].

		Args:
		  box1: (tensor) bounding boxes, sized [N,4].
		  box2: (tensor) bounding boxes, sized [M,4].

		Return:
		  (tensor) iou, sized [N,M].
		'''
		# Broadcast [N,1,2] against [1,M,2] to get every pairwise corner.
		lt = torch.max(box1[:, None, :2], box2[None, :, :2])  # left top, [N,M,2]
		rb = torch.min(box1[:, None, 2:], box2[None, :, 2:])  # right bottom, [N,M,2]

		wh = (rb - lt).clamp(min=0)  # disjoint pairs get zero extent
		inter = wh[:,:,0] * wh[:,:,1]  # [N,M]

		area1 = (box1[:,2]-box1[:,0]) * (box1[:,3]-box1[:,1])  # [N,]
		area2 = (box2[:,2]-box2[:,0]) * (box2[:,3]-box2[:,1])  # [M,]

		return inter / (area1[:, None] + area2[None, :] - inter)

	def _draw_anchor_centers(self, im, conf, start, stop, w, h, neg_radius):
		"""Mark the centres of default boxes ``start`` .. ``stop-1`` on ``im``.

		Positive anchors (``conf != 0``) get a green circle of radius 4, the
		others a red circle of radius ``neg_radius``.
		"""
		to_pixels = self.default_boxes.new_tensor([w, h])
		centers = (self.default_boxes[start:stop, :2] * to_pixels).long().tolist()
		positive = (conf[start:stop] != 0).tolist()
		for (cx, cy), is_positive in zip(centers, positive):
			if is_positive:
				cv2.circle(im, (cx, cy), 4, (0,255,0))
			else:
				cv2.circle(im, (cx, cy), neg_radius, (0,0,255))

	def test_encode(self, boxes, img, label):
		"""Debug helper: encode ``boxes`` and write anchor visualisations to disk.

		Args:
		  boxes: (tensor) ground-truth boxes [num_obj,4], normalised (x1,y1,x2,y2).
		  img: image array for OpenCV; the ground-truth boxes are drawn on it in place.
		  label: (tensor) class label per box, forwarded to ``encode``.

		Writes ``test_encoder_0.jpg``, ``test_encoder_1.jpg`` and
		``test_encoder_2.jpg`` (one per detection layer) to the working directory.
		"""
		loc, conf = self.encode(boxes, label)
		print('conf', type(conf), conf.size(), conf.long().sum())
		print('loc', loc)
		# Image arrays are indexed (rows, cols, ...), i.e. height comes first.
		h, w = img.shape[:2]
		for box in boxes:
			cv2.rectangle(img, (int(box[0]*w),int(box[1]*h)), (int(box[2]*w), int(box[3]*h)), (0,255,0))

		print(type(conf))
		# Indices of all anchors that were matched to a face.
		for i in conf.nonzero().reshape(-1).tolist():
			print(i)

		# Anchor index ranges of the three layers (see __init__ for the layout).
		layer0_end = 32*32*21
		layer1_end = layer0_end + 16*16
		# (start, stop, radius for negatives, anchors whose size is outlined)
		layers = (
			(0, layer0_end, 1, (0, 16, 20)),
			(layer0_end, layer1_end, 2, (layer0_end,)),
			(layer1_end, len(self.default_boxes), 2, (layer1_end,)),
		)
		for n, (start, stop, neg_radius, samples) in enumerate(layers):
			im = img.copy()
			self._draw_anchor_centers(im, conf, start, stop, w, h, neg_radius)
			for idx in samples:
				# Outline the anchor size from the top-left corner of the image.
				box = self.default_boxes[idx]
				cv2.rectangle(im, (0,0), (int(box[2]*w), int(box[3]*h)), (0,255,0))
			cv2.imwrite('test_encoder_%d.jpg' % n, im)

	def encode(self,boxes,classes,threshold=0.35):
		'''Match ground-truth boxes to the default boxes and build training targets.

		Args:
		  boxes: (tensor) ground-truth boxes [num_obj,4], normalised (x1,y1,x2,y2).
		  classes: (tensor) class label of each box, sized [num_obj,].
		  threshold: anchors whose best IoU is below this value become background.

		Return:
		  loc: (tensor) regression targets, sized [num_default_boxes,4].
		  conf: (tensor) class target per default box, sized [num_default_boxes,];
		    0 means background.

		Raises:
		  ValueError: if a box yields a non-finite size target (for example a
		    zero-width label), which would otherwise produce an inf/NaN loss.
		'''
		boxes_org = boxes
		default_boxes = self.default_boxes  # [num_default_boxes,4] as (cx,cy,w,h)
		num_default_boxes = default_boxes.size(0)
		num_obj = boxes.size(0)  # number of faces in the image

		if num_obj == 0:
			# No faces: every default box is background.
			return (default_boxes.new_zeros((num_default_boxes, 4)),
					classes.new_zeros((num_default_boxes,)))

		# IoU between every ground-truth box and every default box (as corners).
		iou = self.iou(
			boxes,
			torch.cat([default_boxes[:,:2] - default_boxes[:,2:]/2,
						default_boxes[:,:2] + default_boxes[:,2:]/2], 1))
		# Best default box for each ground-truth box, whatever the IoU value.
		max_iou, max_iou_index = iou.max(1)
		# Best ground-truth box for each default box.
		iou, max_index = iou.max(0)
		max_index = max_index.view(-1)
		iou = iou.view(-1)

		# Make sure each object's best default box points back at that object.
		max_index[max_iou_index] = torch.arange(
			num_obj, dtype=max_index.dtype, device=max_index.device)

		# Expand the labels to one ground-truth box per default box.
		boxes = boxes[max_index]  # [num_default_boxes,4]
		variances = [0.1, 0.2]
		# Centre offset relative to the default box, scaled by its size.
		cxcy = (boxes[:,:2] + boxes[:,2:])/2 - default_boxes[:,:2]
		cxcy /= variances[0] * default_boxes[:,2:]
		# Log-ratio of the box size to the default box size.
		wh = (boxes[:,2:] - boxes[:,:2]) / default_boxes[:,2:]
		wh = torch.log(wh) / variances[1]
		# Zero-width boxes give -inf here, negative-width boxes give NaN.
		invalid = (wh.abs() > 10000) | torch.isnan(wh)
		if bool(invalid.any()):
			raise ValueError(
				'inf error: non-finite size target; org_boxes %s, max_iou %s, max_iou_index %s'
				% (boxes_org, max_iou, max_iou_index))
		loc = torch.cat([cxcy, wh], 1)  # [num_default_boxes,4]
		conf = classes[max_index]  # [num_default_boxes,]
		conf[iou < threshold] = 0  # low IoU -> background
		# Original note: forcing each object's best default box to be positive
		# is what lets zero-width WiderFace labels through (max(1) must pick a
		# default box for every object) and leads to inf loc loss; the dataset
		# labels need cleaning.
		conf[max_iou_index] = 1

		return loc,conf

	def nms(self,bboxes,scores,threshold=0.5):
		'''Greedy non-maximum suppression.

		Args:
		  bboxes: (tensor) boxes as (x1,y1,x2,y2), sized [N,4].
		  scores: (tensor) score of each box, sized [N,].
		  threshold: boxes overlapping a kept box with IoU above this are dropped.

		Return:
		  (LongTensor) indices of the kept boxes, highest score first.
		'''
		x1 = bboxes[:,0]
		y1 = bboxes[:,1]
		x2 = bboxes[:,2]
		y2 = bboxes[:,3]
		areas = (x2-x1) * (y2-y1)

		_,order = scores.reshape(-1).sort(0,descending=True)
		keep = []
		while order.numel() > 0:
			i = order[0].item()
			keep.append(i)

			if order.numel() == 1:
				break

			rest = order[1:]
			# Intersection of the kept box with every remaining box.
			xx1 = x1[rest].clamp(min=x1[i].item())
			yy1 = y1[rest].clamp(min=y1[i].item())
			xx2 = x2[rest].clamp(max=x2[i].item())
			yy2 = y2[rest].clamp(max=y2[i].item())

			w = (xx2-xx1).clamp(min=0)
			h = (yy2-yy1).clamp(min=0)
			inter = w*h

			ovr = inter / (areas[i] + areas[rest] - inter)
			# reshape(-1) keeps this 1-D even when a single box survives;
			# a bare squeeze() would yield a 0-dim tensor that cannot be indexed.
			ids = (ovr<=threshold).nonzero().reshape(-1)
			if ids.numel() == 0:
				break
			order = rest[ids]
		return torch.tensor(keep, dtype=torch.long)

	def decode(self,loc,conf):
		'''Turn predicted loc/conf back into face boxes (the inverse of ``encode``).

		Args:
		  loc: (tensor) predicted offsets, sized [num_default_boxes,4].
		  conf: (tensor) predicted scores, sized [num_default_boxes,2];
		    column 0 is background. The tensor is not modified.

		Return:
		  boxes: (tensor) kept boxes as normalised (x1,y1,x2,y2).
		  labels: (tensor) label of each kept box.
		  max_conf: (tensor) score of each kept box.
		'''
		variances = [0.1, 0.2]
		cxcy = loc[:,:2] * variances[0] * self.default_boxes[:,2:] + self.default_boxes[:,:2]
		wh  = torch.exp(loc[:,2:] * variances[1]) * self.default_boxes[:,2:]
		boxes = torch.cat([cxcy-wh/2,cxcy+wh/2],1)  # [num_default_boxes,4]

		# Work on a copy so the caller's scores are left untouched.
		conf = conf.clone()
		# A fixed background score acts as the detection threshold.
		conf[:,0] = 0.4

		max_conf, labels = conf.max(1)
		if labels.long().sum().item() == 0:
			# Nothing beat the threshold: promote the top entry of each column
			# so that the result is never empty.
			sconf, slabel = conf.max(0)
			max_conf[slabel[0:5]] = sconf[0:5]
			labels[slabel[0:5]] = 1

		ids = labels.nonzero().squeeze(1)

		keep = self.nms(boxes[ids],max_conf[ids])
		# The returned boxes are normalised coordinates.
		return boxes[ids][keep], labels[ids][keep], max_conf[ids][keep]

if __name__ == '__main__':
	# Smoke test: encode two boxes given in pixels of a 1024-pixel image,
	# both labelled as class 1.
	encoder = DataEncoder()
	pixel_boxes = torch.Tensor([[-8,-8,24,24],[400,400,500,500]])
	face_labels = torch.Tensor([1,1])
	encoder.encode(pixel_boxes/1024, face_labels)
本文参与 腾讯云自媒体同步曝光计划,分享自作者个人站点/博客。
原始发表:2018年12月06日,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
相关产品与服务
批量计算
批量计算(BatchCompute,Batch)是为有大数据计算业务的企业、科研单位等提供高性价比且易用的计算服务。批量计算 Batch 可以根据用户提供的批处理规模,智能地管理作业和调动其所需的最佳资源。有了 Batch 的帮助,您可以将精力集中在如何分析和处理数据结果上。
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档