Paper: CenterFace: Joint Face Detection and Alignment Using Face as Point
GitHub: https://github.com/Star-Clouds/CenterFace
The paper builds on CenterNet and proposes an anchor-free face detection framework that performs face detection and landmark localization simultaneously. It beats mainstream frameworks such as MTCNN and FaceBoxes in both accuracy and speed.
Main contributions:
- An anchor-free face detection design that recasts face detection as a keypoint estimation problem. Unlike earlier detectors, the model's output is downsampled by only 4x.
- A multi-task learning strategy that learns face detection and landmark localization jointly.
- A network structure that adopts FPN-style feature fusion.
- Extensive experiments showing leading speed and accuracy.
Drawbacks of cascaded detectors:
- Inference speed depends on the number of faces in the image; as the face count grows, inference slows down sharply.
- Each stage is trained separately, which makes the training pipeline cumbersome.
- Training is not end-to-end, which limits the overall accuracy.
Network architecture:
The network uses MobileNetV2 as its backbone, which downsamples the input 5 times (to 1/32 resolution). Three upsampling layers are appended after the last MobileNetV2 layer, so the final output is downsampled only twice overall, i.e. 1/4 of the input resolution, as sketched below.
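A minimal PyTorch sketch of this stride arithmetic (the channel widths and the plain deconv blocks are my own assumptions for brevity; the real model also fuses intermediate backbone features FPN-style):

import torch
import torch.nn as nn
import torchvision

# MobileNetV2 backbone (stride 32) + three 2x upsampling steps
# -> stride-4 output heads, as described above.
class CenterFaceSketch(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = torchvision.models.mobilenet_v2().features  # 1/32 resolution
        layers, c = [], 1280
        for _ in range(3):  # 1/32 -> 1/16 -> 1/8 -> 1/4
            layers += [nn.ConvTranspose2d(c, 24, kernel_size=2, stride=2), nn.ReLU(inplace=True)]
            c = 24
        self.up = nn.Sequential(*layers)
        self.heatmap = nn.Conv2d(24, 1, 1)    # face center heatmap
        self.scale = nn.Conv2d(24, 2, 1)      # log-scale box w, h
        self.offset = nn.Conv2d(24, 2, 1)     # center offset x, y
        self.landmark = nn.Conv2d(24, 10, 1)  # 5 landmarks (x, y)

    def forward(self, x):
        f = self.up(self.backbone(x))
        return torch.sigmoid(self.heatmap(f)), self.scale(f), self.offset(f), self.landmark(f)

outs = CenterFaceSketch()(torch.randn(1, 3, 480, 640))
print([tuple(o.shape) for o in outs])  # all (1, C, 120, 160), i.e. 480/4 x 640/4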
Loss functions:
Face classification loss: a pixel-wise modified focal loss over the stride-4 center heatmap, as in CenterNet:

$$L_c = -\frac{1}{N}\sum_{xy}\begin{cases}(1-\hat{Y}_{xy})^{\alpha}\log\hat{Y}_{xy}, & Y_{xy}=1\\(1-Y_{xy})^{\beta}\,\hat{Y}_{xy}^{\alpha}\log(1-\hat{Y}_{xy}), & \text{otherwise}\end{cases}$$

where $\hat{Y}$ is the predicted heatmap, $Y$ the Gaussian-splatted ground truth, $N$ the number of faces in the image, and $\alpha = 2$, $\beta = 4$.

Box center offset loss: the stride-4 output introduces a discretization error, so a smooth-L1 loss $L_{off}$ regresses the fractional offset $\big(\tfrac{x}{4}-\lfloor\tfrac{x}{4}\rfloor,\ \tfrac{y}{4}-\lfloor\tfrac{y}{4}\rfloor\big)$ at each face center.

Box width/height loss: a smooth-L1 loss $L_{box}$ on the log-scale size targets $\big(\log\tfrac{w}{4},\ \log\tfrac{h}{4}\big)$; the decode step in the inference code below inverts this via $\exp(\cdot)\times 4$.

Landmark loss: a smooth-L1 loss $L_{lm}$ on the five landmark coordinates, expressed relative to the face center and normalized by the box size.

Total loss:

$$L = L_c + \lambda_{off}L_{off} + \lambda_{box}L_{box} + \lambda_{lm}L_{lm},\qquad \lambda_{off}=1,\ \lambda_{box}=0.1,\ \lambda_{lm}=0.1.$$
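As a sanity check, here is a minimal PyTorch rendering of the classification loss above (my own sketch, not the authors' training code; pred is the sigmoid heatmap and gt the Gaussian-splatted target):

import torch

def center_focal_loss(pred, gt, alpha=2, beta=4, eps=1e-12):
    # pred, gt: (B, 1, H, W); gt equals 1.0 exactly at face centers
    # and decays as a Gaussian around them.
    pos = gt.eq(1).float()
    neg = 1.0 - pos
    pos_loss = pos * (1 - pred) ** alpha * torch.log(pred + eps)
    # (1 - gt)^beta down-weights negatives that sit close to a true center
    neg_loss = neg * (1 - gt) ** beta * pred ** alpha * torch.log(1 - pred + eps)
    num_pos = pos.sum().clamp(min=1)  # avoid dividing by zero on face-free images
    return -(pos_loss.sum() + neg_loss.sum()) / num_pos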
Experimental results:
Inference speed:
FDDB accuracy:
WIDER FACE accuracy:
ONNX inference:
The ONNX format makes it convenient to programmatically inspect and edit the ops in a graph: adding and removing nodes, modifying input/output dimensions, and so on. Inference through ONNX Runtime is also slightly faster than inference through PyTorch. The drawback is that the exported graph is fixed, so this model does not accept dynamic input sizes.
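For instance, deleting or appending a node is just an edit of the underlying protobuf (the node name below is a placeholder; list model.graph.node or open the model in Netron to find the real ones — "537" is one of this model's actual output tensors):

import onnx
from onnx import helper

model = onnx.load("../models/onnx/centerface.onnx")

# remove a node by name (placeholder name, for illustration only)
for node in list(model.graph.node):
    if node.name == "node_to_drop":
        model.graph.node.remove(node)

# append a node, e.g. a Sigmoid reading the existing tensor "537"
model.graph.node.append(
    helper.make_node("Sigmoid", inputs=["537"], outputs=["537_sigmoid"], name="post_sigmoid"))

onnx.checker.check_model(model)  # catches dangling edges after editing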
First, use change_onnx.py to modify the input dimensions of the ONNX model provided by the author:
import onnx

model = onnx.load("../models/onnx/centerface.onnx")
# The model is represented as a protobuf structure and it can be accessed
# using the standard python-for-protobuf methods

# iterate through inputs of the graph
for graph_input in model.graph.input:
    print(graph_input.name, end=": ")
    # get type of input tensor
    tensor_type = graph_input.type.tensor_type
    # check if it has a shape:
    if tensor_type.HasField("shape"):
        # iterate through dimensions of the shape:
        for num, d in enumerate(tensor_type.shape.dim):
            # the dimension may have a definite (integer) value,
            # a symbolic identifier, or neither:
            if d.HasField("dim_value"):
                # pin the static NCHW input shape to 1 x 3 x 480 x 640
                if num == 0:
                    d.dim_value = 1
                if num == 2:
                    d.dim_value = 480
                if num == 3:
                    d.dim_value = 640
                print(d.dim_value, end=", ")  # known dimension
            elif d.HasField("dim_param"):
                print(d.dim_param, end=", ")  # unknown dimension with symbolic name
            else:
                print("?", end=", ")  # unknown dimension with no name
    else:
        print("unknown rank", end="")
    print()
    break

onnx.checker.check_model(model)
onnx.save(model, 'out.onnx')

"""
# alternative: mark the dimensions as symbolic for dynamic input sizes
model = onnx.load('models/centerface.onnx')
model.graph.input[0].type.tensor_type.shape.dim[0].dim_param = '?'
model.graph.input[0].type.tensor_type.shape.dim[1].dim_param = '3'
model.graph.input[0].type.tensor_type.shape.dim[2].dim_param = '?'
model.graph.input[0].type.tensor_type.shape.dim[3].dim_param = '?'
onnx.save(model, 'dynamic_model.onnx')
"""

The edited model is saved as out.onnx; compare its input dimensions with those of the original centerface.onnx:
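One quick way to do that comparison programmatically (input_shape is a hypothetical helper, not part of the repo):

import onnx

def input_shape(path):
    # read the first graph input's NCHW dims from the protobuf
    dims = onnx.load(path).graph.input[0].type.tensor_type.shape.dim
    return [d.dim_value if d.HasField("dim_value") else d.dim_param for d in dims]

print("original:", input_shape("../models/onnx/centerface.onnx"))
print("modified:", input_shape("out.onnx"))  # expect [1, 3, 480, 640]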
Inference code, centerface.py:
import datetime

import cv2
import numpy as np
import onnxruntime


class CenterFace(object):
    def __init__(self, landmarks=True):
        self.landmarks = landmarks
        self.session = onnxruntime.InferenceSession("out.onnx")
        self.inputs = self.session.get_inputs()[0].name
        # output tensor names in the exported graph (run(None, ...) fetches all)
        self.outputs = ["537", "538", "539", "540"]
        self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = 0, 0, 0, 0

    def __call__(self, img, height, width, threshold=0.5):
        # self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = self.transform(height, width)
        # fixed 640x480 input to match the static graph in out.onnx
        self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = 480, 640, 480 / height, 640 / width
        return self.inference_opencv(img, threshold)

    def inference_opencv(self, img, threshold):
        begin = datetime.datetime.now()
        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (self.img_w_new, self.img_h_new))
        # HWC -> NCHW
        input_image = np.expand_dims(np.swapaxes(np.swapaxes(image, 0, 2), 1, 2), 0).astype(np.float32)
        heatmap, scale, offset, lms = self.session.run(None, {self.inputs: input_image})
        end = datetime.datetime.now()
        print("cpu times = ", end - begin)
        return self.postprocess(heatmap, lms, offset, scale, threshold)

    def transform(self, h, w):
        img_h_new, img_w_new = int(np.ceil(h / 32) * 32), int(np.ceil(w / 32) * 32)
        scale_h, scale_w = img_h_new / h, img_w_new / w
        return img_h_new, img_w_new, scale_h, scale_w

    def postprocess(self, heatmap, lms, offset, scale, threshold):
        if self.landmarks:
            dets, lms = self.decode(heatmap, scale, offset, lms, (self.img_h_new, self.img_w_new), threshold=threshold)
        else:
            dets = self.decode(heatmap, scale, offset, None, (self.img_h_new, self.img_w_new), threshold=threshold)
        if len(dets) > 0:
            # map boxes (and landmarks) back to original image coordinates
            dets[:, 0:4:2], dets[:, 1:4:2] = dets[:, 0:4:2] / self.scale_w, dets[:, 1:4:2] / self.scale_h
            if self.landmarks:
                lms[:, 0:10:2], lms[:, 1:10:2] = lms[:, 0:10:2] / self.scale_w, lms[:, 1:10:2] / self.scale_h
        else:
            dets = np.empty(shape=[0, 5], dtype=np.float32)
            if self.landmarks:
                lms = np.empty(shape=[0, 10], dtype=np.float32)
        if self.landmarks:
            return dets, lms
        else:
            return dets

    def decode(self, heatmap, scale, offset, landmark, size, threshold=0.1):
        heatmap = np.squeeze(heatmap)
        scale0, scale1 = scale[0, 0, :, :], scale[0, 1, :, :]
        offset0, offset1 = offset[0, 0, :, :], offset[0, 1, :, :]
        c0, c1 = np.where(heatmap > threshold)
        if self.landmarks:
            boxes, lms = [], []
        else:
            boxes = []
        if len(c0) > 0:
            for i in range(len(c0)):
                s0, s1 = np.exp(scale0[c0[i], c1[i]]) * 4, np.exp(scale1[c0[i], c1[i]]) * 4
                o0, o1 = offset0[c0[i], c1[i]], offset1[c0[i], c1[i]]
                s = heatmap[c0[i], c1[i]]
                x1, y1 = max(0, (c1[i] + o1 + 0.5) * 4 - s1 / 2), max(0, (c0[i] + o0 + 0.5) * 4 - s0 / 2)
                x1, y1 = min(x1, size[1]), min(y1, size[0])
                boxes.append([x1, y1, min(x1 + s1, size[1]), min(y1 + s0, size[0]), s])
                if self.landmarks:
                    lm = []
                    for j in range(5):
                        lm.append(landmark[0, j * 2 + 1, c0[i], c1[i]] * s1 + x1)
                        lm.append(landmark[0, j * 2, c0[i], c1[i]] * s0 + y1)
                    lms.append(lm)
            boxes = np.asarray(boxes, dtype=np.float32)
            keep = self.nms(boxes[:, :4], boxes[:, 4], 0.3)
            boxes = boxes[keep, :]
            if self.landmarks:
                lms = np.asarray(lms, dtype=np.float32)
                lms = lms[keep, :]
        if self.landmarks:
            return boxes, lms
        else:
            return boxes

    def nms(self, boxes, scores, nms_thresh):
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = np.argsort(scores)[::-1]
        num_detections = boxes.shape[0]
        suppressed = np.zeros((num_detections,), dtype=bool)  # np.bool was removed in NumPy 1.24
        keep = []
        for _i in range(num_detections):
            i = order[_i]
            if suppressed[i]:
                continue
            keep.append(i)
            ix1 = x1[i]
            iy1 = y1[i]
            ix2 = x2[i]
            iy2 = y2[i]
            iarea = areas[i]
            for _j in range(_i + 1, num_detections):
                j = order[_j]
                if suppressed[j]:
                    continue
                xx1 = max(ix1, x1[j])
                yy1 = max(iy1, y1[j])
                xx2 = min(ix2, x2[j])
                yy2 = min(iy2, y2[j])
                w = max(0, xx2 - xx1 + 1)
                h = max(0, yy2 - yy1 + 1)
                inter = w * h
                ovr = inter / (iarea + areas[j] - inter)
                if ovr >= nms_thresh:
                    suppressed[j] = True
        return keep

Note how decode inverts the regression targets from the loss section: box width/height come back as exp(scale) * 4, the center as (c + offset + 0.5) * 4, and overlapping boxes are then pruned with standard NMS at an IoU threshold of 0.3.

Inference code, demo.py:
import os

import cv2
import scipy.io as sio

from centerface import CenterFace


def camera():
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    h, w = frame.shape[:2]
    centerface = CenterFace()
    while True:
        ret, frame = cap.read()
        dets, lms = centerface(frame, h, w, threshold=0.35)
        for det in dets:
            boxes, score = det[:4], det[4]
            cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
        for lm in lms:
            for i in range(0, 5):
                cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
        cv2.imshow('out', frame)
        # Press Q on keyboard to stop recording
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()


def test_image():
    # frame = cv2.imread('000388.jpg')
    frame = cv2.imread('test.jpg')
    h, w = frame.shape[:2]
    landmarks = True
    centerface = CenterFace(landmarks=landmarks)
    if landmarks:
        dets, lms = centerface(frame, h, w, threshold=0.35)
    else:
        dets = centerface(frame, h, w, threshold=0.35)
    for det in dets:
        boxes, score = det[:4], det[4]
        cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
    if landmarks:
        for lm in lms:
            for i in range(0, 5):
                cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
    # cv2.imshow('out', frame)
    cv2.imwrite('out.jpg', frame)
    # cv2.waitKey(0)


def test_image_tensorrt():
    frame = cv2.imread('000388.jpg')
    h, w = 480, 640  # must be 480 * 640
    landmarks = True
    # requires the TensorRT-enabled CenterFace variant from the repo
    centerface = CenterFace(landmarks=landmarks, backend="tensorrt")
    if landmarks:
        dets, lms = centerface(frame, h, w, threshold=0.35)
    else:
        dets = centerface(frame, h, w, threshold=0.35)
    for det in dets:
        boxes, score = det[:4], det[4]
        cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
    if landmarks:
        for lm in lms:
            for i in range(0, 5):
                cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
    cv2.imshow('out', frame)
    cv2.waitKey(0)


def test_widerface():
    Path = 'widerface/WIDER_val/images/'
    wider_face_mat = sio.loadmat('widerface/wider_face_split/wider_face_val.mat')
    event_list = wider_face_mat['event_list']
    file_list = wider_face_mat['file_list']
    save_path = 'save_out/'
    for index, event in enumerate(event_list):
        file_list_item = file_list[index][0]
        im_dir = event[0][0]
        # print(save_path + im_dir)
        if not os.path.exists(save_path + im_dir):
            os.makedirs(save_path + im_dir)
        landmarks = True
        centerface = CenterFace(landmarks=landmarks)
        for num, file in enumerate(file_list_item):
            im_name = file[0][0]
            zip_name = '%s/%s.jpg' % (im_dir, im_name)
            print(os.path.join(Path, zip_name))
            img = cv2.imread(os.path.join(Path, zip_name))
            h, w = img.shape[:2]
            if landmarks:
                dets, lms = centerface(img, h, w, threshold=0.05)
            else:
                dets = centerface(img, h, w, threshold=0.05)
            f = open(save_path + im_dir + '/' + im_name + '.txt', 'w')
            f.write('{:s}\n'.format('%s/%s.jpg' % (im_dir, im_name)))
            f.write('{:d}\n'.format(len(dets)))
            for b in dets:
                x1, y1, x2, y2, s = b
                f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(x1, y1, (x2 - x1 + 1), (y2 - y1 + 1), s))
            f.close()
            print('event:%d num:%d' % (index + 1, num + 1))


if __name__ == '__main__':
    # camera()
    test_image()
    # test_widerface()

Final results:
The face detection arena:
My own tests; the reported times include network inference plus post-processing. face++ was measured through its cloud API, so the comparison with it is not entirely fair.
| Method | MTCNN | ultra-face | Retina-Face-mobilenetv1 | Retina-Face-resnet50 | Centerface-mobileNetv2 | face++ |
| --- | --- | --- | --- | --- | --- | --- |
| Model size | 2.9M | 1M | 1.8M | 105M | 7.2M | - |
| CPU, 640x480 (Intel Xeon Silver 4216) | 600ms | 200ms | 140ms | 2000ms | 130ms | 10ms |
| GPU, 640x480 (2080 Ti) | 110ms | 18ms | 38ms | 50ms | 8ms | - |
| CPU, 1280x720 (Intel Xeon Silver 4216) | 1000ms | 500ms | 350ms | 3500ms | 300ms | 10ms |
| GPU, 1280x720 (2080 Ti) | 200ms | 40ms | 100ms | 120ms | 25ms | - |
| CPU, 1920x1080 (Intel Xeon Silver 4216) | 1600ms | - | 800ms | 8000ms | 750ms | 10ms |
| GPU, 1920x1080 (2080 Ti) | 330ms | - | 200ms | 250ms | 50ms | - |
| Accuracy | boxes slightly better, 5-point landmarks good | fewer faces detected | boxes poor, landmarks poor | many box false positives, landmarks mediocre | boxes good, landmarks very poor | boxes good, landmarks good |