YOLOv3的keras版本代码注释

技术2022-07-10 162

源工程来自：https://github.com/qqwweee/keras-yolo3 使用方法参考博客：windows10+keras下的yolov3的快速使用及自己数据集的训练

应该会有小伙伴和我一样，下载工程文件后不知道从何看起，下面是各个文件的作用（加粗字体是文件夹）：

keras-yolo3-master ---- font — 字体文件夹 ---- model_data -------- coco_classes.txt — coco数据集的类别 -------- tiny_yolo_anchors.txt — yolo简化版的6个anchor值（宽，高） -------- voc_classes.txt — voc数据集的类别 -------- yolo_anchors.txt — 预设的9个anchor值 ---- yolo3 -------- _ init _.py — 无作用 -------- model.py — 定义yolov3网络结构、iou的计算以及yolo损失函数 -------- utils.py — 定义一些工具函数 ---- coco_annotation.py — 将coco形式的标签转为yolo形式 ---- convert.py — 将下载的权重文件转为keras形式，即.h5格式 ---- darknet53.cfg — 定义darknet的网络结构，训练时没有用到 ---- kmeans.py — 对标签做kmeans聚类，得到anchor ---- train.py — 定义训练参数的方法 ---- train_bottleneck ---- voc_annotation.py — 将voc形式的标签转为yolo形式 ---- yolo.py — 定义测试用到的方法 ---- yolo_video —定义用户接口，使用这个文件来测试 ---- yolov3.cfg — 定义yolov3网络结构，在转换权重文件时用到，训练时没有用到 ---- yolov3-tiny.cfg — 定义yolov3简化版的网络结构，训练时没有用到

想直接上手测试的话从yolo_video.py和yolo.py看起需要训练自己的数据集的话建议从train.py看起。

下面是主要代码文件的注释：(个人理解，可能有错误） yolo.py model.py utils.py train.py

yolo.py

# -*- coding: utf-8 -*- """ Class definition of YOLO_v3 style detection model on image and video """ import colorsys import os from timeit import default_timer as timer import numpy as np from keras import backend as K from keras.models import load_model from keras.layers import Input from PIL import Image, ImageFont, ImageDraw from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body from yolo3.utils import letterbox_image import os from keras.utils import multi_gpu_model class YOLO(object): _defaults = { "model_path": 'model_data/yolo.h5',#权重文件路径 "anchors_path": 'model_data/yolo_anchors.txt',#anchor路径 "classes_path": 'model_data/coco_classes.txt',#类别路径 "score" : 0.3,#置信度阈值 "iou" : 0.45,#交并比阈值 "model_image_size" : (416, 416),#图片尺寸，会影响速度和准确度 "gpu_num" : 1, } @classmethod # 将字典里的参数转为类的成员变量 def get_defaults(cls, n): if n in cls._defaults: return cls._defaults[n] else: return "Unrecognized attribute name '" + n + "'" def __init__(self, **kwargs): self.__dict__.update(self._defaults) # set up default values self.__dict__.update(kwargs) # and update with user overrides self.class_names = self._get_class()#读取所有的类别 self.anchors = self._get_anchors()#读取所有的anchor self.sess = K.get_session() self.boxes, self.scores, self.classes = self.generate() # 读取所有的类别 def _get_class(self): classes_path = os.path.expanduser(self.classes_path) with open(classes_path) as f: class_names = f.readlines() class_names = [c.strip() for c in class_names]#strip() 方法用于移除字符串头尾指定的字符（默认为空格或换行符）或字符序列 return class_names # 读取所有的anchor def _get_anchors(self): anchors_path = os.path.expanduser(self.anchors_path) with open(anchors_path) as f: anchors = f.readline() anchors = [float(x) for x in anchors.split(',')] return np.array(anchors).reshape(-1, 2) # 读入训练好的模型，如果失败则创建模型，调用model.py里的yolo_eval()得到边框、置信度、类别 def generate(self): model_path = os.path.expanduser(self.model_path) assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.' # Load model, or construct model and load weights. num_anchors = len(self.anchors) num_classes = len(self.class_names) is_tiny_version = num_anchors==6 # default setting try: self.yolo_model = load_model(model_path, compile=False) except: self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \ if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes) self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match else: #检查一下模型中的参数是否和设置的anchor、类别数量一致 assert self.yolo_model.layers[-1].output_shape[-1] == \ num_anchors/len(self.yolo_model.output) * (num_classes + 5), \ 'Mismatch between model and given anchor and class sizes' print('{} model, anchors, and classes loaded.'.format(model_path)) # 生成用来画边框的颜色 # Generate colors for drawing bounding boxes. hsv_tuples = [(x / len(self.class_names), 1., 1.) for x in range(len(self.class_names))] self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) self.colors = list( map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors)) np.random.seed(10101) # Fixed seed for consistent colors across runs. np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes. np.random.seed(None) # Reset seed to default. # 通过yolo_eval()得到输出,yolo_eval()定义在model.py中 # Generate output tensor targets for filtered bounding boxes. self.input_image_shape = K.placeholder(shape=(2, )) if self.gpu_num>=2: self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num) boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors, len(self.class_names), self.input_image_shape, score_threshold=self.score, iou_threshold=self.iou) return boxes, scores, classes # 检测图片 def detect_image(self, image): start = timer() # 确保图片尺寸合理（32的倍数） if self.model_image_size != (None, None): assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required' assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required' boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size))) else: new_image_size = (image.width - (image.width % 32), image.height - (image.height % 32)) boxed_image = letterbox_image(image, new_image_size) image_data = np.array(boxed_image, dtype='float32') print(image_data.shape) image_data /= 255.#归一化 image_data = np.expand_dims(image_data, 0) # Add batch dimension. #跑一遍前向传播，得到输出 out_boxes, out_scores, out_classes = self.sess.run( [self.boxes, self.scores, self.classes],#这几个的计算方法在self.generate()里 feed_dict={ self.yolo_model.input: image_data, self.input_image_shape: [image.size[1], image.size[0]], K.learning_phase(): 0 }) print('Found {} boxes for {}'.format(len(out_boxes), 'img')) font = ImageFont.truetype(font='font/FiraMono-Medium.otf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) thickness = (image.size[0] + image.size[1]) // 300#根据检测图片的大小设置边框宽度 #遍历所有的结果 for i, c in reversed(list(enumerate(out_classes))): predicted_class = self.class_names[c] box = out_boxes[i] score = out_scores[i] label = '{} {:.2f}'.format(predicted_class, score) draw = ImageDraw.Draw(image)#准备绘图 label_size = draw.textsize(label, font)#计算需要绘制的标签的大小 top, left, bottom, right = box # 将边框限定在图片里面，且属性限定为正整数 top = max(0, np.floor(top + 0.5).astype('int32')) left = max(0, np.floor(left + 0.5).astype('int32')) bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32')) right = min(image.size[0], np.floor(right + 0.5).astype('int32')) print(label, (left, top), (right, bottom)) if top - label_size[1] >= 0:#设置类别标签的显示区域 text_origin = np.array([left, top - label_size[1]]) else: text_origin = np.array([left, top + 1]) # 画检测框，因为draw.rectangle()不能指定线条宽度，所以要画多次 # My kingdom for a good redistributable image drawing library. for i in range(thickness): draw.rectangle( [left + i, top + i, right - i, bottom - i], outline=self.colors[c]) # 画标签框 draw.rectangle( [tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c]) # 画标签 draw.text(text_origin, label, fill=(0, 0, 0), font=font) del draw end = timer() print(end - start) return image def close_session(self): self.sess.close() #检测视频，读入视频，然后分帧调用detect_image() def detect_video(yolo, video_path, output_path=""): import cv2 vid = cv2.VideoCapture(video_path) if not vid.isOpened(): raise IOError("Couldn't open webcam or video") video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC)) video_fps = vid.get(cv2.CAP_PROP_FPS) video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))) isOutput = True if output_path != "" else False if isOutput: print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size)) out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size) accum_time = 0 curr_fps = 0 fps = "FPS: ??" prev_time = timer() while True: return_value, frame = vid.read()#读入一帧图像 image = Image.fromarray(frame) image = yolo.detect_image(image)#检测 result = np.asarray(image) #计算fps curr_time = timer() exec_time = curr_time - prev_time prev_time = curr_time accum_time = accum_time + exec_time curr_fps = curr_fps + 1 if accum_time > 1: accum_time = accum_time - 1 fps = "FPS: " + str(curr_fps) curr_fps = 0 #画出fps cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.50, color=(255, 0, 0), thickness=2) cv2.namedWindow("result", cv2.WINDOW_NORMAL) cv2.imshow("result", result) if isOutput: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break yolo.close_session()

model.py

"""YOLO_v3 Model Defined in Keras.""" from functools import wraps import numpy as np import tensorflow as tf from keras import backend as K from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D from keras.layers.advanced_activations import LeakyReLU from keras.layers.normalization import BatchNormalization from keras.models import Model from keras.regularizers import l2 from yolo3.utils import compose #定义卷积层 @wraps(Conv2D) def DarknetConv2D(*args, **kwargs): """Wrapper to set Darknet parameters for Convolution2D.""" darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)} darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same' darknet_conv_kwargs.update(kwargs) return Conv2D(*args, **darknet_conv_kwargs) #卷积层+批量标准化+激活函数，作为yolov3的一个基本块 def DarknetConv2D_BN_Leaky(*args, **kwargs): """Darknet Convolution2D followed by BatchNormalization and LeakyReLU.""" no_bias_kwargs = {'use_bias': False} no_bias_kwargs.update(kwargs) return compose( DarknetConv2D(*args, **no_bias_kwargs), BatchNormalization(), LeakyReLU(alpha=0.1)) #定义残差块，由基本块和全零填充组成 def resblock_body(x, num_filters, num_blocks): '''A series of resblocks starting with a downsampling Convolution2D''' # Darknet uses left and top padding instead of 'same' mode x = ZeroPadding2D(((1,0),(1,0)))(x) x = DarknetConv2D_BN_Leaky(num_filters, (3,3), strides=(2,2))(x) for i in range(num_blocks): y = compose( DarknetConv2D_BN_Leaky(num_filters//2, (1,1)), DarknetConv2D_BN_Leaky(num_filters, (3,3)))(x) x = Add()([x,y]) return x #定义主干网络darknet的结构，由基本块和残差块组成 #DarknetConv2D_BN_Leaky和resblock_body定义在上面 def darknet_body(x): '''Darknent body having 52 Convolution2D layers''' x = DarknetConv2D_BN_Leaky(32, (3,3))(x) x = resblock_body(x, 64, 1) x = resblock_body(x, 128, 2) x = resblock_body(x, 256, 8) x = resblock_body(x, 512, 8) x = resblock_body(x, 1024, 4) return x #定义除主干网络外的后面几层网络 def make_last_layers(x, num_filters, out_filters): '''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer''' x = compose( DarknetConv2D_BN_Leaky(num_filters, (1,1)), DarknetConv2D_BN_Leaky(num_filters*2, (3,3)), DarknetConv2D_BN_Leaky(num_filters, (1,1)), DarknetConv2D_BN_Leaky(num_filters*2, (3,3)), DarknetConv2D_BN_Leaky(num_filters, (1,1)))(x) y = compose( DarknetConv2D_BN_Leaky(num_filters*2, (3,3)), DarknetConv2D(out_filters, (1,1)))(x) return x, y #定义yolov3网络结构，包括主干和三个输出层 def yolo_body(inputs, num_anchors, num_classes): """Create YOLO_V3 model CNN body in Keras.""" darknet = Model(inputs, darknet_body(inputs)) x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5)) x = compose( DarknetConv2D_BN_Leaky(256, (1,1)), UpSampling2D(2))(x) x = Concatenate()([x,darknet.layers[152].output]) x, y2 = make_last_layers(x, 256, num_anchors*(num_classes+5)) x = compose( DarknetConv2D_BN_Leaky(128, (1,1)), UpSampling2D(2))(x) x = Concatenate()([x,darknet.layers[92].output]) x, y3 = make_last_layers(x, 128, num_anchors*(num_classes+5)) return Model(inputs, [y1,y2,y3]) #定义简化版的网络结构 def tiny_yolo_body(inputs, num_anchors, num_classes): '''Create Tiny YOLO_v3 model CNN body in keras.''' x1 = compose( DarknetConv2D_BN_Leaky(16, (3,3)), MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), DarknetConv2D_BN_Leaky(32, (3,3)), MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), DarknetConv2D_BN_Leaky(64, (3,3)), MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), DarknetConv2D_BN_Leaky(128, (3,3)), MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), DarknetConv2D_BN_Leaky(256, (3,3)))(inputs) x2 = compose( MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), DarknetConv2D_BN_Leaky(512, (3,3)), MaxPooling2D(pool_size=(2,2), strides=(1,1), padding='same'), DarknetConv2D_BN_Leaky(1024, (3,3)), DarknetConv2D_BN_Leaky(256, (1,1)))(x1) y1 = compose( DarknetConv2D_BN_Leaky(512, (3,3)), DarknetConv2D(num_anchors*(num_classes+5), (1,1)))(x2) x2 = compose( DarknetConv2D_BN_Leaky(128, (1,1)), UpSampling2D(2))(x2) y2 = compose( Concatenate(), DarknetConv2D_BN_Leaky(256, (3,3)), DarknetConv2D(num_anchors*(num_classes+5), (1,1)))([x2,x1]) return Model(inputs, [y1,y2]) #从输出层得到边框信息 def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): """Convert final layer features to bounding box parameters.""" num_anchors = len(anchors) # Reshape to batch, height, width, num_anchors, box_params. anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) grid_shape = K.shape(feats)[1:3] # height, width grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), [1, grid_shape[1], 1, 1]) grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), [grid_shape[0], 1, 1, 1]) grid = K.concatenate([grid_x, grid_y]) grid = K.cast(grid, K.dtype(feats)) feats = K.reshape( feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) # 更据论文中的公式，将预测值转化为真实值的形式 # Adjust preditions to each spatial grid point and anchor size. box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats)) box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats)) box_confidence = K.sigmoid(feats[..., 4:5]) box_class_probs = K.sigmoid(feats[..., 5:]) if calc_loss == True: return grid, feats, box_xy, box_wh return box_xy, box_wh, box_confidence, box_class_probs #对边框进行缩放，对应不同尺寸的图片 def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): '''Get corrected boxes''' box_yx = box_xy[..., ::-1] box_hw = box_wh[..., ::-1] input_shape = K.cast(input_shape, K.dtype(box_yx)) image_shape = K.cast(image_shape, K.dtype(box_yx)) new_shape = K.round(image_shape * K.min(input_shape/image_shape)) offset = (input_shape-new_shape)/2./input_shape scale = input_shape/new_shape box_yx = (box_yx - offset) * scale box_hw *= scale box_mins = box_yx - (box_hw / 2.) box_maxes = box_yx + (box_hw / 2.) boxes = K.concatenate([ box_mins[..., 0:1], # y_min box_mins[..., 1:2], # x_min box_maxes[..., 0:1], # y_max box_maxes[..., 1:2] # x_max ]) # Scale boxes back to original image shape. boxes *= K.concatenate([image_shape, image_shape]) return boxes #这个函数调用上面的yolo_head()和yolo_correct_boxes(),并转换边框和置信度的格式 def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape): '''Process Conv layer output''' box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, anchors, num_classes, input_shape) boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) boxes = K.reshape(boxes, [-1, 4]) box_scores = box_confidence * box_class_probs box_scores = K.reshape(box_scores, [-1, num_classes]) return boxes, box_scores #测试用的函数，返回边框、置信度和类别，yolo_boxes_and_scores()的定义在上面 def yolo_eval(yolo_outputs, anchors, num_classes, image_shape, max_boxes=20, score_threshold=.6, iou_threshold=.5): """Evaluate YOLO model on given input and return filtered boxes.""" num_layers = len(yolo_outputs) #把anchor平均分给输出层 anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] # default setting input_shape = K.shape(yolo_outputs[0])[1:3] * 32 boxes = [] box_scores = [] #遍历所有的输出层（3个） for l in range(num_layers): _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, image_shape) boxes.append(_boxes) box_scores.append(_box_scores) boxes = K.concatenate(boxes, axis=0) box_scores = K.concatenate(box_scores, axis=0) mask = box_scores >= score_threshold#mask用来筛选置信度高于阈值的结果 max_boxes_tensor = K.constant(max_boxes, dtype='int32') boxes_ = [] scores_ = [] classes_ = [] for c in range(num_classes): # TODO: use keras backend instead of tf. class_boxes = tf.boolean_mask(boxes, mask[:, c]) class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) #进行非极大值抑制 nms_index = tf.image.non_max_suppression( class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold) class_boxes = K.gather(class_boxes, nms_index)#gather()在给定的张量中搜索给定下标的向量 class_box_scores = K.gather(class_box_scores, nms_index) classes = K.ones_like(class_box_scores, 'int32') * c boxes_.append(class_boxes) scores_.append(class_box_scores) classes_.append(classes) boxes_ = K.concatenate(boxes_, axis=0) scores_ = K.concatenate(scores_, axis=0) classes_ = K.concatenate(classes_, axis=0) return boxes_, scores_, classes_ #将真实标签值（xmin,ymin...)转换成yolo中的形式（tx,ty...)，使之能与预测值相比较 def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes): '''Preprocess true boxes to training input format Parameters ---------- true_boxes: array, shape=(m, T, 5) Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape. input_shape: array-like, hw, multiples of 32 anchors: array, shape=(N, 2), wh num_classes: integer Returns ------- y_true: list of array, shape like yolo_outputs, xywh are reletive value ''' assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes' num_layers = len(anchors)//3 # default setting anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] true_boxes = np.array(true_boxes, dtype='float32') input_shape = np.array(input_shape, dtype='int32') boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2 boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2] true_boxes[..., 0:2] = boxes_xy/input_shape[::-1] true_boxes[..., 2:4] = boxes_wh/input_shape[::-1] m = true_boxes.shape[0] grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(num_layers)] y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes), dtype='float32') for l in range(num_layers)] # Expand dim to apply broadcasting. anchors = np.expand_dims(anchors, 0) anchor_maxes = anchors / 2. anchor_mins = -anchor_maxes valid_mask = boxes_wh[..., 0]>0 for b in range(m): # Discard zero rows. wh = boxes_wh[b, valid_mask[b]] if len(wh)==0: continue # Expand dim to apply broadcasting. wh = np.expand_dims(wh, -2) box_maxes = wh / 2. box_mins = -box_maxes intersect_mins = np.maximum(box_mins, anchor_mins) intersect_maxes = np.minimum(box_maxes, anchor_maxes) intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] box_area = wh[..., 0] * wh[..., 1] anchor_area = anchors[..., 0] * anchors[..., 1] iou = intersect_area / (box_area + anchor_area - intersect_area) # Find best anchor for each true box best_anchor = np.argmax(iou, axis=-1) for t, n in enumerate(best_anchor): for l in range(num_layers): if n in anchor_mask[l]: i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32') j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32') k = anchor_mask[l].index(n) c = true_boxes[b,t, 4].astype('int32') y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4] y_true[l][b, j, i, k, 4] = 1 y_true[l][b, j, i, k, 5+c] = 1 return y_true #计算两个矩形框的交并比 def box_iou(b1, b2): '''Return iou tensor Parameters ---------- b1: tensor, shape=(i1,...,iN, 4), xywh b2: tensor, shape=(j, 4), xywh Returns ------- iou: tensor, shape=(i1,...,iN, j) ''' # Expand dim to apply broadcasting. b1 = K.expand_dims(b1, -2) b1_xy = b1[..., :2] b1_wh = b1[..., 2:4] b1_wh_half = b1_wh/2. b1_mins = b1_xy - b1_wh_half b1_maxes = b1_xy + b1_wh_half # Expand dim to apply broadcasting. b2 = K.expand_dims(b2, 0) b2_xy = b2[..., :2] b2_wh = b2[..., 2:4] b2_wh_half = b2_wh/2. b2_mins = b2_xy - b2_wh_half b2_maxes = b2_xy + b2_wh_half intersect_mins = K.maximum(b1_mins, b2_mins) intersect_maxes = K.minimum(b1_maxes, b2_maxes) intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.) intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] b1_area = b1_wh[..., 0] * b1_wh[..., 1] b2_area = b2_wh[..., 0] * b2_wh[..., 1] iou = intersect_area / (b1_area + b2_area - intersect_area) return iou #定义损失函数 def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False): '''Return yolo_loss tensor Parameters ---------- yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body y_true: list of array, the output of preprocess_true_boxes anchors: array, shape=(N, 2), wh num_classes: integer ignore_thresh: float, the iou threshold whether to ignore object confidence loss Returns ------- loss: tensor, shape=(1,) ''' num_layers = len(anchors)//3 # default setting yolo_outputs = args[:num_layers] y_true = args[num_layers:] anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0])) grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)] loss = 0 m = K.shape(yolo_outputs[0])[0] # batch size, tensor mf = K.cast(m, K.dtype(yolo_outputs[0])) for l in range(num_layers): object_mask = y_true[l][..., 4:5] true_class_probs = y_true[l][..., 5:] grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True) pred_box = K.concatenate([pred_xy, pred_wh]) # Darknet raw box to calculate loss. raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1]) raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4] # Find ignore mask, iterate over each of batch. ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True) object_mask_bool = K.cast(object_mask, 'bool') def loop_body(b, ignore_mask): true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0]) iou = box_iou(pred_box[b], true_box) best_iou = K.max(iou, axis=-1) ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box))) return b+1, ignore_mask _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask]) ignore_mask = ignore_mask.stack() ignore_mask = K.expand_dims(ignore_mask, -1) # K.binary_crossentropy is helpful to avoid exp overflow. xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True) wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4]) confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \ (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True) xy_loss = K.sum(xy_loss) / mf wh_loss = K.sum(wh_loss) / mf confidence_loss = K.sum(confidence_loss) / mf class_loss = K.sum(class_loss) / mf loss += xy_loss + wh_loss + confidence_loss + class_loss if print_loss: loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message='loss: ') return loss

utils.py

"""Miscellaneous utility functions.""" from functools import reduce from PIL import Image import numpy as np from matplotlib.colors import rgb_to_hsv, hsv_to_rgb #组合多个函数，上一个函数的输出作为下一个的输入 def compose(*funcs): """Compose arbitrarily many functions, evaluated left to right. Reference: https://mathieularose.com/function-composition-in-python/ """ # return lambda x: reduce(lambda v, f: f(v), funcs, x) if funcs: return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs) else: raise ValueError('Composition of empty sequence not supported.') #等比例缩放图片， def letterbox_image(image, size): '''resize image with unchanged aspect ratio using padding''' iw, ih = image.size w, h = size scale = min(w/iw, h/ih) nw = int(iw*scale) nh = int(ih*scale) image = image.resize((nw,nh), Image.BICUBIC) new_image = Image.new('RGB', size, (128,128,128)) new_image.paste(image, ((w-nw)//2, (h-nh)//2)) return new_image #返回a~b之间的随机数 def rand(a=0, b=1): return np.random.rand()*(b-a) + a '''数据预处理，对图像属性做一些变换，模拟实时图像数据,并且把RGB值归一化；有两种模式，随机和不随机，由参数random控制''' def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jitter=.3, hue=.1, sat=1.5, val=1.5, proc_img=True): '''random preprocessing for real-time data augmentation''' line = annotation_line.split()#按空格分割字符串，line[0]是图片路径 image = Image.open(line[0])#读入图片 iw, ih = image.size#该图片尺寸 h, w = input_shape#规定的输入图像的尺寸 box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])#截取对应的边框信息 if not random: # 缩放图片，使之尺寸等于输入尺寸的整数倍，多出的部分用（128,128,128）颜色填充 # resize image scale = min(w/iw, h/ih) nw = int(iw*scale) nh = int(ih*scale) dx = (w-nw)//2 dy = (h-nh)//2 image_data=0 if proc_img: image = image.resize((nw,nh), Image.BICUBIC) new_image = Image.new('RGB', (w,h), (128,128,128)) new_image.paste(image, (dx, dy)) image_data = np.array(new_image)/255.# 归一化，将RGB值转换到[0,1]区间 # 处理对应的标签，同步缩放与移动 # correct boxes box_data = np.zeros((max_boxes,5)) if len(box)>0: np.random.shuffle(box) if len(box)>max_boxes: box = box[:max_boxes] box[:, [0,2]] = box[:, [0,2]]*scale + dx box[:, [1,3]] = box[:, [1,3]]*scale + dy box_data[:len(box)] = box return image_data, box_data # 下面也是缩放图片，但是缩放比例为随机值，最后的尺寸仍为输入尺寸的整数倍 # resize image new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter) scale = rand(.25, 2) if new_ar < 1: nh = int(scale*h) nw = int(nh*new_ar) else: nw = int(scale*w) nh = int(nw/new_ar) image = image.resize((nw,nh), Image.BICUBIC) # place image dx = int(rand(0, w-nw)) dy = int(rand(0, h-nh)) new_image = Image.new('RGB', (w,h), (128,128,128)) new_image.paste(image, (dx, dy)) image = new_image # 随机决定是否翻转图片，概率为50% # flip image or not flip = rand()<.5 if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT) # 下面的代码是对图像随机做亮度、饱和度，色调，颜色灰度扭曲之类的变换 # distort image hue = rand(-hue, hue) sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat) val = rand(1, val) if rand()<.5 else 1/rand(1, val) x = rgb_to_hsv(np.array(image)/255.) x[..., 0] += hue x[..., 0][x[..., 0]>1] -= 1 x[..., 0][x[..., 0]<0] += 1 x[..., 1] *= sat x[..., 2] *= val x[x>1] = 1 x[x<0] = 0 image_data = hsv_to_rgb(x) # numpy array, 0 to 1 # 对标签做相同的操作 # correct boxes box_data = np.zeros((max_boxes,5)) if len(box)>0: np.random.shuffle(box) box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy if flip: box[:, [0,2]] = w - box[:, [2,0]] box[:, 0:2][box[:, 0:2]<0] = 0 box[:, 2][box[:, 2]>w] = w box[:, 3][box[:, 3]>h] = h box_w = box[:, 2] - box[:, 0] box_h = box[:, 3] - box[:, 1] box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box if len(box)>max_boxes: box = box[:max_boxes] box_data[:len(box)] = box return image_data, box_data

train.py

""" Retrain the YOLO model for your own dataset. """ import numpy as np import keras.backend as K from keras.layers import Input, Lambda from keras.models import Model from keras.optimizers import Adam from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss from yolo3.utils import get_random_data def _main(): annotation_path = 'train.txt'#标签路径 log_dir = 'logs/000/'#权重的存储路径 classes_path = 'model_data/voc_classes.txt'#类别路径 anchors_path = 'model_data/yolo_anchors.txt'#anchor路径 class_names = get_classes(classes_path)#获取类别 num_classes = len(class_names)#类别数量 anchors = get_anchors(anchors_path)#获取anchor input_shape = (416,416) # multiple of 32, hw#输入尺寸(高，宽)，32的倍数 is_tiny_version = len(anchors)==6 # default setting#是否使用简化的网络 if is_tiny_version:#创建基于简化的网络的模型，create_tiny_model函数定义在下面，freeze_body用来计算要冻结的部分 model = create_tiny_model(input_shape, anchors, num_classes, freeze_body=2, weights_path='model_data/tiny_yolo_weights.h5') else:#创建基于默认的网络的模型，create_model函数定义在下面 model = create_model(input_shape, anchors, num_classes, freeze_body=2, weights_path='model_data/yolo_weights.h5') # make sure you know what you freeze #日志路径，在下面model.fit_generator()里面用到，作为callbacks的一部分 logging = TensorBoard(log_dir=log_dir) #定义断点，在下面model.fit_generator()里面用到，作为callbacks的一部分 checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', monitor='val_loss', save_weights_only=True, save_best_only=True, period=3) ''' ReduceLROnPlateau为库函数，是callbacks的一种（callbacks用于指定在每个epoch开始和结束的时候进行哪种特定操作）该函数用于更新学习率（lr指学习率），当指标停止提升时，降低学习速率 monitor：要监测的数量。 factor：学习速率降低的因子。new_lr = lr * factor patience：没有提升的epoch数，之后学习率将降低。 verbose：int。0：安静，1：更新消息。 ''' reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1) # EarlyStopping也是callbacks的一种，可以通过在模型训练整个过程中截取保存结果最优的参数模型，防止过拟合 early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1) val_split = 0.1#验证集的比例 with open(annotation_path) as f:#打开标签文件，读入标签 lines = f.readlines() np.random.seed(10101) np.random.shuffle(lines)#打乱顺序 np.random.seed(None) num_val = int(len(lines)*val_split)#验证集的数量 num_train = len(lines) - num_val#训练集的数量 # 分两个阶段训练 # 第一阶段，冻结一些层进行训练 # Train with frozen layers first, to get a stable loss. # Adjust num epochs to your dataset. This step is enough to obtain a not bad model. if True: # 编译Keras模型，优化器为Adam，初始学习率为10^-3，损失函数为yolo_loss（定义在yolo3/model.py中） model.compile(optimizer=Adam(lr=1e-3), loss={ # use custom yolo_loss Lambda layer. 'yolo_loss': lambda y_true, y_pred: y_pred}) # 批处理的大小，显存不够建议设小一点 batch_size = 32 print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) '''fit_generator功能与fit类似，不过它使用python生成器逐批生成的数据，按批次训练模型，防止内存不够 data_generator_wrapper的定义在下面 steps_per_epoch：每轮迭代的步数，最小为1 validation_data：验证集 validation_steps：停止前要验证的总步数 epochs：训练终止时的epoch值，训练将在达到该epoch值时停止，当没有设置当没有设置initial_epoch时，它就是训练的总轮数，否则训练的总轮数为epochs - inital_epoch initial_epoch: 开始训练的轮次，在继续之前的训练时有用 ''' model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes), steps_per_epoch=max(1, num_train//batch_size), validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes), validation_steps=max(1, num_val//batch_size), epochs=50, initial_epoch=0, callbacks=[logging, checkpoint]) model.save_weights(log_dir + 'trained_weights_stage_1.h5')#保存训练后的权重 # 第二阶段，解冻再训练 # Unfreeze and continue training, to fine-tune. # Train longer if the result is not good. if True: for i in range(len(model.layers)): model.layers[i].trainable = True#trainable属性控制这个变量是否可以被优化器更新 # 编译Keras模型，优化器为Adam，初始学习率为10^-4，损失函数为yolo_loss（定义在yolo3/model.py中） model.compile(optimizer=Adam(lr=1e-4), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change print('Unfreeze all of the layers.') batch_size = 32 # note that more GPU memory is required after unfreezing the body print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes), steps_per_epoch=max(1, num_train//batch_size), validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes), validation_steps=max(1, num_val//batch_size), epochs=100, initial_epoch=50, callbacks=[logging, checkpoint, reduce_lr, early_stopping]) model.save_weights(log_dir + 'trained_weights_final.h5')#保存训练后的权重 # Further training if needed. #读取所有的类别 def get_classes(classes_path): '''loads the classes''' with open(classes_path) as f: class_names = f.readlines() class_names = [c.strip() for c in class_names] return class_names #读取所有的anchor def get_anchors(anchors_path): '''loads the anchors from a file''' with open(anchors_path) as f: anchors = f.readline() anchors = [float(x) for x in anchors.split(',')] return np.array(anchors).reshape(-1, 2) #创建训练模型，可以设置是否读取已经训练的权重 def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2, weights_path='model_data/yolo_weights.h5'): '''create the training model''' K.clear_session() # get a new session image_input = Input(shape=(None, None, 3))#Input()用来实例化一个keras张量 h, w = input_shape#输入图像的高和宽 num_anchors = len(anchors)#anchor的个数 # 定义真实值 y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \ num_anchors//3, num_classes+5)) for l in range(3)] # yolo_body定义在yolo3/model.py中 model_body = yolo_body(image_input, num_anchors//3, num_classes) print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes)) if load_pretrained: #读取原先训练的权重 model_body.load_weights(weights_path, by_name=True, skip_mismatch=True) print('Load weights {}.'.format(weights_path)) if freeze_body in [1, 2]: #冻结一些层（三个输出层除外），不让其被优化器更新 # Freeze darknet53 body or freeze all but 3 output layers. num = (185, len(model_body.layers)-3)[freeze_body-1] for i in range(num): model_body.layers[i].trainable = False print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers))) # 定义损失函数，用到了yolo3/model.py中的yolo_loss model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})( [*model_body.output, *y_true]) model = Model([model_body.input, *y_true], model_loss) return model #创建基于简化的网络结构的训练模型，与上面类似，只有model_body不一样 def create_tiny_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2, weights_path='model_data/tiny_yolo_weights.h5'): '''create the training model, for Tiny YOLOv3''' K.clear_session() # get a new session image_input = Input(shape=(None, None, 3)) h, w = input_shape num_anchors = len(anchors) y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], \ num_anchors//2, num_classes+5)) for l in range(2)] model_body = tiny_yolo_body(image_input, num_anchors//2, num_classes) print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes)) if load_pretrained: model_body.load_weights(weights_path, by_name=True, skip_mismatch=True) print('Load weights {}.'.format(weights_path)) if freeze_body in [1, 2]: # Freeze the darknet body or freeze all but 2 output layers. num = (20, len(model_body.layers)-2)[freeze_body-1] for i in range(num): model_body.layers[i].trainable = False print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers))) model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7})( [*model_body.output, *y_true]) model = Model([model_body.input, *y_true], model_loss) return model #自定义的生成器，用来分批读入数据 def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes): '''data generator for fit_generator''' n = len(annotation_lines) i = 0 while True: image_data = [] box_data = [] for b in range(batch_size): if i==0: np.random.shuffle(annotation_lines)#打乱一下顺序 # get_random_data定义在yolo3/utils.py中，用于对输入图像做一些处理（缩放、改颜色等） image, box = get_random_data(annotation_lines[i], input_shape, random=True) image_data.append(image)#加入处理后的数据（图像） box_data.append(box)#加入处理后的数据（标签） i = (i+1) % n image_data = np.array(image_data)#转成numpy的形式 box_data = np.array(box_data)#转成numpy的形式 # preprocess_true_boxes定义在yolo3/model.py中，用于将标签转换为真实值的形式，使之能与预测值相比较 y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes) # 带有 yield 的函数在 Python 中被称之为 generator（生成器），可以用来节省内存 yield [image_data, *y_true], np.zeros(batch_size) #用来调用上面定义的data_generator，并在调用前加了个条件判断 def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes): n = len(annotation_lines) if n==0 or batch_size<=0: return None return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes) if __name__ == '__main__': _main()

Processed: 0.009, SQL: 9