图像数据集预处理-基于NumPy、PIL等框架实现-转载自PaddlePaddle官方

技术2022-07-11 133

数据预处理

在计算机视觉中，通常会对图像做一些随机的变化，产生相似但又不完全相同的样本。主要作用是扩大训练数据集，抑制过拟合，提升模型的泛化能力，常用的方法见下面的程序。

随机改变亮暗、对比度和颜色等

import numpy as np import cv2 from PIL import Image, ImageEnhance import random

随机改变亮暗、对比度和颜色等

def random_distort(img):
    """Randomly perturb the brightness, contrast and color of an image.

    Each of the three PIL enhancers is applied exactly once, in a random
    order, with its own factor drawn uniformly from ``[lower, upper]``
    (``[0.5, 1.5]`` by default).

    Args:
        img: Array accepted by ``PIL.Image.fromarray``
            (presumably an H x W x 3 uint8 image -- confirm with the caller).

    Returns:
        The distorted image converted back to a NumPy array.
    """

    def random_brightness(img, lower=0.5, upper=1.5):
        # Random brightness factor.
        e = np.random.uniform(lower, upper)
        return ImageEnhance.Brightness(img).enhance(e)

    def random_contrast(img, lower=0.5, upper=1.5):
        # Random contrast factor.
        e = np.random.uniform(lower, upper)
        return ImageEnhance.Contrast(img).enhance(e)

    def random_color(img, lower=0.5, upper=1.5):
        # Random color factor.
        e = np.random.uniform(lower, upper)
        return ImageEnhance.Color(img).enhance(e)

    ops = [random_brightness, random_contrast, random_color]
    # Randomise the order in which the three enhancements are applied.
    np.random.shuffle(ops)

    img = Image.fromarray(img)
    for op in ops:
        img = op(img)
    return np.asarray(img)

随机填充

def random_expand(img,
                  gtboxes,
                  max_ratio=4.,
                  fill=None,
                  keep_ratio=True,
                  thresh=0.5):
    """Randomly place the image on a larger canvas and remap its boxes.

    Args:
        img: Image array of shape ``(h, w, c)``.
        gtboxes: 2-D array whose columns 0/1 are normalized x/y positions and
            columns 2/3 are normalized width/height. It is updated IN PLACE
            when the expansion is applied.
        max_ratio: Upper bound of the random enlargement factor; values below
            1.0 disable the expansion.
        fill: Optional per-channel fill values (length ``c``); each value is
            multiplied by 255 before being written to the canvas. May be a
            list, tuple or NumPy array. The canvas stays zero-filled when
            ``fill`` is None or its length does not match ``c``.
        keep_ratio: Use the same enlargement factor for both axes.
        thresh: The expansion is skipped when a uniform draw from ``[0, 1)``
            exceeds this value.

    Returns:
        ``(image, gtboxes)``. The inputs are returned untouched when the
        expansion is skipped; otherwise the image is a new uint8 array.
    """
    if random.random() > thresh:
        return img, gtboxes
    if max_ratio < 1.0:
        return img, gtboxes

    h, w, c = img.shape
    ratio_x = random.uniform(1, max_ratio)
    ratio_y = ratio_x if keep_ratio else random.uniform(1, max_ratio)
    oh = int(h * ratio_y)
    ow = int(w * ratio_x)
    off_x = random.randint(0, ow - w)
    off_y = random.randint(0, oh - h)

    out_img = np.zeros((oh, ow, c))
    # `fill is not None` instead of plain truthiness: a NumPy array has no
    # unambiguous truth value and would raise ValueError here.
    if fill is not None and len(fill) == c:
        out_img[:, :, :] = np.asarray(fill, dtype='float64') * 255.0
    out_img[off_y:off_y + h, off_x:off_x + w, :] = img

    gtboxes[:, 0] = ((gtboxes[:, 0] * w) + off_x) / float(ow)
    gtboxes[:, 1] = ((gtboxes[:, 1] * h) + off_y) / float(oh)
    # Rescale sizes with the real (integer-truncated) canvas size so they
    # stay consistent with the centre computation above.
    gtboxes[:, 2] = gtboxes[:, 2] * w / float(ow)
    gtboxes[:, 3] = gtboxes[:, 3] * h / float(oh)

    return out_img.astype('uint8'), gtboxes

随机裁剪

随机裁剪之前需要先定义两个函数：multi_box_iou_xywh 和 box_crop。这两个函数将被保存在 box_utils.py 文件中。

import numpy as np


def multi_box_iou_xywh(box1, box2):
    """Compute the IoU between boxes given as (center_x, center_y, w, h).

    box1 or box2 can contain multiple boxes. Only two cases are supported:
      1. box1 and box2 have the same shape, box1.shape == box2.shape
      2. either box1 or box2 contains only one box,
         len(box1) == 1 or len(box2) == 1
    If the shapes do not match and both contain multiple boxes, the result
    is wrong (or broadcasting fails).

    Args:
        box1: 2-D array with 4 columns.
        box2: 2-D array with 4 columns.

    Returns:
        1-D array of IoU values.
    """
    assert box1.shape[-1] == 4, "Box1 shape[-1] should be 4."
    assert box2.shape[-1] == 4, "Box2 shape[-1] should be 4."

    # Convert centre/size to corner coordinates.
    b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
    b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
    b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
    b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2

    inter_x1 = np.maximum(b1_x1, b2_x1)
    inter_x2 = np.minimum(b1_x2, b2_x2)
    inter_y1 = np.maximum(b1_y1, b2_y1)
    inter_y2 = np.minimum(b1_y2, b2_y2)
    # Negative extents mean the boxes do not overlap on that axis.
    inter_w = np.clip(inter_x2 - inter_x1, a_min=0., a_max=None)
    inter_h = np.clip(inter_y2 - inter_y1, a_min=0., a_max=None)

    inter_area = inter_w * inter_h
    b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
    b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)

    return inter_area / (b1_area + b2_area - inter_area)


def box_crop(boxes, labels, crop, img_shape):
    """Clip boxes to a crop window and re-express them relative to it.

    Args:
        boxes: 2-D float array of normalized (center_x, center_y, w, h) rows.
            It is copied, the caller's array is not modified.
        labels: Array with one entry per box.
        crop: ``(x, y, w, h)`` of the crop window in pixels.
        img_shape: ``(image_width, image_height)`` in pixels.

    Returns:
        ``(boxes, labels, count)``: boxes normalized to the crop window in
        (center_x, center_y, w, h) form, labels multiplied by the keep mask,
        and the number of kept boxes. A box is kept when its centre lies
        inside the crop and its clipped extent is non-empty; rows and labels
        of dropped boxes are zeroed rather than removed.
    """
    x, y, w, h = map(float, crop)
    im_w, im_h = map(float, img_shape)

    boxes = boxes.copy()
    # Normalized centre/size -> absolute corner coordinates (x1, y1, x2, y2).
    # Both right-hand sides are evaluated before either column is written.
    boxes[:, 0], boxes[:, 2] = (boxes[:, 0] - boxes[:, 2] / 2) * im_w, (
        boxes[:, 0] + boxes[:, 2] / 2) * im_w
    boxes[:, 1], boxes[:, 3] = (boxes[:, 1] - boxes[:, 3] / 2) * im_h, (
        boxes[:, 1] + boxes[:, 3] / 2) * im_h

    crop_box = np.array([x, y, x + w, y + h])
    centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
    mask = np.logical_and(crop_box[:2] <= centers,
                          centers <= crop_box[2:]).all(axis=1)

    # Clip to the crop window, then shift into crop-local coordinates.
    boxes[:, :2] = np.maximum(boxes[:, :2], crop_box[:2])
    boxes[:, 2:] = np.minimum(boxes[:, 2:], crop_box[2:])
    boxes[:, :2] -= crop_box[:2]
    boxes[:, 2:] -= crop_box[:2]

    mask = np.logical_and(mask, (boxes[:, :2] < boxes[:, 2:]).all(axis=1))
    boxes = boxes * np.expand_dims(mask.astype('float32'), axis=1)
    labels = labels * mask.astype('float32')

    # Corner coordinates -> centre/size normalized by the crop size.
    boxes[:, 0], boxes[:, 2] = (boxes[:, 0] + boxes[:, 2]) / 2 / w, (
        boxes[:, 2] - boxes[:, 0]) / w
    boxes[:, 1], boxes[:, 3] = (boxes[:, 1] + boxes[:, 3]) / 2 / h, (
        boxes[:, 3] - boxes[:, 1]) / h

    return boxes, labels, mask.sum()


def random_crop(img,
                boxes,
                labels,
                scales=(0.3, 1.0),
                max_ratio=2.0,
                constraints=None,
                max_trial=50):
    """Randomly crop the image, then resize the crop back to the input size.

    For every ``(min_iou, max_iou)`` constraint up to ``max_trial`` candidate
    windows are sampled; the first one whose IoU with all boxes falls inside
    the constraint is kept. One kept candidate (or the full image) that still
    contains at least one box centre is then chosen at random.

    Args:
        img: Array accepted by ``PIL.Image.fromarray``.
        boxes: 2-D array of normalized (center_x, center_y, w, h) rows.
        labels: Array with one entry per box.
        scales: ``(min, max)`` range of the crop scale factor.
        max_ratio: Bound on the aspect-ratio change of the crop.
        constraints: Optional list of ``(min_iou, max_iou)`` pairs; a default
            list is used when empty or None.
        max_trial: Number of samples attempted per constraint.

    Returns:
        ``(img, boxes, labels)``; always a 3-tuple. The image is a NumPy
        array, except that the input object is returned unchanged when
        ``boxes`` is empty.
    """
    if len(boxes) == 0:
        # Same arity as every other return path so callers can always
        # unpack three values.
        return img, boxes, labels
    if not constraints:
        constraints = [(0.1, 1.0), (0.3, 1.0), (0.5, 1.0), (0.7, 1.0),
                       (0.9, 1.0), (0.0, 1.0)]

    img = Image.fromarray(img)
    w, h = img.size
    crops = [(0, 0, w, h)]
    for min_iou, max_iou in constraints:
        for _ in range(max_trial):
            scale = random.uniform(scales[0], scales[1])
            aspect_ratio = random.uniform(
                max(1 / max_ratio, scale * scale),
                min(max_ratio, 1 / scale / scale))
            crop_h = int(h * scale / np.sqrt(aspect_ratio))
            crop_w = int(w * scale * np.sqrt(aspect_ratio))
            # Skip degenerate candidates (empty or larger than the image).
            if not (0 < crop_w <= w and 0 < crop_h <= h):
                continue
            # randint is inclusive: a full-width/height crop yields offset 0
            # instead of raising on an empty range, and the last valid
            # offset is reachable.
            crop_x = random.randint(0, w - crop_w)
            crop_y = random.randint(0, h - crop_h)
            crop_box = np.array([[(crop_x + crop_w / 2.0) / w,
                                  (crop_y + crop_h / 2.0) / h,
                                  crop_w / float(w), crop_h / float(h)]])

            iou = multi_box_iou_xywh(crop_box, boxes)
            if min_iou <= iou.min() and max_iou >= iou.max():
                crops.append((crop_x, crop_y, crop_w, crop_h))
                break

    while crops:
        crop = crops.pop(np.random.randint(0, len(crops)))
        crop_boxes, crop_labels, box_num = box_crop(boxes, labels, crop,
                                                    (w, h))
        if box_num < 1:
            continue
        img = img.crop((crop[0], crop[1], crop[0] + crop[2],
                        crop[1] + crop[3])).resize(img.size, Image.LANCZOS)
        img = np.asarray(img)
        return img, crop_boxes, crop_labels

    img = np.asarray(img)
    return img, boxes, labels

随机缩放

def random_interp(img, size, interp=None):
    """Resize an image to ``size`` x ``size`` with a (random) interpolation.

    Args:
        img: Image array whose first two dimensions are height and width.
        size: Target edge length; both axes are scaled to this value, so the
            aspect ratio is not preserved.
        interp: One of the OpenCV flags listed below. Any other value,
            including None, selects one of them at random.

    Returns:
        The resized image as returned by ``cv2.resize``.
    """
    interp_method = [
        cv2.INTER_NEAREST,
        cv2.INTER_LINEAR,
        cv2.INTER_AREA,
        cv2.INTER_CUBIC,
        cv2.INTER_LANCZOS4,
    ]
    # Membership test only: cv2.INTER_NEAREST is 0, so a truthiness check
    # would wrongly treat an explicit nearest-neighbour request as "unset".
    if interp not in interp_method:
        interp = random.choice(interp_method)

    # Only the first two dimensions are needed, so 2-D images work as well.
    h, w = img.shape[:2]
    im_scale_x = size / float(w)
    im_scale_y = size / float(h)
    img = cv2.resize(
        img, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=interp)
    return img

随机翻转

def random_flip(img, gtboxes, thresh=0.5):
    """Randomly mirror the image horizontally together with its boxes.

    Args:
        img: Image array whose second axis is the width; 2-D (grayscale) and
            3-D arrays are both accepted.
        gtboxes: 2-D array whose column 0 is a normalized x position. It is
            updated IN PLACE when the flip is applied.
        thresh: The flip is applied when a uniform draw from ``[0, 1)``
            exceeds this value, i.e. with probability ``1 - thresh``.

    Returns:
        ``(img, gtboxes)``. A flipped image is a reversed view of the input,
        not a copy.
    """
    if random.random() > thresh:
        # Reverse only the width axis; leaving trailing axes implicit keeps
        # this valid for images with or without a channel dimension.
        img = img[:, ::-1]
        gtboxes[:, 0] = 1.0 - gtboxes[:, 0]
    return img, gtboxes

随机打乱真实框排列顺序

def shuffle_gtbox(gtbox, gtlabel):
    """Randomly reorder the ground-truth boxes, keeping labels aligned.

    Boxes and labels are joined into one array so that a single row
    permutation keeps every label with its box.

    Args:
        gtbox: 2-D array with 4 columns, one row per box.
        gtlabel: 1-D array with one label per box.

    Returns:
        ``(boxes, labels)`` in the new order, sliced from the joined array
        (so both share its dtype). The inputs are not modified.
    """
    gt = np.concatenate([gtbox, gtlabel[:, np.newaxis]], axis=1)
    idx = np.arange(gt.shape[0])
    np.random.shuffle(idx)
    gt = gt[idx, :]
    return gt[:, :4], gt[:, 4]

图像增广方法

def image_augment(img, gtboxes, gtlabels, size, means=None):
    """Run the full augmentation pipeline on one image and its annotations.

    The steps are applied in this order: random_distort, random_expand,
    random_crop, random_interp, random_flip, shuffle_gtbox.

    Args:
        img: Input image array.
        gtboxes: Ground-truth boxes for the image.
        gtlabels: Ground-truth labels, one per box.
        size: Target edge length passed to ``random_interp``.
        means: Optional per-channel fill values forwarded to
            ``random_expand`` as ``fill``.

    Returns:
        ``(img, gtboxes, gtlabels)`` cast to float32, float32 and int32.
    """
    # Randomly change brightness, contrast and color.
    img = random_distort(img)
    # Random expansion (padding).
    img, gtboxes = random_expand(img, gtboxes, fill=means)
    # Random crop.
    img, gtboxes, gtlabels = random_crop(img, gtboxes, gtlabels)
    # Random-interpolation resize.
    img = random_interp(img, size)
    # Random horizontal flip.
    img, gtboxes = random_flip(img, gtboxes)
    # Randomly shuffle the order of the ground-truth boxes.
    gtboxes, gtlabels = shuffle_gtbox(gtboxes, gtlabels)

    return (img.astype('float32'), gtboxes.astype('float32'),
            gtlabels.astype('int32'))


if __name__ == "__main__":
    # NOTE(review): `get_img_data_from_file` and `record` are not defined in
    # this excerpt; presumably they come from an earlier part of the
    # tutorial -- confirm before running.
    img, gt_boxes, gt_labels, scales = get_img_data_from_file(record)
    size = 512
    img, gt_boxes, gt_labels = image_augment(img, gt_boxes, gt_labels, size)

Processed: 0.012, SQL: 9