PyTorch 学习第二天

    技术2026-04-07  12

    1.Dataloader与Dataset

    根据自己的数据创建自己的Dataset,例如:

    class RMBDataset(Dataset):
        """Dataset for the RMB banknote denomination classification task.

        The data directory is expected to contain one sub-directory per class,
        named after the keys of ``label_name`` (``"1"`` and ``"100"``), each
        holding ``.jpg`` images.
        """

        # Maps class sub-directory name -> integer label. Kept at class level so
        # the static ``get_img_info`` can reach it without an instance (the
        # original looked up an undefined global ``rmb_label`` instead).
        label_name = {"1": 0, "100": 1}

        def __init__(self, data_dir, transform=None):
            """Index every image under ``data_dir``.

            :param data_dir: str, root directory of the dataset
            :param transform: callable applied to each PIL image in
                ``__getitem__`` (e.g. a torchvision transform), or None
            """
            # data_info holds (image path, label) pairs; samples are fetched
            # from it by index.
            self.data_info = self.get_img_info(data_dir)
            self.transform = transform

        def __getitem__(self, index):
            """Return ``(image, label)`` for the sample at ``index``."""
            path_img, label = self.data_info[index]
            img = Image.open(path_img).convert('RGB')  # pixel values 0~255

            if self.transform is not None:
                # Preprocessing / augmentation (conversion to tensor, etc.)
                # happens here.
                img = self.transform(img)

            return img, label

        def __len__(self):
            """Return the number of indexed samples."""
            return len(self.data_info)

        @staticmethod
        def get_img_info(data_dir):
            """Collect ``(image path, label)`` pairs for all ``.jpg`` files.

            Static so it can be called without instantiating the dataset.

            :param data_dir: str, root directory of the dataset
            :return: list of ``(path, int label)`` tuples
            :raises KeyError: if a sub-directory name is not a key of
                ``label_name``
            """
            data_info = []
            for root, dirs, _ in os.walk(data_dir):
                # Each sub-directory is one class.
                for sub_dir in dirs:
                    label = RMBDataset.label_name[sub_dir]
                    for img_name in os.listdir(os.path.join(root, sub_dir)):
                        if not img_name.endswith('.jpg'):
                            continue
                        path_img = os.path.join(root, sub_dir, img_name)
                        data_info.append((path_img, int(label)))
            return data_info

    2.transform–数据增强

    裁剪,旋转,对比度,亮度,填充等等

    transforms.CenterCrop(size)  # 中心裁剪
    transforms.RandomCrop(size, padding=None, pad_if_needed=False, fill=0, padding_mode='constant')  # 随机裁剪
    transforms.RandomRotation(degrees, resample=False, expand=False, center=None)  # 旋转
    transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0)  # 调整亮度、对比度、饱和度和色相
    transforms.RandomAffine(degrees, translate=None, scale=None, shear=None, resample=False, fillcolor=0)  # 仿射变换
    # 注:较新版本的 torchvision 中,resample 参数已改名为 interpolation,fillcolor 已改名为 fill。

    3.自定义transforms

    class AddPepperNoise(object):
        """Add salt-and-pepper noise to a PIL image with a given probability.

        Args:
            snr (float): Signal-to-noise rate; the fraction of pixels left
                untouched. The remaining ``1 - snr`` is split evenly between
                salt (255) and pepper (0) pixels. Must lie in [0, 1].
            p (float): Probability of applying the noise on each call.
        """

        def __init__(self, snr, p=0.9):
            # The original `assert isinstance(snr, float) or isinstance(p, float)`
            # always passed when p kept its float default, so snr was never
            # checked; asserts are also stripped under `python -O`.
            for arg_name, value in (("snr", snr), ("p", p)):
                if not isinstance(value, (int, float)):
                    raise TypeError(
                        f"{arg_name} must be a number, got {type(value).__name__}"
                    )
            if not 0.0 <= snr <= 1.0:
                raise ValueError(f"snr must be within [0, 1], got {snr}")
            self.snr = snr
            self.p = p

        def __call__(self, img):
            """Apply the noise to ``img`` with probability ``self.p``.

            Args:
                img (PIL Image): input image; its array form must be
                    3-dimensional (height, width, channels).

            Returns:
                PIL Image: a noisy RGB copy, or ``img`` itself when the noise
                is not applied.
            """
            if random.uniform(0, 1) >= self.p:
                return img

            img_ = np.array(img).copy()
            h, w, c = img_.shape
            signal_pct = self.snr
            noise_pct = 1 - self.snr
            # One draw per pixel: 0 = keep, 1 = salt, 2 = pepper. The mask is
            # repeated across channels so all channels of a pixel change
            # together.
            mask = np.random.choice(
                (0, 1, 2),
                size=(h, w, 1),
                p=[signal_pct, noise_pct / 2., noise_pct / 2.],
            )
            mask = np.repeat(mask, c, axis=2)
            img_[mask == 1] = 255  # salt noise
            img_[mask == 2] = 0    # pepper noise
            return Image.fromarray(img_.astype('uint8')).convert('RGB')
    Processed: 0.017, SQL: 9