训练数据转换为PASCAL VOC2007

技术2022-07-11 91

数据

我们已有的标注数据是以json格式保存的，而现在训练代码使用的标注格式为PASCAL VOC2007。为了不修改训练代码，需要将这些数据转换为PASCAL VOC2007的xml格式。

转换代码

import os
import json

try:
    from lxml.etree import Element, SubElement, tostring, ElementTree
except ImportError:
    # lxml is optional here: the standard library offers the same API for
    # the handful of names this script uses.
    from xml.etree.ElementTree import Element, SubElement, tostring, ElementTree
from xml.dom import minidom


def ReadFileDir(path):
    """Return the names of the ``.json`` files located directly in *path*."""
    return [name for name in os.listdir(path)
            if os.path.splitext(name)[1] == ".json"]


def ReadJson(path):
    """Load and return the JSON document stored at *path* (read as UTF-8)."""
    with open(path, 'r', encoding='utf-8') as load_f:
        return json.load(load_f)


def subElement(root, tag, text):
    """Append a child element *tag* to *root* and return it.

    The child's text is set to *text* unless *text* is the empty string,
    which is how callers create pure container nodes.
    """
    ele = SubElement(root, tag)
    if text != "":
        ele.text = text
    return ele


def saveXML(root, filename, indent="\t", newl="\n", encoding="utf-8"):
    """Pretty-print the element tree *root* into *filename*.

    The file is opened with the same *encoding* that is advertised in the
    XML declaration, so non-ASCII text is stored consistently regardless of
    the platform's default encoding. The parent directory of *filename* is
    created when it does not exist yet.
    """
    rawText = tostring(root)
    dom = minidom.parseString(rawText)
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(filename, 'w', encoding=encoding) as f:
        dom.writexml(f, "", indent, newl, encoding)


def make_xml(image_name, width, height):
    """Build the ``annotation`` root holding the per-image header fields.

    Returns the root element; ``object`` entries are appended by the caller.
    """
    node_root = Element('annotation')
    subElement(node_root, "folder", "widerface")
    subElement(node_root, "filename", image_name)

    node_source = subElement(node_root, "source", "")
    subElement(node_source, "database", "wider face Database")
    subElement(node_source, "annotation", "PASCAL VOC2007")
    subElement(node_source, "image", "flickr")
    subElement(node_source, "flickrid", "-1")

    node_owner = subElement(node_root, "owner", "")
    subElement(node_owner, "flickrid", "yanyu")
    subElement(node_owner, "name", "yanyu")

    subElement(node_root, "segmented", "0")

    node_size = subElement(node_root, "size", "")
    subElement(node_size, "width", str(width))
    subElement(node_size, "height", str(height))
    subElement(node_size, "depth", "3")
    return node_root


def crop_to_bbox(crop_dict, width, height):
    """Convert one cropper rectangle to ``(xmin, ymin, xmax, ymax)`` pixels.

    *crop_dict* supplies ``x``, ``y``, ``width`` and ``height``; they are
    scaled by the image *width* / *height*, i.e. they are treated as
    fractions of the image size.
    """
    xmin = int(crop_dict['x'] * width)
    ymin = int(crop_dict['y'] * height)
    xmax = int((crop_dict['x'] + crop_dict['width']) * width)
    # The bottom edge starts from 'y' (not 'x') plus the box height.
    ymax = int((crop_dict['y'] + crop_dict['height']) * height)
    return xmin, ymin, xmax, ymax


def build_annotation(annotation):
    """Translate one loaded JSON annotation into a VOC element tree.

    Returns ``(image_id, root)``, or ``None`` when any of the required keys
    ``photo_id``, ``width`` or ``height`` is missing. Every entry of the
    optional ``croppers`` list becomes one ``object`` node.
    """
    for required in ('photo_id', 'width', 'height'):
        if required not in annotation:
            return None

    # str() keeps numeric ids usable when building file names.
    image_id = str(annotation['photo_id'])
    width = annotation['width']
    height = annotation['height']
    node_root = make_xml(image_id + ".jpg", width, height)

    for crop_dict in annotation.get('croppers', []):
        print(crop_dict)
        xmin, ymin, xmax, ymax = crop_to_bbox(crop_dict, width, height)

        node_object = subElement(node_root, "object", "")
        subElement(node_object, "name", crop_dict["cropper_type"])
        subElement(node_object, "pose", 'Unspecified')
        subElement(node_object, "truncated", '1')
        subElement(node_object, "difficult", '0')

        node_bndbox = subElement(node_object, "bndbox", "")
        subElement(node_bndbox, "xmin", str(xmin))
        subElement(node_bndbox, "ymin", str(ymin))
        subElement(node_bndbox, "xmax", str(xmax))
        subElement(node_bndbox, "ymax", str(ymax))

        subElement(node_object, "has_lm", '0')
    return image_id, node_root


if __name__ == "__main__":
    path = '/home/q/train/Data/images/hxlx'
    save_xml_dir = 'xml'

    for json_name in ReadFileDir(path):
        converted = build_annotation(ReadJson(os.path.join(path, json_name)))
        if converted is None:
            # Required fields are missing: skip this file.
            continue
        image_id, node_root = converted
        # Write one xml file per annotated image.
        saveXML(node_root, os.path.join(save_xml_dir, image_id + ".xml"))

具体使用时，请根据自己的数据格式调整其中的路径和字段。

创建image_list.txt

import os


def ReadFileDir(path, pattern=".json"):
    """List the files in *path* whose extension equals *pattern*.

    Returns a pair ``(names, paths)``: the bare file names and the same
    names joined onto *path*, in the order ``os.listdir`` yields them.
    """
    file = []
    pfile = []
    for name in os.listdir(path):
        if os.path.splitext(name)[1] == pattern:
            file.append(name)
            pfile.append(os.path.join(path, name))
    return file, pfile


def RemoveFile(path):
    """Delete *path* if it exists; otherwise print a notice."""
    if os.path.exists(path):
        os.remove(path)
    else:
        print('no such file:%s' % path)


def make_list_line(xml_path):
    """Return the ``"<image>.jpg <xml_path>"`` entry for one xml file.

    Only the final extension is replaced, so directories or file names
    that contain dots (``./hxlx/a.xml``, ``a.b.xml``) stay intact.
    """
    return os.path.splitext(xml_path)[0] + '.jpg ' + xml_path


if __name__ == '__main__':
    path = 'hxlx'
    _, pfile = ReadFileDir(path, '.xml')

    path = 'image_list.txt'
    # Start from a clean list file on every run.
    RemoveFile(path)
    with open(path, 'a') as f:
        for xml_path in pfile:
            f.write("{}\n".format(make_list_line(xml_path)))

Processed: 0.011, SQL: 9