数据
我们已有的标注数据是以 JSON 格式保存的,而现有训练代码使用的标注格式为 PASCAL VOC2007。为了不修改训练代码,这里将标注数据转换为 PASCAL VOC2007 的 XML 格式。
转换代码
import os
import json
from lxml
.etree
import Element
, SubElement
, tostring
, ElementTree
from xml
.dom
import minidom
def ReadFileDir(path):
    """Return the names of the ``.json`` entries directly inside *path*.

    Only the bare entry names are returned (they are not joined with
    *path*), in whatever order ``os.listdir`` yields them. The extension
    comparison is case-sensitive.
    """
    return [entry for entry in os.listdir(path)
            if os.path.splitext(entry)[1] == ".json"]
def ReadJson(path):
    """Load the JSON document stored at *path* and return the parsed object.

    The file is decoded explicitly as UTF-8 so the result does not depend
    on the platform's default text encoding.

    Raises whatever ``open`` or ``json.load`` raise (e.g. ``OSError`` for an
    unreadable file, ``ValueError`` for malformed JSON).
    """
    with open(path, 'r', encoding='utf-8') as load_f:
        return json.load(load_f)
def subElement(root, tag, text):
    """Append a child element named *tag* to *root* and return the child.

    The child's text is set to *text* unless *text* is the empty string,
    in which case the child is created without any text (this is how the
    callers create pure container nodes).
    """
    child = SubElement(root, tag)
    if text == "":
        return child
    child.text = text
    return child
def saveXML(root, filename, indent="\t", newl="\n", encoding="utf-8"):
    """Pretty-print the element tree rooted at *root* into *filename*.

    The tree is serialized with ``tostring``, re-parsed with ``minidom`` and
    written through ``writexml`` so that the output is indented.

    Args:
        root: root element of the tree to write.
        filename: destination path; its directory must already exist.
        indent: string used for each indentation level.
        newl: string written at the end of each line.
        encoding: encoding announced in the XML declaration and used to
            encode the file on disk.
    """
    dom = minidom.parseString(tostring(root))
    # Open the file with the same encoding that writexml puts in the XML
    # declaration; otherwise the platform default encoding is used and the
    # declaration can contradict the actual bytes for non-ASCII text.
    with open(filename, 'w', encoding=encoding) as f:
        dom.writexml(f, "", indent, newl, encoding)
def make_xml(image_name, width, height):
    """Build the PASCAL VOC ``annotation`` header tree for one image.

    The returned root element contains, in this order: ``folder``,
    ``filename``, ``source``, ``owner``, ``segmented`` and ``size``. No
    ``object`` entries are added here; callers append those themselves.

    Args:
        image_name: text stored in the ``filename`` element.
        width: image width, stored as ``str(width)``.
        height: image height, stored as ``str(height)``.

    Returns:
        The root ``annotation`` element.
    """
    annotation = Element('annotation')
    subElement(annotation, "folder", "widerface")
    subElement(annotation, "filename", image_name)

    source = subElement(annotation, "source", "")
    for tag, text in (
        ("database", "wider face Database"),
        ("annotation", "PASCAL VOC2007"),
        ("image", "flickr"),
        ("flickrid", "-1"),
    ):
        subElement(source, tag, text)

    owner = subElement(annotation, "owner", "")
    for tag, text in (("flickrid", "yanyu"), ("name", "yanyu")):
        subElement(owner, tag, text)

    subElement(annotation, "segmented", "0")

    size = subElement(annotation, "size", "")
    for tag, text in (
        ("width", str(width)),
        ("height", str(height)),
        ("depth", "3"),
    ):
        subElement(size, tag, text)

    return annotation
def cropper_to_bndbox(crop_dict, width, height):
    """Convert one cropper record into integer pixel box corners.

    Args:
        crop_dict: mapping with 'x', 'y', 'width' and 'height' entries.
            They are scaled by the image size here, so they are presumably
            fractions of the image dimensions -- confirm against the data.
        width: image width used to scale the horizontal values.
        height: image height used to scale the vertical values.

    Returns:
        ``(xmin, ymin, xmax, ymax)``, each truncated with ``int``.
    """
    left = crop_dict['x']
    top = crop_dict['y']
    xmin = int(left * width)
    ymin = int(top * height)
    xmax = int((left + crop_dict['width']) * width)
    # The bottom edge is measured from 'y'; measuring it from 'x' puts the
    # box bottom in the wrong place whenever x != y.
    ymax = int((top + crop_dict['height']) * height)
    return xmin, ymin, xmax, ymax


if __name__ == "__main__":
    path = '/home/q/train/Data/images/hxlx'
    save_xml_dir = 'xml'
    # saveXML opens its target for writing and does not create directories.
    if not os.path.isdir(save_xml_dir):
        os.makedirs(save_xml_dir)

    for json_name in ReadFileDir(path):
        record = ReadJson(os.path.join(path, json_name))

        # Records missing the image id or either dimension are skipped.
        if 'photo_id' not in record:
            continue
        image_id = record['photo_id']
        image_name = image_id + ".jpg"
        if 'width' not in record or 'height' not in record:
            continue
        width = record['width']
        height = record['height']

        node_root = make_xml(image_name, width, height)

        if 'croppers' in record:
            for crop_dict in record['croppers']:
                print(crop_dict)
                xmin, ymin, xmax, ymax = cropper_to_bndbox(
                    crop_dict, width, height)

                node_object = subElement(node_root, "object", "")
                subElement(node_object, "name", crop_dict["cropper_type"])
                subElement(node_object, "pose", 'Unspecified')
                subElement(node_object, "truncated", '1')
                subElement(node_object, "difficult", '0')

                node_bndbox = subElement(node_object, "bndbox", "")
                subElement(node_bndbox, "xmin", str(xmin))
                subElement(node_bndbox, "ymin", str(ymin))
                subElement(node_bndbox, "xmax", str(xmax))
                subElement(node_bndbox, "ymax", str(ymax))

                subElement(node_object, "has_lm", '0')

        # NOTE(review): an annotation file is written for every record that
        # has valid metadata, including records without 'croppers' --
        # confirm the training code accepts annotations with no objects.
        saveXML(node_root, os.path.join(save_xml_dir, image_id + ".xml"))
具体使用时,请根据自己的数据格式调整或添加相应的字段。
创建image_list.txt
import os
def ReadFileDir(path, pattern=".json"):
    """List the entries directly inside *path* whose extension is *pattern*.

    Args:
        path: directory to scan (not recursive).
        pattern: extension including the leading dot; compared
            case-sensitively against ``os.path.splitext(entry)[1]``.

    Returns:
        A ``(names, full_paths)`` tuple of two parallel lists: the bare
        entry names, and the same names joined with *path*. Order follows
        ``os.listdir``.
    """
    names = [entry for entry in os.listdir(path)
             if os.path.splitext(entry)[1] == pattern]
    full_paths = [os.path.join(path, name) for name in names]
    return names, full_paths
def RemoveFile(path):
    """Delete *path* if it exists; otherwise print a notice and do nothing."""
    if not os.path.exists(path):
        print('no such file:%s' % path)
        return
    os.remove(path)
def image_list_line(xml_path):
    """Return the list entry ``"<image path> <xml path>"`` for *xml_path*.

    The image path is *xml_path* with its final extension replaced by
    ``.jpg``. ``os.path.splitext`` is used so that dots elsewhere in the
    path (e.g. ``./dir/a.xml`` or ``dir/img.v2.xml``) are left intact.
    """
    stem = os.path.splitext(xml_path)[0]
    return stem + '.jpg ' + xml_path


if __name__ == '__main__':
    path = 'hxlx'
    _, pfile = ReadFileDir(path, '.xml')

    list_path = 'image_list.txt'
    # Start from a clean list file on every run.
    RemoveFile(list_path)
    # The context manager closes the file even if a write fails.
    with open(list_path, 'a') as f:
        for xml_path in pfile:
            f.write("{}\n".format(image_list_line(xml_path)))
转载请注明原文地址:https://ipadbbs.8miu.com/read-12214.html