先安装依赖:`pip install baidu-aip`
文字识别
百度OCR接口
简单demo
from aip import AipOcr

# Placeholder credentials used to construct the OCR client; replace them
# with real values before running.
APP_ID = 'xx'
API_KEY = 'xxx'
SECRET_KEY = 'xx'

# Options dictionary passed along with every ``aipOcr.general`` call in
# this file.
options = {
    'language_type': 'CHN_ENG',
    "detect_direction": False,
    "detect_language": False,
    "probability": False,
}

aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)

file_path = 'D:\\1.jpg'

# Read the image file as raw bytes.
with open(file_path, 'rb') as f:
    img_data = f.read()
def get_img_data(image):
    """Serialize an image object to PNG-encoded bytes.

    Args:
        image: Object exposing ``save(fp, format=...)``; presumably a
            ``PIL.Image.Image`` -- confirm against the caller.

    Returns:
        bytes: Everything ``image.save`` wrote into the in-memory buffer.
    """
    # Imported locally because no module-level ``import io`` is visible in
    # this file; without it the BytesIO call below raises NameError.
    import io

    buffer = io.BytesIO()
    image.save(buffer, format='PNG')
    return buffer.getvalue()
# NOTE(review): ``Image`` is presumably ``PIL.Image`` (Pillow); no import
# for it is visible in this file -- confirm and add one if it is missing.
image = Image.open(file_path)

# Re-encode the opened image as PNG bytes; this replaces the raw file bytes
# previously stored in ``img_data``.
img_data = get_img_data(image)

# Send the bytes to the OCR service and print each recognized line.
result = aipOcr.general(img_data, options)
for item in result['words_result']:
    print(item['words'])
长图文字识别
def cut_image(image, *, line_height=50.1, top_margin=4, lines_per_slice=20):
    """Split a tall image into consecutive full-width horizontal slices.

    Each slice is ``lines_per_slice * line_height - line_height / 2 +
    top_margin`` pixels tall. The defaults reproduce the values that were
    previously hard-coded (50.1, 4 and 20).

    Args:
        image: Object exposing ``size`` (width, height) and ``crop(box)``;
            presumably a ``PIL.Image.Image`` -- confirm against the caller.
        line_height: Height of one text line, in pixels.
        top_margin: Extra pixels added to every slice height.
        lines_per_slice: Number of text lines each slice should hold.

    Returns:
        list: The values returned by ``image.crop`` for every slice, from
        top to bottom. The last box may extend past the bottom edge of the
        image; it is handed to ``image.crop`` unchanged.

    Raises:
        ValueError: If the parameters yield a slice stride below 0.1 px,
            which would make it impossible to advance through the image.
    """
    slice_height = lines_per_slice * line_height - line_height / 2 + top_margin

    # Offsets are stepped in integer tenths of a pixel so that ``range`` can
    # be used with a fractional slice height; the stride is truncated to
    # 0.1 px precision.
    step_tenths = int(slice_height * 10)
    if step_tenths <= 0:
        raise ValueError(
            'slice height must be at least 0.1 px, got {!r}'.format(slice_height)
        )

    width, height = image.size[0], image.size[1]
    slices = []
    for offset_tenths in range(0, height * 10, step_tenths):
        top = offset_tenths / 10
        slices.append(image.crop((0, top, width, top + slice_height)))
    return slices
def img_to_words(img_data, output_path='D:\\1.txt'):
    """Recognize the text in one image and append it to a text file.

    Args:
        img_data: Image payload passed to ``aipOcr.general``; the rest of
            this file passes the PNG bytes produced by ``get_img_data``.
        output_path: File the recognized lines are appended to (UTF-8, one
            line per recognized entry). Defaults to the path that was
            previously hard-coded.

    Raises:
        KeyError: If the response has no ``'words_result'`` entry
            (presumably an error payload from the service -- verify). The
            full response is included in the message.
    """
    result = aipOcr.general(img_data, options)

    # Validate before touching the output file so that a failed request
    # surfaces the whole response instead of a bare missing-key error.
    if 'words_result' not in result:
        raise KeyError(
            "OCR response has no 'words_result': {!r}".format(result)
        )

    with open(output_path, 'a', encoding='utf-8') as f:
        f.writelines(item['words'] + '\n' for item in result['words_result'])
# Slice the long image, encode every slice as PNG bytes, then recognize
# the slices one by one in top-to-bottom order.
image_list = list(map(get_img_data, cut_image(image)))
for img_data in image_list:
    img_to_words(img_data)
加上os.listdir可以一次访问文件夹下多个文件
import os

# NOTE(review): ``dir_path`` is not defined anywhere in the visible file;
# it must be set to the folder to scan before this loop runs.
for file_name in os.listdir(dir_path):
    # ``os.path.splitext`` keeps the leading dot in the extension, so the
    # comparison must use '.jpg' / '.png'. Lower-casing also accepts
    # upper-case extensions such as '.JPG'.
    extension = os.path.splitext(file_name)[1].lower()
    if extension in ('.jpg', '.png'):
        print(file_name)
姓名识别
使用自然语言处理(NLP)
百度NLP接口
可以识别词性
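pos(词性标注):

| 词性 | 含义 | 词性 | 含义 | 词性 | 含义 | 词性 | 含义 |
| --- | --- | --- | --- | --- | --- | --- | --- |
| n | 普通名词 | f | 方位名词 | s | 处所名词 | t | 时间名词 |
| nr | 人名 | ns | 地名 | nt | 机构团体名 | nw | 作品名 |
| nz | 其他专名 | v | 普通动词 | vd | 动副词 | vn | 名动词 |
| a | 形容词 | ad | 副形词 | an | 名形词 | d | 副词 |
| m | 数量词 | q | 量词 | r | 代词 | p | 介词 |
| c | 连词 | u | 助词 | xc | 其他虚词 | w | 标点符号 |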
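ne(专名识别):

| 缩略词 | 含义 | 缩略词 | 含义 | 缩略词 | 含义 | 缩略词 | 含义 |
| --- | --- | --- | --- | --- | --- | --- | --- |
| PER | 人名 | LOC | 地名 | ORG | 机构名 | TIME | 时间 |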
from aip import AipNlp

# Placeholder credentials used to construct the NLP client; replace them
# with real values before running.
APP_ID = 'xx'
API_KEY = 'xxx'
SECRET_KEY = 'xx'

aipNlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)

text = '我叫张大天,我有个弟弟叫小天'

# Keep every token that the lexer tags as a person name, either through the
# named-entity field ('PER') or through the part-of-speech field ('nr').
result = [
    token['item']
    for token in aipNlp.lexer(text)['items']
    if token['ne'] == 'PER' or token['pos'] == 'nr'
]
print(result)
输出结果:['张大天', '小天']
转载请注明原文地址:https://ipadbbs.8miu.com/read-61730.html