目录
一、引入所需要的包
二、定义相关常量
三、生成请求url
四、音频相关
    1. 首先使用pyaudio获取音频流
    2. 从音频流中获取音频并上传到接口
五、语音识别接口调用
六、返回结果处理
七、语音识别类
八、调用
附录
一、引入所需要的包
import hashlib
import base64
import hmac
import json
from urllib
.parse
import urlencode
import logging
from wsgiref
.handlers
import format_date_time
import datetime
from datetime
import datetime
import time
from time
import mktime
import _thread
as thread
import pyaudio
from ws4py
.client
.threadedclient
import WebSocketClient
# Configure the root logger once: INFO and above, with timestamp, logger name
# and level in front of every message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
二、定义相关常量
# Frame markers; the values equal the 'status' field sent in each audio frame.
STATUS_FIRST_FRAME = 0     # first frame of a session
STATUS_CONTINUE_FRAME = 1  # intermediate frame
STATUS_LAST_FRAME = 2      # final frame

# Microphone capture settings handed to PyAudio.
CHUNK = 1024               # frames requested per stream.read() call
FORMAT = pyaudio.paInt16   # sample format constant from PyAudio
CHANNELS = 1               # single channel
RATE = 16000               # sampling rate; matches 'audio/L16;rate=16000' in the frames
三、生成请求url
class WsParam(object):
    """Hold the credentials/request arguments and build the signed request URL.

    Attributes:
        APPId, APIKey, APISecret: credentials passed in by the caller.
        AudioFile: stored as given; not read anywhere inside this class.
        CommonArgs: dict sent as the 'common' part of the first frame.
        BusinessArgs: dict sent as the 'business' part of the first frame.
    """

    # Endpoint pieces, kept in one place because they appear both in the URL
    # and in the string that gets signed.
    _HOST = 'ws-api.xfyun.cn'
    _PATH = '/v2/iat'

    def __init__(self, APPId, APIKey, APISecret, AudioFile):
        self.APPId = APPId
        self.APIKey = APIKey
        self.APISecret = APISecret
        self.AudioFile = AudioFile

        self.CommonArgs = {
            'app_id': self.APPId
        }
        # NOTE(review): 'dwa': 'wpgs' presumably switches on progressive
        # results (the receiver reads 'pgs'/'rg'); confirm the meaning of each
        # key against the service documentation.
        self.BusinessArgs = {
            'domain': 'iat',
            'language': 'zh_cn',
            'accent': 'mandarin',
            'vinfo': 1,
            'vad_eos': 10000,
            'dwa': 'wpgs',
            'ptt': 0
        }

    def create_url(self):
        """Return the wss:// URL carrying the authorization query parameters.

        The signature is an HMAC-SHA256 (keyed with APISecret) over the host,
        the current date and the request line; it is base64-encoded and then
        embedded in a base64-encoded authorization string.

        Returns:
            str: the endpoint URL followed by the url-encoded
            'authorization', 'date' and 'host' parameters.
        """
        # Use the epoch clock directly. Going through a naive local datetime
        # and mktime() is ambiguous during the DST fall-back hour and could
        # shift the signed date by an hour.
        date = format_date_time(time.time())

        signature_origin = '\n'.join((
            'host: ' + self._HOST,
            'date: ' + date,
            'GET ' + self._PATH + ' HTTP/1.1',
        ))
        signature_sha = hmac.new(
            self.APISecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature_sha = base64.b64encode(signature_sha).decode(encoding='utf-8')

        authorization_origin = 'api_key="%s", algorithm="%s", headers="%s", signature="%s"' % (
            self.APIKey, 'hmac-sha256', 'host date request-line', signature_sha
        )
        authorization = base64.b64encode(
            authorization_origin.encode('utf-8')
        ).decode(encoding='utf-8')

        query = {
            'authorization': authorization,
            'date': date,
            'host': self._HOST,
        }
        return 'wss://' + self._HOST + self._PATH + '?' + urlencode(query)
四、音频相关
1. 首先使用pyaudio获取音频流
# Create the PyAudio instance and open a capture (input) stream using the
# module-level FORMAT / CHANNELS / RATE settings. No device index is given.
audio = pyaudio.PyAudio()
stream = audio.open(
    input=True,
    rate=RATE,
    channels=CHANNELS,
    format=FORMAT,
)
2. 从音频流中获取音频并上传到接口
# Illustrative fragment: read audio from the stream and push it to the
# service. `self` is the WebSocket client instance that owns `ws_param` and
# `send`; this loop is meant to live inside one of its methods.
while True:
    # Stream.read() needs the number of frames to fetch; CHUNK is the same
    # size the full sender in opened() uses.
    buf = stream.read(CHUNK)
    d = {
        'common': self.ws_param.CommonArgs,
        'business': self.ws_param.BusinessArgs,
        'data': {
            'status': 0,
            'format': 'audio/L16;rate=16000',
            # Raw samples travel base64-encoded inside the JSON payload.
            'audio': str(base64.b64encode(buf), 'utf-8'),
            'encoding': 'raw',
        },
    }
    self.send(json.dumps(d))
五、语音识别接口调用
def opened(self):
    """Start a background thread that streams microphone audio to the server.

    The thread opens a PyAudio input stream, reads CHUNK frames at a time and
    sends each chunk as a JSON frame through ``self.send``. The first frame
    also carries ``self.ws_param.CommonArgs`` and ``BusinessArgs``.

    Nothing in this function ever sets the status to STATUS_LAST_FRAME, so as
    written the loop only ends when ``read``/``send`` raises; the last-frame
    branch is kept for callers that add their own stop condition.
    """

    def build_frame(status, chunk, with_session_args=False):
        """Serialize one audio chunk (bytes) into the JSON frame for `status`."""
        frame = {}
        if with_session_args:
            # Key order (common, business, data) is the same as before.
            frame['common'] = self.ws_param.CommonArgs
            frame['business'] = self.ws_param.BusinessArgs
        frame['data'] = {
            'status': status,
            'format': 'audio/L16;rate=16000',
            'audio': str(base64.b64encode(chunk), 'utf-8'),
            'encoding': 'raw',
        }
        return json.dumps(frame)

    def run(*args):
        # NOTE(review): stream.read() already blocks until CHUNK frames are
        # available, so this extra pause makes the loop slower than capture;
        # confirm it is intended.
        interval = 0.04
        status = STATUS_FIRST_FRAME

        audio = pyaudio.PyAudio()
        try:
            stream = audio.open(format=FORMAT,
                                channels=CHANNELS,
                                rate=RATE,
                                input=True)
            try:
                while True:
                    buf = stream.read(CHUNK)
                    if status == STATUS_FIRST_FRAME:
                        self.send(build_frame(STATUS_FIRST_FRAME, buf,
                                              with_session_args=True))
                        status = STATUS_CONTINUE_FRAME
                    elif status == STATUS_CONTINUE_FRAME:
                        self.send(build_frame(STATUS_CONTINUE_FRAME, buf))
                    elif status == STATUS_LAST_FRAME:
                        self.send(build_frame(STATUS_LAST_FRAME, buf))
                        logging.info('录音结束')
                        time.sleep(1)
                        break
                    time.sleep(interval)
            finally:
                # Release the device even when read()/send() raises, not only
                # after a clean last frame.
                stream.stop_stream()
                stream.close()
        finally:
            audio.terminate()

        # NOTE(review): this invokes the `closed` callback directly; it does
        # not call close(). Confirm whether close() was intended.
        self.closed(1000, '')

    thread.start_new_thread(run, ())
六、返回结果处理
def received_message(self, message):
    """Handle one server message: log an API error or merge text into rec_text.

    Args:
        message: object whose string form is the JSON payload.

    Side effects:
        On success updates ``self.rec_text`` (dict keyed by sequence number)
        and logs it. Any failure while parsing is logged, never raised.
    """
    message = str(message)
    try:
        # Parse once instead of once per field.
        payload = json.loads(message)
        code = payload['code']

        # Check the code before touching 'data': an error reply without a
        # 'data' object must be reported as the API error it is, not as a
        # parse failure.
        if code != 0:
            logging.error('sid:%s call error:%s code is:%s',
                          payload.get('sid'), payload.get('message'), code)
            return

        data = payload['data']['result']
        result = ''.join(w['w'] for segment in data['ws'] for w in segment['cw'])

        if data.get('pgs') == 'rpl':
            # Replacement: the new text stands in for entries rg[0]..rg[1].
            # It is stored under rg[0]; every other key in the range is
            # dropped, including rg[1] itself.
            rg = data['rg']
            self.rec_text[rg[0]] = result
            for stale in range(rg[0] + 1, rg[1] + 1):
                self.rec_text.pop(stale, None)
        else:
            self.rec_text[data['sn']] = result

        logging.info('识别结果为: {}'.format(self.rec_text))
    except Exception as e:
        # Callback boundary: keep the connection alive and record the payload.
        logging.info(message)
        logging.error('receive msg,but parse exception: {}'.format(e))
七、语音识别类
class RecognitionWebsocket(WebSocketClient):
    """WebSocket client that keeps the request parameters and recognized text.

    Attributes:
        ws_param: the parameter object passed to the constructor.
        rec_text: dict that starts empty; holds the recognized text.
    """

    def __init__(self, url, ws_param):
        super().__init__(url)
        self.rec_text = {}
        self.ws_param = ws_param

    def opened(self):
        # Body elided in this listing; a standalone `opened` with the same
        # signature appears elsewhere in this file.
        ...

    def received_message(self, message):
        # Body elided in this listing; a standalone `received_message` with
        # the same signature appears elsewhere in this file.
        ...

    def on_error(self, error):
        """Log the error object at ERROR level."""
        logging.error(error)

    def closed(self, code, reason=None):
        """Log the close code and reason."""
        logging.info(''.join(('语音识别通道关闭', str(code), str(reason))))
八、调用
# Entry point of the walkthrough. The credentials are left blank here and
# must be filled in before this can connect.
ws_param = WsParam(APPId='', APIKey='', APISecret='', AudioFile=r'')
ws_url = ws_param.create_url()
ws = RecognitionWebsocket(ws_url, ws_param)
try:
    ws.connect()
    ws.run_forever()
except KeyboardInterrupt:
    # Ctrl-C is the only way out of run_forever() here; close the WebSocket
    # instead of abandoning the connection.
    ws.close()
附录
自动停止录音的实现可以参考《python录音实现自动结束录音》一文。