要开发一个flask语音识别接口,首先要解决语音文件在网络中的传输问题,然后选识别算法进行识别
文章目录
- 1、以二进制文件流方式上传语音
- 2、网页端长连接流式上传语音文件
- 3、语音识别接口
1、以二进制文件流方式上传语音
python服务端代码,以flask.request.files接收前端的语音上传请求
from flask import Flask, request
import io
import wave
import os
import json

app = Flask(__name__)


@app.route('/upload_audio', methods=['POST'])
def upload_audio():
    """Receive an audio file upload and save it as output.wav.

    Expects a multipart/form-data POST with the audio under the "file" key.
    Returns a JSON string: status 0 on success, status 1 when no file was sent.
    """
    f_obj = request.files.get("file", None)
    if f_obj is None:
        return json.dumps({'status': 1, 'msg': 'No audio was received.', 'result': ''})
    audio_data = f_obj.read()
    # Bug fix: the original used 'ab' (append). Appending a second complete
    # upload after the first leaves two RIFF headers in one file, corrupting
    # it. Each upload should overwrite the previous one.
    with open('output.wav', 'wb') as f:
        f.write(audio_data)
    return json.dumps({'status': 0, 'msg': '', 'result': 'receive audio success.'})


if __name__ == '__main__':
    # Bug fix: the original called socketio.run(app, ...) but no SocketIO
    # instance is defined in this script -> NameError at startup. A plain
    # Flask dev server is all this endpoint needs.
    app.run(port=8200, debug=True)
前端请求代码示例如下:
import requests
import time


def post_audio():
    """Upload ./c1.wav to the /upload_audio endpoint and print the outcome.

    Prints the elapsed request time, then either the server's error message
    (status != 0) or its success result.
    """
    url = "http://localhost:8200/upload_audio"
    t1 = time.time()
    # Bug fix: the original passed open('./c1.wav', 'rb') directly into the
    # files dict and never closed it (file-handle leak). A context manager
    # guarantees the handle is released even if the request raises.
    with open('./c1.wav', 'rb') as audio_file:
        r = requests.post(url, files={'file': audio_file})
    t2 = time.time()
    print("consume time: %f s" % (t2 - t1))  # fixed typo "comsume"
    if r.json()['status']:
        print(r.json()['msg'])
    else:
        print(r.json()['result'])


if __name__ == '__main__':
    post_audio()
2、网页端长连接流式上传语音文件
python服务端代码,接收网页端发来的语音片段并保存为.wav格式的语音文件,方便后面的语音识别
from flask import Flask, request, render_template
from flask_socketio import SocketIO, emit
import json
import base64

app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret'
socketio = SocketIO(app, async_mode='eventlet')

# In-memory storage for the audio chunks (base64-encoded strings).
audio_chunks = []


@app.route('/')
def index():
    """Serve the recording page."""
    return render_template('index.html')


@socketio.on('audio_chunk')
def handle_audio_chunk(data):
    """Buffer one base64-encoded audio chunk sent by the browser."""
    global audio_chunks
    audio_chunks.append(data)
    # Optionally, you can write each chunk to a file here if you prefer
    # not to keep it in memory.
    with open('audio_chunk.wav', 'ab') as f:
        f.write(base64.b64decode(data))


@socketio.on('audio_end')
def handle_audio_end():
    """Assemble all buffered chunks into uploaded_audio.wav and notify the client.

    Bug fixes vs. the original:
    - it wrote only audio_chunks[0], silently dropping every chunk after the
      first; all chunks must be decoded and concatenated in order.
    - it opened the file in 'ab' (append) mode, so successive recordings
      piled up in one corrupt file; each recording should start fresh.
    """
    global audio_chunks
    if audio_chunks:
        print("开始保存语音文件")
        with open('uploaded_audio.wav', 'wb') as f:
            for chunk in audio_chunks:
                f.write(base64.b64decode(chunk))
        print("服务端保存语音文件完成")
        audio_chunks = []  # Clear the chunks list
        emit('audio_saved', {'message': 'Audio saved successfully!'})


@socketio.on('connect')
def connected_msg():
    """socket client event - connected"""
    print('客户端连接成功,client connected!')


@socketio.on('disconnect')
def disconnect_msg():
    """socket client event - disconnected"""
    print('客户端断开连接,client disconnected!')


if __name__ == '__main__':
    socketio.run(app, port=8200, debug=True)
前端html及JavaScript代码如下:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Audio Stream Upload</title>
    <script src="https://cdn.socket.io/4.0.0/socket.io.min.js"></script>
</head>
<body>
    <h1>Upload Audio Stream</h1>
    <button id="start-recording">Start Recording</button>
    <button id="stop-recording" disabled>Stop Recording</button>
    <p id="status"></p>
    <script>
        const socket = io.connect('http://localhost:8200');
        let mediaRecorder;
        let audioChunks = [];

        // Convert an ArrayBuffer to base64 without spreading the whole
        // buffer into String.fromCharCode(...) — the original version did
        // that, which overflows the call stack for large chunks.
        function arrayBufferToBase64(arrayBuffer) {
            const bytes = new Uint8Array(arrayBuffer);
            let binary = '';
            const step = 0x8000; // 32 KB slices keep argument counts safe
            for (let i = 0; i < bytes.length; i += step) {
                binary += String.fromCharCode.apply(null, bytes.subarray(i, i + step));
            }
            return btoa(binary);
        }

        document.getElementById('start-recording').addEventListener('click', async () => {
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            mediaRecorder = new MediaRecorder(stream);

            mediaRecorder.ondataavailable = event => {
                audioChunks.push(event.data);
                // Send the chunk to the server (convert to ArrayBuffer first).
                event.data.arrayBuffer().then(arrayBuffer => {
                    socket.emit('audio_chunk', arrayBufferToBase64(arrayBuffer));
                });
            };

            mediaRecorder.onstop = () => {
                // Inform the server that the audio stream has ended.
                socket.emit('audio_end');
                document.getElementById('start-recording').disabled = false;
                document.getElementById('stop-recording').disabled = true;
                document.getElementById('status').textContent =
                    'Recording stopped. Waiting for server response...';
            };

            // Bug fix: start() without a timeslice fires ondataavailable only
            // once, when recording stops — nothing actually streams. A 250 ms
            // timeslice emits chunks periodically while recording.
            mediaRecorder.start(250);
            document.getElementById('start-recording').disabled = true;
            document.getElementById('stop-recording').disabled = false;
            document.getElementById('status').textContent = 'Recording...';
        });

        document.getElementById('stop-recording').addEventListener('click', () => {
            mediaRecorder.stop();
        });

        socket.on('audio_saved', data => {
            document.getElementById('status').textContent = data.message;
        });
    </script>
</body>
</html>
启动python服务,浏览器访问http://localhost:8200/就可以看到如下网页:
3、语音识别接口
语音识别算法这里选择openai的开源项目:whisper,项目地址:https://github.com/openai/whisper
- 安装
pip install -U openai-whisper
还需要在终端安装ffmpeg,sudo apt update && sudo apt install ffmpeg
flask服务端代码如下:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '1'
from flask import Flask, request
import io
import wave
import json
import whisper

app = Flask(__name__)
model = whisper.load_model("turbo")  # or your model


@app.route('/audio_rec', methods=['POST'])
def audio_recognize():
    """Receive an audio file and transcribe it with the whisper model.

    Expects a multipart/form-data POST with the audio under the "file" key.
    Returns a JSON string: status 0 with the transcript on success,
    status 1 when no file was sent.
    """
    f_obj = request.files.get("file", None)
    if f_obj is None:
        return json.dumps({'status': 1, 'msg': 'No audio was received.', 'result': ''})
    save_path = "temp.wav"
    audio_data = f_obj.read()
    # Bug fix: the original used 'ab' (append), so every request after the
    # first appended a second RIFF stream onto temp.wav and whisper would be
    # fed a corrupt, ever-growing file. Overwrite per request instead.
    with open(save_path, 'wb') as f:
        f.write(audio_data)
    result = model.transcribe(save_path)
    return json.dumps({'status': 0, 'msg': '', 'result': result["text"]})


if __name__ == '__main__':
    # Bug fix: the original called socketio.run(app, ...) — a copy-paste
    # from the SocketIO example; socketio is not defined here (NameError).
    app.run(port=8200, debug=True)