CODE
- Python 端的绑定和本文一样,还需要将 `cdef char* LANGUAGE = b'en'` 改为中文 `b'zh'`(也可以在函数中增加一个参数来修改这个值)。
- PS:本来想尝试添加声明
`cdef whisper_context* whisper_init_from_file_with_params_no_state(char*, whisper_full_params)`
然后进行调用,但是发现最新版的 whisper.h 中已经没有这个 API 了,所以暂时先不加。
import struct
import sys
import wave

import numpy as np
import pyaudio
import pyqtgraph as pg
from PyQt5 import QtWidgets
from PyQt5.QtCore import Qt
from whispercpp import Whisper

# Audio format (check "Audio MIDI Setup" if on a Mac).
FORMAT = pyaudio.paInt16
RATE = 16000
CHANNELS = 2

# The spectrum plot spans [-RANGE, RANGE] Hz.  When RANGE is left unset it
# defaults to RATE / 2, i.e. the Nyquist frequency of the capture rate.
RANGE = None
if not RANGE:
    RANGE = RATE / 2

# How much audio feeds each FFT: block length in seconds, then in frames.
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE * INPUT_BLOCK_TIME)

# Which Channel? (L or R)
LR = "l"  # "l" analyses the left channel of stereo input; any other value, the right.


class SpectrumAnalyzer():
    """Live microphone spectrum plot with record / Whisper-transcribe buttons.

    The instance owns a PyAudio input stream (``self.stream``), a small
    frameless always-on-top Qt window, and, once the "whisper Init" button
    has been pressed, a ``Whisper`` object in ``self.whisper``.
    """

    # File written by Button_Start and read back by Button_TransAudio.  Both
    # methods must use the same path so that the clip which was just recorded
    # is the one that gets transcribed.
    WAV_PATH = "output.wav"

    # Arguments handed to Whisper(...).
    # NOTE(review): the model name says 'large' while the file is
    # ggml-medium.bin -- confirm which of the two the binding actually honours.
    MODEL_NAME = 'large'
    MODEL_PATH = "/home/pdd/myassets/ggml-medium.bin"

    # Recording parameters used by Button_Start.
    RECORD_SECONDS = 5
    RECORD_CHUNK = 1024

    def __init__(self):
        """Create the PyAudio handle, open the input stream, build the UI."""
        self.pa = pyaudio.PyAudio()
        self.initMicrophone()
        self.initUI()

    def find_input_device(self):
        """Return the index of a microphone-like input device, or ``None``.

        A device qualifies when its lowercased name *contains* "mic" or
        "input" and PyAudio reports that it can capture with the module's
        RATE / CHANNELS / FORMAT.  If several devices qualify, the last one
        wins.  ``None`` is returned when nothing qualifies; it is then passed
        straight through to ``PyAudio.open`` by ``initMicrophone``.
        """
        device_index = None
        for i in range(self.pa.get_device_count()):
            name = self.pa.get_device_info_by_index(i)["name"].lower()
            # Substring match: real device names look like "USB Microphone"
            # or "Built-in Input", never exactly "mic" / "input".
            if not any(keyword in name for keyword in ("mic", "input")):
                continue
            if self._supports_capture_format(i):
                device_index = i
        return device_index

    def _supports_capture_format(self, device_index):
        """Return True if the device can capture with RATE/CHANNELS/FORMAT."""
        try:
            return bool(self.pa.is_format_supported(
                RATE,
                input_device=device_index,
                input_channels=CHANNELS,
                input_format=FORMAT,
            ))
        except ValueError:
            # PyAudio signals an unsupported configuration by raising.
            return False

    def initMicrophone(self):
        """Open the input stream on the device chosen by find_input_device."""
        device_index = self.find_input_device()
        self.stream = self.pa.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            input_device_index=device_index,
            frames_per_buffer=INPUT_FRAMES_PER_BLOCK,
        )

    def readData(self):
        """Read one block from the stream and return the selected channel.

        Returns:
            A NumPy array of the block's 16-bit sample values: all of them
            when CHANNELS == 1, otherwise every second sample starting at
            offset 0 (LR == 'l') or offset 1 (any other LR).

        Raises:
            IOError: propagated from ``stream.read``; ``mainLoop`` catches it.
        """
        block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
        sample_count = len(block) // 2  # each "h" sample is two bytes
        samples = struct.unpack("%dh" % sample_count, block)
        if CHANNELS == 1:
            return np.array(samples)
        # Stereo samples are interleaved: even positions left, odd right.
        if LR == 'l':
            return np.array(samples[::2])
        return np.array(samples[1::2])

    def initUI(self):
        """Build the window: three buttons, a label, a text field, the plot."""
        self.app = QtWidgets.QApplication([])
        # The original called the getter quitOnLastWindowClosed(), which has
        # no effect; the setter states the intent explicitly.
        self.app.setQuitOnLastWindowClosed(True)

        self.mainWindow = QtWidgets.QMainWindow()
        self.mainWindow.setWindowFlags(
            Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
        self.mainWindow.setWindowTitle("Spectrum Analyzer")
        self.mainWindow.setGeometry(100, 100, 300, 200)

        self.centralWid = QtWidgets.QWidget()
        self.mainWindow.setCentralWidget(self.centralWid)
        self.lay = QtWidgets.QVBoxLayout()
        self.centralWid.setLayout(self.lay)

        # Button: record a clip to WAV_PATH.
        self.button_start = QtWidgets.QPushButton("Start Record Audio")
        self.button_start.clicked.connect(self.Button_Start)
        self.lay.addWidget(self.button_start)

        # Button: load the Whisper model (attribute name kept from the
        # original even though the button initialises Whisper).
        self.button_end = QtWidgets.QPushButton("whisper Init")
        self.whisper = None
        self.is_whisper_inited = False
        self.button_end.clicked.connect(self.Button_Whisper)
        self.lay.addWidget(self.button_end)

        # Button: transcribe the recorded clip.
        self.button = QtWidgets.QPushButton("TRANS AUDIO")
        self.button.clicked.connect(self.Button_TransAudio)
        self.lay.addWidget(self.button)

        # Status label and the text field that shows results.
        self.label = QtWidgets.QLabel("Text will appear here:")
        self.lay.addWidget(self.label)
        self.text_field = QtWidgets.QLineEdit()
        self.text_field.setFixedSize(280, 200)
        self.lay.addWidget(self.text_field)

        # Spectrum plot.
        self.specWid = pg.PlotWidget(name="spectrum")
        self.specItem = self.specWid.getPlotItem()
        self.specItem.setMouseEnabled(y=False)
        self.specItem.setYRange(0, 1000)
        self.specItem.setXRange(-RANGE, RANGE, padding=0)
        self.specAxis = self.specItem.getAxis("bottom")
        self.specAxis.setLabel("Frequency [Hz]")
        self.lay.addWidget(self.specWid)

        self.mainWindow.show()
        self.app.aboutToQuit.connect(self.close)

    def onButtonClick(self):
        """Write placeholder text into the label and the text field."""
        self.label.setText("Whisper res is:")
        self.text_field.setText("Hello")

    def Button_Whisper(self):
        """Create the Whisper object and mark it as initialised."""
        self.whisper = Whisper(self.MODEL_NAME, model_path=self.MODEL_PATH)
        self.is_whisper_inited = True
        self.text_field.setText("Whisper INITED")

    def Button_TransAudio(self):
        """Transcribe WAV_PATH with Whisper and show the extracted text."""
        if not self.is_whisper_inited or self.whisper is None:
            # Without this guard the call below fails on None and the
            # exception escapes from a Qt slot.
            self.text_field.setText("Whisper is not initialized")
            return
        result = self.whisper.transcribe(self.WAV_PATH)
        text = self.whisper.extract_text(result)
        self.text_field.setText(str(text))

    def Button_Start(self):
        """Record RECORD_SECONDS of audio from the stream into WAV_PATH.

        The read loop blocks the GUI thread for the whole recording.
        """
        self.label.setText("Whisper res is:")
        self.text_field.setText("Start ---")
        # Paint the status now; no events are processed while recording.
        self.label.repaint()
        self.text_field.repaint()

        # Use RATE (the rate the stream was opened with) so the chunk count
        # and the WAV header always agree with the captured data.
        chunk_count = int(RATE / self.RECORD_CHUNK * self.RECORD_SECONDS)
        frames = [
            # An input overflow here would otherwise raise inside a Qt slot.
            self.stream.read(self.RECORD_CHUNK, exception_on_overflow=False)
            for _ in range(chunk_count)
        ]

        with wave.open(self.WAV_PATH, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(self.pa.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b''.join(frames))
        self.text_field.setText("保存为wav文件")

    def close(self):
        """Stop and close the stream, release PyAudio, exit the process."""
        self.stream.stop_stream()
        self.stream.close()
        self.pa.terminate()
        sys.exit()

    def get_spectrum(self, data, rate=None):
        """Return the centred frequency axis and magnitude spectrum of data.

        Args:
            data: 1-D sequence or array of samples.
            rate: sample rate in Hz; defaults to the module-level RATE.

        Returns:
            ``(frequencies, magnitudes)`` as two plain lists of equal length.
            The FFT is scaled by 1/N and both lists are fft-shifted so the
            zero-frequency bin sits in the middle.  Empty input yields
            ``([], [])`` instead of dividing by zero.
        """
        samples = np.asarray(data)
        n = samples.shape[0]
        if n == 0:
            return [], []
        sample_rate = RATE if rate is None else rate
        spacing = 1.0 / sample_rate
        spectrum = np.fft.fftshift((1. / n) * np.fft.fft(samples))
        freqs = np.fft.fftshift(np.fft.fftfreq(n, spacing))
        return freqs.tolist(), np.absolute(spectrum).tolist()

    def mainLoop(self):
        """Run forever: read a block, plot its spectrum, pump Qt events."""
        while True:
            try:
                data = self.readData()
            except IOError:
                # Input sometimes overflows because of mouse events; drop
                # this block but still pump events so the UI stays alive.
                data = None
            if data is not None:
                f, Pxx = self.get_spectrum(data)
                self.specItem.plot(x=f, y=Pxx, clear=True)
            QtWidgets.QApplication.processEvents()


if __name__ == '__main__':
    sa = SpectrumAnalyzer()
    sa.mainLoop()