# python pyaudio给数据加噪声
# -*- coding: utf-8 -*-
import argparse
import array
import math
import numpy
import numpy as np
import random
import wave


def get_args(argv=None):
    """Parse the command-line options of the noise-mixing script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            the arguments are taken from ``sys.argv[1:]`` by argparse.

    Returns:
        An ``argparse.Namespace`` with ``clean_file``, ``noise_file``,
        ``output_mixed_file``, ``output_clean_file``, ``output_noise_file``
        and ``snr`` (a float, in dB).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--clean_file", type=str, required=True)
    parser.add_argument("--noise_file", type=str, required=True)
    # Required options never fall back to a default, so none is declared.
    parser.add_argument("--output_mixed_file", type=str, required=True)
    parser.add_argument("--output_clean_file", type=str, default="")
    parser.add_argument("--output_noise_file", type=str, default="")
    parser.add_argument("--snr", type=float, required=True)
    return parser.parse_args(argv)


# Compute the RMS the noise must have, given the clean-audio RMS and the SNR.
def cal_adjusted_rms(clean_rms, snr):
    """Return the noise RMS that yields the requested SNR against the clean RMS.

    Uses SNR(dB) = 20 * log10(clean_rms / noise_rms), solved for noise_rms.

    Args:
        clean_rms: Root-mean-square amplitude of the clean signal.
        snr: Target signal-to-noise ratio in dB (anything ``float()`` accepts).

    Returns:
        The RMS amplitude the noise must be scaled to.
    """
    # Exponent of the amplitude ratio: a 20 dB step is one decade of amplitude.
    exponent = float(snr) / 20
    return clean_rms / (10 ** exponent)


# Read the amplitude array out of an audio file.
def cal_amp(wf):
    """Read every frame of an open wave reader as a float64 amplitude array.

    Args:
        wf: A ``wave.Wave_read`` object positioned at the frames to read.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype float64 holding the 16-bit sample
        values in file order (for multi-channel files the samples stay
        interleaved exactly as stored).

    Raises:
        ValueError: If the file does not use 16-bit (2-byte) samples, which
            is the only layout this decoder interprets correctly.
    """
    sample_width = wf.getsampwidth()
    if sample_width != 2:
        raise ValueError(
            "only 16-bit PCM wave files are supported, got sample width "
            "of %d byte(s)" % sample_width
        )

    # Read all remaining frames of the file in one call.
    buffer = wf.readframes(wf.getnframes())

    # WAV PCM is little-endian, so decode as "<i2" regardless of the host
    # byte order, then widen to float64 so later arithmetic (squaring,
    # summing) cannot overflow the 16-bit range.
    amplitude = np.frombuffer(buffer, dtype="<i2").astype(np.float64)
    return amplitude


# Compute the RMS of an audio signal from its amplitude array.
def cal_rms(amp):
    """Return the root-mean-square of an amplitude array along its last axis.

    Args:
        amp: Array-like of sample amplitudes.

    Returns:
        The RMS value; a NumPy float64 scalar for 1-D input, or an array
        with the last axis reduced for higher-dimensional input.
    """
    # Promote to float64 first: squaring an integer array (e.g. int16)
    # would silently wrap around instead of producing the true square.
    samples = np.asarray(amp, dtype=np.float64)
    return np.sqrt(np.mean(np.square(samples), axis=-1))


# Save a wave file.
def save_waveform(output_path, params, amp):
    """Write an amplitude array to a 16-bit wave file.

    Args:
        output_path: Destination path (or writable, seekable binary file
            object) handed to ``wave.open``.
        params: Wave parameters (channels, sample width, frame rate, ...)
            as returned by ``Wave_read.getparams()``; the script passes the
            clean file's parameters.
        amp: Array-like of sample amplitudes.
    """
    int16_info = np.iinfo(np.int16)
    # Clamp before the cast so out-of-range values saturate instead of
    # wrapping around; "<i2" is the little-endian layout WAV PCM requires.
    samples = np.clip(amp, int16_info.min, int16_info.max).astype("<i2")

    # The context manager finalizes the header and closes the file even if
    # writing fails part-way.
    with wave.open(output_path, "wb") as output_file:
        output_file.setparams(params)
        output_file.writeframes(samples.tobytes())


def main():
    """Mix a clean recording with a random noise excerpt at the requested SNR."""
    args = get_args()

    # Read both inputs up front so the files are closed before any processing.
    with wave.open(args.clean_file, "rb") as clean_wav, \
            wave.open(args.noise_file, "rb") as noise_wav:
        print(args.clean_file)
        params = clean_wav.getparams()
        clean_amp = cal_amp(clean_wav)
        noise_amp = cal_amp(noise_wav)

    if len(clean_amp) == 0:
        raise ValueError("clean file contains no audio samples")
    if len(noise_amp) == 0:
        raise ValueError("noise file contains no audio samples")

    clean_rms = cal_rms(clean_amp)

    # Loop the noise when it is shorter than the clean audio; otherwise the
    # random excerpt below would have an empty range to choose from.
    if len(noise_amp) < len(clean_amp):
        repeats = -(-len(clean_amp) // len(noise_amp))  # ceiling division
        noise_amp = np.tile(noise_amp, repeats)

    # Cut a random excerpt of the noise with the same length as the clean
    # audio and measure its RMS.
    start = random.randint(0, len(noise_amp) - len(clean_amp))
    divided_noise_amp = noise_amp[start:start + len(clean_amp)]
    noise_rms = cal_rms(divided_noise_amp)
    if noise_rms == 0:
        raise ValueError(
            "selected noise excerpt is silent and cannot be scaled to an SNR"
        )

    # Scale the excerpt to the RMS required by the SNR and add it to the
    # clean audio.
    adjusted_noise_rms = cal_adjusted_rms(clean_rms, args.snr)
    adjusted_noise_amp = divided_noise_amp * (adjusted_noise_rms / noise_rms)
    mixed_amp = clean_amp + adjusted_noise_amp

    # If the mix exceeds the int16 range, scale everything down by the same
    # factor so the larger peak lands exactly on the int16 limit.
    max_int16 = np.iinfo(np.int16).max
    min_int16 = np.iinfo(np.int16).min
    mixed_max = mixed_amp.max()
    mixed_min = mixed_amp.min()
    if mixed_max > max_int16 or mixed_min < min_int16:
        if mixed_max >= abs(mixed_min):
            reduction_rate = max_int16 / mixed_max
        else:
            reduction_rate = min_int16 / mixed_min
        mixed_amp = mixed_amp * reduction_rate
        clean_amp = clean_amp * reduction_rate
        adjusted_noise_amp = adjusted_noise_amp * reduction_rate

    # Save the noisy mix, plus the optional component files when requested.
    save_waveform(args.output_mixed_file, params, mixed_amp)
    if args.output_clean_file:
        save_waveform(args.output_clean_file, params, clean_amp)
    if args.output_noise_file:
        save_waveform(args.output_noise_file, params, adjusted_noise_amp)
    print('finish')


if __name__ == "__main__":
    main()
# 代码运行方式如下:
# python3 speech_noise.py --clean_file data/source_clean/arctic_a0001.wav --noise_file data/source_noise/ch01.wav --output_mixed_file data/output_mixed/0.wav --snr 0