1442 lines
51 KiB
Python
1442 lines
51 KiB
Python
"""
|
||
音频工具类
|
||
提供音频处理和文件操作功能
|
||
"""
|
||
|
||
import numpy as np
|
||
import wave
|
||
import struct
|
||
from typing import Dict, Any, List
|
||
import os
|
||
|
||
class AudioUtils:
|
||
"""
|
||
音频工具类
|
||
提供音频处理和文件操作功能
|
||
"""
|
||
|
||
def __init__(self, plugin):
|
||
"""
|
||
初始化音频工具类
|
||
|
||
Args:
|
||
plugin: 语音识别和合成插件实例
|
||
"""
|
||
self.plugin = plugin
|
||
self.sample_rate = plugin.config.get('sample_rate', 16000)
|
||
self.channels = plugin.config.get('channels', 1)
|
||
self.sample_width = 2 # 16位音频
|
||
|
||
print("✓ 音频工具类已创建")
|
||
|
||
def save_audio_file(self, filename: str, audio_data: bytes) -> bool:
    """
    Write raw audio bytes to a file, creating the parent directory first.

    Args:
        filename: Destination path.
        audio_data: Bytes written verbatim.

    Returns:
        True on success; False if any exception occurred (its traceback
        is printed).
    """
    try:
        # A bare file name has an empty dirname; fall back to the cwd.
        parent_dir = os.path.dirname(filename) or '.'
        os.makedirs(parent_dir, exist_ok=True)

        with open(filename, 'wb') as handle:
            handle.write(audio_data)

        print(f"✓ 音频文件已保存: {filename}")
        return True

    except Exception as e:
        print(f"✗ 保存音频文件失败: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||
|
||
def load_audio_file(self, filename: str) -> bytes:
    """
    Read a file's entire contents as raw bytes.

    Args:
        filename: Path of the file to read.

    Returns:
        The file contents, or ``b''`` if any exception occurred (its
        traceback is printed).
    """
    try:
        with open(filename, 'rb') as handle:
            payload = handle.read()
        print(f"✓ 音频文件已加载: {filename}")
        return payload

    except Exception as e:
        print(f"✗ 加载音频文件失败: {e}")
        import traceback
        traceback.print_exc()
        return b''
|
||
|
||
def save_wav_file(self, filename: str, audio_data: np.ndarray, sample_rate: int = None) -> bool:
    """
    Save floating-point audio as a 16-bit PCM WAV file.

    Samples are clipped to [-1.0, 1.0] and scaled by 32767 before being
    written with ``self.channels`` channels and ``self.sample_width`` bytes
    per sample.

    Args:
        filename: Destination path; its parent directory is created if needed.
        audio_data: Audio samples (NumPy array).
        sample_rate: Frame rate; ``self.sample_rate`` when None.

    Returns:
        True on success; False if any exception occurred (its traceback
        is printed).
    """
    try:
        frame_rate = self.sample_rate if sample_rate is None else sample_rate

        os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)

        # Clip first so the int16 conversion below cannot wrap around.
        bounded = np.clip(audio_data, -1.0, 1.0)
        pcm = (bounded * 32767).astype(np.int16)

        with wave.open(filename, 'w') as writer:
            writer.setnchannels(self.channels)
            writer.setsampwidth(self.sample_width)
            writer.setframerate(frame_rate)
            writer.writeframes(pcm.tobytes())

        print(f"✓ WAV文件已保存: {filename}")
        return True

    except Exception as e:
        print(f"✗ 保存WAV文件失败: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||
|
||
def load_wav_file(self, filename: str) -> Dict[str, Any]:
    """
    Load a PCM WAV file and convert its samples to float32 in [-1.0, 1.0).

    Supports 8-bit (unsigned), 16-bit, 24-bit and 32-bit (signed) sample
    widths. Multi-channel data is returned interleaved, as read.

    Args:
        filename: Path of the WAV file.

    Returns:
        A dict with keys ``audio_data`` (float32 array), ``sample_rate``,
        ``channels``, ``sample_width`` and ``frames``; an empty dict if the
        sample width is unsupported or any exception occurred.
    """
    try:
        with wave.open(filename, 'r') as wav_file:
            channels = wav_file.getnchannels()
            sample_width = wav_file.getsampwidth()
            sample_rate = wav_file.getframerate()
            frames = wav_file.getnframes()

            raw_frames = wav_file.readframes(frames)

            if sample_width == 1:
                # 8-bit WAV is unsigned with a 128 offset.
                audio_array = np.frombuffer(raw_frames, dtype=np.uint8)
                audio_array = (audio_array.astype(np.float32) - 128) / 128.0
            elif sample_width == 2:
                audio_array = np.frombuffer(raw_frames, dtype=np.int16)
                audio_array = audio_array.astype(np.float32) / 32768.0
            elif sample_width == 3:
                # NumPy has no 24-bit integer type: widen each 3-byte sample
                # into the most significant bytes of a native int32 (value
                # << 8), which keeps the sign, then scale by 2**31.
                # readframes() returns native-order samples, hence the check.
                import sys
                packed = np.frombuffer(raw_frames, dtype=np.uint8).reshape(-1, 3)
                widened = np.zeros((packed.shape[0], 4), dtype=np.uint8)
                if sys.byteorder == 'little':
                    widened[:, 1:] = packed
                else:
                    widened[:, :3] = packed
                audio_array = widened.view(np.int32).reshape(-1)
                audio_array = audio_array.astype(np.float32) / 2147483648.0
            elif sample_width == 4:
                audio_array = np.frombuffer(raw_frames, dtype=np.int32)
                audio_array = audio_array.astype(np.float32) / 2147483648.0
            else:
                print(f"✗ 不支持的采样宽度: {sample_width}")
                return {}

            print(f"✓ WAV文件已加载: {filename}")
            return {
                'audio_data': audio_array,
                'sample_rate': sample_rate,
                'channels': channels,
                'sample_width': sample_width,
                'frames': frames
            }

    except Exception as e:
        print(f"✗ 加载WAV文件失败: {e}")
        import traceback
        traceback.print_exc()
        return {}
|
||
|
||
def convert_audio_format(self, audio_data: np.ndarray, from_format: str, to_format: str) -> np.ndarray:
    """
    Placeholder for format conversion; currently returns the input unchanged.

    Args:
        audio_data: Audio samples.
        from_format: Source format name (only used in the log message).
        to_format: Target format name (only used in the log message).

    Returns:
        ``audio_data`` itself, unmodified.
    """
    try:
        # Simplified: no conversion is performed yet, only a log line.
        print(f"✓ 音频格式已转换: {from_format} -> {to_format}")
    except Exception as e:
        print(f"✗ 音频格式转换失败: {e}")
    return audio_data
|
||
|
||
def resample_audio(self, audio_data: np.ndarray, from_rate: int, to_rate: int) -> np.ndarray:
    """
    Resample audio with simple linear interpolation.

    Args:
        audio_data: Audio samples.
        from_rate: Source sample rate.
        to_rate: Target sample rate.

    Returns:
        The resampled samples; the input itself when the rates are equal or
        an exception occurred; an empty float32 array when the target length
        would be zero.
    """
    try:
        if from_rate == to_rate:
            return audio_data

        source_len = len(audio_data)
        ratio = to_rate / from_rate
        target_len = int(source_len * ratio)

        if target_len <= 0:
            return np.array([], dtype=np.float32)

        # Linear interpolation only; no anti-aliasing filter is applied.
        positions = np.linspace(0, source_len - 1, target_len)
        resampled_data = np.interp(positions, np.arange(source_len), audio_data)
        print(f"✓ 音频已重采样: {from_rate}Hz -> {to_rate}Hz")
        return resampled_data

    except Exception as e:
        print(f"✗ 音频重采样失败: {e}")
        return audio_data
|
||
|
||
def apply_gain(self, audio_data: np.ndarray, gain_db: float) -> np.ndarray:
    """
    Scale audio by a gain given in decibels and clip to [-1.0, 1.0].

    Args:
        audio_data: Audio samples.
        gain_db: Gain in dB (converted as ``10 ** (gain_db / 20)``).

    Returns:
        The amplified, clipped samples; the input itself on error.
    """
    try:
        linear_factor = 10 ** (gain_db / 20.0)
        # Clip after scaling so the result stays in the nominal range.
        amplified_data = np.clip(audio_data * linear_factor, -1.0, 1.0)

        print(f"✓ 音频增益已应用: {gain_db}dB")
        return amplified_data

    except Exception as e:
        print(f"✗ 应用音频增益失败: {e}")
        return audio_data
|
||
|
||
def normalize_audio(self, audio_data: np.ndarray) -> np.ndarray:
    """
    Peak-normalise audio so its largest absolute sample becomes 1.0.

    Args:
        audio_data: Audio samples.

    Returns:
        The normalised samples; the input itself when it is empty, its peak
        is not positive, or an exception occurred.
    """
    try:
        if len(audio_data) == 0:
            return audio_data

        peak = np.max(np.abs(audio_data))
        if not peak > 0:
            # All-zero (or NaN-peak) input: nothing sensible to scale by.
            return audio_data

        normalized_data = audio_data / peak
        print("✓ 音频已归一化")
        return normalized_data

    except Exception as e:
        print(f"✗ 音频归一化失败: {e}")
        return audio_data
|
||
|
||
def apply_fade_in(self, audio_data: np.ndarray, fade_duration: float, sample_rate: int = None) -> np.ndarray:
    """
    Apply a linear fade-in to the start of the audio.

    Args:
        audio_data: Audio samples.
        fade_duration: Fade length in seconds.
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        A faded copy; the input itself when the fade length is not strictly
        between 0 and the audio length, or on error.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        fade_samples = int(fade_duration * sample_rate)
        if not (0 < fade_samples < len(audio_data)):
            return audio_data

        # Ramp from silence up to full level over the fade region.
        ramp = np.linspace(0, 1, fade_samples)
        faded_data = audio_data.copy()
        faded_data[:fade_samples] *= ramp

        print(f"✓ 淡入效果已应用: {fade_duration}秒")
        return faded_data

    except Exception as e:
        print(f"✗ 应用淡入效果失败: {e}")
        return audio_data
|
||
|
||
def apply_fade_out(self, audio_data: np.ndarray, fade_duration: float, sample_rate: int = None) -> np.ndarray:
    """
    Apply a linear fade-out to the end of the audio.

    Args:
        audio_data: Audio samples.
        fade_duration: Fade length in seconds.
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        A faded copy; the input itself when the fade length is not strictly
        between 0 and the audio length, or on error.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        fade_samples = int(fade_duration * sample_rate)
        if not (0 < fade_samples < len(audio_data)):
            return audio_data

        # Ramp from full level down to silence over the tail.
        ramp = np.linspace(1, 0, fade_samples)
        faded_data = audio_data.copy()
        faded_data[-fade_samples:] *= ramp

        print(f"✓ 淡出效果已应用: {fade_duration}秒")
        return faded_data

    except Exception as e:
        print(f"✗ 应用淡出效果失败: {e}")
        return audio_data
|
||
|
||
def concatenate_audio(self, audio_segments: List[np.ndarray]) -> np.ndarray:
    """
    Join several audio segments end to end.

    Args:
        audio_segments: Segments to join, in order.

    Returns:
        The joined samples; an empty float32 array when the list is empty or
        an exception occurred.
    """
    try:
        if audio_segments:
            concatenated = np.concatenate(audio_segments)
            print(f"✓ {len(audio_segments)}个音频片段已连接")
            return concatenated
        return np.array([], dtype=np.float32)

    except Exception as e:
        print(f"✗ 连接音频片段失败: {e}")
        return np.array([], dtype=np.float32)
|
||
|
||
def split_audio(self, audio_data: np.ndarray, segment_duration: float, sample_rate: int = None) -> List[np.ndarray]:
    """
    Cut audio into consecutive segments of a fixed duration.

    The last segment may be shorter than the others.

    Args:
        audio_data: Audio samples.
        segment_duration: Segment length in seconds.
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        List of segments; ``[audio_data]`` when the segment length is not
        positive or an exception occurred.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        segment_samples = int(segment_duration * sample_rate)
        if segment_samples <= 0:
            return [audio_data]

        segments = [
            audio_data[offset:offset + segment_samples]
            for offset in range(0, len(audio_data), segment_samples)
        ]

        print(f"✓ 音频已分割为{len(segments)}个片段")
        return segments

    except Exception as e:
        print(f"✗ 分割音频失败: {e}")
        return [audio_data]
|
||
|
||
def calculate_audio_level(self, audio_data: np.ndarray) -> float:
    """
    Compute the RMS level of the audio.

    The samples are converted to float64 before squaring, so integer PCM
    input (for example int16) cannot overflow.

    Args:
        audio_data: Audio samples.

    Returns:
        The RMS value as a Python float (0.0-1.0 for audio normalised to
        [-1, 1]); 0.0 for empty input or on error.
    """
    try:
        if len(audio_data) == 0:
            return 0.0

        # Squaring in the input dtype wraps around for integer arrays.
        samples = np.asarray(audio_data, dtype=np.float64)
        rms = np.sqrt(np.mean(samples ** 2))
        return float(rms)

    except Exception as e:
        print(f"✗ 计算音频级别失败: {e}")
        return 0.0
|
||
|
||
def detect_silence(self, audio_data: np.ndarray, threshold: float = 0.01,
                   min_silence_duration: float = 0.1, sample_rate: int = None) -> List[Dict[str, float]]:
    """
    Find runs of samples whose absolute value stays below a threshold.

    Args:
        audio_data: Audio samples (1-D).
        threshold: Samples with ``abs(x) < threshold`` count as silent.
        min_silence_duration: Minimum run length in seconds to be reported.
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        List of ``{'start': seconds, 'end': seconds}`` dicts in time order
        (``end`` is exclusive); an empty list on error.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        min_silence_samples = int(min_silence_duration * sample_rate)

        is_silent = np.abs(audio_data) < threshold

        # Pad with "not silent" on both sides so every run has a rising and
        # a falling edge, including runs touching either end of the signal.
        padded = np.concatenate(([False], is_silent, [False]))
        edges = np.diff(padded.astype(np.int8))
        run_starts = np.flatnonzero(edges == 1).tolist()
        run_ends = np.flatnonzero(edges == -1).tolist()

        silence_segments = [
            {'start': begin / sample_rate, 'end': stop / sample_rate}
            for begin, stop in zip(run_starts, run_ends)
            if stop - begin >= min_silence_samples
        ]

        print(f"✓ 检测到{len(silence_segments)}个静音段")
        return silence_segments

    except Exception as e:
        print(f"✗ 检测静音段失败: {e}")
        return []
|
||
|
||
def remove_silence(self, audio_data: np.ndarray, threshold: float = 0.01,
                   min_silence_duration: float = 0.1, sample_rate: int = None) -> np.ndarray:
    """
    Remove the silent runs reported by ``detect_silence``.

    Args:
        audio_data: Audio samples.
        threshold: Silence threshold forwarded to ``detect_silence``.
        min_silence_duration: Minimum silence length in seconds.
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        The samples with all detected silent runs cut out; the input itself
        when nothing was detected or an exception occurred.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        silence_segments = self.detect_silence(audio_data, threshold, min_silence_duration, sample_rate)

        if not silence_segments:
            return audio_data

        samples = np.asarray(audio_data)
        total = len(samples)
        keep = np.ones(total, dtype=bool)
        for segment in silence_segments:
            # Boundaries come back in seconds; round (not truncate) when
            # converting to sample indices, because index / rate * rate can
            # land just below the original integer in floating point.
            first = min(max(int(round(segment['start'] * sample_rate)), 0), total)
            last = min(max(int(round(segment['end'] * sample_rate)), 0), total)
            keep[first:last] = False

        # One masked selection instead of re-concatenating per segment.
        cleaned_audio = samples[keep]

        print(f"✓ 已移除{len(silence_segments)}个静音段")
        return cleaned_audio

    except Exception as e:
        print(f"✗ 移除静音段失败: {e}")
        return audio_data
|
||
|
||
def apply_high_pass_filter(self, audio_data: np.ndarray, cutoff_freq: float,
                           sample_rate: int = None) -> np.ndarray:
    """
    Apply a simple first-order (RC-style) high-pass filter.

    Implements ``y[i] = alpha * (y[i-1] + x[i] - x[i-1])`` with ``y[0] = 0``
    and ``alpha = RC / (RC + dt)``.

    Args:
        audio_data: Audio samples.
        cutoff_freq: Cut-off frequency in Hz.
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        The filtered samples (same dtype as the input); the input itself on
        error.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        time_constant = 1.0 / (2 * np.pi * cutoff_freq)
        step = 1.0 / sample_rate
        alpha = time_constant / (time_constant + step)

        filtered_data = np.zeros_like(audio_data)
        # Walk consecutive input pairs; the output is read back from the
        # array so any rounding to the array dtype feeds into the recursion.
        for idx, (prev_in, cur_in) in enumerate(zip(audio_data, audio_data[1:]), start=1):
            filtered_data[idx] = alpha * (filtered_data[idx - 1] + cur_in - prev_in)

        print(f"✓ 高通滤波器已应用: {cutoff_freq}Hz")
        return filtered_data

    except Exception as e:
        print(f"✗ 应用高通滤波器失败: {e}")
        return audio_data
|
||
|
||
def apply_low_pass_filter(self, audio_data: np.ndarray, cutoff_freq: float,
                          sample_rate: int = None) -> np.ndarray:
    """
    Apply a simple first-order (RC-style) low-pass filter.

    Implements ``y[i] = alpha * x[i] + (1 - alpha) * y[i-1]`` with
    ``y[0] = x[0]`` and ``alpha = dt / (RC + dt)``.

    Args:
        audio_data: Audio samples.
        cutoff_freq: Cut-off frequency in Hz.
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        The filtered samples (same dtype as the input); the input itself on
        error (including empty input).
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        time_constant = 1.0 / (2 * np.pi * cutoff_freq)
        step = 1.0 / sample_rate
        alpha = step / (time_constant + step)

        filtered_data = np.zeros_like(audio_data)
        filtered_data[0] = audio_data[0]
        for idx in range(1, len(audio_data)):
            previous_out = filtered_data[idx - 1]
            filtered_data[idx] = alpha * audio_data[idx] + (1 - alpha) * previous_out

        print(f"✓ 低通滤波器已应用: {cutoff_freq}Hz")
        return filtered_data

    except Exception as e:
        print(f"✗ 应用低通滤波器失败: {e}")
        return audio_data
|
||
|
||
def generate_sine_wave(self, frequency: float, duration: float,
                       amplitude: float = 1.0, sample_rate: int = None) -> np.ndarray:
    """
    Generate a sine tone.

    Args:
        frequency: Tone frequency in Hz.
        duration: Length in seconds.
        amplitude: Peak amplitude (0.0-1.0).
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        ``int(sample_rate * duration)`` samples of the tone; an empty
        float32 array on error.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        sample_count = int(sample_rate * duration)
        # endpoint=False keeps the sample spacing at exactly duration / count.
        timeline = np.linspace(0, duration, sample_count, endpoint=False)
        sine_wave = amplitude * np.sin(2 * np.pi * frequency * timeline)

        print(f"✓ 正弦波已生成: {frequency}Hz, {duration}秒")
        return sine_wave

    except Exception as e:
        print(f"✗ 生成正弦波失败: {e}")
        return np.array([], dtype=np.float32)
|
||
|
||
def generate_white_noise(self, duration: float, amplitude: float = 0.1,
                         sample_rate: int = None) -> np.ndarray:
    """
    Generate uniformly distributed white noise.

    Args:
        duration: Length in seconds.
        amplitude: Peak amplitude (0.0-1.0); samples lie in
            ``[-amplitude, amplitude)``.
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        ``int(sample_rate * duration)`` noise samples; an empty float32
        array on error.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        num_samples = int(sample_rate * duration)

        # np.random.random yields [0, 1); stretch it to [-1, 1) and scale.
        unit_noise = np.random.random(num_samples) * 2 - 1
        white_noise = amplitude * unit_noise

        print(f"✓ 白噪声已生成: {duration}秒")
        return white_noise

    except Exception as e:
        print(f"✗ 生成白噪声失败: {e}")
        return np.array([], dtype=np.float32)
|
||
|
||
def mix_audio_signals(self, audio1: np.ndarray, audio2: np.ndarray,
                      ratio: float = 0.5) -> np.ndarray:
    """
    Blend two signals, zero-padding the shorter one to the longer length.

    Args:
        audio1: First signal.
        audio2: Second signal.
        ratio: Mix ratio (0.0 = only ``audio1``, 1.0 = only ``audio2``).

    Returns:
        ``(1 - ratio) * audio1 + ratio * audio2`` clipped to [-1.0, 1.0];
        on error, the longer of the two inputs.
    """
    try:
        target_len = max(len(audio1), len(audio2))

        missing1 = target_len - len(audio1)
        if missing1 > 0:
            audio1 = np.pad(audio1, (0, missing1))
        missing2 = target_len - len(audio2)
        if missing2 > 0:
            audio2 = np.pad(audio2, (0, missing2))

        blended = (1 - ratio) * audio1 + ratio * audio2
        # Clip so the blend stays in the nominal range.
        mixed_audio = np.clip(blended, -1.0, 1.0)

        print("✓ 音频信号已混合")
        return mixed_audio

    except Exception as e:
        print(f"✗ 混合音频信号失败: {e}")
        # Fall back to the longer input.
        return audio1 if len(audio1) >= len(audio2) else audio2
|
||
|
||
def calculate_audio_duration(self, audio_data: np.ndarray, sample_rate: int = None) -> float:
    """
    Compute the audio length in seconds.

    Args:
        audio_data: Audio samples.
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        ``len(audio_data) / sample_rate``; 0.0 for empty input or on error.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        sample_count = len(audio_data)
        if sample_count == 0:
            return 0.0

        return sample_count / sample_rate

    except Exception as e:
        print(f"✗ 计算音频时长失败: {e}")
        return 0.0
|
||
|
||
def convert_to_mono(self, audio_data: np.ndarray, channels: int = None) -> np.ndarray:
    """
    Down-mix interleaved multi-channel audio to mono by averaging channels.

    Args:
        audio_data: Audio samples, treated as interleaved frames.
        channels: Channel count; ``self.channels`` when None.

    Returns:
        The per-frame channel average; the input itself when it is already
        mono, its length is not a multiple of ``channels``, or on error.
    """
    try:
        if channels is None:
            channels = self.channels

        if channels == 1:
            return audio_data

        # The data is assumed to be interleaved.
        if len(audio_data) % channels != 0:
            print("✗ 音频数据长度与声道数不匹配")
            return audio_data

        # One row per frame, one column per channel.
        frame_matrix = audio_data.reshape(-1, channels)
        mono_audio = np.mean(frame_matrix, axis=1)

        print("✓ 音频已转换为单声道")
        return mono_audio

    except Exception as e:
        print(f"✗ 转换为单声道失败: {e}")
        return audio_data
|
||
|
||
def convert_to_stereo(self, audio_data: np.ndarray) -> np.ndarray:
    """
    Turn mono audio into interleaved stereo by duplicating every sample.

    Args:
        audio_data: Mono audio samples.

    Returns:
        A flat array in which each input sample appears twice in a row
        (L, R, L, R, ...); the input itself on error.
    """
    try:
        # Repeating each element twice yields the interleaved L/R layout.
        stereo_audio = np.repeat(audio_data.reshape(-1), 2)

        print("✓ 音频已转换为立体声")
        return stereo_audio

    except Exception as e:
        print(f"✗ 转换为立体声失败: {e}")
        return audio_data
|
||
|
||
def apply_compressor(self, audio_data: np.ndarray, threshold: float = -20.0,
                     ratio: float = 4.0, attack: float = 0.01, release: float = 0.1) -> np.ndarray:
    """
    Apply a simplified dynamic-range compressor.

    A peak envelope (decaying by a fixed factor of 0.99 per sample) is
    tracked; whenever it exceeds the threshold the sample is attenuated
    according to ``ratio``.

    Args:
        audio_data: Audio samples.
        threshold: Threshold in dB.
        ratio: Compression ratio.
        attack: Attack time in seconds (accepted but not used by this
            simplified implementation).
        release: Release time in seconds (accepted but not used by this
            simplified implementation).

    Returns:
        The compressed samples; the input itself on error.
    """
    try:
        threshold_linear = 10 ** (threshold / 20.0)

        gain_reduction = np.zeros_like(audio_data)
        envelope = 0.0

        for idx, sample in enumerate(audio_data):
            # Peak follower: jump up instantly, decay slowly.
            envelope = max(abs(sample), envelope * 0.99)

            if envelope > threshold_linear:
                over_db = 20 * np.log10(envelope / threshold_linear)
                reduction_db = over_db - over_db / ratio
                gain_reduction[idx] = 10 ** (-reduction_db / 20.0)
            else:
                gain_reduction[idx] = 1.0

        compressed_audio = audio_data * gain_reduction

        print("✓ 压缩器效果已应用")
        return compressed_audio

    except Exception as e:
        print(f"✗ 应用压缩器效果失败: {e}")
        return audio_data
|
||
|
||
def apply_limiter(self, audio_data: np.ndarray, threshold: float = -0.1) -> np.ndarray:
    """
    Hard-limit the audio to a ceiling given in dB relative to full scale.

    Args:
        audio_data: Audio samples.
        threshold: Ceiling in dB (converted as ``10 ** (threshold / 20)``).

    Returns:
        The samples clipped to ``[-ceiling, ceiling]``; the input itself on
        error.
    """
    try:
        ceiling = 10 ** (threshold / 20.0)
        limited_audio = np.clip(audio_data, -ceiling, ceiling)

        print(f"✓ 限制器效果已应用: {threshold}dB")
        return limited_audio

    except Exception as e:
        print(f"✗ 应用限制器效果失败: {e}")
        return audio_data
|
||
|
||
def apply_expander(self, audio_data: np.ndarray, threshold: float = -40.0,
                   ratio: float = 2.0, attack: float = 0.01, release: float = 0.1) -> np.ndarray:
    """
    Apply a simplified downward expander.

    A peak envelope (decaying by a fixed factor of 0.99 per sample) is
    tracked; whenever it is below the threshold the sample is attenuated by
    ``(1 - 1/ratio)`` times its distance below the threshold in dB.

    Args:
        audio_data: Audio samples.
        threshold: Threshold in dB.
        ratio: Expansion ratio.
        attack: Attack time in seconds (accepted but not used by this
            simplified implementation).
        release: Release time in seconds (accepted but not used by this
            simplified implementation).

    Returns:
        The expanded samples; the input itself on error.
    """
    try:
        threshold_linear = 10 ** (threshold / 20.0)

        gain_expansion = np.zeros_like(audio_data)
        envelope = 0.0

        for i in range(len(audio_data)):
            # Peak follower: jump up instantly, decay slowly.
            envelope = max(abs(audio_data[i]), envelope * 0.99)

            if envelope >= threshold_linear:
                gain_expansion[i] = 1.0
            elif envelope <= 0:
                # Digital silence: log10(0) would emit a divide-by-zero
                # warning (or raise under np.errstate) and the limit of the
                # gain is 0 anyway.
                gain_expansion[i] = 0.0
            else:
                gain_db = 20 * np.log10(envelope / threshold_linear)
                expansion_db = gain_db * (1 - 1/ratio)
                gain_expansion[i] = 10 ** (expansion_db / 20.0)

        expanded_audio = audio_data * gain_expansion

        print("✓ 扩展器效果已应用")
        return expanded_audio

    except Exception as e:
        print(f"✗ 应用扩展器效果失败: {e}")
        return audio_data
|
||
|
||
def apply_deesser(self, audio_data: np.ndarray, threshold: float = -30.0,
                  frequency: float = 5000.0, sample_rate: int = None) -> np.ndarray:
    """
    Apply a simplified de-esser.

    The signal is high-passed at ``frequency`` to estimate high-frequency
    content; wherever that estimate exceeds the threshold, the original
    sample is scaled by ``threshold / estimate``.

    Args:
        audio_data: Audio samples.
        threshold: Threshold in dB.
        frequency: High-pass cut-off in Hz used for detection.
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        The processed samples; the input itself on error.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        # Detection path: isolate the high band.
        detector = self.apply_high_pass_filter(audio_data, frequency, sample_rate)

        threshold_linear = 10 ** (threshold / 20.0)
        high_energy = np.abs(detector)

        # Unity gain by default; attenuate only where the band is too hot.
        reduction_factor = np.ones_like(high_energy)
        too_loud = high_energy > threshold_linear
        reduction_factor[too_loud] = threshold_linear / high_energy[too_loud]

        deessed_audio = audio_data * reduction_factor

        print(f"✓ 去齿音器效果已应用: {frequency}Hz")
        return deessed_audio

    except Exception as e:
        print(f"✗ 应用去齿音器效果失败: {e}")
        return audio_data
|
||
|
||
def apply_noise_gate(self, audio_data: np.ndarray, threshold: float = -40.0,
                     attack: float = 0.001, release: float = 0.01, sample_rate: int = None) -> np.ndarray:
    """
    Apply a simplified noise gate with exponential attack/release smoothing.

    Args:
        audio_data: Audio samples.
        threshold: Gate threshold in dB.
        attack: Attack time in seconds.
        release: Release time in seconds.
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        The samples multiplied by the smoothed gate (a float32 curve in
        [0, 1]); the input itself on error.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        threshold_linear = 10 ** (threshold / 20.0)

        # The gate is open wherever the instantaneous level beats the threshold.
        gate_open = np.abs(audio_data) > threshold_linear
        smoothed_gate = np.zeros_like(gate_open, dtype=np.float32)

        # One-pole smoothing coefficients derived from the time constants.
        attack_coeff = np.exp(-1.0 / (sample_rate * attack))
        release_coeff = np.exp(-1.0 / (sample_rate * release))

        gate_state = 0.0
        for idx, is_open in enumerate(gate_open):
            if is_open:
                # Glide towards 1.
                gate_state = attack_coeff * gate_state + (1 - attack_coeff)
            else:
                # Decay towards 0.
                gate_state = release_coeff * gate_state
            smoothed_gate[idx] = gate_state

        gated_audio = audio_data * smoothed_gate

        print("✓ 噪声门效果已应用")
        return gated_audio

    except Exception as e:
        print(f"✗ 应用噪声门效果失败: {e}")
        return audio_data
|
||
|
||
def apply_eq_filter(self, audio_data: np.ndarray, frequencies: List[float],
                    gains: List[float], sample_rate: int = None) -> np.ndarray:
    """
    Apply a very rough multi-band "equaliser".

    For each (frequency, gain) pair: a positive gain on a band below
    ``sample_rate / 4`` low-passes the signal at twice the frequency; a
    negative gain on a band above 100 Hz high-passes it at half the
    frequency. Finally the mean of all gains (in dB) is applied as an
    overall gain and the result is clipped to [-1.0, 1.0].

    Args:
        audio_data: Audio samples.
        frequencies: Band frequencies in Hz.
        gains: Band gains in dB, paired with ``frequencies``.
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        The processed samples; the input itself on error (for example when
        ``gains`` is empty).
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        eq_audio = audio_data.copy()

        for freq, gain in zip(frequencies, gains):
            if gain > 0 and freq < sample_rate / 4:
                eq_audio = self.apply_low_pass_filter(eq_audio, freq * 2, sample_rate)
            elif gain < 0 and freq > 100:
                eq_audio = self.apply_high_pass_filter(eq_audio, freq / 2, sample_rate)

        # Overall gain is the average band gain, converted from dB.
        total_gain = 10 ** (sum(gains) / len(gains) / 20.0)
        eq_audio = np.clip(eq_audio * total_gain, -1.0, 1.0)

        print(f"✓ 均衡器效果已应用: {len(frequencies)}个频段")
        return eq_audio

    except Exception as e:
        print(f"✗ 应用均衡器效果失败: {e}")
        return audio_data
|
||
|
||
def apply_reverb(self, audio_data: np.ndarray, room_size: float = 0.5,
                 damping: float = 0.5, wet_level: float = 0.33,
                 sample_rate: int = None) -> np.ndarray:
    """
    Apply a simplified reverb built from one feedback delay line with four taps.

    Taps sit at 13, 23, 37 and 43 ms with decay factors 0.7, 0.6, 0.5 and
    0.4. Their sum, scaled by ``damping``, is fed back into the delay line
    and, scaled by ``wet_level``, mixed with the dry signal.

    Args:
        audio_data: Audio samples.
        room_size: Room size (0.0-1.0); accepted but not used by this
            simplified implementation.
        damping: Feedback scaling (0.0-1.0).
        wet_level: Wet signal level (0.0-1.0).
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        The processed samples clipped to [-1.0, 1.0]; the input itself on
        error.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        # Circular delay line holding up to 100 ms.
        max_delay = int(sample_rate * 0.1)
        delay_line = np.zeros(max_delay)
        delay_index = 0

        delay_times = [int(sample_rate * 0.013), int(sample_rate * 0.023),
                       int(sample_rate * 0.037), int(sample_rate * 0.043)]
        decay_factors = [0.7, 0.6, 0.5, 0.4]

        reverb_audio = np.zeros_like(audio_data)

        for i, input_sample in enumerate(audio_data):
            # Read every tap before the current position is overwritten.
            delayed_samples = [
                delay_line[(delay_index - tap_delay) % max_delay] * tap_decay
                for tap_delay, tap_decay in zip(delay_times, decay_factors)
            ]
            tap_sum = sum(delayed_samples)

            # Write input plus damped feedback, then advance the write head.
            delay_line[delay_index] = input_sample + tap_sum * damping
            delay_index = (delay_index + 1) % max_delay

            # Dry/wet mix.
            reverb_audio[i] = input_sample * (1 - wet_level) + tap_sum * wet_level

        reverb_audio = np.clip(reverb_audio, -1.0, 1.0)

        print("✓ 混响效果已应用")
        return reverb_audio

    except Exception as e:
        print(f"✗ 应用混响效果失败: {e}")
        return audio_data
|
||
|
||
def apply_chorus(self, audio_data: np.ndarray, rate: float = 1.0,
                 depth: float = 0.5, mix: float = 0.5,
                 sample_rate: int = None) -> np.ndarray:
    """
    Apply a chorus effect (one LFO-modulated fractional delay line).

    The delay oscillates around 5 ms by ``depth * 10 ms`` and is clamped to
    the range the 50 ms buffer can provide, so the full documented depth
    range is usable.

    Args:
        audio_data: Audio samples.
        rate: LFO rate in Hz.
        depth: Modulation depth (0.0-1.0).
        mix: Wet/dry mix (0.0-1.0).
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        The processed samples; the input itself on error.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        # 50 ms circular delay buffer.
        delay_buffer = np.zeros(int(sample_rate * 0.05))
        buffer_len = len(delay_buffer)
        write_index = 0

        lfo_phase = 0.0
        phase_step = 2 * np.pi * rate / sample_rate
        delay_base = 0.005  # 5 ms base delay

        chorus_audio = np.zeros_like(audio_data)

        for i in range(len(audio_data)):
            lfo_phase += phase_step
            if lfo_phase > 2 * np.pi:
                lfo_phase -= 2 * np.pi

            lfo_value = np.sin(lfo_phase)
            delay_time = delay_base + depth * 0.01 * lfo_value
            # A negative delay would read samples not written yet (or run
            # off the buffer); keep it inside what the buffer holds.
            delay_samples = min(max(delay_time * sample_rate, 0.0), buffer_len - 1)

            delay_buffer[write_index] = audio_data[i]

            whole = int(delay_samples)
            frac = delay_samples - whole

            # A delay of whole + frac lies between the sample written
            # `whole` steps ago and the OLDER one written `whole + 1` steps
            # ago, so interpolate towards the older sample.
            newer = delay_buffer[(write_index - whole) % buffer_len]
            older = delay_buffer[(write_index - whole - 1) % buffer_len]
            delayed_sample = newer * (1 - frac) + older * frac

            chorus_audio[i] = audio_data[i] * (1 - mix) + delayed_sample * mix

            write_index = (write_index + 1) % buffer_len

        print("✓ 合唱效果已应用")
        return chorus_audio

    except Exception as e:
        print(f"✗ 应用合唱效果失败: {e}")
        return audio_data
|
||
|
||
def apply_flanger(self, audio_data: np.ndarray, rate: float = 0.5,
                  depth: float = 0.002, feedback: float = 0.3,
                  mix: float = 0.5, sample_rate: int = None) -> np.ndarray:
    """
    Apply a flanger effect (short LFO-modulated delay with feedback).

    The delay is ``abs(depth * sin(lfo))`` seconds, clamped to what the
    10 ms buffer can provide.

    Args:
        audio_data: Audio samples.
        rate: LFO rate in Hz.
        depth: Maximum delay in seconds.
        feedback: Amount of the previous output fed back (0.0-1.0).
        mix: Wet/dry mix (0.0-1.0).
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        The processed samples; the input itself on error.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        # 10 ms circular delay buffer.
        max_delay = int(sample_rate * 0.01)
        delay_buffer = np.zeros(max_delay)
        write_index = 0
        last_output = 0.0

        lfo_phase = 0.0
        phase_step = 2 * np.pi * rate / sample_rate

        flanger_audio = np.zeros_like(audio_data)

        for i in range(len(audio_data)):
            lfo_phase += phase_step
            if lfo_phase > 2 * np.pi:
                lfo_phase -= 2 * np.pi

            lfo_value = np.sin(lfo_phase)
            delay_time = depth * lfo_value
            # Clamp so the read position never laps the write position.
            delay_samples = min(abs(delay_time) * sample_rate, max_delay - 1)

            # Write the current sample plus feedback from the last output.
            delay_buffer[write_index] = audio_data[i] + last_output * feedback

            whole = int(delay_samples)
            frac = delay_samples - whole

            # A delay of whole + frac lies between the sample written
            # `whole` steps ago and the OLDER one written `whole + 1` steps
            # ago, so interpolate towards the older sample.
            newer = delay_buffer[(write_index - whole) % max_delay]
            older = delay_buffer[(write_index - whole - 1) % max_delay]
            delayed_sample = newer * (1 - frac) + older * frac

            flanger_audio[i] = audio_data[i] * (1 - mix) + delayed_sample * mix
            last_output = flanger_audio[i]

            write_index = (write_index + 1) % max_delay

        print("✓ 镶边效果已应用")
        return flanger_audio

    except Exception as e:
        print(f"✗ 应用镶边效果失败: {e}")
        return audio_data
|
||
|
||
def apply_distortion(self, audio_data: np.ndarray, drive: float = 0.5,
                     tone: float = 0.5) -> np.ndarray:
    """
    Apply soft-clipping (tanh) distortion with an optional tone filter.

    The input is amplified by 1x-10x depending on ``drive`` and passed
    through ``tanh``. Unless ``tone`` is exactly 0.5, the result is then run
    through a first-order high-pass whose cut-off ranges from 200 Hz
    (tone 0) to 2200 Hz (tone 1), at ``self.sample_rate``.

    Args:
        audio_data: Audio samples.
        drive: Drive amount (0.0-1.0).
        tone: Tone control (0.0-1.0); 0.5 bypasses the filter.

    Returns:
        The distorted samples; the input itself on error.
    """
    try:
        drive_factor = 1.0 + drive * 9.0  # 1x to 10x pre-gain
        shaped = np.tanh(audio_data * drive_factor)

        if tone == 0.5 or len(shaped) == 0:
            processed = shaped
        else:
            rc = 1.0 / (2 * np.pi * (200 + tone * 2000))  # 200 Hz to 2200 Hz
            dt = 1.0 / self.sample_rate
            alpha = rc / (rc + dt)

            # y[i] = alpha * (y[i-1] + x[i] - x[i-1]). Input (shaped) and
            # output (processed) must be separate arrays: filtering in
            # place cancels the difference term and merely scales by alpha.
            processed = np.empty_like(shaped)
            processed[0] = shaped[0]
            for i in range(1, len(shaped)):
                processed[i] = alpha * (processed[i-1] + shaped[i] - shaped[i-1])

        print("✓ 失真效果已应用")
        return processed

    except Exception as e:
        print(f"✗ 应用失真效果失败: {e}")
        return audio_data
|
||
|
||
def apply_pitch_shift(self, audio_data: np.ndarray, semitones: float = 0.0,
                      sample_rate: int = None) -> np.ndarray:
    """
    Shift pitch by resampling with linear interpolation.

    Because this is plain resampling, the output length changes as well:
    it becomes ``int(len(audio_data) / 2 ** (semitones / 12))``.

    Args:
        audio_data: Audio samples.
        semitones: Shift in semitones (positive raises, negative lowers).
        sample_rate: Sample rate; ``self.sample_rate`` when None (looked up
            but not otherwise used by the computation).

    Returns:
        The shifted samples; the input itself when the shift is zero or on
        error; an empty float32 array when the output length would be zero.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        pitch_factor = 2 ** (semitones / 12.0)

        if pitch_factor == 1.0:
            return audio_data

        source_len = len(audio_data)
        output_length = int(source_len / pitch_factor)
        if output_length <= 0:
            return np.array([], dtype=np.float32)

        # Read positions in the source; np.interp interpolates linearly and
        # holds the last sample for positions past the end.
        positions = np.arange(output_length) * pitch_factor
        processed = np.interp(positions, np.arange(source_len), audio_data)

        print(f"✓ 音高移位效果已应用: {semitones}半音")
        return processed

    except Exception as e:
        print(f"✗ 应用音高移位效果失败: {e}")
        return audio_data
|
||
|
||
def apply_time_stretch(self, audio_data: np.ndarray, factor: float = 1.0,
                       sample_rate: int = None) -> np.ndarray:
    """
    Stretch or shrink the audio to ``factor`` times its length.

    NOTE(review): this simplified implementation resamples with linear
    interpolation, which also shifts the pitch on playback at the same
    rate; it is not a pitch-preserving stretch.

    Args:
        audio_data: Audio samples.
        factor: Stretch factor (>1.0 longer/slower, <1.0 shorter/faster).
        sample_rate: Sample rate; ``self.sample_rate`` when None (looked up
            but not otherwise used by the computation).

    Returns:
        The stretched samples; the input itself when ``factor`` is 1.0 or on
        error; an empty float32 array when the new length would be zero.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        if factor == 1.0:
            return audio_data

        source_len = len(audio_data)
        target_len = int(source_len * factor)
        if target_len <= 0:
            return np.array([], dtype=np.float32)

        # Spread target_len read positions evenly across the source.
        positions = np.linspace(0, source_len - 1, target_len)
        stretched_audio = np.interp(positions, np.arange(source_len), audio_data)

        print(f"✓ 时间拉伸效果已应用: {factor}x")
        return stretched_audio

    except Exception as e:
        print(f"✗ 应用时间拉伸效果失败: {e}")
        return audio_data
|
||
|
||
def apply_vibrato(self, audio_data: np.ndarray, rate: float = 5.0,
                  depth: float = 0.5, sample_rate: int = None) -> np.ndarray:
    """
    Apply a simplified "vibrato" via a single pitch shift.

    This is a placeholder rather than a true time-varying vibrato: the LFO
    is evaluated once, at the first sample, and the resulting offset (up to
    +/-2 semitones scaled by ``depth``) is applied to the whole signal with
    ``apply_pitch_shift``.

    Args:
        audio_data: Audio samples.
        rate: LFO rate in Hz.
        depth: Modulation depth (0.0-1.0).
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        The pitch-shifted samples; the input itself when it is empty, the
        shift produced nothing, or an exception occurred.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        vibrato_audio = np.zeros_like(audio_data)

        if len(audio_data) > 0:
            # Only the LFO value at the first sample is ever used, so
            # compute it directly instead of looping over every sample.
            lfo_phase = 2 * np.pi * rate / sample_rate
            if lfo_phase > 2 * np.pi:
                lfo_phase -= 2 * np.pi
            lfo_value = np.sin(lfo_phase)
            semitones = depth * 2 * lfo_value  # +/-2 semitone range

            vibrato_audio = self.apply_pitch_shift(audio_data, semitones, sample_rate)

        if len(vibrato_audio) == 0:
            # Fall back to the original audio when nothing was produced.
            vibrato_audio = audio_data

        print("✓ 颤音效果已应用")
        return vibrato_audio

    except Exception as e:
        print(f"✗ 应用颤音效果失败: {e}")
        return audio_data
|
||
|
||
def get_audio_info(self, audio_data: np.ndarray, sample_rate: int = None) -> Dict[str, Any]:
    """
    Summarise basic properties of the audio.

    Peak and RMS are computed in float64, so integer PCM input (for example
    int16) cannot overflow.

    Args:
        audio_data: Audio samples.
        sample_rate: Sample rate; ``self.sample_rate`` when None.

    Returns:
        A dict with keys ``duration`` (seconds), ``samples``,
        ``sample_rate``, ``channels`` (taken from ``self.channels``),
        ``peak_level`` and ``rms_level``; an empty dict on error.
    """
    try:
        if sample_rate is None:
            sample_rate = self.sample_rate

        sample_count = len(audio_data)
        if sample_count == 0:
            return {
                'duration': 0.0,
                'samples': 0,
                'sample_rate': sample_rate,
                'channels': self.channels,
                'peak_level': 0.0,
                'rms_level': 0.0
            }

        # abs() and squaring in the input dtype wrap around for integer
        # arrays (abs(-32768) in int16 stays negative), so widen first.
        samples = np.asarray(audio_data, dtype=np.float64)
        duration = sample_count / sample_rate
        peak_level = float(np.max(np.abs(samples)))
        rms_level = float(np.sqrt(np.mean(samples ** 2)))

        return {
            'duration': duration,
            'samples': sample_count,
            'sample_rate': sample_rate,
            'channels': self.channels,
            'peak_level': peak_level,
            'rms_level': rms_level
        }

    except Exception as e:
        print(f"✗ 获取音频信息失败: {e}")
        return {}