modify human mel

This commit is contained in:
brige 2024-09-27 19:31:36 +08:00
parent e606fb6ef5
commit f848529859
4 changed files with 58 additions and 7 deletions

View File

@ -310,10 +310,10 @@ class Human:
self.mel_chunks_queue_ = Queue()
self.audio_chunks_queue_ = Queue()
self._test_image_queue = Queue()
self._thread = None
thread = threading.Thread(target=self.test)
thread.start()
#
# self._thread = None
# thread = threading.Thread(target=self.test)
# thread.start()
# self.test()
# self.play_pcm()

View File

@ -5,6 +5,8 @@ import numpy as np
from scipy import signal
from scipy.io import wavfile
from hparams import hparams as hp
import soundfile as sf
from IPython.display import Audio
def load_wav(path, sr):
    """Load the audio file at ``path`` through librosa, requesting rate ``sr``.

    Args:
        path: Location of the audio file, passed straight to librosa.
        sr: Sample rate forwarded to librosa as its ``sr`` keyword.

    Returns:
        The first element of the value returned by ``librosa.core.load``
        (the waveform); the remaining element is discarded.
    """
    loaded = librosa.core.load(path, sr=sr)
    return loaded[0]
@ -134,3 +136,39 @@ def _denormalize(D):
return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db)
else:
return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db)
def load_audio(file_path, sr=16000):
    """Load an audio file and return the audio data with its sample rate.

    Args:
        file_path: Location of the audio file, passed straight to librosa.
        sr: Sample rate forwarded to ``librosa.load``; defaults to 16000.

    Returns:
        A ``(samples, sample_rate)`` tuple unpacked from ``librosa.load``.
    """
    samples, sample_rate = librosa.load(file_path, sr=sr)
    return samples, sample_rate
def split_audio(wav, sr, chunk_duration):
    """Split audio into consecutive chunks of a fixed duration.

    Args:
        wav: Sliceable sequence of samples (anything supporting ``len`` and
            ``wav[a:b]``).
        sr: Sample rate, in samples per second.
        chunk_duration: Length of each chunk in seconds.

    Returns:
        A list of slices of ``wav``. Every chunk holds
        ``int(chunk_duration * sr)`` samples except possibly the last one,
        which holds whatever remains. An empty ``wav`` yields ``[]``.

    Raises:
        ValueError: If ``chunk_duration * sr`` truncates to less than one
            sample, which would otherwise cause a division by zero (or a
            silently empty result for negative durations).
    """
    # Number of samples contained in each chunk.
    chunk_size = int(chunk_duration * sr)
    if chunk_size <= 0:
        raise ValueError(
            "chunk_duration * sr must be at least one sample, "
            f"got chunk_duration={chunk_duration!r}, sr={sr!r}"
        )

    # Stepping by chunk_size visits exactly ceil(len(wav) / chunk_size) start
    # offsets; slicing clamps the final chunk to the end of the data.
    return [
        wav[start:start + chunk_size]
        for start in range(0, len(wav), chunk_size)
    ]
def save_chunks(chunks, sr, output_folder, base_filename="chunk"):
    """Save each audio chunk to its own WAV file.

    Files are written to ``<output_folder>/<base_filename>_<index>.wav``,
    with the index counting from 0 in iteration order, and each saved path
    is printed.

    Args:
        chunks: Iterable of audio chunks to write.
        sr: Sample rate passed to ``sf.write`` for every file.
        output_folder: Directory prefix used when building each output path.
        base_filename: Filename stem placed before the chunk index.
    """
    for position, samples in enumerate(chunks):
        destination = f"{output_folder}/{base_filename}_{position}.wav"
        sf.write(destination, samples, sr)
        print(f"Saved {destination}")
def play_audio_chunk(chunk, sr):
    """Build an IPython ``Audio`` object for playing the given chunk.

    Args:
        chunk: Audio data handed to ``Audio`` as its first argument.
        sr: Sample rate, forwarded as the ``rate`` keyword.

    Returns:
        The constructed ``Audio`` object.
    """
    player = Audio(chunk, rate=sr)
    return player

View File

@ -189,9 +189,9 @@ class Infer:
p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))
f[y1:y2, x1:x2] = p
# name = "%04d" % j
# cv2.imwrite(f'temp/images/{j}.jpg', p)
# j = j + 1
name = "%04d" % j
cv2.imwrite(f'temp/images/{j}.jpg', p)
j = j + 1
p = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
self._human.push_render_image(p)
# out.write(f)

View File

@ -14,8 +14,11 @@ from io import BytesIO
from queue import Queue
from threading import Thread, Event
from IPython.core.display_functions import display
from pydub import AudioSegment
import audio
logger = logging.getLogger(__name__)
@ -55,6 +58,16 @@ class TTSBase:
self._io_stream.seek(0)
stream = self.__create_bytes_stream(self._io_stream)
stream_len = stream.shape[0]
sr = 16000
soundfile.read('./temp/audio/audio.wav', stream, sr)
# audio_chunks = audio.split_audio(stream, sr, 4)
# display(audio.play_audio_chunk(audio_chunks[0], sr=sr))
# 保存切割后的片段
# audio.save_chunks(stream[0:-1], sr, './temp/audio/')
# audio.save_chunks(audio_chunks, sr, './temp/audio/')
# try:
# sounddevice.play(stream, samplerate=self._human.get_audio_sample_rate())
# sounddevice.wait() # 等待音频播放完毕