modify human mel

This commit is contained in:
brige 2024-09-27 19:31:36 +08:00
parent e606fb6ef5
commit f848529859
4 changed files with 58 additions and 7 deletions

View File

@ -310,10 +310,10 @@ class Human:
self.mel_chunks_queue_ = Queue() self.mel_chunks_queue_ = Queue()
self.audio_chunks_queue_ = Queue() self.audio_chunks_queue_ = Queue()
self._test_image_queue = Queue() self._test_image_queue = Queue()
#
self._thread = None # self._thread = None
thread = threading.Thread(target=self.test) # thread = threading.Thread(target=self.test)
thread.start() # thread.start()
# self.test() # self.test()
# self.play_pcm() # self.play_pcm()

View File

@ -5,6 +5,8 @@ import numpy as np
from scipy import signal from scipy import signal
from scipy.io import wavfile from scipy.io import wavfile
from hparams import hparams as hp from hparams import hparams as hp
import soundfile as sf
from IPython.display import Audio
def load_wav(path, sr): def load_wav(path, sr):
return librosa.core.load(path, sr=sr)[0] return librosa.core.load(path, sr=sr)[0]
@ -134,3 +136,39 @@ def _denormalize(D):
return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db) return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db)
else: else:
return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db) return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db)
def load_audio(file_path, sr=16000):
    """Load an audio file and return its samples and sample rate.

    Args:
        file_path: Path of the audio file, forwarded to ``librosa.load``.
        sr: Sample rate forwarded to ``librosa.load`` (default 16000).

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load``.
    """
    samples, sample_rate = librosa.load(file_path, sr=sr)
    return samples, sample_rate
def split_audio(wav, sr, chunk_duration):
    """Split audio into consecutive chunks of a fixed duration.

    Args:
        wav: Sample sequence supporting ``len()`` and slicing.
        sr: Sample rate, in samples per second.
        chunk_duration: Length of each chunk, in seconds.

    Returns:
        A list of slices of ``wav`` in their original order. Every chunk
        holds ``int(chunk_duration * sr)`` samples except possibly the last
        one, which holds whatever remains. Empty input yields an empty list.

    Raises:
        ValueError: If ``chunk_duration * sr`` truncates to less than one
            sample, which would make the split undefined.
    """
    # Number of samples contained in each chunk.
    chunk_size = int(chunk_duration * sr)
    if chunk_size <= 0:
        raise ValueError(
            "chunk_duration * sr must be at least one sample, "
            f"got chunk_duration={chunk_duration!r}, sr={sr!r}"
        )
    # Slicing clamps at the end of the sequence, so the final chunk is
    # simply shorter when the length is not a multiple of chunk_size.
    return [
        wav[start:start + chunk_size]
        for start in range(0, len(wav), chunk_size)
    ]
def save_chunks(chunks, sr, output_folder, base_filename="chunk"):
    """Save each audio chunk to its own numbered WAV file.

    Files are named ``{output_folder}/{base_filename}_{index}.wav``, with
    the index counting from 0 in iteration order. The path of every file
    is printed after it has been written.

    Args:
        chunks: Iterable of audio chunks to write.
        sr: Sample rate passed to ``sf.write`` for every chunk.
        output_folder: Folder prefix of the output paths.
        base_filename: Filename stem placed before the index.
    """
    position = 0
    for samples in chunks:
        output_path = f"{output_folder}/{base_filename}_{position}.wav"
        sf.write(output_path, samples, sr)
        print(f"Saved {output_path}")
        position += 1
def play_audio_chunk(chunk, sr):
    """Build a playable audio widget for the given chunk.

    Args:
        chunk: Audio data handed to ``Audio``.
        sr: Sample rate, passed to ``Audio`` as ``rate``.

    Returns:
        The ``Audio`` object created from ``chunk``; the caller decides
        how to display it.
    """
    player = Audio(chunk, rate=sr)
    return player

View File

@ -189,9 +189,9 @@ class Infer:
p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1)) p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))
f[y1:y2, x1:x2] = p f[y1:y2, x1:x2] = p
# name = "%04d" % j name = "%04d" % j
# cv2.imwrite(f'temp/images/{j}.jpg', p) cv2.imwrite(f'temp/images/{j}.jpg', p)
# j = j + 1 j = j + 1
p = cv2.cvtColor(f, cv2.COLOR_BGR2RGB) p = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
self._human.push_render_image(p) self._human.push_render_image(p)
# out.write(f) # out.write(f)

View File

@ -14,8 +14,11 @@ from io import BytesIO
from queue import Queue from queue import Queue
from threading import Thread, Event from threading import Thread, Event
from IPython.core.display_functions import display
from pydub import AudioSegment from pydub import AudioSegment
import audio
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -55,6 +58,16 @@ class TTSBase:
self._io_stream.seek(0) self._io_stream.seek(0)
stream = self.__create_bytes_stream(self._io_stream) stream = self.__create_bytes_stream(self._io_stream)
stream_len = stream.shape[0] stream_len = stream.shape[0]
sr = 16000
soundfile.read('./temp/audio/audio.wav', stream, sr)
# audio_chunks = audio.split_audio(stream, sr, 4)
# display(audio.play_audio_chunk(audio_chunks[0], sr=sr))
# 保存切割后的片段
# audio.save_chunks(stream[0:-1], sr, './temp/audio/')
# audio.save_chunks(audio_chunks, sr, './temp/audio/')
# try: # try:
# sounddevice.play(stream, samplerate=self._human.get_audio_sample_rate()) # sounddevice.play(stream, samplerate=self._human.get_audio_sample_rate())
# sounddevice.wait() # 等待音频播放完毕 # sounddevice.wait() # 等待音频播放完毕