modify human mel
This commit is contained in:
parent
e606fb6ef5
commit
f848529859
8
Human.py
8
Human.py
@ -310,10 +310,10 @@ class Human:
|
|||||||
self.mel_chunks_queue_ = Queue()
|
self.mel_chunks_queue_ = Queue()
|
||||||
self.audio_chunks_queue_ = Queue()
|
self.audio_chunks_queue_ = Queue()
|
||||||
self._test_image_queue = Queue()
|
self._test_image_queue = Queue()
|
||||||
|
#
|
||||||
self._thread = None
|
# self._thread = None
|
||||||
thread = threading.Thread(target=self.test)
|
# thread = threading.Thread(target=self.test)
|
||||||
thread.start()
|
# thread.start()
|
||||||
# self.test()
|
# self.test()
|
||||||
# self.play_pcm()
|
# self.play_pcm()
|
||||||
|
|
||||||
|
38
audio.py
38
audio.py
@ -5,6 +5,8 @@ import numpy as np
|
|||||||
from scipy import signal
|
from scipy import signal
|
||||||
from scipy.io import wavfile
|
from scipy.io import wavfile
|
||||||
from hparams import hparams as hp
|
from hparams import hparams as hp
|
||||||
|
import soundfile as sf
|
||||||
|
from IPython.display import Audio
|
||||||
|
|
||||||
def load_wav(path, sr):
    """Load the audio file at ``path`` and return only its sample data.

    ``sr`` is forwarded to ``librosa.core.load`` as the ``sr`` keyword.
    Only the first element of the loader's result is returned; the rest
    is discarded.
    """
    loaded = librosa.core.load(path, sr=sr)
    return loaded[0]
|
||||||
@ -134,3 +136,39 @@ def _denormalize(D):
|
|||||||
return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db)
|
return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db)
|
||||||
else:
|
else:
|
||||||
return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db)
|
return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def load_audio(file_path, sr=16000):
    """Load an audio file and return its audio data and sample rate.

    ``sr`` (default 16000) is forwarded to ``librosa.load``; the two
    values that call yields are returned as a ``(wav, sr)`` pair.
    """
    samples, rate = librosa.load(file_path, sr=sr)
    return samples, rate
|
||||||
|
|
||||||
|
|
||||||
|
def split_audio(wav, sr, chunk_duration):
    """Split audio into consecutive chunks of a fixed duration.

    Args:
        wav: Sliceable sequence of samples (anything supporting ``len``
            and slice indexing).
        sr: Sample rate, in samples per second.
        chunk_duration: Length of each chunk in seconds.

    Returns:
        A list of slices of ``wav`` in order. Every chunk holds
        ``int(chunk_duration * sr)`` samples except possibly the last,
        which holds whatever remains. Empty input yields an empty list.

    Raises:
        ValueError: If ``chunk_duration * sr`` truncates to fewer than
            one sample, which would make chunking impossible.
    """
    # Number of samples contained in each chunk.
    chunk_size = int(chunk_duration * sr)
    if chunk_size <= 0:
        # Fail with a clear message instead of dividing by zero or
        # silently returning nothing for a negative duration.
        raise ValueError(
            "chunk_duration * sr must be at least one sample, "
            f"got chunk_duration={chunk_duration!r}, sr={sr!r}"
        )

    # Slicing clamps at the end of the sequence, so the final chunk is
    # automatically shortened to the remaining samples.
    return [
        wav[start:start + chunk_size]
        for start in range(0, len(wav), chunk_size)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def save_chunks(chunks, sr, output_folder, base_filename="chunk"):
    """Save each audio chunk to its own WAV file.

    Chunk number ``i`` (counting from 0) is written with ``sf.write`` to
    ``{output_folder}/{base_filename}_{i}.wav`` using ``sr`` as the
    sample rate, and the written path is printed afterwards.
    """
    position = 0
    for piece in chunks:
        output_path = f"{output_folder}/{base_filename}_{position}.wav"
        sf.write(output_path, piece, sr)
        print(f"Saved {output_path}")
        position += 1
|
||||||
|
|
||||||
|
|
||||||
|
def play_audio_chunk(chunk, sr):
    """Build a player for the given audio chunk.

    Returns the ``Audio`` object constructed from ``chunk`` with
    ``rate=sr``; displaying it is left to the caller.
    """
    player = Audio(chunk, rate=sr)
    return player
|
||||||
|
6
infer.py
6
infer.py
@ -189,9 +189,9 @@ class Infer:
|
|||||||
p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))
|
p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))
|
||||||
|
|
||||||
f[y1:y2, x1:x2] = p
|
f[y1:y2, x1:x2] = p
|
||||||
# name = "%04d" % j
|
name = "%04d" % j
|
||||||
# cv2.imwrite(f'temp/images/{j}.jpg', p)
|
cv2.imwrite(f'temp/images/{j}.jpg', p)
|
||||||
# j = j + 1
|
j = j + 1
|
||||||
p = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
|
p = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
|
||||||
self._human.push_render_image(p)
|
self._human.push_render_image(p)
|
||||||
# out.write(f)
|
# out.write(f)
|
||||||
|
@ -14,8 +14,11 @@ from io import BytesIO
|
|||||||
from queue import Queue
|
from queue import Queue
|
||||||
from threading import Thread, Event
|
from threading import Thread, Event
|
||||||
|
|
||||||
|
from IPython.core.display_functions import display
|
||||||
from pydub import AudioSegment
|
from pydub import AudioSegment
|
||||||
|
|
||||||
|
import audio
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@ -55,6 +58,16 @@ class TTSBase:
|
|||||||
self._io_stream.seek(0)
|
self._io_stream.seek(0)
|
||||||
stream = self.__create_bytes_stream(self._io_stream)
|
stream = self.__create_bytes_stream(self._io_stream)
|
||||||
stream_len = stream.shape[0]
|
stream_len = stream.shape[0]
|
||||||
|
|
||||||
|
sr = 16000
|
||||||
|
soundfile.read('./temp/audio/audio.wav', stream, sr)
|
||||||
|
# audio_chunks = audio.split_audio(stream, sr, 4)
|
||||||
|
|
||||||
|
# display(audio.play_audio_chunk(audio_chunks[0], sr=sr))
|
||||||
|
|
||||||
|
# 保存切割后的片段
|
||||||
|
# audio.save_chunks(stream[0:-1], sr, './temp/audio/')
|
||||||
|
# audio.save_chunks(audio_chunks, sr, './temp/audio/')
|
||||||
# try:
|
# try:
|
||||||
# sounddevice.play(stream, samplerate=self._human.get_audio_sample_rate())
|
# sounddevice.play(stream, samplerate=self._human.get_audio_sample_rate())
|
||||||
# sounddevice.wait() # 等待音频播放完毕
|
# sounddevice.wait() # 等待音频播放完毕
|
||||||
|
Loading…
Reference in New Issue
Block a user