From f8485298594041c7f39284b5fe9c980dcc1d9895 Mon Sep 17 00:00:00 2001
From: brige
Date: Fri, 27 Sep 2024 19:31:36 +0800
Subject: [PATCH] modify human mel

---
 Human.py       |  8 ++++----
 audio.py       | 38 ++++++++++++++++++++++++++++++++++++++
 infer.py       |  6 +++---
 tts/TTSBase.py | 13 +++++++++++++
 4 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/Human.py b/Human.py
index 80709fc..2450c20 100644
--- a/Human.py
+++ b/Human.py
@@ -310,10 +310,10 @@ class Human:
         self.mel_chunks_queue_ = Queue()
         self.audio_chunks_queue_ = Queue()
         self._test_image_queue = Queue()
-
-        self._thread = None
-        thread = threading.Thread(target=self.test)
-        thread.start()
+        #
+        # self._thread = None
+        # thread = threading.Thread(target=self.test)
+        # thread.start()
 
         # self.test()
         # self.play_pcm()
diff --git a/audio.py b/audio.py
index 32ab5fa..9892f65 100644
--- a/audio.py
+++ b/audio.py
@@ -5,6 +5,8 @@ import numpy as np
 from scipy import signal
 from scipy.io import wavfile
 from hparams import hparams as hp
+import soundfile as sf
+from IPython.display import Audio
 
 def load_wav(path, sr):
     return librosa.core.load(path, sr=sr)[0]
@@ -134,3 +136,39 @@ def _denormalize(D):
         return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db)
     else:
         return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db)
+
+
+
+def load_audio(file_path, sr=16000):
+    """加载音频文件并返回音频数据和采样率"""
+    wav, sr = librosa.load(file_path, sr=sr)
+    return wav, sr
+
+
+def split_audio(wav, sr, chunk_duration):
+    """将音频按指定时长切割"""
+    # 计算每个片段包含的采样点数量
+    chunk_size = int(chunk_duration * sr)
+    num_chunks = int(np.ceil(len(wav) / chunk_size))
+
+    audio_chunks = []
+    for i in range(num_chunks):
+        start_idx = i * chunk_size
+        end_idx = min((i + 1) * chunk_size, len(wav))
+        chunk = wav[start_idx:end_idx]
+        audio_chunks.append(chunk)
+
+    return audio_chunks
+
+
+def save_chunks(chunks, sr, output_folder, base_filename="chunk"):
+    """保存切割的音频块"""
+    for idx, chunk in enumerate(chunks):
+        output_path = f"{output_folder}/{base_filename}_{idx}.wav"
+        sf.write(output_path, chunk, sr)
+        print(f"Saved {output_path}")
+
+
+def play_audio_chunk(chunk, sr):
+    """播放指定音频块"""
+    return Audio(chunk, rate=sr)
diff --git a/infer.py b/infer.py
index b18f972..5415dd1 100644
--- a/infer.py
+++ b/infer.py
@@ -189,9 +189,9 @@ class Infer:
             p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))
             f[y1:y2, x1:x2] = p
-            # name = "%04d" % j
-            # cv2.imwrite(f'temp/images/{j}.jpg', p)
-            # j = j + 1
+            name = "%04d" % j
+            cv2.imwrite(f'temp/images/{j}.jpg', p)
+            j = j + 1
 
             p = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
             self._human.push_render_image(p)
             # out.write(f)
diff --git a/tts/TTSBase.py b/tts/TTSBase.py
index 9c93c1a..e747517 100644
--- a/tts/TTSBase.py
+++ b/tts/TTSBase.py
@@ -14,8 +14,11 @@ from io import BytesIO
 from queue import Queue
 from threading import Thread, Event
 
+from IPython.core.display_functions import display
 from pydub import AudioSegment
 
+import audio
+
 logger = logging.getLogger(__name__)
 
 
@@ -55,6 +58,16 @@ class TTSBase:
         self._io_stream.seek(0)
         stream = self.__create_bytes_stream(self._io_stream)
         stream_len = stream.shape[0]
+
+        sr = 16000
+        soundfile.read('./temp/audio/audio.wav', stream, sr)
+        # audio_chunks = audio.split_audio(stream, sr, 4)
+
+        # display(audio.play_audio_chunk(audio_chunks[0], sr=sr))
+
+        # 保存切割后的片段
+        # audio.save_chunks(stream[0:-1], sr, './temp/audio/')
+        # audio.save_chunks(audio_chunks, sr, './temp/audio/')
         # try:
         #     sounddevice.play(stream, samplerate=self._human.get_audio_sample_rate())
         #     sounddevice.wait()  # 等待音频播放完毕