diff --git a/Human.py b/Human.py
index 02c4097..41f7d17 100644
--- a/Human.py
+++ b/Human.py
@@ -9,6 +9,7 @@ import time
 
 
 import numpy as np
+import pyaudio
 
 import audio
 import face_detection
@@ -291,14 +292,16 @@ class Human:
         self._output_queue = mp.Queue()
         self._res_frame_queue = mp.Queue(self._batch_size * 2)
 
-        # self._chunk_2_mal = Chunk2Mal(self)
-        # self._tts = TTSBase(self)
+        self._chunk_2_mal = Chunk2Mal(self)
+        self._tts = TTSBase(self)
 
         self.mel_chunks_queue_ = Queue()
+        self.audio_chunks_queue_ = Queue()
         self._test_image_queue = Queue()
 
         self._thread = None
         # self.test()
+        # self.play_pcm()
 
         # face_images_path = r'./face/'
         # self._face_image_paths = utils.read_files_path(face_images_path)
@@ -309,6 +312,19 @@ class Human:
         #                                    )).start()
         # self.render_event.set()
 
+    # def play_pcm(self):
+    #     p = pyaudio.PyAudio()
+    #     stream = p.open(format=p.get_format_from_width(2), channels=1, rate=16000, output=True)
+    #     file1 = r'./audio/en_weather.pcm'
+    #
+    #     # Write the PCM data directly into the PyAudio stream
+    #     with open(file1, "rb") as f:
+    #         stream.write(f.read())
+    #
+    #     stream.stop_stream()
+    #     stream.close()
+    #     p.terminate()
+
     def test(self):
         wav = audio.load_wav(r'./audio/audio1.wav', 16000)
         mel = audio.melspectrogram(wav)
@@ -346,8 +362,8 @@ class Human:
         print("Model loaded")
 
         frame_h, frame_w = face_list_cycle[0].shape[:-1]
-        out = cv2.VideoWriter('temp/resul_tttt.avi',
-                              cv2.VideoWriter_fourcc(*'DIVX'), 25, (frame_w, frame_h))
+        # out = cv2.VideoWriter('temp/resul_tttt.avi',
+        #                       cv2.VideoWriter_fourcc(*'DIVX'), 25, (frame_w, frame_h))
 
         face_det_results = face_detect(face_list_cycle)
 
@@ -374,12 +390,12 @@ class Human:
                 # j = j + 1
                 p = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
                 self._test_image_queue.put(p)
-                out.write(f)
-
-        out.release()
-        command = 'ffmpeg -y -i {} -i {} -strict -2 -q:v 1 {}'.format('./audio/audio1.wav', 'temp/resul_tttt.avi',
-                                                                      'temp/resul_tttt.mp4')
-        subprocess.call(command, shell=platform.system() != 'Windows')
+                # out.write(f)
+        #
+        # out.release()
+        # command = 'ffmpeg -y -i {} -i {} -strict -2 -q:v 1 {}'.format('./audio/audio1.wav', 'temp/resul_tttt.avi',
+        #                                                               'temp/resul_tttt.mp4')
+        # subprocess.call(command, shell=platform.system() != 'Windows')
 
 
         # gen = datagen(face_list_cycle, self.mel_chunks_queue_)
@@ -407,18 +423,18 @@ class Human:
         logging.info('human destroy')
 
     def read(self, txt):
-        # if self._tts is None:
-        #     logging.warning('tts is none')
-        #     return
-
-        if self._thread is None:
-            self._thread = threading.Thread(target=self.test)
-            self._thread.start()
-            # self._tts.push_txt(txt)
+        if self._tts is None:
+            logging.warning('tts is none')
+            return
+        self._tts.push_txt(txt)
 
     def push_audio_chunk(self, audio_chunk):
         self._chunk_2_mal.push_chunk(audio_chunk)
 
+    def push_mel_chunks_queue(self, mel_chunk):
+        """Put ``mel_chunk`` on ``self.mel_chunks_queue_``."""
+        self.mel_chunks_queue_.put(mel_chunk)
+
     def push_feat_queue(self, mel_chunks):
         print("push_feat_queue")
         self._feat_queue.put(mel_chunks)
diff --git a/edge_tts_test.py b/edge_tts_test.py
new file mode 100644
index 0000000..9b72790
--- /dev/null
+++ b/edge_tts_test.py
@@ -0,0 +1,102 @@
+#encoding = utf8
+
+import edge_tts
+import asyncio
+import pyaudio
+from pydub import AudioSegment
+from io import BytesIO
+
+# When running inside a Jupyter Notebook, lift the event-loop restriction
+try:
+    import nest_asyncio
+    nest_asyncio.apply()
+except ImportError:
+    pass
+
+def play_audio(data: bytes, stream: pyaudio.Stream) -> None:
+    """Decode MP3 bytes with pydub and write the raw PCM to ``stream``."""
+    stream.write(AudioSegment.from_mp3(BytesIO(data)).raw_data)
+
+
+CHUNK_SIZE = 20 * 1024  # bytes of MP3 buffered before each decode/playback
+
+
+async def play_tts(text, voice):
+    """Synthesize ``text`` with edge-tts ``voice`` and play all of it through PyAudio."""
+    communicate = edge_tts.Communicate(text, voice)
+    # 16-bit mono output at 24 kHz.
+    audio = pyaudio.PyAudio()
+    stream = audio.open(format=pyaudio.paInt16, channels=1, rate=24000, output=True)
+    total_data = b''
+    try:
+        for chunk in communicate.stream_sync():
+            if chunk["type"] == "audio" and chunk["data"]:
+                total_data += chunk["data"]
+                if len(total_data) >= CHUNK_SIZE:
+                    play_audio(total_data[:CHUNK_SIZE], stream)
+                    total_data = total_data[CHUNK_SIZE:]
+        if total_data:
+            # Flush the tail: audio shorter than CHUNK_SIZE would otherwise never play.
+            play_audio(total_data, stream)
+    finally:
+        stream.stop_stream()
+        stream.close()
+        audio.terminate()
+
+
+async def save_to_file(text, voice, filename):
+    """Stream edge-tts audio for ``text`` and write the audio chunks to ``filename``."""
+    tts = edge_tts.Communicate(text, voice)
+    with open(filename, "wb") as out_file:
+        async for part in tts.stream():
+            if part['type'] == 'audio':
+                out_file.write(part['data'])
+
+if __name__ == "__main__":
+    text = "Hello, this is a test of the Edge TTS service."
+    voice = "en-US-JessaNeural"
+
+    # Run the async function with asyncio.run()
+    asyncio.run(play_tts(text, voice))
+    # asyncio.run(save_to_file(text, voice, "output.wav"))
+
+#
+# import edge_tts
+# import pyaudio
+# from io import BytesIO
+# from pydub import AudioSegment
+# import time
+#
+# TEXT = 'Hello World! How are you guys doing? I hope great, cause I am having fun and honestly it has been a blast'
+# VOICE = "en-US-AndrewMultilingualNeural"
+# CHUNK_SIZE = 20 * 1024  # Assuming around 1024 bytes per chunk (adjust based on format)
+#
+# def main() -> None:
+#   start_time = time.time()
+#   communicator = edge_tts.Communicate(TEXT, VOICE)
+#
+#   pyaudio_instance = pyaudio.PyAudio()
+#   audio_stream = pyaudio_instance.open(format=pyaudio.paInt16, channels=1, rate=16000, output=True)
+#
+#   total_data = b''  # Store audio data instead of chunks
+#
+#   for chunk in communicator.stream_sync():
+#     if chunk["type"] == "audio" and chunk["data"]:
+#       total_data += chunk["data"]
+#       if len(total_data) >= CHUNK_SIZE:
+#         print(f"Time elapsed: {time.time() - start_time:.2f} seconds")  # Print time
+#         play_audio(total_data[:CHUNK_SIZE], audio_stream)  # Play first CHUNK_SIZE bytes
+#         total_data = total_data[CHUNK_SIZE:]  # Remove played data
+#
+#   # Play remaining audio
+#   play_audio(total_data, audio_stream)
+#
+#   audio_stream.stop_stream()
+#   audio_stream.close()
+#   pyaudio_instance.terminate()
+#
+# def play_audio(data: bytes, stream: pyaudio.Stream) -> None:
+#   stream.write(AudioSegment.from_mp3(BytesIO(data)).raw_data)
+#
+# if __name__ == "__main__":
+#   main()
\ No newline at end of file
diff --git a/tts/Chunk2Mal.py b/tts/Chunk2Mal.py
index 783a2a7..7388b3b 100644
--- a/tts/Chunk2Mal.py
+++ b/tts/Chunk2Mal.py
@@ -36,19 +36,35 @@ class Chunk2Mal:
                 # print('Chunk2Mal queue.Empty')
                 continue
 
-            if len(self._chunks) <= self._human.get_stride_left_size() + self._human.get_stride_right_size():
-                # print('Chunk2Mal queue.Empty')
+            if type_ == 0:
                 continue
 
             logging.info('np.concatenate')
-            inputs = np.concatenate(self._chunks)  # [N * chunk]
-            mel = audio.melspectrogram(inputs)
-            left = max(0, self._human.get_stride_left_size() * 80 / 50)
-            right = min(len(mel[0]), len(mel[0]) - self._human.get_stride_right_size() * 80 / 50)
-            mel_idx_multiplier = 80. * 2 / self._human.get_fps()
+            mel = audio.melspectrogram(chunk)
+            if np.isnan(mel.reshape(-1)).sum() > 0:
+                raise ValueError(
+                    'Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again')
+
             mel_step_size = 16
+
+            print('fps:', self._human.get_fps())
+            mel_idx_multiplier = 80. / self._human.get_fps()
+            print('mel_idx_multiplier:', mel_idx_multiplier)
+
             i = 0
-            mel_chunks = []
+            while 1:
+                start_idx = int(i * mel_idx_multiplier)
+                if start_idx + mel_step_size > len(mel[0]):
+                    # mel_chunks.append(mel[:, len(mel[0]) - mel_step_size:])
+                    self._human.push_mel_chunks_queue(mel[:, len(mel[0]) - mel_step_size:])
+                    break
+                # mel_chunks.append(mel[:, start_idx: start_idx + mel_step_size])
+                self._human.push_mel_chunks_queue(mel[:, start_idx: start_idx + mel_step_size])
+                i += 1
+
+            batch_size = 128
+
+            '''
             while i < (len(self._chunks) - self._human.get_stride_left_size()
                        - self._human.get_stride_right_size()) / 2:
                 start_idx = int(left + i * mel_idx_multiplier)
@@ -62,6 +78,7 @@ class Chunk2Mal:
 
             # discard the old part to save memory
             self._chunks = self._chunks[-(self._human.get_stride_left_size() + self._human.get_stride_right_size()):]
+            '''
 
         logging.info('chunk2mal exit')
 
diff --git a/tts/TTSBase.py b/tts/TTSBase.py
index d38a539..49f3b01 100644
--- a/tts/TTSBase.py
+++ b/tts/TTSBase.py
@@ -5,6 +5,7 @@ import time
 
 import edge_tts
 import numpy as np
+import pyaudio
 import soundfile
 import resampy
 import queue
@@ -12,6 +13,8 @@ from io import BytesIO
 from queue import Queue
 from threading import Thread, Event
 
+from pydub import AudioSegment
+
 logger = logging.getLogger(__name__)
 
 
@@ -23,12 +26,15 @@ class TTSBase:
         self._exit_event = None
         self._io_stream = BytesIO()
         self._sample_rate = 16000
-        self._chunk = self._sample_rate // self._human.get_fps()
+        self._chunk_len = self._sample_rate // self._human.get_fps()
 
         self._exit_event = Event()
         self._thread = Thread(target=self._on_run)
         self._exit_event.set()
         self._thread.start()
+        self._pcm_player = pyaudio.PyAudio()
+        self._pcm_stream = self._pcm_player.open(format=pyaudio.paInt16,
+                                                 channels=1, rate=16000, output=True)
         logging.info('tts start')
 
     def _on_run(self):
@@ -51,10 +57,15 @@ class TTSBase:
         stream = self.__create_bytes_stream(self._io_stream)
         stream_len = stream.shape[0]
         index = 0
-        while stream_len >= self._chunk:
-            self._human.push_audio_chunk(stream[index:index + self._chunk])
-            stream_len -= self._chunk
-            index += self._chunk
+        while stream_len >= self._chunk_len:
+            audio_chunk = stream[index:index + self._chunk_len]
+            # self._pcm_stream.write(audio_chunk)
+            # self._pcm_stream.write(AudioSegment.from_mp3(audio_chunk))
+            # self._human.push_audio_chunk(audio_chunk)
+            # self._human.push_mel_chunks_queue(audio_chunk)
+            self._human.push_audio_chunk(audio_chunk)
+            stream_len -= self._chunk_len
+            index += self._chunk_len
 
     def __create_bytes_stream(self, io_stream):
         stream, sample_rate = soundfile.read(io_stream)
@@ -74,14 +85,38 @@ class TTSBase:
     async def __on_request(self, voice, txt):
         communicate = edge_tts.Communicate(txt, voice)
         first = True
-        async for chuck in communicate.stream():
-            if first:
-                first = False
+        # total_data = b''
+        # CHUNK_SIZE = self._chunk_len
+        async for chunk in communicate.stream():
+            if chunk["type"] == "audio" and chunk["data"]:
+                self._io_stream.write(chunk['data'])
+                # total_data += chunk["data"]
+                # if len(total_data) >= CHUNK_SIZE:
+                #     print(f"Time elapsed: {time.time() - start_time:.2f} seconds")  # Print time
+                    # audio_data = AudioSegment.from_mp3(BytesIO(total_data[:CHUNK_SIZE])) #.raw_data
+                    # audio_data = audio_data.set_frame_rate(self._human.get_audio_sample_rate())
+                    # self._human.push_audio_chunk(audio_data)
+                    # self._pcm_stream.write(audio_data.raw_data)
+                    # play_audio(total_data[:CHUNK_SIZE], stream)  # Play first CHUNK_SIZE bytes
+                    # total_data = total_data[CHUNK_SIZE:]  # Remove played data
 
-            if chuck['type'] == 'audio':
-                self._io_stream.write(chuck['data'])
+            # if first:
+            #     first = False
+
+            # if chuck['type'] == 'audio':
+            #     # self._io_stream.write(chuck['data'])
+            #     self._io_stream.write(AudioSegment.from_mp3(BytesIO(total_data[:CHUNK_SIZE])).raw_data)
+        # if len(total_data) > 0:
+             # self._pcm_stream.write(AudioSegment.from_mp3(BytesIO(total_data)).raw_data)
+             # audio_data = AudioSegment.from_mp3(BytesIO(total_data))  # .raw_data
+             # audio_data = audio_data.set_frame_rate(self._human.get_audio_sample_rate())
+             # self._human.push_audio_chunk(audio_data)
+        # self._io_stream.write(AudioSegment.from_mp3(BytesIO(total_data)).raw_data)
 
     def stop(self):
+        self._pcm_stream.stop_stream()
+        self._pcm_player.close(self._pcm_stream)
+        self._pcm_player.terminate()
         if self._exit_event is None:
             return
 
diff --git a/ui.py b/ui.py
index 12190f8..a4a80cc 100644
--- a/ui.py
+++ b/ui.py
@@ -1,14 +1,18 @@
 #encoding = utf8
 import json
 import logging
+import os
 from logging import handlers
 import tkinter
 import tkinter.messagebox
 import customtkinter
 import cv2
 import requests
+import winsound
 from PIL import Image, ImageTk
 
+from playsound import playsound
+
 from Human import Human
 from tts.EdgeTTS import EdgeTTS
 
@@ -25,7 +29,7 @@ class App(customtkinter.CTk):
         self._tts_url = 'http://localhost:8080'
 
         # configure window
-        self.title("数字人测试demo")
+        self.title("TTS demo")
         self.geometry(f"{1100}x{580}")
 
         self.grid_columnconfigure(1, weight=1)
@@ -49,13 +53,24 @@ class App(customtkinter.CTk):
 
         self._init_image_canvas()
 
+        self._is_play_audio = False
         self._human = Human()
         self._render()
+        # self.play_audio()
 
     def on_destroy(self):
         logger.info('------------App destroy------------')
         self._human.on_destroy()
 
+    def play_audio(self):
+        """Start async playback of ./audio/audio1.wav on the first call; later calls do nothing."""
+        if self._is_play_audio:
+            return
+        self._is_play_audio = True
+        file = os.path.curdir + '/audio/audio1.wav'
+        print(file)
+        winsound.PlaySound(file, winsound.SND_ASYNC | winsound.SND_FILENAME)  # bit flags: use |, not "or"
+
     def _init_image_canvas(self):
         self._canvas = customtkinter.CTkCanvas(self.image_frame)
         self._canvas.pack(fill=customtkinter.BOTH, expand=customtkinter.YES)
@@ -66,6 +81,7 @@ class App(customtkinter.CTk):
             self.after(100, self._render)
             return
 
+        self.play_audio()
         iheight, iwidth = image.shape[0], image.shape[1]
         width = self.winfo_width()
         height = self.winfo_height()
@@ -88,10 +104,11 @@ class App(customtkinter.CTk):
         height = self.winfo_height() * 0.5
         self._canvas.create_image(width, height, anchor=customtkinter.CENTER, image=imgtk)
         self._canvas.update()
-        self.after(60, self._render)
+        self.after(34, self._render)
 
     def request_tts(self):
         content = self.entry.get()
+        content = 'Hello, this is a test of the Edge TTS service.'
         print('content:', content)
         self.entry.delete(0, customtkinter.END)
         self._human.read(content)