From b3bbf40d95f5b3679bff993e60d209079cea5d44 Mon Sep 17 00:00:00 2001
From: brige <jiegeaiai@163.com>
Date: Wed, 13 Nov 2024 12:58:56 +0800
Subject: [PATCH] add human txt: pass TTS text alongside audio chunks

---
 human/audio_mal_handler.py |  6 +++--
 human/human_render.py      | 48 --------------------------------------
 nlp/nlp_doubao.py          | 16 ++++++++++---
 render/voice_render.py     |  7 +++---
 tts/tts_audio_handle.py    |  7 ++++--
 utils/async_task_queue.py  |  4 ++--
 6 files changed, 28 insertions(+), 60 deletions(-)

diff --git a/human/audio_mal_handler.py b/human/audio_mal_handler.py
index 6c46eb9..76f471c 100644
--- a/human/audio_mal_handler.py
+++ b/human/audio_mal_handler.py
@@ -63,7 +63,8 @@ class AudioMalHandler(AudioHandler):
         count = 0
         for _ in range(self._context.batch_size * 2):
             frame, _type = self.get_audio_frame()
-            self.frames.append(frame)
+            chunk, txt = frame
+            self.frames.append(chunk)
             self.on_next_handle((frame, _type), 0)
             count = count + 1
 
@@ -103,7 +104,8 @@ class AudioMalHandler(AudioHandler):
             frame = self._queue.get()
             type_ = 0
         else:
-            frame = np.zeros(self.chunk, dtype=np.float32)
+            chunk = np.zeros(self.chunk, dtype=np.float32)
+            frame = (chunk, '')
             type_ = 1
         # print('AudioMalHandler get_audio_frame type:', type_)
         return frame, type_
diff --git a/human/human_render.py b/human/human_render.py
index 78ef4cd..dbdddd0 100644
--- a/human/human_render.py
+++ b/human/human_render.py
@@ -121,52 +121,4 @@ class HumanRender(AudioHandler):
         # self._video_render.stop()
         # self._exit_event.clear()
         # self._thread.join()
-'''
-        self._exit_event = Event()
-        self._thread = Thread(target=self._on_run)
-        self._exit_event.set()
-        self._thread.start()
 
-    def _on_run(self):
-        logging.info('human render run')
-        while self._exit_event.is_set():
-            self._run_step()
-            time.sleep(0.02)
-
-        logging.info('human render exit')
-
-    def _run_step(self):
-        try:
-            res_frame, idx, audio_frames = self._queue.get(block=True, timeout=.002)
-        except queue.Empty:
-            # print('render queue.Empty:')
-            return None
-        if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
-            combine_frame = self._context.frame_list_cycle[idx]
-        else:
-            bbox = self._context.coord_list_cycle[idx]
-            combine_frame = copy.deepcopy(self._context.frame_list_cycle[idx])
-            y1, y2, x1, x2 = bbox
-            try:
-                res_frame = cv2.resize(res_frame.astype(np.uint8), (x2 - x1, y2 - y1))
-            except:
-                return
-            # combine_frame = get_image(ori_frame,res_frame,bbox)
-            # t=time.perf_counter()
-            combine_frame[y1:y2, x1:x2] = res_frame
-
-        image = combine_frame
-        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-
-        if self._image_render is not None:
-            self._image_render.on_render(image)
-
-        for audio_frame in audio_frames:
-            frame, type_ = audio_frame
-            frame = (frame * 32767).astype(np.int16)
-            if self._audio_render is not None:
-                self._audio_render.write(frame.tobytes(), int(frame.shape[0]*2))
-            # new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0])
-            # new_frame.planes[0].update(frame.tobytes())
-            # new_frame.sample_rate = 16000
-'''
diff --git a/nlp/nlp_doubao.py b/nlp/nlp_doubao.py
index 1a1af84..8b5e2f1 100644
--- a/nlp/nlp_doubao.py
+++ b/nlp/nlp_doubao.py
@@ -98,7 +98,7 @@ class DouBaoHttp:
         ]
         self._response = self.__request(msg_list)
         if not self._response.ok:
-            logger.info(f"请求失败，状态码：{self._response.status_code}")
+            logger.error(f"请求失败，状态码：{self._response.status_code}")
             return
         sec = ''
         for chunk in self._response.iter_lines():
@@ -106,13 +106,23 @@ class DouBaoHttp:
             if len(content) < 1:
                 continue
             content = content[5:]
-            content = json.loads(content)
+            content = content.strip()
+            if content == '[DONE]':
+                break
+
+            try:
+                content = json.loads(content)
+            except Exception as e:
+                logger.error(f"json解析失败，错误信息：{e, content}")
+                continue
             sec = sec + content["choices"][0]["delta"]["content"]
             sec, message = handle.handle(sec)
             if len(message) > 0:
                 logger.info(f'-------dou_bao nlp time:{time.time() - t:.4f}s')
                 callback(message)
-        callback(sec)
+        if len(sec) > 0:
+            callback(sec)
+
         self._requesting = False
         logger.info(f'-------dou_bao nlp time:{time.time() - t:.4f}s')
 
diff --git a/render/voice_render.py b/render/voice_render.py
index ad2bf5e..32cf389 100644
--- a/render/voice_render.py
+++ b/render/voice_render.py
@@ -23,12 +23,13 @@ class VoiceRender(BaseRender):
 
         for audio_frame in frame:
             frame, type_ = audio_frame
-            frame = (frame * 32767).astype(np.int16)
+            chunk, txt = frame
+            chunk = (chunk * 32767).astype(np.int16)
 
             if self._audio_render is not None:
                 try:
-                    chunk_len = int(frame.shape[0] * 2)
+                    chunk_len = int(chunk.shape[0] * 2)
                     # print('audio frame:', frame.shape, chunk_len)
-                    self._audio_render.write(frame.tobytes(), chunk_len)
+                    self._audio_render.write(chunk.tobytes(), chunk_len)
                 except Exception as e:
                     logging.error(f'Error writing audio frame: {e}')
diff --git a/tts/tts_audio_handle.py b/tts/tts_audio_handle.py
index 5c5d193..bd7643c 100644
--- a/tts/tts_audio_handle.py
+++ b/tts/tts_audio_handle.py
@@ -70,6 +70,7 @@ class TTSAudioSplitHandle(TTSAudioHandle):
             return
 
         s, txt = stream
+        current = 0
         with self._lock:
             if len(self._priority_queue) != 0:
                 current = self._priority_queue[0][0]
@@ -90,15 +91,17 @@ class TTSAudioSplitHandle(TTSAudioHandle):
                 idx += self._chunk
             if not self._is_running:
                 return
-            heapq.heappush(self._priority_queue, (index, chunks))
+            heapq.heappush(self._priority_queue, (index, (chunks, txt)))
 
         print('TTSAudioSplitHandle::on_handle', index, current, self._current, len(self._priority_queue))
         if current == self._current:
             self._current = self._current + 1
             chunks = heapq.heappop(self._priority_queue)[1]
+            chunks, txt = chunks
+
             if chunks is not None:
                 for chunk in chunks:
-                    self.on_next_handle(chunk, 0)
+                    self.on_next_handle((chunk, txt), 0)
 
     def stop(self):
         self._is_running = False
diff --git a/utils/async_task_queue.py b/utils/async_task_queue.py
index ff99f45..05c5e93 100644
--- a/utils/async_task_queue.py
+++ b/utils/async_task_queue.py
@@ -45,7 +45,7 @@ class AsyncTaskQueue:
 
                 await func(*args)  # Execute async function
             except Exception as e:
-                logging.error(f'{self._name} error: {e}')
+                logging.error(f'{self._name} error: {repr(e)}')
             finally:
                 self._queue.task_done()
 
@@ -68,4 +68,4 @@ class AsyncTaskQueue:
 
     def stop(self):
         self.stop_workers()
-        self._thread.join()
\ No newline at end of file
+        self._thread.join()