diff --git a/asr/sherpa_ncnn_asr.py b/asr/sherpa_ncnn_asr.py index 5c7a25f..629f90e 100644 --- a/asr/sherpa_ncnn_asr.py +++ b/asr/sherpa_ncnn_asr.py @@ -27,6 +27,7 @@ class SherpaNcnnAsr(AsrBase): super().__init__() self._recognizer = self._create_recognizer() logger.info('SherpaNcnnAsr init') + print('SherpaNcnnAsr init') def __del__(self): self.__del__() @@ -60,17 +61,10 @@ class SherpaNcnnAsr(AsrBase): time.sleep(3) last_result = "" logger.info(f'_recognize_loop') - while self._stop_event.is_set(): - logger.info(f'_recognize_loop000') - self._notify_complete('介绍中国5000年历史文学') - logger.info(f'_recognize_loop111') - segment_id += 1 - time.sleep(60) - logger.info(f'_recognize_loop222') - logger.info(f'_recognize_loop exit') -''' + print(f'_recognize_loop') + with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s: - while not self._stop_event.is_set(): + while self._stop_event.is_set(): samples, _ = s.read(self._samples_per_read) # a blocking read samples = samples.reshape(-1) self._recognizer.accept_waveform(self._sample_rate, samples) @@ -89,4 +83,13 @@ class SherpaNcnnAsr(AsrBase): self._notify_complete(result) segment_id += 1 self._recognizer.reset() +''' + while self._stop_event.is_set(): + logger.info(f'_recognize_loop000') + self._notify_complete('介绍中国5000年历史文学') + logger.info(f'_recognize_loop111') + segment_id += 1 + time.sleep(60) + logger.info(f'_recognize_loop222') + logger.info(f'_recognize_loop exit') ''' diff --git a/face/img00016.jpg b/face/img00016.jpg deleted file mode 100644 index 3bc141f..0000000 Binary files a/face/img00016.jpg and /dev/null differ diff --git a/face/img00020.png.bak b/face/img00020.png.bak deleted file mode 100644 index 22d9e15..0000000 Binary files a/face/img00020.png.bak and /dev/null differ diff --git a/human/audio_inference_handler.py b/human/audio_inference_handler.py index f24bce4..899fd2d 100644 --- a/human/audio_inference_handler.py +++ b/human/audio_inference_handler.py @@ -75,7 +75,7 @@ class AudioInferenceHandler(AudioHandler): batch_size = self._context.batch_size try: mel_batch = self._mal_queue.get(timeout=0.02) - print('AudioInferenceHandler mel_batch:', len(mel_batch), 'size:', self._mal_queue.size()) + # print('AudioInferenceHandler mel_batch:', len(mel_batch), 'size:', self._mal_queue.size()) except queue.Empty: continue @@ -100,7 +100,6 @@ class AudioInferenceHandler(AudioHandler): self.on_next_handle((None, mirror_index(length, index), audio_frames[i * 2:i * 2 + 2]), 0) index = index + 1 - print('AudioInferenceHandler all silence') else: logger.info('infer=======') t = time.perf_counter() diff --git a/human/audio_mal_handler.py b/human/audio_mal_handler.py index ed752ae..59bc98f 100644 --- a/human/audio_mal_handler.py +++ b/human/audio_mal_handler.py @@ -20,15 +20,16 @@ class AudioMalHandler(AudioHandler): EventBus().register('stop', self._on_stop) - self._queue = SyncQueue(context.batch_size, "AudioMalHandler_queue") - self._exit_event = Event() - self._thread = Thread(target=self._on_run, name="AudioMalHandlerThread") - self._exit_event.set() - self._thread.start() + self._is_running = True + self._queue = SyncQueue(context.batch_size * 2, "AudioMalHandler_queue") self.frames = [] self.chunk = context.sample_rate // context.fps - self._is_running = True + + self._exit_event = Event() + self._exit_event.set() + self._thread = Thread(target=self._on_run, name="AudioMalHandlerThread") + self._thread.start() logger.info("AudioMalHandler init") def __del__(self): diff --git a/human/human_render.py b/human/human_render.py index a627ff7..af01588 100644 --- a/human/human_render.py +++ b/human/human_render.py @@ -42,8 +42,14 @@ class HumanRender(AudioHandler): def _on_run(self): logging.info('human render run') while self._exit_event.is_set() and self._is_running: + # t = time.time() self._run_step() - time.sleep(0.038) + # delay = time.time() - t + delay = 0.03805 # - delay + # print(delay) + # if delay <= 0.0: + # continue + time.sleep(delay) logging.info('human render exit') @@ -53,7 +59,7 @@ class HumanRender(AudioHandler): if value is None: return res_frame, idx, audio_frames = value - print('voice render queue size', self._queue.size()) + # print('render queue size', self._queue.size()) if not self._empty_log: self._empty_log = True logging.info('render render:') diff --git a/nlp/nlp_base.py b/nlp/nlp_base.py index bf11ab5..62f91fc 100644 --- a/nlp/nlp_base.py +++ b/nlp/nlp_base.py @@ -55,7 +55,7 @@ class NLPBase(AsrObserver): def ask(self, question): logger.info(f'ask:{question}') self._is_running = True - task = self._ask_queue.add_task(self._request, question) + self._ask_queue.add_task(self._request, question) logger.info(f'ask:{question} completed') def stop(self): diff --git a/render/video_render.py b/render/video_render.py index 61ac308..2b53137 100644 --- a/render/video_render.py +++ b/render/video_render.py @@ -15,28 +15,9 @@ class VideoRender(BaseRender): def __init__(self, play_clock, context, human_render): super().__init__(play_clock, context, 'Video') self._human_render = human_render - self._diff_avg_count = 0 def render(self, frame, ps): res_frame, idx, type_ = frame - clock_time = self._play_clock.clock_time() - time_difference = clock_time - ps - if abs(time_difference) > self._play_clock.audio_diff_threshold: - if self._diff_avg_count < 5: - self._diff_avg_count += 1 - else: - if time_difference < -self._play_clock.audio_diff_threshold: - sleep_time = abs(time_difference) - print("Video frame waiting to catch up with audio", sleep_time) - if sleep_time <= 1.0: - time.sleep(sleep_time) - - # elif time_difference > self._play_clock.audio_diff_threshold: # 视频比音频快超过10ms - # print("Video frame dropped to catch up with audio") - # continue - - else: - self._diff_avg_count = 0 if type_ == 0: combine_frame = self._context.frame_list_cycle[idx] diff --git a/test/test_mzzsfy_tts.py b/test/test_mzzsfy_tts.py index 5211ca2..a244b2a 100644 --- a/test/test_mzzsfy_tts.py +++ b/test/test_mzzsfy_tts.py @@ -44,7 +44,8 @@ def main(): from io import BytesIO async def fetch_audio(): - url = "http://localhost:8082/v1/audio/speech" + # url = "http://localhost:8082/v1/audio/speech" + url = "https://tts.mzzsfy.eu.org/v1/audio/speech" data = { "model": "tts-1", "input": "写了一个高性能tts(文本转声音)工具,5千字仅需5秒,免费使用", diff --git a/tts/tts_base.py b/tts/tts_base.py index 940e399..5d14ae6 100644 --- a/tts/tts_base.py +++ b/tts/tts_base.py @@ -32,13 +32,13 @@ class TTSBase(NLPCallback): self._handle = value async def _request(self, txt: str, index): - print('_request:', txt) + # print('_request:', txt) t = time.time() stream = await self._on_request(txt) if stream is None: - print(f'-------stream is None') + logger.warn(f'-------stream is None') return - print(f'-------tts time:{time.time() - t:.4f}s') + logger.info(f'-------tts time:{time.time() - t:.4f}s, txt:{txt}') if self._handle is not None and self._is_running: await self._on_handle(stream, index) else: @@ -59,13 +59,13 @@ class TTSBase(NLPCallback): def message(self, txt): txt = txt.strip() if len(txt) == 0: - logger.info(f'message is empty') + # logger.info(f'message is empty') return logger.info(f'message:{txt}') index = 0 if self._handle is not None: index = self._handle.get_index() - print(f'message txt-index:{txt}, index {index}') + # print(f'message txt-index:{txt}, index {index}') self._message_queue.add_task(self._request, txt, index) def stop(self): diff --git a/tts/tts_edge_http.py b/tts/tts_edge_http.py index c1b9dbb..1410e1f 100644 --- a/tts/tts_edge_http.py +++ b/tts/tts_edge_http.py @@ -22,7 +22,7 @@ class TTSEdgeHttp(TTSBase): logger.info(f"TTSEdge init, {voice}") async def _on_request(self, txt: str): - print('TTSEdgeHttp, _on_request, txt:', txt) + logger.info(f'TTSEdgeHttp, _on_request, txt:{txt}') data = { "model": "tts-1", "input": txt, @@ -38,7 +38,7 @@ class TTSEdgeHttp(TTSBase): return stream else: byte_stream = None - return byte_stream + return byte_stream, None async def _on_handle(self, stream, index): print('-------tts _on_handle') diff --git a/ui/pygame_ui.py b/ui/pygame_ui.py index 6dca3ec..9898483 100644 --- a/ui/pygame_ui.py +++ b/ui/pygame_ui.py @@ -55,7 +55,11 @@ class PyGameUI: if self._queue.empty(): return image = self._queue.get() - self._human_image = pygame.image.frombuffer(image.tobytes(), image.shape[1::-1], "RGB") + color_format = "RGB" + if 4 == image.shape[2]: + color_format = "RGBA" + + self._human_image = pygame.image.frombuffer(image.tobytes(), image.shape[1::-1], color_format) def stop(self): logger.info('stop') diff --git a/utils/async_task_queue.py b/utils/async_task_queue.py index f95317b..ff99f45 100644 --- a/utils/async_task_queue.py +++ b/utils/async_task_queue.py @@ -45,7 +45,7 @@ class AsyncTaskQueue: await func(*args) # Execute async function except Exception as e: - logging.error(f'{self._name} error:', e) + logging.error(f'{self._name} error: {e}') finally: self._queue.task_done() diff --git a/utils/utils.py b/utils/utils.py index 78115b5..28c3a19 100644 --- a/utils/utils.py +++ b/utils/utils.py @@ -194,7 +194,7 @@ def config_logging(file_name: str, console_level: int = logging.INFO, file_level console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter( - '[%(asctime)s %(levelname)s] %(message)s', + '[%(asctime)s.%(msecs)03d %(levelname)s] %(message)s', datefmt="%Y/%m/%d %H:%M:%S" )) console_handler.setLevel(console_level)