diff --git a/asr/sherpa_ncnn_asr.py b/asr/sherpa_ncnn_asr.py index 6e96400..35df115 100644 --- a/asr/sherpa_ncnn_asr.py +++ b/asr/sherpa_ncnn_asr.py @@ -61,7 +61,7 @@ class SherpaNcnnAsr(AsrBase): last_result = "" logger.info(f'_recognize_loop') while not self._stop_event.is_set(): - self._notify_complete('中国人民万岁') + self._notify_complete('介绍中国5000年历史文学') segment_id += 1 time.sleep(60) # diff --git a/face/img00016.jpg b/face/img00016.jpg deleted file mode 100644 index 3bc141f..0000000 Binary files a/face/img00016.jpg and /dev/null differ diff --git a/human/audio_inference_handler.py b/human/audio_inference_handler.py index fbef781..dc78aea 100644 --- a/human/audio_inference_handler.py +++ b/human/audio_inference_handler.py @@ -40,7 +40,7 @@ class AudioInferenceHandler(AudioHandler): super().on_message(message) def __on_run(self): - wav2lip_path = os.path.join(current_file_path, '..', 'checkpoints', 'wav2lip.pth') + wav2lip_path = os.path.join(current_file_path, '..', 'checkpoints', 'wav2lip_gan.pth') logger.info(f'AudioInferenceHandler init, path:{wav2lip_path}') model = load_model(wav2lip_path) logger.info("Model loaded") diff --git a/human/human_render.py b/human/human_render.py index ae9b211..6f88673 100644 --- a/human/human_render.py +++ b/human/human_render.py @@ -17,7 +17,8 @@ class HumanRender(AudioHandler): self._voice_render = VoiceRender(play_clock, context) self._video_render = VideoRender(play_clock, context, self) self._image_render = None - self._last_ps = 0 + self._last_audio_ps = 0 + self._last_video_ps = 0 def set_image_render(self, render): self._image_render = render @@ -31,12 +32,13 @@ class HumanRender(AudioHandler): def on_handle(self, stream, index): res_frame, idx, audio_frames = stream - self._voice_render.put(audio_frames, self._last_ps) + self._voice_render.put(audio_frames, self._last_audio_ps) + self._last_audio_ps = self._last_audio_ps + 0.2 type_ = 1 if audio_frames[0][1] != 0 and audio_frames[1][1] != 0: type_ = 0 - self._video_render.put((res_frame, idx, type_), self._last_ps) - self._last_ps = self._last_ps + 0.2 + self._video_render.put((res_frame, idx, type_), self._last_video_ps) + self._last_video_ps = self._last_video_ps + 0.4 if self._voice_render.is_full(): self._context.notify({'msg_id': MessageType.Video_Render_Queue_Full}) diff --git a/nlp/nlp_doubao.py b/nlp/nlp_doubao.py index d11f786..a8a9a3c 100644 --- a/nlp/nlp_doubao.py +++ b/nlp/nlp_doubao.py @@ -44,7 +44,7 @@ class DouBao(NLPBase): sec = '' async for completion in stream: sec = sec + completion.choices[0].delta.content - # print(sec) + print('DouBao content:', sec) sec, message = self._split_handle.handle(sec) if len(message) > 0: self._on_callback(message) diff --git a/nlp/nlp_split.py b/nlp/nlp_split.py index 930ff46..75f6327 100644 --- a/nlp/nlp_split.py +++ b/nlp/nlp_split.py @@ -14,6 +14,7 @@ class PunctuationSplit(NLPSplit): self._pattern = r'(?