From 3f833a3c4299a32fd62018ffb9a1b4ff858a10e8 Mon Sep 17 00:00:00 2001 From: brige Date: Sun, 27 Oct 2024 13:51:22 +0800 Subject: [PATCH] modify render --- asr/sherpa_ncnn_asr.py | 3 ++- human/audio_inference_handler.py | 1 - human/audio_mal_handler.py | 2 +- human/human_render.py | 1 - render/video_render.py | 29 ++++++++++++++++++----------- render/voice_render.py | 12 ++++++++++-- 6 files changed, 31 insertions(+), 17 deletions(-) diff --git a/asr/sherpa_ncnn_asr.py b/asr/sherpa_ncnn_asr.py index cb94bff..6e96400 100644 --- a/asr/sherpa_ncnn_asr.py +++ b/asr/sherpa_ncnn_asr.py @@ -57,12 +57,13 @@ class SherpaNcnnAsr(AsrBase): def _recognize_loop(self): segment_id = 0 + time.sleep(3) last_result = "" logger.info(f'_recognize_loop') while not self._stop_event.is_set(): self._notify_complete('中国人民万岁') segment_id += 1 - time.sleep(10) + time.sleep(60) # # with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s: # while not self._stop_event.is_set(): diff --git a/human/audio_inference_handler.py b/human/audio_inference_handler.py index befe88b..a363cbc 100644 --- a/human/audio_inference_handler.py +++ b/human/audio_inference_handler.py @@ -36,7 +36,6 @@ class AudioInferenceHandler(AudioHandler): self._audio_queue.put(stream) def on_message(self, message): - print('human render notify:', message) super().on_message(message) def __on_run(self): diff --git a/human/audio_mal_handler.py b/human/audio_mal_handler.py index 34cc377..5cad0e0 100644 --- a/human/audio_mal_handler.py +++ b/human/audio_mal_handler.py @@ -36,7 +36,7 @@ class AudioMalHandler(AudioHandler): if self._wait: self._wait = False self._condition.notify() - print('AudioMalHandler notify') + print('AudioMalHandler notify') elif message['msg_id'] == MessageType.Video_Render_Queue_Full: if not self._wait: self._wait = True diff --git a/human/human_render.py b/human/human_render.py index a8bc60b..bb497db 100644 --- a/human/human_render.py +++ b/human/human_render.py @@ -34,7 +34,6 @@ class HumanRender(AudioHandler): self._image_render.on_render(image) def on_message(self, message): - print('human render notify:', message) super().on_message(message) def on_handle(self, stream, index): diff --git a/render/video_render.py b/render/video_render.py index e6ccb7b..0a27ff8 100644 --- a/render/video_render.py +++ b/render/video_render.py @@ -15,30 +15,37 @@ class VideoRender(BaseRender): def __init__(self, play_clock, context, human_render): super().__init__(play_clock, context, 'Video') self._human_render = human_render + self._diff_avg_count = 0 def _run_step(self): while self._exit_event.is_set(): try: - frame, ps = self._queue.get(block=True, timeout=0.01) + frame, ps = self._queue.get(block=True, timeout=0.02) res_frame, idx, type_ = frame except Empty: return clock_time = self._play_clock.clock_time() time_difference = clock_time - ps + if abs(time_difference) > self._play_clock.audio_diff_threshold: + if self._diff_avg_count < 10: + self._diff_avg_count += 1 + else: + if time_difference < -self._play_clock.audio_diff_threshold: + sleep_time = abs(time_difference ) + # print("Video frame waiting to catch up with audio", sleep_time) + if sleep_time <= 1.0: + time.sleep(sleep_time) - print('video render:', ps, ' ', clock_time, ' ', time_difference) - if time_difference < -self._play_clock.audio_diff_threshold: - sleep_time = abs(time_difference + self._play_clock.audio_diff_threshold) - print("Video frame waiting to catch up with audio", sleep_time) - if sleep_time > 0: - time.sleep(sleep_time) + # elif time_difference > self._play_clock.audio_diff_threshold: # 视频比音频快超过10ms + # print("Video frame dropped to catch up with audio") + # continue - elif time_difference > self._play_clock.audio_diff_threshold: # 视频比音频快超过10ms - print("Video frame dropped to catch up with audio") - continue + else: + self._diff_avg_count = 0 - print('get face', self._queue.qsize()) + print('video render:', ps, ' ', clock_time, ' ', time_difference, + 'get face', self._queue.qsize(), self._diff_avg_count) if type_ == 0: combine_frame = self._context.frame_list_cycle[idx] diff --git a/render/voice_render.py b/render/voice_render.py index 5e0dde5..53cd248 100644 --- a/render/voice_render.py +++ b/render/voice_render.py @@ -14,8 +14,9 @@ logger = logging.getLogger(__name__) class VoiceRender(BaseRender): def __init__(self, play_clock, context): - super().__init__(play_clock, context, 'Voice') self._audio_render = AudioRender() + self._is_empty = True + super().__init__(play_clock, context, 'Voice') def is_full(self): return self._queue.qsize() >= self._context.render_batch * 2 @@ -23,11 +24,18 @@ class VoiceRender(BaseRender): def _run_step(self): try: audio_frames, ps = self._queue.get(block=True, timeout=0.01) - print('voice render queue size', self._queue.qsize()) + # print('voice render queue size', self._queue.qsize()) except Empty: self._context.notify({'msg_id': MessageType.Video_Render_Queue_Empty}) + if not self._is_empty: + print('voice render queue empty') + self._is_empty = True return + if self._is_empty: + print('voice render queue not empty') + self._is_empty = False + status = MessageType.Video_Render_Queue_Not_Empty if self._queue.qsize() < self._context.render_batch: status = MessageType.Video_Render_Queue_Empty