diff --git a/human/human_render.py b/human/human_render.py
index 8bd8537..68b561b 100644
--- a/human/human_render.py
+++ b/human/human_render.py
@@ -1,10 +1,14 @@
 #encoding = utf8
 import logging
+import time
+from queue import Empty
+from threading import Event, Thread

 from human.message_type import MessageType
 from human_handler import AudioHandler
 from render import VoiceRender, VideoRender, PlayClock
+from utils import SyncQueue

 logger = logging.getLogger(__name__)

@@ -16,10 +20,44 @@ class HumanRender(AudioHandler):
         play_clock = PlayClock()
         self._voice_render = VoiceRender(play_clock, context)
         self._video_render = VideoRender(play_clock, context, self)
+        self._queue = SyncQueue(context.batch_size, "HumanRender_queue")
+        self._exit_event = Event()
+        self._thread = Thread(target=self._on_run, name="HumanRenderThread")
+        self._exit_event.set()
+        self._thread.start()
         self._image_render = None
         self._last_audio_ps = 0
         self._last_video_ps = 0

+    def _on_run(self):
+        logger.info('human render run')
+        while self._exit_event.is_set():
+            self._run_step()
+            time.sleep(0.035)
+
+        logger.info('human render exit')
+
+    def _run_step(self):
+        try:
+            value = self._queue.get()
+            if value is None:
+                return
+            res_frame, idx, audio_frames = value
+            # print('voice render queue size', self._queue.size())
+        except Empty:
+            print('render queue empty')
+            return
+
+        type_ = 1
+        if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
+            type_ = 0
+        if self._voice_render is not None:
+            self._voice_render.render(audio_frames, self._last_audio_ps)
+            self._last_audio_ps = self._last_audio_ps + 0.4
+        if self._video_render is not None:
+            self._video_render.render((res_frame, idx, type_), self._last_video_ps)
+            self._last_video_ps = self._last_video_ps + 0.4
+
     def set_image_render(self, render):
         self._image_render = render

@@ -31,17 +69,18 @@ class HumanRender(AudioHandler):
         super().on_message(message)

     def on_handle(self, stream, index):
-        res_frame, idx, audio_frames = stream
-        self._voice_render.put(audio_frames, self._last_audio_ps)
-        self._last_audio_ps = self._last_audio_ps + 0.4
-        type_ = 1
-        if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
-            type_ = 0
-        self._video_render.put((res_frame, idx, type_), self._last_video_ps)
-        self._last_video_ps = self._last_video_ps + 0.4
-
-        if self._voice_render.is_full():
-            self._context.notify({'msg_id': MessageType.Video_Render_Queue_Full})
+        self._queue.put(stream)
+        # res_frame, idx, audio_frames = stream
+        # self._voice_render.put(audio_frames, self._last_audio_ps)
+        # self._last_audio_ps = self._last_audio_ps + 0.4
+        # type_ = 1
+        # if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
+        #     type_ = 0
+        # self._video_render.put((res_frame, idx, type_), self._last_video_ps)
+        # self._last_video_ps = self._last_video_ps + 0.4
+        #
+        # if self._voice_render.is_full():
+        #     self._context.notify({'msg_id': MessageType.Video_Render_Queue_Full})

     def get_audio_queue_size(self):
         return self._voice_render.size()

@@ -51,8 +90,16 @@
         self._video_render.pause_talk()

     def stop(self):
-        self._voice_render.stop()
-        self._video_render.stop()
+        logger.info('human render stopping')
+        if self._exit_event is None:
+            return
+
+        self._exit_event.clear()
+        if self._thread.is_alive():
+            self._thread.join()
+        logger.info('human render stopped')
+        # self._voice_render.stop()
+        # self._video_render.stop()
         # self._exit_event.clear()
         # self._thread.join()
         '''
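Note on `utils.SyncQueue`: `HumanRender._run_step` above both checks `get()` for `None` and catches `queue.Empty`, so the patch itself does not pin down the queue's contract. A minimal sketch of the interface the new code appears to assume (hypothetical; the real `utils.SyncQueue` may block, bound, or signal emptiness differently):

```python
# Hypothetical sketch of the SyncQueue interface used by HumanRender.
# Not the real utils.SyncQueue; shown only to make the consumer loop concrete.
from queue import Queue, Empty


class SyncQueue:
    def __init__(self, maxsize, name):
        self._name = name
        self._queue = Queue(maxsize=maxsize)  # bounded, so producers back off

    def put(self, item):
        self._queue.put(item)  # blocks while the consumer falls behind

    def get(self, timeout=0.1):
        try:
            return self._queue.get(timeout=timeout)
        except Empty:
            return None  # consumer treats None as "nothing to render yet"

    def size(self):
        return self._queue.qsize()
```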
diff --git a/render/base_render.py b/render/base_render.py
index e9c80ff..7534cb1 100644
--- a/render/base_render.py
+++ b/render/base_render.py
@@ -16,17 +16,21 @@ class BaseRender(ABC):
         self._context = context
         self._type = type_
         self._delay = delay
-        self._queue = SyncQueue(context.batch_size, f'{type_}RenderQueue')
-        self._exit_event = Event()
-        self._thread = Thread(target=self._on_run, name=thread_name)
-        self._exit_event.set()
-        self._thread.start()
+        # self._queue = SyncQueue(context.batch_size, f'{type_}RenderQueue')
+        # self._exit_event = Event()
+        # self._thread = Thread(target=self._on_run, name=thread_name)
+        # self._exit_event.set()
+        # self._thread.start()
+
+    @abstractmethod
+    def render(self, frame, ps):
+        pass

     def _on_run(self):
         logging.info(f'{self._type} render run')
-        while self._exit_event.is_set():
-            self._run_step()
-            time.sleep(self._delay)
+        # while self._exit_event.is_set():
+        #     self._run_step()
+        #     time.sleep(self._delay)

         logging.info(f'{self._type} render exit')
diff --git a/render/video_render.py b/render/video_render.py
index ccc68b0..57135ea 100644
--- a/render/video_render.py
+++ b/render/video_render.py
@@ -17,6 +17,46 @@ class VideoRender(BaseRender):
         self._human_render = human_render
         self._diff_avg_count = 0

+    def render(self, frame, ps):
+        res_frame, idx, type_ = frame
+        clock_time = self._play_clock.clock_time()
+        time_difference = clock_time - ps
+        print("Video frame time", clock_time, ps, time_difference)
+        if abs(time_difference) > self._play_clock.audio_diff_threshold:
+            if self._diff_avg_count < 5:
+                self._diff_avg_count += 1
+            else:
+                if time_difference < -self._play_clock.audio_diff_threshold:
+                    sleep_time = abs(time_difference)
+                    print("Video frame waiting to catch up with audio", sleep_time)
+                    if sleep_time <= 1.0:
+                        time.sleep(sleep_time)
+
+                # elif time_difference > self._play_clock.audio_diff_threshold:  # video ahead of audio by more than 10 ms
+                #     print("Video frame dropped to catch up with audio")
+                #     continue
+
+        else:
+            self._diff_avg_count = 0
+
+        if type_ == 0:
+            combine_frame = self._context.frame_list_cycle[idx]
+        else:
+            bbox = self._context.coord_list_cycle[idx]
+            combine_frame = copy.deepcopy(self._context.frame_list_cycle[idx])
+            y1, y2, x1, x2 = bbox
+            try:
+                res_frame = cv2.resize(res_frame.astype(np.uint8), (x2 - x1, y2 - y1))
+            except Exception:
+                print('resize error')
+                return
+            combine_frame[y1:y2, x1:x2, :3] = res_frame
+
+        image = combine_frame
+        # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        if self._human_render is not None:
+            self._human_render.put_image(image)
+
     def _run_step(self):
         while self._exit_event.is_set():
             try:
diff --git a/render/voice_render.py b/render/voice_render.py
index 6ea22f5..19b776a 100644
--- a/render/voice_render.py
+++ b/render/voice_render.py
@@ -21,6 +21,22 @@ class VoiceRender(BaseRender):
     def is_full(self):
         return self._queue.size() >= self._context.render_batch * 2

+    def render(self, frame, ps):
+        self._play_clock.update_display_time()
+        self._play_clock.current_time = ps
+
+        for audio_frame in frame:
+            data, type_ = audio_frame
+            data = (data * 32767).astype(np.int16)
+
+            if self._audio_render is not None:
+                try:
+                    chunk_len = int(data.shape[0] * 2)  # 2 bytes per int16 sample
+                    # print('audio frame:', data.shape, chunk_len)
+                    self._audio_render.write(data.tobytes(), chunk_len)
+                except Exception as e:
+                    logging.error(f'Error writing audio frame: {e}')
+
     def _run_step(self):
         try:
             value = self._queue.get()
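The worker lifecycle removed from `BaseRender` and re-created in `HumanRender` is an Event-gated loop: the `Event` is set while the thread should run and cleared to request shutdown, the inverse of a typical stop flag. A minimal standalone sketch of that pattern (illustrative names, not from the patch):

```python
# Minimal sketch of the Event-gated worker pattern used in HumanRender:
# set() means "keep running", clear() requests exit, stop() joins the thread.
import time
from threading import Event, Thread


class Worker:
    def __init__(self):
        self._exit_event = Event()
        self._thread = Thread(target=self._on_run, name="WorkerThread")
        self._exit_event.set()    # inverted semantics: set == running
        self._thread.start()

    def _on_run(self):
        while self._exit_event.is_set():
            time.sleep(0.035)  # one unit of work per tick, as in _run_step

    def stop(self):
        self._exit_event.clear()      # ask the loop to exit
        if self._thread.is_alive():
            self._thread.join()       # wait for it to finish
```

Note that each dequeued batch advances `_last_audio_ps` and `_last_video_ps` by 0.4 s while the loop sleeps only 0.035 s per iteration, so real-time pacing presumably comes from the blocking audio write and the `PlayClock` drift checks rather than from the sleep itself.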