diff --git a/human/human_render.py b/human/human_render.py index 9f0dd9f..aa0a0c6 100644 --- a/human/human_render.py +++ b/human/human_render.py @@ -10,6 +10,7 @@ import cv2 import numpy as np from human_handler import AudioHandler +from render import VoiceRender, VideoRender, PlayClock logger = logging.getLogger(__name__) @@ -18,10 +19,33 @@ class HumanRender(AudioHandler): def __init__(self, context, handler): super().__init__(context, handler) + play_clock = PlayClock() + self._voice_render = VoiceRender(play_clock) + self._video_render = VideoRender(play_clock, context, self) self._queue = Queue(context.batch_size * 2) - self._audio_render = None self._image_render = None + def set_image_render(self, render): + self._image_render = render + + def put_image(self, image): + if self._image_render is not None: + self._image_render.on_render(image) + + def on_handle(self, stream, index): + res_frame, idx, audio_frames = stream + self._voice_render.put(audio_frames) + type_ = 0 + if audio_frames[0][1] != 0 and audio_frames[1][1] != 0: + type_ = 1 + self._video_render.put((res_frame, idx, type_)) + + def stop(self): + self._voice_render.stop() + self._video_render.stop() + # self._exit_event.clear() + # self._thread.join() +''' self._exit_event = Event() self._thread = Thread(target=self._on_run) self._exit_event.set() @@ -69,16 +93,4 @@ class HumanRender(AudioHandler): # new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0]) # new_frame.planes[0].update(frame.tobytes()) # new_frame.sample_rate = 16000 - - def set_audio_render(self, render): - self._audio_render = render - - def set_image_render(self, render): - self._image_render = render - - def on_handle(self, stream, index): - self._queue.put(stream) - - def stop(self): - self._exit_event.clear() - self._thread.join() +''' diff --git a/render/__init__.py b/render/__init__.py index ef7d113..8d7f244 100644 --- a/render/__init__.py +++ b/render/__init__.py @@ -1,4 +1,5 @@ #encoding = utf8 -from 
.audio_render import AudioRenderImpl -from .video_render import VideoRenderImpl +from .voice_render import VoiceRender +from .video_render import VideoRender +from .play_clock import PlayClock diff --git a/render/audio_render.py b/render/audio_render.py deleted file mode 100644 index 6ac145b..0000000 --- a/render/audio_render.py +++ /dev/null @@ -1,69 +0,0 @@ -#encoding = utf8 -import logging -import time -from queue import Queue, Empty -from threading import Event, Thread - -import numpy as np - -from audio_render import AudioRender -from base_render import BaseRender - -logger = logging.getLogger(__name__) - - -class AudioRenderImpl(BaseRender): - def __init__(self, start): - super().__init__(start) - - self._queue = Queue() - self._exit_event = Event() - self._thread = Thread(target=self._on_run) - self._exit_event.set() - self._thread.start() - self._audio_render = AudioRender() - self._current_time = 0 - self._display_time = 0 - - def _on_run(self): - logging.info('Audio render run') - while self._exit_event.is_set(): - self._run_step() - time.sleep(0.02) - - logging.info('Audio render exit') - - def _run_step(self): - try: - audio_frames, ps = self._queue.get(block=True, timeout=0.01) - except Empty: - return - - self._display_time = time.time() - self._current_time = ps - - for audio_frame in audio_frames: - frame, type_ = audio_frame - frame = (frame * 32767).astype(np.int16) - - if self._audio_render is not None: - try: - self._audio_render.write(frame.tobytes(), int(frame.shape[0] * 2)) - except Exception as e: - logging.error(f'Error writing audio frame: {e}') - - def put(self, frame): - ps = time.time() - self._start - self._queue.put_nowait((frame, ps)) - - def stop(self): - self._exit_event.clear() - self._thread.join() - - def play_time(self): - elapsed = time.time() - self._display_time - return self._current_time + elapsed - - - - diff --git a/render/base_render.py b/render/base_render.py index fa8f7fa..4138ad7 100644 --- a/render/base_render.py 
+++ b/render/base_render.py @@ -1,18 +1,41 @@ #encoding = utf8 - +import logging +import time from abc import ABC, abstractmethod +from queue import Queue +from threading import Event, Thread + +logger = logging.getLogger(__name__) class BaseRender(ABC): - def __init__(self, start): - self._start = start + def __init__(self, play_clock): + self._play_clock = play_clock + self._queue = Queue() + self._exit_event = Event() + self._thread = Thread(target=self._on_run) + self._exit_event.set() + self._thread.start() + + def _on_run(self): + logging.info('Audio render run') + while self._exit_event.is_set(): + self._run_step() + time.sleep(0.02) + + logging.info('Audio render exit') - @abstractmethod def put(self, frame): - pass + ps = time.time() - self._play_clock.start_time + self._queue.put_nowait((frame, ps)) + + def stop(self): + self._queue.queue.clear() + self._exit_event.clear() + self._thread.join() @abstractmethod - def stop(self): + def _run_step(self): pass diff --git a/render/play_clock.py b/render/play_clock.py new file mode 100644 index 0000000..0fc02c6 --- /dev/null +++ b/render/play_clock.py @@ -0,0 +1,37 @@ +#encoding = utf8 +import time + + +class PlayClock: + def __init__(self): + self._start = time.time() + self._current_time = 0 + self._display_time = 0 + self._audio_diff_threshold = 0.01 + + @property + def start_time(self): + return self._start + + @property + def current_time(self): + return self._current_time + + @property + def audio_diff_threshold(self): + return self._audio_diff_threshold + + @current_time.setter + def current_time(self, v): + self._current_time = v + + @property + def display_time(self): + return self._display_time + + def update_display_time(self): + self._display_time = time.time() + + def clock_time(self): + elapsed = time.time() - self._display_time + return self.current_time + elapsed diff --git a/render/video_render.py b/render/video_render.py index a862f60..8e5c215 100644 --- a/render/video_render.py +++ 
b/render/video_render.py @@ -1,7 +1,48 @@ #encoding = utf8 +import copy +from queue import Empty + +import cv2 +import numpy as np + from base_render import BaseRender -class VideoRenderImpl(BaseRender): - def __init__(self, start): - super().__init__(start) +class VideoRender(BaseRender): + def __init__(self, play_clock, context, human_render): + super().__init__(play_clock) + self._context = context + self._human_render = human_render + + def _run_step(self): + try: + (res_frame, idx, type_), ps = self._queue.get(block=True, timeout=0.01) + except Empty: + return + + if type_ != 0: + combine_frame = self._context.frame_list_cycle[idx] + else: + bbox = self._context.coord_list_cycle[idx] + combine_frame = copy.deepcopy(self._context.frame_list_cycle[idx]) + y1, y2, x1, x2 = bbox + try: + res_frame = cv2.resize(res_frame.astype(np.uint8), (x2 - x1, y2 - y1)) + except: + return + # combine_frame = get_image(ori_frame,res_frame,bbox) + # t=time.perf_counter() + combine_frame[y1:y2, x1:x2] = res_frame + + clock_time = self._play_clock.clock_time() + time_difference = abs(clock_time - ps) + if time_difference > self._play_clock.audio_diff_threshold: + print('video is slow') + return + # elif time_difference < self._play_clock.audio_diff_threshold: + + image = combine_frame + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + if self._human_render is not None: + self._human_render.put_image(image) + diff --git a/render/voice_render.py b/render/voice_render.py new file mode 100644 index 0000000..0a703b0 --- /dev/null +++ b/render/voice_render.py @@ -0,0 +1,36 @@ +#encoding = utf8 +import logging +import time +from queue import Empty + +import numpy as np + +from audio_render import AudioRender +from base_render import BaseRender + +logger = logging.getLogger(__name__) + + +class VoiceRender(BaseRender): + def __init__(self, play_clock): + super().__init__(play_clock) + self._audio_render = AudioRender() + + def _run_step(self): + try: + audio_frames, ps = 
self._queue.get(block=True, timeout=0.01) + except Empty: + return + + self._play_clock.update_display_time() + self._play_clock.current_time = ps + + for audio_frame in audio_frames: + frame, type_ = audio_frame + frame = (frame * 32767).astype(np.int16) + + if self._audio_render is not None: + try: + self._audio_render.write(frame.tobytes(), int(frame.shape[0] * 2)) + except Exception as e: + logging.error(f'Error writing audio frame: {e}') diff --git a/ui.py b/ui.py index 200eb29..c9dd9fa 100644 --- a/ui.py +++ b/ui.py @@ -61,7 +61,6 @@ class App(customtkinter.CTk): self._init_image_canvas() - self._audio_render = AudioRender() # self._human = Human() self._queue = Queue() self._human_context = HumanContext()