# human/human/human_render.py

# -*- coding: utf-8 -*-
import copy
import logging
import queue
import time
from queue import Queue
from threading import Thread, Event

import cv2
import numpy as np

from human_handler import AudioHandler
from render import VoiceRender, VideoRender, PlayClock

logger = logging.getLogger(__name__)


class HumanRender(AudioHandler):
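    """Render stage of the handler chain: plays audio and video in sync.

    on_handle() receives (res_frame, idx, audio_frames) tuples, feeds the
    audio to VoiceRender and the frame to VideoRender; both share one
    PlayClock so playback stays aligned.
    """
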
    def __init__(self, context, handler):
        super().__init__(context, handler)

        play_clock = PlayClock()  # shared clock that keeps audio and video aligned
        self._voice_render = VoiceRender(play_clock)
        self._video_render = VideoRender(play_clock, context, self)
        # Only used by the retired threaded loop kept at the bottom of this
        # file; the active path pushes straight into the two renderers.
        self._queue = Queue(context.batch_size * 2)
        self._image_render = None

    def set_image_render(self, render):
        self._image_render = render

    def put_image(self, image):
        if self._image_render is not None:
            self._image_render.on_render(image)
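
    # Note: VideoRender is constructed with a reference to this object, so
    # finished frames presumably arrive back through put_image() above.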

    def on_handle(self, stream, index):
        res_frame, idx, audio_frames = stream
        self._voice_render.put(audio_frames)
        # Both paired audio frames carrying a non-zero flag apparently marks
        # silence (type_ = 0, show the idle frame); otherwise type_ = 1 and
        # the inferred face frame gets composited.
        type_ = 1
        if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
            type_ = 0
        self._video_render.put((res_frame, idx, type_))

    def pause_handle(self):
        # Apply back-pressure only once the video queue holds more than two
        # batches; otherwise keep accepting frames.
        if self._video_render.size() > self._context.batch_size * 2:
            super().pause_handle()

    def pause_talk(self):
        self._voice_render.pause_talk()
        self._video_render.pause_talk()

    def stop(self):
        self._voice_render.stop()
        self._video_render.stop()
        # self._exit_event.clear()
        # self._thread.join()
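
    # Retired single-threaded implementation, kept disabled inside a string
    # literal for reference: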
    '''
    self._exit_event = Event()
    self._thread = Thread(target=self._on_run)
    self._exit_event.set()
    self._thread.start()

    def _on_run(self):
        logging.info('human render run')
        while self._exit_event.is_set():
            self._run_step()
            time.sleep(0.02)
        logging.info('human render exit')

    def _run_step(self):
        try:
            res_frame, idx, audio_frames = self._queue.get(block=True, timeout=0.002)
        except queue.Empty:
            # print('render queue.Empty:')
            return None

        if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
            # Silence: show the original cycle frame unchanged.
            combine_frame = self._context.frame_list_cycle[idx]
        else:
            # Speech: paste the inferred face crop back into its bounding box.
            bbox = self._context.coord_list_cycle[idx]
            combine_frame = copy.deepcopy(self._context.frame_list_cycle[idx])
            y1, y2, x1, x2 = bbox
            try:
                res_frame = cv2.resize(res_frame.astype(np.uint8), (x2 - x1, y2 - y1))
            except Exception:
                return
            # combine_frame = get_image(ori_frame, res_frame, bbox)
            # t = time.perf_counter()
            combine_frame[y1:y2, x1:x2] = res_frame

        image = cv2.cvtColor(combine_frame, cv2.COLOR_BGR2RGB)
        if self._image_render is not None:
            self._image_render.on_render(image)

        for audio_frame in audio_frames:
            frame, type_ = audio_frame
            frame = (frame * 32767).astype(np.int16)
            if self._audio_render is not None:
                # 16-bit mono PCM: byte length is 2 * sample count.
                self._audio_render.write(frame.tobytes(), int(frame.shape[0] * 2))
            # new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0])
            # new_frame.planes[0].update(frame.tobytes())
            # new_frame.sample_rate = 16000
    '''
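

# A minimal wiring sketch (hypothetical usage: `context` and `handler` come
# from the surrounding pipeline, with `context` assumed to expose batch_size,
# frame_list_cycle and coord_list_cycle; the image render target must
# implement on_render(image)):
#
#   render = HumanRender(context, handler)
#   render.set_image_render(display_widget)
#   ...batches arrive via on_handle() from the handler chain...
#   render.pause_talk()
#   render.stop()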