modify video && voice render

This commit is contained in:
brige 2024-10-22 19:57:30 +08:00
parent b55bf34a74
commit 7e4550717f
8 changed files with 175 additions and 95 deletions

View File

@ -10,6 +10,7 @@ import cv2
import numpy as np import numpy as np
from human_handler import AudioHandler from human_handler import AudioHandler
from render import VoiceRender, VideoRender, PlayClock
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -18,10 +19,33 @@ class HumanRender(AudioHandler):
def __init__(self, context, handler): def __init__(self, context, handler):
super().__init__(context, handler) super().__init__(context, handler)
play_clock = PlayClock()
self._voice_render = VoiceRender(play_clock)
self._video_render = VideoRender(play_clock, context, self)
self._queue = Queue(context.batch_size * 2) self._queue = Queue(context.batch_size * 2)
self._audio_render = None
self._image_render = None self._image_render = None
def set_image_render(self, render):
    # Register the sink whose on_render() will receive composited frames.
    self._image_render = render
def put_image(self, image):
    """Forward a composited frame to the registered image sink, if any."""
    sink = self._image_render
    if sink is not None:
        sink.on_render(image)
def on_handle(self, stream, index):
    """Dispatch one inference result to the audio and video renderers.

    stream is a (res_frame, idx, audio_frames) tuple; audio_frames is a
    sequence of (frame, type) pairs where a non-zero type appears to mark
    silence — TODO confirm against the producer.
    """
    res_frame, idx, audio_frames = stream
    self._voice_render.put(audio_frames)
    # Default to 0 (render the synthesized mouth region); only when BOTH
    # audio frames are flagged non-zero (silent) fall back to the original
    # cycle frame (type_ = 1).  The original initialized type_ to 1, which
    # made the branch dead and the synthesized frame permanently unused.
    type_ = 0
    if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
        type_ = 1
    self._video_render.put((res_frame, idx, type_))
def stop(self):
    """Stop the voice and video renderer worker threads."""
    self._voice_render.stop()
    self._video_render.stop()
    # Leftover from the pre-renderer-thread implementation; dead code.
    # self._exit_event.clear()
    # self._thread.join()
'''
self._exit_event = Event() self._exit_event = Event()
self._thread = Thread(target=self._on_run) self._thread = Thread(target=self._on_run)
self._exit_event.set() self._exit_event.set()
@ -69,16 +93,4 @@ class HumanRender(AudioHandler):
# new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0]) # new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0])
# new_frame.planes[0].update(frame.tobytes()) # new_frame.planes[0].update(frame.tobytes())
# new_frame.sample_rate = 16000 # new_frame.sample_rate = 16000
'''
def set_audio_render(self, render):
self._audio_render = render
def set_image_render(self, render):
self._image_render = render
def on_handle(self, stream, index):
self._queue.put(stream)
def stop(self):
self._exit_event.clear()
self._thread.join()

View File

@ -1,4 +1,5 @@
#encoding = utf8 #encoding = utf8
from .audio_render import AudioRenderImpl from .voice_render import VoiceRender
from .video_render import VideoRenderImpl from .video_render import VideoRender
from .play_clock import PlayClock

View File

@ -1,69 +0,0 @@
#encoding = utf8
import logging
import time
from queue import Queue, Empty
from threading import Event, Thread
import numpy as np
from audio_render import AudioRender
from base_render import BaseRender
logger = logging.getLogger(__name__)
class AudioRenderImpl(BaseRender):
    """Plays queued PCM audio batches on a worker thread and tracks the
    playback position so video rendering can sync against it."""

    def __init__(self, start):
        super().__init__(start)
        self._queue = Queue()
        # Fully initialize every attribute the worker thread touches BEFORE
        # starting it: the original started the thread first, so an early
        # _run_step() could observe a missing _audio_render / _display_time.
        self._audio_render = AudioRender()
        self._current_time = 0
        self._display_time = 0
        self._exit_event = Event()
        self._exit_event.set()
        self._thread = Thread(target=self._on_run)
        self._thread.start()

    def _on_run(self):
        """Worker loop: drain the queue until stop() clears the event."""
        logger.info('Audio render run')
        while self._exit_event.is_set():
            self._run_step()
            time.sleep(0.02)
        logger.info('Audio render exit')

    def _run_step(self):
        """Pop one batch of audio frames and write them to the device."""
        try:
            audio_frames, ps = self._queue.get(block=True, timeout=0.01)
        except Empty:
            return
        # Remember when (wall clock) and where (stream time) playback is,
        # so play_time() can extrapolate the current position.
        self._display_time = time.time()
        self._current_time = ps
        for frame, type_ in audio_frames:
            # float [-1, 1] -> signed 16-bit PCM
            frame = (frame * 32767).astype(np.int16)
            if self._audio_render is not None:
                try:
                    self._audio_render.write(frame.tobytes(), int(frame.shape[0] * 2))
                except Exception as e:
                    logger.error('Error writing audio frame: %s', e)

    def put(self, frame):
        """Queue frames, stamped with their offset from the start time."""
        ps = time.time() - self._start
        self._queue.put_nowait((frame, ps))

    def stop(self):
        """Signal the worker loop to exit and wait for it to finish."""
        self._exit_event.clear()
        self._thread.join()

    def play_time(self):
        """Current playback position: last stamped position plus the
        wall-clock time elapsed since it was stamped."""
        elapsed = time.time() - self._display_time
        return self._current_time + elapsed

View File

@ -1,18 +1,41 @@
#encoding = utf8 #encoding = utf8
import logging
import time
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from queue import Queue
from threading import Event, Thread
logger = logging.getLogger(__name__)
class BaseRender(ABC): class BaseRender(ABC):
def __init__(self, start): def __init__(self, play_clock):
self._start = start self._play_clock = play_clock
self._queue = Queue()
self._exit_event = Event()
self._thread = Thread(target=self._on_run)
self._exit_event.set()
self._thread.start()
def _on_run(self):
logging.info('Audio render run')
while self._exit_event.is_set():
self.__run_step()
time.sleep(0.02)
logging.info('Audio render exit')
@abstractmethod
def put(self, frame): def put(self, frame):
pass ps = time.time() - self._play_clock.start_time()
self._queue.put_nowait((frame, ps))
def stop(self):
self._queue.queue.clear()
self._exit_event.clear()
self._thread.join()
@abstractmethod @abstractmethod
def stop(self): def __run_step(self):
pass pass

37
render/play_clock.py Normal file
View File

@ -0,0 +1,37 @@
#encoding = utf8
import time
class PlayClock:
    """Shared clock used to synchronize the audio and video renderers.

    The audio side stamps the clock (current_time / display_time) as it
    plays; the video side compares its frame timestamps against
    clock_time() and drops frames drifting beyond audio_diff_threshold.
    """

    def __init__(self):
        self._start = time.time()
        self._current_time = 0
        self._display_time = 0
        self._audio_diff_threshold = 0.01

    @property
    def start_time(self):
        """Wall-clock time at which this clock was created.

        NOTE(review): base_render invokes this as ``start_time()`` (call
        syntax) even though it is a property — confirm and reconcile.
        """
        return self._start

    @property
    def current_time(self):
        """Stream timestamp of the most recently played audio batch."""
        return self._current_time

    @current_time.setter
    def current_time(self, v):
        self._current_time = v

    @property
    def display_time(self):
        """Wall-clock moment when current_time was last refreshed."""
        return self._display_time

    @property
    def audio_diff_threshold(self):
        """Maximum tolerated audio/video drift, in seconds."""
        return self._audio_diff_threshold

    def update_display_time(self):
        """Record 'now' as the moment the audio position was refreshed."""
        self._display_time = time.time()

    def clock_time(self):
        """Estimate the current playback position: the last audio
        timestamp plus the wall-clock time elapsed since it was stamped."""
        return self._current_time + (time.time() - self._display_time)

View File

@ -1,7 +1,48 @@
#encoding = utf8 #encoding = utf8
import copy
from queue import Empty
import cv2
import numpy as np
from base_render import BaseRender from base_render import BaseRender
class VideoRenderImpl(BaseRender): class VideoRender(BaseRender):
def __init__(self, start): def __init__(self, play_clock, context, human_render):
super().__init__(start) super().__init__(play_clock)
self._context = context
self._human_render = human_render
def __run_step(self):
    """Pop one rendered frame, composite it, and hand it to the UI sink.

    NOTE(review): BaseRender.put() enqueues (frame, ps) 2-tuples, so the
    item must be unpacked as ((res_frame, idx, type_), ps); the original
    4-way unpack raised ValueError on every dequeued item.
    NOTE(review): this name-mangled __run_step (-> _VideoRender__run_step)
    does not override BaseRender's abstract __run_step
    (-> _BaseRender__run_step) called by the base loop — confirm and
    rename both to a single-underscore method.
    """
    try:
        (res_frame, idx, type_), ps = self._queue.get(block=True, timeout=0.01)
    except Empty:
        return

    if type_ != 0:
        # Silence: show the unmodified cycle frame.
        combine_frame = self._context.frame_list_cycle[idx]
    else:
        # Speech: paste the synthesized mouth region into a copy of the
        # cycle frame at its bounding box.
        bbox = self._context.coord_list_cycle[idx]
        combine_frame = copy.deepcopy(self._context.frame_list_cycle[idx])
        y1, y2, x1, x2 = bbox
        try:
            res_frame = cv2.resize(res_frame.astype(np.uint8), (x2 - x1, y2 - y1))
        except Exception:
            # Narrowed from a bare except, which also swallowed
            # KeyboardInterrupt / SystemExit.
            return
        # combine_frame = get_image(ori_frame,res_frame,bbox)
        # t=time.perf_counter()
        combine_frame[y1:y2, x1:x2] = res_frame

    # Drop the frame when it drifts too far from the audio clock in either
    # direction (the message says "slow", but this also fires when ahead).
    clock_time = self._play_clock.clock_time()
    time_difference = abs(clock_time - ps)
    if time_difference > self._play_clock.audio_diff_threshold:
        print('video is slow')
        return
    image = cv2.cvtColor(combine_frame, cv2.COLOR_BGR2RGB)
    if self._human_render is not None:
        self._human_render.put_image(image)

36
render/voice_render.py Normal file
View File

@ -0,0 +1,36 @@
#encoding = utf8
import logging
import time
from queue import Empty
import numpy as np
from audio_render import AudioRender
from base_render import BaseRender
logger = logging.getLogger(__name__)
class VoiceRender(BaseRender):
    """Renders queued audio batches through an AudioRender device and
    stamps the shared play clock with the playback position.

    NOTE(review): the step method's double-underscore name mangles to
    _VoiceRender__run_step, while BaseRender._on_run calls
    self.__run_step() (mangled to _BaseRender__run_step), so this
    override is never dispatched by the base loop — confirm and rename
    both to a single-underscore method.
    """

    def __init__(self, play_clock):
        super().__init__(play_clock)
        self._audio_render = AudioRender()

    def __run_step(self):
        # Wait briefly for the next batch; bail out quietly when idle.
        try:
            audio_frames, ps = self._queue.get(block=True, timeout=0.01)
        except Empty:
            return
        # Publish the playback position so the video side can sync to it.
        self._play_clock.update_display_time()
        self._play_clock.current_time = ps
        for frame, type_ in audio_frames:
            # float [-1, 1] -> signed 16-bit PCM
            pcm = (frame * 32767).astype(np.int16)
            if self._audio_render is not None:
                try:
                    self._audio_render.write(pcm.tobytes(), int(pcm.shape[0] * 2))
                except Exception as e:
                    logging.error(f'Error writing audio frame: {e}')

1
ui.py
View File

@ -61,7 +61,6 @@ class App(customtkinter.CTk):
self._init_image_canvas() self._init_image_canvas()
self._audio_render = AudioRender()
# self._human = Human() # self._human = Human()
self._queue = Queue() self._queue = Queue()
self._human_context = HumanContext() self._human_context = HumanContext()