modify render

brige 2024-10-23 17:44:33 +08:00
parent 7e4550717f
commit b659e22708
9 changed files with 66 additions and 42 deletions

View File

@@ -2,6 +2,7 @@
 import logging
 import os
 import sys
+import time

 try:
     import sounddevice as sd
@@ -58,23 +59,28 @@ class SherpaNcnnAsr(AsrBase):
         segment_id = 0
         last_result = ""
         logger.info(f'_recognize_loop')
-        with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s:
-            while not self._stop_event.is_set():
-                samples, _ = s.read(self._samples_per_read)  # a blocking read
-                samples = samples.reshape(-1)
-                self._recognizer.accept_waveform(self._sample_rate, samples)
-
-                is_endpoint = self._recognizer.is_endpoint
-
-                result = self._recognizer.text
-                if result and (last_result != result):
-                    last_result = result
-                    print("\r{}:{}".format(segment_id, result), end=".", flush=True)
-                    self._notify_process(result)
-
-                if is_endpoint:
-                    if result:
-                        print("\r{}:{}".format(segment_id, result), flush=True)
-                        self._notify_complete(result)
-                        segment_id += 1
-                    self._recognizer.reset()
+        while not self._stop_event.is_set():
+            self._notify_complete('中国人民万岁')
+            segment_id += 1
+            time.sleep(10)
+        #
+        # with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s:
+        #     while not self._stop_event.is_set():
+        #         samples, _ = s.read(self._samples_per_read)  # a blocking read
+        #         samples = samples.reshape(-1)
+        #         self._recognizer.accept_waveform(self._sample_rate, samples)
+        #
+        #         is_endpoint = self._recognizer.is_endpoint
+        #
+        #         result = self._recognizer.text
+        #         if result and (last_result != result):
+        #             last_result = result
+        #             print("\r{}:{}".format(segment_id, result), end=".", flush=True)
+        #             self._notify_process(result)
+        #
+        #         if is_endpoint:
+        #             if result:
+        #                 print("\r{}:{}".format(segment_id, result), flush=True)
+        #                 self._notify_complete(result)
+        #                 segment_id += 1
+        #             self._recognizer.reset()
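
The net effect of this hunk: the live sherpa-ncnn microphone loop is commented out and replaced by a stub that pushes a fixed phrase downstream every ten seconds, so the render pipeline can be exercised without audio capture. A minimal standalone sketch of the same test double follows; the callback name is hypothetical, and Event.wait is used instead of time.sleep so the thread can be stopped promptly.

    import threading

    def fake_asr_loop(stop_event: threading.Event, on_complete):
        # Stand-in for the real recognizer loop: emit a fixed
        # recognition result every 10 s until stop_event is set.
        while not stop_event.is_set():
            on_complete('中国人民万岁')
            stop_event.wait(10)  # unlike time.sleep, returns early on stop

    stop = threading.Event()
    thread = threading.Thread(target=fake_asr_loop, args=(stop, print), daemon=True)
    thread.start()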

View File

@@ -37,7 +37,7 @@ class HumanRender(AudioHandler):
         self._voice_render.put(audio_frames)
         type_ = 1
         if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
-            type_ = 1
+            type_ = 0
         self._video_render.put((res_frame, idx, type_))

     def stop(self):

View File

@@ -44,7 +44,7 @@ class DouBao(NLPBase):
         sec = ''
         async for completion in stream:
             sec = sec + completion.choices[0].delta.content
-            print(sec)
+            # print(sec)
             sec, message = self._split_handle.handle(sec)
             if len(message) > 0:
                 self._on_callback(message)
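
The streaming pattern here is accumulate-and-split: each delta from the completion stream is appended to a buffer, and a splitter peels complete messages off the front for the callback. The real _split_handle is not part of this diff; a plausible sentence-boundary version, offered only as a sketch:

    import re

    def split_complete(buffer):
        # Peel complete sentences off the front of the buffer;
        # return (remaining_buffer, completed_text).
        parts = re.split(r'(?<=[。！？.!?])', buffer)
        return parts[-1], ''.join(parts[:-1])

    buffer = ''
    for delta in ['你好', '。今天', '天气不错。']:  # stand-ins for stream deltas
        buffer += delta
        buffer, message = split_complete(buffer)
        if len(message) > 0:
            print('complete:', message)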

View File

@@ -9,8 +9,9 @@ logger = logging.getLogger(__name__)

 class BaseRender(ABC):
-    def __init__(self, play_clock):
+    def __init__(self, play_clock, delay=0.02):
         self._play_clock = play_clock
+        self._delay = delay
         self._queue = Queue()
         self._exit_event = Event()
         self._thread = Thread(target=self._on_run)
@@ -20,13 +21,13 @@ class BaseRender(ABC):
     def _on_run(self):
         logging.info('Audio render run')
         while self._exit_event.is_set():
-            self.__run_step()
-            time.sleep(0.02)
+            self._run_step()
+            time.sleep(self._delay)
         logging.info('Audio render exit')

     def put(self, frame):
-        ps = time.time() - self._play_clock.start_time()
+        ps = time.time() - self._play_clock.start_time
         self._queue.put_nowait((frame, ps))

     def stop(self):
@@ -35,7 +36,7 @@ class BaseRender(ABC):
         self._thread.join()

     @abstractmethod
-    def __run_step(self):
+    def _run_step(self):
         pass
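
The __run_step to _run_step rename is a real fix, not cosmetics: Python mangles double-underscore names per class, so a subclass's __run_step becomes _VideoRender__run_step and never overrides (or satisfies) the abstract _BaseRender__run_step. A self-contained demonstration of the mangling behavior:

    class Base:
        def run(self):
            self.__step()       # compiled as self._Base__step()

        def __step(self):       # stored as _Base__step
            print('base step')

    class Child(Base):
        def __step(self):       # stored as _Child__step; run() never sees it
            print('child step')

    Child().run()               # prints 'base step', not 'child step'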

View File

@@ -6,7 +6,7 @@ class PlayClock:
     def __init__(self):
         self._start = time.time()
         self._current_time = 0
-        self._display_time = 0
+        self._display_time = self._start
         self._audio_diff_threshold = 0.01

     @property
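
This change pairs with the put() change in BaseRender above, where self._play_clock.start_time is now read without parentheses; that only works if start_time is exposed as a property. The accessor presumably looks something like this (a sketch, not the file's actual code):

    import time

    class PlayClock:
        def __init__(self):
            self._start = time.time()

        @property
        def start_time(self):   # read as clock.start_time, no call parentheses
            return self._start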

View File

@@ -1,44 +1,61 @@
 #encoding = utf8
 import copy
+import time
 from queue import Empty

 import cv2
 import numpy as np

-from base_render import BaseRender
+from .base_render import BaseRender


 class VideoRender(BaseRender):
     def __init__(self, play_clock, context, human_render):
-        super().__init__(play_clock)
+        super().__init__(play_clock, 0.02)
         self._context = context
         self._human_render = human_render

-    def __run_step(self):
+    def _run_step(self):
         try:
-            res_frame, idx, type_, ps = self._queue.get(block=True, timeout=0.01)
+            frame, ps = self._queue.get(block=True, timeout=0.01)
+            res_frame, idx, type_ = frame
+            print('video render queue size', self._queue.qsize())
         except Empty:
             return

-        if type_ != 0:
+        if type_ == 0:
             combine_frame = self._context.frame_list_cycle[idx]
         else:
+            print('get face', self._queue.qsize())
             bbox = self._context.coord_list_cycle[idx]
             combine_frame = copy.deepcopy(self._context.frame_list_cycle[idx])
             y1, y2, x1, x2 = bbox
             try:
                 res_frame = cv2.resize(res_frame.astype(np.uint8), (x2 - x1, y2 - y1))
             except:
+                print('resize error')
                 return
             # combine_frame = get_image(ori_frame,res_frame,bbox)
             # t=time.perf_counter()
             combine_frame[y1:y2, x1:x2] = res_frame

         clock_time = self._play_clock.clock_time()
-        time_difference = abs(clock_time - ps)
-        if time_difference > self._play_clock.audio_diff_threshold:
-            print('video is slow')
-            return
+        time_difference = clock_time - ps
+        print('video render:', ps, ' ', clock_time, ' ', time_difference)
+        if time_difference < -0.01:  # clock is more than 10 ms behind this frame
+            sleep_time = abs(time_difference + 0.01)
+            print("Video frame waiting to catch up with audio", sleep_time)
+            if sleep_time > 0:
+                time.sleep(sleep_time)  # only sleep for a positive duration
+            return  # keep waiting
+        elif time_difference > 0.01:  # clock is more than 10 ms ahead; frame is late
+            print("Video frame dropped to catch up with audio")
+            return  # drop the frame
+        # if time_difference > self._play_clock.audio_diff_threshold:
+        #     # print('video is slow')
+        #     return
         # elif time_difference < self._play_clock.audio_diff_threshold:
         image = combine_frame
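
The sync check switches from one absolute threshold to a signed comparison against the master clock: ps is the frame's timestamp (set in BaseRender.put), clock_time the current play-clock reading, and a negative clock_time - ps means the frame arrived early (sleep until it is due), while positive beyond the threshold means it is late (drop it). The same policy, condensed into a free function for clarity (names are illustrative, not from the repo):

    import time

    def sync_action(clock_time, ps, threshold=0.01):
        # Returns 'wait' (frame early), 'drop' (frame late),
        # or 'show' (within +/- threshold of the master clock).
        diff = clock_time - ps
        if diff < -threshold:
            time.sleep(-diff - threshold)   # sleep off the excess lead
            return 'wait'
        if diff > threshold:
            return 'drop'
        return 'show'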

View File

@@ -6,7 +6,7 @@ from queue import Empty
 import numpy as np

 from audio_render import AudioRender
-from base_render import BaseRender
+from .base_render import BaseRender

 logger = logging.getLogger(__name__)
@@ -16,9 +16,10 @@ class VoiceRender(BaseRender):
         super().__init__(play_clock)
         self._audio_render = AudioRender()

-    def __run_step(self):
+    def _run_step(self):
         try:
             audio_frames, ps = self._queue.get(block=True, timeout=0.01)
+            print('voice render queue size', self._queue.qsize())
         except Empty:
             return

ui.py
View File

@@ -67,7 +67,6 @@ class App(customtkinter.CTk):
         self._human_context.build()
         render = self._human_context.render_handler
         render.set_image_render(self)
-        render.set_audio_render(self._audio_render)

         self._render()
         # self.play_audio()

View File

@@ -37,7 +37,7 @@ def read_files_path(path):
     file_paths = []
     files = os.listdir(path)
     for file in files:
-        if not os.path.isdir(file):
+        if not os.path.isdir(os.path.join(path, file)) and (file.endswith('.png') or file.endswith('.jpg')):
             file_paths.append(os.path.join(path, file))
     return file_paths
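
One caution on this filter as originally committed: not os.path.isdir(file) and file.endswith('.png') or file.endswith('.jpg') parses as (not isdir and png) or jpg, because and binds tighter than or, so any .jpg name skips the directory check; the parenthesized form above is the intended test. A two-line demonstration:

    f, is_dir = 'photo.jpg', True
    print(not is_dir and f.endswith('.png') or f.endswith('.jpg'))    # True: jpg bypasses the dir check
    print(not is_dir and (f.endswith('.png') or f.endswith('.jpg')))  # False: intended filter
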
@@ -177,7 +177,7 @@ def load_avatar(path, img_size, device):
     return full_list_cycle, face_frames, coord_frames

-def config_logging(file_name: str, console_level: int=logging.INFO, file_level: int=logging.DEBUG):
+def config_logging(file_name: str, console_level: int = logging.INFO, file_level: int = logging.DEBUG):
     file_handler = logging.FileHandler(file_name, mode='a', encoding="utf8")
     file_handler.setFormatter(logging.Formatter(
         '%(asctime)s [%(levelname)s] %(module)s.%(lineno)d %(name)s:\t%(message)s'