modify render
This commit is contained in:
parent
7e4550717f
commit
b659e22708
@ -2,6 +2,7 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
try:
|
||||
import sounddevice as sd
|
||||
@ -58,23 +59,28 @@ class SherpaNcnnAsr(AsrBase):
|
||||
segment_id = 0
|
||||
last_result = ""
|
||||
logger.info(f'_recognize_loop')
|
||||
with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s:
|
||||
while not self._stop_event.is_set():
|
||||
samples, _ = s.read(self._samples_per_read) # a blocking read
|
||||
samples = samples.reshape(-1)
|
||||
self._recognizer.accept_waveform(self._sample_rate, samples)
|
||||
|
||||
is_endpoint = self._recognizer.is_endpoint
|
||||
|
||||
result = self._recognizer.text
|
||||
if result and (last_result != result):
|
||||
last_result = result
|
||||
print("\r{}:{}".format(segment_id, result), end=".", flush=True)
|
||||
self._notify_process(result)
|
||||
|
||||
if is_endpoint:
|
||||
if result:
|
||||
print("\r{}:{}".format(segment_id, result), flush=True)
|
||||
self._notify_complete(result)
|
||||
segment_id += 1
|
||||
self._recognizer.reset()
|
||||
while not self._stop_event.is_set():
|
||||
self._notify_complete('中国人民万岁')
|
||||
segment_id += 1
|
||||
time.sleep(10)
|
||||
#
|
||||
# with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s:
|
||||
# while not self._stop_event.is_set():
|
||||
# samples, _ = s.read(self._samples_per_read) # a blocking read
|
||||
# samples = samples.reshape(-1)
|
||||
# self._recognizer.accept_waveform(self._sample_rate, samples)
|
||||
#
|
||||
# is_endpoint = self._recognizer.is_endpoint
|
||||
#
|
||||
# result = self._recognizer.text
|
||||
# if result and (last_result != result):
|
||||
# last_result = result
|
||||
# print("\r{}:{}".format(segment_id, result), end=".", flush=True)
|
||||
# self._notify_process(result)
|
||||
#
|
||||
# if is_endpoint:
|
||||
# if result:
|
||||
# print("\r{}:{}".format(segment_id, result), flush=True)
|
||||
# self._notify_complete(result)
|
||||
# segment_id += 1
|
||||
# self._recognizer.reset()
|
||||
|
@ -37,7 +37,7 @@ class HumanRender(AudioHandler):
|
||||
self._voice_render.put(audio_frames)
|
||||
type_ = 1
|
||||
if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
|
||||
type_ = 1
|
||||
type_ = 0
|
||||
self._video_render.put((res_frame, idx, type_))
|
||||
|
||||
def stop(self):
|
||||
|
@ -44,7 +44,7 @@ class DouBao(NLPBase):
|
||||
sec = ''
|
||||
async for completion in stream:
|
||||
sec = sec + completion.choices[0].delta.content
|
||||
print(sec)
|
||||
# print(sec)
|
||||
sec, message = self._split_handle.handle(sec)
|
||||
if len(message) > 0:
|
||||
self._on_callback(message)
|
||||
|
@ -9,8 +9,9 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseRender(ABC):
|
||||
def __init__(self, play_clock):
|
||||
def __init__(self, play_clock, delay=0.02):
|
||||
self._play_clock = play_clock
|
||||
self._delay = delay
|
||||
self._queue = Queue()
|
||||
self._exit_event = Event()
|
||||
self._thread = Thread(target=self._on_run)
|
||||
@ -20,13 +21,13 @@ class BaseRender(ABC):
|
||||
def _on_run(self):
|
||||
logging.info('Audio render run')
|
||||
while self._exit_event.is_set():
|
||||
self.__run_step()
|
||||
time.sleep(0.02)
|
||||
self._run_step()
|
||||
time.sleep(self._delay)
|
||||
|
||||
logging.info('Audio render exit')
|
||||
|
||||
def put(self, frame):
|
||||
ps = time.time() - self._play_clock.start_time()
|
||||
ps = time.time() - self._play_clock.start_time
|
||||
self._queue.put_nowait((frame, ps))
|
||||
|
||||
def stop(self):
|
||||
@ -35,7 +36,7 @@ class BaseRender(ABC):
|
||||
self._thread.join()
|
||||
|
||||
@abstractmethod
|
||||
def __run_step(self):
|
||||
def _run_step(self):
|
||||
pass
|
||||
|
||||
|
||||
|
@ -6,7 +6,7 @@ class PlayClock:
|
||||
def __init__(self):
|
||||
self._start = time.time()
|
||||
self._current_time = 0
|
||||
self._display_time = 0
|
||||
self._display_time = self._start
|
||||
self._audio_diff_threshold = 0.01
|
||||
|
||||
@property
|
||||
|
@ -1,44 +1,61 @@
|
||||
#encoding = utf8
|
||||
import copy
|
||||
import time
|
||||
from queue import Empty
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from base_render import BaseRender
|
||||
from .base_render import BaseRender
|
||||
|
||||
|
||||
class VideoRender(BaseRender):
|
||||
def __init__(self, play_clock, context, human_render):
|
||||
super().__init__(play_clock)
|
||||
super().__init__(play_clock, 0.02)
|
||||
self._context = context
|
||||
self._human_render = human_render
|
||||
|
||||
def __run_step(self):
|
||||
def _run_step(self):
|
||||
try:
|
||||
res_frame, idx, type_, ps = self._queue.get(block=True, timeout=0.01)
|
||||
frame, ps = self._queue.get(block=True, timeout=0.01)
|
||||
res_frame, idx, type_ = frame
|
||||
print('video render queue size', self._queue.qsize())
|
||||
except Empty:
|
||||
return
|
||||
|
||||
if type_ != 0:
|
||||
if type_ == 0:
|
||||
combine_frame = self._context.frame_list_cycle[idx]
|
||||
else:
|
||||
print('get face', self._queue.qsize())
|
||||
bbox = self._context.coord_list_cycle[idx]
|
||||
combine_frame = copy.deepcopy(self._context.frame_list_cycle[idx])
|
||||
y1, y2, x1, x2 = bbox
|
||||
try:
|
||||
res_frame = cv2.resize(res_frame.astype(np.uint8), (x2 - x1, y2 - y1))
|
||||
except:
|
||||
print('resize error')
|
||||
return
|
||||
# combine_frame = get_image(ori_frame,res_frame,bbox)
|
||||
# t=time.perf_counter()
|
||||
combine_frame[y1:y2, x1:x2] = res_frame
|
||||
|
||||
clock_time = self._play_clock.clock_time()
|
||||
time_difference = abs(clock_time - ps)
|
||||
if time_difference > self._play_clock.audio_diff_threshold:
|
||||
print('video is slow')
|
||||
return
|
||||
time_difference = clock_time - ps
|
||||
|
||||
print('video render:', ps, ' ', clock_time, ' ', time_difference)
|
||||
if time_difference < -0.01: # 音频比视频快超过10ms
|
||||
sleep_time = abs(time_difference + 0.01)
|
||||
print("Video frame waiting to catch up with audio", sleep_time)
|
||||
if sleep_time > 0:
|
||||
time.sleep(sleep_time) # 只在正值时调用 sleep
|
||||
return # 继续等待
|
||||
|
||||
elif time_difference < -0.01: # 视频比音频快超过10ms
|
||||
print("Video frame dropped to catch up with audio")
|
||||
return # 丢帧
|
||||
# if time_difference > self._play_clock.audio_diff_threshold:
|
||||
# # print('video is slow')
|
||||
# return
|
||||
# elif time_difference < self._play_clock.audio_diff_threshold:
|
||||
|
||||
image = combine_frame
|
||||
|
@ -6,7 +6,7 @@ from queue import Empty
|
||||
import numpy as np
|
||||
|
||||
from audio_render import AudioRender
|
||||
from base_render import BaseRender
|
||||
from .base_render import BaseRender
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -16,9 +16,10 @@ class VoiceRender(BaseRender):
|
||||
super().__init__(play_clock)
|
||||
self._audio_render = AudioRender()
|
||||
|
||||
def __run_step(self):
|
||||
def _run_step(self):
|
||||
try:
|
||||
audio_frames, ps = self._queue.get(block=True, timeout=0.01)
|
||||
print('voice render queue size', self._queue.qsize())
|
||||
except Empty:
|
||||
return
|
||||
|
||||
|
1
ui.py
1
ui.py
@ -67,7 +67,6 @@ class App(customtkinter.CTk):
|
||||
self._human_context.build()
|
||||
render = self._human_context.render_handler
|
||||
render.set_image_render(self)
|
||||
render.set_audio_render(self._audio_render)
|
||||
self._render()
|
||||
# self.play_audio()
|
||||
|
||||
|
@ -37,7 +37,7 @@ def read_files_path(path):
|
||||
file_paths = []
|
||||
files = os.listdir(path)
|
||||
for file in files:
|
||||
if not os.path.isdir(file):
|
||||
if not os.path.isdir(file) and file.endswith('.png') or file.endswith('.jpg'):
|
||||
file_paths.append(os.path.join(path, file))
|
||||
return file_paths
|
||||
|
||||
@ -177,7 +177,7 @@ def load_avatar(path, img_size, device):
|
||||
return full_list_cycle, face_frames, coord_frames
|
||||
|
||||
|
||||
def config_logging(file_name: str, console_level: int=logging.INFO, file_level: int=logging.DEBUG):
|
||||
def config_logging(file_name: str, console_level: int = logging.INFO, file_level: int = logging.DEBUG):
|
||||
file_handler = logging.FileHandler(file_name, mode='a', encoding="utf8")
|
||||
file_handler.setFormatter(logging.Formatter(
|
||||
'%(asctime)s [%(levelname)s] %(module)s.%(lineno)d %(name)s:\t%(message)s'
|
||||
|
Loading…
Reference in New Issue
Block a user