modify render
This commit is contained in:
parent
7e4550717f
commit
b659e22708
@ -2,6 +2,7 @@
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import sounddevice as sd
|
import sounddevice as sd
|
||||||
@ -58,23 +59,28 @@ class SherpaNcnnAsr(AsrBase):
|
|||||||
segment_id = 0
|
segment_id = 0
|
||||||
last_result = ""
|
last_result = ""
|
||||||
logger.info(f'_recognize_loop')
|
logger.info(f'_recognize_loop')
|
||||||
with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s:
|
|
||||||
while not self._stop_event.is_set():
|
while not self._stop_event.is_set():
|
||||||
samples, _ = s.read(self._samples_per_read) # a blocking read
|
self._notify_complete('中国人民万岁')
|
||||||
samples = samples.reshape(-1)
|
|
||||||
self._recognizer.accept_waveform(self._sample_rate, samples)
|
|
||||||
|
|
||||||
is_endpoint = self._recognizer.is_endpoint
|
|
||||||
|
|
||||||
result = self._recognizer.text
|
|
||||||
if result and (last_result != result):
|
|
||||||
last_result = result
|
|
||||||
print("\r{}:{}".format(segment_id, result), end=".", flush=True)
|
|
||||||
self._notify_process(result)
|
|
||||||
|
|
||||||
if is_endpoint:
|
|
||||||
if result:
|
|
||||||
print("\r{}:{}".format(segment_id, result), flush=True)
|
|
||||||
self._notify_complete(result)
|
|
||||||
segment_id += 1
|
segment_id += 1
|
||||||
self._recognizer.reset()
|
time.sleep(10)
|
||||||
|
#
|
||||||
|
# with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s:
|
||||||
|
# while not self._stop_event.is_set():
|
||||||
|
# samples, _ = s.read(self._samples_per_read) # a blocking read
|
||||||
|
# samples = samples.reshape(-1)
|
||||||
|
# self._recognizer.accept_waveform(self._sample_rate, samples)
|
||||||
|
#
|
||||||
|
# is_endpoint = self._recognizer.is_endpoint
|
||||||
|
#
|
||||||
|
# result = self._recognizer.text
|
||||||
|
# if result and (last_result != result):
|
||||||
|
# last_result = result
|
||||||
|
# print("\r{}:{}".format(segment_id, result), end=".", flush=True)
|
||||||
|
# self._notify_process(result)
|
||||||
|
#
|
||||||
|
# if is_endpoint:
|
||||||
|
# if result:
|
||||||
|
# print("\r{}:{}".format(segment_id, result), flush=True)
|
||||||
|
# self._notify_complete(result)
|
||||||
|
# segment_id += 1
|
||||||
|
# self._recognizer.reset()
|
||||||
|
@ -37,7 +37,7 @@ class HumanRender(AudioHandler):
|
|||||||
self._voice_render.put(audio_frames)
|
self._voice_render.put(audio_frames)
|
||||||
type_ = 1
|
type_ = 1
|
||||||
if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
|
if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
|
||||||
type_ = 1
|
type_ = 0
|
||||||
self._video_render.put((res_frame, idx, type_))
|
self._video_render.put((res_frame, idx, type_))
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
|
@ -44,7 +44,7 @@ class DouBao(NLPBase):
|
|||||||
sec = ''
|
sec = ''
|
||||||
async for completion in stream:
|
async for completion in stream:
|
||||||
sec = sec + completion.choices[0].delta.content
|
sec = sec + completion.choices[0].delta.content
|
||||||
print(sec)
|
# print(sec)
|
||||||
sec, message = self._split_handle.handle(sec)
|
sec, message = self._split_handle.handle(sec)
|
||||||
if len(message) > 0:
|
if len(message) > 0:
|
||||||
self._on_callback(message)
|
self._on_callback(message)
|
||||||
|
@ -9,8 +9,9 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
class BaseRender(ABC):
|
class BaseRender(ABC):
|
||||||
def __init__(self, play_clock):
|
def __init__(self, play_clock, delay=0.02):
|
||||||
self._play_clock = play_clock
|
self._play_clock = play_clock
|
||||||
|
self._delay = delay
|
||||||
self._queue = Queue()
|
self._queue = Queue()
|
||||||
self._exit_event = Event()
|
self._exit_event = Event()
|
||||||
self._thread = Thread(target=self._on_run)
|
self._thread = Thread(target=self._on_run)
|
||||||
@ -20,13 +21,13 @@ class BaseRender(ABC):
|
|||||||
def _on_run(self):
|
def _on_run(self):
|
||||||
logging.info('Audio render run')
|
logging.info('Audio render run')
|
||||||
while self._exit_event.is_set():
|
while self._exit_event.is_set():
|
||||||
self.__run_step()
|
self._run_step()
|
||||||
time.sleep(0.02)
|
time.sleep(self._delay)
|
||||||
|
|
||||||
logging.info('Audio render exit')
|
logging.info('Audio render exit')
|
||||||
|
|
||||||
def put(self, frame):
|
def put(self, frame):
|
||||||
ps = time.time() - self._play_clock.start_time()
|
ps = time.time() - self._play_clock.start_time
|
||||||
self._queue.put_nowait((frame, ps))
|
self._queue.put_nowait((frame, ps))
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
@ -35,7 +36,7 @@ class BaseRender(ABC):
|
|||||||
self._thread.join()
|
self._thread.join()
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def __run_step(self):
|
def _run_step(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ class PlayClock:
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._start = time.time()
|
self._start = time.time()
|
||||||
self._current_time = 0
|
self._current_time = 0
|
||||||
self._display_time = 0
|
self._display_time = self._start
|
||||||
self._audio_diff_threshold = 0.01
|
self._audio_diff_threshold = 0.01
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -1,44 +1,61 @@
|
|||||||
#encoding = utf8
|
#encoding = utf8
|
||||||
import copy
|
import copy
|
||||||
|
import time
|
||||||
from queue import Empty
|
from queue import Empty
|
||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from base_render import BaseRender
|
from .base_render import BaseRender
|
||||||
|
|
||||||
|
|
||||||
class VideoRender(BaseRender):
|
class VideoRender(BaseRender):
|
||||||
def __init__(self, play_clock, context, human_render):
|
def __init__(self, play_clock, context, human_render):
|
||||||
super().__init__(play_clock)
|
super().__init__(play_clock, 0.02)
|
||||||
self._context = context
|
self._context = context
|
||||||
self._human_render = human_render
|
self._human_render = human_render
|
||||||
|
|
||||||
def __run_step(self):
|
def _run_step(self):
|
||||||
try:
|
try:
|
||||||
res_frame, idx, type_, ps = self._queue.get(block=True, timeout=0.01)
|
frame, ps = self._queue.get(block=True, timeout=0.01)
|
||||||
|
res_frame, idx, type_ = frame
|
||||||
|
print('video render queue size', self._queue.qsize())
|
||||||
except Empty:
|
except Empty:
|
||||||
return
|
return
|
||||||
|
|
||||||
if type_ != 0:
|
if type_ == 0:
|
||||||
combine_frame = self._context.frame_list_cycle[idx]
|
combine_frame = self._context.frame_list_cycle[idx]
|
||||||
else:
|
else:
|
||||||
|
print('get face', self._queue.qsize())
|
||||||
bbox = self._context.coord_list_cycle[idx]
|
bbox = self._context.coord_list_cycle[idx]
|
||||||
combine_frame = copy.deepcopy(self._context.frame_list_cycle[idx])
|
combine_frame = copy.deepcopy(self._context.frame_list_cycle[idx])
|
||||||
y1, y2, x1, x2 = bbox
|
y1, y2, x1, x2 = bbox
|
||||||
try:
|
try:
|
||||||
res_frame = cv2.resize(res_frame.astype(np.uint8), (x2 - x1, y2 - y1))
|
res_frame = cv2.resize(res_frame.astype(np.uint8), (x2 - x1, y2 - y1))
|
||||||
except:
|
except:
|
||||||
|
print('resize error')
|
||||||
return
|
return
|
||||||
# combine_frame = get_image(ori_frame,res_frame,bbox)
|
# combine_frame = get_image(ori_frame,res_frame,bbox)
|
||||||
# t=time.perf_counter()
|
# t=time.perf_counter()
|
||||||
combine_frame[y1:y2, x1:x2] = res_frame
|
combine_frame[y1:y2, x1:x2] = res_frame
|
||||||
|
|
||||||
clock_time = self._play_clock.clock_time()
|
clock_time = self._play_clock.clock_time()
|
||||||
time_difference = abs(clock_time - ps)
|
time_difference = clock_time - ps
|
||||||
if time_difference > self._play_clock.audio_diff_threshold:
|
|
||||||
print('video is slow')
|
print('video render:', ps, ' ', clock_time, ' ', time_difference)
|
||||||
return
|
if time_difference < -0.01: # 音频比视频快超过10ms
|
||||||
|
sleep_time = abs(time_difference + 0.01)
|
||||||
|
print("Video frame waiting to catch up with audio", sleep_time)
|
||||||
|
if sleep_time > 0:
|
||||||
|
time.sleep(sleep_time) # 只在正值时调用 sleep
|
||||||
|
return # 继续等待
|
||||||
|
|
||||||
|
elif time_difference < -0.01: # 视频比音频快超过10ms
|
||||||
|
print("Video frame dropped to catch up with audio")
|
||||||
|
return # 丢帧
|
||||||
|
# if time_difference > self._play_clock.audio_diff_threshold:
|
||||||
|
# # print('video is slow')
|
||||||
|
# return
|
||||||
# elif time_difference < self._play_clock.audio_diff_threshold:
|
# elif time_difference < self._play_clock.audio_diff_threshold:
|
||||||
|
|
||||||
image = combine_frame
|
image = combine_frame
|
||||||
|
@ -6,7 +6,7 @@ from queue import Empty
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from audio_render import AudioRender
|
from audio_render import AudioRender
|
||||||
from base_render import BaseRender
|
from .base_render import BaseRender
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -16,9 +16,10 @@ class VoiceRender(BaseRender):
|
|||||||
super().__init__(play_clock)
|
super().__init__(play_clock)
|
||||||
self._audio_render = AudioRender()
|
self._audio_render = AudioRender()
|
||||||
|
|
||||||
def __run_step(self):
|
def _run_step(self):
|
||||||
try:
|
try:
|
||||||
audio_frames, ps = self._queue.get(block=True, timeout=0.01)
|
audio_frames, ps = self._queue.get(block=True, timeout=0.01)
|
||||||
|
print('voice render queue size', self._queue.qsize())
|
||||||
except Empty:
|
except Empty:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
1
ui.py
1
ui.py
@ -67,7 +67,6 @@ class App(customtkinter.CTk):
|
|||||||
self._human_context.build()
|
self._human_context.build()
|
||||||
render = self._human_context.render_handler
|
render = self._human_context.render_handler
|
||||||
render.set_image_render(self)
|
render.set_image_render(self)
|
||||||
render.set_audio_render(self._audio_render)
|
|
||||||
self._render()
|
self._render()
|
||||||
# self.play_audio()
|
# self.play_audio()
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ def read_files_path(path):
|
|||||||
file_paths = []
|
file_paths = []
|
||||||
files = os.listdir(path)
|
files = os.listdir(path)
|
||||||
for file in files:
|
for file in files:
|
||||||
if not os.path.isdir(file):
|
if not os.path.isdir(file) and file.endswith('.png') or file.endswith('.jpg'):
|
||||||
file_paths.append(os.path.join(path, file))
|
file_paths.append(os.path.join(path, file))
|
||||||
return file_paths
|
return file_paths
|
||||||
|
|
||||||
@ -177,7 +177,7 @@ def load_avatar(path, img_size, device):
|
|||||||
return full_list_cycle, face_frames, coord_frames
|
return full_list_cycle, face_frames, coord_frames
|
||||||
|
|
||||||
|
|
||||||
def config_logging(file_name: str, console_level: int=logging.INFO, file_level: int=logging.DEBUG):
|
def config_logging(file_name: str, console_level: int = logging.INFO, file_level: int = logging.DEBUG):
|
||||||
file_handler = logging.FileHandler(file_name, mode='a', encoding="utf8")
|
file_handler = logging.FileHandler(file_name, mode='a', encoding="utf8")
|
||||||
file_handler.setFormatter(logging.Formatter(
|
file_handler.setFormatter(logging.Formatter(
|
||||||
'%(asctime)s [%(levelname)s] %(module)s.%(lineno)d %(name)s:\t%(message)s'
|
'%(asctime)s [%(levelname)s] %(module)s.%(lineno)d %(name)s:\t%(message)s'
|
||||||
|
Loading…
Reference in New Issue
Block a user