Compare commits

...

5 Commits

SHA1 Message Date
e3c2a79ce7 add background image 2024-11-05 22:22:40 +08:00
1e1967ef93 modify audio handler 2024-11-05 22:06:02 +08:00
4998d0b422 modify code 2024-11-05 19:40:03 +08:00
a92789c3b5 use pygame to render 2024-11-04 21:44:51 +08:00
4495f5182b modify audio and video in same queue 2024-11-04 13:40:05 +08:00
20 changed files with 244 additions and 178 deletions

View File

@@ -13,6 +13,7 @@ class AsrBase:
self._observers = []
self._stop_event = threading.Event()
self._stop_event.set()
self._thread = threading.Thread(target=self._recognize_loop)
self._thread.start()
@@ -28,7 +29,7 @@ class AsrBase:
observer.completed(message)
def stop(self):
self._stop_event.set()
self._stop_event.clear()
self._thread.join()
def attach(self, observer: AsrObserver):
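
The hunks above invert the meaning of the stop event: it is set at construction, the recognize loop runs while it stays set, and stop() clears it before joining. A minimal standalone sketch of this set-to-run / clear-to-stop pattern (class and loop body are illustrative, not the repository's code):

import threading
import time

class Worker:
    def __init__(self):
        self._stop_event = threading.Event()
        self._stop_event.set()                 # set == running
        self._thread = threading.Thread(target=self._loop)
        self._thread.start()

    def _loop(self):
        while self._stop_event.is_set():       # exits once the flag is cleared
            time.sleep(0.1)                    # placeholder for real work

    def stop(self):
        self._stop_event.clear()               # request exit
        self._thread.join()                    # wait for the loop to finish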

View File

@@ -60,10 +60,14 @@ class SherpaNcnnAsr(AsrBase):
time.sleep(3)
last_result = ""
logger.info(f'_recognize_loop')
while not self._stop_event.is_set():
while self._stop_event.is_set():
logger.info(f'_recognize_loop000')
self._notify_complete('介绍中国5000年历史文学')
logger.info(f'_recognize_loop111')
segment_id += 1
time.sleep(60)
time.sleep(50)
logger.info(f'_recognize_loop222')
logger.info(f'_recognize_loop exit')
'''
with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s:
while not self._stop_event.is_set():

Binary file not shown.

After: Size 166 KiB

Binary file not shown.

Before: Size 452 KiB

Binary file not shown.

Before: Size 258 KiB

View File

@@ -133,6 +133,6 @@ class AudioInferenceHandler(AudioHandler):
self._run_thread.join()
def pause_talk(self):
print('AudioInferenceHandler pause_talk', self._audio_queue.size(), self._mal_queue.qsize())
print('AudioInferenceHandler pause_talk', self._audio_queue.size(), self._mal_queue.size())
self._audio_queue.clear()
self._mal_queue.queue.clear()
self._mal_queue.clear()
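
Both changed lines swap the mal queue from queue.Queue (qsize(), queue.clear()) to the project's SyncQueue (size(), clear()). SyncQueue itself is not part of this diff; a minimal stand-in with the same assumed surface might look like this sketch (an assumption about the interface, not the actual implementation):

import queue

class SyncQueue:
    def __init__(self, maxsize=0, name=''):
        self._queue = queue.Queue(maxsize)
        self._name = name

    def put(self, item):
        self._queue.put(item)

    def get(self, timeout=None):
        try:
            return self._queue.get(timeout=timeout)
        except queue.Empty:
            return None            # callers test for None; see HumanRender below

    def size(self):
        return self._queue.qsize()

    def clear(self):
        while True:                # drain without blocking
            try:
                self._queue.get_nowait()
            except queue.Empty:
                break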

View File

@@ -80,7 +80,7 @@ class AudioMalHandler(AudioHandler):
def get_audio_frame(self):
try:
# print('AudioMalHandler get_audio_frame')
frame = self._queue.get(timeout=0.02)
frame = self._queue.get()
type_ = 0
except queue.Empty:
frame = np.zeros(self.chunk, dtype=np.float32)
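
The fallback branch pads with silence so downstream timing keeps ticking even when no audio is queued. The pattern in isolation, assuming a plain queue.Queue and a fixed chunk size (the type_ convention, 0 for voiced and non-zero for padding, is inferred from how HumanRender tests audio_frames[i][1] below):

import queue
import numpy as np

chunk = 320                        # samples per frame; illustrative value
audio_queue = queue.Queue()

def get_audio_frame():
    try:
        frame = audio_queue.get(timeout=0.02)      # real audio if available
        type_ = 0                                  # voiced frame
    except queue.Empty:
        frame = np.zeros(chunk, dtype=np.float32)  # pad with silence
        type_ = 1                                  # padding frame
    return frame, type_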

View File

@@ -46,13 +46,6 @@ class HumanContext:
def __del__(self):
print(f'HumanContext: __del__')
object_stop(self._asr)
object_stop(self._nlp)
object_stop(self._tts)
object_stop(self._tts_handle)
object_stop(self._mal_handler)
object_stop(self._infer_handler)
object_stop(self._render_handler)
@property
def fps(self):
@@ -119,6 +112,15 @@ class HumanContext:
self._asr = SherpaNcnnAsr()
self._asr.attach(self._nlp)
def stop(self):
object_stop(self._asr)
object_stop(self._nlp)
object_stop(self._tts)
object_stop(self._tts_handle)
object_stop(self._mal_handler)
object_stop(self._infer_handler)
object_stop(self._render_handler)
def pause_talk(self):
self._nlp.pause_talk()
self._tts.pause_talk()
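
Teardown moves from __del__ into an explicit stop(). That makes shutdown deterministic: __del__ runs only when the garbage collector drops the last reference, possibly never at interpreter exit, while stop() can be called at a known point (pygame_ui.py below calls pause_talk() and then stop()). A reduced sketch of the idea, with an assumed object_stop helper:

def object_stop(obj):
    if obj is not None:        # tolerate components that were never built
        obj.stop()

class Pipeline:
    def __init__(self):
        self._asr = None
        self._nlp = None

    def stop(self):
        # explicit, ordered teardown instead of relying on __del__
        object_stop(self._asr)
        object_stop(self._nlp)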

View File

@@ -1,10 +1,14 @@
#encoding = utf8
import logging
import time
from queue import Empty
from threading import Event, Thread
from human.message_type import MessageType
from human_handler import AudioHandler
from render import VoiceRender, VideoRender, PlayClock
from utils import SyncQueue
logger = logging.getLogger(__name__)
@@ -16,10 +20,44 @@ class HumanRender(AudioHandler):
play_clock = PlayClock()
self._voice_render = VoiceRender(play_clock, context)
self._video_render = VideoRender(play_clock, context, self)
self._queue = SyncQueue(context.batch_size, "HumanRender_queue")
self._exit_event = Event()
self._thread = Thread(target=self._on_run, name="AudioMalHandlerThread")
self._exit_event.set()
self._thread.start()
self._image_render = None
self._last_audio_ps = 0
self._last_video_ps = 0
def _on_run(self):
logging.info('human render run')
while self._exit_event.is_set():
self._run_step()
time.sleep(0.038)
logging.info('human render exit')
def _run_step(self):
try:
value = self._queue.get()
if value is None:
return
res_frame, idx, audio_frames = value
# print('voice render queue size', self._queue.size())
except Empty:
print('render queue.Empty:')
return
type_ = 1
if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
type_ = 0
if self._voice_render is not None:
self._voice_render.render(audio_frames, self._last_audio_ps)
self._last_audio_ps = self._last_audio_ps + 0.4
if self._video_render is not None:
self._video_render.render((res_frame, idx, type_), self._last_video_ps)
self._last_video_ps = self._last_video_ps + 0.4
def set_image_render(self, render):
self._image_render = render
@@ -31,28 +69,38 @@ class HumanRender(AudioHandler):
super().on_message(message)
def on_handle(self, stream, index):
res_frame, idx, audio_frames = stream
self._voice_render.put(audio_frames, self._last_audio_ps)
self._last_audio_ps = self._last_audio_ps + 0.4
type_ = 1
if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
type_ = 0
self._video_render.put((res_frame, idx, type_), self._last_video_ps)
self._last_video_ps = self._last_video_ps + 0.4
self._queue.put(stream)
# res_frame, idx, audio_frames = stream
# self._voice_render.put(audio_frames, self._last_audio_ps)
# self._last_audio_ps = self._last_audio_ps + 0.4
# type_ = 1
# if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
# type_ = 0
# self._video_render.put((res_frame, idx, type_), self._last_video_ps)
# self._last_video_ps = self._last_video_ps + 0.4
#
# if self._voice_render.is_full():
# self._context.notify({'msg_id': MessageType.Video_Render_Queue_Full})
if self._voice_render.is_full():
self._context.notify({'msg_id': MessageType.Video_Render_Queue_Full})
def get_audio_queue_size(self):
return self._voice_render.size()
# def get_audio_queue_size(self):
# return self._voice_render.size()
def pause_talk(self):
self._voice_render.pause_talk()
self._video_render.pause_talk()
pass
# self._voice_render.pause_talk()
# self._video_render.pause_talk()
def stop(self):
self._voice_render.stop()
self._video_render.stop()
logging.info('hunan render stop')
if self._exit_event is None:
return
self._exit_event.clear()
if self._thread.is_alive():
self._thread.join()
logging.info('hunan render stop')
# self._voice_render.stop()
# self._video_render.stop()
# self._exit_event.clear()
# self._thread.join()
'''

View File

@@ -18,13 +18,6 @@ class AudioHandler(ABC):
if self._handler is not None:
self._handler.on_message(message)
# @abstractmethod
# def pause_handle(self):
# if self._handler is not None:
# self._handler.pause_handle()
# else:
# logging.info(f'_handler is None')
@abstractmethod
def stop(self):
pass

View File

@@ -13,6 +13,7 @@ class NLPBase(AsrObserver):
self._context = context
self._split_handle = split
self._callback = callback
self._is_running = False
@property
def callback(self):
@@ -23,7 +24,7 @@ class NLPBase(AsrObserver):
self._callback = value
def _on_callback(self, txt: str):
if self._callback is not None:
if self._callback is not None and self._is_running:
self._callback.on_message(txt)
async def _request(self, question):
@@ -42,12 +43,18 @@ class NLPBase(AsrObserver):
def ask(self, question):
logger.info(f'ask:{question}')
self._is_running = True
self._ask_queue.add_task(self._request, question)
logger.info(f'ask:{question} completed')
def stop(self):
logger.info('NLPBase stop')
self._is_running = False
self._ask_queue.add_task(self._on_close)
self._ask_queue.stop()
def pause_talk(self):
logger.info('NLPBase pause_talk')
self._is_running = False
self._ask_queue.clear()
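
NLPBase here and TTSBase below gain the same _is_running gate: results that arrive after pause_talk() or stop() are dropped instead of being forwarded. The pattern in isolation (an illustrative class, not the full NLPBase):

class CallbackGate:
    def __init__(self, callback):
        self._callback = callback
        self._is_running = False

    def ask(self, question):
        self._is_running = True                # a new request reopens the gate
        # ... enqueue the asynchronous request here ...

    def _on_callback(self, txt):
        if self._callback is not None and self._is_running:
            self._callback.on_message(txt)     # delivered only while running

    def pause_talk(self):
        self._is_running = False               # late answers are now ignored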

View File

@@ -11,41 +11,15 @@ logger = logging.getLogger(__name__)
class BaseRender(ABC):
def __init__(self, play_clock, context, type_, delay=0.02, thread_name="BaseRenderThread"):
def __init__(self, play_clock, context, type_):
self._play_clock = play_clock
self._context = context
self._type = type_
self._delay = delay
self._queue = SyncQueue(context.batch_size, f'{type_}RenderQueue')
self._exit_event = Event()
self._thread = Thread(target=self._on_run, name=thread_name)
self._exit_event.set()
self._thread.start()
def _on_run(self):
logging.info(f'{self._type} render run')
while self._exit_event.is_set():
self._run_step()
time.sleep(self._delay)
logging.info(f'{self._type} render exit')
def put(self, frame, ps):
self._queue.put((frame, ps))
def size(self):
return self._queue.size()
def pause_talk(self):
self._queue.clear()
def stop(self):
self._queue.clear()
self._exit_event.clear()
self._thread.join()
# self._queue = SyncQueue(context.batch_size, f'{type_}RenderQueue')
# self._exit_event = Event()
# self._thread = Thread(target=self._on_run, name=thread_name)
# self._exit_event.set()
# self._thread.start()
@abstractmethod
def _run_step(self):
def render(self, frame, ps):
pass

View File

@@ -13,60 +13,45 @@ from human.message_type import MessageType
class VideoRender(BaseRender):
def __init__(self, play_clock, context, human_render):
super().__init__(play_clock, context, 'Video', 0.038, "VideoRenderThread")
super().__init__(play_clock, context, 'Video')
self._human_render = human_render
self._diff_avg_count = 0
def _run_step(self):
while self._exit_event.is_set():
def render(self, frame, ps):
res_frame, idx, type_ = frame
clock_time = self._play_clock.clock_time()
time_difference = clock_time - ps
if abs(time_difference) > self._play_clock.audio_diff_threshold:
if self._diff_avg_count < 5:
self._diff_avg_count += 1
else:
if time_difference < -self._play_clock.audio_diff_threshold:
sleep_time = abs(time_difference)
print("Video frame waiting to catch up with audio", sleep_time)
if sleep_time <= 1.0:
time.sleep(sleep_time)
# elif time_difference > self._play_clock.audio_diff_threshold:  # video ahead of audio by more than 10 ms
# print("Video frame dropped to catch up with audio")
# continue
else:
self._diff_avg_count = 0
if type_ == 0:
combine_frame = self._context.frame_list_cycle[idx]
else:
bbox = self._context.coord_list_cycle[idx]
combine_frame = copy.deepcopy(self._context.frame_list_cycle[idx])
y1, y2, x1, x2 = bbox
try:
value = self._queue.get()
if value is None:
return
frame, ps = value
res_frame, idx, type_ = frame
except Empty:
res_frame = cv2.resize(res_frame.astype(np.uint8), (x2 - x1, y2 - y1))
except:
print('resize error')
return
combine_frame[y1:y2, x1:x2, :3] = res_frame
clock_time = self._play_clock.clock_time()
time_difference = clock_time - ps
print("Video frame time", clock_time, ps, time_difference)
if abs(time_difference) > self._play_clock.audio_diff_threshold:
if self._diff_avg_count < 5:
self._diff_avg_count += 1
else:
if time_difference < -self._play_clock.audio_diff_threshold:
sleep_time = abs(time_difference)
print("Video frame waiting to catch up with audio", sleep_time)
if sleep_time <= 1.0:
time.sleep(sleep_time)
# elif time_difference > self._play_clock.audio_diff_threshold:  # video ahead of audio by more than 10 ms
# print("Video frame dropped to catch up with audio")
# continue
else:
self._diff_avg_count = 0
print('video render:',
'get face', self._queue.size(),
'audio queue', self._human_render.get_audio_queue_size())
if type_ == 0:
combine_frame = self._context.frame_list_cycle[idx]
else:
bbox = self._context.coord_list_cycle[idx]
combine_frame = copy.deepcopy(self._context.frame_list_cycle[idx])
y1, y2, x1, x2 = bbox
try:
res_frame = cv2.resize(res_frame.astype(np.uint8), (x2 - x1, y2 - y1))
except:
print('resize error')
return
combine_frame[y1:y2, x1:x2, :3] = res_frame
image = combine_frame
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
if self._human_render is not None:
self._human_render.put_image(image)
return
image = combine_frame
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
if self._human_render is not None:
self._human_render.put_image(image)
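
The relocated render() keeps the old drift logic: compare the shared playback clock with the frame's presentation stamp ps, tolerate up to five out-of-sync frames as jitter, then sleep when the frame is early so audio can catch up. The decision in isolation, with an illustrative threshold (the real one lives in PlayClock):

import time

AUDIO_DIFF_THRESHOLD = 0.01        # seconds; assumed value

def wait_for_clock(clock_time, ps, diff_avg_count):
    time_difference = clock_time - ps
    if abs(time_difference) <= AUDIO_DIFF_THRESHOLD:
        return 0                               # in sync; reset the counter
    if diff_avg_count < 5:
        return diff_avg_count + 1              # tolerate brief jitter
    if time_difference < -AUDIO_DIFF_THRESHOLD:
        sleep_time = abs(time_difference)      # frame is ahead of the clock
        if sleep_time <= 1.0:                  # cap the wait to avoid stalls
            time.sleep(sleep_time)
    return diff_avg_count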

View File

@@ -15,41 +15,13 @@ logger = logging.getLogger(__name__)
class VoiceRender(BaseRender):
def __init__(self, play_clock, context):
self._audio_render = AudioRender()
self._is_empty = True
super().__init__(play_clock, context, 'Voice', 0.03, "VoiceRenderThread")
def is_full(self):
return self._queue.size() >= self._context.render_batch * 2
def _run_step(self):
try:
value = self._queue.get()
if value is None:
return
audio_frames, ps = value
# print('voice render queue size', self._queue.size())
except Empty:
self._context.notify({'msg_id': MessageType.Video_Render_Queue_Empty})
if not self._is_empty:
print('voice render queue empty')
self._is_empty = True
return
if self._is_empty:
print('voice render queue not empty')
self._is_empty = False
status = MessageType.Video_Render_Queue_Not_Empty
if self._queue.size() < self._context.render_batch:
status = MessageType.Video_Render_Queue_Empty
elif self._queue.size() >= self._context.render_batch * 2:
status = MessageType.Video_Render_Queue_Full
self._context.notify({'msg_id': status})
super().__init__(play_clock, context, 'Voice')
def render(self, frame, ps):
self._play_clock.update_display_time()
self._play_clock.current_time = ps
for audio_frame in audio_frames:
for audio_frame in frame:
frame, type_ = audio_frame
frame = (frame * 32767).astype(np.int16)

View File

@@ -13,6 +13,7 @@ class TTSBase(NLPCallback):
def __init__(self, handle):
self._handle = handle
self._message_queue = AsyncTaskQueue(5)
self._is_running = False
@property
def handle(self):
@@ -30,7 +31,10 @@ class TTSBase(NLPCallback):
print(f'-------stream is None')
return
print(f'-------tts time:{time.time() - t:.4f}s')
await self._on_handle(stream, index)
if self._handle is not None and self._is_running:
await self._on_handle(stream, index)
else:
logger.info(f'handle is None, running:{self._is_running}')
async def _on_request(self, text: str):
pass
@@ -45,6 +49,7 @@ class TTSBase(NLPCallback):
self.message(txt)
def message(self, txt):
self._is_running = True
txt = txt.strip()
if len(txt) == 0:
logger.info(f'message is empty')
@@ -61,4 +66,6 @@ class TTSBase(NLPCallback):
self._message_queue.stop()
def pause_talk(self):
logger.info(f'TTSBase pause_talk')
self._is_running = False
self._message_queue.clear()

ui.py (14 changed lines)
View File

@@ -57,7 +57,7 @@ class App(customtkinter.CTk):
# self.main_button_1.grid(row=2, column=2, padx=(20, 20), pady=(20, 20), sticky="nsew")
background = os.path.join(current_file_path, 'data', 'background', 'background.webp')
logger.info(f'background: {background}')
self._background = read_image(background).convert("RGBA")
self._background = read_image(background).convert("RGB")
self._init_image_canvas()
@@ -105,13 +105,13 @@ class App(customtkinter.CTk):
image = cv2.resize(image, (int(iwidth * height / iheight), int(height)), interpolation=cv2.INTER_AREA)
img = Image.fromarray(image)
bg_width, bg_height = self._background.size
fg_width, fg_height = img.size
x = (bg_width - fg_width) // 2
y = (bg_height - fg_height) // 2
self._background.paste(img, (x, y), img)
# bg_width, bg_height = self._background.size
# fg_width, fg_height = img.size
# x = (bg_width - fg_width) // 2
# y = (bg_height - fg_height) // 2
# self._background.paste(img, (x, y))
imgtk = ImageTk.PhotoImage(self._background)
imgtk = ImageTk.PhotoImage(img)
self._canvas.delete("all")

ui/__init__.py (new file, 3 lines)
View File

@@ -0,0 +1,3 @@
#encoding = utf8

ui/pygame_ui.py (new file, 76 lines)
View File

@@ -0,0 +1,76 @@
#encoding = utf8
import logging
import os
from queue import Queue
import pygame
from pygame.locals import *
from human import HumanContext
from utils import config_logging
logger = logging.getLogger(__name__)
current_file_path = os.path.dirname(os.path.abspath(__file__))
class PyGameUI:
def __init__(self):
self._human_context = None
self._queue = None
self.screen_ = pygame.display.set_mode((800, 600), HWSURFACE | DOUBLEBUF | RESIZABLE)
self.clock = pygame.time.Clock()
background = os.path.join(current_file_path, '..', 'data', 'background', 'background.jpg')
logger.info(f'background: {background}')
self._background = pygame.image.load(background).convert()
self.background_display_ = pygame.transform.scale(self._background, (800, 600))
self._human_image = None
self.running = True
def start(self):
self._queue = Queue()
self._human_context = HumanContext()
self._human_context.build()
render = self._human_context.render_handler
render.set_image_render(self)
def run(self):
self.start()
while self.running:
self.clock.tick(60)
for event in pygame.event.get():
if event.type == pygame.QUIT:
self.running = False
elif event.type == VIDEORESIZE:
self.background_display_ = pygame.transform.scale(self._background, event.dict['size'])
self.screen_.blit(self.background_display_, (0, 0))
self._update_human()
if self._human_image is not None:
self.screen_.blit(self._human_image, (0, 0))
pygame.display.flip()
self.stop()
pygame.quit()
def _update_human(self):
if self._queue.empty():
return
image = self._queue.get()
self._human_image = pygame.image.frombuffer(image.tobytes(), image.shape[1::-1], "RGB")
def stop(self):
logger.info('stop')
if self._human_context is not None:
self._human_context.pause_talk()
self._human_context.stop()
def on_render(self, image):
self._queue.put(image)
if __name__ == '__main__':
config_logging('../logs/info.log', logging.INFO, logging.INFO)
logger.info('------------start------------')
ui = PyGameUI()
ui.run()
logger.info('------------finish------------')
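
_update_human turns a numpy frame into a pygame surface with frombuffer; image.shape[1::-1] reverses (height, width) into the (width, height) size pygame expects. A self-contained check of just that conversion (window size and frame contents are placeholders):

import numpy as np
import pygame

pygame.init()
screen = pygame.display.set_mode((800, 600))

frame = np.zeros((600, 800, 3), dtype=np.uint8)   # H x W x RGB
size = frame.shape[1::-1]                         # (800, 600), i.e. (W, H)
surface = pygame.image.frombuffer(frame.tobytes(), size, 'RGB')
screen.blit(surface, (0, 0))
pygame.display.flip()
pygame.quit()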

View File

@@ -9,7 +9,6 @@ class AsyncTaskQueue:
self._queue = asyncio.Queue()
self._worker_num = work_num
self._current_worker_num = work_num
self._condition = threading.Condition()
self._thread = threading.Thread(target=self._run_loop)
self._thread.start()
self.__loop = None
@@ -27,35 +26,31 @@ class AsyncTaskQueue:
async def _worker(self):
print('_worker')
while True:
with self._condition:
self._condition.wait_for(lambda: not self._queue.empty())
task = await self._queue.get()
func, *args = task # unpack the task
if func is None: # None is the stop signal
break
task = await self._queue.get()
if task is None: # None as a stop signal
break
print(f"Executing task with args: {args}")
await func(*args) # execute the async function
self._queue.task_done()
func, *args = task # Unpack task
print(f"Executing task with args: {args}")
await func(*args) # Execute async function
self._queue.task_done()
print('_worker finish')
self._current_worker_num = self._current_worker_num - 1
self._current_worker_num -= 1
if self._current_worker_num == 0:
print('loop stop')
self.__loop.stop()
def add_task(self, func, *args):
with self._condition:
self._queue.put_nowait((func, *args))
self._condition.notify()
self.__loop.call_soon_threadsafe(self._queue.put_nowait, (func, *args))
def stop_workers(self):
for _ in range(self._worker_num):
self.add_task(None) # send the stop signal
self.add_task(None) # Send stop signal
def clear(self):
while not self._queue.empty():
self._queue.get()
self._queue.get_nowait()
self._queue.task_done()
def stop(self):
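
The Condition disappears because it guarded a cross-thread put into an asyncio.Queue, which is not thread-safe; the replacement marshals put_nowait onto the loop's own thread with call_soon_threadsafe. A reduced, runnable sketch of the same producer pattern (one worker, Python 3.10+, not the project's full class):

import asyncio
import threading

class MiniTaskQueue:
    def __init__(self):
        self._loop = asyncio.new_event_loop()
        self._queue = asyncio.Queue()          # binds to the loop lazily (3.10+)
        self._thread = threading.Thread(target=self._run_loop)
        self._thread.start()

    def _run_loop(self):
        asyncio.set_event_loop(self._loop)
        self._loop.create_task(self._worker())
        self._loop.run_forever()

    async def _worker(self):
        while True:
            func, *args = await self._queue.get()
            if func is None:                   # stop signal
                break
            await func(*args)
        self._loop.stop()

    def add_task(self, func, *args):
        # asyncio.Queue is not thread-safe: hop onto the loop thread first
        self._loop.call_soon_threadsafe(self._queue.put_nowait, (func, *args))

    def stop(self):
        self.add_task(None)
        self._thread.join()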

View File

@@ -36,7 +36,6 @@ def read_images(img_list):
print(f'read image path:{img_path}')
# frame = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
frame = Image.open(img_path)
# frame = frame.convert("RGBA")
frame = np.array(frame)
frames.append(frame)
return frames