modify render
This commit is contained in:
parent
23cab9d86b
commit
34787ae4d4
@ -5,4 +5,4 @@ from .audio_mal_handler import AudioMalHandler
|
||||
from .audio_inference_handler import AudioInferenceHandler
|
||||
from .audio_inference_onnx_handler import AudioInferenceOnnxHandler
|
||||
from .huaman_status import HumanStatusEnum, HumanStatus
|
||||
from .human_render import HumanRender
|
||||
from .human_render import HumanRender, RenderStatus
|
||||
|
@ -103,10 +103,10 @@ class AudioInferenceHandler(AudioHandler):
|
||||
for i in range(batch_size):
|
||||
if not self._is_running:
|
||||
break
|
||||
# self.on_next_handle((None, mirror_index(silence_length, index),
|
||||
self.on_next_handle((None, human_status.get_index(),
|
||||
self.on_next_handle((None, mirror_index(length, index),
|
||||
# self.on_next_handle((None, human_status.get_index(),
|
||||
audio_frames[i * 2:i * 2 + 2]), 0)
|
||||
# index = index + 1
|
||||
index = index + 1
|
||||
else:
|
||||
human_status.start_talking()
|
||||
logger.info(f'infer======= {current_text}')
|
||||
@ -116,9 +116,9 @@ class AudioInferenceHandler(AudioHandler):
|
||||
index_list = []
|
||||
# for i in range(batch_size):
|
||||
for i in range(len(mel_batch)):
|
||||
# idx = mirror_index(length, index + i)
|
||||
idx = human_status.get_index()
|
||||
index_list.append(idx)
|
||||
idx = mirror_index(length, index + i)
|
||||
# idx = human_status.get_index()
|
||||
# index_list.append(idx)
|
||||
face = face_list_cycle[idx]
|
||||
img_batch.append(face)
|
||||
|
||||
@ -152,9 +152,10 @@ class AudioInferenceHandler(AudioHandler):
|
||||
if not self._is_running:
|
||||
break
|
||||
self.on_next_handle(
|
||||
# (res_frame, mirror_index(length, index), audio_frames[i * 2:i * 2 + 2]),
|
||||
(res_frame, index_list[i], audio_frames[i * 2:i * 2 + 2]),
|
||||
(res_frame, mirror_index(length, index), audio_frames[i * 2:i * 2 + 2]),
|
||||
# (res_frame, index_list[i], audio_frames[i * 2:i * 2 + 2]),
|
||||
0)
|
||||
index = index + 1
|
||||
|
||||
logger.info(f'total batch time: {time.perf_counter() - start_time}')
|
||||
else:
|
||||
@ -174,6 +175,4 @@ class AudioInferenceHandler(AudioHandler):
|
||||
def pause_talk(self):
|
||||
print('AudioInferenceHandler pause_talk', self._audio_queue.size(), self._mal_queue.size())
|
||||
self._audio_queue.clear()
|
||||
print('AudioInferenceHandler111')
|
||||
self._mal_queue.clear()
|
||||
print('AudioInferenceHandler222')
|
||||
|
@ -55,8 +55,7 @@ class AudioMalHandler(AudioHandler):
|
||||
logging.info('chunk2mal run')
|
||||
while self._exit_event.is_set() and self._is_running:
|
||||
self._run_step()
|
||||
time.sleep(0.02)
|
||||
|
||||
# time.sleep(0.01)
|
||||
logging.info('chunk2mal exit')
|
||||
|
||||
def _run_step(self):
|
||||
@ -107,6 +106,7 @@ class AudioMalHandler(AudioHandler):
|
||||
chunk = np.zeros(self.chunk, dtype=np.float32)
|
||||
frame = (chunk, '')
|
||||
type_ = 1
|
||||
# time.sleep(0.02)
|
||||
# logging.info(f'AudioMalHandler get_audio_frame type:{type_}')
|
||||
return frame, type_
|
||||
|
||||
|
@ -4,10 +4,8 @@ import os
|
||||
|
||||
from asr import SherpaNcnnAsr
|
||||
from eventbus import EventBus
|
||||
from .audio_inference_onnx_handler import AudioInferenceOnnxHandler
|
||||
from .audio_inference_handler import AudioInferenceHandler
|
||||
from .audio_mal_handler import AudioMalHandler
|
||||
from .human_render import HumanRender
|
||||
from nlp import PunctuationSplit, DouBao, Kimi
|
||||
from tts import TTSEdge, TTSAudioSplitHandle, TTSEdgeHttp
|
||||
from utils import load_avatar, get_device, object_stop, load_avatar_from_processed, load_avatar_from_256_processed
|
||||
@ -18,7 +16,7 @@ current_file_path = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
class HumanContext:
|
||||
def __init__(self):
|
||||
self._fps = 25 # 20 ms per frame
|
||||
self._fps = 50 # 20 ms per frame
|
||||
self._image_size = 288
|
||||
self._batch_size = 16
|
||||
self._sample_rate = 16000
|
||||
@ -118,8 +116,8 @@ class HumanContext:
|
||||
else:
|
||||
logger.info(f'notify message:{message}')
|
||||
|
||||
def build(self):
|
||||
self._render_handler = HumanRender(self, None)
|
||||
def build(self, render_handler):
|
||||
self._render_handler = render_handler
|
||||
self._infer_handler = AudioInferenceHandler(self, self._render_handler)
|
||||
self._mal_handler = AudioMalHandler(self, self._infer_handler)
|
||||
self._tts_handle = TTSAudioSplitHandle(self, self._mal_handler)
|
||||
|
@ -2,61 +2,73 @@
|
||||
|
||||
import logging
|
||||
import time
|
||||
from enum import Enum
|
||||
from queue import Empty
|
||||
from threading import Event, Thread
|
||||
|
||||
from eventbus import EventBus
|
||||
from human.message_type import MessageType
|
||||
from human_handler import AudioHandler
|
||||
from render import VoiceRender, VideoRender, PlayClock
|
||||
from utils import SyncQueue
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RenderStatus(Enum):
    """Fill state used to throttle the render loop.

    All members carry plain integer values. The earlier definition ended
    the first two assignments with a trailing comma, which silently made
    their values the 1-tuples ``(0,)`` and ``(1,)`` while ``E_Empty``
    stayed the int ``2``; lookups such as ``RenderStatus(0)`` therefore
    failed. Identity comparisons (``status is RenderStatus.E_Full``) are
    unaffected by this change.

    NOTE(review): the members appear to mirror the 'normal' / 'full' /
    'empty' messages received from the IPC peer -- confirm.
    """

    E_Normal = 0
    E_Full = 1
    E_Empty = 2
|
||||
|
||||
|
||||
class HumanRender(AudioHandler):
|
||||
def __init__(self, context, handler):
|
||||
super().__init__(context, handler)
|
||||
|
||||
EventBus().register('stop', self._on_stop)
|
||||
play_clock = PlayClock()
|
||||
self._voice_render = VoiceRender(play_clock, context)
|
||||
self._video_render = VideoRender(play_clock, context, self)
|
||||
self._is_running = True
|
||||
self._queue = SyncQueue(context.batch_size, "HumanRender_queue")
|
||||
self._exit_event = Event()
|
||||
self._thread = Thread(target=self._on_run, name="AudioMalHandlerThread")
|
||||
self._exit_event.set()
|
||||
self._thread.start()
|
||||
self._image_render = None
|
||||
self._last_audio_ps = 0
|
||||
self._last_video_ps = 0
|
||||
self._empty_log = True
|
||||
self._should_exit = False
|
||||
self._render_status = RenderStatus.E_Empty
|
||||
|
||||
def __del__(self):
|
||||
EventBus().unregister('stop', self._on_stop)
|
||||
|
||||
def _on_stop(self, *args, **kwargs):
|
||||
self._should_exit = True
|
||||
self.stop()
|
||||
|
||||
def _on_run(self):
|
||||
def _render(self, video_frame, voice_frame):
|
||||
pass
|
||||
|
||||
def run(self):
|
||||
logging.info('human render run')
|
||||
while self._exit_event.is_set() and self._is_running:
|
||||
self._run_step()
|
||||
delay = 0.075
|
||||
delay = 1000 / self._context.fps * 0.001
|
||||
while not self._should_exit:
|
||||
if self._render_status is RenderStatus.E_Full:
|
||||
time.sleep(delay)
|
||||
continue
|
||||
|
||||
t = time.perf_counter()
|
||||
self._run_step()
|
||||
use = time.perf_counter() - t
|
||||
if self._render_status is RenderStatus.E_Empty:
|
||||
continue
|
||||
real_delay = delay - use
|
||||
print(f'send voice {use}')
|
||||
if real_delay > 0:
|
||||
time.sleep(real_delay)
|
||||
else:
|
||||
print(f'send voice {real_delay}')
|
||||
|
||||
logging.info('human render exit')
|
||||
|
||||
def _run_step(self):
|
||||
try:
|
||||
value = self._queue.get(timeout=.005)
|
||||
value = self._queue.get(timeout=1)
|
||||
if value is None:
|
||||
return
|
||||
res_frame, idx, audio_frames = value
|
||||
if not self._empty_log:
|
||||
self._empty_log = True
|
||||
logging.info('render render:')
|
||||
logging.info('human render:')
|
||||
except Empty:
|
||||
if self._empty_log:
|
||||
self._empty_log = False
|
||||
@ -66,27 +78,15 @@ class HumanRender(AudioHandler):
|
||||
type_ = 1
|
||||
if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:
|
||||
type_ = 0
|
||||
if self._voice_render is not None:
|
||||
self._voice_render.render(audio_frames, self._last_audio_ps)
|
||||
self._last_audio_ps = self._last_audio_ps + 0.4
|
||||
if self._video_render is not None:
|
||||
self._video_render.render((res_frame, idx, type_), self._last_video_ps)
|
||||
self._last_video_ps = self._last_video_ps + 0.4
|
||||
|
||||
def set_image_render(self, render):
|
||||
self._image_render = render
|
||||
|
||||
def put_image(self, image):
|
||||
if self._image_render is not None:
|
||||
self._image_render.on_render(image)
|
||||
self._render((res_frame, idx, type_), audio_frames)
|
||||
|
||||
def on_message(self, message):
|
||||
super().on_message(message)
|
||||
|
||||
def on_handle(self, stream, index):
|
||||
if not self._is_running:
|
||||
if self._should_exit:
|
||||
return
|
||||
|
||||
self._queue.put(stream)
|
||||
|
||||
def pause_talk(self):
|
||||
@ -96,17 +96,7 @@ class HumanRender(AudioHandler):
|
||||
|
||||
def stop(self):
|
||||
logging.info('hunan render stop')
|
||||
self._is_running = False
|
||||
if self._exit_event is None:
|
||||
return
|
||||
|
||||
self._should_exit = True
|
||||
self._queue.clear()
|
||||
self._exit_event.clear()
|
||||
if self._thread.is_alive():
|
||||
self._thread.join()
|
||||
logging.info('hunan render stop')
|
||||
# self._voice_render.stop()
|
||||
# self._video_render.stop()
|
||||
# self._exit_event.clear()
|
||||
# self._thread.join()
|
||||
|
||||
logging.info('hunan render stop')
|
||||
|
5
main.py
5
main.py
@ -3,6 +3,7 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
from human import HumanContext
|
||||
from ui import IpcRender
|
||||
from utils import config_logging
|
||||
|
||||
@ -13,7 +14,9 @@ if __name__ == '__main__':
|
||||
config_logging('./logs/info.log', logging.INFO, logging.INFO)
|
||||
|
||||
logger.info('------------start------------')
|
||||
render = IpcRender()
|
||||
context = HumanContext()
|
||||
render = IpcRender(context)
|
||||
context.build(render)
|
||||
render.run()
|
||||
render.stop()
|
||||
logger.info('------------finish------------')
|
@ -1,5 +0,0 @@
|
||||
#encoding = utf8
|
||||
|
||||
from .voice_render import VoiceRender
|
||||
from .video_render import VideoRender
|
||||
from .play_clock import PlayClock
|
@ -1,25 +0,0 @@
|
||||
#encoding = utf8
|
||||
import logging
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from queue import Queue
|
||||
from threading import Event, Thread
|
||||
|
||||
from utils import SyncQueue
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseRender(ABC):
    """Abstract base for renderers that share a play clock and a context.

    Subclasses must implement :meth:`render`.
    """

    def __init__(self, play_clock, context, type_):
        """Store the shared collaborators.

        Args:
            play_clock: clock object kept on ``self._play_clock``.
            context: context object kept on ``self._context``.
            type_: accepted for interface compatibility; not used by this
                base class (a per-render queue that was named after it is
                no longer created here).
        """
        self._play_clock, self._context = play_clock, context

    @abstractmethod
    def render(self, frame, ps):
        """Render one ``frame`` for presentation time ``ps``."""
|
@ -1,37 +0,0 @@
|
||||
#encoding = utf8
|
||||
import time
|
||||
|
||||
|
||||
class PlayClock:
    """Playback clock built on wall-clock time (``time.time()``).

    The clock stores a media position (``current_time``) together with the
    wall-clock instant at which it was last refreshed (``display_time``);
    :meth:`clock_time` extrapolates the position from those two values.
    """

    def __init__(self):
        now = time.time()
        self._start = now
        # The display timestamp starts out equal to the creation time.
        self._display_time = now
        self._current_time = 0
        self._audio_diff_threshold = 0.01

    def update_display_time(self):
        """Set the display timestamp to the current wall-clock time."""
        self._display_time = time.time()

    def clock_time(self):
        """Return ``current_time`` plus the time elapsed since the last
        display-timestamp update."""
        since_update = time.time() - self._display_time
        return self.current_time + since_update

    @property
    def start_time(self):
        """Wall-clock time captured when the clock was created."""
        return self._start

    @property
    def display_time(self):
        """Wall-clock time of the most recent display-timestamp update."""
        return self._display_time

    @property
    def audio_diff_threshold(self):
        """Read-only threshold value (0.01)."""
        return self._audio_diff_threshold

    @property
    def current_time(self):
        """Most recently assigned media position."""
        return self._current_time

    @current_time.setter
    def current_time(self, v):
        self._current_time = v
|
@ -1,23 +0,0 @@
|
||||
#encoding = utf8
|
||||
import copy
|
||||
import logging
|
||||
import time
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from .base_render import BaseRender
|
||||
|
||||
|
||||
class VideoRender(BaseRender):
    """Renderer that forwards video frames to an owning human-render object."""

    def __init__(self, play_clock, context, human_render):
        """Register as the 'Video' renderer and remember the frame sink.

        Args:
            play_clock: passed through to ``BaseRender``.
            context: passed through to ``BaseRender``.
            human_render: object whose ``put_image`` receives each frame;
                may be ``None``, in which case frames are dropped.
        """
        super().__init__(play_clock, context, 'Video')
        self._human_render = human_render
        self.index = 0

    def render(self, frame, ps):
        """Hand ``frame`` to the sink; ``ps`` is not used here."""
        sink = self._human_render
        if sink is None:
            return
        sink.put_image(frame)
|
||||
|
||||
# image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
|
||||
|
@ -1,39 +0,0 @@
|
||||
#encoding = utf8
|
||||
import logging
|
||||
import time
|
||||
from queue import Empty
|
||||
|
||||
import numpy as np
|
||||
|
||||
from audio_render import AudioRender
|
||||
from human.message_type import MessageType
|
||||
from .base_render import BaseRender
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class VoiceRender(BaseRender):
    """Renderer that writes audio frames to an ``AudioRender`` device."""

    def __init__(self, play_clock, context):
        """Create the audio output, then register as the 'Voice' renderer."""
        # The audio device is created before the base initialiser runs,
        # matching the original construction order.
        self._audio_render = AudioRender()
        super().__init__(play_clock, context, 'Voice')
        self._current_text = ''

    def render(self, frame, ps):
        """Play every audio frame in ``frame`` and sync the play clock to ``ps``.

        Args:
            frame: iterable of ``((chunk, txt), type_)`` items.
                NOTE(review): ``chunk`` is presumably a 1-D float array of
                samples in [-1, 1] -- confirm against the producer.
            ps: presentation time stored on the play clock.
        """
        clock = self._play_clock
        clock.update_display_time()
        clock.current_time = ps

        for (samples, text), _frame_type in frame:
            # Log the text only when it changes between frames.
            if text != self._current_text:
                self._current_text = text
                logging.info(f'VoiceRender: {text}')

            # Scale to the int16 range before handing bytes to the device.
            pcm = (samples * 32767).astype(np.int16)

            if self._audio_render is None:
                continue
            try:
                # Two bytes per int16 sample.
                byte_count = int(pcm.shape[0] * 2)
                self._audio_render.write(pcm.tobytes(), byte_count)
            except Exception as e:
                logging.error(f'Error writing audio frame: {e}')
|
@ -72,7 +72,7 @@ class TTSAudioSplitHandle(TTSAudioHandle):
|
||||
if chunks is not None:
|
||||
for chunk in chunks:
|
||||
self.on_next_handle((chunk, txt), 0)
|
||||
time.sleep(0.01) # Sleep briefly to prevent busy-waiting
|
||||
time.sleep(0.001) # Sleep briefly to prevent busy-waiting
|
||||
|
||||
def on_handle(self, stream, index):
|
||||
if not self._is_running:
|
||||
|
@ -3,9 +3,10 @@
|
||||
import os
|
||||
import logging
|
||||
import time
|
||||
from queue import Queue
|
||||
|
||||
from human import HumanContext
|
||||
import numpy as np
|
||||
|
||||
from human import HumanRender, RenderStatus
|
||||
from ipc import IPCUtil
|
||||
from utils import render_image
|
||||
|
||||
@ -13,60 +14,60 @@ logger = logging.getLogger(__name__)
|
||||
current_file_path = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
class IpcRender:
|
||||
def __init__(self):
|
||||
self._human_context = None
|
||||
self._queue = None
|
||||
self._exit = False
|
||||
self._ipc = None
|
||||
class IpcRender(HumanRender):
|
||||
def __init__(self, context):
|
||||
super().__init__(context, None)
|
||||
self._ipc = IPCUtil('human_product', 'human_render')
|
||||
self._current_text = ''
|
||||
|
||||
def _send_image(self, identifier, image):
|
||||
def _send_image(self, image):
|
||||
height, width, channels = image.shape
|
||||
|
||||
t = time.perf_counter()
|
||||
width_bytes = width.to_bytes(4, byteorder='little')
|
||||
height_bytes = height.to_bytes(4, byteorder='little')
|
||||
bit_depth_bytes = channels.to_bytes(4, byteorder='little')
|
||||
|
||||
img_bytes = image.tobytes()
|
||||
identifier = b'\x01'
|
||||
data = identifier + width_bytes + height_bytes + bit_depth_bytes + img_bytes
|
||||
self._ipc.send_binary(data, len(data))
|
||||
|
||||
def _send_voice(self, voice):
    """Send a batch of audio frames to the IPC peer as one binary message.

    The message is the one-byte tag ``b'\\x02'`` followed by the int16
    sample bytes of every frame, in order.

    Args:
        voice: iterable of ``((chunk, txt), type_)`` items.
            NOTE(review): ``chunk`` is presumably a float sample array in
            [-1, 1] -- confirm against the producer.
    """
    parts = [b'\x02']
    for (samples, text), _frame_type in voice:
        # Log the text only when it changes between frames.
        if text != self._current_text:
            self._current_text = text
            logging.info(f'VoiceRender: {text}')
        parts.append((samples * 32767).astype(np.int16).tobytes())

    payload = b''.join(parts)
    self._ipc.send_binary(payload, len(payload))
|
||||
|
||||
def _on_reader_callback(self, data_str, size):
|
||||
data_str = data_str.decode('utf-8')
|
||||
print(f'on_reader_callback: {data_str}, size:{size}')
|
||||
if 'quit' == data_str:
|
||||
self._exit = True
|
||||
self._context.stop()
|
||||
elif 'heartbeat' == data_str:
|
||||
pass
|
||||
elif 'full' == data_str:
|
||||
self._render_status = RenderStatus.E_Full
|
||||
elif 'empty' == data_str:
|
||||
self._render_status = RenderStatus.E_Empty
|
||||
elif 'normal' == data_str:
|
||||
self._render_status = RenderStatus.E_Normal
|
||||
|
||||
def run(self):
|
||||
self._queue = Queue()
|
||||
self._human_context = HumanContext()
|
||||
self._human_context.build()
|
||||
self._ipc = IPCUtil('human_product', 'human_render')
|
||||
self._ipc.set_reader_callback(self._on_reader_callback)
|
||||
logger.info(f'ipc listen:{self._ipc.listen()}')
|
||||
super().run()
|
||||
|
||||
render = self._human_context.render_handler
|
||||
render.set_image_render(self)
|
||||
|
||||
while not self._exit:
|
||||
if not self._queue.empty():
|
||||
while self._queue.qsize() > 5:
|
||||
self._queue.get()
|
||||
print('render queue is slower')
|
||||
|
||||
image = self._queue.get()
|
||||
image = render_image(self._human_context, image)
|
||||
self._send_image(b'\x01', image)
|
||||
else:
|
||||
time.sleep(0.02)
|
||||
logger.info('ipc render exit')
|
||||
|
||||
def stop(self):
|
||||
if self._human_context is not None:
|
||||
self._human_context.stop()
|
||||
|
||||
def on_render(self, image):
|
||||
self._queue.put(image)
|
||||
def _render(self, video_frame, voice_frame):
    """Send one rendered video frame, then its audio, to the IPC peer.

    The image is produced by ``render_image`` from the context and
    ``video_frame`` and is sent before the voice data.
    """
    self._send_image(render_image(self._context, video_frame))
    self._send_voice(voice_frame)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user