human/human/human_context.py

128 lines
3.6 KiB
Python
Raw Normal View History

2024-10-12 11:57:24 +00:00
#encoding = utf8
import logging
2024-10-17 15:26:21 +00:00
import os
2024-10-15 00:31:43 +00:00
from asr import SherpaNcnnAsr
2024-10-17 15:26:21 +00:00
from .audio_inference_handler import AudioInferenceHandler
from .audio_mal_handler import AudioMalHandler
from .human_render import HumanRender
2024-10-15 00:31:43 +00:00
from nlp import PunctuationSplit, DouBao
2024-10-19 10:47:34 +00:00
from tts import TTSEdge, TTSAudioSplitHandle, TTSEdgeHttp
2024-10-16 11:04:12 +00:00
from utils import load_avatar, get_device
2024-10-12 11:57:24 +00:00
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Absolute path of the directory that contains this file; HumanContext joins
# it with '..' and 'face' to locate the avatar data.
current_file_path = os.path.dirname(os.path.abspath(__file__))
2024-10-12 11:57:24 +00:00
class HumanContext:
    """Hold shared settings, avatar frames and the handler objects.

    ``__init__`` only sets configuration values and loads the avatar data via
    ``load_avatar``; the ASR / NLP / TTS / audio / render handlers are created
    later by :meth:`build`.  Until ``build`` has run, every handler attribute
    is ``None`` and :meth:`notify`, :meth:`pause_talk` and ``__del__`` skip the
    handlers that are missing instead of raising ``AttributeError``.
    """

    # Handler attributes in the order ``__del__`` stops them.
    _STOP_ORDER = (
        '_asr',
        '_nlp',
        '_tts',
        '_tts_handle',
        '_mal_handler',
        '_infer_handler',
        '_render_handler',
    )

    # Handler attributes in the order ``pause_talk`` pauses them.
    _PAUSE_ORDER = (
        '_nlp',
        '_tts',
        '_mal_handler',
        '_infer_handler',
        '_render_handler',
    )

    def __init__(self):
        """Set default parameters and load the avatar frames.

        Any exception raised by ``get_device`` or ``load_avatar`` propagates
        to the caller.
        """
        # Handler slots are declared first so that they exist (as ``None``)
        # even if device detection or avatar loading below raises.
        self._asr = None
        self._nlp = None
        self._tts = None
        self._tts_handle = None
        self._mal_handler = None
        self._infer_handler = None
        self._render_handler = None

        self._fps = 50  # 20 ms per frame
        self._image_size = 96
        self._batch_size = 16
        self._sample_rate = 16000
        self._stride_left_size = 10
        self._stride_right_size = 10
        self._render_batch = 5

        self._device = get_device()
        print(f'device:{self._device}')

        # Avatar data lives in the 'face' directory next to this package dir.
        base_path = os.path.join(current_file_path, '..', 'face')
        logger.info('_create_recognizer init, path:%s', base_path)
        full_images, face_frames, coord_frames = load_avatar(
            base_path, self._image_size, self._device)
        self._frame_list_cycle = full_images
        self._face_list_cycle = face_frames
        self._coord_list_cycle = coord_frames

        face_images_length = len(self._face_list_cycle)
        # Use the module logger (not the root logger) like the rest of the file.
        logger.info('face images length: %s', face_images_length)
        print(f'face images length: {face_images_length}')

    def __del__(self):
        """Stop every handler that was actually created.

        Handlers that are ``None`` (``build`` never ran) or missing entirely
        (``__init__`` raised part-way) are skipped.  A failure in one
        ``stop()`` is logged and does not prevent the remaining handlers from
        being stopped.
        """
        print('HumanContext: __del__')
        for attr in self._STOP_ORDER:
            handler = getattr(self, attr, None)
            if handler is None:
                continue
            try:
                handler.stop()
            except Exception:
                # Finalizers must not raise; record the failure and go on.
                logger.exception('failed to stop %s', attr)

    @property
    def fps(self):
        """Frames per second (50 by default, i.e. 20 ms per frame)."""
        return self._fps

    @property
    def image_size(self):
        """Image size value passed to ``load_avatar``."""
        return self._image_size

    @property
    def render_batch(self):
        """Configured render batch value."""
        return self._render_batch

    @property
    def device(self):
        """Device object returned by ``get_device()``."""
        return self._device

    @property
    def batch_size(self):
        """Configured batch size."""
        return self._batch_size

    @property
    def sample_rate(self):
        """Configured sample rate."""
        return self._sample_rate

    @property
    def stride_left_size(self):
        """Configured left stride size."""
        return self._stride_left_size

    @property
    def stride_right_size(self):
        """Configured right stride size."""
        return self._stride_right_size

    @property
    def face_list_cycle(self):
        """Second element of the tuple returned by ``load_avatar``."""
        return self._face_list_cycle

    @property
    def frame_list_cycle(self):
        """First element of the tuple returned by ``load_avatar``."""
        return self._frame_list_cycle

    @property
    def coord_list_cycle(self):
        """Third element of the tuple returned by ``load_avatar``."""
        return self._coord_list_cycle

    @property
    def render_handler(self):
        """The ``HumanRender`` created by :meth:`build`, or ``None`` before."""
        return self._render_handler

    def notify(self, message):
        """Forward ``message`` to the TTS split handler's ``on_message``.

        If the handler has not been built yet the message is only logged.
        """
        if self._tts_handle is not None:
            self._tts_handle.on_message(message)
        else:
            logger.info('notify message:%s', message)

    def build(self):
        """Create all handlers and attach the NLP stage to the ASR.

        Objects are constructed back to front because each one receives the
        previously created object as a constructor argument.
        """
        self._render_handler = HumanRender(self, None)
        self._infer_handler = AudioInferenceHandler(self, self._render_handler)
        self._mal_handler = AudioMalHandler(self, self._infer_handler)
        self._tts_handle = TTSAudioSplitHandle(self, self._mal_handler)
        self._tts = TTSEdgeHttp(self._tts_handle)
        split = PunctuationSplit()
        self._nlp = DouBao(self, split, self._tts)
        self._asr = SherpaNcnnAsr()
        self._asr.attach(self._nlp)

    def pause_talk(self):
        """Call ``pause_talk()`` on the NLP, TTS, mal, inference and render handlers.

        Handlers that have not been built yet (still ``None``) are skipped.
        """
        for attr in self._PAUSE_ORDER:
            handler = getattr(self, attr, None)
            if handler is not None:
                handler.pause_talk()