modify render talking
This commit is contained in:
parent
31f9ec50cb
commit
0eefa7b1ce
@ -3,7 +3,6 @@ import logging
|
||||
import os
|
||||
import queue
|
||||
import time
|
||||
from queue import Queue
|
||||
from threading import Event, Thread
|
||||
|
||||
import cv2
|
||||
@ -13,6 +12,7 @@ import torch
|
||||
from eventbus import EventBus
|
||||
from human_handler import AudioHandler
|
||||
from utils import load_model, mirror_index, get_device, SyncQueue
|
||||
from .huaman_status import HumanStatus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
current_file_path = os.path.dirname(os.path.abspath(__file__))
|
||||
@ -74,7 +74,7 @@ class AudioInferenceHandler(AudioHandler):
|
||||
count_time = 0
|
||||
logger.info('start inference')
|
||||
silence_length = 133
|
||||
# human_status = HumanStatus(length, silence_length)
|
||||
human_status = HumanStatus(length, silence_length)
|
||||
|
||||
device = get_device()
|
||||
logger.info(f'use device:{device}')
|
||||
@ -109,18 +109,22 @@ class AudioInferenceHandler(AudioHandler):
|
||||
for i in range(batch_size):
|
||||
if not self._is_running:
|
||||
break
|
||||
self.on_next_handle((None, mirror_index(silence_length, index),
|
||||
# self.on_next_handle((None, human_status.get_index(),
|
||||
# self.on_next_handle((None, mirror_index(silence_length, index),
|
||||
self.on_next_handle((None, human_status.get_index(),
|
||||
audio_frames[i * 2:i * 2 + 2]), 0)
|
||||
index = index + 1
|
||||
# index = index + 1
|
||||
else:
|
||||
human_status.start_talking()
|
||||
logger.info(f'infer======= {current_text}')
|
||||
# human_status.try_to_talk()
|
||||
t = time.perf_counter()
|
||||
img_batch = []
|
||||
index_list = []
|
||||
# for i in range(batch_size):
|
||||
for i in range(len(mel_batch)):
|
||||
idx = mirror_index(length, index + i)
|
||||
# idx = mirror_index(length, index + i)
|
||||
idx = human_status.get_index()
|
||||
index_list.append(idx)
|
||||
face = face_list_cycle[idx]
|
||||
img_batch.append(face)
|
||||
|
||||
@ -154,9 +158,10 @@ class AudioInferenceHandler(AudioHandler):
|
||||
if not self._is_running:
|
||||
break
|
||||
self.on_next_handle(
|
||||
(res_frame, mirror_index(length, index), audio_frames[i * 2:i * 2 + 2]),
|
||||
# (res_frame, mirror_index(length, index), audio_frames[i * 2:i * 2 + 2]),
|
||||
(res_frame, index_list[i], audio_frames[i * 2:i * 2 + 2]),
|
||||
0)
|
||||
index = index + 1
|
||||
|
||||
logger.info(f'total batch time: {time.perf_counter() - start_time}')
|
||||
else:
|
||||
time.sleep(1)
|
||||
|
@ -12,11 +12,13 @@ class HumanStatusEnum(Enum):
|
||||
|
||||
|
||||
class HumanStatus:
|
||||
def __init__(self, total_frames=0, last_silence_frame=0):
|
||||
def __init__(self, total_frames=0, silence_length=0):
|
||||
self._status = HumanStatusEnum.silence
|
||||
self._total_frames = total_frames
|
||||
self._last_silence_frame = last_silence_frame
|
||||
self._silence_length = silence_length
|
||||
self._talking_length = total_frames - silence_length
|
||||
self._current_frame = 0
|
||||
self._is_talking = False
|
||||
|
||||
def get_status(self):
|
||||
return self._status
|
||||
@ -27,10 +29,26 @@ class HumanStatus:
|
||||
|
||||
def try_to_talk(self):
|
||||
if self._status == HumanStatusEnum.silence:
|
||||
if self._current_frame - self._last_silence_frame < 0:
|
||||
if self._current_frame - self._silence_length < 0:
|
||||
return False
|
||||
self._status = HumanStatusEnum.talking
|
||||
return True
|
||||
|
||||
def get_index(self):
|
||||
return self._total_frames
|
||||
if self._is_talking:
|
||||
if self._current_frame < self._silence_length:
|
||||
index = self._current_frame
|
||||
else:
|
||||
index = self._silence_length + (self._current_frame - self._silence_length) % self._talking_length
|
||||
else:
|
||||
index = self._current_frame % self._silence_length
|
||||
|
||||
self._current_frame = (self._current_frame + 1) % self._total_frames
|
||||
return index
|
||||
|
||||
def start_talking(self):
|
||||
self._is_talking = True
|
||||
|
||||
def stop_talking(self):
|
||||
self._is_talking = False
|
||||
self._current_frame = 0
|
||||
|
@ -125,8 +125,8 @@ class HumanContext:
|
||||
self._tts_handle = TTSAudioSplitHandle(self, self._mal_handler)
|
||||
self._tts = TTSEdgeHttp(self._tts_handle)
|
||||
split = PunctuationSplit()
|
||||
# self._nlp = DouBao(self, split, self._tts)
|
||||
self._nlp = Kimi(self, split, self._tts)
|
||||
self._nlp = DouBao(self, split, self._tts)
|
||||
# self._nlp = Kimi(self, split, self._tts)
|
||||
self._asr = SherpaNcnnAsr()
|
||||
self._asr.attach(self._nlp)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user