Compare commits


1 Commit

Author   SHA1        Message                        Date
jocelyn  c2871cac69  [ADD]add logic of loop frame   2025-06-17 16:13:12 +08:00
3 changed files with 18 additions and 13 deletions

View File

@@ -21,14 +21,16 @@ current_file_path = os.path.dirname(os.path.abspath(__file__))
 class AudioInferenceHandler(AudioHandler):
-    def __init__(self, context, handler):
+    def __init__(self, context, handler, person_config):
         super().__init__(context, handler)
         EventBus().register('stop', self._on_stop)
         self._mal_queue = SyncQueue(1, "AudioInferenceHandler_Mel")
         self._audio_queue = SyncQueue(context.batch_size * 2, "AudioInferenceHandler_Audio")
+        self.person_config = person_config
         self._is_running = True
+        self.last_direction = 1
         self._exit_event = Event()
         self._run_thread = Thread(target=self.__on_run, name="AudioInferenceHandlerThread")
         self._exit_event.set()
@@ -102,12 +104,12 @@ class AudioInferenceHandler(AudioHandler):
                             break
                 if is_all_silence:
-                    for i in range(batch_size):
+                    frame_indexes, self.last_direction = human_status.get_index_v2(self.person_config["frame_config"], self.last_direction, batch_size)  # [1,3,4]
+                    for i, frame_idx in zip(range(batch_size), frame_indexes):
                         if not self._is_running:
                             break
                         # self.on_next_handle((None, mirror_index(length, index),
-                        self.on_next_handle((None, human_status.get_index(),
-                                             audio_frames[i * 2:i * 2 + 2]), 0)
+                        self.on_next_handle((None, frame_idx, audio_frames[i * 2:i * 2 + 2]), 0)
                         # index = index + 1
                 else:
                     human_status.start_talking()
@@ -117,12 +119,12 @@ class AudioInferenceHandler(AudioHandler):
                     img_batch = []
                     index_list = []
                     # for i in range(batch_size):
-                    for i in range(len(mel_batch)):
+                    frame_indexes, self.last_direction = human_status.get_index_v2(self.person_config["frame_config"], self.last_direction, batch_size)  # [1,3,4]
+                    # TODO: loop-frame retrieval logic while in the inference (talking) state
+                    for i, frame_idx in zip(range(len(mel_batch)), frame_indexes):
                         # idx = mirror_index(length, index + i)
-                        idx = human_status.get_index()
-                        index_list.append(idx)
-                        face = face_list_cycle[idx]
+                        index_list.append(frame_idx)
+                        face = face_list_cycle[frame_idx]
                         img_batch.append(face)
                         # print('orign img_batch:', len(img_batch), 'origin mel_batch:', len(mel_batch))
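
Note on the zip() guards in both loops above: zip truncates to the shorter sequence, so if get_index_v2 ever returns fewer than batch_size (or len(mel_batch)) indexes, the trailing audio frames are silently skipped instead of raising. A minimal illustration of that assumed edge case:

    batch_size = 5
    frame_indexes = [1, 2, 3]  # hypothetical short return from get_index_v2
    pairs = list(zip(range(batch_size), frame_indexes))
    assert pairs == [(0, 1), (1, 2), (2, 3)]  # frames for i = 3 and 4 are never emitted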

View File

@@ -52,11 +52,10 @@ class HumanStatus:
         self._current_frame = (self._current_frame + 1) % self._total_frames
         return index

-    def get_index_v2(self):
+    def get_index_v2(self, frame_config: list, last_direction: int = 1, batch_size: int = 5):
         """
         """
-        frame_config = []
-        audio_frame_length = batch_size = 5
+        audio_frame_length = batch_size
         is_silent = True if not self._is_talking else False
         first_speak = True if self._is_talking and self.last_frame_talking_status == "silent" else False
         last_speak = True if self.last_frame_talking_status == "talk" and self.next_frame_talking_status == "silent" else False
@@ -71,7 +70,7 @@ class HumanStatus:
         )
         startfrom = start_idx_list[-1]
         # returns one batch of frame indexes at a time
-        return start_idx_list
+        return start_idx_list, last_direction

     def start_talking(self):
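
Most of get_index_v2's body falls outside these hunks. A minimal sketch of the back-and-forth ("ping-pong") walk the loop-frame logic appears to implement, assuming each frame_config entry is [start_frame, end_frame, loopable] and last_direction is 1 (forward) or -1 (backward); the real method also weighs is_silent / first_speak / last_speak, which this sketch ignores:

    def ping_pong_batch(frame_config, last_direction=1, batch_size=5, startfrom=1):
        # walk batch_size frame indexes within the first configured segment,
        # reversing direction whenever a segment bound is hit
        start, end, _loopable = frame_config[0]  # e.g. [1, face_frames - 1, True]
        idx, direction = startfrom, last_direction
        start_idx_list = []
        for _ in range(batch_size):
            start_idx_list.append(idx)
            nxt = idx + direction
            if nxt > end or nxt < start:  # bounce at the segment bounds
                direction = -direction
                nxt = idx + direction
            idx = nxt
        return start_idx_list, direction

Returning the updated direction, as the diff now does, lets the caller persist it (self.last_direction on the handler), so the walk keeps bouncing smoothly across batches instead of restarting from the segment start.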

View File

@@ -47,6 +47,10 @@ class HumanContext:
         self._m_frames = m_frames
         self._inv_m_frames = inv_m_frames
         face_images_length = len(self._face_list_cycle)
+        # TODO: get person config
+        self.person_config = {
+            "frame_config": [[1, face_frames - 1, True]],
+        }
         logging.info(f'face images length: {face_images_length}')
         print(f'face images length: {face_images_length}')
@@ -113,7 +117,7 @@ class HumanContext:
     def build(self, render_handler):
         self._render_handler = render_handler
-        self._infer_handler = AudioInferenceHandler(self, self._render_handler)
+        self._infer_handler = AudioInferenceHandler(self, self._render_handler, self.person_config)
         self._mal_handler = AudioMalHandler(self, self._infer_handler)
         self._tts_handle = TTSAudioSplitHandle(self, self._mal_handler)
         self._tts = TTSEdgeHttp(self._tts_handle)
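
Since person_config is still hard-coded (per the TODO above), a small guard before build() hands it to AudioInferenceHandler may be worth adding. A sketch under the assumed [start_frame, end_frame, loopable] segment shape (validate_frame_config is hypothetical, not an existing API):

    def validate_frame_config(frame_config: list, face_frames: int) -> None:
        # reject malformed segments before they drive frame indexing
        for start, end, loopable in frame_config:
            assert 0 <= start <= end < face_frames, f"bad segment: [{start}, {end}]"
            assert isinstance(loopable, bool), "third field assumed to be a loop flag"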