diff --git a/human/audio_inference_handler.py b/human/audio_inference_handler.py
index 407e146..30e430b 100644
--- a/human/audio_inference_handler.py
+++ b/human/audio_inference_handler.py
@@ -88,9 +88,11 @@ class AudioInferenceHandler(AudioHandler):
             # print('origin mel_batch:', len(mel_batch))
             is_all_silence = True
             audio_frames = []
+            current_text = ''
             for _ in range(batch_size * 2):
                 frame, type_ = self._audio_queue.get()
                 # print('AudioInferenceHandler type_', type_)
+                current_text = frame[1]
                 audio_frames.append((frame, type_))
                 if type_ == 0:
                     is_all_silence = False
@@ -107,7 +109,7 @@ class AudioInferenceHandler(AudioHandler):
                         0)
                     index = index + 1
             else:
-                logger.info('infer=======')
+                logger.info(f'infer======= {current_text}')
                 t = time.perf_counter()
                 img_batch = []
                 # for i in range(batch_size):
diff --git a/human/human_render.py b/human/human_render.py
index dbdddd0..87c1666 100644
--- a/human/human_render.py
+++ b/human/human_render.py
@@ -50,7 +50,7 @@ class HumanRender(AudioHandler):
             # t = time.time()
             self._run_step()
             # delay = time.time() - t
-            delay = 0.03805  # - delay
+            delay = 0.038  # - delay
             # print(delay)
             # if delay <= 0.0:
             #     continue
diff --git a/render/voice_render.py b/render/voice_render.py
index 32cf389..eff4f19 100644
--- a/render/voice_render.py
+++ b/render/voice_render.py
@@ -16,6 +16,7 @@ class VoiceRender(BaseRender):
     def __init__(self, play_clock, context):
         self._audio_render = AudioRender()
         super().__init__(play_clock, context, 'Voice')
+        self._current_text = ''
 
     def render(self, frame, ps):
         self._play_clock.update_display_time()
@@ -24,6 +25,9 @@ class VoiceRender(BaseRender):
         for audio_frame in frame:
             frame, type_ = audio_frame
             chunk, txt = frame
+            if txt != self._current_text:
+                self._current_text = txt
+                logging.info(f'VoiceRender: {txt}')
             chunk = (chunk * 32767).astype(np.int16)
 
             if self._audio_render is not None:
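
Apart from the human_render.py timing tweak (the hard-coded per-step delay drops from 0.03805 s to 0.038 s), these changes thread the text attached to each audio chunk through the logs. Each queue item in AudioInferenceHandler is (frame, type_) where frame is a (chunk, txt) pair, so current_text = frame[1] ends up holding the text of the last frame drained in the batch; VoiceRender then logs the text only when it changes, so a caption spanning many chunks produces one log line instead of one per chunk. The sketch below is a minimal, self-contained illustration of both patterns; the queue contents, batch size, and names outside the diff are illustrative assumptions, not code from the repo.

    import logging
    import queue

    logging.basicConfig(level=logging.INFO)

    # Assumed frame shape, mirroring the unpacking in VoiceRender.render:
    # each queue item is ((chunk, txt), type_), with type_ == 0 meaning voiced audio.
    audio_queue = queue.Queue()
    for txt in ('hello', 'hello', 'world'):
        audio_queue.put(((b'\x00\x01', txt), 0))

    # AudioInferenceHandler side: drain a batch and remember the last text seen.
    current_text = ''
    audio_frames = []
    for _ in range(3):                       # stands in for batch_size * 2
        frame, type_ = audio_queue.get()
        current_text = frame[1]              # (chunk, txt) -> txt; last frame wins
        audio_frames.append((frame, type_))
    logging.info(f'infer======= {current_text}')     # logs 'world'

    # VoiceRender side: log the caption only when it changes.
    last_text = ''
    for frame, type_ in audio_frames:
        chunk, txt = frame
        if txt != last_text:
            last_text = txt
            logging.info(f'VoiceRender: {txt}')      # 'hello' once, then 'world'

Logging only on change keeps the render loop's per-chunk hot path quiet while still recording every caption transition.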