Add text render

This commit is contained in:
brige 2024-11-13 19:29:40 +08:00
parent b3bbf40d95
commit 742340971b
3 changed files with 8 additions and 2 deletions

View File

@@ -88,9 +88,11 @@ class AudioInferenceHandler(AudioHandler):
# print('origin mel_batch:', len(mel_batch)) # print('origin mel_batch:', len(mel_batch))
is_all_silence = True is_all_silence = True
audio_frames = [] audio_frames = []
current_text = ''
for _ in range(batch_size * 2): for _ in range(batch_size * 2):
frame, type_ = self._audio_queue.get() frame, type_ = self._audio_queue.get()
# print('AudioInferenceHandler type_', type_) # print('AudioInferenceHandler type_', type_)
current_text = frame[1]
audio_frames.append((frame, type_)) audio_frames.append((frame, type_))
if type_ == 0: if type_ == 0:
is_all_silence = False is_all_silence = False
@@ -107,7 +109,7 @@ class AudioInferenceHandler(AudioHandler):
0) 0)
index = index + 1 index = index + 1
else: else:
logger.info('infer=======') logger.info(f'infer======= {current_text}')
t = time.perf_counter() t = time.perf_counter()
img_batch = [] img_batch = []
# for i in range(batch_size): # for i in range(batch_size):

View File

@@ -50,7 +50,7 @@ class HumanRender(AudioHandler):
# t = time.time() # t = time.time()
self._run_step() self._run_step()
# delay = time.time() - t # delay = time.time() - t
delay = 0.03805 # - delay delay = 0.038 # - delay
# print(delay) # print(delay)
# if delay <= 0.0: # if delay <= 0.0:
# continue # continue

View File

@@ -16,6 +16,7 @@ class VoiceRender(BaseRender):
def __init__(self, play_clock, context): def __init__(self, play_clock, context):
self._audio_render = AudioRender() self._audio_render = AudioRender()
super().__init__(play_clock, context, 'Voice') super().__init__(play_clock, context, 'Voice')
self._current_text = ''
def render(self, frame, ps): def render(self, frame, ps):
self._play_clock.update_display_time() self._play_clock.update_display_time()
@@ -24,6 +25,9 @@ class VoiceRender(BaseRender):
for audio_frame in frame: for audio_frame in frame:
frame, type_ = audio_frame frame, type_ = audio_frame
chunk, txt = frame chunk, txt = frame
if txt != self._current_text:
self._current_text = txt
logging.info(f'VoiceRender: {txt}')
chunk = (chunk * 32767).astype(np.int16) chunk = (chunk * 32767).astype(np.int16)
if self._audio_render is not None: if self._audio_render is not None: