human/tts/Chunk2Mal.py

122 lines
3.9 KiB
Python
Raw Permalink Normal View History

2024-09-04 16:51:14 +00:00
#encoding = utf8
2024-10-03 17:52:49 +00:00
2024-09-04 16:51:14 +00:00
import logging
import queue
2024-09-26 17:34:52 +00:00
import time
2024-09-04 16:51:14 +00:00
from queue import Queue
2024-10-03 17:52:49 +00:00
import multiprocessing as mp
2024-09-04 16:51:14 +00:00
from threading import Thread, Event
import numpy as np
import audio
2024-09-28 18:47:04 +00:00
from audio_render import AudioRender
2024-09-04 16:51:14 +00:00
class Chunk2Mal:
    """Consume 16 kHz PCM audio chunks and convert them into batches of
    mel-spectrogram windows for the given ``human`` object.

    A background worker thread is started at construction time and runs
    until :meth:`stop` is called.
    """

    def __init__(self, human):
        """Start the chunk-to-mel worker.

        :param human: provider/consumer object; must expose ``get_fps()``,
            ``get_batch_size()``, ``get_stride_left_size()``,
            ``get_stride_right_size()``, ``get_audio_sample_rate()``,
            ``push_out_put()``, ``push_mel_chunks()`` and ``get_out_put()``.
        """
        self._human = human

        # Raw PCM frames kept as left/right context for the mel window.
        self.frames = []
        # Incoming 20 ms PCM chunks pushed by put_audio_frame().
        self.queue = Queue()

        self.fps = human.get_fps()
        self.batch_size = human.get_batch_size()
        self.stride_left_size = human.get_stride_left_size()
        self.stride_right_size = human.get_stride_right_size()

        # Samples per chunk, e.g. 320 = 16000 Hz * 20 ms / 1000.
        self.chunk = self._human.get_audio_sample_rate() // self._human.get_fps()

        self._stream_len = 0

        # The event is a "keep running" flag: set -> loop runs, cleared ->
        # worker exits.  All state is initialised BEFORE the thread starts
        # so _run_step never observes a half-built object.
        self._exit_event = Event()
        self._exit_event.set()
        self._thread = Thread(target=self._on_run)
        self._thread.start()
        logging.info('chunk2mal start')

    def _on_run(self):
        """Worker loop: run one batching step at a time until stopped."""
        logging.info('chunk2mal run')
        while self._exit_event.is_set():
            self._run_step()
            time.sleep(0.01)
        logging.info('chunk2mal exit')

    def _run_step(self):
        """Pull one batch of audio frames, compute mel chunks, push them on."""
        for _ in range(self.batch_size * 2):
            frame, _type = self.get_audio_frame()
            self.frames.append(frame)
            self._human.push_out_put(frame, _type)

        # Not enough left/right context yet -- do not run the network.
        if len(self.frames) <= self.stride_left_size + self.stride_right_size:
            return

        inputs = np.concatenate(self.frames)  # [N * chunk]
        mel = audio.melspectrogram(inputs)

        # 80 mel frames correspond to 50 audio chunks; skip the left stride.
        left = max(0, self.stride_left_size * 80 / 50)
        mel_idx_multiplier = 80. * 2 / self.fps
        mel_step_size = 16

        mel_chunks = []
        i = 0
        while i < (len(self.frames) - self.stride_left_size - self.stride_right_size) / 2:
            start_idx = int(left + i * mel_idx_multiplier)
            if start_idx + mel_step_size > len(mel[0]):
                # Clamp the final window to the end of the spectrogram.
                mel_chunks.append(mel[:, len(mel[0]) - mel_step_size:])
            else:
                mel_chunks.append(mel[:, start_idx: start_idx + mel_step_size])
            i += 1

        self._human.push_mel_chunks(mel_chunks)

        # Discard the consumed part to save memory, keeping only the
        # stride context needed by the next step.
        self.frames = self.frames[-(self.stride_left_size + self.stride_right_size):]

    def stop(self):
        """Signal the worker thread to exit and wait for it to finish."""
        if self._exit_event is None:
            return
        self._exit_event.clear()
        if self._thread.is_alive():
            self._thread.join()
        logging.info('chunk2mal stop')

    def pause_talk(self):
        """Drop any queued, not-yet-processed audio chunks."""
        self.queue.queue.clear()

    def put_audio_frame(self, audio_chunk):
        """Enqueue one 16 kHz / 20 ms PCM chunk for processing."""
        self.queue.put(audio_chunk)

    def get_audio_frame(self):
        """Return ``(frame, type)``.

        ``type`` is 0 for a real queued chunk, 1 for a generated silent
        (all-zero float32) chunk emitted when the queue is empty.
        """
        try:
            frame = self.queue.get(block=True, timeout=0.01)
            type_ = 0
        except queue.Empty:
            # No audio available within the timeout: emit silence.
            frame = np.zeros(self.chunk, dtype=np.float32)
            type_ = 1
        return frame, type_

    def warm_up(self):
        """Pre-fill the stride context with (typically silent) frames and
        drain the matching left-stride outputs from the human object."""
        for _ in range(self.stride_left_size + self.stride_right_size):
            audio_frame, type_ = self.get_audio_frame()
            self.frames.append(audio_frame)
            self._human.push_out_put(audio_frame, type_)
        for _ in range(self.stride_left_size):
            self._human.get_out_put()