2024-10-15 13:00:24 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import logging
|
|
|
|
import queue
|
|
|
|
import time
|
|
|
|
from queue import Queue
|
2024-10-25 00:23:55 +00:00
|
|
|
from threading import Thread, Event, Condition
|
2024-10-15 13:00:24 +00:00
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
|
2024-10-25 00:23:55 +00:00
|
|
|
from human.message_type import MessageType
|
2024-10-17 15:26:21 +00:00
|
|
|
from human_handler import AudioHandler
|
2024-10-16 00:01:11 +00:00
|
|
|
from utils import melspectrogram
|
2024-10-15 13:00:24 +00:00
|
|
|
|
|
|
|
# Module-level logger named after this module, per the standard logging convention.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
class AudioMalHandler(AudioHandler):
    """Audio-to-mel pipeline stage.

    Raw audio chunks arrive via :meth:`on_handle` and are queued. A worker
    thread drains the queue in batches, forwards each audio frame downstream
    (``on_next_handle((frame, type), 0)``) and, once enough context has
    accumulated, computes mel-spectrogram windows with ``melspectrogram`` and
    forwards the chunk list (``on_next_handle(mel_chunks, 1)``).

    Back-pressure: the worker stalls when a ``Video_Render_Queue_Full``
    message arrives and resumes on ``Video_Render_Queue_Empty``
    (see :meth:`on_message`).
    """

    def __init__(self, context, handler):
        """Create the handler and start its worker thread.

        :param context: shared pipeline context; must expose ``sample_rate``,
            ``fps``, ``batch_size``, ``stride_left_size`` and
            ``stride_right_size``.
        :param handler: next handler in the processing chain.
        """
        super().__init__(context, handler)

        self._queue = Queue()

        # Back-pressure flag guarded by _condition: True while the video
        # render queue is full and the worker must stall.
        self._wait = False
        self._condition = Condition()

        # Worker-thread state. This MUST be initialised before the thread
        # starts, otherwise _run_step/get_audio_frame can race on missing
        # attributes (the original code started the thread first).
        self.frames = []
        # Number of audio samples per video frame.
        self.chunk = context.sample_rate // context.fps

        # The event doubles as a "keep running" flag: set() == run,
        # stop() clears it to terminate the worker loop.
        self._exit_event = Event()
        self._thread = Thread(target=self._on_run)
        self._exit_event.set()
        self._thread.start()

        logger.info("AudioMalHandler init")

    def on_message(self, message):
        """Handle render-queue back-pressure messages.

        ``Video_Render_Queue_Empty`` wakes a stalled worker,
        ``Video_Render_Queue_Full`` stalls it; any other message is
        delegated to the base class.
        """
        if message['msg_id'] == MessageType.Video_Render_Queue_Empty:
            with self._condition:
                if self._wait:
                    self._wait = False
                    self._condition.notify()
                    logger.debug('AudioMalHandler notify')
        elif message['msg_id'] == MessageType.Video_Render_Queue_Full:
            # Take the lock before flipping the flag so the worker's
            # wait_for predicate never observes a torn update (the original
            # wrote the flag unsynchronized).
            with self._condition:
                if not self._wait:
                    self._wait = True
                    logger.debug('AudioMalHandler wait')
        else:
            super().on_message(message)

    def on_handle(self, stream, index):
        """Queue one chunk of raw audio for the worker thread.

        :param stream: audio samples for one frame (presumably a numpy array
            of ``self.chunk`` samples — confirm against the upstream handler).
        :param index: positional index from the handler chain; unused here.
        """
        self._queue.put(stream)

    def _on_run(self):
        """Worker loop: process batches until :meth:`stop` clears the event."""
        logger.info('chunk2mal run')
        while self._exit_event.is_set():
            # Stall here while the video render queue reports it is full.
            with self._condition:
                self._condition.wait_for(lambda: not self._wait)
            self._run_step()
            time.sleep(0.02)
        logger.info('chunk2mal exit')

    def _run_step(self):
        """Collect one batch of audio frames and emit mel-spectrogram chunks."""
        # Pull 2 * batch_size frames; silence is synthesised when the queue
        # is empty (type 1), real audio is type 0.
        for _ in range(self._context.batch_size * 2):
            frame, _type = self.get_audio_frame()
            self.frames.append(frame)
            self.on_next_handle((frame, _type), 0)

        # Not enough temporal context accumulated yet — skip the mel step.
        if len(self.frames) <= self._context.stride_left_size + self._context.stride_right_size:
            return

        inputs = np.concatenate(self.frames)  # [N * chunk]
        mel = melspectrogram(inputs)

        # Skip the mel columns belonging to the left stride context
        # (80 mel frames per 50 video frames).
        # NOTE(review): the original also computed a symmetric, unused
        # `right` bound; that dead code has been removed.
        left = max(0, self._context.stride_left_size * 80 / 50)

        mel_idx_multiplier = 80. * 2 / self._context.fps
        mel_step_size = 16
        mel_chunks = []
        i = 0
        while i < (len(self.frames) - self._context.stride_left_size - self._context.stride_right_size) / 2:
            start_idx = int(left + i * mel_idx_multiplier)
            if start_idx + mel_step_size > len(mel[0]):
                # Clamp the final window to the end of the spectrogram.
                mel_chunks.append(mel[:, len(mel[0]) - mel_step_size:])
            else:
                mel_chunks.append(mel[:, start_idx: start_idx + mel_step_size])
            i += 1
        self.on_next_handle(mel_chunks, 1)

        # Keep only the stride context; discard older frames to bound memory.
        self.frames = self.frames[-(self._context.stride_left_size + self._context.stride_right_size):]

    def get_audio_frame(self):
        """Return ``(frame, type)``: the next queued audio chunk (type 0),
        or a silence chunk (type 1) if none arrives within 10 ms.
        """
        try:
            frame = self._queue.get(block=True, timeout=0.01)
            type_ = 0
        except queue.Empty:
            # No audio available: feed silence so the pipeline keeps pace.
            frame = np.zeros(self.chunk, dtype=np.float32)
            type_ = 1
        return frame, type_

    def stop(self):
        """Stop the worker thread and wait for it to terminate."""
        logger.info('stop')
        if self._exit_event is None:
            return
        self._exit_event.clear()
        # Wake the worker if it is stalled on back-pressure; without this
        # notify, join() below could block forever (deadlock in the original).
        with self._condition:
            self._wait = False
            self._condition.notify()
        if self._thread.is_alive():
            self._thread.join()
        logger.info('chunk2mal stop')

    def pause_talk(self):
        """Drop all queued, not-yet-processed audio."""
        self._queue.queue.clear()