human/human/audio_mal_handler.py

119 lines
3.8 KiB
Python
Raw Normal View History

2024-10-15 13:00:24 +00:00
#encoding = utf8
import logging
import queue
import time
2024-11-01 12:38:57 +00:00
2024-11-06 03:11:53 +00:00
from threading import Thread, Event
from eventbus import EventBus
2024-10-15 13:00:24 +00:00
import numpy as np
2024-10-17 15:26:21 +00:00
from human_handler import AudioHandler
2024-10-29 10:09:26 +00:00
from utils import melspectrogram, SyncQueue
2024-10-15 13:00:24 +00:00
logger = logging.getLogger(__name__)
class AudioMalHandler(AudioHandler):
def __init__(self, context, handler):
    """Set up the input queue and start the background worker thread.

    Args:
        context: Configuration object; this method reads ``batch_size``,
            ``sample_rate`` and ``fps`` from it.
        handler: Passed through unchanged to the parent constructor.
    """
    super().__init__(context, handler)

    # Everything the worker thread touches must exist BEFORE the thread is
    # started, otherwise `_on_run` can observe a half-built object and fail
    # with AttributeError on `_is_running`, `frames` or `chunk`.
    self._queue = SyncQueue(context.batch_size, "AudioMalHandler_queue")
    self.frames = []
    # sample_rate // fps -- presumably the number of audio samples that
    # belong to one video frame (TODO confirm against the context class).
    self.chunk = context.sample_rate // context.fps
    self._is_running = True

    # Note the polarity: the event being *set* means "keep running";
    # `stop()` clears it to end the worker loop.
    self._exit_event = Event()
    self._exit_event.set()

    self._thread = Thread(target=self._on_run, name="AudioMalHandlerThread")
    # Register for 'stop' only once `stop()` has every attribute it reads.
    EventBus().register('stop', self._on_stop)
    self._thread.start()
    logger.info("AudioMalHandler init")
2024-10-15 13:00:24 +00:00
2024-11-06 03:11:53 +00:00
def __del__(self):
    """Remove this handler's 'stop' listener from the event bus."""
    # NOTE(review): if EventBus keeps a strong reference to the bound method,
    # this finalizer cannot run while the listener is still registered --
    # confirm how EventBus stores its callbacks.
    bus = EventBus()
    bus.unregister('stop', self._on_stop)
def _on_stop(self, *args, **kwargs):
    """Event-bus callback for 'stop'; the event payload is ignored."""
    self.stop()
2024-10-25 00:23:55 +00:00
def on_message(self, message):
    """Hand *message* to the parent class's ``on_message`` unchanged."""
    super().on_message(message)
2024-10-25 00:23:55 +00:00
2024-10-15 13:00:24 +00:00
def on_handle(self, stream, index):
    """Queue *stream* for the worker thread; *index* is not used here."""
    pending = self._queue
    pending.put(stream)
2024-10-15 13:00:24 +00:00
def _on_run(self):
    """Worker-thread loop: call ``_run_step`` until the handler is stopped.

    The loop keeps going while ``_exit_event`` is set and ``_is_running`` is
    true; ``stop()`` clears both.
    """
    # Use the module-level logger (as __init__ does) so these records carry
    # this module's name instead of being emitted on the root logger.
    logger.info('chunk2mal run')
    while self._exit_event.is_set() and self._is_running:
        self._run_step()
        # Brief pause between steps so the loop does not spin flat out.
        time.sleep(0.02)
    logger.info('chunk2mal exit')
def _run_step(self):
    """Consume one batch of audio frames and emit the matching mel windows.

    Steps:
      1. Fetch ``batch_size * 2`` frames via ``get_audio_frame``, append each
         to ``self.frames`` and forward ``(frame, type)`` with
         ``on_next_handle(..., 0)``.
      2. If no more frames are buffered than the left + right stride, return
         without computing anything.
      3. Otherwise concatenate the buffered frames, compute the mel
         spectrogram, slice it into 16-column windows and forward the list
         with ``on_next_handle(..., 1)``.
      4. Keep only the trailing ``stride_left_size + stride_right_size``
         frames for the next step.

    Returns early, without emitting mel windows, if the handler is stopped
    while frames are being fetched.
    """
    ctx = self._context
    stride_total = ctx.stride_left_size + ctx.stride_right_size

    for _ in range(ctx.batch_size * 2):
        frame, frame_type = self.get_audio_frame()
        self.frames.append(frame)
        self.on_next_handle((frame, frame_type), 0)
        if not self._is_running:
            return

    # context not enough, do not run network.
    if len(self.frames) <= stride_total:
        return

    inputs = np.concatenate(self.frames)  # [N * chunk]
    mel = melspectrogram(inputs)
    mel_len = len(mel[0])

    # cut off stride
    # NOTE(review): 80 / 50 presumably converts audio frames to mel columns
    # (80 mel columns per second at 50 frames per second) -- confirm; it is
    # only consistent with `mel_idx_multiplier` below when fps == 50.
    left = max(0, ctx.stride_left_size * 80 / 50)
    mel_idx_multiplier = 80. * 2 / ctx.fps
    mel_step_size = 16
    # One window per two non-stride frames.
    window_count = (len(self.frames) - stride_total) / 2

    mel_chunks = []
    i = 0
    while i < window_count and self._is_running:
        start_idx = int(left + i * mel_idx_multiplier)
        if start_idx + mel_step_size > mel_len:
            # Window would run past the end: use the last full window.
            mel_chunks.append(mel[:, mel_len - mel_step_size:])
        else:
            mel_chunks.append(mel[:, start_idx:start_idx + mel_step_size])
        i += 1
    self.on_next_handle(mel_chunks, 1)

    # discard the old part to save memory
    # `frames[-0:]` is the WHOLE list, so a zero stride must be handled
    # explicitly or the buffer would grow (and be re-processed) forever.
    self.frames = self.frames[-stride_total:] if stride_total else []
2024-10-15 13:00:24 +00:00
def get_audio_frame(self):
    """Return the next audio frame and a type flag.

    Returns:
        ``(frame, 0)`` with the item taken from the input queue, or
        ``(zeros, 1)`` -- a float32 zero array of length ``self.chunk`` --
        when the queue raises ``queue.Empty``.
    """
    try:
        queued = self._queue.get()
    except queue.Empty:
        silence = np.zeros(self.chunk, dtype=np.float32)
        return silence, 1
    return queued, 0
def stop(self):
    """Stop the worker loop and wait for the worker thread to finish.

    May be called more than once. May also be called from the worker thread
    itself (for example when a 'stop' event is raised from inside a
    downstream handler); the join is skipped in that case because a thread
    joining itself raises ``RuntimeError``.
    """
    # Local import: the file only imports Thread and Event from threading.
    from threading import current_thread

    logger.info('stop')
    self._is_running = False
    if self._exit_event is None:
        return
    # Clearing the event is what ends the `_on_run` loop.
    self._exit_event.clear()
    worker = self._thread
    if worker is not current_thread() and worker.is_alive():
        worker.join()
    logger.info('chunk2mal stop')
2024-10-19 19:28:49 +00:00
def pause_talk(self):
    """Discard every item still waiting in the input queue."""
    # Report the backlog through the module logger rather than print(), so
    # the message follows the application's logging configuration.
    logger.info('AudioMalHandler pause_talk %s', self._queue.size())
    self._queue.clear()