human/tts/Chunk2Mal.py

# -*- coding: utf-8 -*-
import ctypes
import logging
import queue
import time
from queue import Queue
from threading import Thread, Event

import numpy as np
import audio
from audio_render import AudioRender


class Chunk2Mal:
    """Background worker that converts queued audio chunks into mel windows.

    Audio chunks handed to :meth:`push_chunk` are collected by a worker
    thread, concatenated in batches, converted with ``audio.melspectrogram``
    and sliced into fixed-width windows which are delivered through
    ``human.push_mel_chunks_queue``.

    The ``human`` object must provide ``get_audio_sample_rate()``,
    ``get_fps()`` and ``push_mel_chunks_queue(window)``.
    """

    # Width (in mel frames) of every window pushed to the human.
    _MEL_STEP_SIZE = 16
    # Window stride in mel frames is ``_MEL_IDX_SCALE / fps``.
    # NOTE(review): presumably the number of mel frames per second of audio
    # -- confirm against audio.melspectrogram's hop size.
    _MEL_IDX_SCALE = 80.
    # Batches smaller than this are kept until more audio arrives.
    _MIN_BATCH_CHUNKS = 3
    # A batch is converted as soon as it holds this many chunks
    # (200 ms of audio when each chunk is 20 ms).
    _FULL_BATCH_CHUNKS = 10
    # Seconds the worker blocks on the queue before re-checking the exit flag.
    _POLL_TIMEOUT = 0.5

    def __init__(self, human):
        """Initialise all state, then start the worker thread.

        Args:
            human: provider of the audio sample rate / fps and consumer of
                the generated mel windows.
        """
        self._human = human
        self._audio_chunk_queue = Queue()

        self._chunks = []
        # NOTE(review): copies of the pending chunks; nothing visible in this
        # class reads them -- they are only reset after each converted batch.
        self._audio_chunks = []
        # Samples per chunk, e.g. 16000 // 50 == 320 (20 ms per chunk).
        self._chunk_len = self._human.get_audio_sample_rate() // self._human.get_fps()
        self._audio_render = AudioRender()
        self._stream_len = 0

        # The event is *set* while the worker should keep running and is
        # cleared by stop().
        self._exit_event = Event()
        self._exit_event.set()
        # Start the worker last so it never observes a half-initialised
        # object and is not leaked if anything above raises.
        self._thread = Thread(target=self._on_run)
        self._thread.start()
        logging.info('chunk2mal start')

    @staticmethod
    def _split_mel(mel, fps, mel_step_size=16):
        """Slice a 2-D mel array into windows along its second axis.

        Window ``i`` starts at ``int(i * 80 / fps)``. The first window that
        would run past the end is replaced by the trailing ``mel_step_size``
        frames and ends the sequence, so at least one window is always
        returned.

        Args:
            mel: 2-D array; windows are cut along axis 1.
            fps: frame rate used to derive the window stride.
            mel_step_size: number of frames per window.

        Returns:
            list of array views, in the order they should be consumed.
        """
        total = mel.shape[1]
        stride = Chunk2Mal._MEL_IDX_SCALE / fps
        windows = []
        index = 0
        while True:
            start = int(index * stride)
            if start + mel_step_size > total:
                # Clamp at 0: a negative start would wrap around and silently
                # drop leading frames when the mel is shorter than one window.
                windows.append(mel[:, max(0, total - mel_step_size):])
                break
            windows.append(mel[:, start:start + mel_step_size])
            index += 1
        return windows

    def _concatenate(self):
        """Convert the pending chunks into mel windows and push them.

        Returns:
            True when a batch was converted and pushed, False when fewer
            than ``_MIN_BATCH_CHUNKS`` chunks are pending (they are kept).

        Raises:
            ValueError: if the computed mel spectrogram contains NaN.
        """
        logging.info('np.concatenate')
        if len(self._chunks) < self._MIN_BATCH_CHUNKS:
            logging.info('np.concatenate: %d', len(self._chunks))
            return False

        inputs = np.concatenate(self._chunks)
        self._chunks = []
        mel = audio.melspectrogram(inputs)
        if np.isnan(mel).any():
            raise ValueError(
                'Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again')

        windows = self._split_mel(mel, self._human.get_fps(), self._MEL_STEP_SIZE)
        for window in windows:
            self._human.push_mel_chunks_queue(window)

        self._audio_chunks = []
        logging.debug('mel_chunks count: %d', len(windows))
        return True

    def _on_run(self):
        """Worker loop: gather chunks and convert them batch by batch.

        A batch is converted when it reaches ``_FULL_BATCH_CHUNKS`` chunks,
        or as soon as the queue runs empty while at least
        ``_MIN_BATCH_CHUNKS`` chunks are pending.
        """
        logging.info('chunk2mal run')
        while self._exit_event.is_set():
            if self._chunks and self._audio_chunk_queue.empty():
                if self._concatenate():
                    continue
                # Too few chunks to convert yet: fall through to the blocking
                # get() below so the thread waits for more audio instead of
                # spinning on an empty queue.

            try:
                chunk = self._audio_chunk_queue.get(block=True, timeout=self._POLL_TIMEOUT)
            except queue.Empty:
                continue

            self._chunks.append(chunk)
            self._stream_len += len(chunk)
            logging.debug('Chunk2Mal _stream_len: %d', self._stream_len)
            self._audio_chunks.append(chunk.copy())

            if len(self._chunks) < self._FULL_BATCH_CHUNKS:
                continue

            logging.debug('len(self._chunks): %d', len(self._chunks))
            self._concatenate()

        logging.info('chunk2mal exit')

    def stop(self):
        """Ask the worker thread to exit and wait until it has finished."""
        if self._exit_event is None:
            return

        self._exit_event.clear()
        if self._thread.is_alive():
            self._thread.join()
        logging.info('chunk2mal stop')

    def push_chunk(self, chunk):
        """Enqueue one audio chunk for the worker thread."""
        self._audio_chunk_queue.put(chunk)

    def pull_chunk(self):
        """Take one chunk from the queue, waiting up to one second.

        Returns:
            ``(chunk, 1)`` when a queued chunk was available, otherwise
            ``(zeros, 0)`` where ``zeros`` is a float32 array of
            ``_chunk_len`` zeros.
        """
        try:
            chunk = self._audio_chunk_queue.get(block=True, timeout=1)
            chunk_type = 1
        except queue.Empty:
            chunk = np.zeros(self._chunk_len, dtype=np.float32)
            chunk_type = 0
        return chunk, chunk_type
添加chunk处理 2024-09-04 16:51:14 +00:00			`#encoding = utf8`
add audio render 2024-09-28 18:47:04 +00:00			`import ctypes`
添加chunk处理 2024-09-04 16:51:14 +00:00			`import logging`
			`import queue`
render image to ui 2024-09-26 17:34:52 +00:00			`import time`
添加chunk处理 2024-09-04 16:51:14 +00:00			`from queue import Queue`
			`from threading import Thread, Event`

			`import numpy as np`
			`import audio`
add audio render 2024-09-28 18:47:04 +00:00			`from audio_render import AudioRender`
添加chunk处理 2024-09-04 16:51:14 +00:00

			`class Chunk2Mal:`
			`def __init__(self, human):`
			`self._audio_chunk_queue = Queue()`
			`self._human = human`
			`self._thread = None`
modify human 2024-09-21 12:58:26 +00:00
添加chunk处理 2024-09-04 16:51:14 +00:00			`self._chunks = []`
add audio render 2024-09-28 18:47:04 +00:00			`self._audio_chunks = []`
modify human 2024-09-21 12:58:26 +00:00			`# 320 samples per chunk (20ms * 16000 / 1000)audio_chunk`
add test code 2024-09-22 08:41:19 +00:00			`self._chunk_len = self._human.get_audio_sample_rate() // self._human.get_fps()`
modify human 2024-09-21 12:58:26 +00:00
			`self._exit_event = Event()`
			`self._thread = Thread(target=self._on_run)`
			`self._exit_event.set()`
			`self._thread.start()`
add audio render 2024-09-28 18:47:04 +00:00			`self._audio_render = AudioRender()`
modify audio 2024-09-29 07:12:49 +00:00			`self._stream_len = 0`
modify human 2024-09-21 12:58:26 +00:00			`logging.info('chunk2mal start')`
添加chunk处理 2024-09-04 16:51:14 +00:00
add audio render 2024-09-28 18:47:04 +00:00			`def _concatenate(self):`
			`logging.info('np.concatenate')`
modify audio 2024-09-29 07:12:49 +00:00			`if len(self._chunks) < 3:`
			`logging.info(f'np.concatenate: {len(self._chunks)}')`
			`return`
add audio render 2024-09-28 18:47:04 +00:00			`inputs = np.concatenate(self._chunks) # [5 * chunk]`
			`self._chunks = []`
			`mel = audio.melspectrogram(inputs)`
			`if np.isnan(mel.reshape(-1)).sum() > 0:`
			`raise ValueError(`
			`'Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again')`

			`mel_step_size = 16`
			`# print('fps:', self._human.get_fps())`
			`mel_idx_multiplier = 80. / self._human.get_fps()`
			`# print('mel_idx_multiplier:', mel_idx_multiplier)`
			`count = 0`
			`i = 0`
			`while 1:`
			`count = count + 1`
			`start_idx = int(i * mel_idx_multiplier)`
			`print('i', i, 'start_idx', start_idx, 'mel len:', len(mel[0]))`
			`if start_idx + mel_step_size > len(mel[0]):`
			`self._human.push_mel_chunks_queue(mel[:, len(mel[0]) - mel_step_size:])`
			`break`
			`self._human.push_mel_chunks_queue(mel[:, start_idx: start_idx + mel_step_size])`
			`i += 1`

modify audio 2024-09-29 07:12:49 +00:00			`# wav = np.concatenate(self._audio_chunks) # [5 * chunk]self._audio_chunks`
			`# print('_concatenate', len(wav))`
			`# audio.save_chunks([wav], 16000, "./temp/audio/")`
			`# wav *= 32767 / max(0.01, np.max(np.abs(wav)))`
			`# wav = wav.astype(np.int16)`
			`# self._audio_render.write(wav, len(wav))`
add audio render 2024-09-28 18:47:04 +00:00			`self._audio_chunks = []`

			`print('mel_chunks count:', count)`

添加chunk处理 2024-09-04 16:51:14 +00:00			`def _on_run(self):`
			`logging.info('chunk2mal run')`
modify human 2024-09-21 12:58:26 +00:00			`while self._exit_event.is_set():`
render image to ui 2024-09-26 17:34:52 +00:00			`if self._audio_chunk_queue.empty():`
add audio render 2024-09-28 18:47:04 +00:00			`if len(self._chunks) > 0:`
			`self._concatenate()`
			`else:`
			`time.sleep(0.5)`
render image to ui 2024-09-26 17:34:52 +00:00			`continue`
添加chunk处理 2024-09-04 16:51:14 +00:00			`try:`
render image to ui 2024-09-26 17:34:52 +00:00			`chunk = self._audio_chunk_queue.get(block=True, timeout=1)`
添加chunk处理 2024-09-04 16:51:14 +00:00			`self._chunks.append(chunk)`
modify audio 2024-09-29 07:12:49 +00:00			`self._stream_len = self._stream_len + len(chunk)`
			`print('Chunk2Mal _stream_len:', self._stream_len)`
add audio render 2024-09-28 18:47:04 +00:00			`self._audio_chunks.append(chunk.copy())`

			`# self._human.push_audio_frames(chunk, 0)`
modify audio 2024-09-29 07:12:49 +00:00			`if len(self._chunks) < 10: # 200ms`
render image to ui 2024-09-26 17:34:52 +00:00			`continue`
添加chunk处理 2024-09-04 16:51:14 +00:00			`except queue.Empty:`
modify human 2024-09-18 15:48:18 +00:00			`# print('Chunk2Mal queue.Empty')`
添加chunk处理 2024-09-04 16:51:14 +00:00			`continue`

add audio render 2024-09-28 18:47:04 +00:00			`print('len(self._chunks):', len(self._chunks))`
			`self._concatenate()`
添加chunk处理 2024-09-04 16:51:14 +00:00

			`logging.info('chunk2mal exit')`

			`def stop(self):`
			`if self._exit_event is None:`
			`return`

modify human 2024-09-21 12:58:26 +00:00			`self._exit_event.clear()`
添加chunk处理 2024-09-09 00:23:04 +00:00			`if self._thread.is_alive():`
			`self._thread.join()`
添加chunk处理 2024-09-04 16:51:14 +00:00			`logging.info('chunk2mal stop')`

			`def push_chunk(self, chunk):`
			`self._audio_chunk_queue.put(chunk)`

			`def pull_chunk(self):`
			`try:`
添加chunk处理 2024-09-09 00:23:04 +00:00			`chunk = self._audio_chunk_queue.get(block=True, timeout=1)`
添加chunk处理 2024-09-04 16:51:14 +00:00			`type = 1`
			`except queue.Empty:`
add test code 2024-09-22 08:41:19 +00:00			`chunk = np.zeros(self._chunk_len, dtype=np.float32)`
添加chunk处理 2024-09-04 16:51:14 +00:00			`type = 0`
			`return chunk, type`