human/tts/TTSBase.py
2024-09-21 20:58:26 +08:00

102 lines
2.8 KiB
Python

#encoding = utf8
import logging
import asyncio
import time
import edge_tts
import numpy as np
import soundfile
import resampy
import queue
from io import BytesIO
from queue import Queue
from threading import Thread, Event
logger = logging.getLogger(__name__)


class TTSBase:
    """Background text-to-speech worker built on edge-tts.

    Text pushed with :meth:`push_txt` is consumed by a worker thread,
    synthesized with edge-tts, decoded to mono float32 samples at
    ``self._sample_rate`` and handed to ``human.push_audio_chunk`` in
    fixed-size chunks of ``self._chunk`` samples.
    """

    def __init__(self, human):
        """Create the worker and start its thread immediately.

        Args:
            human: Object providing ``get_fps()`` and
                ``push_audio_chunk(chunk)``.
        """
        self._human = human
        self._queue = Queue()
        self._io_stream = BytesIO()
        self._sample_rate = 16000
        # Samples per pushed chunk.
        # NOTE(review): assumes get_fps() returns a positive int -- confirm.
        self._chunk = self._sample_rate // self._human.get_fps()
        # The event is *set* while the worker should keep running and
        # cleared by stop() to request shutdown.
        self._exit_event = Event()
        self._exit_event.set()
        self._thread = Thread(target=self._on_run)
        self._thread.start()
        logger.info('tts start')

    def _on_run(self):
        """Worker loop: synthesize queued texts until stop() is called."""
        logger.info('tts run')
        while self._exit_event.is_set():
            try:
                # The timeout lets the loop re-check the exit flag regularly.
                txt = self._queue.get(block=True, timeout=1)
            except queue.Empty:
                continue
            try:
                self._request(txt)
            except Exception:
                # Thread boundary: one failed request (network, decoding)
                # must not kill the worker and strand later texts.
                logger.exception('tts request failed')
        logger.info('tts exit')

    def _request(self, txt):
        """Synthesize ``txt`` and push the resulting audio to the human.

        Args:
            txt: Text to synthesize.
        """
        voice = 'zh-CN-XiaoyiNeural'
        # Drop audio left over from the previous request; otherwise it
        # would be decoded and pushed again together with the new audio.
        self._io_stream.seek(0)
        self._io_stream.truncate()

        started = time.perf_counter()
        # asyncio.run creates a fresh event loop and closes it afterwards.
        asyncio.run(self.__on_request(voice, txt))
        logger.info(f'edge tts time:{time.perf_counter() - started : 0.4f}s')

        if self._io_stream.tell() == 0:
            # Nothing was written, so there is nothing to decode.
            logger.warning('tts returned no audio')
            return

        self._io_stream.seek(0)
        stream = self.__create_bytes_stream(self._io_stream)
        self._push_chunks(stream)

    def _push_chunks(self, stream):
        """Push ``stream`` to the human in chunks of ``self._chunk`` samples.

        Trailing samples that do not fill a whole chunk are discarded.

        Args:
            stream: One-dimensional array of audio samples.
        """
        last_start = stream.shape[0] - self._chunk
        for start in range(0, last_start + 1, self._chunk):
            self._human.push_audio_chunk(stream[start:start + self._chunk])

    def __create_bytes_stream(self, io_stream):
        """Decode ``io_stream`` with soundfile and normalize the samples.

        Args:
            io_stream: Readable binary stream positioned at the start of
                the encoded audio.

        Returns:
            Mono float32 samples at ``self._sample_rate``.
        """
        stream, sample_rate = soundfile.read(io_stream)
        logger.info(f'tts audio stream {sample_rate} : {stream.shape}')
        return self._normalize_audio(stream, sample_rate)

    def _normalize_audio(self, stream, sample_rate):
        """Convert decoded samples to mono float32 at the target rate.

        Args:
            stream: Decoded samples, shape ``(frames,)`` or
                ``(frames, channels)``.
            sample_rate: Sample rate of ``stream`` in Hz.

        Returns:
            One-dimensional float32 array at ``self._sample_rate``.
        """
        stream = stream.astype(np.float32)
        if stream.ndim > 1:
            logger.warning(f'tts audio has {stream.shape[1]} channels, only use the first')
            # Column 0 is the first channel.
            stream = stream[:, 0]
        if sample_rate != self._sample_rate and stream.shape[0] > 0:
            logger.warning(f'tts audio sample rate is {sample_rate}, resample to {self._sample_rate}')
            stream = resampy.resample(x=stream, sr_orig=sample_rate, sr_new=self._sample_rate)
        return stream

    async def __on_request(self, voice, txt):
        """Stream synthesized audio from edge-tts into ``self._io_stream``.

        Args:
            voice: edge-tts voice name.
            txt: Text to synthesize.
        """
        communicate = edge_tts.Communicate(txt, voice)
        async for chunk in communicate.stream():
            # Only 'audio' items carry data to write; others are skipped.
            if chunk['type'] == 'audio':
                self._io_stream.write(chunk['data'])

    def stop(self):
        """Ask the worker thread to exit and wait for it to finish."""
        if self._exit_event is None:
            # Nothing to stop if the exit event was never set up.
            return
        self._exit_event.clear()
        self._thread.join()
        logger.info('tts stop')

    def clear(self):
        """Discard all texts that are still waiting in the queue."""
        # Hold the queue's own lock: the worker thread may be inside
        # get() on the same underlying deque.
        with self._queue.mutex:
            self._queue.queue.clear()

    def push_txt(self, txt):
        """Queue ``txt`` for synthesis by the worker thread."""
        self._queue.put(txt)