# -*- coding: utf-8 -*-
from io import BytesIO

import numpy as np
import soundfile as sf
import edge_tts
import resampy

from audio import save_chunks, save_wav
from .tts_base import TTSBase


class TTSEdge(TTSBase):
    """Text-to-speech backend built on ``edge_tts``.

    Audio returned by the service is accumulated in an in-memory byte
    buffer by ``_on_request`` and later decoded, resampled and dumped to a
    WAV file by ``_on_handle``.
    """

    def __init__(self, voice: str = 'zh-CN-XiaoyiNeural') -> None:
        """Create the backend.

        Args:
            voice: Voice name handed to ``edge_tts.Communicate``.
        """
        super().__init__()
        self._voice = voice
        # Encoded audio bytes collected from the streaming response.
        self._byte_stream = BytesIO()
        # Sequence number used to name the next dumped WAV file.
        self._count = 1

    async def _on_request(self, txt: str) -> None:
        """Synthesize *txt* and append the returned audio to the buffer.

        Only chunks whose ``type`` is ``"audio"`` are stored; every other
        event emitted by the stream (e.g. ``"WordBoundary"``) is ignored.

        Args:
            txt: Text to synthesize.
        """
        communicate = edge_tts.Communicate(txt, self._voice)
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                self._byte_stream.write(chunk["data"])

    async def _on_handle(self) -> None:
        """Decode the buffered audio and write it to a numbered WAV file.

        The file is written to ``../temp/audio/<count>.wav`` and the counter
        is advanced only after a successful write. Any failure is reported
        on stdout and swallowed (best-effort). The byte buffer is emptied in
        every case so the next request starts from a clean state.
        """
        self._byte_stream.seek(0)
        try:
            stream = self.__create_bytes_stream(self._byte_stream)
            print('-------tts start push chunk')
            # The samples were resampled to ``self._sample_rate``, so the
            # file must be written with that same rate (previously a
            # hard-coded 16000, which is wrong for any other target rate).
            save_wav(
                stream,
                f'../temp/audio/{self._count}.wav',
                self._sample_rate,
            )
            self._count += 1
            # TODO: pushing fixed-size chunks of ``stream`` to the consumer
            # is currently disabled; the audio is only dumped to disk.
            print('-------tts finish push chunk')
        except Exception as e:
            # Deliberate best-effort: report and carry on.
            print('-------tts finish error:', e)
        finally:
            # Drop whatever was buffered, on success and on failure alike.
            self._byte_stream.seek(0)
            self._byte_stream.truncate()

    def __create_bytes_stream(self, byte_stream: BytesIO) -> np.ndarray:
        """Decode *byte_stream* into mono float32 samples.

        Args:
            byte_stream: File-like object positioned at the start of the
                encoded audio, readable by ``soundfile.read``.

        Returns:
            A 1-D ``float32`` array. Multi-channel input is reduced to its
            first channel, and non-empty input whose sample rate differs
            from ``self._sample_rate`` is resampled to that rate.
        """
        stream, sample_rate = sf.read(byte_stream)  # [T*sample_rate,] float64
        print(f'[INFO]tts audio stream {sample_rate}: {stream.shape}')
        stream = stream.astype(np.float32)

        if stream.ndim > 1:
            print(f'[WARN] audio has {stream.shape[1]} channels, only use the first.')
            stream = stream[:, 0]

        if sample_rate != self._sample_rate and stream.shape[0] > 0:
            print(f'[WARN] audio sample rate is {sample_rate}, resampling into {self._sample_rate}.')
            stream = resampy.resample(
                x=stream,
                sr_orig=sample_rate,
                sr_new=self._sample_rate,
            )

        return stream