human/tts/tts_edge_http.py

# encoding: utf-8
import logging
from io import BytesIO

import aiohttp
import numpy as np
import soundfile as sf
import edge_tts
import resampy

from .tts_base import TTSBase

logger = logging.getLogger(__name__)


class TTSEdgeHttp(TTSBase):
    """TTS backend that fetches synthesized speech from an HTTP endpoint.

    The text is POSTed as JSON to ``self._url``. The response body is decoded
    with ``soundfile``, reduced to its first channel, resampled to
    ``self._handle.sample_rate`` when the rates differ, and handed to the
    handle through ``on_handle(samples, index)``.
    """

    def __init__(self, handle, voice='zh-CN-XiaoyiNeural'):
        """Remember the voice name and the speech endpoint URL.

        Args:
            handle: Audio consumer; this class calls
                ``handle.on_handle(samples, index)`` and reads
                ``handle.sample_rate``.
            voice: Voice name stored on ``self._voice``.
        """
        super().__init__(handle)
        self._voice = voice
        # Alternative local endpoint: 'http://localhost:8082/v1/audio/speech'
        self._url = 'https://tts.mzzsfy.eu.org/v1/audio/speech'
        logger.info("TTSEdge init, %s", voice)

    async def _on_request(self, txt: str):
        """Request speech for ``txt`` and return the raw audio bytes.

        Args:
            txt: Text to synthesize.

        Returns:
            A ``BytesIO`` holding the response body when the server answers
            with HTTP 200, otherwise ``None``.
        """
        logger.info('TTSEdgeHttp, _on_request, txt:%s', txt)
        # NOTE(review): the payload always requests the "alloy" voice and
        # self._voice is never sent -- confirm which voices the endpoint
        # accepts before wiring self._voice in here.
        data = {
            "model": "tts-1",
            "input": txt,
            "voice": "alloy",
            "speed": 1.0,
            "thread": 10
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(self._url, json=data) as response:
                logger.info('TTSEdgeHttp, _on_request, response: %s', response)
                if response.status != 200:
                    # Return a plain None (not a tuple) so the failure value
                    # has the same shape as the success value and can be
                    # detected with a simple ``is None`` check.
                    logger.warning(
                        'TTSEdgeHttp, _on_request, unexpected status: %s',
                        response.status)
                    return None
                return BytesIO(await response.read())

    async def _on_handle(self, stream, index):
        """Decode ``stream`` and push the samples to the handle.

        Args:
            stream: ``BytesIO`` produced by ``_on_request``, or ``None`` when
                the request failed.
            index: Chunk index forwarded unchanged to ``on_handle``.

        On any failure the handle receives ``None`` for this ``index``. The
        stream is always closed before returning.
        """
        logger.info('-------tts _on_handle')
        if stream is None:
            # Nothing to decode: report it the same way as a decode failure.
            self._handle.on_handle(None, index)
            return

        try:
            stream.seek(0)
            samples = self.__create_bytes_stream(stream)
            logger.info('-------tts start push chunk %s', index)
            self._handle.on_handle(samples, index)
            logger.info('-------tts finish push chunk')
        except Exception as e:
            self._handle.on_handle(None, index)
            logger.exception('-------tts finish error: %s', e)
        finally:
            # Closing releases the buffer even if the error path itself raised.
            stream.close()

    def __create_bytes_stream(self, byte_stream):
        """Decode an audio file object into mono float32 samples.

        Args:
            byte_stream: File-like object readable by ``soundfile.read``.

        Returns:
            A float32 numpy array holding the first channel, resampled to
            ``self._handle.sample_rate`` when the source rate differs and the
            audio is not empty.
        """
        stream, sample_rate = sf.read(byte_stream)  # [T*sample_rate,] float64
        logger.info('[INFO]tts audio stream %s: %s', sample_rate, stream.shape)
        stream = stream.astype(np.float32)

        if stream.ndim > 1:
            logger.warning(
                '[WARN] audio has %s channels, only use the first.',
                stream.shape[1])
            stream = stream[:, 0]

        target_rate = self._handle.sample_rate
        if sample_rate != target_rate and stream.shape[0] > 0:
            logger.warning(
                '[WARN] audio sample rate is %s, resampling into %s.',
                sample_rate, target_rate)
            stream = resampy.resample(
                x=stream, sr_orig=sample_rate, sr_new=target_rate)

        return stream

    async def _on_close(self):
        """Log the shutdown; each stream is already closed in ``_on_handle``."""
        logger.info('TTSEdge close')
modfiy tts source 2024-10-19 10:47:34 +00:00			`#encoding = utf8`
			`import logging`
			`from io import BytesIO`

			`import aiohttp`
			`import numpy as np`
			`import soundfile as sf`
			`import edge_tts`
			`import resampy`

			`from .tts_base import TTSBase`

			`logger = logging.getLogger(__name__)`


			`class TTSEdgeHttp(TTSBase):`
			`def __init__(self, handle, voice='zh-CN-XiaoyiNeural'):`
			`super().__init__(handle)`
			`self._voice = voice`
modify render delay 2024-10-31 13:38:35 +00:00			`# self._url = 'http://localhost:8082/v1/audio/speech'`
			`self._url = 'https://tts.mzzsfy.eu.org/v1/audio/speech'`
modfiy tts source 2024-10-19 10:47:34 +00:00			`logger.info(f"TTSEdge init, {voice}")`

			`async def _on_request(self, txt: str):`
modify buffer size 2024-11-07 23:27:00 +00:00			`logger.info(f'TTSEdgeHttp, _on_request, txt:{txt}')`
modfiy tts source 2024-10-19 10:47:34 +00:00			`data = {`
			`"model": "tts-1",`
			`"input": txt,`
			`"voice": "alloy",`
			`"speed": 1.0,`
			`"thread": 10`
			`}`
			`async with aiohttp.ClientSession() as session:`
			`async with session.post(self._url, json=data) as response:`
modiy crash 2024-10-30 08:34:12 +00:00			`print('TTSEdgeHttp, _on_request, response:', response)`
modfiy tts source 2024-10-19 10:47:34 +00:00			`if response.status == 200:`
			`stream = BytesIO(await response.read())`
			`return stream`
			`else:`
			`byte_stream = None`
modify buffer size 2024-11-07 23:27:00 +00:00			`return byte_stream, None`
modfiy tts source 2024-10-19 10:47:34 +00:00
			`async def _on_handle(self, stream, index):`
			`print('-------tts _on_handle')`
			`try:`
			`stream.seek(0)`
			`byte_stream = self.__create_bytes_stream(stream)`
modiy crash 2024-10-30 08:34:12 +00:00			`print('-------tts start push chunk', index)`
modfiy tts source 2024-10-19 10:47:34 +00:00			`self._handle.on_handle(byte_stream, index)`
			`stream.seek(0)`
			`stream.truncate()`
			`print('-------tts finish push chunk')`

			`except Exception as e:`
			`self._handle.on_handle(None, index)`
			`stream.seek(0)`
			`stream.truncate()`
			`print('-------tts finish error:', e)`
			`stream.close()`

			`def __create_bytes_stream(self, byte_stream):`
			`stream, sample_rate = sf.read(byte_stream) # [T*sample_rate,] float64`
			`print(f'[INFO]tts audio stream {sample_rate}: {stream.shape}')`
			`stream = stream.astype(np.float32)`

			`if stream.ndim > 1:`
			`print(f'[WARN] audio has {stream.shape[1]} channels, only use the first.')`
			`stream = stream[:, 0]`

			`if sample_rate != self._handle.sample_rate and stream.shape[0] > 0:`
			`print(f'[WARN] audio sample rate is {sample_rate}, resampling into {self._handle.sample_rate}.')`
			`stream = resampy.resample(x=stream, sr_orig=sample_rate, sr_new=self._handle.sample_rate)`

			`return stream`

			`async def _on_close(self):`
			`print('TTSEdge close')`
			`# if self._byte_stream is not None and not self._byte_stream.closed:`
			`# self._byte_stream.close()`