# -*- coding: utf-8 -*-
"""Small text-to-speech (TTS) helper script.

Provides a helper that downloads an MP3 and converts it to WAV, a helper that
decodes an audio byte stream into mono float32 samples at 16 kHz, and a demo
``main`` that requests speech from an HTTP TTS endpoint and saves the result.
"""
import time

import librosa
import numpy as np
import requests
import resampy
import soundfile as sf

# Sample rate (Hz) that decoded audio is resampled to and saved with.
TARGET_SAMPLE_RATE = 16000

# Seconds to wait for the download server before giving up; without a timeout
# ``requests.get`` can block forever on an unresponsive host.
DOWNLOAD_TIMEOUT_SECONDS = 30


def download_tts(url):
    """Download the MP3 at *url* and convert it to a WAV file.

    The MP3 is written to ``./audio/mp3/<file_name>`` and the converted audio
    to ``./audio/wav/<file_name>.wav``, where ``file_name`` is ``url[3:]``
    (the URL with its first three characters dropped).
    NOTE(review): the three-character prefix presumably matches the URL
    format used by the caller -- confirm against the call site.

    Args:
        url: Address of the MP3 to download.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    # Imported lazily so pydub is only required when this helper is used.
    from pydub import AudioSegment

    file_name = url[3:]
    print(file_name)
    download_url = url
    print('download tts', download_url)

    resp = requests.get(download_url, timeout=DOWNLOAD_TIMEOUT_SECONDS)
    # Fail here instead of saving an HTTP error page as if it were audio.
    resp.raise_for_status()

    mp3_path = './audio/mp3/' + file_name
    with open(mp3_path, 'wb') as mp3:
        mp3.write(resp.content)

    sound = AudioSegment.from_mp3(mp3_path)
    sound.export('./audio/wav/' + file_name + '.wav', format="wav")


def __create_bytes_stream(byte_stream):
    """Decode an audio stream into mono float32 samples at 16 kHz.

    Args:
        byte_stream: File-like object (or path) accepted by ``soundfile.read``.

    Returns:
        A 1-D ``numpy.float32`` array. Multi-channel audio is reduced to its
        first channel, and audio whose sample rate differs from
        ``TARGET_SAMPLE_RATE`` is resampled to it. Empty audio is returned
        without resampling.
    """
    stream, sample_rate = sf.read(byte_stream)  # [T*sample_rate,] float64
    print(f'[INFO]tts audio stream {sample_rate}: {stream.shape}')
    stream = stream.astype(np.float32)

    if stream.ndim > 1:
        print(f'[WARN] audio has {stream.shape[1]} channels, only use the first.')
        stream = stream[:, 0]

    # Skip resampling for empty input: there is nothing to resample.
    if sample_rate != TARGET_SAMPLE_RATE and stream.shape[0] > 0:
        print(f'[WARN] audio sample rate is {sample_rate}, resampling into {TARGET_SAMPLE_RATE}.')
        stream = resampy.resample(x=stream, sr_orig=sample_rate, sr_new=TARGET_SAMPLE_RATE)

    return stream


def main():
    """Request speech from the TTS endpoint and save it as ``output_audio.wav``."""
    import aiohttp
    import asyncio
    from io import BytesIO

    async def fetch_audio():
        """POST the TTS request and write the decoded audio to disk."""
        # Local alternative endpoint:
        # url = "http://localhost:8082/v1/audio/speech"
        url = "https://tts.mzzsfy.eu.org/v1/audio/speech"
        data = {
            "model": "tts-1",
            "input": "写了一个高性能tts(文本转声音)工具,5千字仅需5秒,免费使用",
            "voice": "alloy",
            "speed": 1.0
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(url, json=data) as response:
                if response.status == 200:
                    audio_data = BytesIO(await response.read())
                    audio_stream = __create_bytes_stream(audio_data)
                    # Save as a new audio file.
                    sf.write("output_audio.wav", audio_stream, TARGET_SAMPLE_RATE)
                    print("Audio data received and saved to output_audio.wav")
                else:
                    print("Error:", response.status, await response.text())

    # Run the async function
    asyncio.run(fetch_audio())


if __name__ == "__main__":
    try:
        t = time.time()
        main()
        print(f'-------tts time:{time.time() - t:.4f}s')
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")
    except Exception as e:
        # Top-level boundary: report the failure instead of a raw traceback.
        print(e)