add audio render

jiegeaiai 2024-09-29 02:47:04 +08:00
parent f848529859
commit 472a17f896
11 changed files with 213 additions and 173 deletions

View File

@@ -246,12 +246,12 @@ def datagen(frames, mels):

 def datagen_signal(frame, mel, face_det_results):
-    img_batch, mel_batch, frame_batch, coords_batch = [], [], [], []
+    img_batch, mel_batch, frame_batch, coord_batch = [], [], [], []
     # for i, m in enumerate(mels):
     idx = 0
     frame_to_save = frame.copy()
-    face, coords = face_det_results[idx].copy()
+    face, coord = face_det_results[idx].copy()
     face = cv2.resize(face, (img_size, img_size))
     m = mel
@@ -259,7 +259,7 @@ def datagen_signal(frame, mel, face_det_results):
     img_batch.append(face)
     mel_batch.append(m)
     frame_batch.append(frame_to_save)
-    coords_batch.append(coords)
+    coord_batch.append(coord)

     if len(img_batch) >= wav2lip_batch_size:
         img_batch, mel_batch = np.asarray(img_batch), np.asarray(mel_batch)
@@ -269,7 +269,7 @@ def datagen_signal(frame, mel, face_det_results):
         img_batch = np.concatenate((img_masked, img_batch), axis=3) / 255.
         mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1])
-        return img_batch, mel_batch, frame_batch, coords_batch
+        return img_batch, mel_batch, frame_batch, coord_batch

     if len(img_batch) > 0:
         img_batch, mel_batch = np.asarray(img_batch), np.asarray(mel_batch)
@@ -279,7 +279,7 @@ def datagen_signal(frame, mel, face_det_results):
         img_batch = np.concatenate((img_masked, img_batch), axis=3) / 255.
         mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1])
-        return img_batch, mel_batch, frame_batch, coords_batch
+        return img_batch, mel_batch, frame_batch, coord_batch

 # Load audio data from a byte stream
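Note: the hunks above only rename coords to coord, but they all pass through the same Wav2Lip-style masking step (the img_masked / np.concatenate lines kept as context). A minimal sketch of that step, assuming img_size = 96 (the Wav2Lip default; the repo defines its own value):

    import numpy as np

    img_size = 96  # assumption: Wav2Lip's default face-crop size

    def mask_lower_half(img_batch: np.ndarray) -> np.ndarray:
        # img_batch: (N, img_size, img_size, 3) face crops
        img_masked = img_batch.copy()
        img_masked[:, img_size // 2:] = 0  # zero the mouth half so audio must drive it
        # 6-channel input: masked target plus full reference, scaled to [0, 1]
        return np.concatenate((img_masked, img_batch), axis=3) / 255.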
@@ -294,7 +294,7 @@ def load_audio_from_bytes(byte_data):

 class Human:
     def __init__(self):
-        self._fps = 25  # 20 ms per frame
+        self._fps = 25  # 40 ms per frame
         self._batch_size = 16
         self._sample_rate = 16000
         self._stride_left_size = 10
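Note: the corrected comment is the real content of this hunk. At 25 fps a video frame lasts 1000 / 25 = 40 ms, not 20 ms, so each frame maps to 640 samples of 16 kHz audio (chunk2mal's _chunk_len). A quick check:

    fps = 25
    sample_rate = 16000
    ms_per_frame = 1000 / fps                # 40.0 ms per video frame
    samples_per_frame = sample_rate // fps   # 640 samples per frame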
@@ -340,13 +340,14 @@ class Human:
     #     p.terminate()

     def test(self):
-        wav = audio.load_wav(r'./audio/test.wav', 16000)
+        wav = audio.load_wav(r'./audio/audio.wav', 16000)
         # with open(r'./audio/test.wav', 'rb') as f:
         #     byte_data = f.read()
         #
         #     byte_data = byte_data[16:]
         # inputs = np.concatenate(byte_data)  # [N * chunk]
         # wav = load_audio_from_bytes(inputs)
+        print('wav length:', len(wav))
         mel = audio.melspectrogram(wav)
         if np.isnan(mel.reshape(-1)).sum() > 0:
             raise ValueError(
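Note: the NaN guard's message points at the standard Wav2Lip workaround for silence-heavy TTS audio. A hypothetical helper, not part of this commit, that applies it:

    import numpy as np

    def add_epsilon_noise(wav: np.ndarray, eps: float = 1e-6) -> np.ndarray:
        # break up exact-zero stretches so the log-mel never takes log(0)
        return wav + eps * np.random.randn(len(wav)).astype(wav.dtype)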
@@ -405,9 +406,9 @@ class Human:
             p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))
             f[y1:y2, x1:x2] = p
-            # name = "%04d" % j
-            # cv2.imwrite(f'temp/images/{j}.jpg', p)
-            # j = j + 1
+            name = "%04d" % j
+            cv2.imwrite(f'temp/images/{j}.jpg', p)
+            j = j + 1
             p = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
             self._test_image_queue.put(p)
             # out.write(f)
@@ -460,7 +461,6 @@ class Human:
         self._feat_queue.put(mel_chunks)

     def push_audio_frames(self, chunk, type_):
-        print("push_audio_frames")
         self._output_queue.put((chunk, type_))

     def push_render_image(self, image):

3 binary files changed (contents not shown).

audio_render/__init__.py (new file, +3)

@@ -0,0 +1,3 @@
+#encoding = utf8
+from .audio_render import AudioRender

audio_render/audio_render.py (new file)

@@ -0,0 +1,35 @@
+#encoding = utf8
+from ctypes import *
+import os
+
+current = os.path.dirname(__file__)
+dynamic_path = os.path.join(current, 'AudioRender.dll')
+
+
+def audio_render_log_callback(level, log, size):
+    print(f'level={level}, log={log}, len={size}')
+
+
+class AudioRender:
+    def __init__(self):
+        self.__audio_render_obj = WinDLL(dynamic_path)
+        print(self.__audio_render_obj)
+        if self.__audio_render_obj is not None:
+            CALLBACK_TYPE = CFUNCTYPE(None, c_int, c_ubyte, c_uint)
+            c_callback = CALLBACK_TYPE(audio_render_log_callback)
+            self.__init = self.__audio_render_obj.Initialize(c_callback)
+            print('AudioRender init', self.__init)
+
+    def __del__(self):
+        print('AudioRender __del__')
+        if self.__audio_render_obj is None:
+            return
+        if self.__init:
+            self.__audio_render_obj.Uninitialize()
+
+    def write(self, data, size):
+        if not self.__init:
+            return False
+        # argtypes belongs on the Write function, not on the DLL handle
+        self.__audio_render_obj.Write.argtypes = (POINTER(c_ubyte), c_uint)
+        return self.__audio_render_obj.Write(data.ctypes.data_as(POINTER(c_ubyte)), size)
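Note: a hypothetical caller of this wrapper, for context. Assumptions: Windows only (WinDLL), AudioRender.dll sits next to the module, and, matching the chunk2mal change below, write() receives 16 kHz int16 PCM with the sample count as size:

    import numpy as np
    from audio_render import AudioRender

    render = AudioRender()
    t = np.arange(16000) / 16000.0                                 # one second at 16 kHz
    pcm = (0.2 * np.sin(2 * np.pi * 440.0 * t) * 32767).astype(np.int16)
    render.write(pcm, len(pcm))                                    # same call shape as chunk2mal uses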

View File

@@ -1,102 +1,105 @@
 #encoding = utf8
+import edge_tts
+import asyncio
+import pyaudio
+from pydub import AudioSegment
+from io import BytesIO
+
+# When used inside a Jupyter Notebook, lift the event-loop restriction
+try:
+    import nest_asyncio
+    nest_asyncio.apply()
+except ImportError:
+    pass
+
+
+def play_audio(data: bytes, stream: pyaudio.Stream) -> None:
+    stream.write(AudioSegment.from_mp3(BytesIO(data)).raw_data)
+
+
+CHUNK_SIZE = 20 * 1024
+
+
+async def play_tts(text, voice):
+    communicate = edge_tts.Communicate(text, voice)
+
+    # Set up PyAudio
+    audio = pyaudio.PyAudio()
+    stream = audio.open(format=pyaudio.paInt16, channels=1, rate=16000, output=True)
+
+    # async for chunk in communicate.stream():  # use the stream method
+    #     if chunk['type'] == 'audio':  # make sure the chunk is a byte stream
+    #         stream.write(chunk['data'])
+
+    total_data = b''
+    for chunk in communicate.stream_sync():
+        if chunk["type"] == "audio" and chunk["data"]:
+            total_data += chunk["data"]
+            if len(total_data) >= CHUNK_SIZE:
+                # print(f"Time elapsed: {time.time() - start_time:.2f} seconds")  # Print time
+                stream.write(AudioSegment.from_mp3(BytesIO(total_data[:CHUNK_SIZE])).raw_data)
+                # play_audio(total_data[:CHUNK_SIZE], stream)  # Play first CHUNK_SIZE bytes
+                total_data = total_data[CHUNK_SIZE:]  # Remove played data
+
+    # play_audio(total_data, stream)
+    # Stop and close the audio stream
+    stream.stop_stream()
+    stream.close()
+    audio.terminate()
+
+
+async def save_to_file(text, voice, filename):
+    communicate = edge_tts.Communicate(text, voice)
+
+    with open(filename, "wb") as f:
+        async for chunk in communicate.stream():
+            if chunk['type'] == 'audio':
+                f.write(chunk['data'])
+
+
+if __name__ == "__main__":
+    text = "Hello, this is a test of the Edge TTS service."
+    voice = "en-US-JessaNeural"
+
+    # Run the async function with asyncio.run()
+    asyncio.run(play_tts(text, voice))
+    # asyncio.run(save_to_file(text, voice, "output.wav"))
+
 #
 # import edge_tts
+# import asyncio
 # import pyaudio
-# from io import BytesIO
 # from pydub import AudioSegment
-# import time
+# from io import BytesIO
 #
-# TEXT = 'Hello World! How are you guys doing? I hope great, cause I am having fun and honestly it has been a blast'
-# VOICE = "en-US-AndrewMultilingualNeural"
-# CHUNK_SIZE = 20 * 1024  # Assuming around 1024 bytes per chunk (adjust based on format)
-#
-# def main() -> None:
-#     start_time = time.time()
-#     communicator = edge_tts.Communicate(TEXT, VOICE)
-#
-#     pyaudio_instance = pyaudio.PyAudio()
-#     audio_stream = pyaudio_instance.open(format=pyaudio.paInt16, channels=1, rate=16000, output=True)
-#
-#     total_data = b''  # Store audio data instead of chunks
-#
-#     for chunk in communicator.stream_sync():
-#         if chunk["type"] == "audio" and chunk["data"]:
-#             total_data += chunk["data"]
-#             if len(total_data) >= CHUNK_SIZE:
-#                 print(f"Time elapsed: {time.time() - start_time:.2f} seconds")  # Print time
-#                 play_audio(total_data[:CHUNK_SIZE], audio_stream)  # Play first CHUNK_SIZE bytes
-#                 total_data = total_data[CHUNK_SIZE:]  # Remove played data
-#
-#     # Play remaining audio
-#     play_audio(total_data, audio_stream)
-#
-#     audio_stream.stop_stream()
-#     audio_stream.close()
-#     pyaudio_instance.terminate()
+# # When used inside a Jupyter Notebook, lift the event-loop restriction
+# try:
+#     import nest_asyncio
+#     nest_asyncio.apply()
+# except ImportError:
+#     pass
 #
 # def play_audio(data: bytes, stream: pyaudio.Stream) -> None:
 #     stream.write(AudioSegment.from_mp3(BytesIO(data)).raw_data)
 #
+# CHUNK_SIZE = 20 * 1024
+# async def play_tts(text, voice):
+#     communicate = edge_tts.Communicate(text, voice)
+#
+#     # Set up PyAudio
+#     audio = pyaudio.PyAudio()
+#     stream = audio.open(format=pyaudio.paInt16, channels=1, rate=24000, output=True)
+#
+#     # async for chunk in communicate.stream():  # use the stream method
+#     #     if chunk['type'] == 'audio':  # make sure the chunk is a byte stream
+#     #         stream.write(chunk['data'])
+#
+#     total_data = b''
+#     for chunk in communicate.stream_sync():
+#         if chunk["type"] == "audio" and chunk["data"]:
+#             total_data += chunk["data"]
+#             if len(total_data) >= CHUNK_SIZE:
+#                 # print(f"Time elapsed: {time.time() - start_time:.2f} seconds")  # Print time
+#                 stream.write(AudioSegment.from_mp3(BytesIO(total_data[:CHUNK_SIZE])).raw_data)
+#                 # play_audio(total_data[:CHUNK_SIZE], stream)  # Play first CHUNK_SIZE bytes
+#                 total_data = total_data[CHUNK_SIZE:]  # Remove played data
+#     # play_audio(total_data, stream)
+#     # Stop and close the audio stream
+#     stream.stop_stream()
+#     stream.close()
+#     audio.terminate()
+#
+#
+# async def save_to_file(text, voice, filename):
+#     communicate = edge_tts.Communicate(text, voice)
+#
+#     with open(filename, "wb") as f:
+#         async for chunk in communicate.stream():
+#             if chunk['type'] == 'audio':
+#                 f.write(chunk['data'])
+#
 # if __name__ == "__main__":
-#     main()
+#     text = "Hello, this is a test of the Edge TTS service."
+#     voice = "en-US-JessaNeural"
+#
+#     # Run the async function with asyncio.run()
+#     asyncio.run(play_tts(text, voice))
+#     # asyncio.run(save_to_file(text, voice, "output.wav"))
-
-import edge_tts
-import pyaudio
-from io import BytesIO
-from pydub import AudioSegment
-import time
-
-TEXT = 'Hello World! How are you guys doing? I hope great, cause I am having fun and honestly it has been a blast'
-VOICE = "en-US-AndrewMultilingualNeural"
-CHUNK_SIZE = 20 * 1024  # Assuming around 1024 bytes per chunk (adjust based on format)
-
-
-def main() -> None:
-    start_time = time.time()
-    communicator = edge_tts.Communicate(TEXT, VOICE)
-
-    pyaudio_instance = pyaudio.PyAudio()
-    audio_stream = pyaudio_instance.open(format=pyaudio.paInt16, channels=1, rate=24000, output=True)
-
-    total_data = b''  # Store audio data instead of chunks
-
-    for chunk in communicator.stream_sync():
-        if chunk["type"] == "audio" and chunk["data"]:
-            total_data += chunk["data"]
-            if len(total_data) >= CHUNK_SIZE:
-                print(f"Time elapsed: {time.time() - start_time:.2f} seconds")  # Print time
-                play_audio(total_data[:CHUNK_SIZE], audio_stream)  # Play first CHUNK_SIZE bytes
-                total_data = total_data[CHUNK_SIZE:]  # Remove played data
-
-    # Play remaining audio
-    play_audio(total_data, audio_stream)
-
-    audio_stream.stop_stream()
-    audio_stream.close()
-    pyaudio_instance.terminate()
-
-
-def play_audio(data: bytes, stream: pyaudio.Stream) -> None:
-    stream.write(AudioSegment.from_mp3(BytesIO(data)).raw_data)
-
-
-if __name__ == "__main__":
-    main()
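Note: both the removed main() and the new play_tts() slice the accumulated MP3 byte stream at fixed CHUNK_SIZE offsets and decode each slice independently, which can cut an MP3 frame in half at a boundary. A sketch of a whole-buffer variant, using the same imports as the active code above:

    def tts_to_pcm(text: str, voice: str) -> bytes:
        # collect the full MP3 stream, then decode once: no mid-frame cuts
        buf = b''.join(chunk["data"]
                       for chunk in edge_tts.Communicate(text, voice).stream_sync()
                       if chunk["type"] == "audio")
        seg = AudioSegment.from_mp3(BytesIO(buf))
        return seg.set_frame_rate(16000).set_channels(1).raw_data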

View File

@@ -1,5 +1,6 @@
 #encoding = utf8
 import queue
+import time
 from queue import Queue
 from threading import Thread, Event
 import logging
@@ -169,6 +170,7 @@ class Infer:
         j = 0
+        count = 0
         while self._exit_event.is_set():
             try:
                 m = self._queue.get(block=True, timeout=1)
@@ -180,6 +182,8 @@ class Infer:
             img_batch = torch.FloatTensor(np.transpose(img_batch, (0, 3, 1, 2))).to(device)
             mel_batch = torch.FloatTensor(np.transpose(mel_batch, (0, 3, 1, 2))).to(device)
+
+            time.sleep(0.01)
             with torch.no_grad():
                 pred = model(mel_batch, img_batch)
@@ -189,12 +193,14 @@ class Infer:
                 p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))
                 f[y1:y2, x1:x2] = p
-                name = "%04d" % j
+                # name = "%04d" % j
                 cv2.imwrite(f'temp/images/{j}.jpg', p)
                 j = j + 1
+                # count = count + 1
                 p = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
                 self._human.push_render_image(p)
                 # out.write(f)
+            # print('infer count:', count)

     def push(self, chunk):
         self._queue.put(chunk)

View File

@@ -1,5 +1,5 @@
 #encoding = utf8
-import ctypes
 import logging
 import queue
 import time
@@ -8,6 +8,7 @@ from threading import Thread, Event
 import numpy as np

 import audio
+from audio_render import AudioRender


 class Chunk2Mal:
@@ -17,6 +18,7 @@ class Chunk2Mal:
         self._thread = None
         self._chunks = []
+        self._audio_chunks = []

         # 320 samples per audio_chunk (20 ms * 16000 / 1000)
         self._chunk_len = self._human.get_audio_sample_rate() // self._human.get_fps()
@@ -24,63 +26,67 @@ class Chunk2Mal:
         self._thread = Thread(target=self._on_run)
         self._exit_event.set()
         self._thread.start()
+        self._audio_render = AudioRender()
         logging.info('chunk2mal start')

+    def _concatenate(self):
+        logging.info('np.concatenate')
+        inputs = np.concatenate(self._chunks)  # [5 * chunk]
+        self._chunks = []
+        mel = audio.melspectrogram(inputs)
+        if np.isnan(mel.reshape(-1)).sum() > 0:
+            raise ValueError(
+                'Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again')
+
+        mel_step_size = 16
+        # print('fps:', self._human.get_fps())
+        mel_idx_multiplier = 80. / self._human.get_fps()
+        # print('mel_idx_multiplier:', mel_idx_multiplier)
+        count = 0
+        i = 0
+        while 1:
+            count = count + 1
+            start_idx = int(i * mel_idx_multiplier)
+            print('i', i, 'start_idx', start_idx, 'mel len:', len(mel[0]))
+            if start_idx + mel_step_size > len(mel[0]):
+                self._human.push_mel_chunks_queue(mel[:, len(mel[0]) - mel_step_size:])
+                break
+            self._human.push_mel_chunks_queue(mel[:, start_idx: start_idx + mel_step_size])
+            i += 1
+
+        wav = np.concatenate(self._audio_chunks)  # [5 * chunk]
+        wav *= 32767 / max(0.01, np.max(np.abs(wav)))
+        wav = wav.astype(np.int16)
+        self._audio_render.write(wav, len(wav))
+        self._audio_chunks = []
+        print('mel_chunks count:', count)
+
     def _on_run(self):
         logging.info('chunk2mal run')
         while self._exit_event.is_set():
             if self._audio_chunk_queue.empty():
-                time.sleep(0.5)
+                if len(self._chunks) > 0:
+                    self._concatenate()
+                else:
+                    time.sleep(0.5)
                 continue
             try:
                 chunk = self._audio_chunk_queue.get(block=True, timeout=1)
                 self._chunks.append(chunk)
-                self._human.push_audio_frames(chunk, 0)
-                if len(self._chunks) < 10:
+                self._audio_chunks.append(chunk.copy())
+                # print(type(chunk))
+                # self._human.push_audio_frames(chunk, 0)
+                if len(self._chunks) < 102:  # 200ms
                     continue
             except queue.Empty:
                 # print('Chunk2Mal queue.Empty')
                 continue

-            logging.info('np.concatenate')
-            inputs = np.concatenate(self._chunks)  # [N * chunk]
-            mel = audio.melspectrogram(inputs)
-            if np.isnan(mel.reshape(-1)).sum() > 0:
-                raise ValueError(
-                    'Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again')
-
-            mel_step_size = 16
-            print('fps:', self._human.get_fps())
-            mel_idx_multiplier = 80. / self._human.get_fps()
-            print('mel_idx_multiplier:', mel_idx_multiplier)
-            i = 0
-            while 1:
-                start_idx = int(i * mel_idx_multiplier)
-                if start_idx + mel_step_size > len(mel[0]):
-                    self._human.push_mel_chunks_queue(mel[:, len(mel[0]) - mel_step_size:])
-                    break
-                self._human.push_mel_chunks_queue(mel[:, start_idx: start_idx + mel_step_size])
-                i += 1
-
-            batch_size = 128
-            '''
-            while i < (len(self._chunks) - self._human.get_stride_left_size()
-                       - self._human.get_stride_right_size()) / 2:
-                start_idx = int(left + i * mel_idx_multiplier)
-                # print(start_idx)
-                if start_idx + mel_step_size > len(mel[0]):
-                    mel_chunks.append(mel[:, len(mel[0]) - mel_step_size:])
-                else:
-                    mel_chunks.append(mel[:, start_idx: start_idx + mel_step_size])
-                i += 1
-            self._human.push_feat_queue(mel_chunks)
-
-            # discard the old part to save memory
-            self._chunks = self._chunks[-(self._human.get_stride_left_size() + self._human.get_stride_right_size()):]
-            '''
+            print('len(self._chunks):', len(self._chunks))
+            self._concatenate()

         logging.info('chunk2mal exit')
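Note: worked numbers for the indexing in _concatenate(), assuming the repo's 16 kHz sample rate and the usual 80 mel frames per second (hop_length = 200):

    fps = 25
    mel_idx_multiplier = 80. / fps   # 3.2 mel frames advanced per video frame
    mel_step_size = 16               # each pushed window covers 16 / 80 = 0.2 s of audio
    chunk_len = 16000 // fps         # 640 PCM samples per video frame

With those values, each loop iteration pushes one 16-frame mel window per video frame until the buffer runs out.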

View File

@@ -57,33 +57,19 @@ class TTSBase:
         self._io_stream.seek(0)
         stream = self.__create_bytes_stream(self._io_stream)
+        # audio.save_wav(stream, "./temp/audio/test1.wav", 16000)

         stream_len = stream.shape[0]
-        sr = 16000
-        soundfile.read('./temp/audio/audio.wav', stream, sr)
-        # audio_chunks = audio.split_audio(stream, sr, 4)
-        # display(audio.play_audio_chunk(audio_chunks[0], sr=sr))
-        # Save the split segments
-        # audio.save_chunks(stream[0:-1], sr, './temp/audio/')
-        # audio.save_chunks(audio_chunks, sr, './temp/audio/')
-        # try:
-        #     sounddevice.play(stream, samplerate=self._human.get_audio_sample_rate())
-        #     sounddevice.wait()  # wait for playback to finish
-        # except Exception as e:
-        #     logger.error(f"Audio playback error: {e}")
+        print("stream_len:", stream_len, " _chunk_len:", self._chunk_len)
         index = 0
+        segment = 0
         while stream_len >= self._chunk_len:
             audio_chunk = stream[index:index + self._chunk_len]
-            # sounddevice.play(audio_chunk, samplerate=self._human.get_audio_sample_rate())
-            # self._pcm_stream.write(audio_chunk)
-            # self._pcm_stream.write(audio_chunk.tobytes())
-            # self._human.push_audio_chunk(audio_chunk)
-            # self._human.push_mel_chunks_queue(audio_chunk)
             self._human.push_audio_chunk(audio_chunk)
             stream_len -= self._chunk_len
             index += self._chunk_len
+            segment = segment + 1
+        print("segment:", segment)

         self._io_stream.seek(0)
         self._io_stream.truncate()
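Note: the while loop above is a plain fixed-stride slicer that drops the trailing partial chunk. An equivalent standalone sketch, assuming stream is a 1-D numpy array:

    import numpy as np

    def iter_chunks(stream: np.ndarray, chunk_len: int):
        # yields len(stream) // chunk_len full chunks; the remainder is discarded
        for index in range(0, len(stream) - chunk_len + 1, chunk_len):
            yield stream[index:index + chunk_len]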

ui.py (19 changed lines)

@@ -44,6 +44,7 @@ class App(customtkinter.CTk):
         # self.logo_label.grid(row=0, column=0, padx=20, pady=(20, 10))

         self.entry = customtkinter.CTkEntry(self, placeholder_text="输入内容")
+        self.entry.insert(0, "基本信息,北京九零科技有限公司成立于2015年位于北京市是一家以从事科技推广和应用服务业为主的企业。企业注册资本500万人民币。")
         self.entry.grid(row=2, column=0, columnspan=2, padx=(20, 0), pady=(20, 20), sticky="nsew")

         self.main_button_1 = customtkinter.CTkButton(master=self, fg_color="transparent", border_width=2,
@@ -63,13 +64,13 @@ class App(customtkinter.CTk):
         self._human.on_destroy()

     def play_audio(self):
-        # return
-        if self._is_play_audio:
-            return
-        self._is_play_audio = True
-        file = os.path.curdir + '/audio/test.wav'
-        print(file)
-        winsound.PlaySound(file, winsound.SND_ASYNC or winsound.SND_FILENAME)
+        return
+        # if self._is_play_audio:
+        #     return
+        # self._is_play_audio = True
+        # file = os.path.curdir + '/audio/test1.wav'
+        # print(file)
+        # winsound.PlaySound(file, winsound.SND_ASYNC or winsound.SND_FILENAME)
         # playsound(file)
def _init_image_canvas(self): def _init_image_canvas(self):
@@ -105,11 +106,11 @@ class App(customtkinter.CTk):
         height = self.winfo_height() * 0.5
         self._canvas.create_image(width, height, anchor=customtkinter.CENTER, image=imgtk)
         self._canvas.update()
-        self.after(33, self._render)
+        self.after(40, self._render)

     def request_tts(self):
         content = self.entry.get()
-        content = 'Hello, this is a test of the Edge TTS service.'
+        # content = ''
         print('content:', content)
         self.entry.delete(0, customtkinter.END)
         self._human.read(content)
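Note: the 40 ms timer now matches the 25 fps pipeline, but after(40, ...) re-arms only once each render finishes, so render time accumulates as drift. A hypothetical drift-compensating variant:

    import time

    def schedule_render(widget, render_fn, period_ms=40):
        start = time.perf_counter()

        def tick(n=1):
            render_fn()
            # aim for the absolute n-th deadline instead of a fixed delay
            target = start + n * period_ms / 1000.0
            delay_ms = max(1, int((target - time.perf_counter()) * 1000))
            widget.after(delay_ms, tick, n + 1)

        widget.after(period_ms, tick)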