# -*- coding: utf-8 -*-
"""Desktop demo window for the digital-human TTS pipeline.

The window shows a text entry with a send button and a canvas. Frames handed
to :meth:`App.render_image` / :meth:`App.on_render` are queued and drawn on
the canvas by a periodic Tk timer callback.
"""
import json
import logging
import os
import queue
from logging import handlers
import tkinter
import tkinter.messagebox
from queue import Queue
import winsound

import customtkinter
import cv2
import requests
from PIL import Image, ImageTk
from playsound import playsound

from audio_render import AudioRender
# from Human import Human
from human import HumanContext
from utils import config_logging
# from tts.EdgeTTS import EdgeTTS

logger = logging.getLogger(__name__)

customtkinter.set_appearance_mode("System")  # Modes: "System" (standard), "Dark", "Light"
customtkinter.set_default_color_theme("green")  # Themes: "blue" (standard), "green", "dark-blue"


class App(customtkinter.CTk):
    """Main window: text input, send button and a canvas showing queued frames."""

    # Delay, in milliseconds, between two runs of the render timer callback.
    _RENDER_INTERVAL_MS = 24
    # How long, in seconds, one render tick waits for a frame before giving up.
    _QUEUE_POLL_TIMEOUT_S = 0.003
    # Timeout, in seconds, for HTTP requests so a dead server cannot hang the UI forever.
    _HTTP_TIMEOUT_S = 30

    def __init__(self):
        """Build the widgets, create the human context and start the render timer."""
        super().__init__()

        self._tts_url = 'http://localhost:8080'

        # configure window
        self.title("TTS demo")
        self.geometry("1150x580")

        self.grid_columnconfigure(1, weight=1)
        self.grid_rowconfigure((0, 1), weight=1)

        self.image_frame = customtkinter.CTkFrame(self, corner_radius=10)
        self.image_frame.grid(row=0, column=0, rowspan=2, columnspan=3,
                              padx=(20, 20), pady=(20, 0), sticky="nsew")
        self.image_frame.grid_rowconfigure(0, weight=1)

        # The label is created but intentionally not placed on the grid.
        self.logo_label = customtkinter.CTkLabel(
            self.image_frame, text="CustomTkinter",
            font=customtkinter.CTkFont(size=20, weight="bold"))
        # self.logo_label.grid(row=0, column=0, padx=20, pady=(20, 10))

        self.entry = customtkinter.CTkEntry(self, placeholder_text="输入内容")
        self.entry.insert(0, "大家好,测试虚拟数字人。")
        self.entry.grid(row=2, column=0, columnspan=2, padx=(20, 0), pady=(20, 20), sticky="nsew")

        self.main_button_1 = customtkinter.CTkButton(
            master=self, fg_color="transparent", border_width=2,
            text_color=("gray10", "#DCE4EE"), text='发送',
            command=self.request_tts)
        self.main_button_1.grid(row=2, column=2, padx=(20, 20), pady=(20, 20), sticky="nsew")

        self._init_image_canvas()

        # NOTE: the legacy ``Human`` object is not created any more, so
        # ``self._human`` does not exist unless it is assigned from outside;
        # ``request_tts`` guards against that.
        # self._human = Human()
        self._queue = Queue()
        self._human_context = HumanContext()
        self._human_context.build()
        render = self._human_context.render_handler
        # Hand this window to the render handler. Presumably the handler calls
        # back into render_image()/on_render() with frames -- confirm against
        # the handler implementation.
        render.set_image_render(self)
        self._render()
        # self.play_audio()

    def destroy(self):
        """Run the application shutdown hook, then destroy the Tk window."""
        self.on_destroy()
        super().destroy()

    def on_destroy(self):
        """Shutdown hook; currently only logs that the application is closing."""
        logger.info('------------App destroy------------')
        # self._human.on_destroy()

    def render_image(self, image):
        """Queue ``image`` so the next render tick draws it."""
        self._queue.put(image)

    def _init_image_canvas(self):
        """Create the canvas that fills the image frame."""
        self._canvas = customtkinter.CTkCanvas(self.image_frame)
        self._canvas.pack(fill=customtkinter.BOTH, expand=customtkinter.YES)

    def _render(self):
        """Timer callback: draw at most one queued frame, then reschedule itself.

        Rescheduling happens in ``finally`` so that an exception raised while
        drawing a single frame cannot stop the render loop for good; the
        exception itself still propagates to the caller / Tk error handler.
        """
        try:
            image = self._next_frame()
            if image is not None:
                self._draw_frame(image)
        finally:
            self.after(self._RENDER_INTERVAL_MS, self._render)

    def _next_frame(self):
        """Return the next queued frame, or ``None`` when nothing arrives in time."""
        try:
            return self._queue.get(block=True, timeout=self._QUEUE_POLL_TIMEOUT_S)
        except queue.Empty:
            return None

    def _draw_frame(self, image):
        """Scale ``image`` to the window and draw it centred on the canvas.

        ``image`` must expose ``shape`` as (height, width, ...) and be accepted
        by ``cv2.resize`` and ``PIL.Image.fromarray``.
        """
        iheight, iwidth = image.shape[0], image.shape[1]
        if iheight == 0 or iwidth == 0:
            # An empty frame cannot be scaled (it would divide by zero below).
            return

        width = self.winfo_width()
        height = self.winfo_height()
        # NOTE(review): this compares the image's height/width ratio with the
        # window's width/height ratio, which looks inverted. Kept as-is because
        # the intended fit mode cannot be determined from this file -- confirm.
        if iheight / iwidth >= width / height:
            # Clamp to 1 px: cv2.resize rejects a zero-sized target, which can
            # happen while the window is still 1x1 before its first layout.
            target = (max(1, int(width)), max(1, int(iheight * width / iwidth)))
            image = cv2.resize(image, target)
        else:
            target = (max(1, int(iwidth * height / iheight)), max(1, int(height)))
            image = cv2.resize(image, target, interpolation=cv2.INTER_AREA)

        img = Image.fromarray(image)
        imgtk = ImageTk.PhotoImage(image=img)

        self._canvas.delete("all")
        # Keep a reference on the canvas; otherwise the PhotoImage is garbage
        # collected and nothing is shown.
        self._canvas.imgtk = imgtk

        center_x = self.winfo_width() * 0.5
        center_y = self.winfo_height() * 0.5
        self._canvas.create_image(center_x, center_y, anchor=customtkinter.CENTER, image=imgtk)
        self._canvas.update()

    def request_tts(self):
        """Send the entry text to the speech backend (send-button callback).

        The backend is looked up as ``self._human``. That attribute is not
        created by ``__init__`` any more, so when it is missing the request is
        logged and skipped instead of raising ``AttributeError``; the typed
        text is left in the entry in that case.

        The previous HTTP flow (GET ``<tts_url>/tts`` with ``text``/``voice``
        parameters, then :meth:`download_tts` on the returned ``url``) is
        disabled.
        """
        content = self.entry.get()
        # content = ''
        print('content:', content)

        human = getattr(self, '_human', None)
        if human is None:
            logger.warning('request_tts: no speech backend (self._human) is configured; '
                           'text not sent: %r', content)
            return

        self.entry.delete(0, customtkinter.END)
        human.pause_talk()
        human.read(content)

    def download_tts(self, url):
        """Download the audio at ``self._tts_url + url`` and convert it to WAV.

        The file is stored as ``./audio/mp3/<name>`` and exported to
        ``./audio/wav/<name>.wav``, where ``<name>`` is ``url`` without its
        first three characters (presumably a fixed path prefix -- confirm
        against the TTS server's response format).

        Raises:
            requests.HTTPError: if the server answers with an error status, so
                an error page is never written out as an mp3 file.
        """
        file_name = url[3:]
        print(file_name)
        download_url = self._tts_url + url
        print('download tts', download_url)

        resp = requests.get(download_url, timeout=self._HTTP_TIMEOUT_S)
        resp.raise_for_status()

        mp3_path = './audio/mp3/' + file_name
        with open(mp3_path, 'wb') as mp3:
            mp3.write(resp.content)

        # Imported lazily so pydub is only required when this method is used.
        from pydub import AudioSegment
        sound = AudioSegment.from_mp3(mp3_path)
        sound.export('./audio/wav/' + file_name + '.wav', format="wav")

    def on_render(self, image):
        """Queue ``image`` for drawing; same effect as :meth:`render_image`."""
        self._queue.put(image)


if __name__ == "__main__":
    # logging.basicConfig(filename='./logs/info.log', level=logging.INFO)
    config_logging('./logs/info.log', logging.INFO, logging.INFO)

    logger.info('------------start------------')
    app = App()
    app.mainloop()
    # app.on_destroy()
    logger.info('------------exit------------')