Add audio playback

This commit is contained in:
brige 2024-10-04 16:16:36 +08:00
parent a71740f40c
commit 9c045df382
1106 changed files with 155 additions and 119 deletions

121
Human.py
View File

@ -19,6 +19,7 @@ import pyaudio
import audio import audio
import face_detection import face_detection
import utils import utils
from audio_render import AudioRender
from infer import Infer, read_images from infer import Infer, read_images
from models import Wav2Lip from models import Wav2Lip
from tts.Chunk2Mal import Chunk2Mal from tts.Chunk2Mal import Chunk2Mal
@ -189,84 +190,6 @@ img_size = 96
wav2lip_batch_size = 128 wav2lip_batch_size = 128
def datagen(frames, mels):
    """Yield model-input batches, one sample per mel chunk waiting in ``mels``.

    The number of samples is fixed up front from ``mels.qsize()``; each sample
    pairs one chunk taken with ``mels.get()`` with the first face-detection
    result, resized to ``img_size`` x ``img_size``.

    Args:
        frames: Indexable collection of frames; each frame must support
            ``.copy()``. Passed as a whole to ``face_detect``.
        mels: Queue-like object exposing ``qsize()`` and ``get()``.

    Yields:
        ``(img_batch, mel_batch, frame_batch, coords_batch)``. ``img_batch``
        and ``mel_batch`` are numpy arrays; ``frame_batch`` and
        ``coords_batch`` are plain lists. A batch is emitted every time
        ``wav2lip_batch_size`` samples have accumulated, and once more for any
        remainder.
    """

    def _pack(faces, mel_items):
        # Shared finalisation for full and trailing batches.
        faces = np.asarray(faces)
        mel_items = np.asarray(mel_items)

        # Zero everything from index img_size // 2 onward along axis 1
        # (presumably the lower half of each face crop -- TODO confirm layout).
        masked = faces.copy()
        masked[:, img_size // 2:] = 0

        # Masked and unmasked crops are joined along axis 3 and scaled by 1/255.
        faces = np.concatenate((masked, faces), axis=3) / 255.
        # Append a trailing singleton axis to the stacked mel chunks.
        mel_items = np.reshape(
            mel_items,
            [len(mel_items), mel_items.shape[1], mel_items.shape[2], 1],
        )
        return faces, mel_items

    face_det_results = face_detect(frames)  # BGR2RGB for CNN face detection

    img_batch, mel_batch, frame_batch, coords_batch = [], [], [], []
    for i in range(mels.qsize()):
        frame_to_save = frames[mirror_index(1, i)].copy()
        # Only the first detection result is ever used (the original selected
        # it through an always-true conditional).
        face, coords = face_det_results[0].copy()
        face = cv2.resize(face, (img_size, img_size))
        m = mels.get()

        img_batch.append(face)
        mel_batch.append(m)
        frame_batch.append(frame_to_save)
        coords_batch.append(coords)

        if len(img_batch) >= wav2lip_batch_size:
            packed_imgs, packed_mels = _pack(img_batch, mel_batch)
            yield packed_imgs, packed_mels, frame_batch, coords_batch
            img_batch, mel_batch, frame_batch, coords_batch = [], [], [], []

    # Flush whatever is left after the last full batch.
    if len(img_batch) > 0:
        packed_imgs, packed_mels = _pack(img_batch, mel_batch)
        yield packed_imgs, packed_mels, frame_batch, coords_batch
def datagen_signal(frame, mel, face_det_results):
    """Build a single model-input batch that reuses one frame for every mel chunk.

    Args:
        frame: Frame object supporting ``.copy()``; the same copy is referenced
            by every entry of the returned ``frame_batch``.
        mel: Iterable of mel chunks. Only the first ``wav2lip_batch_size``
            chunks are consumed; any further chunks are ignored.
        face_det_results: Indexable collection whose first element supports
            ``.copy()`` and unpacks into ``(face, coord)``.

    Returns:
        ``(img_batch, mel_batch, frame_batch, coord_batch)`` where the first
        two are numpy arrays and the last two are lists, or ``None`` when
        ``mel`` yields nothing (callers that unpack the result must handle
        that case).
    """

    def _pack(faces, mel_items):
        # Single finalisation path for both the full-batch and short-batch case.
        faces = np.asarray(faces)
        mel_items = np.asarray(mel_items)

        # Zero everything from index img_size // 2 onward along axis 1
        # (presumably the lower half of each face crop -- TODO confirm layout).
        masked = faces.copy()
        masked[:, img_size // 2:] = 0

        # Masked and unmasked crops are joined along axis 3 and scaled by 1/255.
        faces = np.concatenate((masked, faces), axis=3) / 255.
        # Append a trailing singleton axis to the stacked mel chunks.
        mel_items = np.reshape(
            mel_items,
            [len(mel_items), mel_items.shape[1], mel_items.shape[2], 1],
        )
        return faces, mel_items

    frame_to_save = frame.copy()
    # Only the first detection result is used.
    face, coord = face_det_results[0].copy()
    face = cv2.resize(face, (img_size, img_size))

    img_batch, mel_batch, frame_batch, coord_batch = [], [], [], []
    for m in mel:
        img_batch.append(face)
        mel_batch.append(m)
        frame_batch.append(frame_to_save)
        coord_batch.append(coord)

        # Stop as soon as one full batch has been collected.
        if len(img_batch) >= wav2lip_batch_size:
            break

    if not img_batch:
        # Nothing to batch: matches the original's implicit None result.
        return None

    img_batch, mel_batch = _pack(img_batch, mel_batch)
    return img_batch, mel_batch, frame_batch, coord_batch
# 从字节流加载音频数据 # 从字节流加载音频数据
def load_audio_from_bytes(byte_data): def load_audio_from_bytes(byte_data):
# 使用 BytesIO 创建一个字节流 # 使用 BytesIO 创建一个字节流
@ -288,20 +211,20 @@ class Human:
self._output_queue = mp.Queue() self._output_queue = mp.Queue()
self._res_frame_queue = mp.Queue(self._batch_size * 2) self._res_frame_queue = mp.Queue(self._batch_size * 2)
# full_images, face_frames, coord_frames = self._avatar() full_images, face_frames, coord_frames = self._avatar()
# self._frame_list_cycle = full_images self._frame_list_cycle = full_images
# self._face_list_cycle = face_frames self._face_list_cycle = face_frames
# self._coord_list_cycle = coord_frames self._coord_list_cycle = coord_frames
# face_images_length = len(self._face_list_cycle) face_images_length = len(self._face_list_cycle)
# logging.info(f'face images length: {face_images_length}') logging.info(f'face images length: {face_images_length}')
# print(f'face images length: {face_images_length}') print(f'face images length: {face_images_length}')
self.avatar_id = 'wav2lip_avatar1'
self.avatar_path = f"./data/{self.avatar_id}"
self.full_imgs_path = f"{self.avatar_path}/full_imgs"
self.face_imgs_path = f"{self.avatar_path}/face_imgs"
self.coords_path = f"{self.avatar_path}/coords.pkl"
self.__loadavatar() # self.avatar_id = 'wav2lip_avatar1'
# self.avatar_path = f"./data/{self.avatar_id}"
# self.full_imgs_path = f"{self.avatar_path}/full_imgs"
# self.face_imgs_path = f"{self.avatar_path}/face_imgs"
# self.coords_path = f"{self.avatar_path}/coords.pkl"
# self.__loadavatar()
self.mel_chunks_queue_ = Queue() self.mel_chunks_queue_ = Queue()
self.audio_chunks_queue_ = Queue() self.audio_chunks_queue_ = Queue()
@ -315,6 +238,8 @@ class Human:
self._infer = Infer(self) self._infer = Infer(self)
self.chunk_2_mal.warm_up() self.chunk_2_mal.warm_up()
self.audio_render = AudioRender()
# #
# self._thread = None # self._thread = None
# thread = threading.Thread(target=self.test) # thread = threading.Thread(target=self.test)
@ -361,7 +286,8 @@ class Human:
face_frames = [] face_frames = []
coord_frames = [] coord_frames = []
for face, coord in face_det_results: for face, coord in face_det_results:
face_frames.append(face) resized_crop_frame = cv2.resize(face, (img_size, img_size))
face_frames.append(resized_crop_frame)
coord_frames.append(coord) coord_frames.append(coord)
return full_list_cycle, face_frames, coord_frames return full_list_cycle, face_frames, coord_frames
@ -395,7 +321,8 @@ class Human:
print("self.mel_chunks_queue_ len:", self.mel_chunks_queue_.qsize()) print("self.mel_chunks_queue_ len:", self.mel_chunks_queue_.qsize())
m = self.mel_chunks_queue_.get() m = self.mel_chunks_queue_.get()
# mel_batch = np.reshape(m, [len(m), mel_batch.shape[1], mel_batch.shape[2], 1]) # mel_batch = np.reshape(m, [len(m), mel_batch.shape[1], mel_batch.shape[2], 1])
img_batch, mel_batch, frames, coords = datagen_signal(face_list_cycle[0], m, face_det_results) img_batch, mel_batch, frames, coords = utils.datagen_signal(face_list_cycle[0],
m, face_det_results, img_size)
img_batch = torch.FloatTensor(np.transpose(img_batch, (0, 3, 1, 2))).to(device) img_batch = torch.FloatTensor(np.transpose(img_batch, (0, 3, 1, 2))).to(device)
mel_batch = torch.FloatTensor(np.transpose(mel_batch, (0, 3, 1, 2))).to(device) mel_batch = torch.FloatTensor(np.transpose(mel_batch, (0, 3, 1, 2))).to(device)
@ -561,6 +488,14 @@ class Human:
image = combine_frame image = combine_frame
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
for audio_frame in audio_frames:
frame, type_ = audio_frame
frame = (frame * 32767).astype(np.int16)
self.audio_render.write(frame.tobytes(), int(frame.shape[0]*2))
# new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0])
# new_frame.planes[0].update(frame.tobytes())
# new_frame.sample_rate = 16000
return image return image

View File

@ -2,6 +2,9 @@
from ctypes import * from ctypes import *
import os import os
import numpy as np
current = os.path.dirname(__file__) current = os.path.dirname(__file__)
dynamic_path = os.path.join(current, 'AudioRender.dll') dynamic_path = os.path.join(current, 'AudioRender.dll')
@ -31,5 +34,6 @@ class AudioRender:
if not self.__init: if not self.__init:
return False return False
self.__audio_render_obj.argtypes = (POINTER(c_ubyte), c_uint) self.__audio_render_obj.argtypes = (POINTER(c_uint8), c_uint)
return self.__audio_render_obj.Write(data.ctypes.data_as(POINTER(c_ubyte)), size) byte_data = np.frombuffer(data, dtype=np.uint8)
return self.__audio_render_obj.Write(byte_data.ctypes.data_as(POINTER(c_uint8)), size)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Some files were not shown because too many files have changed in this diff Show More