modify test push

2024-09-23 15:52:39 +08:00 · 2024-09-23 15:52:39 +08:00 · 5af8ba1878
commit 5af8ba1878
parent 17d9437425
1 changed files with 81 additions and 2 deletions
--- a/Human.py
+++ b/Human.py
@ -2,9 +2,11 @@
 import logging
 import multiprocessing as mp
 import platform, subprocess
 import queue
 import time
 import numpy as np
 import audio
@ -162,7 +164,7 @@ def face_detect(images):
    while 1:
        predictions = []
        try:
-            for i in tqdm(range(0, len(images), batch_size)):
+            for i in range(0, len(images), batch_size):
                predictions.extend(detector.get_detections_for_batch(np.array(images[i:i + batch_size])))
        except RuntimeError:
            if batch_size == 1:
@ -240,6 +242,44 @@ def datagen(frames, mels):
        yield img_batch, mel_batch, frame_batch, coords_batch
 def datagen_signal(frame, mel, face_det_results):
    img_batch, mel_batch, frame_batch, coords_batch = [], [], [], []
    # for i, m in enumerate(mels):
    idx = 0
    frame_to_save = frame.copy()
    face, coords = face_det_results[idx].copy()
    face = cv2.resize(face, (img_size, img_size))
    m = mel
    img_batch.append(face)
    mel_batch.append(m)
    frame_batch.append(frame_to_save)
    coords_batch.append(coords)
    if len(img_batch) >= wav2lip_batch_size:
        img_batch, mel_batch = np.asarray(img_batch), np.asarray(mel_batch)
        img_masked = img_batch.copy()
        img_masked[:, img_size // 2:] = 0
        img_batch = np.concatenate((img_masked, img_batch), axis=3) / 255.
        mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1])
        return img_batch, mel_batch, frame_batch, coords_batch
    if len(img_batch) > 0:
        img_batch, mel_batch = np.asarray(img_batch), np.asarray(mel_batch)
        img_masked = img_batch.copy()
        img_masked[:, img_size//2:] = 0
        img_batch = np.concatenate((img_masked, img_batch), axis=3) / 255.
        mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1])
        return img_batch, mel_batch, frame_batch, coords_batch
 class Human:
    def __init__(self):
@ -299,7 +339,46 @@ class Human:
        face_images_length = len(face_list_cycle)
        logging.info(f'face images length: {face_images_length}')
        print(f'face images length: {face_images_length}')
-        gen = datagen(face_list_cycle, self.mel_chunks_queue_)
+
        model = load_model(r'.\checkpoints\wav2lip.pth')
        print("Model loaded")
        frame_h, frame_w = face_list_cycle[0].shape[:-1]
        out = cv2.VideoWriter('temp/resul_tttt.avi',
                              cv2.VideoWriter_fourcc(*'DIVX'), 25, (frame_w, frame_h))
        face_det_results = face_detect(face_list_cycle)
        j = 0
        while not self.mel_chunks_queue_.empty():
            print("self.mel_chunks_queue_ len:", self.mel_chunks_queue_.qsize())
            m = self.mel_chunks_queue_.get()
            img_batch, mel_batch, frames, coords = datagen_signal(face_list_cycle[0], m, face_det_results)
            img_batch = torch.FloatTensor(np.transpose(img_batch, (0, 3, 1, 2))).to(device)
            mel_batch = torch.FloatTensor(np.transpose(mel_batch, (0, 3, 1, 2))).to(device)
            with torch.no_grad():
                pred = model(mel_batch, img_batch)
            pred = pred.cpu().numpy().transpose(0, 2, 3, 1) * 255.
            for p, f, c in zip(pred, frames, coords):
                y1, y2, x1, x2 = c
                p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))
                f[y1:y2, x1:x2] = p
                # name = "%04d" % j
                # cv2.imwrite(f'temp/images/{j}.jpg', p)
                # j = j + 1
                out.write(f)
        out.release()
        command = 'ffmpeg -y -i {} -i {} -strict -2 -q:v 1 {}'.format('./audio/audio1.wav', 'temp/resul_tttt.avi',
                                                                      'temp/resul_tttt.mp4')
        subprocess.call(command, shell=platform.system() != 'Windows')
        # gen = datagen(face_list_cycle, self.mel_chunks_queue_)
    def get_fps(self):
        return self._fps