diff --git a/data/avatars/wav2lip_avatar2/face_imgs/00000000.png b/data/avatars/wav2lip_avatar2/face_imgs/00000000.png
index 9bc012e..2d31bb9 100644
Binary files a/data/avatars/wav2lip_avatar2/face_imgs/00000000.png and b/data/avatars/wav2lip_avatar2/face_imgs/00000000.png differ
diff --git a/human/audio_inference_handler.py b/human/audio_inference_handler.py
index 30e430b..6c78c84 100644
--- a/human/audio_inference_handler.py
+++ b/human/audio_inference_handler.py
@@ -59,7 +59,7 @@ class AudioInferenceHandler(AudioHandler):
         super().on_message(message)

     def __on_run(self):
-        wav2lip_path = os.path.join(current_file_path, '..', 'checkpoints', 'wav2lip_gan.pth')
+        wav2lip_path = os.path.join(current_file_path, '..', 'checkpoints', 'wav2lip.pth')
         logger.info(f'AudioInferenceHandler init, path:{wav2lip_path}')
         model = load_model(wav2lip_path)
         logger.info("Model loaded")
@@ -130,7 +130,7 @@ class AudioInferenceHandler(AudioHandler):
            img_batch = torch.FloatTensor(np.transpose(img_batch, (0, 3, 1, 2))).to(device)
            mel_batch = torch.FloatTensor(np.transpose(mel_batch, (0, 3, 1, 2))).to(device)

-            # print('img_batch:', img_batch.shape, 'mel_batch:', mel_batch.shape)
+            print('img_batch:', img_batch.shape, 'mel_batch:', mel_batch.shape)

            with torch.no_grad():
                pred = model(mel_batch, img_batch)
diff --git a/human/human_context.py b/human/human_context.py
index b0c5d37..40eee13 100644
--- a/human/human_context.py
+++ b/human/human_context.py
@@ -18,7 +18,7 @@ current_file_path = os.path.dirname(os.path.abspath(__file__))
 class HumanContext:
     def __init__(self):
         self._fps = 50  # 20 ms per frame
-        self._image_size = 128
+        self._image_size = 288
         self._batch_size = 16
         self._sample_rate = 16000
         self._stride_left_size = 10