From 32e4444bb552d4cfb16f6a0448ad6d1afd6fb058 Mon Sep 17 00:00:00 2001
From: brige <jiegeaiai@163.com>
Date: Tue, 19 Nov 2024 23:18:09 +0800
Subject: [PATCH] Use wav2lip_gan checkpoint and preprocessed avatar; dump rendered frames

---
 human/audio_inference_handler.py | 2 +-
 human/human_context.py           | 8 ++++----
 render/video_render.py           | 5 ++++-
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/human/audio_inference_handler.py b/human/audio_inference_handler.py
index 99a0af1..30e430b 100644
--- a/human/audio_inference_handler.py
+++ b/human/audio_inference_handler.py
@@ -59,7 +59,7 @@ class AudioInferenceHandler(AudioHandler):
         super().on_message(message)
 
     def __on_run(self):
-        wav2lip_path = os.path.join(current_file_path, '..', 'checkpoints', 'wav2lip.pth')
+        wav2lip_path = os.path.join(current_file_path, '..', 'checkpoints', 'wav2lip_gan.pth')
         logger.info(f'AudioInferenceHandler init, path:{wav2lip_path}')
         model = load_model(wav2lip_path)
         logger.info("Model loaded")
diff --git a/human/human_context.py b/human/human_context.py
index 4e0ee0f..b0c5d37 100644
--- a/human/human_context.py
+++ b/human/human_context.py
@@ -18,7 +18,7 @@ current_file_path = os.path.dirname(os.path.abspath(__file__))
 class HumanContext:
     def __init__(self):
         self._fps = 50  # 20 ms per frame
-        self._image_size = 96
+        self._image_size = 128
         self._batch_size = 16
         self._sample_rate = 16000
         self._stride_left_size = 10
@@ -37,9 +37,9 @@ class HumanContext:
         print(f'device:{self._device}')
         base_path = os.path.join(current_file_path, '..')
         logger.info(f'base path:{base_path}')
-        full_images, face_frames, coord_frames = load_avatar(base_path, self._image_size, self._device)
-        # full_images, face_frames, coord_frames = load_avatar_from_processed(base_path,
-        #                                                                     'wav2lip_avatar1')
+        # full_images, face_frames, coord_frames = load_avatar(base_path, self._image_size, self._device)
+        full_images, face_frames, coord_frames = load_avatar_from_processed(base_path,
+                                                                            'wav2lip_avatar2')
         self._frame_list_cycle = full_images
         self._face_list_cycle = face_frames
         self._coord_list_cycle = coord_frames
diff --git a/render/video_render.py b/render/video_render.py
index 2b53137..1dbecad 100644
--- a/render/video_render.py
+++ b/render/video_render.py
@@ -15,6 +15,7 @@ class VideoRender(BaseRender):
     def __init__(self, play_clock, context, human_render):
         super().__init__(play_clock, context, 'Video')
         self._human_render = human_render
+        self.index = 0
 
     def render(self, frame, ps):
         res_frame, idx, type_ = frame
@@ -30,7 +31,9 @@ class VideoRender(BaseRender):
             except:
                 print('resize error')
                 return
-            combine_frame[y1:y2, x1:x2, :3] = res_frame
+            cv2.imwrite(f'res_frame_{ self.index }.png', res_frame)
+            self.index = self.index + 1
+            combine_frame[y1:y2, x1:x2] = res_frame
 
         image = combine_frame
         # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)