diff --git a/face_detection/detection/sfd/detect.py b/face_detection/detection/sfd/detect.py
index d6ff706..439de2b 100644
--- a/face_detection/detection/sfd/detect.py
+++ b/face_detection/detection/sfd/detect.py
@@ -56,17 +56,18 @@ def detect(net, img, device):
     return bboxlist
 
-def batch_detect(net, imgs, device):
-    imgs = imgs - np.array([104, 117, 123])
-    imgs = imgs.transpose(0, 3, 1, 2)
+def batch_detect(net, images, device):
+    rgb = images[:, :, :, :3]
+    rgb = rgb - np.array([104, 117, 123])
+    rgb = rgb.transpose(0, 3, 1, 2)
 
     if 'cuda' in device:
         torch.backends.cudnn.benchmark = True
 
-    imgs = torch.from_numpy(imgs).float().to(device)
-    BB, CC, HH, WW = imgs.size()
+    images1 = torch.from_numpy(rgb).float().to(device)
+    BB, CC, HH, WW = images1.size()
 
     with torch.no_grad():
-        olist = net(imgs)
+        olist = net(images1)
 
     bboxlist = []
     for i in range(len(olist) // 2):
diff --git a/human/human_context.py b/human/human_context.py
index 86c5549..4a69304 100644
--- a/human/human_context.py
+++ b/human/human_context.py
@@ -44,7 +44,6 @@ class HumanContext:
         logging.info(f'face images length: {face_images_length}')
         print(f'face images length: {face_images_length}')
 
-
     def __del__(self):
        print(f'HumanContext: __del__')
        object_stop(self._asr)
diff --git a/render/video_render.py b/render/video_render.py
index 8f5c5b3..ccc68b0 100644
--- a/render/video_render.py
+++ b/render/video_render.py
@@ -30,8 +30,9 @@ class VideoRender(BaseRender):
 
         clock_time = self._play_clock.clock_time()
         time_difference = clock_time - ps
+        print("Video frame time", clock_time, ps, time_difference)
         if abs(time_difference) > self._play_clock.audio_diff_threshold:
-            if self._diff_avg_count < 3:
+            if self._diff_avg_count < 5:
                 self._diff_avg_count += 1
             else:
                 if time_difference < -self._play_clock.audio_diff_threshold:
@@ -62,7 +63,7 @@
                 except:
                     print('resize error')
                     return
-                combine_frame[y1:y2, x1:x2] = res_frame
+                combine_frame[y1:y2, x1:x2, :3] = res_frame
 
                 image = combine_frame
                 # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
diff --git a/ui.py b/ui.py
index 6d46fa2..2587a48 100644
--- a/ui.py
+++ b/ui.py
@@ -57,7 +57,7 @@ class App(customtkinter.CTk):
         # self.main_button_1.grid(row=2, column=2, padx=(20, 20), pady=(20, 20), sticky="nsew")
         background = os.path.join(current_file_path, 'data', 'background', 'background.webp')
         logger.info(f'background: {background}')
-        # self._background = ImageTk.PhotoImage(read_image(background))
+        self._background = read_image(background).convert("RGBA")
 
         self._init_image_canvas()
 
@@ -105,7 +105,13 @@
             image = cv2.resize(image, (int(iwidth * height / iheight), int(height)), interpolation=cv2.INTER_AREA)
 
         img = Image.fromarray(image)
-        imgtk = ImageTk.PhotoImage(image=img)
+        bg_width, bg_height = self._background.size
+        fg_width, fg_height = img.size
+        x = (bg_width - fg_width) // 2
+        y = (bg_height - fg_height) // 2
+        self._background.paste(img, (x, y), img)
+
+        imgtk = ImageTk.PhotoImage(self._background)
 
         self._canvas.delete("all")
 
diff --git a/utils/utils.py b/utils/utils.py
index 7e890ff..8a688cc 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -36,7 +36,7 @@ def read_images(img_list):
         print(f'read image path:{img_path}')
         # frame = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
         frame = Image.open(img_path)
-        frame = frame.convert("RGBA")
+        # frame = frame.convert("RGBA")
         frame = np.array(frame)
         frames.append(frame)
     return frames
@@ -179,7 +179,7 @@ def load_avatar(path, img_size, device):
     face_frames = []
     coord_frames = []
     for face, coord in face_det_results:
-        resized_crop_frame = cv2.resize(face, (img_size, img_size))
+        resized_crop_frame = cv2.resize(face[:, :, :3], (img_size, img_size))
        face_frames.append(resized_crop_frame)
        coord_frames.append(coord)
 