diff --git a/face_detection/detection/sfd/detect.py b/face_detection/detection/sfd/detect.py
index d6ff706..439de2b 100644
--- a/face_detection/detection/sfd/detect.py
+++ b/face_detection/detection/sfd/detect.py
@@ -56,17 +56,18 @@ def detect(net, img, device):
     return bboxlist
 
-def batch_detect(net, imgs, device):
-    imgs = imgs - np.array([104, 117, 123])
-    imgs = imgs.transpose(0, 3, 1, 2)
+def batch_detect(net, images, device):
+    rgb = images[:, :, :, :3]
+    rgb = rgb - np.array([104, 117, 123])
+    rgb = rgb.transpose(0, 3, 1, 2)
 
     if 'cuda' in device:
         torch.backends.cudnn.benchmark = True
 
-    imgs = torch.from_numpy(imgs).float().to(device)
-    BB, CC, HH, WW = imgs.size()
+    images1 = torch.from_numpy(rgb).float().to(device)
+    BB, CC, HH, WW = images1.size()
 
     with torch.no_grad():
-        olist = net(imgs)
+        olist = net(images1)
 
     bboxlist = []
     for i in range(len(olist) // 2):
diff --git a/human/human_context.py b/human/human_context.py
index 86c5549..4a69304 100644
--- a/human/human_context.py
+++ b/human/human_context.py
@@ -44,7 +44,6 @@ class HumanContext:
         logging.info(f'face images length: {face_images_length}')
         print(f'face images length: {face_images_length}')
 
-
     def __del__(self):
        print(f'HumanContext: __del__')
        object_stop(self._asr)
diff --git a/render/video_render.py b/render/video_render.py
index 8f5c5b3..ccc68b0 100644
--- a/render/video_render.py
+++ b/render/video_render.py
@@ -30,8 +30,9 @@ class VideoRender(BaseRender):
 
         clock_time = self._play_clock.clock_time()
         time_difference = clock_time - ps
+        print("Video frame time", clock_time, ps, time_difference)
         if abs(time_difference) > self._play_clock.audio_diff_threshold:
-            if self._diff_avg_count < 3:
+            if self._diff_avg_count < 5:
                 self._diff_avg_count += 1
             else:
                 if time_difference < -self._play_clock.audio_diff_threshold:
@@ -62,7 +63,7 @@
                 except:
                     print('resize error')
                     return
-                combine_frame[y1:y2, x1:x2] = res_frame
+                combine_frame[y1:y2, x1:x2, :3] = res_frame
 
                 image = combine_frame
                 # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
diff --git a/ui.py b/ui.py
index 6d46fa2..2587a48 100644
--- a/ui.py
+++ b/ui.py
@@ -57,7 +57,7 @@ class App(customtkinter.CTk):
         # self.main_button_1.grid(row=2, column=2, padx=(20, 20), pady=(20, 20), sticky="nsew")
         background = os.path.join(current_file_path, 'data', 'background', 'background.webp')
         logger.info(f'background: {background}')
-        # self._background = ImageTk.PhotoImage(read_image(background))
+        self._background = read_image(background).convert("RGBA")
 
         self._init_image_canvas()
 
@@ -105,7 +105,13 @@
             image = cv2.resize(image, (int(iwidth * height / iheight), int(height)), interpolation=cv2.INTER_AREA)
 
         img = Image.fromarray(image)
-        imgtk = ImageTk.PhotoImage(image=img)
+        bg_width, bg_height = self._background.size
+        fg_width, fg_height = img.size
+        x = (bg_width - fg_width) // 2
+        y = (bg_height - fg_height) // 2
+        self._background.paste(img, (x, y), img)
+
+        imgtk = ImageTk.PhotoImage(self._background)
 
         self._canvas.delete("all")
 
diff --git a/utils/utils.py b/utils/utils.py
index 7e890ff..8a688cc 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -36,7 +36,7 @@ def read_images(img_list):
         print(f'read image path:{img_path}')
         # frame = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
         frame = Image.open(img_path)
-        frame = frame.convert("RGBA")
+        # frame = frame.convert("RGBA")
         frame = np.array(frame)
         frames.append(frame)
     return frames
@@ -179,7 +179,7 @@ def load_avatar(path, img_size, device):
     face_frames = []
     coord_frames = []
     for face, coord in face_det_results:
-        resized_crop_frame = cv2.resize(face, (img_size, img_size))
+        resized_crop_frame = cv2.resize(face[:, :, :3], (img_size, img_size))
        face_frames.append(resized_crop_frame)
        coord_frames.append(coord)
 