diff --git a/human/__init__.py b/human/__init__.py
index 503a041..cb6be4d 100644
--- a/human/__init__.py
+++ b/human/__init__.py
@@ -4,4 +4,5 @@ from .human_context import HumanContext
 from .audio_mal_handler import AudioMalHandler
 from .audio_inference_handler import AudioInferenceHandler
 from .audio_inference_onnx_handler import AudioInferenceOnnxHandler
+from .human_status import HumanStatusEnum, HumanStatus
 from .human_render import HumanRender
diff --git a/human/audio_inference_handler.py b/human/audio_inference_handler.py
index 82a8d51..6755a4b 100644
--- a/human/audio_inference_handler.py
+++ b/human/audio_inference_handler.py
@@ -6,10 +6,12 @@ import time
 from queue import Queue
 from threading import Event, Thread
 
+import cv2
 import numpy as np
 import torch
 
 from eventbus import EventBus
+from .human_status import HumanStatus
 from human_handler import AudioHandler
 from utils import load_model, mirror_index, get_device, SyncQueue
@@ -67,12 +69,14 @@
         logger.info("Model loaded")
 
         face_list_cycle = self._context.face_list_cycle
         length = len(face_list_cycle)
 
         index = 0
         count = 0
         count_time = 0
         logger.info('start inference')
+        silence_length = 133
+        human_status = HumanStatus(length, silence_length)
 
         device = get_device()
         logger.info(f'use device:{device}')
@@ -107,11 +110,13 @@
                 for i in range(batch_size):
                     if not self._is_running:
                         break
-                    self.on_next_handle((None, mirror_index(length, index), audio_frames[i * 2:i * 2 + 2]),
-                                        0)
+                    # self.on_next_handle((None, mirror_index(silence_length, index), audio_frames[i * 2:i * 2 + 2]), 0)
+                    self.on_next_handle((None, human_status.get_index(),
+                                         audio_frames[i * 2:i * 2 + 2]), 0)
                     index = index + 1
             else:
                 logger.info(f'infer======= {current_text}')
+                human_status.try_to_talk()
                 t = time.perf_counter()
                 img_batch = []
                 # for i in range(batch_size):
diff --git a/human/audio_inference_onnx_handler.py b/human/audio_inference_onnx_handler.py
index 8a28627..b521fbe 100644
--- a/human/audio_inference_onnx_handler.py
+++ b/human/audio_inference_onnx_handler.py
@@ -5,7 +5,9 @@ import queue
 import time
 from threading import Event, Thread
 
-# from gfpgan import GFPGANer
+import cv2
+
+# from gfpgan import GFPGANer
 from eventbus import EventBus
 from human_handler import AudioHandler
 from utils import load_model, mirror_index, get_device, SyncQueue
@@ -93,9 +95,11 @@
         gfpgan_model = load_gfpgan_model(gfpgan_model_path)
 
         face_list_cycle = self._context.face_list_cycle
-
         length = len(face_list_cycle)
+        # debug: dump the face cycle once so the frames can be inspected on disk
+        for i in range(length):
+            cv2.imwrite(f'face_{i}.png', face_list_cycle[i])
         index = 0
         count = 0
         count_time = 0
@@ -157,18 +159,6 @@
             onnx_out = model_g.run(onnx_names, onnx_input)[0]
             pred = onnx_out
 
-            # onnxruntime_inputs = {"audio_seqs__0": mel_batch, }
-            # onnxruntime_names = [output.name for output in model_a.get_outputs()]
-            # embeddings = model_a.run(onnxruntime_names, onnxruntime_inputs)[0]
-            #
-            # onnxruntime_inputs = {"audio_embedings__0": embeddings, "img_seqs__1": img_batch}
-            # onnxruntime_names = [output.name for output in model_g.get_outputs()]
-            #
-            # start_model = time.time()
-            # onnxruntime_output = model_g.run(onnxruntime_names, onnxruntime_inputs)[0]
-            # end_model = time.time()
-            # pred = onnxruntime_output
-
             count_time += (time.perf_counter() - t)
             count += batch_size
diff --git a/human/human_status.py b/human/human_status.py
new file mode 100644
index 0000000..a031f95
--- /dev/null
+++ b/human/human_status.py
@@ -0,0 +1,38 @@
+#encoding = utf8
+
+from enum import Enum
+
+
+class HumanStatusEnum(Enum):
+    silence = 1
+    talking = 2
+
+
+class HumanStatus:
+    def __init__(self, total_frames=0, last_silence_frame=0):
+        self._status = HumanStatusEnum.silence
+        self._total_frames = total_frames
+        self._last_silence_frame = last_silence_frame
+        self._current_frame = 0
+
+    def get_status(self):
+        return self._status
+
+    def set_status(self, status):
+        self._status = status
+        return self._status
+
+    def try_to_talk(self):
+        if self._status == HumanStatusEnum.silence:
+            if self._current_frame - self._last_silence_frame < 0:
+                return False
+            self._status = HumanStatusEnum.talking
+        return True
+
+    def get_index(self):
+        # wrap and advance the frame cursor so the returned index stays in range
+        if self._total_frames <= 0:
+            return 0
+        index = self._current_frame % self._total_frames
+        self._current_frame += 1
+        return index
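A minimal sketch of how the HumanStatus state machine above is meant to be driven, mirroring its wiring in AudioInferenceHandler; the frame counts and the fake silence schedule here are illustrative, not values from the repo:

    from human import HumanStatus

    # illustrative numbers: a 270-frame face cycle whose first 133 frames are the silence segment
    status = HumanStatus(total_frames=270, last_silence_frame=133)

    index = 0
    for frame in range(200):
        if frame < 150:                  # fake schedule: silence first, then speech
            index = status.get_index()   # wraps inside total_frames
        elif status.try_to_talk():       # opens only once the silence segment has played out
            index = status.get_index()
        print(frame, status.get_status().name, index)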
diff --git a/ipc/ipc.dll b/ipc/ipc.dll
index a12ee07..9fd8727 100644
Binary files a/ipc/ipc.dll and b/ipc/ipc.dll differ
diff --git a/ipc/ipc.exp b/ipc/ipc.exp
index 433ba89..6d11e2b 100644
Binary files a/ipc/ipc.exp and b/ipc/ipc.exp differ
diff --git a/ipc/ipc.lib b/ipc/ipc.lib
index 0816970..8f4c600 100644
Binary files a/ipc/ipc.lib and b/ipc/ipc.lib differ
diff --git a/ipc/ipc.pdb b/ipc/ipc.pdb
index 83c7e03..b7765b7 100644
Binary files a/ipc/ipc.pdb and b/ipc/ipc.pdb differ
diff --git a/ipc/ipc_mem.py b/ipc/ipc_mem.py
new file mode 100644
index 0000000..3abb62e
--- /dev/null
+++ b/ipc/ipc_mem.py
@@ -0,0 +1,62 @@
+#encoding = utf8
+
+import os
+
+from ctypes import *
+
+current = os.path.dirname(__file__)
+dynamic_path = os.path.join(current, 'ipc.dll')
+
+
+class IPCMem:
+    def __init__(self, sender, receiver):
+        self.__ipc_obj = WinDLL(dynamic_path)
+        print(self.__ipc_obj)
+        if self.__ipc_obj is not None:
+            self.__ipc_obj.initialize.argtypes = [c_char_p, c_char_p]
+            self.__ipc_obj.initialize.restype = c_bool
+            print('IPCMem init', sender.encode('utf-8'), receiver.encode('utf-8'))
+            self.__init = self.__ipc_obj.initialize(sender.encode('utf-8'), receiver.encode('utf-8'))
+            print('IPCMem init', self.__init)
+
+    def __del__(self):
+        print('IPCMem __del__')
+        if self.__ipc_obj is None:
+            return
+        if self.__init:
+            self.__ipc_obj.uninitialize()
+
+    def listen(self):
+        if not self.__init:
+            return False
+        self.__ipc_obj.listen.restype = c_bool
+        return self.__ipc_obj.listen()
+
+    def send_text(self, data):
+        if not self.__init:
+            return False
+        self.__ipc_obj.send.argtypes = [c_char_p, c_uint]
+        self.__ipc_obj.send.restype = c_bool
+        send_data = data.encode('utf-8')
+        send_len = len(send_data) + 1
+        # fire-and-forget: attempt a reconnect if the send fails, but still report success
+        if not self.__ipc_obj.send(send_data, send_len):
+            self.__ipc_obj.reConnect()
+        return True
+
+    def send_binary(self, data, size):
+        if not self.__init:
+            return False
+        self.__ipc_obj.send.argtypes = [c_char_p, c_uint]
+        self.__ipc_obj.send.restype = c_bool
+        data_ptr = cast(data, c_char_p)
+        return self.__ipc_obj.send(data_ptr, size)
+
+    def set_reader_callback(self, callback):
+        if not self.__init:
+            return False
+        CALLBACK_TYPE = CFUNCTYPE(None, c_char_p, c_uint)
+        self.c_callback = CALLBACK_TYPE(callback)  # Store the callback to prevent garbage collection
+        self.__ipc_obj.setReaderCallback.argtypes = [CALLBACK_TYPE]
+        self.__ipc_obj.setReaderCallback.restype = c_bool
+        return self.__ipc_obj.setReaderCallback(self.c_callback)
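A minimal usage sketch for the new IPCMem wrapper; it assumes Windows with ipc.dll next to ipc_mem.py (as the module expects), and the channel names are hypothetical:

    from ipc.ipc_mem import IPCMem


    def on_message(data, size):
        # the c_char_p argument arrives as Python bytes (truncated at the first NUL)
        print('received', size, 'bytes:', data)


    util = IPCMem('sender_demo', 'receiver_demo')  # hypothetical channel names
    util.set_reader_callback(on_message)           # IPCMem keeps the ctypes wrapper alive
    if util.listen():
        util.send_text('hello from python')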
diff --git a/ipc/ipc_util.py b/ipc/ipc_util.py
index 798536a..9ae5d80 100644
--- a/ipc/ipc_util.py
+++ b/ipc/ipc_util.py
@@ -38,7 +38,8 @@ class IPCUtil:
-        self.__ipc_obj.send.restype = c_bool
+        self.__ipc_obj.trySend.argtypes = [c_char_p, c_uint]
+        self.__ipc_obj.trySend.restype = c_bool
         send_data = data.encode('utf-8')
         send_len = len(send_data) + 1
-        if not self.__ipc_obj.send(send_data, send_len):
+        if not self.__ipc_obj.trySend(send_data, send_len):
             self.__ipc_obj.reConnect()
         return True
 
@@ -48,7 +48,7 @@ class IPCUtil:
-        self.__ipc_obj.send.argtypes = [c_char_p, c_uint]
-        self.__ipc_obj.send.restype = c_bool
+        self.__ipc_obj.trySend.argtypes = [c_char_p, c_uint]
+        self.__ipc_obj.trySend.restype = c_bool
         data_ptr = cast(data, c_char_p)
-        return self.__ipc_obj.send(data_ptr, size)
+        return self.__ipc_obj.trySend(data_ptr, size)
 
     def set_reader_callback(self, callback):
         if not self.__init:
diff --git a/main.py b/main.py
index 258f3da..2a36bb6 100644
--- a/main.py
+++ b/main.py
@@ -15,4 +15,5 @@ if __name__ == '__main__':
     logger.info('------------start------------')
     render = IpcRender()
     render.run()
+    render.stop()
     logger.info('------------finish------------')
\ No newline at end of file
diff --git a/ui/ipc_render.py b/ui/ipc_render.py
index ac0c90a..17c91d9 100644
--- a/ui/ipc_render.py
+++ b/ui/ipc_render.py
@@ -64,7 +64,8 @@ class IpcRender:
         logger.info('ipc render exit')
 
     def stop(self):
-        pass
+        if self._human_context is not None:
+            self._human_context.stop()
 
     def on_render(self, image):
         self._queue.put(image)
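A closing note on the main.py change: render.stop() is skipped whenever run() raises, so the new HumanContext.stop() path never executes on errors. A sketch of a more defensive entry point under that assumption, not what the diff itself does:

    if __name__ == '__main__':
        logger.info('------------start------------')
        render = IpcRender()
        try:
            render.run()
        finally:
            render.stop()  # release HumanContext even if run() raises
        logger.info('------------finish------------')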