diff --git a/action_handler/ppt/ppt_action.py b/action_handler/ppt/ppt_action.py
index 4e8c398..7910b35 100644
--- a/action_handler/ppt/ppt_action.py
+++ b/action_handler/ppt/ppt_action.py
@@ -32,6 +32,7 @@ class PPTAction:
 
         self.ppt_controller = PPTController()
         self.ppt_controller.open(path)
+        self.ppt_controller.full_screen()
 
     def goto(self, *args, **kwargs):
         page = args[0]
diff --git a/action_handler/ppt/ppt_controller.py b/action_handler/ppt/ppt_controller.py
index f4405bb..43998a5 100644
--- a/action_handler/ppt/ppt_controller.py
+++ b/action_handler/ppt/ppt_controller.py
@@ -77,16 +77,3 @@ class PPTController:
 
     def has_active_presentation(self):
         return True if self.get_presentation_count() > 0 else False
-
-if __name__ == '__main__':
-    ppt = PPTControler()
-    ppt.open(r'D:\Project\LLV\pptAgnet\ppt_test.pptx')
-    time.sleep(2)
-    ppt.full_screen()
-    time.sleep(2)
-    ppt.goto_slide(6)
-    time.sleep(2)
-    ppt.next_page()
-    time.sleep(2)
-    ppt.pre_page()
-    time.sleep(2)
diff --git a/data/ppt/ppt_test.pptx b/data/ppt/ppt_test.pptx
new file mode 100644
index 0000000..7a570fa
Binary files /dev/null and b/data/ppt/ppt_test.pptx differ
diff --git a/data/ppt/test_ppt.pptx b/data/ppt/test_ppt.pptx
new file mode 100644
index 0000000..51f8594
Binary files /dev/null and b/data/ppt/test_ppt.pptx differ
diff --git a/human/human_context.py b/human/human_context.py
index 6d55e8e..17c202f 100644
--- a/human/human_context.py
+++ b/human/human_context.py
@@ -6,7 +6,7 @@ from asr import SherpaNcnnAsr
 from eventbus import EventBus
 from .audio_inference_handler import AudioInferenceHandler
 from .audio_mal_handler import AudioMalHandler
-from nlp import PunctuationSplit, DouBao, Kimi
+from nlp import PunctuationSplit, DouBao, Kimi, PPT
 from tts import TTSEdge, TTSAudioSplitHandle, TTSEdgeHttp
 from utils import load_avatar, get_device, object_stop, load_avatar_from_processed, load_avatar_from_256_processed
 
@@ -118,8 +118,9 @@ class HumanContext:
         self._tts_handle = TTSAudioSplitHandle(self, self._mal_handler)
         self._tts = TTSEdgeHttp(self._tts_handle)
         split = PunctuationSplit()
-        self._nlp = DouBao(self, split, self._tts)
+        # self._nlp = DouBao(self, split, self._tts)
         # self._nlp = Kimi(self, split, self._tts)
+        self._nlp = PPT(self, split, self._tts)
         self._asr = SherpaNcnnAsr()
         self._asr.attach(self._nlp)
 
diff --git a/main.py b/main.py
index cc0aba2..95b8d90 100644
--- a/main.py
+++ b/main.py
@@ -13,12 +13,14 @@ current_file_path = os.path.dirname(os.path.abspath(__file__))
 
 if __name__ == '__main__':
     config_logging('./logs/info.log', logging.INFO, logging.INFO)
-
+    print(current_file_path)
     logger.info('------------start------------')
     context = HumanContext()
     render = IpcRender(context)
     context.build(render)
     action_manger = ActionManager()
+
+    # action_manger.ppt_action.open(f'{current_file_path}/data/ppt/test_ppt.pptx')
     render.run()
     render.stop()
-    logger.info('------------finish------------')
\ No newline at end of file
+    logger.info('------------finish------------')
diff --git a/nlp/__init__.py b/nlp/__init__.py
index 9da22f8..57fa0cf 100644
--- a/nlp/__init__.py
+++ b/nlp/__init__.py
@@ -3,4 +3,5 @@
 from .nlp_callback import NLPCallback
 from .nlp_doubao import DouBao
 from .nlp_kimi import Kimi
+from .nlp_ppt import PPT
 from .nlp_split import PunctuationSplit
diff --git a/nlp/nlp_base.py b/nlp/nlp_base.py
index 377eb27..72eb520 100644
--- a/nlp/nlp_base.py
+++ b/nlp/nlp_base.py
@@ -1,11 +1,13 @@
 #encoding = utf8
 import logging
+import os
 
 from asr import AsrObserver
 from eventbus import EventBus
 from utils import AsyncTaskQueue
 
 logger = logging.getLogger(__name__)
+current_file_path = os.path.dirname(os.path.abspath(__file__))
 
 
 class NLPBase(AsrObserver):
@@ -16,6 +18,8 @@ class NLPBase(AsrObserver):
         self._callback = callback
         self._is_running = True
 
+        self._test_page = 1
+
         EventBus().register('stop', self.on_stop)
         EventBus().register('clear_cache', self.on_clear_cache)
 
@@ -56,9 +60,21 @@ class NLPBase(AsrObserver):
     def completed(self, message: str):
         if not self._is_running:
             return
+        # NOTE(review): demo script -- the recognised `message` is discarded and
+        # replaced below for every NLPBase subclass; the .pptx opened here is
+        # not one of the files added under data/ppt/ by this patch.
+        message = f'讲解第{self._test_page}页'
+        if self._test_page == 1:
+            message = '讲解一下汉代的女童教育'
+            EventBus().post('ppt_open', f'{current_file_path}/../data/ppt/test_20250103213237A001.pptx')
+        else:
+            EventBus().post('ppt_goto', self._test_page)
+
+        self._test_page = self._test_page + 1
         logger.info(f'complete:{message}')
         self.ask(message)
 
+
     def ask(self, question):
         logger.info(f'ask:{question}')
         self._is_running = True
diff --git a/nlp/nlp_ppt.py b/nlp/nlp_ppt.py
new file mode 100644
index 0000000..003a69d
--- /dev/null
+++ b/nlp/nlp_ppt.py
@@ -0,0 +1,151 @@
+# encoding = utf8
+import json
+import logging
+import os
+import time
+import uuid
+
+import requests
+from websocket import create_connection
+
+from nlp.nlp_base import NLPBase
+
+logger = logging.getLogger(__name__)
+
+# NOTE(review): these credentials were committed in clear text and travel over
+# plain http:// and ws://. The literals stay as defaults so current setups keep
+# working; prefer the environment variables and rotate the committed values.
+_APP_KEY = os.environ.get('PPT_APP_KEY', 'fjV0TjBd1Te9zMWVmp4UuUycHJ3YLEv2')
+_SECRET_KEY = os.environ.get('PPT_SECRET_KEY', '0Fhy1a9JFaSAiSuu7DdtrZG1Mcoz1Gpz')
+_TOKEN_URL = 'http://crm.yuekexinxi.com/llm/platform/token'
+_CHAT_URL = 'ws://crm.yuekexinxi.com/llm/chat'
+_AGENT_ID = 4
+# Seconds to wait for the token endpoint before giving up.
+_HTTP_TIMEOUT = 10
+
+
+class PPTWebsocket:
+    """Blocking question/answer client for the PPT agent chat websocket."""
+
+    def __init__(self, token):
+        """Connect with *token* as bearer credential and read the first frame."""
+        self.__token = token
+        self._response = ''
+        self._requesting = False
+        headers = {
+            "Authorization": f"Bearer {token}",
+        }
+        header_list = [f"{key}: {value}" for key, value in headers.items()]
+        sid = uuid.uuid4()
+        url = f'{_CHAT_URL}?agent_id={_AGENT_ID}&app_key={_APP_KEY}&session_id={sid}'
+        self._ws = create_connection(url, header=header_list)
+        # One frame is read right after connecting, before any question is
+        # sent (presumably a server greeting -- confirm against the service).
+        result = self._ws.recv()
+        logger.info('接收结果: %s', result)
+
+    def __request(self, question):
+        """Send *question*, block for one frame and return it parsed as JSON."""
+        logger.info('获取连接状态: %s', self._ws.getstatus())
+        self._ws.send(question)
+        result = self._ws.recv()
+        logger.info('接收结果: %s', result)
+        return json.loads(result)
+
+    def request(self, question, handle, callback):
+        """Ask *question* and pass the answer text to *callback*.
+
+        *handle* is accepted but not used here. Errors raised while sending,
+        receiving or parsing propagate; a reply without ``content.text`` is
+        logged and *callback* is not invoked.
+        """
+        t = time.time()
+        self._requesting = True
+        logger.info(f'-------ppt ask:{question}')
+        try:
+            self._response = self.__request(question)
+        finally:
+            # Reset the flag even when send/recv/parse raises.
+            self._requesting = False
+        try:
+            content = self._response['content']['text']
+        except (KeyError, TypeError):
+            logger.error(f'unexpected ppt response: {self._response}')
+            return
+        callback(content)
+        logger.info(f'-------ppt nlp time:{time.time() - t:.4f}s')
+
+    def close(self):
+        """No-op; the websocket is left open.
+
+        NOTE(review): the socket is never closed. NLPBase.ask() sets
+        ``_is_running`` back to True, so an instance may be reused after a
+        stop -- confirm the lifecycle before closing ``self._ws`` here.
+        """
+
+    def aclose(self):
+        """No-op apart from logging; called from PPT.on_clear_cache()."""
+        logger.info('PPTWebsocket close')
+
+
+class PPT(NLPBase):
+    """NLP backend that forwards questions to the PPT agent service."""
+
+    def __init__(self, context, split, callback=None):
+        super().__init__(context, split, callback)
+        logger.info("PPT init")
+        self.__token = ''
+        self._ppt = None
+        self._request_token()
+
+    def _request_token(self):
+        """Fetch an access token and open the chat websocket.
+
+        Every failure is logged and leaves ``self._ppt`` as ``None``.
+        """
+        data = {
+            "app_key": _APP_KEY,
+            "secret_key": _SECRET_KEY,
+        }
+        try:
+            # A timeout keeps construction from hanging on a dead endpoint.
+            response = requests.post(_TOKEN_URL, json=data, timeout=_HTTP_TIMEOUT)
+        except requests.RequestException as e:
+            logger.error(f'request token failed: {e}')
+            return
+        if response.status_code != 200:
+            logger.error(f'request token failed: {response.text}')
+            return
+
+        try:
+            content = json.loads(response.text)
+            code = content.get('code')
+            # Log the status only; the payload carries the bearer token.
+            logger.info(f'token response code: {code}')
+            if 200 != code:
+                logger.warning(f"content code {code} != 200")
+                return
+            self.__token = content.get('data')
+            self._ppt = PPTWebsocket(self.__token)
+        except Exception as e:
+            # Best effort at construction time: log with traceback, keep going.
+            logger.exception(f'response error: {e}')
+
+    def _request(self, question):
+        """Forward *question* to the websocket client, if one was created."""
+        if self._ppt is None:
+            # _request_token() failed; avoid AttributeError on None.
+            logger.error(f'ppt service unavailable, drop question: {question}')
+            return
+        self._ppt.request(question, self._split_handle, self._on_callback)
+
+    def _on_close(self):
+        if self._ppt is not None:
+            self._ppt.close()
+        logger.info('PPT close')
+
+    def on_clear_cache(self, *args, **kwargs):
+        super().on_clear_cache(*args, **kwargs)
+        if self._ppt is not None:
+            self._ppt.aclose()
+        logger.info('PPT clear_cache')
diff --git a/test/test_nlp_only.py b/test/test_nlp_only.py
index d8af213..f20dfd7 100644
--- a/test/test_nlp_only.py
+++ b/test/test_nlp_only.py
@@ -5,7 +5,7 @@ import os
 import time
 
 import requests
-from nlp import PunctuationSplit, DouBao, NLPCallback
+from nlp import PunctuationSplit, DouBao, NLPCallback, PPT
 from utils import config_logging
 
 logger = logging.getLogger(__name__)
@@ -37,6 +37,9 @@ class DisplayNLP(NLPCallback):
     def on_message(self, txt: str):
         print(txt)
 
+    def on_clear(self):
+        pass
+
 
 def main():
     # 你的API_KEY
@@ -65,11 +68,12 @@ def main():
     # print("文件下载完成")
 
     split = PunctuationSplit()
-    nlp = DouBao(None, split, DisplayNLP())
-    nlp.ask('你好')
-    nlp.ask('你是谁')
-    nlp.ask('能做什么')
-    time.sleep(5)
+    # nlp = DouBao(None, split, DisplayNLP())
+    nlp = PPT(None, split, DisplayNLP())
+    nlp.ask('讲解一下汉代的女童教育')
+    nlp.ask('讲解第二页')
+    nlp.ask('讲解第三页')
+    time.sleep(50)
     nlp.stop()
     print("stop")
 