Compare commits

...

3 Commits

Author SHA1 Message Date
9bbaa4618d modify nlp base 2025-01-09 22:39:28 +08:00
9a454810f1 modify ppt 2025-01-07 21:52:15 +08:00
b7f91e4820 add action manager 2024-12-24 07:37:03 +08:00
13 changed files with 309 additions and 10 deletions

View File

@ -0,0 +1,3 @@
#encoding = utf8
from .action_manager import ActionManager

View File

@ -0,0 +1,10 @@
#encoding = utf8
from action_handler.ppt import PPTAction
class ActionManager:
    """Container for the action handlers used by the application.

    The only handler created here is the PowerPoint one, exposed as the
    ``ppt_action`` attribute.
    """

    def __init__(self):
        # Build the handler first, then publish it on the instance.
        handler = PPTAction()
        self.ppt_action = handler

    def __del__(self):
        """Finalizer kept as an explicit no-op."""
        return None

View File

@ -0,0 +1,4 @@
#encoding = utf8
from .ppt_controller import PPTController
from .ppt_action import PPTAction

View File

@ -0,0 +1,55 @@
#encoding = utf8
import logging
from action_handler.ppt import PPTController
from eventbus import EventBus
logger = logging.getLogger(__name__)
class PPTAction:
    """Bridge between ``ppt_*`` events on the event bus and a PPTController.

    On construction the instance subscribes one handler per event:

    * ``ppt_open`` -> :meth:`open`  (payload: presentation path)
    * ``ppt_goto`` -> :meth:`goto`  (payload: slide index)
    * ``ppt_next`` -> :meth:`next`  (no payload)
    * ``ppt_pre``  -> :meth:`pre`   (no payload)

    The controller is created lazily by :meth:`open`; until then every
    navigation handler only logs a warning.
    """

    def __init__(self):
        self.ppt_controller = None
        for event, handler in self._event_handlers().items():
            EventBus().register(event, handler)

    def __del__(self):
        # A finalizer can run on a half-built object or while the interpreter
        # is shutting down (module globals already cleared). Anything raised
        # here would only be reported on stderr by the interpreter, so the
        # unsubscribe step is deliberately best-effort.
        try:
            for event, handler in self._event_handlers().items():
                EventBus().unregister(event, handler)
        except Exception:
            pass
        # Dropping the reference lets the controller's own finalizer run.
        self.ppt_controller = None

    def _event_handlers(self):
        """Return the event-name -> bound-handler table.

        Keeping registration and unregistration driven by one table
        guarantees that both sides always agree on which handler serves
        which event.
        """
        return {
            'ppt_open': self.open,
            'ppt_goto': self.goto,
            'ppt_next': self.next,
            'ppt_pre': self.pre,
        }

    def open(self, *args, **kwargs):
        """Open the presentation at ``args[0]`` and start its slide show.

        Any previously opened controller is released first. An absent or
        empty path is logged and ignored.
        """
        path = args[0] if args else None
        if not path:
            logger.warning('path is empty')
            return

        # Release the old controller before creating the new one. Assigning
        # None (rather than ``del``) keeps the attribute defined even if the
        # PPTController constructor below raises.
        self.ppt_controller = None
        self.ppt_controller = PPTController()
        self.ppt_controller.open(path)
        self.ppt_controller.full_screen()

    def goto(self, *args, **kwargs):
        """Jump to the slide whose index is ``args[0]``."""
        page = args[0] if args else None
        if page is None or self.ppt_controller is None:
            logger.warning('page is none or ppt controller is none')
            return
        self.ppt_controller.goto_slide(page)

    def next(self, *args, **kwargs):
        """Advance to the next slide, if a presentation is open."""
        if self.ppt_controller is None:
            logger.warning('ppt controller is none')
            return
        self.ppt_controller.next_page()

    def pre(self, *args, **kwargs):
        """Go back to the previous slide, if a presentation is open."""
        if self.ppt_controller is None:
            logger.warning('ppt controller is none')
            return
        self.ppt_controller.pre_page()

View File

@ -0,0 +1,92 @@
#encoding = utf8
import time
import logging
import win32com.client
import win32api
import win32con
import pythoncom
logger = logging.getLogger(__name__)
# Win32 virtual-key codes used when synthesising keyboard input.
VK_CODE = dict(
    spacebar=0x20,    # VK_SPACE
    down_arrow=0x28,  # VK_DOWN
)
class PPTController:
    """Thin wrapper around the ``PowerPoint.Application`` COM object.

    Every navigation method returns the resulting 1-based slide position,
    or ``None`` when no presentation is open.
    """

    def __init__(self):
        # Set the bookkeeping attributes first so that __del__ is safe even
        # when CoInitialize or Dispatch raises half-way through construction.
        self._com_initialized = False
        self.app = None
        pythoncom.CoInitialize()
        self._com_initialized = True
        self.app = win32com.client.Dispatch("PowerPoint.Application")
        self.app.Visible = True

    def __del__(self):
        # getattr keeps the finalizer safe on instances that never finished
        # (or never ran) __init__.
        app, self.app = getattr(self, 'app', None), None
        try:
            if app is not None:
                app.Quit()
        except Exception:
            logger.exception('failed to quit PowerPoint')
        finally:
            # Release the COM reference before COM itself is torn down, and
            # only balance a CoInitialize that actually succeeded.
            del app
            if getattr(self, '_com_initialized', False):
                self._com_initialized = False
                pythoncom.CoUninitialize()

    def open(self, path):
        """Open the presentation file at ``path``."""
        self.app.Presentations.Open(path)

    def full_screen(self):
        """Start the slide show of the active presentation."""
        if not self.has_active_presentation():
            return None
        self.app.ActivePresentation.SlideShowSettings.Run()
        return self.get_active_presentation_slide_index()

    def click(self):
        """Send a space-bar key press and release to the foreground window."""
        win32api.keybd_event(VK_CODE['spacebar'], 0, 0, 0)
        win32api.keybd_event(VK_CODE['spacebar'], 0, win32con.KEYEVENTF_KEYUP, 0)
        return self.get_active_presentation_slide_index()

    def goto_slide(self, index):
        """Show slide ``index`` and return the position actually shown."""
        if not self.has_active_presentation():
            return None
        try:
            self.app.ActiveWindow.View.GotoSlide(index)
            return self.app.ActiveWindow.View.Slide.SlideIndex
        except Exception:
            # The document-window route failed (presumably because a slide
            # show is running — TODO confirm); drive the slide show instead.
            self.app.SlideShowWindows(1).View.GotoSlide(index)
            return self.app.SlideShowWindows(1).View.CurrentShowPosition

    def next_page(self):
        """Advance one slide; stay put when already on the last slide."""
        if not self.has_active_presentation():
            return None
        count = self.get_active_presentation_slide_count()
        index = self.get_active_presentation_slide_index()
        if index >= count:
            return index
        return self.goto_slide(index + 1)

    def pre_page(self):
        """Go back one slide; stay put when already on the first slide."""
        if not self.has_active_presentation():
            return None
        index = self.get_active_presentation_slide_index()
        if index <= 1:
            return index
        return self.goto_slide(index - 1)

    def get_active_presentation_slide_index(self):
        """Return the current slide position, or ``None`` if nothing is open."""
        if not self.has_active_presentation():
            return None
        try:
            return self.app.ActiveWindow.View.Slide.SlideIndex
        except Exception as e:
            # Same fallback as goto_slide: ask the slide-show window instead.
            logger.debug('ActiveWindow unavailable, using slide show: %s', e)
            return self.app.SlideShowWindows(1).View.CurrentShowPosition

    def get_active_presentation_slide_count(self):
        """Return the number of slides in the active presentation."""
        return self.app.ActivePresentation.Slides.Count

    def get_presentation_count(self):
        """Return the number of open presentations."""
        return self.app.Presentations.Count

    def has_active_presentation(self):
        """Return True when at least one presentation is open."""
        return self.get_presentation_count() > 0
#
# if __name__ == '__main__':
# ppt = PPTController()
# ppt.open(r'D:\Project\LLV\pptAgnet\ppt_test.pptx')
# time.sleep(2)
# ppt.full_screen()
# time.sleep(2)
# ppt.goto_slide(6)
# time.sleep(2)
# ppt.next_page()
# time.sleep(2)
# ppt.pre_page()
# time.sleep(2)

BIN
data/ppt/ppt_test.pptx Normal file

Binary file not shown.

BIN
data/ppt/test_ppt.pptx Normal file

Binary file not shown.

View File

@ -6,7 +6,7 @@ from asr import SherpaNcnnAsr
from eventbus import EventBus from eventbus import EventBus
from .audio_inference_handler import AudioInferenceHandler from .audio_inference_handler import AudioInferenceHandler
from .audio_mal_handler import AudioMalHandler from .audio_mal_handler import AudioMalHandler
from nlp import PunctuationSplit, DouBao, Kimi from nlp import PunctuationSplit, DouBao, Kimi, PPT
from tts import TTSEdge, TTSAudioSplitHandle, TTSEdgeHttp from tts import TTSEdge, TTSAudioSplitHandle, TTSEdgeHttp
from utils import load_avatar, get_device, object_stop, load_avatar_from_processed, load_avatar_from_256_processed from utils import load_avatar, get_device, object_stop, load_avatar_from_processed, load_avatar_from_256_processed
@ -118,8 +118,9 @@ class HumanContext:
self._tts_handle = TTSAudioSplitHandle(self, self._mal_handler) self._tts_handle = TTSAudioSplitHandle(self, self._mal_handler)
self._tts = TTSEdgeHttp(self._tts_handle) self._tts = TTSEdgeHttp(self._tts_handle)
split = PunctuationSplit() split = PunctuationSplit()
self._nlp = DouBao(self, split, self._tts) # self._nlp = DouBao(self, split, self._tts)
# self._nlp = Kimi(self, split, self._tts) # self._nlp = Kimi(self, split, self._tts)
self._nlp = PPT(self, split, self._tts)
self._asr = SherpaNcnnAsr() self._asr = SherpaNcnnAsr()
self._asr.attach(self._nlp) self._asr.attach(self._nlp)

View File

@ -3,6 +3,7 @@
import logging import logging
import os import os
from action_handler import ActionManager
from human import HumanContext from human import HumanContext
from ui import IpcRender from ui import IpcRender
from utils import config_logging from utils import config_logging
@ -12,11 +13,14 @@ current_file_path = os.path.dirname(os.path.abspath(__file__))
if __name__ == '__main__': if __name__ == '__main__':
config_logging('./logs/info.log', logging.INFO, logging.INFO) config_logging('./logs/info.log', logging.INFO, logging.INFO)
print(current_file_path)
logger.info('------------start------------') logger.info('------------start------------')
context = HumanContext() context = HumanContext()
render = IpcRender(context) render = IpcRender(context)
context.build(render) context.build(render)
action_manger = ActionManager()
# action_manger.ppt_action.open(f'{current_file_path}/data/ppt/test_ppt.pptx')
render.run() render.run()
render.stop() render.stop()
logger.info('------------finish------------') logger.info('------------finish------------')

View File

@ -3,4 +3,5 @@
from .nlp_callback import NLPCallback from .nlp_callback import NLPCallback
from .nlp_doubao import DouBao from .nlp_doubao import DouBao
from .nlp_kimi import Kimi from .nlp_kimi import Kimi
from .nlp_ppt import PPT
from .nlp_split import PunctuationSplit from .nlp_split import PunctuationSplit

View File

@ -1,11 +1,13 @@
#encoding = utf8 #encoding = utf8
import logging import logging
import os
from asr import AsrObserver from asr import AsrObserver
from eventbus import EventBus from eventbus import EventBus
from utils import AsyncTaskQueue from utils import AsyncTaskQueue
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
current_file_path = os.path.dirname(os.path.abspath(__file__))
class NLPBase(AsrObserver): class NLPBase(AsrObserver):
@ -16,6 +18,8 @@ class NLPBase(AsrObserver):
self._callback = callback self._callback = callback
self._is_running = True self._is_running = True
self._test_page = 1
EventBus().register('stop', self.on_stop) EventBus().register('stop', self.on_stop)
EventBus().register('clear_cache', self.on_clear_cache) EventBus().register('clear_cache', self.on_clear_cache)
@ -56,6 +60,14 @@ class NLPBase(AsrObserver):
def completed(self, message: str): def completed(self, message: str):
if not self._is_running: if not self._is_running:
return return
message = f'讲解第{self._test_page}'
if self._test_page == 1:
message = '讲解一下汉代的女童教育'
EventBus().post('ppt_open', f'{current_file_path}/../data/ppt/test_20250103213237A001.pptx')
else:
EventBus().post('ppt_goto', self._test_page)
self._test_page = self._test_page + 1
logger.info(f'complete:{message}') logger.info(f'complete:{message}')
self.ask(message) self.ask(message)

113
nlp/nlp_ppt.py Normal file
View File

@ -0,0 +1,113 @@
# encoding = utf8
import json
import logging
import time
import uuid
import requests
# import websocket
from websocket import create_connection
from nlp.nlp_base import NLPBase
logger = logging.getLogger(__name__)
class PPTWebsocket:
    """Blocking websocket client for the PPT chat agent.

    One connection is opened in the constructor and reused for every
    question; :meth:`close` releases it.
    """

    def __init__(self, token):
        self.__token = token
        self._response = ''
        self._requesting = False
        # Defined before connecting so close() is safe if the connect fails.
        self._ws = None

        headers = {
            "Authorization": f"Bearer {token}",
        }
        header_list = [f"{key}: {value}" for key, value in headers.items()]
        sid = uuid.uuid4()
        # NOTE(review): the endpoint is unencrypted ws:// and the app_key is
        # hard-coded; both should come from configuration.
        url = f'ws://crm.yuekexinxi.com/llm/chat?agent_id=4&app_key=fjV0TjBd1Te9zMWVmp4UuUycHJ3YLEv2&session_id={sid}'
        self._ws = create_connection(
            url,
            header=header_list
        )
        # Consume the first frame sent after connect (presumably a greeting)
        # so that later recv() calls pair up with the questions sent.
        result = self._ws.recv()
        logger.info("接收结果: %s", result)

    def __request(self, question):
        """Send ``question`` and return the decoded JSON reply."""
        logger.info("获取连接状态: %s", self._ws.getstatus())
        self._requesting = True
        try:
            self._ws.send(question)
            result = self._ws.recv()
        finally:
            self._requesting = False
        logger.info("接收结果: %s", result)
        return json.loads(result)

    def request(self, question, handle, callback):
        """Ask ``question`` and pass the reply text to ``callback``.

        ``handle`` is accepted for interface compatibility and is not used.
        Errors from the socket, JSON decoding, or an unexpected reply shape
        propagate to the caller.
        """
        if self._ws is None:
            logger.error('PPTWebsocket is closed, question dropped')
            return

        t = time.time()
        self._requesting = True
        logger.info(f'-------ppt ask:{question}')
        try:
            self._response = self.__request(question)
            content = self._response['content']['text']
            callback(content)
        finally:
            # Always clear the flag, even when the request or callback raises.
            self._requesting = False
        logger.info(f'-------ppt nlp time:{time.time() - t:.4f}s')

    def close(self):
        """Close the websocket connection; safe to call more than once."""
        ws, self._ws = self._ws, None
        if ws is None:
            return
        try:
            ws.close()
        except Exception:
            logger.exception('PPTWebsocket close failed')

    def aclose(self):
        """Cache-clear hook; the connection is intentionally left open."""
        logger.info('PPTWebsocket close')
class PPT(NLPBase):
    """NLP backend that forwards questions to the PPT chat agent.

    A platform token is requested during construction; on success a
    :class:`PPTWebsocket` is opened with it. If any step fails the instance
    stays usable but every question is dropped with an error log.
    """

    def __init__(self, context, split, callback=None):
        super().__init__(context, split, callback)
        logger.info("PPT init")
        self.__token = ''
        self._ppt = None
        self._request_token()

    def _request_token(self):
        """Fetch a platform token and open the websocket with it.

        Every failure path logs and returns, leaving ``self._ppt`` as
        ``None``.
        """
        url = 'http://crm.yuekexinxi.com/llm/platform/token'
        # NOTE(review): credentials are hard-coded and sent over plain HTTP;
        # they should be loaded from configuration.
        data = {
            "app_key": "fjV0TjBd1Te9zMWVmp4UuUycHJ3YLEv2",
            "secret_key": "0Fhy1a9JFaSAiSuu7DdtrZG1Mcoz1Gpz"
        }
        try:
            # A timeout keeps a dead server from blocking construction
            # forever. stream=True was dropped because the body is read in
            # full through .text anyway.
            response = requests.post(url, json=data, timeout=10)
        except requests.RequestException as e:
            logger.error(f'request token failed: {e}')
            return

        if response.status_code != 200:
            logger.error(f'request token failed: {response.text}')
            return

        try:
            content = json.loads(response.text)
            logger.info(content)
            code = content.get('code')
            if 200 != code:
                logger.warning(f"content code {code} != 200")
                return
            token = content.get('data')
            if not token:
                logger.warning('token missing in response')
                return
            self.__token = token
            self._ppt = PPTWebsocket(self.__token)
        except Exception as e:
            # Covers a malformed body as well as a failed websocket connect.
            logger.error(f'response error: {e}')

    def _request(self, question):
        """Send ``question`` over the websocket, if one is available."""
        if self._ppt is None:
            logger.error('PPT connection is not available, question dropped')
            return
        self._ppt.request(question, self._split_handle, self._on_callback)

    def _on_close(self):
        """Release the websocket client."""
        if self._ppt is not None:
            self._ppt.close()
        logger.info('PPT close')

    def on_clear_cache(self, *args, **kwargs):
        """Clear cached state in the base class and the websocket client."""
        super().on_clear_cache(*args, **kwargs)
        if self._ppt is not None:
            self._ppt.aclose()
        logger.info('PPT clear_cache')

View File

@ -5,7 +5,7 @@ import os
import time import time
import requests import requests
from nlp import PunctuationSplit, DouBao, NLPCallback from nlp import PunctuationSplit, DouBao, NLPCallback, PPT
from utils import config_logging from utils import config_logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -37,6 +37,9 @@ class DisplayNLP(NLPCallback):
def on_message(self, txt: str): def on_message(self, txt: str):
print(txt) print(txt)
def on_clear(self):
pass
def main(): def main():
# 你的API_KEY # 你的API_KEY
@ -65,11 +68,12 @@ def main():
# print("文件下载完成") # print("文件下载完成")
split = PunctuationSplit() split = PunctuationSplit()
nlp = DouBao(None, split, DisplayNLP()) # nlp = DouBao(None, split, DisplayNLP())
nlp.ask('你好') nlp = PPT(None, split, DisplayNLP())
nlp.ask('你是谁') nlp.ask('讲解一下汉代的女童教育')
nlp.ask('能做什么') nlp.ask('讲解第二页')
time.sleep(5) nlp.ask('讲解第三页')
time.sleep(50)
nlp.stop() nlp.stop()
print("stop") print("stop")