Compare commits

..

3 Commits

Author SHA1 Message Date
9bbaa4618d modify nlp base 2025-01-09 22:39:28 +08:00
9a454810f1 modify ppt 2025-01-07 21:52:15 +08:00
b7f91e4820 add action manager 2024-12-24 07:37:03 +08:00
13 changed files with 309 additions and 10 deletions

View File

@ -0,0 +1,3 @@
#encoding = utf8
from .action_manager import ActionManager

View File

@ -0,0 +1,10 @@
#encoding = utf8
from action_handler.ppt import PPTAction
class ActionManager:
    """Container for the application's action handlers.

    Creating a manager instantiates one ``PPTAction`` and stores it on
    ``ppt_action``.
    """

    def __init__(self):
        # The only handler created here is the PPT one.
        self.ppt_action = PPTAction()

    def __del__(self):
        """Finalizer; intentionally does nothing."""

View File

@ -0,0 +1,4 @@
#encoding = utf8
from .ppt_controller import PPTController
from .ppt_action import PPTAction

View File

@ -0,0 +1,55 @@
#encoding = utf8
import logging
from action_handler.ppt import PPTController
from eventbus import EventBus
logger = logging.getLogger(__name__)
class PPTAction:
    """Route ``ppt_*`` EventBus events to a ``PPTController``.

    On construction every event listed in ``_EVENT_HANDLERS`` is registered
    on the EventBus with the matching bound method; the finalizer removes
    the same registrations. The controller is created lazily by ``open``.
    """

    # (event name, handler method name) pairs. Each event is bound to its
    # own handler: "ppt_next" -> next and "ppt_pre" -> pre, not goto.
    _EVENT_HANDLERS = (
        ("ppt_open", "open"),
        ("ppt_goto", "goto"),
        ("ppt_next", "next"),
        ("ppt_pre", "pre"),
    )

    def __init__(self):
        self.ppt_controller = None
        for event, method_name in self._EVENT_HANDLERS:
            EventBus().register(event, getattr(self, method_name))

    def __del__(self):
        for event, method_name in self._EVENT_HANDLERS:
            EventBus().unregister(event, getattr(self, method_name))
        # Drop our reference so the controller can be finalized.
        self.ppt_controller = None

    def open(self, *args, **kwargs):
        """Open the presentation at ``args[0]`` and start its slide show.

        Any previously held controller is released first. A missing or
        empty path is logged and ignored.
        """
        path = args[0] if args else None
        if not path:
            logger.warning('path is empty')
            return
        # Release the old controller before creating the new one. Rebinding
        # to None (rather than ``del``) keeps the attribute defined even if
        # PPTController() raises below.
        self.ppt_controller = None
        self.ppt_controller = PPTController()
        self.ppt_controller.open(path)
        self.ppt_controller.full_screen()

    def goto(self, *args, **kwargs):
        """Jump to the slide number given as ``args[0]``.

        Logged and ignored when no page is supplied or nothing is open.
        """
        page = args[0] if args else None
        if page is None or self.ppt_controller is None:
            logger.warning('page is none or ppt controller is none')
            return
        self.ppt_controller.goto_slide(page)

    def next(self, *args, **kwargs):
        """Advance to the next slide; ignored when nothing is open."""
        if self.ppt_controller is None:
            logger.warning('page is none or ppt controller is none')
            return
        self.ppt_controller.next_page()

    def pre(self, *args, **kwargs):
        """Go back to the previous slide; ignored when nothing is open."""
        if self.ppt_controller is None:
            logger.warning('page is none or ppt controller is none')
            return
        self.ppt_controller.pre_page()

View File

@ -0,0 +1,92 @@
#encoding = utf8
import time
import logging
import win32com.client
import win32api
import win32con
import pythoncom
logger = logging.getLogger(__name__)
# Key codes by readable name; PPTController.click passes the 'spacebar'
# entry to win32api.keybd_event.
VK_CODE = {
    'spacebar': 0x20,  # Windows virtual-key code VK_SPACE
    'down_arrow': 0x28,  # Windows virtual-key code VK_DOWN
}
class PPTController:
    """Drive PowerPoint through its COM automation object.

    The constructor initializes COM, dispatches ``PowerPoint.Application``
    and makes it visible; the finalizer quits the application and
    uninitializes COM. Navigation methods return the resulting slide index,
    or ``None`` when no presentation is open.
    """

    def __init__(self):
        # Define both attributes up front so __del__ is safe even if
        # CoInitialize or Dispatch raises part-way through.
        self.app = None
        self._com_initialized = False
        pythoncom.CoInitialize()
        self._com_initialized = True
        self.app = win32com.client.Dispatch("PowerPoint.Application")
        self.app.Visible = True

    def __del__(self):
        app = getattr(self, 'app', None)
        try:
            if app is not None:
                app.Quit()
        except Exception as e:
            logger.warning('failed to quit PowerPoint: %s', e)
        finally:
            # Only balance a CoInitialize that actually happened.
            if getattr(self, '_com_initialized', False):
                pythoncom.CoUninitialize()

    def open(self, path):
        """Open the presentation file at ``path``."""
        self.app.Presentations.Open(path)

    def full_screen(self):
        """Start the slide show and return the current slide index."""
        if self.has_active_presentation():
            self.app.ActivePresentation.SlideShowSettings.Run()
            return self.get_active_presentation_slide_index()
        return None

    def click(self):
        """Send a spacebar key press/release and return the slide index."""
        win32api.keybd_event(VK_CODE['spacebar'], 0, 0, 0)
        win32api.keybd_event(VK_CODE['spacebar'], 0, win32con.KEYEVENTF_KEYUP, 0)
        return self.get_active_presentation_slide_index()

    def goto_slide(self, index):
        """Jump to slide ``index`` and return the index now shown.

        The active (editing) window is tried first; if that raises, the
        first slide show window is used instead.
        """
        if not self.has_active_presentation():
            return None
        try:
            self.app.ActiveWindow.View.GotoSlide(index)
            return self.app.ActiveWindow.View.Slide.SlideIndex
        except Exception as e:
            logger.debug('active window unavailable, using slide show window: %s', e)
            self.app.SlideShowWindows(1).View.GotoSlide(index)
            return self.app.SlideShowWindows(1).View.CurrentShowPosition

    def next_page(self):
        """Advance one slide, staying put on the last slide."""
        if not self.has_active_presentation():
            return None
        count = self.get_active_presentation_slide_count()
        index = self.get_active_presentation_slide_index()
        if index >= count:
            return index
        return self.goto_slide(index + 1)

    def pre_page(self):
        """Go back one slide, staying put on the first slide."""
        if not self.has_active_presentation():
            return None
        index = self.get_active_presentation_slide_index()
        if index <= 1:
            return index
        return self.goto_slide(index - 1)

    def get_active_presentation_slide_index(self):
        """Return the current slide index, or ``None`` if nothing is open."""
        if not self.has_active_presentation():
            return None
        try:
            return self.app.ActiveWindow.View.Slide.SlideIndex
        except Exception as e:
            # Fall back to the slide show window when the active window
            # cannot report a slide.
            logger.debug('active window unavailable, using slide show window: %s', e)
            return self.app.SlideShowWindows(1).View.CurrentShowPosition

    def get_active_presentation_slide_count(self):
        """Return the number of slides in the active presentation."""
        return self.app.ActivePresentation.Slides.Count

    def get_presentation_count(self):
        """Return the number of open presentations."""
        return self.app.Presentations.Count

    def has_active_presentation(self):
        """Return ``True`` when at least one presentation is open."""
        return self.get_presentation_count() > 0
#
# if __name__ == '__main__':
# ppt = PPTController()
# ppt.open(r'D:\Project\LLV\pptAgnet\ppt_test.pptx')
# time.sleep(2)
# ppt.full_screen()
# time.sleep(2)
# ppt.goto_slide(6)
# time.sleep(2)
# ppt.next_page()
# time.sleep(2)
# ppt.pre_page()
# time.sleep(2)

BIN
data/ppt/ppt_test.pptx Normal file

Binary file not shown.

BIN
data/ppt/test_ppt.pptx Normal file

Binary file not shown.

View File

@ -6,7 +6,7 @@ from asr import SherpaNcnnAsr
from eventbus import EventBus
from .audio_inference_handler import AudioInferenceHandler
from .audio_mal_handler import AudioMalHandler
from nlp import PunctuationSplit, DouBao, Kimi
from nlp import PunctuationSplit, DouBao, Kimi, PPT
from tts import TTSEdge, TTSAudioSplitHandle, TTSEdgeHttp
from utils import load_avatar, get_device, object_stop, load_avatar_from_processed, load_avatar_from_256_processed
@ -118,8 +118,9 @@ class HumanContext:
self._tts_handle = TTSAudioSplitHandle(self, self._mal_handler)
self._tts = TTSEdgeHttp(self._tts_handle)
split = PunctuationSplit()
self._nlp = DouBao(self, split, self._tts)
# self._nlp = DouBao(self, split, self._tts)
# self._nlp = Kimi(self, split, self._tts)
self._nlp = PPT(self, split, self._tts)
self._asr = SherpaNcnnAsr()
self._asr.attach(self._nlp)

View File

@ -3,6 +3,7 @@
import logging
import os
from action_handler import ActionManager
from human import HumanContext
from ui import IpcRender
from utils import config_logging
@ -12,11 +13,14 @@ current_file_path = os.path.dirname(os.path.abspath(__file__))
# Script entry point: configure logging, build the context and renderer,
# create the action handlers, then run the renderer.
if __name__ == '__main__':
    config_logging('./logs/info.log', logging.INFO, logging.INFO)
    print(current_file_path)

    logger.info('------------start------------')

    context = HumanContext()
    render = IpcRender(context)
    context.build(render)
    # Created before render.run() is called; the instance is kept in a
    # module-level name for the rest of the script.
    action_manger = ActionManager()
    # action_manger.ppt_action.open(f'{current_file_path}/data/ppt/test_ppt.pptx')
    render.run()
    render.stop()
    logger.info('------------finish------------')

View File

@ -3,4 +3,5 @@
from .nlp_callback import NLPCallback
from .nlp_doubao import DouBao
from .nlp_kimi import Kimi
from .nlp_ppt import PPT
from .nlp_split import PunctuationSplit

View File

@ -1,11 +1,13 @@
#encoding = utf8
import logging
import os
from asr import AsrObserver
from eventbus import EventBus
from utils import AsyncTaskQueue
logger = logging.getLogger(__name__)
current_file_path = os.path.dirname(os.path.abspath(__file__))
class NLPBase(AsrObserver):
@ -16,6 +18,8 @@ class NLPBase(AsrObserver):
self._callback = callback
self._is_running = True
self._test_page = 1
EventBus().register('stop', self.on_stop)
EventBus().register('clear_cache', self.on_clear_cache)
@ -56,6 +60,14 @@ class NLPBase(AsrObserver):
def completed(self, message: str):
if not self._is_running:
return
message = f'讲解第{self._test_page}'
if self._test_page == 1:
message = '讲解一下汉代的女童教育'
EventBus().post('ppt_open', f'{current_file_path}/../data/ppt/test_20250103213237A001.pptx')
else:
EventBus().post('ppt_goto', self._test_page)
self._test_page = self._test_page + 1
logger.info(f'complete:{message}')
self.ask(message)

113
nlp/nlp_ppt.py Normal file
View File

@ -0,0 +1,113 @@
# encoding = utf8
import json
import logging
import time
import uuid
import requests
# import websocket
from websocket import create_connection
from nlp.nlp_base import NLPBase
logger = logging.getLogger(__name__)
class PPTWebsocket:
    """Blocking websocket client for the PPT chat endpoint.

    The constructor opens the connection with a bearer token and reads the
    first message sent by the server. ``request`` sends one question, waits
    for one reply and hands the reply text to a callback.
    """

    def __init__(self, token):
        self.__token = token
        self._response = ''
        self._requesting = False

        headers = {
            "Authorization": f"Bearer {token}",
        }
        header_list = [f"{key}: {value}" for key, value in headers.items()]
        sid = uuid.uuid4()
        # NOTE(review): agent_id and app_key are hard-coded in the URL;
        # consider moving them to configuration.
        url = f'ws://crm.yuekexinxi.com/llm/chat?agent_id=4&app_key=fjV0TjBd1Te9zMWVmp4UuUycHJ3YLEv2&session_id={sid}'
        self._ws = create_connection(
            url,
            header=header_list
        )
        result = self._ws.recv()
        logger.info("接收结果: %s", result)

    def __request(self, question):
        """Send ``question`` and return the JSON-decoded reply."""
        self._requesting = True
        try:
            logger.info("获取连接状态: %s", self._ws.getstatus())
            self._ws.send(question)
            result = self._ws.recv()
        finally:
            # Never leave the flag stuck on True when send/recv raises.
            self._requesting = False
        logger.info("接收结果: %s", result)
        return json.loads(result)

    def request(self, question, handle, callback):
        """Ask ``question`` and pass the reply text to ``callback``.

        Args:
            question: Text sent over the websocket.
            handle: Accepted for interface compatibility; not used here.
            callback: Called with ``response['content']['text']``.
        """
        t = time.time()
        self._requesting = True
        logger.info(f'-------ppt ask:{question}')
        try:
            self._response = self.__request(question)
            content = self._response['content']['text']
            callback(content)
        finally:
            self._requesting = False
        logger.info(f'-------ppt nlp time:{time.time() - t:.4f}s')

    def close(self):
        """Close the underlying websocket so the socket is not leaked."""
        ws = getattr(self, '_ws', None)
        if ws is None:
            return
        try:
            ws.close()
        except Exception as e:
            logger.warning('PPTWebsocket close failed: %s', e)

    def aclose(self):
        """Cache-clear hook; keeps the connection open and only logs."""
        logger.info('PPTWebsocket close')
class PPT(NLPBase):
    """NLP backend that answers questions through ``PPTWebsocket``.

    On construction a platform token is requested over HTTP and, when that
    succeeds, a websocket session is opened with it. If either step fails
    the instance stays usable but ``_request`` drops questions with a
    warning.
    """

    # Seconds to wait for the token endpoint before giving up.
    _TOKEN_TIMEOUT = 10

    def __init__(self, context, split, callback=None):
        super().__init__(context, split, callback)
        logger.info("PPT init")
        self.__token = ''
        self._ppt = None
        self._request_token()

    def _request_token(self):
        """Fetch a token and open the websocket; log and return on failure."""
        url = 'http://crm.yuekexinxi.com/llm/platform/token'
        # NOTE(review): credentials are hard-coded in source; consider
        # loading them from configuration or the environment.
        data = {
            "app_key": "fjV0TjBd1Te9zMWVmp4UuUycHJ3YLEv2",
            "secret_key": "0Fhy1a9JFaSAiSuu7DdtrZG1Mcoz1Gpz"
        }
        try:
            # A timeout keeps an unreachable server from blocking __init__.
            response = requests.post(url, json=data, timeout=self._TOKEN_TIMEOUT)
        except requests.RequestException as e:
            logger.error(f'request token failed: {e}')
            return
        if response.status_code != 200:
            logger.error(f'request token failed: {response.text}')
            return
        try:
            content = json.loads(response.text)
            code = content.get('code')
            # Log only the status code: the payload carries the token.
            logger.info(f'token response code: {code}')
            if 200 != code:
                logger.warning(f"content code {code} != 200")
                return
            token = content.get('data')
            if not token:
                logger.warning('token response has no data')
                return
            self.__token = token
            self._ppt = PPTWebsocket(self.__token)
        except Exception as e:
            logger.error(f'response error: {e}')

    def _request(self, question):
        """Forward ``question`` to the websocket client, if connected."""
        if self._ppt is None:
            logger.warning('PPT websocket is not connected, question dropped')
            return
        self._ppt.request(question, self._split_handle, self._on_callback)

    def _on_close(self):
        if self._ppt is not None:
            self._ppt.close()
        logger.info('PPT close')

    def on_clear_cache(self, *args, **kwargs):
        super().on_clear_cache(*args, **kwargs)
        if self._ppt is not None:
            self._ppt.aclose()
        logger.info('PPT clear_cache')

View File

@ -5,7 +5,7 @@ import os
import time
import requests
from nlp import PunctuationSplit, DouBao, NLPCallback
from nlp import PunctuationSplit, DouBao, NLPCallback, PPT
from utils import config_logging
logger = logging.getLogger(__name__)
@ -37,6 +37,9 @@ class DisplayNLP(NLPCallback):
def on_message(self, txt: str):
    """Print the received text to stdout."""
    print(txt)
def on_clear(self):
    """Clear hook; does nothing in this implementation."""
    pass
def main():
# 你的API_KEY
@ -65,11 +68,12 @@ def main():
# print("文件下载完成")
split = PunctuationSplit()
nlp = DouBao(None, split, DisplayNLP())
nlp.ask('你好')
nlp.ask('你是谁')
nlp.ask('能做什么')
time.sleep(5)
# nlp = DouBao(None, split, DisplayNLP())
nlp = PPT(None, split, DisplayNLP())
nlp.ask('讲解一下汉代的女童教育')
nlp.ask('讲解第二页')
nlp.ask('讲解第三页')
time.sleep(50)
nlp.stop()
print("stop")