human/nlp/nlp_split.py
2024-10-09 20:19:00 +08:00

25 lines
565 B
Python

# -*- coding: utf-8 -*-
import re
from abc import ABC, abstractmethod
class NLPSplit(ABC):
    """Abstract interface for strategies that cut a segment off a message.

    Concrete subclasses implement :meth:`handle`.
    """

    @abstractmethod
    def handle(self, message: str) -> "tuple[str, str]":
        """Split one segment off the front of *message*.

        The annotation is quoted so it is never evaluated at definition
        time, which keeps the module importable on interpreters that lack
        subscriptable built-in generics.

        Args:
            message: The text to split.

        Returns:
            A ``(remaining, segment)`` pair, matching the shape returned by
            the concrete splitter in this module. This base implementation
            does nothing and returns ``None``; subclasses must override it.
        """
class PunctuationSplit(NLPSplit):
    """Splitter that cuts a message at its first punctuation mark.

    The default delimiter set mixes full-width (Chinese) and ASCII marks.
    Each call to :meth:`handle` peels at most one segment off the front of
    the message, so callers feed the returned remainder back in to get the
    next segment.
    """

    # Default delimiters: any single one of these characters ends a segment.
    DEFAULT_PATTERN = r'[,。、;?!,.!?]'

    def __init__(self, pattern=DEFAULT_PATTERN):
        """Create a splitter.

        Args:
            pattern: Regular expression (string or compiled pattern) that
                marks the end of a segment. Defaults to
                :attr:`DEFAULT_PATTERN`. It should consume at least one
                character; a zero-width match at position 0 would yield an
                empty segment and leave the message unchanged.
        """
        super().__init__()
        self._pattern = pattern

    def handle(self, message: str) -> "tuple[str, str]":
        """Split the first delimited segment off *message*.

        Args:
            message: The text to split.

        Returns:
            A ``(remaining, segment)`` pair. ``segment`` is the text up to
            and including the first delimiter match, with surrounding
            whitespace stripped; ``remaining`` is everything after the match,
            unstripped. When no delimiter is found, ``remaining`` is the
            whole message and ``segment`` is ``''``.
        """
        # re.search accepts both pattern strings and compiled patterns, and
        # the re module caches compiled strings, so repeated calls stay cheap.
        match = re.search(self._pattern, message)
        if match is None:
            return message, ''

        # Cut after the whole match. For the single-character default this
        # equals start() + 1; for multi-character custom delimiters it keeps
        # the delimiter intact instead of slicing through it.
        cut = match.end()
        segment = message[:cut].strip()
        remaining = message[cut:]
        return remaining, segment