human/nlp/nlp_split.py

25 lines
573 B
Python
Raw Normal View History

2024-10-09 12:19:00 +00:00
# -*- coding: utf-8 -*-
import re
from abc import ABC, abstractmethod
class NLPSplit(ABC):
    """Abstract interface for strategies that cut a segment off a message.

    Subclasses must override :meth:`handle`; the class cannot be
    instantiated until they do.
    """

    @abstractmethod
    def handle(self, message: str):
        """Split ``message`` and report what was cut off.

        Args:
            message: The text to examine.

        Returns:
            The punctuation-based implementation in this module returns a
            ``(remaining, segment)`` tuple of two strings, where ``segment``
            is ``''`` when no split point was found. Other implementations
            are expected to follow the same shape.
        """
class PunctuationSplit(NLPSplit):
    """Cut the leading clause off a message at its first punctuation mark.

    Both ASCII and full-width / CJK punctuation are recognised. A mark that
    immediately follows a digit is ignored, so numbers such as ``3.14`` or
    ``1,000`` are not broken apart.
    """

    def __init__(self):
        # ASCII marks are written literally (each one once); the full-width
        # and CJK marks are spelled as \u escapes so the pattern survives
        # editors or tools that normalise full-width characters to ASCII:
        #   \uff0c comma      \u3002 ideographic full stop
        #   \uff1f question   \uff01 exclamation
        #   \uff1a colon      \uff1b semicolon
        #   \u3001 ideographic comma
        # The (?<!\d) lookbehind rejects a mark directly preceded by a digit.
        self._pattern = re.compile(
            r'(?<!\d)[,.?!:;'
            r'\uff0c\u3002\uff1f\uff01\uff1a\uff1b\u3001]'
        )

    def handle(self, message: str):
        """Split ``message`` just after its first punctuation mark.

        Args:
            message: The text to examine.

        Returns:
            A ``(remaining, segment)`` tuple. ``segment`` is the text up to
            and including the first matching punctuation mark, with
            surrounding whitespace stripped; ``remaining`` is everything
            after that mark, left untouched. When no mark is found the
            result is ``(message, '')``.
        """
        match = self._pattern.search(message)
        if match is None:
            return message, ''
        # The pattern matches exactly one character, so end() is the index
        # right after the punctuation mark.
        cut = match.end()
        return message[cut:], message[:cut].strip()