25 lines
573 B
Python
25 lines
573 B
Python
# -*- coding: utf-8 -*-
|
|
import re
|
|
from abc import ABC, abstractmethod
|
|
|
|
|
|
class NLPSplit(ABC):
    """Abstract base class for message splitters.

    A concrete splitter must override :meth:`handle`; the class cannot be
    instantiated until it does.
    """

    @abstractmethod
    def handle(self, message: str):
        """Split ``message``; the actual logic lives in subclasses.

        This base implementation has no body of its own and returns ``None``.

        Args:
            message: The text to be split.
        """
|
|
|
|
|
|
class PunctuationSplit(NLPSplit):
    """Splitter that cuts a message right after its first punctuation mark.

    Two groups of punctuation are recognised, each in its ASCII and its
    fullwidth/CJK form:

    * Separators (``, . : ;`` and the ideographic comma) are ignored when the
      preceding character is a digit, so numbers, times and list markers such
      as ``3.14``, ``1,000``, ``12:30`` or ``1.`` are not torn apart.
    * Sentence-final marks (``? !`` and the ideographic full stop) always end
      a segment, even directly after a digit ("Is it 3?").
    """

    def __init__(self):
        # Separators that may legitimately follow a digit without ending a
        # segment: ASCII , . : ; plus fullwidth comma (U+FF0C), fullwidth
        # colon (U+FF1A), fullwidth semicolon (U+FF1B) and the ideographic
        # comma (U+3001). The look-behind keeps "3.14" / "1,000" intact.
        guarded_separators = r'(?<!\d)[,.:;\uff0c\uff1a\uff1b\u3001]'
        # Marks that always finish a sentence, so no digit guard is applied:
        # ASCII ? !, fullwidth ? ! (U+FF1F, U+FF01) and the ideographic full
        # stop (U+3002). Non-ASCII characters are written as escapes so the
        # pattern survives tools that normalise fullwidth punctuation.
        sentence_finals = r'[?!\uff1f\uff01\u3002]'
        self._pattern = guarded_separators + '|' + sentence_finals

    def handle(self, message: str):
        """Cut the leading segment, up to the first split mark, off ``message``.

        Args:
            message: The text to examine.

        Returns:
            A ``(remaining, segment)`` tuple. ``segment`` is the text up to
            and including the first split mark, with surrounding whitespace
            stripped; ``remaining`` is everything after that mark, unchanged.
            When no split mark is found the result is ``(message, '')``.
        """
        match = re.search(self._pattern, message)
        if match is None:
            return message, ''
        # Every alternative matches exactly one character, so ``end()`` is the
        # index just past the punctuation mark.
        cut = match.end()
        return message[cut:], message[:cut].strip()
|