2024-10-09 12:19:00 +00:00
|
|
|
#encoding = utf8
|
|
|
|
import re
|
|
|
|
from abc import ABC, abstractmethod
|
|
|
|
|
|
|
|
|
|
|
|
class NLPSplit(ABC):
    """Abstract interface for objects that split a piece of text.

    Concrete subclasses must override :meth:`handle`; the base class itself
    cannot be instantiated because ``handle`` is abstract.
    """

    @abstractmethod
    def handle(self, message: str):
        """Process ``message`` and return the split result.

        The base implementation does nothing and returns ``None``.
        NOTE(review): the expected return shape is defined by subclasses,
        not by this base class -- confirm against the implementations.
        """
        return None
|
|
|
|
|
|
|
|
|
|
|
|
class PunctuationSplit(NLPSplit):
    """Cut the leading sentence off a message at its first punctuation mark."""

    def __init__(self):
        # One punctuation character (ASCII or CJK) that is NOT directly
        # preceded by a digit; the lookbehind keeps e.g. the dot in "3.14"
        # from being treated as a cut point.
        self._pattern = r'(?<!\d)[,.,。?!:;、]'

    def handle(self, message: str):
        """Split ``message`` after its first qualifying punctuation mark.

        Every ``*`` and ``#`` character is removed from ``message`` before
        searching (presumably Markdown markers -- confirm with callers).

        Returns a ``(remaining, sentence)`` tuple:

        * ``sentence`` is the text up to and including the first matching
          punctuation mark, with surrounding whitespace stripped, or ``''``
          when no mark is found.
        * ``remaining`` is the cleaned text after that mark (not stripped),
          or the whole cleaned text when no mark is found.
        """
        cleaned = message.replace('*', '').replace('#', '')

        hit = re.search(self._pattern, cleaned)
        if not hit:
            # Nothing to cut yet: hand the cleaned text back untouched.
            return cleaned, ''

        # Cut just past the matched punctuation character so that the mark
        # stays attached to the sentence it terminates.
        cut = hit.start() + 1
        sentence = cleaned[:cut].strip()
        remaining = cleaned[cut:]
        return remaining, sentence
|