Compare commits


No commits in common. "64e3f339cdad4aad942e8e0e30b11f43b91d80ee" and "4c9ec6831f3ad88fd8350e7581ff136b3feecffe" have entirely different histories.

12 changed files with 51 additions and 46 deletions

View File

@@ -27,7 +27,6 @@ class SherpaNcnnAsr(AsrBase):
super().__init__()
self._recognizer = self._create_recognizer()
logger.info('SherpaNcnnAsr init')
print('SherpaNcnnAsr init')
def __del__(self):
self.__del__()
@@ -61,10 +60,17 @@ class SherpaNcnnAsr(AsrBase):
time.sleep(3)
last_result = ""
logger.info(f'_recognize_loop')
print(f'_recognize_loop')
while self._stop_event.is_set():
logger.info(f'_recognize_loop000')
self._notify_complete('介绍中国5000年历史文学')
logger.info(f'_recognize_loop111')
segment_id += 1
time.sleep(60)
logger.info(f'_recognize_loop222')
logger.info(f'_recognize_loop exit')
'''
with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s:
while self._stop_event.is_set():
while not self._stop_event.is_set():
samples, _ = s.read(self._samples_per_read) # a blocking read
samples = samples.reshape(-1)
self._recognizer.accept_waveform(self._sample_rate, samples)
@@ -83,13 +89,4 @@ class SherpaNcnnAsr(AsrBase):
self._notify_complete(result)
segment_id += 1
self._recognizer.reset()
'''
while self._stop_event.is_set():
logger.info(f'_recognize_loop000')
self._notify_complete('介绍中国5000年历史文学')
logger.info(f'_recognize_loop111')
segment_id += 1
time.sleep(60)
logger.info(f'_recognize_loop222')
logger.info(f'_recognize_loop exit')
'''
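The sounddevice-based loop in this file now sits inside a ''' block, leaving an active stub that notifies a fixed string every 60 seconds. For reference, a minimal standalone sketch of the blocking-read microphone pattern, assuming a recognizer that exposes accept_waveform(sample_rate, samples) and reset() as in the quoted code, plus a text property for the current result (that property is an assumption here); the function name is illustrative:

import sounddevice as sd

def recognize_from_mic(recognizer, stop_event, sample_rate=16000):
    samples_per_read = int(0.1 * sample_rate)  # ~100 ms of mono audio per blocking read
    last_result = ""
    segment_id = 0
    with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as stream:
        while not stop_event.is_set():
            samples, _ = stream.read(samples_per_read)  # blocking read
            recognizer.accept_waveform(sample_rate, samples.reshape(-1))
            result = recognizer.text
            if result and result != last_result:
                last_result = result
                print(f"segment {segment_id}: {result}")
                segment_id += 1
                recognizer.reset()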

View File

@@ -75,7 +75,7 @@ class AudioInferenceHandler(AudioHandler):
batch_size = self._context.batch_size
try:
mel_batch = self._mal_queue.get(timeout=0.02)
# print('AudioInferenceHandler mel_batch:', len(mel_batch), 'size:', self._mal_queue.size())
print('AudioInferenceHandler mel_batch:', len(mel_batch), 'size:', self._mal_queue.size())
except queue.Empty:
continue
@@ -100,6 +100,7 @@ class AudioInferenceHandler(AudioHandler):
self.on_next_handle((None, mirror_index(length, index), audio_frames[i * 2:i * 2 + 2]),
0)
index = index + 1
print('AudioInferenceHandler all silence')
else:
logger.info('infer=======')
t = time.perf_counter()
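The handler above polls self._mal_queue with a 20 ms timeout and simply retries on queue.Empty, so the loop stays responsive instead of blocking on an empty queue. A minimal sketch of that consumer pattern using the standard library queue (all names here are illustrative, not the project's):

import queue
import threading

def consume_batches(mel_queue: queue.Queue, stop_event: threading.Event):
    while not stop_event.is_set():
        try:
            mel_batch = mel_queue.get(timeout=0.02)  # short timeout keeps the loop responsive
        except queue.Empty:
            continue
        # Run inference on the batch here.
        print("mel_batch of length", len(mel_batch))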

View File

@@ -20,16 +20,15 @@ class AudioMalHandler(AudioHandler):
EventBus().register('stop', self._on_stop)
self._is_running = True
self._queue = SyncQueue(context.batch_size * 2, "AudioMalHandler_queue")
self._queue = SyncQueue(context.batch_size, "AudioMalHandler_queue")
self._exit_event = Event()
self._thread = Thread(target=self._on_run, name="AudioMalHandlerThread")
self._exit_event.set()
self._thread.start()
self.frames = []
self.chunk = context.sample_rate // context.fps
self._exit_event = Event()
self._exit_event.set()
self._thread = Thread(target=self._on_run, name="AudioMalHandlerThread")
self._thread.start()
self._is_running = True
logger.info("AudioMalHandler init")
def __del__(self):

View File

@@ -42,14 +42,8 @@ class HumanRender(AudioHandler):
def _on_run(self):
logging.info('human render run')
while self._exit_event.is_set() and self._is_running:
# t = time.time()
self._run_step()
# delay = time.time() - t
delay = 0.03805 # - delay
# print(delay)
# if delay <= 0.0:
# continue
time.sleep(delay)
time.sleep(0.038)
logging.info('human render exit')
@@ -59,7 +53,7 @@ class HumanRender(AudioHandler):
if value is None:
return
res_frame, idx, audio_frames = value
# print('render queue size', self._queue.size())
print('voice render queue size', self._queue.size())
if not self._empty_log:
self._empty_log = True
logging.info('render render:')
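The first hunk above collapses a fixed delay = 0.03805 plus commented-out elapsed-time measurement into a plain time.sleep(0.038). For reference, a minimal sketch of what the compensated pacing would look like when enabled, with an assumed 0.038 s frame budget:

import time

TARGET_INTERVAL = 0.038  # roughly 26 fps, matching the constant in the diff

def paced_loop(run_step, keep_running):
    while keep_running():
        start = time.time()
        run_step()
        # Sleep only for whatever is left of this frame's budget.
        delay = TARGET_INTERVAL - (time.time() - start)
        if delay > 0.0:
            time.sleep(delay)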

View File

@@ -55,7 +55,7 @@ class NLPBase(AsrObserver):
def ask(self, question):
logger.info(f'ask:{question}')
self._is_running = True
self._ask_queue.add_task(self._request, question)
task = self._ask_queue.add_task(self._request, question)
logger.info(f'ask:{question} completed')
def stop(self):

View File

@@ -15,9 +15,28 @@ class VideoRender(BaseRender):
def __init__(self, play_clock, context, human_render):
super().__init__(play_clock, context, 'Video')
self._human_render = human_render
self._diff_avg_count = 0
def render(self, frame, ps):
res_frame, idx, type_ = frame
clock_time = self._play_clock.clock_time()
time_difference = clock_time - ps
if abs(time_difference) > self._play_clock.audio_diff_threshold:
if self._diff_avg_count < 5:
self._diff_avg_count += 1
else:
if time_difference < -self._play_clock.audio_diff_threshold:
sleep_time = abs(time_difference)
print("Video frame waiting to catch up with audio", sleep_time)
if sleep_time <= 1.0:
time.sleep(sleep_time)
# elif time_difference > self._play_clock.audio_diff_threshold: # 视频比音频快超过10ms
# print("Video frame dropped to catch up with audio")
# continue
else:
self._diff_avg_count = 0
if type_ == 0:
combine_frame = self._context.frame_list_cycle[idx]
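The new render logic compares the frame timestamp ps against the shared play clock and, once the difference has stayed out of range for five consecutive frames, holds a frame that runs ahead of the audio. A standalone sketch of that drift check, with an assumed 10 ms threshold (the commented-out line mentions 10 ms) and illustrative names standing in for PlayClock:

import time

class VideoSync:
    """Hold video frames that run ahead of the shared audio clock."""

    def __init__(self, diff_threshold=0.01, settle_frames=5):
        self._threshold = diff_threshold      # assumed 10 ms tolerance
        self._settle_frames = settle_frames   # mirrors the "< 5" counter in the diff
        self._out_of_sync = 0

    def wait_if_ahead(self, clock_time, frame_ts):
        diff = clock_time - frame_ts
        if abs(diff) > self._threshold:
            if self._out_of_sync < self._settle_frames:
                self._out_of_sync += 1
            elif diff < -self._threshold:
                sleep_time = abs(diff)
                if sleep_time <= 1.0:  # skip pathological gaps, as the diff does
                    time.sleep(sleep_time)
        else:
            self._out_of_sync = 0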

View File

@@ -44,8 +44,7 @@ def main():
from io import BytesIO
async def fetch_audio():
# url = "http://localhost:8082/v1/audio/speech"
url = "https://tts.mzzsfy.eu.org/v1/audio/speech"
url = "http://localhost:8082/v1/audio/speech"
data = {
"model": "tts-1",
"input": "写了一个高性能tts(文本转声音)工具,5千字仅需5秒,免费使用",

View File

@@ -32,13 +32,13 @@ class TTSBase(NLPCallback):
self._handle = value
async def _request(self, txt: str, index):
# print('_request:', txt)
print('_request:', txt)
t = time.time()
stream = await self._on_request(txt)
if stream is None:
logger.warn(f'-------stream is None')
print(f'-------stream is None')
return
logger.info(f'-------tts time:{time.time() - t:.4f}s, txt:{txt}')
print(f'-------tts time:{time.time() - t:.4f}s')
if self._handle is not None and self._is_running:
await self._on_handle(stream, index)
else:
@@ -59,13 +59,13 @@ class TTSBase(NLPCallback):
def message(self, txt):
txt = txt.strip()
if len(txt) == 0:
# logger.info(f'message is empty')
logger.info(f'message is empty')
return
logger.info(f'message:{txt}')
index = 0
if self._handle is not None:
index = self._handle.get_index()
# print(f'message txt-index:{txt}, index {index}')
print(f'message txt-index:{txt}, index {index}')
self._message_queue.add_task(self._request, txt, index)
def stop(self):

View File

@@ -22,7 +22,7 @@ class TTSEdgeHttp(TTSBase):
logger.info(f"TTSEdge init, {voice}")
async def _on_request(self, txt: str):
logger.info(f'TTSEdgeHttp, _on_request, txt:{txt}')
print('TTSEdgeHttp, _on_request, txt:', txt)
data = {
"model": "tts-1",
"input": txt,
@@ -38,7 +38,7 @@ class TTSEdgeHttp(TTSBase):
return stream
else:
byte_stream = None
return byte_stream, None
return byte_stream
async def _on_handle(self, stream, index):
print('-------tts _on_handle')
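_on_request posts the text to an OpenAI-style /v1/audio/speech endpoint, and the second hunk makes the failure path return byte_stream alone instead of a (byte_stream, None) tuple, so callers that check `if stream is None` keep working. A minimal sketch of such a request with aiohttp, reading the response into a BytesIO; the URL handling, helper name and error handling are assumptions, not the project's code:

from io import BytesIO
from typing import Optional

import aiohttp

async def request_speech(url: str, text: str) -> Optional[BytesIO]:
    payload = {"model": "tts-1", "input": text}
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload) as resp:
            if resp.status != 200:
                return None  # single return type: BytesIO or None
            return BytesIO(await resp.read())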

View File

@@ -55,11 +55,7 @@ class PyGameUI:
if self._queue.empty():
return
image = self._queue.get()
color_format = "RGB"
if 4 == image.shape[2]:
color_format = "RGBA"
self._human_image = pygame.image.frombuffer(image.tobytes(), image.shape[1::-1], color_format)
self._human_image = pygame.image.frombuffer(image.tobytes(), image.shape[1::-1], "RGB")
def stop(self):
logger.info('stop')
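The hunk drops the four-channel branch and always hands "RGB" to pygame.image.frombuffer. For reference, a minimal sketch of the removed channel-aware conversion as a standalone helper (the function name is illustrative):

import numpy as np
import pygame

def frame_to_surface(image: np.ndarray) -> pygame.Surface:
    # image is (height, width, channels); shape[1::-1] yields (width, height).
    color_format = "RGBA" if image.shape[2] == 4 else "RGB"
    return pygame.image.frombuffer(image.tobytes(), image.shape[1::-1], color_format)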

View File

@@ -45,7 +45,7 @@ class AsyncTaskQueue:
await func(*args) # Execute async function
except Exception as e:
logging.error(f'{self._name} error: {e}')
logging.error(f'{self._name} error:', e)
finally:
self._queue.task_done()
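Of the two logging calls in the hunk, only the f-string form actually places the exception text in the message; logging.error(f'...', e) passes e as a %-style argument that the format string never consumes. A minimal sketch of the surrounding worker pattern with an asyncio queue, using logging.exception so the traceback is preserved (names are illustrative):

import asyncio
import logging

async def worker(task_queue: asyncio.Queue, name: str = "AsyncTaskQueue"):
    while True:
        item = await task_queue.get()
        if item is None:          # sentinel: stop the worker
            task_queue.task_done()
            break
        func, args = item
        try:
            await func(*args)     # execute the queued coroutine function
        except Exception:
            logging.exception('%s error', name)  # message plus full traceback
        finally:
            task_queue.task_done()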

View File

@@ -194,7 +194,7 @@ def config_logging(file_name: str, console_level: int = logging.INFO, file_level
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter(
'[%(asctime)s.%(msecs)03d %(levelname)s] %(message)s',
'[%(asctime)s %(levelname)s] %(message)s',
datefmt="%Y/%m/%d %H:%M:%S"
))
console_handler.setLevel(console_level)
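The console handler above is the part of config_logging this hunk touches; the '.%(msecs)03d' fragment is what appends milliseconds to each timestamp. A minimal sketch of attaching such a handler to the root logger (the helper name is illustrative, not the project's config_logging):

import logging

def add_console_handler(console_level: int = logging.INFO):
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(
        '[%(asctime)s.%(msecs)03d %(levelname)s] %(message)s',
        datefmt="%Y/%m/%d %H:%M:%S"))
    handler.setLevel(console_level)
    root = logging.getLogger()
    root.addHandler(handler)
    root.setLevel(console_level)
    return handler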