modify buffer size

This commit is contained in:
jiegeaiai 2024-11-08 07:27:00 +08:00
parent 4c9ec6831f
commit 5d07422eaa
14 changed files with 46 additions and 51 deletions

View File

@ -27,6 +27,7 @@ class SherpaNcnnAsr(AsrBase):
super().__init__()
self._recognizer = self._create_recognizer()
logger.info('SherpaNcnnAsr init')
print('SherpaNcnnAsr init')
def __del__(self):
self.__del__()
@ -60,17 +61,10 @@ class SherpaNcnnAsr(AsrBase):
time.sleep(3)
last_result = ""
logger.info(f'_recognize_loop')
while self._stop_event.is_set():
logger.info(f'_recognize_loop000')
self._notify_complete('介绍中国5000年历史文学')
logger.info(f'_recognize_loop111')
segment_id += 1
time.sleep(60)
logger.info(f'_recognize_loop222')
logger.info(f'_recognize_loop exit')
'''
print(f'_recognize_loop')
with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s:
while not self._stop_event.is_set():
while self._stop_event.is_set():
samples, _ = s.read(self._samples_per_read) # a blocking read
samples = samples.reshape(-1)
self._recognizer.accept_waveform(self._sample_rate, samples)
@ -89,4 +83,13 @@ class SherpaNcnnAsr(AsrBase):
self._notify_complete(result)
segment_id += 1
self._recognizer.reset()
'''
while self._stop_event.is_set():
logger.info(f'_recognize_loop000')
self._notify_complete('介绍中国5000年历史文学')
logger.info(f'_recognize_loop111')
segment_id += 1
time.sleep(60)
logger.info(f'_recognize_loop222')
logger.info(f'_recognize_loop exit')
'''

Binary file not shown.

Before

Width:  |  Height:  |  Size: 452 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 258 KiB

View File

@ -75,7 +75,7 @@ class AudioInferenceHandler(AudioHandler):
batch_size = self._context.batch_size
try:
mel_batch = self._mal_queue.get(timeout=0.02)
print('AudioInferenceHandler mel_batch:', len(mel_batch), 'size:', self._mal_queue.size())
# print('AudioInferenceHandler mel_batch:', len(mel_batch), 'size:', self._mal_queue.size())
except queue.Empty:
continue
@ -100,7 +100,6 @@ class AudioInferenceHandler(AudioHandler):
self.on_next_handle((None, mirror_index(length, index), audio_frames[i * 2:i * 2 + 2]),
0)
index = index + 1
print('AudioInferenceHandler all silence')
else:
logger.info('infer=======')
t = time.perf_counter()

View File

@ -20,15 +20,16 @@ class AudioMalHandler(AudioHandler):
EventBus().register('stop', self._on_stop)
self._queue = SyncQueue(context.batch_size, "AudioMalHandler_queue")
self._exit_event = Event()
self._thread = Thread(target=self._on_run, name="AudioMalHandlerThread")
self._exit_event.set()
self._thread.start()
self._is_running = True
self._queue = SyncQueue(context.batch_size * 2, "AudioMalHandler_queue")
self.frames = []
self.chunk = context.sample_rate // context.fps
self._is_running = True
self._exit_event = Event()
self._exit_event.set()
self._thread = Thread(target=self._on_run, name="AudioMalHandlerThread")
self._thread.start()
logger.info("AudioMalHandler init")
def __del__(self):

View File

@ -42,8 +42,14 @@ class HumanRender(AudioHandler):
def _on_run(self):
logging.info('human render run')
while self._exit_event.is_set() and self._is_running:
# t = time.time()
self._run_step()
time.sleep(0.038)
# delay = time.time() - t
delay = 0.03805 # - delay
# print(delay)
# if delay <= 0.0:
# continue
time.sleep(delay)
logging.info('human render exit')
@ -53,7 +59,7 @@ class HumanRender(AudioHandler):
if value is None:
return
res_frame, idx, audio_frames = value
print('voice render queue size', self._queue.size())
# print('render queue size', self._queue.size())
if not self._empty_log:
self._empty_log = True
logging.info('render render:')

View File

@ -55,7 +55,7 @@ class NLPBase(AsrObserver):
def ask(self, question):
logger.info(f'ask:{question}')
self._is_running = True
task = self._ask_queue.add_task(self._request, question)
self._ask_queue.add_task(self._request, question)
logger.info(f'ask:{question} completed')
def stop(self):

View File

@ -15,28 +15,9 @@ class VideoRender(BaseRender):
def __init__(self, play_clock, context, human_render):
super().__init__(play_clock, context, 'Video')
self._human_render = human_render
self._diff_avg_count = 0
def render(self, frame, ps):
res_frame, idx, type_ = frame
clock_time = self._play_clock.clock_time()
time_difference = clock_time - ps
if abs(time_difference) > self._play_clock.audio_diff_threshold:
if self._diff_avg_count < 5:
self._diff_avg_count += 1
else:
if time_difference < -self._play_clock.audio_diff_threshold:
sleep_time = abs(time_difference)
print("Video frame waiting to catch up with audio", sleep_time)
if sleep_time <= 1.0:
time.sleep(sleep_time)
# elif time_difference > self._play_clock.audio_diff_threshold: # 视频比音频快超过10ms
# print("Video frame dropped to catch up with audio")
# continue
else:
self._diff_avg_count = 0
if type_ == 0:
combine_frame = self._context.frame_list_cycle[idx]

View File

@ -44,7 +44,8 @@ def main():
from io import BytesIO
async def fetch_audio():
url = "http://localhost:8082/v1/audio/speech"
# url = "http://localhost:8082/v1/audio/speech"
url = "https://tts.mzzsfy.eu.org/v1/audio/speech"
data = {
"model": "tts-1",
"input": "写了一个高性能tts(文本转声音)工具,5千字仅需5秒,免费使用",

View File

@ -32,13 +32,13 @@ class TTSBase(NLPCallback):
self._handle = value
async def _request(self, txt: str, index):
print('_request:', txt)
# print('_request:', txt)
t = time.time()
stream = await self._on_request(txt)
if stream is None:
print(f'-------stream is None')
logger.warn(f'-------stream is None')
return
print(f'-------tts time:{time.time() - t:.4f}s')
logger.info(f'-------tts time:{time.time() - t:.4f}s, txt:{txt}')
if self._handle is not None and self._is_running:
await self._on_handle(stream, index)
else:
@ -59,13 +59,13 @@ class TTSBase(NLPCallback):
def message(self, txt):
txt = txt.strip()
if len(txt) == 0:
logger.info(f'message is empty')
# logger.info(f'message is empty')
return
logger.info(f'message:{txt}')
index = 0
if self._handle is not None:
index = self._handle.get_index()
print(f'message txt-index:{txt}, index {index}')
# print(f'message txt-index:{txt}, index {index}')
self._message_queue.add_task(self._request, txt, index)
def stop(self):

View File

@ -22,7 +22,7 @@ class TTSEdgeHttp(TTSBase):
logger.info(f"TTSEdge init, {voice}")
async def _on_request(self, txt: str):
print('TTSEdgeHttp, _on_request, txt:', txt)
logger.info(f'TTSEdgeHttp, _on_request, txt:{txt}')
data = {
"model": "tts-1",
"input": txt,
@ -38,7 +38,7 @@ class TTSEdgeHttp(TTSBase):
return stream
else:
byte_stream = None
return byte_stream
return byte_stream, None
async def _on_handle(self, stream, index):
print('-------tts _on_handle')

View File

@ -55,7 +55,11 @@ class PyGameUI:
if self._queue.empty():
return
image = self._queue.get()
self._human_image = pygame.image.frombuffer(image.tobytes(), image.shape[1::-1], "RGB")
color_format = "RGB"
if 4 == image.shape[2]:
color_format = "RGBA"
self._human_image = pygame.image.frombuffer(image.tobytes(), image.shape[1::-1], color_format)
def stop(self):
logger.info('stop')

View File

@ -45,7 +45,7 @@ class AsyncTaskQueue:
await func(*args) # Execute async function
except Exception as e:
logging.error(f'{self._name} error:', e)
logging.error(f'{self._name} error: {e}')
finally:
self._queue.task_done()

View File

@ -194,7 +194,7 @@ def config_logging(file_name: str, console_level: int = logging.INFO, file_level
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter(
'[%(asctime)s %(levelname)s] %(message)s',
'[%(asctime)s.%(msecs)03d %(levelname)s] %(message)s',
datefmt="%Y/%m/%d %H:%M:%S"
))
console_handler.setLevel(console_level)