modify buffer size

Author: jiegeaiai
Date:   2024-11-08 07:27:00 +08:00
Parent: 4c9ec6831f
Commit: 5d07422eaa

14 changed files with 46 additions and 51 deletions


@@ -27,6 +27,7 @@ class SherpaNcnnAsr(AsrBase):
         super().__init__()
         self._recognizer = self._create_recognizer()
         logger.info('SherpaNcnnAsr init')
+        print('SherpaNcnnAsr init')

     def __del__(self):
         self.__del__()
@@ -60,17 +61,10 @@ class SherpaNcnnAsr(AsrBase):
         time.sleep(3)
         last_result = ""
         logger.info(f'_recognize_loop')
-        while self._stop_event.is_set():
-            logger.info(f'_recognize_loop000')
-            self._notify_complete('介绍中国5000年历史文学')
-            logger.info(f'_recognize_loop111')
-            segment_id += 1
-            time.sleep(60)
-            logger.info(f'_recognize_loop222')
-        logger.info(f'_recognize_loop exit')
-        '''
+        print(f'_recognize_loop')
         with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s:
-            while not self._stop_event.is_set():
+            while self._stop_event.is_set():
                 samples, _ = s.read(self._samples_per_read)  # a blocking read
                 samples = samples.reshape(-1)
                 self._recognizer.accept_waveform(self._sample_rate, samples)
@@ -89,4 +83,13 @@ class SherpaNcnnAsr(AsrBase):
                 self._notify_complete(result)
                 segment_id += 1
                 self._recognizer.reset()
+        '''
+        while self._stop_event.is_set():
+            logger.info(f'_recognize_loop000')
+            self._notify_complete('介绍中国5000年历史文学')
+            logger.info(f'_recognize_loop111')
+            segment_id += 1
+            time.sleep(60)
+            logger.info(f'_recognize_loop222')
+        logger.info(f'_recognize_loop exit')
         '''

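Note on this file: the live sounddevice capture path is re-enabled and the hard-coded test phrase moves into the triple-quoted block, but the active loop keeps `while self._stop_event.is_set():`, which only runs while the stop event is set. For reference, a minimal sketch of the conventional form of this loop, assuming the usual threading.Event semantics and sherpa-ncnn's `recognizer.text` property for the current partial result (everything else is taken from the hunk above):

    def _recognize_loop(self):
        segment_id = 0
        last_result = ""
        with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s:
            while not self._stop_event.is_set():  # run until stop is requested
                samples, _ = s.read(self._samples_per_read)  # a blocking read
                samples = samples.reshape(-1)
                self._recognizer.accept_waveform(self._sample_rate, samples)
                result = self._recognizer.text  # assumed sherpa-ncnn partial result
                if result and result != last_result:
                    last_result = result
                    self._notify_complete(result)
                    segment_id += 1
                    self._recognizer.reset()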
Binary file not shown. (Image, 452 KiB before this commit.)

Binary file not shown. (Image, 258 KiB before this commit.)


@@ -75,7 +75,7 @@ class AudioInferenceHandler(AudioHandler):
             batch_size = self._context.batch_size
             try:
                 mel_batch = self._mal_queue.get(timeout=0.02)
-                print('AudioInferenceHandler mel_batch:', len(mel_batch), 'size:', self._mal_queue.size())
+                # print('AudioInferenceHandler mel_batch:', len(mel_batch), 'size:', self._mal_queue.size())
             except queue.Empty:
                 continue
@@ -100,7 +100,6 @@ class AudioInferenceHandler(AudioHandler):
                     self.on_next_handle((None, mirror_index(length, index), audio_frames[i * 2:i * 2 + 2]),
                                         0)
                     index = index + 1
-                print('AudioInferenceHandler all silence')
             else:
                 logger.info('infer=======')
                 t = time.perf_counter()


@@ -20,15 +20,16 @@ class AudioMalHandler(AudioHandler):
         EventBus().register('stop', self._on_stop)
-        self._queue = SyncQueue(context.batch_size, "AudioMalHandler_queue")
-        self._exit_event = Event()
-        self._thread = Thread(target=self._on_run, name="AudioMalHandlerThread")
-        self._exit_event.set()
-        self._thread.start()
+        self._is_running = True
+        self._queue = SyncQueue(context.batch_size * 2, "AudioMalHandler_queue")

         self.frames = []
         self.chunk = context.sample_rate // context.fps
-        self._is_running = True

+        self._exit_event = Event()
+        self._exit_event.set()
+        self._thread = Thread(target=self._on_run, name="AudioMalHandlerThread")
+        self._thread.start()
         logger.info("AudioMalHandler init")

     def __del__(self):

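This is the buffer change the commit is named for: the queue grows from `batch_size` to `batch_size * 2` slots, and the worker thread is now created and started only after the queue, `frames`, and `chunk` exist. A rough illustration of why a 2x capacity can help, using `queue.Queue` as a hypothetical stand-in for the project's SyncQueue (all names below are illustrative, not the project's):

    import queue

    batch_size = 16
    buf = queue.Queue(maxsize=batch_size * 2)

    def produce(chunk):
        # with 2x capacity the producer only blocks once two full batches are pending,
        # so one batch can be consumed while the next one is being filled
        buf.put(chunk)

    def consume_batch():
        return [buf.get() for _ in range(batch_size)]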

@@ -42,8 +42,14 @@ class HumanRender(AudioHandler):
     def _on_run(self):
         logging.info('human render run')
         while self._exit_event.is_set() and self._is_running:
+            # t = time.time()
             self._run_step()
-            time.sleep(0.038)
+            # delay = time.time() - t
+            delay = 0.03805  # - delay
+            # print(delay)
+            # if delay <= 0.0:
+            #     continue
+            time.sleep(delay)

         logging.info('human render exit')
@@ -53,7 +59,7 @@ class HumanRender(AudioHandler):
         if value is None:
             return
         res_frame, idx, audio_frames = value
-        print('voice render queue size', self._queue.size())
+        # print('render queue size', self._queue.size())
         if not self._empty_log:
             self._empty_log = True
             logging.info('render render:')

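The render loop now sleeps a named `delay` of 0.03805 s per iteration (roughly 26 fps) instead of the literal 0.038, and the commented-out lines hint at compensating for the time spent in `_run_step()`. A minimal sketch of that compensated pacing, with `run_step` and `stop_requested` as stand-in callables:

    import time

    FRAME_BUDGET = 0.03805  # seconds per frame, ~26 fps

    def render_loop(run_step, stop_requested):
        while not stop_requested():
            t = time.time()
            run_step()
            delay = FRAME_BUDGET - (time.time() - t)  # subtract processing time
            if delay > 0.0:
                time.sleep(delay)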

@@ -55,7 +55,7 @@ class NLPBase(AsrObserver):
     def ask(self, question):
         logger.info(f'ask:{question}')
         self._is_running = True
-        task = self._ask_queue.add_task(self._request, question)
+        self._ask_queue.add_task(self._request, question)
         logger.info(f'ask:{question} completed')

     def stop(self):


@@ -15,28 +15,9 @@ class VideoRender(BaseRender):
     def __init__(self, play_clock, context, human_render):
         super().__init__(play_clock, context, 'Video')
         self._human_render = human_render
-        self._diff_avg_count = 0

     def render(self, frame, ps):
         res_frame, idx, type_ = frame
-        clock_time = self._play_clock.clock_time()
-        time_difference = clock_time - ps
-        if abs(time_difference) > self._play_clock.audio_diff_threshold:
-            if self._diff_avg_count < 5:
-                self._diff_avg_count += 1
-            else:
-                if time_difference < -self._play_clock.audio_diff_threshold:
-                    sleep_time = abs(time_difference)
-                    print("Video frame waiting to catch up with audio", sleep_time)
-                    if sleep_time <= 1.0:
-                        time.sleep(sleep_time)
-
-                # elif time_difference > self._play_clock.audio_diff_threshold:  # video ahead of audio by more than 10 ms
-                #     print("Video frame dropped to catch up with audio")
-                #     continue
-        else:
-            self._diff_avg_count = 0

         if type_ == 0:
             combine_frame = self._context.frame_list_cycle[idx]

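This hunk drops the audio/video drift compensation: the frame timestamp `ps` was compared against the play clock and, after five consecutive out-of-threshold frames, the renderer slept (capped at 1 s) so the audio could catch up. A condensed sketch of the removed idea, with stand-in names:

    import time

    def wait_for_audio(clock_time, ps, threshold, max_sleep=1.0):
        diff = clock_time - ps
        if diff < -threshold:  # frame timestamp is ahead of the audio clock
            time.sleep(min(abs(diff), max_sleep))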

@@ -44,7 +44,8 @@ def main():
    from io import BytesIO

    async def fetch_audio():
-        url = "http://localhost:8082/v1/audio/speech"
+        # url = "http://localhost:8082/v1/audio/speech"
+        url = "https://tts.mzzsfy.eu.org/v1/audio/speech"
        data = {
            "model": "tts-1",
            "input": "写了一个高性能tts(文本转声音)工具,5千字仅需5秒,免费使用",


@@ -32,13 +32,13 @@ class TTSBase(NLPCallback):
         self._handle = value

     async def _request(self, txt: str, index):
-        print('_request:', txt)
+        # print('_request:', txt)
         t = time.time()
         stream = await self._on_request(txt)
         if stream is None:
-            print(f'-------stream is None')
+            logger.warn(f'-------stream is None')
             return
-        print(f'-------tts time:{time.time() - t:.4f}s')
+        logger.info(f'-------tts time:{time.time() - t:.4f}s, txt:{txt}')
         if self._handle is not None and self._is_running:
             await self._on_handle(stream, index)
         else:
@@ -59,13 +59,13 @@ class TTSBase(NLPCallback):
     def message(self, txt):
         txt = txt.strip()
         if len(txt) == 0:
-            logger.info(f'message is empty')
+            # logger.info(f'message is empty')
             return
         logger.info(f'message:{txt}')
         index = 0
         if self._handle is not None:
             index = self._handle.get_index()
-        print(f'message txt-index:{txt}, index {index}')
+        # print(f'message txt-index:{txt}, index {index}')
         self._message_queue.add_task(self._request, txt, index)

     def stop(self):

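One nit on the new calls: `logger.warn` is a deprecated alias in Python's logging module; `logger.warning` is the supported spelling and a drop-in replacement here:

    logger.warning(f'-------stream is None')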

@@ -22,7 +22,7 @@ class TTSEdgeHttp(TTSBase):
         logger.info(f"TTSEdge init, {voice}")

     async def _on_request(self, txt: str):
-        print('TTSEdgeHttp, _on_request, txt:', txt)
+        logger.info(f'TTSEdgeHttp, _on_request, txt:{txt}')
         data = {
             "model": "tts-1",
             "input": txt,
@@ -38,7 +38,7 @@ class TTSEdgeHttp(TTSBase):
                 return stream
             else:
                 byte_stream = None
-                return byte_stream
+                return byte_stream, None

     async def _on_handle(self, stream, index):
         print('-------tts _on_handle')

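Worth double-checking in this file: the failure path now returns a tuple (`byte_stream, None`) while the success path above it still returns a bare `stream`, and the caller in TTSBase tests `if stream is None:`; a two-element tuple is never `None`, so that guard would no longer catch the failure. A hypothetical sketch of a shape-consistent return (not the project's `_on_request`, just the pattern):

    def pick_stream(byte_stream):
        # keep one return shape so the caller's `if stream is None:` check still works
        if byte_stream is not None:
            return byte_stream
        return None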

@@ -55,7 +55,11 @@ class PyGameUI:
         if self._queue.empty():
             return
         image = self._queue.get()
-        self._human_image = pygame.image.frombuffer(image.tobytes(), image.shape[1::-1], "RGB")
+        color_format = "RGB"
+        if 4 == image.shape[2]:
+            color_format = "RGBA"
+        self._human_image = pygame.image.frombuffer(image.tobytes(), image.shape[1::-1], color_format)

     def stop(self):
         logger.info('stop')

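The change picks the pygame buffer format from the frame's channel count. A self-contained sketch of the same check with a synthetic frame (numpy and pygame are the same libraries the file already uses):

    import numpy as np
    import pygame

    def to_surface(image: np.ndarray) -> pygame.Surface:
        # image.shape is (height, width, channels); shape[1::-1] yields (width, height)
        color_format = "RGBA" if image.shape[2] == 4 else "RGB"
        return pygame.image.frombuffer(image.tobytes(), image.shape[1::-1], color_format)

    frame = np.zeros((480, 640, 4), dtype=np.uint8)  # e.g. an RGBA frame
    surface = to_surface(frame)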

@@ -45,7 +45,7 @@ class AsyncTaskQueue:
                 await func(*args)  # Execute async function
             except Exception as e:
-                logging.error(f'{self._name} error:', e)
+                logging.error(f'{self._name} error: {e}')
             finally:
                 self._queue.task_done()


@@ -194,7 +194,7 @@ def config_logging(file_name: str, console_level: int = logging.INFO, file_level
     console_handler = logging.StreamHandler()
     console_handler.setFormatter(logging.Formatter(
-        '[%(asctime)s %(levelname)s] %(message)s',
+        '[%(asctime)s.%(msecs)03d %(levelname)s] %(message)s',
         datefmt="%Y/%m/%d %H:%M:%S"
     ))
     console_handler.setLevel(console_level)
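The console format gains millisecond timestamps via the standard `%(msecs)03d` record attribute. A minimal standalone example of the same formatter:

    import logging

    logger = logging.getLogger("demo")
    logger.setLevel(logging.INFO)
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(
        '[%(asctime)s.%(msecs)03d %(levelname)s] %(message)s',
        datefmt="%Y/%m/%d %H:%M:%S"
    ))
    logger.addHandler(handler)
    logger.info("hello")  # e.g. [2024/11/08 07:27:00.123 INFO] hello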