modify buffer size
This commit is contained in:
parent
4c9ec6831f
commit
5d07422eaa
@ -27,6 +27,7 @@ class SherpaNcnnAsr(AsrBase):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
self._recognizer = self._create_recognizer()
|
self._recognizer = self._create_recognizer()
|
||||||
logger.info('SherpaNcnnAsr init')
|
logger.info('SherpaNcnnAsr init')
|
||||||
|
print('SherpaNcnnAsr init')
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
self.__del__()
|
self.__del__()
|
||||||
@ -60,17 +61,10 @@ class SherpaNcnnAsr(AsrBase):
|
|||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
last_result = ""
|
last_result = ""
|
||||||
logger.info(f'_recognize_loop')
|
logger.info(f'_recognize_loop')
|
||||||
while self._stop_event.is_set():
|
print(f'_recognize_loop')
|
||||||
logger.info(f'_recognize_loop000')
|
|
||||||
self._notify_complete('介绍中国5000年历史文学')
|
|
||||||
logger.info(f'_recognize_loop111')
|
|
||||||
segment_id += 1
|
|
||||||
time.sleep(60)
|
|
||||||
logger.info(f'_recognize_loop222')
|
|
||||||
logger.info(f'_recognize_loop exit')
|
|
||||||
'''
|
|
||||||
with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s:
|
with sd.InputStream(channels=1, dtype="float32", samplerate=self._sample_rate) as s:
|
||||||
while not self._stop_event.is_set():
|
while self._stop_event.is_set():
|
||||||
samples, _ = s.read(self._samples_per_read) # a blocking read
|
samples, _ = s.read(self._samples_per_read) # a blocking read
|
||||||
samples = samples.reshape(-1)
|
samples = samples.reshape(-1)
|
||||||
self._recognizer.accept_waveform(self._sample_rate, samples)
|
self._recognizer.accept_waveform(self._sample_rate, samples)
|
||||||
@ -89,4 +83,13 @@ class SherpaNcnnAsr(AsrBase):
|
|||||||
self._notify_complete(result)
|
self._notify_complete(result)
|
||||||
segment_id += 1
|
segment_id += 1
|
||||||
self._recognizer.reset()
|
self._recognizer.reset()
|
||||||
|
'''
|
||||||
|
while self._stop_event.is_set():
|
||||||
|
logger.info(f'_recognize_loop000')
|
||||||
|
self._notify_complete('介绍中国5000年历史文学')
|
||||||
|
logger.info(f'_recognize_loop111')
|
||||||
|
segment_id += 1
|
||||||
|
time.sleep(60)
|
||||||
|
logger.info(f'_recognize_loop222')
|
||||||
|
logger.info(f'_recognize_loop exit')
|
||||||
'''
|
'''
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 452 KiB |
Binary file not shown.
Before Width: | Height: | Size: 258 KiB |
@ -75,7 +75,7 @@ class AudioInferenceHandler(AudioHandler):
|
|||||||
batch_size = self._context.batch_size
|
batch_size = self._context.batch_size
|
||||||
try:
|
try:
|
||||||
mel_batch = self._mal_queue.get(timeout=0.02)
|
mel_batch = self._mal_queue.get(timeout=0.02)
|
||||||
print('AudioInferenceHandler mel_batch:', len(mel_batch), 'size:', self._mal_queue.size())
|
# print('AudioInferenceHandler mel_batch:', len(mel_batch), 'size:', self._mal_queue.size())
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -100,7 +100,6 @@ class AudioInferenceHandler(AudioHandler):
|
|||||||
self.on_next_handle((None, mirror_index(length, index), audio_frames[i * 2:i * 2 + 2]),
|
self.on_next_handle((None, mirror_index(length, index), audio_frames[i * 2:i * 2 + 2]),
|
||||||
0)
|
0)
|
||||||
index = index + 1
|
index = index + 1
|
||||||
print('AudioInferenceHandler all silence')
|
|
||||||
else:
|
else:
|
||||||
logger.info('infer=======')
|
logger.info('infer=======')
|
||||||
t = time.perf_counter()
|
t = time.perf_counter()
|
||||||
|
@ -20,15 +20,16 @@ class AudioMalHandler(AudioHandler):
|
|||||||
|
|
||||||
EventBus().register('stop', self._on_stop)
|
EventBus().register('stop', self._on_stop)
|
||||||
|
|
||||||
self._queue = SyncQueue(context.batch_size, "AudioMalHandler_queue")
|
self._is_running = True
|
||||||
self._exit_event = Event()
|
self._queue = SyncQueue(context.batch_size * 2, "AudioMalHandler_queue")
|
||||||
self._thread = Thread(target=self._on_run, name="AudioMalHandlerThread")
|
|
||||||
self._exit_event.set()
|
|
||||||
self._thread.start()
|
|
||||||
|
|
||||||
self.frames = []
|
self.frames = []
|
||||||
self.chunk = context.sample_rate // context.fps
|
self.chunk = context.sample_rate // context.fps
|
||||||
self._is_running = True
|
|
||||||
|
self._exit_event = Event()
|
||||||
|
self._exit_event.set()
|
||||||
|
self._thread = Thread(target=self._on_run, name="AudioMalHandlerThread")
|
||||||
|
self._thread.start()
|
||||||
logger.info("AudioMalHandler init")
|
logger.info("AudioMalHandler init")
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
|
@ -42,8 +42,14 @@ class HumanRender(AudioHandler):
|
|||||||
def _on_run(self):
|
def _on_run(self):
|
||||||
logging.info('human render run')
|
logging.info('human render run')
|
||||||
while self._exit_event.is_set() and self._is_running:
|
while self._exit_event.is_set() and self._is_running:
|
||||||
|
# t = time.time()
|
||||||
self._run_step()
|
self._run_step()
|
||||||
time.sleep(0.038)
|
# delay = time.time() - t
|
||||||
|
delay = 0.03805 # - delay
|
||||||
|
# print(delay)
|
||||||
|
# if delay <= 0.0:
|
||||||
|
# continue
|
||||||
|
time.sleep(delay)
|
||||||
|
|
||||||
logging.info('human render exit')
|
logging.info('human render exit')
|
||||||
|
|
||||||
@ -53,7 +59,7 @@ class HumanRender(AudioHandler):
|
|||||||
if value is None:
|
if value is None:
|
||||||
return
|
return
|
||||||
res_frame, idx, audio_frames = value
|
res_frame, idx, audio_frames = value
|
||||||
print('voice render queue size', self._queue.size())
|
# print('render queue size', self._queue.size())
|
||||||
if not self._empty_log:
|
if not self._empty_log:
|
||||||
self._empty_log = True
|
self._empty_log = True
|
||||||
logging.info('render render:')
|
logging.info('render render:')
|
||||||
|
@ -55,7 +55,7 @@ class NLPBase(AsrObserver):
|
|||||||
def ask(self, question):
|
def ask(self, question):
|
||||||
logger.info(f'ask:{question}')
|
logger.info(f'ask:{question}')
|
||||||
self._is_running = True
|
self._is_running = True
|
||||||
task = self._ask_queue.add_task(self._request, question)
|
self._ask_queue.add_task(self._request, question)
|
||||||
logger.info(f'ask:{question} completed')
|
logger.info(f'ask:{question} completed')
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
|
@ -15,28 +15,9 @@ class VideoRender(BaseRender):
|
|||||||
def __init__(self, play_clock, context, human_render):
|
def __init__(self, play_clock, context, human_render):
|
||||||
super().__init__(play_clock, context, 'Video')
|
super().__init__(play_clock, context, 'Video')
|
||||||
self._human_render = human_render
|
self._human_render = human_render
|
||||||
self._diff_avg_count = 0
|
|
||||||
|
|
||||||
def render(self, frame, ps):
|
def render(self, frame, ps):
|
||||||
res_frame, idx, type_ = frame
|
res_frame, idx, type_ = frame
|
||||||
clock_time = self._play_clock.clock_time()
|
|
||||||
time_difference = clock_time - ps
|
|
||||||
if abs(time_difference) > self._play_clock.audio_diff_threshold:
|
|
||||||
if self._diff_avg_count < 5:
|
|
||||||
self._diff_avg_count += 1
|
|
||||||
else:
|
|
||||||
if time_difference < -self._play_clock.audio_diff_threshold:
|
|
||||||
sleep_time = abs(time_difference)
|
|
||||||
print("Video frame waiting to catch up with audio", sleep_time)
|
|
||||||
if sleep_time <= 1.0:
|
|
||||||
time.sleep(sleep_time)
|
|
||||||
|
|
||||||
# elif time_difference > self._play_clock.audio_diff_threshold: # 视频比音频快超过10ms
|
|
||||||
# print("Video frame dropped to catch up with audio")
|
|
||||||
# continue
|
|
||||||
|
|
||||||
else:
|
|
||||||
self._diff_avg_count = 0
|
|
||||||
|
|
||||||
if type_ == 0:
|
if type_ == 0:
|
||||||
combine_frame = self._context.frame_list_cycle[idx]
|
combine_frame = self._context.frame_list_cycle[idx]
|
||||||
|
@ -44,7 +44,8 @@ def main():
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
async def fetch_audio():
|
async def fetch_audio():
|
||||||
url = "http://localhost:8082/v1/audio/speech"
|
# url = "http://localhost:8082/v1/audio/speech"
|
||||||
|
url = "https://tts.mzzsfy.eu.org/v1/audio/speech"
|
||||||
data = {
|
data = {
|
||||||
"model": "tts-1",
|
"model": "tts-1",
|
||||||
"input": "写了一个高性能tts(文本转声音)工具,5千字仅需5秒,免费使用",
|
"input": "写了一个高性能tts(文本转声音)工具,5千字仅需5秒,免费使用",
|
||||||
|
@ -32,13 +32,13 @@ class TTSBase(NLPCallback):
|
|||||||
self._handle = value
|
self._handle = value
|
||||||
|
|
||||||
async def _request(self, txt: str, index):
|
async def _request(self, txt: str, index):
|
||||||
print('_request:', txt)
|
# print('_request:', txt)
|
||||||
t = time.time()
|
t = time.time()
|
||||||
stream = await self._on_request(txt)
|
stream = await self._on_request(txt)
|
||||||
if stream is None:
|
if stream is None:
|
||||||
print(f'-------stream is None')
|
logger.warn(f'-------stream is None')
|
||||||
return
|
return
|
||||||
print(f'-------tts time:{time.time() - t:.4f}s')
|
logger.info(f'-------tts time:{time.time() - t:.4f}s, txt:{txt}')
|
||||||
if self._handle is not None and self._is_running:
|
if self._handle is not None and self._is_running:
|
||||||
await self._on_handle(stream, index)
|
await self._on_handle(stream, index)
|
||||||
else:
|
else:
|
||||||
@ -59,13 +59,13 @@ class TTSBase(NLPCallback):
|
|||||||
def message(self, txt):
|
def message(self, txt):
|
||||||
txt = txt.strip()
|
txt = txt.strip()
|
||||||
if len(txt) == 0:
|
if len(txt) == 0:
|
||||||
logger.info(f'message is empty')
|
# logger.info(f'message is empty')
|
||||||
return
|
return
|
||||||
logger.info(f'message:{txt}')
|
logger.info(f'message:{txt}')
|
||||||
index = 0
|
index = 0
|
||||||
if self._handle is not None:
|
if self._handle is not None:
|
||||||
index = self._handle.get_index()
|
index = self._handle.get_index()
|
||||||
print(f'message txt-index:{txt}, index {index}')
|
# print(f'message txt-index:{txt}, index {index}')
|
||||||
self._message_queue.add_task(self._request, txt, index)
|
self._message_queue.add_task(self._request, txt, index)
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
|
@ -22,7 +22,7 @@ class TTSEdgeHttp(TTSBase):
|
|||||||
logger.info(f"TTSEdge init, {voice}")
|
logger.info(f"TTSEdge init, {voice}")
|
||||||
|
|
||||||
async def _on_request(self, txt: str):
|
async def _on_request(self, txt: str):
|
||||||
print('TTSEdgeHttp, _on_request, txt:', txt)
|
logger.info(f'TTSEdgeHttp, _on_request, txt:{txt}')
|
||||||
data = {
|
data = {
|
||||||
"model": "tts-1",
|
"model": "tts-1",
|
||||||
"input": txt,
|
"input": txt,
|
||||||
@ -38,7 +38,7 @@ class TTSEdgeHttp(TTSBase):
|
|||||||
return stream
|
return stream
|
||||||
else:
|
else:
|
||||||
byte_stream = None
|
byte_stream = None
|
||||||
return byte_stream
|
return byte_stream, None
|
||||||
|
|
||||||
async def _on_handle(self, stream, index):
|
async def _on_handle(self, stream, index):
|
||||||
print('-------tts _on_handle')
|
print('-------tts _on_handle')
|
||||||
|
@ -55,7 +55,11 @@ class PyGameUI:
|
|||||||
if self._queue.empty():
|
if self._queue.empty():
|
||||||
return
|
return
|
||||||
image = self._queue.get()
|
image = self._queue.get()
|
||||||
self._human_image = pygame.image.frombuffer(image.tobytes(), image.shape[1::-1], "RGB")
|
color_format = "RGB"
|
||||||
|
if 4 == image.shape[2]:
|
||||||
|
color_format = "RGBA"
|
||||||
|
|
||||||
|
self._human_image = pygame.image.frombuffer(image.tobytes(), image.shape[1::-1], color_format)
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
logger.info('stop')
|
logger.info('stop')
|
||||||
|
@ -45,7 +45,7 @@ class AsyncTaskQueue:
|
|||||||
|
|
||||||
await func(*args) # Execute async function
|
await func(*args) # Execute async function
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f'{self._name} error:', e)
|
logging.error(f'{self._name} error: {e}')
|
||||||
finally:
|
finally:
|
||||||
self._queue.task_done()
|
self._queue.task_done()
|
||||||
|
|
||||||
|
@ -194,7 +194,7 @@ def config_logging(file_name: str, console_level: int = logging.INFO, file_level
|
|||||||
|
|
||||||
console_handler = logging.StreamHandler()
|
console_handler = logging.StreamHandler()
|
||||||
console_handler.setFormatter(logging.Formatter(
|
console_handler.setFormatter(logging.Formatter(
|
||||||
'[%(asctime)s %(levelname)s] %(message)s',
|
'[%(asctime)s.%(msecs)03d %(levelname)s] %(message)s',
|
||||||
datefmt="%Y/%m/%d %H:%M:%S"
|
datefmt="%Y/%m/%d %H:%M:%S"
|
||||||
))
|
))
|
||||||
console_handler.setLevel(console_level)
|
console_handler.setLevel(console_level)
|
||||||
|
Loading…
Reference in New Issue
Block a user