modify audio
This commit is contained in:
parent
472a17f896
commit
3aec7b7103
103
Human.py
103
Human.py
@ -311,9 +311,9 @@ class Human:
|
||||
self.audio_chunks_queue_ = Queue()
|
||||
self._test_image_queue = Queue()
|
||||
#
|
||||
# self._thread = None
|
||||
# thread = threading.Thread(target=self.test)
|
||||
# thread.start()
|
||||
self._thread = None
|
||||
thread = threading.Thread(target=self.test)
|
||||
thread.start()
|
||||
# self.test()
|
||||
# self.play_pcm()
|
||||
|
||||
@ -339,23 +339,16 @@ class Human:
|
||||
# stream.close()
|
||||
# p.terminate()
|
||||
|
||||
def test(self):
|
||||
wav = audio.load_wav(r'./audio/audio.wav', 16000)
|
||||
# with open(r'./audio/test.wav', 'rb') as f:
|
||||
# byte_data = f.read()
|
||||
#
|
||||
# byte_data = byte_data[16:]
|
||||
# inputs = np.concatenate(byte_data) # [N * chunk]
|
||||
# wav = load_audio_from_bytes(inputs)
|
||||
print('wav length:', len(wav))
|
||||
mel = audio.melspectrogram(wav)
|
||||
def inter(self, model, chunks, face_list_cycle, face_det_results, out, j):
|
||||
inputs = np.concatenate(chunks) # [5 * chunk]
|
||||
mel = audio.melspectrogram(inputs)
|
||||
if np.isnan(mel.reshape(-1)).sum() > 0:
|
||||
raise ValueError(
|
||||
'Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again')
|
||||
|
||||
mel_step_size = 16
|
||||
|
||||
print('fps:', self._fps)
|
||||
print('fps:', self._fps)
|
||||
mel_idx_multiplier = 80. / self._fps
|
||||
print('mel_idx_multiplier:', mel_idx_multiplier)
|
||||
i = 0
|
||||
@ -369,26 +362,6 @@ class Human:
|
||||
self.mel_chunks_queue_.put(mel[:, start_idx: start_idx + mel_step_size])
|
||||
i += 1
|
||||
|
||||
batch_size = 128
|
||||
print('batch_size:', batch_size, ' mel_chunks len:', self.mel_chunks_queue_.qsize())
|
||||
|
||||
face_images_path = r'./face/'
|
||||
face_images_path = utils.read_files_path(face_images_path)
|
||||
face_list_cycle = read_images(face_images_path)
|
||||
face_images_length = len(face_list_cycle)
|
||||
logging.info(f'face images length: {face_images_length}')
|
||||
print(f'face images length: {face_images_length}')
|
||||
|
||||
model = load_model(r'.\checkpoints\wav2lip.pth')
|
||||
print("Model loaded")
|
||||
|
||||
frame_h, frame_w = face_list_cycle[0].shape[:-1]
|
||||
# out = cv2.VideoWriter('temp/resul_tttt.avi',
|
||||
# cv2.VideoWriter_fourcc(*'DIVX'), 25, (frame_w, frame_h))
|
||||
|
||||
face_det_results = face_detect(face_list_cycle)
|
||||
|
||||
j = 0
|
||||
while not self.mel_chunks_queue_.empty():
|
||||
print("self.mel_chunks_queue_ len:", self.mel_chunks_queue_.qsize())
|
||||
m = self.mel_chunks_queue_.get()
|
||||
@ -411,12 +384,62 @@ class Human:
|
||||
j = j + 1
|
||||
p = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
|
||||
self._test_image_queue.put(p)
|
||||
# out.write(f)
|
||||
#
|
||||
# out.release()
|
||||
# command = 'ffmpeg -y -i {} -i {} -strict -2 -q:v 1 {}'.format('./audio/audio1.wav', 'temp/resul_tttt.avi',
|
||||
# 'temp/resul_tttt.mp4')
|
||||
# subprocess.call(command, shell=platform.system() != 'Windows')
|
||||
out.write(f)
|
||||
return j
|
||||
|
||||
def test(self):
|
||||
batch_size = 128
|
||||
print('batch_size:', batch_size, ' mel_chunks len:', self.mel_chunks_queue_.qsize())
|
||||
|
||||
face_images_path = r'./face/'
|
||||
face_images_path = utils.read_files_path(face_images_path)
|
||||
face_list_cycle = read_images(face_images_path)
|
||||
face_images_length = len(face_list_cycle)
|
||||
logging.info(f'face images length: {face_images_length}')
|
||||
print(f'face images length: {face_images_length}')
|
||||
|
||||
model = load_model(r'.\checkpoints\wav2lip.pth')
|
||||
print("Model loaded")
|
||||
|
||||
frame_h, frame_w = face_list_cycle[0].shape[:-1]
|
||||
out = cv2.VideoWriter('temp/resul_tttt.avi',
|
||||
cv2.VideoWriter_fourcc(*'DIVX'), 25, (frame_w, frame_h))
|
||||
|
||||
face_det_results = face_detect(face_list_cycle)
|
||||
|
||||
audio_path = r'./temp/audio/chunk_0.wav'
|
||||
stream = audio.load_wav(audio_path, 16000)
|
||||
stream_len = stream.shape[0]
|
||||
print('wav length:', stream_len)
|
||||
_audio_chunk_queue = queue.Queue()
|
||||
index = 0
|
||||
chunk_len = 6400
|
||||
while stream_len >= chunk_len:
|
||||
audio_chunk = stream[index:index + chunk_len]
|
||||
_audio_chunk_queue.put(audio_chunk)
|
||||
stream_len -= chunk_len
|
||||
index += chunk_len
|
||||
if stream_len > 0:
|
||||
audio_chunk = stream[index:index + stream_len]
|
||||
_audio_chunk_queue.put(audio_chunk)
|
||||
index += stream_len
|
||||
stream_len -= stream_len
|
||||
|
||||
print('_audio_chunk_queue:', _audio_chunk_queue.qsize())
|
||||
|
||||
j = 0
|
||||
while not _audio_chunk_queue.empty():
|
||||
chunks = []
|
||||
length = min(5, _audio_chunk_queue.qsize())
|
||||
for i in range(length):
|
||||
chunks.append(_audio_chunk_queue.get())
|
||||
|
||||
j = self.inter(model, chunks, face_list_cycle, face_det_results, out, j)
|
||||
|
||||
out.release()
|
||||
command = 'ffmpeg -y -i {} -i {} -strict -2 -q:v 1 {}'.format(audio_path, 'temp/resul_tttt.avi',
|
||||
'temp/resul_tttt.mp4')
|
||||
subprocess.call(command, shell=platform.system() != 'Windows')
|
||||
|
||||
|
||||
# gen = datagen(face_list_cycle, self.mel_chunks_queue_)
|
||||
|
1
audio.py
1
audio.py
@ -138,7 +138,6 @@ def _denormalize(D):
|
||||
return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db)
|
||||
|
||||
|
||||
|
||||
def load_audio(file_path, sr=16000):
|
||||
"""加载音频文件并返回音频数据和采样率"""
|
||||
wav, sr = librosa.load(file_path, sr=sr)
|
||||
|
@ -27,10 +27,14 @@ class Chunk2Mal:
|
||||
self._exit_event.set()
|
||||
self._thread.start()
|
||||
self._audio_render = AudioRender()
|
||||
self._stream_len = 0
|
||||
logging.info('chunk2mal start')
|
||||
|
||||
def _concatenate(self):
|
||||
logging.info('np.concatenate')
|
||||
if len(self._chunks) < 3:
|
||||
logging.info(f'np.concatenate: {len(self._chunks)}')
|
||||
return
|
||||
inputs = np.concatenate(self._chunks) # [5 * chunk]
|
||||
self._chunks = []
|
||||
mel = audio.melspectrogram(inputs)
|
||||
@ -54,10 +58,12 @@ class Chunk2Mal:
|
||||
self._human.push_mel_chunks_queue(mel[:, start_idx: start_idx + mel_step_size])
|
||||
i += 1
|
||||
|
||||
wav = np.concatenate(self._audio_chunks) # [5 * chunk]self._audio_chunks
|
||||
wav *= 32767 / max(0.01, np.max(np.abs(wav)))
|
||||
wav = wav.astype(np.int16)
|
||||
self._audio_render.write(wav, len(wav))
|
||||
# wav = np.concatenate(self._audio_chunks) # [5 * chunk]self._audio_chunks
|
||||
# print('_concatenate', len(wav))
|
||||
# audio.save_chunks([wav], 16000, "./temp/audio/")
|
||||
# wav *= 32767 / max(0.01, np.max(np.abs(wav)))
|
||||
# wav = wav.astype(np.int16)
|
||||
# self._audio_render.write(wav, len(wav))
|
||||
self._audio_chunks = []
|
||||
|
||||
print('mel_chunks count:', count)
|
||||
@ -74,11 +80,12 @@ class Chunk2Mal:
|
||||
try:
|
||||
chunk = self._audio_chunk_queue.get(block=True, timeout=1)
|
||||
self._chunks.append(chunk)
|
||||
self._stream_len = self._stream_len + len(chunk)
|
||||
print('Chunk2Mal _stream_len:', self._stream_len)
|
||||
self._audio_chunks.append(chunk.copy())
|
||||
# print(type(chunk))
|
||||
|
||||
# self._human.push_audio_frames(chunk, 0)
|
||||
if len(self._chunks) < 102: # 200ms
|
||||
if len(self._chunks) < 10: # 200ms
|
||||
continue
|
||||
except queue.Empty:
|
||||
# print('Chunk2Mal queue.Empty')
|
||||
|
@ -14,10 +14,9 @@ from io import BytesIO
|
||||
from queue import Queue
|
||||
from threading import Thread, Event
|
||||
|
||||
from IPython.core.display_functions import display
|
||||
from pydub import AudioSegment
|
||||
|
||||
import audio
|
||||
from audio_render import AudioRender
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -37,6 +36,7 @@ class TTSBase:
|
||||
# self._pcm_player = pyaudio.PyAudio()
|
||||
# self._pcm_stream = self._pcm_player.open(format=pyaudio.paInt16,
|
||||
# channels=1, rate=24000, output=True)
|
||||
# self._audio_render = AudioRender()
|
||||
logging.info('tts start')
|
||||
|
||||
def _on_run(self):
|
||||
@ -57,9 +57,18 @@ class TTSBase:
|
||||
|
||||
self._io_stream.seek(0)
|
||||
stream = self.__create_bytes_stream(self._io_stream)
|
||||
audio.save_chunks([stream], 16000, './temp/audio/')
|
||||
# wav = audio.split_audio(stream, 16000, 0.04)
|
||||
|
||||
# audio.save_chunks(wav, 16000, './temp/audio/')
|
||||
# audio.save_wav(stream, "./temp/audio/test1.wav", 16000)
|
||||
stream_len = stream.shape[0]
|
||||
|
||||
# wav = stream #np.concatenate(stream) # [5 * chunk]self._audio_chunks
|
||||
# print('_concatenate', len(wav))
|
||||
|
||||
# self._audio_chunks = []
|
||||
|
||||
print("stream_len:", stream_len, " _chunk_len:", self._chunk_len)
|
||||
index = 0
|
||||
segment = 0
|
||||
@ -69,6 +78,12 @@ class TTSBase:
|
||||
stream_len -= self._chunk_len
|
||||
index += self._chunk_len
|
||||
segment = segment + 1
|
||||
|
||||
if stream_len > 0:
|
||||
audio_chunk = stream[index:index + stream_len]
|
||||
self._human.push_audio_chunk(audio_chunk)
|
||||
segment = segment + 1
|
||||
|
||||
print("segment:", segment)
|
||||
self._io_stream.seek(0)
|
||||
self._io_stream.truncate()
|
||||
@ -84,7 +99,7 @@ class TTSBase:
|
||||
|
||||
if sample_rate != self._human.get_audio_sample_rate() and stream.shape[0] > 0:
|
||||
logger.warning(f'tts audio sample rate is {sample_rate}, resample to {self._human.get_audio_sample_rate() }')
|
||||
stream = resampy.resample(x=stream, sr_orig=sample_rate, sr_new=self._human.get_audio_sample_rate() )
|
||||
stream = resampy.resample(x=stream, sr_orig=sample_rate, sr_new=self._human.get_audio_sample_rate())
|
||||
|
||||
return stream
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user