1.8Kпросмотров
77.7%от подписчиков
18 декабря 2024 г.
Score: 1.9K
# Converting audio to text with a neural network (Whisper) — Part 2.
# NOTE(review): this script relies on names created earlier in the notebook:
#   audio_13 (loaded waveform), delta (sample period, presumably 1/sr),
#   model_m (loaded Whisper model), np, plt, ipd — confirm against Part 1.

# Pad or trim the waveform to the fixed-length window Whisper expects.
audio = whisper.pad_or_trim(audio_13)

# Number of samples in our trimmed/padded audio.
n_samples = audio.shape[-1]

# Time of each sample.
# Fixed: original had "(n_samples-1)delta" — the '*' operator was missing.
time = np.linspace(0, (n_samples - 1) * delta, n_samples)

plt.figure(figsize=(20, 10))
plt.title('Signal')
plt.plot(time, audio)
plt.ylabel('amplitude')
plt.xlabel('seconds')
plt.show()

# Next, we can plot a mel spectrogram by applying the log_mel_spectrogram()
# function to our audio. It converts the y-axis (frequency) into the mel scale.
mel = whisper.log_mel_spectrogram(audio).to(model_m.device)

fig, (ax1, ax2) = plt.subplots(2)
fig.tight_layout(pad=5.0)
ax1.plot(time, audio)
ax1.set_title('Signal')
ax1.set_xlabel('Time, seconds')
ax1.set_ylabel('Amplitude')
# Fixed: original had "mel.numpy()mel.numpy()" — the '*' was missing.
# Squaring and taking the square root displays the element-wise magnitude.
ax2.imshow((mel.numpy() * mel.numpy()) ** (1 / 2),
           interpolation='nearest', aspect='auto')
ax2.set_title('Mel Spectrogram of a Signal')
ax2.set_xlabel('Time, seconds')
ax2.set_ylabel('Mel Scale')

# Next, we can move on to language detection.
# Language detection
sr = 22050
ipd.Audio(audio, rate=sr)  # playback widget for the processed audio
probs = model_m.detect_language(mel)
probs

# Transcription
file_path2 = 'Rec.mp3'
transcription = model_m.transcribe(file_path2, fp16=False)['text']
transcription