Spaces:
Runtime error
Runtime error
| """Create spectrograms from audio files using matplotlib""" | |
| import logging | |
| from dataclasses import dataclass | |
| from pathlib import Path | |
| from typing import Tuple, Union | |
| import matplotlib as mpl | |
| mpl.use('Agg') | |
| import librosa | |
| import librosa.display | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import scipy.signal as signal | |
| logging.basicConfig(format='%(asctime)s: %(message)s', level=logging.INFO) | |
@dataclass
class FFTConfig:
    """Settings that control how a spectrogram is computed and drawn.

    Every field has a default, so ``FFTConfig()`` gives the stock
    configuration, and individual settings can be overridden by keyword,
    e.g. ``FFTConfig(n_fft=1024, mel=True)``.
    """

    # STFT window parameters (forwarded to librosa.stft).
    n_fft: Union[int, None] = 2**12
    win_length: Union[int, None] = None
    hop_length: int = 512
    # Sample rate of the audio being plotted.
    sr: int = 22_050
    # Convert the spectrogram to decibels before plotting.
    db: bool = False
    # Plot a mel spectrogram instead of the plain STFT magnitude.
    mel: bool = False
    # Frequency bounds handed to the mel filter bank and to specshow.
    fmin: int = 50
    fmax: int = 10_000
    # Frequency-axis scaling name understood by librosa.display.specshow.
    y_axis: str = 'linear'
    # NOTE(review): not read by any function visible in this module -- confirm
    # whether it is still used elsewhere.
    denoise: Union[str, None] = None
    # Apply per-channel energy normalisation (librosa.pcen) to the STFT.
    pcen: bool = False
    # Matplotlib colormap name used for the image.
    cmap: str = 'magma'
    # Number of mel bands when ``mel`` is enabled.
    n_mels: int = 128
    # Colour-scale limits passed to specshow; None lets it choose.
    vmin: Union[float, None] = None
    vmax: Union[float, None] = None
    # Run the audio through fish_filter before analysis.
    bandpass: bool = True
    # y-axis limits applied to the plot; None leaves the axis untouched.
    ylim: Union[Tuple[float, float], None] = (0, 512)
def load_wav(fpath):
    """Load an audio file and trim the silence at both ends.

    Args:
        fpath: Path of the audio file, passed straight to ``librosa.load``.

    Returns:
        A ``(audio, sr)`` pair: the trimmed samples and the sample rate
        reported by ``librosa.load``.
    """
    samples, sample_rate = librosa.load(fpath)
    # librosa.effects.trim returns (trimmed_signal, interval); only the
    # signal is needed here.
    trimmed = librosa.effects.trim(samples)[0]
    return trimmed, sample_rate
def calc_stft(audio, fft_config):
    """Return the magnitude of the short-time Fourier transform of ``audio``.

    Args:
        audio: Samples to analyse, handed to ``librosa.stft`` unchanged.
        fft_config: Object providing ``n_fft``, ``hop_length`` and
            ``win_length``.

    Returns:
        The absolute value of the complex STFT matrix.
    """
    stft_kwargs = dict(
        n_fft=fft_config.n_fft,
        hop_length=fft_config.hop_length,
        win_length=fft_config.win_length,
    )
    complex_spec = librosa.stft(audio, **stft_kwargs)
    # Drop the phase; only the magnitude is plotted.
    return np.abs(complex_spec)
def plot_spec(inp, output, fft_config: FFTConfig):
    """Render a spectrogram of ``inp`` and optionally save it to ``output``.

    Args:
        inp: ``(sr, audio)`` pair -- sample rate first, then the samples,
            i.e. the opposite order of what ``librosa.load`` returns.
        output: Destination handed to ``Figure.savefig``; nothing is written
            when it is falsy.
        fft_config: Spectrogram settings. The object is only read; the sample
            rate from ``inp`` and the "force dB" decisions are kept in local
            variables so one call cannot leak state into the next one.
    """
    # Audio returns sr and audio! (opposite of librosa)
    sr, audio = inp

    # librosa refuses integer sample arrays. Cast (without rescaling) so the
    # non-bandpass path accepts them too; the bandpass path already produces
    # floats via scipy's filter.
    audio = np.asarray(audio)
    if not np.issubdtype(audio.dtype, np.floating):
        audio = audio.astype(np.float32)

    if fft_config.bandpass:
        audio = fish_filter(audio, fs=sr)

    to_db = fft_config.db
    is_power = False

    if fft_config.mel:
        # Use the same framing as the plain STFT so the time axis agrees with
        # the hop_length given to specshow below.
        spec = librosa.feature.melspectrogram(
            y=audio,
            sr=sr,
            n_fft=fft_config.n_fft,
            hop_length=fft_config.hop_length,
            win_length=fft_config.win_length,
            n_mels=fft_config.n_mels,
            fmin=fft_config.fmin,
            fmax=fft_config.fmax
        )
        # Mel is always shown in dB. melspectrogram returns a *power*
        # spectrogram by default, so it needs power_to_db further down.
        to_db = True
        is_power = True
    else:
        spec = calc_stft(audio, fft_config)
        if fft_config.pcen:
            # Scale PCEN: https://librosa.org/doc/latest/generated/librosa.pcen.html?highlight=pcen#librosa.pcen
            spec = librosa.pcen(spec * (2**31), sr=sr, hop_length=fft_config.hop_length)
            to_db = True

    if to_db:
        convert = librosa.power_to_db if is_power else librosa.amplitude_to_db
        spec = convert(spec, ref=np.max)

    fig, ax = plt.subplots(1, 1)
    try:
        librosa.display.specshow(
            spec,
            sr=sr,
            hop_length=fft_config.hop_length,
            x_axis='time',
            y_axis=fft_config.y_axis,
            fmin=fft_config.fmin,
            fmax=fft_config.fmax,
            cmap=fft_config.cmap,
            ax=ax,
            vmin=fft_config.vmin,
            vmax=fft_config.vmax
        )
        ax.set_axis_off()
        if fft_config.ylim is not None:
            ax.set_ylim(fft_config.ylim)
        if output:
            fig.savefig(output, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when plotting or saving fails.
        # 'all' closes this figure together with any stray ones.
        plt.close('all')
def fish_filter(call, low=50, high=512, order=8, fs=22_050):
    """Band-pass ``call`` with a Butterworth filter.

    Args:
        call: Signal to filter.
        low: Lower edge of the pass band, in the same units as ``fs``.
        high: Upper edge of the pass band, in the same units as ``fs``.
        order: Order passed to ``scipy.signal.butter``.
        fs: Sampling frequency of ``call``.

    Returns:
        The filtered signal produced by ``scipy.signal.sosfilt``.
    """
    band_edges = [low, high]
    # Second-order sections keep a high-order design numerically stable.
    sections = signal.butter(
        order,
        band_edges,
        btype='bandpass',
        output='sos',
        fs=fs,
    )
    filtered = signal.sosfilt(sections, call)
    return filtered