Python librosa.cqt() Examples
The following are 12 code examples of librosa.cqt(), drawn from open-source projects. The source file, project, and license are noted above each example.
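For orientation before the examples, a minimal call looks like this (a sketch using librosa's documented defaults; the audio path is a placeholder):

import librosa
import numpy as np

# Load audio at librosa's default 22050 Hz rate ("audio.wav" is a placeholder)
y, sr = librosa.load("audio.wav")

# librosa.cqt returns a complex-valued array of shape (n_bins, n_frames)
C = librosa.cqt(y, sr=sr, hop_length=512, n_bins=84, bins_per_octave=12)

# Convert the magnitude to decibels for display or as a feature matrix
C_db = librosa.amplitude_to_db(np.abs(C), ref=np.max)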
Example #1
Source File: feature_description.py From Audio-Vision with MIT License
def cqt(features, path, dataset=None):
    """
    This function extracts a constant-Q transform from audio. Make sure
    you pass a dictionary containing all attributes and a path to the audio.
    """
    fsx = features['fs'][0]
    hop_length = features['hop_length'][0]
    n_bins = features['n_bins'][0]
    bins_per_octave = features['bins_per_octave'][0]
    window = features['window'][0]
    mono = features['mono'][0]
    wav, fs = read_audio('librosa', path, dataset)
    wav = convert_mono(wav, mono)
    if fs != fsx:
        raise Exception("Sampling rate mismatch: found {}, expected {}".format(fs, fsx))
    X = librosa.cqt(y=wav, hop_length=hop_length, sr=fs, n_bins=n_bins,
                    bins_per_octave=bins_per_octave, window=window)
    X = X.T
    # Note: librosa.cqt returns a complex array, so np.log10 here operates on
    # complex values before the magnitude is taken.
    X = np.abs(np.log10(X))
    return X

#def mfcc(features,path):
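For reference, the attribute dictionary this function expects might look like the following (the keys are taken from the code above; the values, and their wrapping in single-element lists, are illustrative assumptions):

# Hypothetical attribute dictionary; each value sits in a list because the
# function indexes [0].
features = {
    'fs': [22050],
    'hop_length': [512],
    'n_bins': [84],
    'bins_per_octave': [12],
    'window': ['hann'],
    'mono': [True],
}
X = cqt(features, 'some_audio.wav')  # placeholder path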
Example #2
Source File: create_spectrograms.py From tartarus with MIT License
def compute_spec(audio_file, spectro_file):
    # Get actual audio
    audio, sr = librosa.load(audio_file, sr=config['resample_sr'])
    # Compute spectrogram
    if config['spectrogram_type'] == 'cqt':
        spec = librosa.cqt(audio, sr=sr, hop_length=config['hop'],
                           n_bins=config['cqt_bins'])
    elif config['spectrogram_type'] == 'mel':
        spec = librosa.feature.melspectrogram(y=audio, sr=sr,
                                              hop_length=config['hop'],
                                              n_fft=config['n_fft'],
                                              n_mels=config['n_mels'])
    elif config['spectrogram_type'] == 'stft':
        spec = librosa.stft(y=audio, n_fft=config['n_fft'])
    # Write results ("wb": pickle requires a binary-mode file in Python 3)
    with open(spectro_file, "wb") as f:
        pickle.dump(spec, f, protocol=-1)
    # spec shape: MxN
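The snippet reads its parameters from a module-level config dict; a minimal stand-in (these values are assumptions, not tartarus's actual settings) could look like:

config = {
    'resample_sr': 22050,
    'spectrogram_type': 'cqt',
    'hop': 512,
    'cqt_bins': 84,
    'n_fft': 1024,
    'n_mels': 96,
}
compute_spec('input.mp3', 'output.pkl')  # placeholder file names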
Example #3
Source File: cqt.py From pumpp with ISC License
def transform_audio(self, y):
    '''Compute the CQT

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape = (n_frames, n_bins)
            The CQT magnitude
        data['phase']: np.ndarray, shape = mag.shape
            The CQT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
            fmin=self.fmin,
            n_bins=(self.n_octaves * self.over_sample * 12),
            bins_per_octave=(self.over_sample * 12))
    C = fix_length(C, n_frames)

    cqtm, phase = magphase(C)
    if self.log:
        cqtm = amplitude_to_db(cqtm, ref=np.max)

    return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
            'phase': to_dtype(np.angle(phase).T[self.idx], self.dtype)}
Example #4
Source File: cqt.py From pumpp with ISC License
def transform_audio(self, y):
    '''Compute the CQT

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape = (n_frames, n_bins)
            The CQT magnitude
        data['phase']: np.ndarray, shape = mag.shape
            The CQT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
            fmin=self.fmin,
            n_bins=(self.n_octaves * self.over_sample * 12),
            bins_per_octave=(self.over_sample * 12))
    C = fix_length(C, n_frames)

    cqtm, phase = magphase(C)
    if self.log:
        cqtm = amplitude_to_db(cqtm, ref=np.max)

    dphase = phase_diff(np.angle(phase).T[self.idx], self.conv)

    return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
            'dphase': to_dtype(dphase, self.dtype)}
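The phase_diff helper used here is a pumpp utility. The underlying idea, differencing unwrapped phase between consecutive frames, can be sketched with plain numpy (an illustration of the concept, not pumpp's implementation):

import numpy as np

def phase_diff_sketch(phase, axis=0):
    # Unwrap the phase along the frame axis, then take first differences;
    # prepend the first frame so the frame count is preserved.
    unwrapped = np.unwrap(phase, axis=axis)
    dphi = np.diff(unwrapped, axis=axis)
    return np.concatenate([np.take(unwrapped, [0], axis=axis), dphi], axis=axis)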
Example #5
Source File: cqt.py From pumpp with ISC License
def transform_audio(self, y):
    '''Compute the HCQT

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
            The CQT magnitude
        data['phase']: np.ndarray, shape = mag.shape
            The CQT phase
    '''
    cqtm, phase = [], []

    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    for h in self.harmonics:
        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin * h,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))
        C = fix_length(C, n_frames)

        C, P = magphase(C)
        if self.log:
            C = amplitude_to_db(C, ref=np.max)
        cqtm.append(C)
        phase.append(P)

    cqtm = to_dtype(np.asarray(cqtm), self.dtype)
    phase = to_dtype(np.angle(np.asarray(phase)), self.dtype)

    return {'mag': self._index(cqtm),
            'phase': self._index(phase)}
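Outside of pumpp, the same harmonic stacking can be reproduced with plain librosa, one CQT per harmonic with fmin scaled by h (a simplified sketch, not pumpp's code; librosa.ex downloads a bundled example clip):

import librosa
import numpy as np

y, sr = librosa.load(librosa.ex('trumpet'))
fmin = librosa.note_to_hz('C1')
harmonics = [1, 2, 3, 4, 5]

# Stack one CQT magnitude per harmonic along a new leading axis
hcqt = np.stack([
    np.abs(librosa.cqt(y, sr=sr, fmin=fmin * h, n_bins=72, bins_per_octave=12))
    for h in harmonics
])
# hcqt.shape == (n_harmonics, n_bins, n_frames)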
Example #6
Source File: features.py From msaf with MIT License
def get_id(self):
    """Identifier of these features."""
    return "cqt"
Example #7
Source File: features.py From msaf with MIT License
def compute_features(self):
    """Actual implementation of the features.

    Returns
    -------
    cqt: np.array(N, F)
        The features, each row representing a feature vector for a given
        time frame/beat.
    """
    linear_cqt = np.abs(librosa.cqt(
        self._audio, sr=self.sr, hop_length=self.hop_length,
        n_bins=self.n_bins, norm=self.norm,
        filter_scale=self.filter_scale)) ** 2
    cqt = librosa.amplitude_to_db(linear_cqt, ref=self.ref_power).T
    return cqt
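Note that amplitude_to_db applies 20·log10, so feeding it a squared magnitude, as above, yields twice the usual power-dB values. A small check of that equivalence (values kept above amplitude_to_db's default amin):

import numpy as np
import librosa

power = np.random.rand(84, 100) + 1e-3  # stand-in for |CQT|**2
db_a = librosa.amplitude_to_db(power, ref=np.max)
db_p = 2 * librosa.power_to_db(power, ref=np.max)
assert np.allclose(db_a, db_p)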
Example #8
Source File: datautils.py From panotti with MIT License
def make_melgram(mono_sig, sr, n_mels=128):   # @keunwoochoi upgraded from 96 to 128 mel bins in kapre
    #melgram = librosa.logamplitude(librosa.feature.melspectrogram(mono_sig,  # latest librosa deprecated logamplitude in favor of amplitude_to_db
    #    sr=sr, n_mels=96), ref_power=1.0)[np.newaxis, np.newaxis, :, :]

    melgram = librosa.amplitude_to_db(librosa.feature.melspectrogram(mono_sig,
        sr=sr, n_mels=n_mels))[np.newaxis, :, :, np.newaxis]  # last newaxis is b/c tensorflow wants 'channels_last' order

    '''
    # librosa docs also include a perceptual CQT example:
    CQT = librosa.cqt(mono_sig, sr=sr, fmin=librosa.note_to_hz('A1'))
    freqs = librosa.cqt_frequencies(CQT.shape[0], fmin=librosa.note_to_hz('A1'))
    perceptual_CQT = librosa.perceptual_weighting(CQT**2, freqs, ref=np.max)
    melgram = perceptual_CQT[np.newaxis, np.newaxis, :, :]
    '''
    return melgram
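The commented-out perceptual CQT above follows an older librosa gallery example; against a current librosa API it might read as follows (a sketch; keyword usage per recent librosa releases):

import librosa
import numpy as np

y, sr = librosa.load(librosa.ex('trumpet'))  # bundled example clip
fmin = librosa.note_to_hz('A1')
C = np.abs(librosa.cqt(y, sr=sr, fmin=fmin))
freqs = librosa.cqt_frequencies(C.shape[0], fmin=fmin)
# A-weighted log-power CQT; extra kwargs are forwarded to power_to_db
perceptual_CQT = librosa.perceptual_weighting(C**2, freqs, ref=np.max)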
Example #9
Source File: cqt.py From pumpp with ISC License
def transform_audio(self, y):
    '''Compute the HCQT

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
            The CQT magnitude
        data['phase']: np.ndarray, shape = mag.shape
            The CQT phase
    '''
    cqtm, phase = [], []

    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    for h in self.harmonics:
        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin * h,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))
        C = fix_length(C, n_frames)

        C, P = magphase(C)
        if self.log:
            C = amplitude_to_db(C, ref=np.max)
        cqtm.append(C)
        phase.append(P)

    cqtm = to_dtype(np.asarray(cqtm), self.dtype)
    phase = np.angle(np.asarray(phase))

    dphase = to_dtype(phase_diff(self._index(phase), self.conv), self.dtype)

    return {'mag': self._index(cqtm),
            'dphase': dphase}
Example #10
Source File: features.py From msaf with MIT License
def __init__(self, file_struct, feat_type, sr=config.sample_rate,
             hop_length=config.hop_size, n_bins=config.cqt.bins,
             norm=config.cqt.norm, filter_scale=config.cqt.filter_scale,
             ref_power=config.cqt.ref_power):
    """Constructor of the class.

    Parameters
    ----------
    file_struct: `msaf.input_output.FileStruct`
        Object containing the file paths from where to extract/read
        the features.
    feat_type: `FeatureTypes`
        Enum containing the type of features.
    sr: int > 0
        Sampling rate for the analysis.
    hop_length: int > 0
        Hop size in frames for the analysis.
    n_bins: int > 0
        Number of frequency bins for the CQT.
    norm: float
        Type of norm to use for basis function normalization.
    filter_scale: float
        The scale of the filter for the CQT.
    ref_power: str
        The reference power for logarithmic scaling.
        See `configdefaults.py` for the possible values.
    """
    # Init the parent
    super().__init__(file_struct=file_struct, sr=sr, hop_length=hop_length,
                     feat_type=feat_type)
    # Init the CQT parameters
    self.n_bins = n_bins
    self.norm = norm
    self.filter_scale = filter_scale
    if ref_power == "max":
        self.ref_power = np.max
    elif ref_power == "min":
        self.ref_power = np.min
    elif ref_power == "median":
        self.ref_power = np.median
    else:
        raise FeatureParamsError("Wrong value for ref_power")
Example #11
Source File: utils.py From time-domain-neural-audio-style-transfer with Apache License 2.0
def rainbowgram(path,
                ax,
                peak=70.0,
                use_cqt=False,
                n_fft=1024,
                hop_length=256,
                sr=22050,
                over_sample=4,
                res_factor=0.8,
                octaves=5,
                notes_per_octave=10):
    audio = librosa.load(path, sr=sr)[0]
    if use_cqt:
        C = librosa.cqt(audio,
                        sr=sr,
                        hop_length=hop_length,
                        bins_per_octave=int(notes_per_octave * over_sample),
                        n_bins=int(octaves * notes_per_octave * over_sample),
                        filter_scale=res_factor,
                        fmin=librosa.note_to_hz('C2'))
    else:
        C = librosa.stft(audio,
                         n_fft=n_fft,
                         win_length=n_fft,
                         hop_length=hop_length,
                         center=True)
    mag, phase = librosa.core.magphase(C)
    phase_angle = np.angle(phase)
    phase_unwrapped = np.unwrap(phase_angle)
    dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
    dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi
    # The original called librosa.logamplitude, which was removed in librosa
    # 0.6; power_to_db is its direct replacement.
    mag = (librosa.power_to_db(mag**2,
                               amin=1e-13,
                               top_db=peak,
                               ref=np.max) / peak) + 1
    cdict = {
        'red': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'green': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'blue': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'alpha': ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0))
    }
    my_mask = matplotlib.colors.LinearSegmentedColormap('MyMask', cdict)
    plt.register_cmap(cmap=my_mask)  # matplotlib.colormaps.register on newer matplotlib
    ax.matshow(dphase[::-1, :], cmap=plt.cm.rainbow)
    ax.matshow(mag[::-1, :], cmap=my_mask)
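A possible driver for this helper (the audio path is a placeholder; the figure setup is ordinary matplotlib):

import matplotlib
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 4))
rainbowgram('some_audio.wav', ax, use_cqt=True)  # placeholder path
fig.savefig('rainbowgram.png', dpi=300)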
Example #12
Source File: audio_utils.py From hands-on-music-generation-with-magenta with MIT License
def save_spectrogram_plot(audio: Any,
                          sample_rate: int = 16000,
                          filename: Optional[str] = None,
                          output_dir: str = "output") -> None:
    """
    Saves the spectrogram plot of the given audio to the given filename
    in the given output_dir. The resulting plot is a constant-Q transform
    (CQT) spectrogram with the vertical axis being the amplitude converted
    to dB scale.

    :param audio: the audio content, as a floating-point time series
    :param sample_rate: the sampling rate of the file
    :param filename: the optional filename, set to "%Y-%m-%d_%H%M%S".png if None
    :param output_dir: the output dir
    """
    os.makedirs(output_dir, exist_ok=True)

    # Pitch min and max correspond to the pitch min and max
    # of the wavenet training checkpoint
    pitch_min = np.min(36)
    pitch_max = np.max(84)
    frequency_min = librosa.midi_to_hz(pitch_min)
    frequency_max = 2 * librosa.midi_to_hz(pitch_max)
    octaves = int(np.ceil(np.log2(frequency_max) - np.log2(frequency_min)))
    bins_per_octave = 32
    num_bins = int(bins_per_octave * octaves)
    hop_length = 2048
    constant_q_transform = librosa.cqt(audio,
                                       sr=sample_rate,
                                       hop_length=hop_length,
                                       fmin=frequency_min,
                                       n_bins=num_bins,
                                       bins_per_octave=bins_per_octave)
    plt.figure()
    plt.axis("off")
    # librosa.cqt returns a complex array; taking np.abs first avoids the
    # complex-input warning from amplitude_to_db.
    librosa.display.specshow(
        librosa.amplitude_to_db(np.abs(constant_q_transform), ref=np.max),
        sr=sample_rate)
    if not filename:
        date_and_time = time.strftime("%Y-%m-%d_%H%M%S")
        filename = f"{date_and_time}.png"
    path = os.path.join(output_dir, filename)
    plt.savefig(fname=path, dpi=600)
    plt.close()
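A possible call site (the file name is a placeholder; 16 kHz matches the function's default sample rate):

import librosa

audio, _ = librosa.load('generated.wav', sr=16000)  # placeholder path
save_spectrogram_plot(audio, sample_rate=16000, output_dir='output')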