Python librosa.cqt() Examples

The following are 12 code examples of librosa.cqt(), drawn from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the librosa module.
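For orientation before the project examples, here is a minimal self-contained call; a synthesized tone stands in for real audio so the sketch runs offline, and the parameter values are only illustrative:

import numpy as np
import librosa

sr = 22050
y = librosa.tone(440.0, sr=sr, duration=2.0)  # 2-second A4 sine as a stand-in signal

# 84 bins at 12 bins per octave span 7 octaves upward from C1.
C = librosa.cqt(y=y, sr=sr, hop_length=512, fmin=librosa.note_to_hz("C1"),
                n_bins=84, bins_per_octave=12)

# librosa.cqt returns complex values; most uses take the dB-scaled magnitude.
C_db = librosa.amplitude_to_db(np.abs(C), ref=np.max)
print(C_db.shape)  # (n_bins, n_frames)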
Example #1
Source File: feature_description.py    From Audio-Vision with MIT License
def cqt(features, path, dataset=None):
    """
    Extract the constant-Q transform from an audio file.

    Make sure you pass a dictionary containing all attributes
    and a path to the audio.
    """
    fsx = features['fs'][0]
    hop_length = features['hop_length'][0]
    n_bins = features['n_bins'][0]
    bins_per_octave = features['bins_per_octave'][0]
    window = features['window'][0]
    mono = features['mono'][0]
    wav, fs = read_audio('librosa', path, dataset)
    wav = convert_mono(wav, mono)
    if fs != fsx:
        raise ValueError("Sampling rate mismatch: found {}, expected {}".format(fs, fsx))
    X = librosa.cqt(y=wav, sr=fs, hop_length=hop_length, n_bins=n_bins,
                    bins_per_octave=bins_per_octave, window=window)
    X = X.T
    # Take the magnitude before the log: librosa.cqt returns complex values.
    X = np.log10(np.abs(X))
    return X


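For reference, the same log-magnitude CQT can be computed without the project's feature dictionary or its read_audio/convert_mono helpers; a minimal sketch, with a hypothetical file name and illustrative parameter values:

import numpy as np
import librosa

wav, fs = librosa.load("example.wav", sr=None, mono=True)  # hypothetical input file
X = librosa.cqt(y=wav, sr=fs, hop_length=512, n_bins=84,
                bins_per_octave=12, window='hann')
X = np.log10(np.abs(X.T) + 1e-10)  # frames x bins; small floor avoids log(0)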
Example #2
Source File: create_spectrograms.py    From tartarus with MIT License
def compute_spec(audio_file, spectro_file):
    # Get the actual audio
    audio, sr = librosa.load(audio_file, sr=config['resample_sr'])
    # Compute the spectrogram
    if config['spectrogram_type'] == 'cqt':
        spec = librosa.cqt(y=audio, sr=sr, hop_length=config['hop'],
                           n_bins=config['cqt_bins'])
    elif config['spectrogram_type'] == 'mel':
        spec = librosa.feature.melspectrogram(y=audio, sr=sr, hop_length=config['hop'],
                                              n_fft=config['n_fft'], n_mels=config['n_mels'])
    elif config['spectrogram_type'] == 'stft':
        spec = librosa.stft(y=audio, n_fft=config['n_fft'])
    else:
        raise ValueError("Unknown spectrogram_type: {}".format(config['spectrogram_type']))
    # Write results (binary mode is required for pickle):
    with open(spectro_file, "wb") as f:
        pickle.dump(spec, f, protocol=-1)  # spec shape: MxN
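A matching loader would open the pickle in binary mode as well; a minimal sketch:

import pickle

def load_spec(spectro_file):
    # Read back the spectrogram written by compute_spec above.
    with open(spectro_file, "rb") as f:
        return pickle.load(f)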
Example #3
Source File: cqt.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the CQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))

        C = fix_length(C, n_frames)

        cqtm, phase = magphase(C)
        if self.log:
            cqtm = amplitude_to_db(cqtm, ref=np.max)

        return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
                'phase': to_dtype(np.angle(phase).T[self.idx], self.dtype)} 
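The mag/phase split relies on librosa.magphase, which factors a complex spectrogram into a magnitude part and a unit-modulus phase part; a standalone sketch of the same decomposition, using a synthesized tone as input:

import numpy as np
import librosa

sr = 22050
y = librosa.tone(261.6, sr=sr, duration=1.0)  # C4 sine as a stand-in signal
C = librosa.cqt(y=y, sr=sr, n_bins=72, bins_per_octave=12)

mag, phase = librosa.magphase(C)   # C == mag * phase, with |phase| == 1
angles = np.angle(phase)           # phase in radians, as stored above
mag_db = librosa.amplitude_to_db(mag, ref=np.max)  # the self.log branch above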
Example #4
Source File: cqt.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the CQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))

        C = fix_length(C, n_frames)

        cqtm, phase = magphase(C)
        if self.log:
            cqtm = amplitude_to_db(cqtm, ref=np.max)

        dphase = phase_diff(np.angle(phase).T[self.idx], self.conv)

        return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
                'dphase': to_dtype(dphase, self.dtype)} 
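pumpp's phase_diff helper is internal to the library, but the underlying idea is a frame-to-frame difference of unwrapped phase; a rough standalone approximation (the padding convention here is an assumption):

import numpy as np

def simple_phase_diff(angles):
    # angles: (n_frames, n_bins) phase in radians, frames on axis 0.
    unwrapped = np.unwrap(angles, axis=0)
    d = np.diff(unwrapped, axis=0)
    # Assumed convention: repeat the first frame so the frame count is preserved.
    return np.concatenate([unwrapped[:1], d], axis=0)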
Example #5
Source File: cqt.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the HCQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        cqtm, phase = [], []

        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        for h in self.harmonics:
            C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                    fmin=self.fmin * h,
                    n_bins=(self.n_octaves * self.over_sample * 12),
                    bins_per_octave=(self.over_sample * 12))

            C = fix_length(C, n_frames)

            C, P = magphase(C)
            if self.log:
                C = amplitude_to_db(C, ref=np.max)
            cqtm.append(C)
            phase.append(P)

        cqtm = to_dtype(np.asarray(cqtm), self.dtype)
        phase = to_dtype(np.angle(np.asarray(phase)), self.dtype)

        return {'mag': self._index(cqtm),
                'phase': self._index(phase)} 
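Stripped of the class machinery, an HCQT is just a stack of CQTs whose fmin is scaled by each harmonic; a minimal sketch with an illustrative harmonic set:

import numpy as np
import librosa

sr = 22050
y = librosa.tone(220.0, sr=sr, duration=1.0)
fmin = librosa.note_to_hz('C1')
harmonics = [1, 2, 3, 4, 5]  # illustrative; sub-harmonics such as 0.5 also appear in practice

hcqt = np.stack([
    np.abs(librosa.cqt(y=y, sr=sr, fmin=fmin * h,
                       n_bins=60, bins_per_octave=12))
    for h in harmonics
])  # shape: (n_harmonics, n_bins, n_frames)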
Example #6
Source File: features.py    From msaf with MIT License
def get_id(self):
        """Identifier of these features."""
        return "cqt" 
Example #7
Source File: features.py    From msaf with MIT License
def compute_features(self):
        """Actual implementation of the features.

        Returns
        -------
        cqt: np.array(N, F)
            The features, each row representing a feature vector for a given
            time frame/beat.
        """
        linear_cqt = np.abs(librosa.cqt(
            self._audio, sr=self.sr, hop_length=self.hop_length,
            n_bins=self.n_bins, norm=self.norm, filter_scale=self.filter_scale)
                            ) ** 2
        cqt = librosa.amplitude_to_db(linear_cqt, ref=self.ref_power).T
        return cqt 
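Note that linear_cqt above is a squared magnitude, i.e. a power spectrogram; librosa also provides power_to_db, which applies the 10 * log10 scaling defined for power inputs (amplitude_to_db applies 20 * log10 and expects magnitudes). A sketch of the power_to_db variant, in case that scaling is what you want:

import numpy as np
import librosa

sr = 22050
y = librosa.tone(440.0, sr=sr, duration=1.0)
power = np.abs(librosa.cqt(y=y, sr=sr)) ** 2
cqt_db = librosa.power_to_db(power, ref=np.max).T  # frames x bins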
Example #8
Source File: datautils.py    From panotti with MIT License
def make_melgram(mono_sig, sr, n_mels=128):   # @keunwoochoi upgraded from 96 to 128 mel bins in kapre
    #melgram = librosa.logamplitude(librosa.feature.melspectrogram(mono_sig,  # latest librosa deprecated logamplitude in favor of amplitude_to_db
    #    sr=sr, n_mels=96),ref_power=1.0)[np.newaxis,np.newaxis,:,:]

    melgram = librosa.amplitude_to_db(librosa.feature.melspectrogram(y=mono_sig,
        sr=sr, n_mels=n_mels))[np.newaxis,:,:,np.newaxis]     # last newaxis is b/c tensorflow wants 'channels_last' order

    '''
    # librosa docs also include a perceptual CQT example:
    CQT = librosa.cqt(mono_sig, sr=sr, fmin=librosa.note_to_hz('A1'))
    freqs = librosa.cqt_frequencies(CQT.shape[0], fmin=librosa.note_to_hz('A1'))
    perceptual_CQT = librosa.perceptual_weighting(CQT**2, freqs, ref=np.max)
    melgram = perceptual_CQT[np.newaxis,np.newaxis,:,:]
    '''
    return melgram 
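The commented perceptual-CQT recipe can be made runnable; a sketch under the assumption that the magnitude is taken before squaring (librosa.cqt returns complex values) and with a synthesized tone as input:

import numpy as np
import librosa

sr = 22050
mono_sig = librosa.tone(220.0, sr=sr, duration=1.0)  # stand-in signal

CQT = np.abs(librosa.cqt(y=mono_sig, sr=sr, fmin=librosa.note_to_hz('A1')))
freqs = librosa.cqt_frequencies(CQT.shape[0], fmin=librosa.note_to_hz('A1'))
perceptual_CQT = librosa.perceptual_weighting(CQT**2, freqs, ref=np.max)
melgram = perceptual_CQT[np.newaxis, :, :, np.newaxis]  # channels_last, as above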
Example #9
Source File: cqt.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the HCQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        cqtm, phase = [], []

        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        for h in self.harmonics:
            C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                    fmin=self.fmin * h,
                    n_bins=(self.n_octaves * self.over_sample * 12),
                    bins_per_octave=(self.over_sample * 12))

            C = fix_length(C, n_frames)

            C, P = magphase(C)
            if self.log:
                C = amplitude_to_db(C, ref=np.max)
            cqtm.append(C)
            phase.append(P)

        cqtm = to_dtype(np.asarray(cqtm), self.dtype)
        phase = np.angle(np.asarray(phase))

        dphase = to_dtype(phase_diff(self._index(phase), self.conv),
                          self.dtype)

        return {'mag': self._index(cqtm),
                'dphase': dphase} 
Example #10
Source File: features.py    From msaf with MIT License
def __init__(self, file_struct, feat_type, sr=config.sample_rate,
                 hop_length=config.hop_size, n_bins=config.cqt.bins,
                 norm=config.cqt.norm, filter_scale=config.cqt.filter_scale,
                 ref_power=config.cqt.ref_power):
        """Constructor of the class.

        Parameters
        ----------
        file_struct: `msaf.input_output.FileStruct`
            Object containing the file paths from where to extract/read
            the features.
        feat_type: `FeatureTypes`
            Enum containing the type of features.
        sr: int > 0
            Sampling rate for the analysis.
        hop_length: int > 0
            Hop size in samples for the analysis.
        n_bins: int > 0
            Number of frequency bins for the CQT.
        norm: float
            Type of norm to use for basis function normalization.
        filter_scale: float
            The scale of the filter for the CQT.
        ref_power: str
            The reference power for logarithmic scaling.
            See `configdefaults.py` for the possible values.
        """
        # Init the parent
        super().__init__(file_struct=file_struct, sr=sr, hop_length=hop_length,
                         feat_type=feat_type)
        # Init the CQT parameters
        self.n_bins = n_bins
        self.norm = norm
        self.filter_scale = filter_scale
        if ref_power == "max":
            self.ref_power = np.max
        elif ref_power == "min":
            self.ref_power = np.min
        elif ref_power == "median":
            self.ref_power = np.median
        else:
            raise FeatureParamsError("Wrong value for ref_power") 
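The chain of if/elif string checks can also be written as a dict lookup, which keeps the accepted ref_power values in one place; a sketch (using ValueError in place of msaf's FeatureParamsError):

import numpy as np

REF_POWERS = {'max': np.max, 'min': np.min, 'median': np.median}

def resolve_ref_power(name):
    # Map a config string to the corresponding reference-power function.
    try:
        return REF_POWERS[name]
    except KeyError:
        raise ValueError('Wrong value for ref_power: {!r}'.format(name)) from None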
Example #11
Source File: utils.py    From time-domain-neural-audio-style-transfer with Apache License 2.0
def rainbowgram(path,
                ax,
                peak=70.0,
                use_cqt=False,
                n_fft=1024,
                hop_length=256,
                sr=22050,
                over_sample=4,
                res_factor=0.8,
                octaves=5,
                notes_per_octave=10):
    audio = librosa.load(path, sr=sr)[0]
    if use_cqt:
        C = librosa.cqt(audio,
                        sr=sr,
                        hop_length=hop_length,
                        bins_per_octave=int(notes_per_octave * over_sample),
                        n_bins=int(octaves * notes_per_octave * over_sample),
                        filter_scale=res_factor,
                        fmin=librosa.note_to_hz('C2'))
    else:
        C = librosa.stft(
            audio,
            n_fft=n_fft,
            win_length=n_fft,
            hop_length=hop_length,
            center=True)
    mag, phase = librosa.core.magphase(C)
    phase_angle = np.angle(phase)
    phase_unwrapped = np.unwrap(phase_angle)
    dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
    dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi
    mag = (librosa.power_to_db(  # logamplitude was removed from librosa; power_to_db(ref=...) replaces it
        mag**2, amin=1e-13, top_db=peak, ref=np.max) / peak) + 1
    cdict = {
        'red': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'green': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'blue': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'alpha': ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0))
    }
    my_mask = matplotlib.colors.LinearSegmentedColormap('MyMask', cdict)
    plt.register_cmap(cmap=my_mask)
    ax.matshow(dphase[::-1, :], cmap=plt.cm.rainbow)
    ax.matshow(mag[::-1, :], cmap=my_mask) 
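A hedged usage sketch (the audio path is hypothetical, and the function's own module-level imports of librosa, numpy, and matplotlib are assumed):

import matplotlib
matplotlib.use('Agg')  # optional: headless backend
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 4))
rainbowgram('example.wav', ax, use_cqt=True)  # hypothetical input file
fig.savefig('rainbowgram.png', dpi=150)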
Example #12
Source File: audio_utils.py    From hands-on-music-generation-with-magenta with MIT License
def save_spectrogram_plot(audio: Any,
                          sample_rate: int = 16000,
                          filename: Optional[str] = None,
                          output_dir: str = "output") -> None:
  """
  Saves the spectrogram plot of the given audio to the given filename in
  the given output_dir. The resulting plot is a Constant-Q transform (CQT)
  spectrogram, with amplitudes shown on a dB scale.

  :param audio: the audio content, as a floating point time series
  :param sample_rate: the sampling rate of the file
  :param filename: the optional filename, set to "%Y-%m-%d_%H%M%S".png if None
  :param output_dir: the output dir
  """
  os.makedirs(output_dir, exist_ok=True)

  # Pitch min and max correspond to the pitch min and max
  # of the wavenet training checkpoint
  pitch_min = 36
  pitch_max = 84
  frequency_min = librosa.midi_to_hz(pitch_min)
  frequency_max = 2 * librosa.midi_to_hz(pitch_max)
  octaves = int(np.ceil(np.log2(frequency_max) - np.log2(frequency_min)))
  bins_per_octave = 32
  num_bins = int(bins_per_octave * octaves)
  hop_length = 2048
  constant_q_transform = librosa.cqt(
    audio,
    sr=sample_rate,
    hop_length=hop_length,
    fmin=frequency_min,
    n_bins=num_bins,
    bins_per_octave=bins_per_octave)
  plt.figure()
  plt.axis("off")
  librosa.display.specshow(
    librosa.amplitude_to_db(np.abs(constant_q_transform), ref=np.max),
    sr=sample_rate)

  if not filename:
    date_and_time = time.strftime("%Y-%m-%d_%H%M%S")
    filename = f"{date_and_time}.png"
  path = os.path.join(output_dir, filename)
  plt.savefig(fname=path, dpi=600)
  plt.close()
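A minimal call sketch, loading audio with librosa at the 16 kHz rate the docstring assumes (the file name is hypothetical):

import librosa

audio, sample_rate = librosa.load('example.wav', sr=16000)  # hypothetical input file
save_spectrogram_plot(audio, sample_rate=sample_rate, filename='cqt.png')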