Python librosa.cqt() Examples

The following are 12 code examples of librosa.cqt(), drawn from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the librosa module.
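For orientation before the project examples, here is a minimal self-contained call; a synthesized tone stands in for real audio so the sketch runs offline, and the parameter values are only illustrative:

import numpy as np
import librosa

sr = 22050
y = librosa.tone(440.0, sr=sr, duration=2.0)  # 2-second A4 sine as a stand-in signal

# 84 bins at 12 bins per octave span 7 octaves upward from C1.
C = librosa.cqt(y=y, sr=sr, hop_length=512, fmin=librosa.note_to_hz("C1"),
                n_bins=84, bins_per_octave=12)

# librosa.cqt returns complex values; most uses take the dB-scaled magnitude.
C_db = librosa.amplitude_to_db(np.abs(C), ref=np.max)
print(C_db.shape)  # (n_bins, n_frames)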
Example #1
Source File: feature_description.py    From Audio-Vision with MIT License
def cqt(features, path, dataset=None):
    """
    Extract the constant-Q transform from an audio file.

    Make sure you pass a dictionary containing all attributes
    and a path to the audio.
    """
    fsx = features['fs'][0]
    hop_length = features['hop_length'][0]
    n_bins = features['n_bins'][0]
    bins_per_octave = features['bins_per_octave'][0]
    window = features['window'][0]
    mono = features['mono'][0]
    wav, fs = read_audio('librosa', path, dataset)
    wav = convert_mono(wav, mono)
    if fs != fsx:
        raise ValueError("Sampling rate mismatch: found {}, expected {}".format(fs, fsx))
    X = librosa.cqt(y=wav, sr=fs, hop_length=hop_length, n_bins=n_bins,
                    bins_per_octave=bins_per_octave, window=window)
    X = X.T
    # Take the magnitude before the log: librosa.cqt returns complex values.
    X = np.log10(np.abs(X))
    return X


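For reference, the same log-magnitude CQT can be computed without the project's feature dictionary or its read_audio/convert_mono helpers; a minimal sketch, with a hypothetical file name and illustrative parameter values:

import numpy as np
import librosa

wav, fs = librosa.load("example.wav", sr=None, mono=True)  # hypothetical input file
X = librosa.cqt(y=wav, sr=fs, hop_length=512, n_bins=84,
                bins_per_octave=12, window='hann')
X = np.log10(np.abs(X.T) + 1e-10)  # frames x bins; small floor avoids log(0)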
Example #2
Source File: create_spectrograms.py    From tartarus with MIT License
def compute_spec(audio_file, spectro_file):
    # Get the actual audio
    audio, sr = librosa.load(audio_file, sr=config['resample_sr'])
    # Compute the spectrogram
    if config['spectrogram_type'] == 'cqt':
        spec = librosa.cqt(y=audio, sr=sr, hop_length=config['hop'],
                           n_bins=config['cqt_bins'])
    elif config['spectrogram_type'] == 'mel':
        spec = librosa.feature.melspectrogram(y=audio, sr=sr, hop_length=config['hop'],
                                              n_fft=config['n_fft'], n_mels=config['n_mels'])
    elif config['spectrogram_type'] == 'stft':
        spec = librosa.stft(y=audio, n_fft=config['n_fft'])
    else:
        raise ValueError("Unknown spectrogram_type: {}".format(config['spectrogram_type']))
    # Write results (binary mode is required for pickle):
    with open(spectro_file, "wb") as f:
        pickle.dump(spec, f, protocol=-1)  # spec shape: MxN
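A matching loader would open the pickle in binary mode as well; a minimal sketch:

import pickle

def load_spec(spectro_file):
    # Read back the spectrogram written by compute_spec above.
    with open(spectro_file, "rb") as f:
        return pickle.load(f)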
Example #3
Source File: cqt.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the CQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))

        C = fix_length(C, n_frames)

        cqtm, phase = magphase(C)
        if self.log:
            cqtm = amplitude_to_db(cqtm, ref=np.max)

        return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
                'phase': to_dtype(np.angle(phase).T[self.idx], self.dtype)} 
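The mag/phase split relies on librosa.magphase, which factors a complex spectrogram into a magnitude part and a unit-modulus phase part; a standalone sketch of the same decomposition, using a synthesized tone as input:

import numpy as np
import librosa

sr = 22050
y = librosa.tone(261.6, sr=sr, duration=1.0)  # C4 sine as a stand-in signal
C = librosa.cqt(y=y, sr=sr, n_bins=72, bins_per_octave=12)

mag, phase = librosa.magphase(C)   # C == mag * phase, with |phase| == 1
angles = np.angle(phase)           # phase in radians, as stored above
mag_db = librosa.amplitude_to_db(mag, ref=np.max)  # the self.log branch above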
Example #4
Source File: cqt.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the CQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))

        C = fix_length(C, n_frames)

        cqtm, phase = magphase(C)
        if self.log:
            cqtm = amplitude_to_db(cqtm, ref=np.max)

        dphase = phase_diff(np.angle(phase).T[self.idx], self.conv)

        return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
                'dphase': to_dtype(dphase, self.dtype)} 
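pumpp's phase_diff helper is internal to the library, but the underlying idea is a frame-to-frame difference of unwrapped phase; a rough standalone approximation (the padding convention here is an assumption):

import numpy as np

def simple_phase_diff(angles):
    # angles: (n_frames, n_bins) phase in radians, frames on axis 0.
    unwrapped = np.unwrap(angles, axis=0)
    d = np.diff(unwrapped, axis=0)
    # Assumed convention: repeat the first frame so the frame count is preserved.
    return np.concatenate([unwrapped[:1], d], axis=0)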
Example #5
Source File: cqt.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the HCQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        cqtm, phase = [], []

        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        for h in self.harmonics:
            C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                    fmin=self.fmin * h,
                    n_bins=(self.n_octaves * self.over_sample * 12),
                    bins_per_octave=(self.over_sample * 12))

            C = fix_length(C, n_frames)

            C, P = magphase(C)
            if self.log:
                C = amplitude_to_db(C, ref=np.max)
            cqtm.append(C)
            phase.append(P)

        cqtm = to_dtype(np.asarray(cqtm), self.dtype)
        phase = to_dtype(np.angle(np.asarray(phase)), self.dtype)

        return {'mag': self._index(cqtm),
                'phase': self._index(phase)} 
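Stripped of the class machinery, an HCQT is just a stack of CQTs whose fmin is scaled by each harmonic; a minimal sketch with an illustrative harmonic set:

import numpy as np
import librosa

sr = 22050
y = librosa.tone(220.0, sr=sr, duration=1.0)
fmin = librosa.note_to_hz('C1')
harmonics = [1, 2, 3, 4, 5]  # illustrative; sub-harmonics such as 0.5 also appear in practice

hcqt = np.stack([
    np.abs(librosa.cqt(y=y, sr=sr, fmin=fmin * h,
                       n_bins=60, bins_per_octave=12))
    for h in harmonics
])  # shape: (n_harmonics, n_bins, n_frames)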
Example #6
Source File: features.py    From msaf with MIT License
def get_id(self):
        """Identifier of these features."""
        return "cqt" 
Example #7
Source File: features.py    From msaf with MIT License
def compute_features(self):
        """Actual implementation of the features.

        Returns
        -------
        cqt: np.array(N, F)
            The features, each row representing a feature vector for a given
            time frame/beat.
        """
        linear_cqt = np.abs(librosa.cqt(
            self._audio, sr=self.sr, hop_length=self.hop_length,
            n_bins=self.n_bins, norm=self.norm, filter_scale=self.filter_scale)
                            ) ** 2
        cqt = librosa.amplitude_to_db(linear_cqt, ref=self.ref_power).T
        return cqt 
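Note that linear_cqt above is a squared magnitude, i.e. a power spectrogram; librosa also provides power_to_db, which applies the 10 * log10 scaling defined for power inputs (amplitude_to_db applies 20 * log10 and expects magnitudes). A sketch of the power_to_db variant, in case that scaling is what you want:

import numpy as np
import librosa

sr = 22050
y = librosa.tone(440.0, sr=sr, duration=1.0)
power = np.abs(librosa.cqt(y=y, sr=sr)) ** 2
cqt_db = librosa.power_to_db(power, ref=np.max).T  # frames x bins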
Example #8
Source File: datautils.py    From panotti with MIT License
def make_melgram(mono_sig, sr, n_mels=128):   # @keunwoochoi upgraded from 96 to 128 mel bins in kapre
    #melgram = librosa.logamplitude(librosa.feature.melspectrogram(mono_sig,  # latest librosa deprecated logamplitude in favor of amplitude_to_db
    #    sr=sr, n_mels=96),ref_power=1.0)[np.newaxis,np.newaxis,:,:]

    melgram = librosa.amplitude_to_db(librosa.feature.melspectrogram(y=mono_sig,
        sr=sr, n_mels=n_mels))[np.newaxis,:,:,np.newaxis]     # last newaxis is b/c tensorflow wants 'channels_last' order

    '''
    # librosa docs also include a perceptual CQT example:
    CQT = librosa.cqt(mono_sig, sr=sr, fmin=librosa.note_to_hz('A1'))
    freqs = librosa.cqt_frequencies(CQT.shape[0], fmin=librosa.note_to_hz('A1'))
    perceptual_CQT = librosa.perceptual_weighting(CQT**2, freqs, ref=np.max)
    melgram = perceptual_CQT[np.newaxis,np.newaxis,:,:]
    '''
    return melgram 
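The commented perceptual-CQT recipe can be made runnable; a sketch under the assumption that the magnitude is taken before squaring (librosa.cqt returns complex values) and with a synthesized tone as input:

import numpy as np
import librosa

sr = 22050
mono_sig = librosa.tone(220.0, sr=sr, duration=1.0)  # stand-in signal

CQT = np.abs(librosa.cqt(y=mono_sig, sr=sr, fmin=librosa.note_to_hz('A1')))
freqs = librosa.cqt_frequencies(CQT.shape[0], fmin=librosa.note_to_hz('A1'))
perceptual_CQT = librosa.perceptual_weighting(CQT**2, freqs, ref=np.max)
melgram = perceptual_CQT[np.newaxis, :, :, np.newaxis]  # channels_last, as above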
Example #9
Source File: cqt.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the HCQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        cqtm, phase = [], []

        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        for h in self.harmonics:
            C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                    fmin=self.fmin * h,
                    n_bins=(self.n_octaves * self.over_sample * 12),
                    bins_per_octave=(self.over_sample * 12))

            C = fix_length(C, n_frames)

            C, P = magphase(C)
            if self.log:
                C = amplitude_to_db(C, ref=np.max)
            cqtm.append(C)
            phase.append(P)

        cqtm = to_dtype(np.asarray(cqtm), self.dtype)
        phase = np.angle(np.asarray(phase))

        dphase = to_dtype(phase_diff(self._index(phase), self.conv),
                          self.dtype)

        return {'mag': self._index(cqtm),
                'dphase': dphase} 
Example #10
Source File: features.py    From msaf with MIT License
def __init__(self, file_struct, feat_type, sr=config.sample_rate,
                 hop_length=config.hop_size, n_bins=config.cqt.bins,
                 norm=config.cqt.norm, filter_scale=config.cqt.filter_scale,
                 ref_power=config.cqt.ref_power):
        """Constructor of the class.

        Parameters
        ----------
        file_struct: `msaf.input_output.FileStruct`
            Object containing the file paths from where to extract/read
            the features.
        feat_type: `FeatureTypes`
            Enum containing the type of features.
        sr: int > 0
            Sampling rate for the analysis.
        hop_length: int > 0
            Hop size in samples for the analysis.
        n_bins: int > 0
            Number of frequency bins for the CQT.
        norm: float
            Type of norm to use for basis function normalization.
        filter_scale: float
            The scale of the filter for the CQT.
        ref_power: str
            The reference power for logarithmic scaling.
            See `configdefaults.py` for the possible values.
        """
        # Init the parent
        super().__init__(file_struct=file_struct, sr=sr, hop_length=hop_length,
                         feat_type=feat_type)
        # Init the CQT parameters
        self.n_bins = n_bins
        self.norm = norm
        self.filter_scale = filter_scale
        if ref_power == "max":
            self.ref_power = np.max
        elif ref_power == "min":
            self.ref_power = np.min
        elif ref_power == "median":
            self.ref_power = np.median
        else:
            raise FeatureParamsError("Wrong value for ref_power") 
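The chain of if/elif string checks can also be written as a dict lookup, which keeps the accepted ref_power values in one place; a sketch (using ValueError in place of msaf's FeatureParamsError):

import numpy as np

REF_POWERS = {'max': np.max, 'min': np.min, 'median': np.median}

def resolve_ref_power(name):
    # Map a config string to the corresponding reference-power function.
    try:
        return REF_POWERS[name]
    except KeyError:
        raise ValueError('Wrong value for ref_power: {!r}'.format(name)) from None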
Example #11
Source File: utils.py    From time-domain-neural-audio-style-transfer with Apache License 2.0
def rainbowgram(path,
                ax,
                peak=70.0,
                use_cqt=False,
                n_fft=1024,
                hop_length=256,
                sr=22050,
                over_sample=4,
                res_factor=0.8,
                octaves=5,
                notes_per_octave=10):
    audio = librosa.load(path, sr=sr)[0]
    if use_cqt:
        C = librosa.cqt(audio,
                        sr=sr,
                        hop_length=hop_length,
                        bins_per_octave=int(notes_per_octave * over_sample),
                        n_bins=int(octaves * notes_per_octave * over_sample),
                        filter_scale=res_factor,
                        fmin=librosa.note_to_hz('C2'))
    else:
        C = librosa.stft(
            audio,
            n_fft=n_fft,
            win_length=n_fft,
            hop_length=hop_length,
            center=True)
    mag, phase = librosa.core.magphase(C)
    phase_angle = np.angle(phase)
    phase_unwrapped = np.unwrap(phase_angle)
    dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
    dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi
    mag = (librosa.power_to_db(  # logamplitude was removed from librosa; power_to_db(ref=...) replaces it
        mag**2, amin=1e-13, top_db=peak, ref=np.max) / peak) + 1
    cdict = {
        'red': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'green': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'blue': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'alpha': ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0))
    }
    my_mask = matplotlib.colors.LinearSegmentedColormap('MyMask', cdict)
    plt.register_cmap(cmap=my_mask)
    ax.matshow(dphase[::-1, :], cmap=plt.cm.rainbow)
    ax.matshow(mag[::-1, :], cmap=my_mask) 
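A hedged usage sketch (the audio path is hypothetical, and the function's own module-level imports of librosa, numpy, and matplotlib are assumed):

import matplotlib
matplotlib.use('Agg')  # optional: headless backend
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 4))
rainbowgram('example.wav', ax, use_cqt=True)  # hypothetical input file
fig.savefig('rainbowgram.png', dpi=150)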
Example #12
Source File: audio_utils.py    From hands-on-music-generation-with-magenta with MIT License
def save_spectrogram_plot(audio: Any,
                          sample_rate: int = 16000,
                          filename: Optional[str] = None,
                          output_dir: str = "output") -> None:
  """
  Saves the spectrogram plot of the given audio to the given filename in
  the given output_dir. The resulting plot is a Constant-Q transform (CQT)
  spectrogram, with amplitudes shown on a dB scale.

  :param audio: the audio content, as a floating point time series
  :param sample_rate: the sampling rate of the file
  :param filename: the optional filename, set to "%Y-%m-%d_%H%M%S".png if None
  :param output_dir: the output dir
  """
  os.makedirs(output_dir, exist_ok=True)

  # Pitch min and max correspond to the pitch min and max
  # of the wavenet training checkpoint
  pitch_min = 36
  pitch_max = 84
  frequency_min = librosa.midi_to_hz(pitch_min)
  frequency_max = 2 * librosa.midi_to_hz(pitch_max)
  octaves = int(np.ceil(np.log2(frequency_max) - np.log2(frequency_min)))
  bins_per_octave = 32
  num_bins = int(bins_per_octave * octaves)
  hop_length = 2048
  constant_q_transform = librosa.cqt(
    audio,
    sr=sample_rate,
    hop_length=hop_length,
    fmin=frequency_min,
    n_bins=num_bins,
    bins_per_octave=bins_per_octave)
  plt.figure()
  plt.axis("off")
  librosa.display.specshow(
    librosa.amplitude_to_db(np.abs(constant_q_transform), ref=np.max),
    sr=sample_rate)

  if not filename:
    date_and_time = time.strftime("%Y-%m-%d_%H%M%S")
    filename = f"{date_and_time}.png"
  path = os.path.join(output_dir, filename)
  plt.savefig(fname=path, dpi=600)
  plt.close()
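A minimal call sketch, loading audio with librosa at the 16 kHz rate the docstring assumes (the file name is hypothetical):

import librosa

audio, sample_rate = librosa.load('example.wav', sr=16000)  # hypothetical input file
save_spectrogram_plot(audio, sample_rate=sample_rate, filename='cqt.png')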