Python Examples of numpy.hamming

Source File: timitphonemerec.py From mist-rnns with Apache License 2.0

7 votes

def _mfcc_and_labels(audio, labels):
  """ Convert to MFCC features and corresponding (interpolated) labels.

  Returns:
    A tuple, `(mfcc_features, mfcc_labels)`. A 1-D float array and a 1-D int
      array, both with the same shape.
  """
  mfcc_sample_rate = 100.0
  winfunc = lambda x: np.hamming(x)
  mfcc_features = python_speech_features.mfcc(audio, samplerate=timit.SAMPLE_RATE, winlen=0.025,
                                              winstep=1.0/mfcc_sample_rate, lowfreq=85.0,
                                              highfreq=timit.SAMPLE_RATE/2, winfunc=winfunc)
  t_audio = np.linspace(0.0, audio.shape[0] * 1.0 / timit.SAMPLE_RATE, audio.size, endpoint=False)
  t_mfcc = np.linspace(0.0, mfcc_features.shape[0] * 1.0 / mfcc_sample_rate, mfcc_features.shape[0], endpoint=False)
  interp_func = scipy.interpolate.interp1d(t_audio, labels, kind='nearest')
  mfcc_labels = interp_func(t_mfcc)
  return mfcc_features, mfcc_labels

Source File: smooth_and_interp.py From wavelet_prosody_toolkit with MIT License

6 votes

def smooth(params, win, type="HAMMING"):

    """
    gaussian type smoothing, convolution with hamming window
    """
    win = int(win+0.5)
    if win >= len(params)-1:
        win = len(params)-1

    if win % 2 == 0:
        win += 1

    s = np.r_[params[win-1:0:-1], params, params[-1:-win:-1]]

    if type == "HAMMING":
        w = np.hamming(win)
        # third = int(win/3)
        # w[:third] = 0
    else:
        w = np.ones(win)

    y = np.convolve(w/w.sum(), s, mode='valid')
    return y[int(win/2):-int(win/2)]

Source File: window.py From spectrum with BSD 3-Clause "New" or "Revised" License

6 votes

def plot_window(self):
        """Plot the window in the time domain

        .. plot::
            :width: 80%
            :include-source:

            from spectrum.window import Window
            w = Window(64, name='hamming')
            w.plot_window()

        """
        from pylab import plot, xlim, grid, title, ylabel, axis
        x = linspace(0, 1, self.N)
        xlim(0, 1)
        plot(x, self.data)
        grid(True)
        title('%s Window (%s points)' % (self.name.capitalize(), self.N))
        ylabel('Amplitude')
        axis([0, 1, 0, 1.1])

Source File: window.py From spectrum with BSD 3-Clause "New" or "Revised" License

6 votes

def window_hamming(N):
    r"""Hamming window

    :param N: window length


    The Hamming window is defined as

    .. math:: 0.54 -0.46 \cos\left(\frac{2\pi n}{N-1}\right)
               \qquad 0 \leq n \leq M-1

    .. plot::
        :width: 80%
        :include-source:

        from spectrum import window_visu
        window_visu(64, 'hamming')

    .. seealso:: numpy.hamming, :func:`create_window`, :class:`Window`.
    """
    from numpy import hamming
    return hamming(N)

Source File: base.py From python_kaldi_features with MIT License

6 votes

def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=40,nfft=512,lowfreq=64,highfreq=None,dither=1.0,remove_dc_offset=True,preemph=0.97,wintype='hamming'):
    """Compute log Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
    """
    feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,dither, remove_dc_offset,preemph,wintype)
    return numpy.log(feat)

Source File: pyQHM.py From AMFM_decompy with MIT License

6 votes

def __init__(self, window_duration, fs):
        self.dur = window_duration         # in seconds
        self.length = int(self.dur*fs+1)
        if not self.length %2:
            self.length -= 1
        self.data = np.hamming(self.length)
        self.data2 = self.data**2
        self.N = int(self.dur*fs/2)
        self.half_len_vec = np.arange(self.N+1)
        self.len_vec = np.arange(-self.N, self.N+1)

        self.a0 = 0.54**2 + (0.46**2)/2
        self.a1 = 0.54*0.46
        self.a2 = (0.46**2)/4

        self.R0_diag = R_eq(0, g0, self)
        self.R2_diag = sum(self.data2*(self.len_vec**2))

Source File: prepare_data.py From music_transcription_MAPS with MIT License

6 votes

def spectrogram(audio):
    """Calculate magnitude spectrogram of an audio sequence. 
    
    Args: 
      audio: 1darray, audio sequence. 
      
    Returns:
      x: ndarray, spectrogram (n_time, n_freq)
    """
    n_window = cfg.n_window
    n_overlap = cfg.n_overlap
    
    ham_win = np.hamming(n_window)
    [f, t, x] = signal.spectral.spectrogram(
                    audio, 
                    window=ham_win,
                    nperseg=n_window, 
                    noverlap=n_overlap, 
                    detrend=False, 
                    return_onesided=True, 
                    mode='magnitude') 
    x = x.T
    x = x.astype(np.float32)
    return x

Source File: ShortTermFeatures.py From pyAudioAnalysis with Apache License 2.0

6 votes

def phormants(x, sampling_rate):
    N = len(x)
    w = np.hamming(N)

    # Apply window and high pass filter.
    x1 = x * w
    x1 = lfilter([1], [1., 0.63], x1)

    # Get LPC.
    ncoeff = 2 + sampling_rate / 1000
    A, e, k = lpc(x1, ncoeff)
    # A, e, k = lpc(x1, 8)

    # Get roots.
    rts = np.roots(A)
    rts = [r for r in rts if np.imag(r) >= 0]

    # Get angles.
    angz = np.arctan2(np.imag(rts), np.real(rts))

    # Get frequencies.
    frqs = sorted(angz * (sampling_rate / (2 * math.pi)))

    return frqs

Source File: freq_analysis.py From pykaldi2 with MIT License

6 votes

def __init__(self, config=None, fs=16000, fft_size=512, frame_len=400, frame_shift=160, window='hamming', do_dither=True, dc_removal=False, use_gpu=False):
        self.fs = fs
        self.fft_size = fft_size
        self.frame_len = frame_len
        self.frame_shift = frame_shift
        self.window = window
        self.do_dither = do_dither
        self.dc_removal = dc_removal
        self.use_gpu = use_gpu

        if config is not None:
            for attr in config:
                setattr(self, attr, config[attr])

        self.n_bin = self.fft_size/2+1
        self.frame_overlap = self.frame_len - self.frame_shift

Source File: sr_dataset.py From pykaldi2 with MIT License

6 votes

def _logfbank_extractor(self, wav):
        # typical log fbank extraction for 16kHz speech data
        preemphasis = 0.96

        t1 = np.sum(self._window, 0)
        t1[t1 == 0] = -1
        inv = np.diag(1 / t1)
        mel = self._window.dot(inv).T

        wav = wav[1:] - preemphasis * wav[:-1]
        S = stft(wav, n_fft=512, hop_length=160, win_length=400, window=np.hamming(400), center=False).T

        spec_mag = np.abs(S)
        spec_power = spec_mag ** 2
        fbank_power = spec_power.T.dot(mel * 32768 ** 2) + 1
        log_fbank = np.log(fbank_power)

        return log_fbank

Source File: helper_classes.py From rapidtide with Apache License 2.0

6 votes

def __init__(self,
                 Fs=0.0,
                 corrorigin=0,
                 lagmininpts=0,
                 lagmaxinpts=0,
                 ncprefilter=None,
                 reftc=None,
                 detrendorder=1,
                 windowfunc='hamming',
                 corrweighting='none'):
        self.Fs = Fs
        self.corrorigin = corrorigin
        self.lagmininpts = lagmininpts
        self.lagmaxinpts = lagmaxinpts
        self.ncprefilter = ncprefilter
        self.reftc = reftc
        self.detrendorder = detrendorder
        self.windowfunc = windowfunc
        if self.windowfunc is not None:
            self.usewindowfunc = True
        else:
            self.usewindowfunc = False
        self.corrweighting = corrweighting
        if self.reftc is not None:
            self.setreftc(self.reftc)

Source File: iqplot.py From iqtool with The Unlicense

6 votes

def plotSpectrogram(data, fftWindow, fftSize, Fs):

    if fftSize == None:
        N = len(data)
    else:
        N = fftSize    
    
    if Fs == None:
        Fs = 2
    
    if fftWindow == "rectangular":
        plt.specgram(data, NFFT=N, Fs=Fs, 
        window=lambda data: data*np.ones(len(data)),  noverlap=int(N/10))
    elif fftWindow == "bartlett":
        plt.specgram(data, NFFT=N, Fs=Fs, 
        window=lambda data: data*np.bartlett(len(data)),  noverlap=int(N/10))
    elif args.fftWindow == "blackman":
        plt.specgram(data, NFFT=N, Fs=Fs, 
        window=lambda data: data*np.blackman(len(data)),  noverlap=int(N/10))
    elif fftWindow == "hamming":
        plt.specgram(data, NFFT=N, Fs=Fs, 
        window=lambda data: data*np.hamming(len(data)),  noverlap=int(N/10))
    elif fftWindow == "hanning":
         plt.specgram(data, NFFT=N, Fs=Fs, 
         window=lambda data: data*np.hanning(len(data)),  noverlap=int(N/10))

    plt.show()

Source File: audio.py From deepspeech-german with Apache License 2.0

6 votes

def audiofile_to_input_vector(audio_filename, numcep, numcontext):
    r"""
    Given a WAV audio file at ``audio_filename``, calculates ``numcep`` MFCC features
    at every 0.01s time step with a window length of 0.025s. Appends ``numcontext``
    context frames to the left and right of each time step, and returns this data
    in a numpy array.
    """
    # Load wav files
    fs, audio = wav.read(audio_filename)

    # Get mfcc coefficients
    features = mfcc(audio, samplerate=fs, numcep=numcep, winlen=0.032, winstep=0.02, winfunc=np.hamming)

    # Add empty initial and final contexts
    empty_context = np.zeros((numcontext, numcep), dtype=features.dtype)
    features = np.concatenate((empty_context, features, empty_context))

    return features

Source File: acoustic_feats.py From Ossian with Apache License 2.0

6 votes

def _smooth(params, win, type="HAMMING"):
    

    win = int(win+0.5)
    if win >= len(params)-1:
        win = len(params)-1
    if win % 2 == 0:
        win+=1

    s = np.r_[params[win-1:0:-1],params,params[-1:-win:-1]]

    
    if type=="HAMMING":
        w = np.hamming(win)
        third = int(win/5)
        #w[:third] = 0
    else:
        w = np.ones(win)
        
        
    y = np.convolve(w/w.sum(),s,mode='valid')
    return y[(win/2):-(win/2)]

Source File: base_orig.py From python_kaldi_features with MIT License

5 votes

def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
         nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True,
         winfunc=lambda x:numpy.ones((x,))):
    """Compute MFCC features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph,winfunc)
    feat = numpy.log(feat)
    feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
    feat = lifter(feat,ceplifter)
    if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
    return feat

Source File: spectrum.py From pactools with BSD 3-Clause "New" or "Revised" License

5 votes

def __init__(self, block_length=1024, fft_length=None, step=None,
                 wfunc=np.hamming, fs=1.):

        super(Bicoherence,
              self).__init__(block_length=block_length, fft_length=fft_length,
                             step=step, wfunc=wfunc, fs=fs)

Source File: spectrum.py From pactools with BSD 3-Clause "New" or "Revised" License

5 votes

def __init__(self, block_length=1024, fft_length=None, step=None,
                 wfunc=np.hamming, fs=1.):
        super(Coherence,
              self).__init__(block_length=block_length, fft_length=fft_length,
                             step=step, wfunc=wfunc, fs=fs)

        self.coherence = None

Source File: sound.py From multisensory with Apache License 2.0

5 votes

def make_specgram(sound, rate, shift_fft = True, sample_times = None,
                  nfft = None, noverlap = 2000, par = 0):
  assert rate > 1 # probably should have multiple samples per second
  if nfft is None:
    nfft = int(np.ceil(0.05 * rate))

  nfft += (nfft % 2)
    
  win = np.hamming(nfft)
  sft, time_idx = stfft(sound, nfft, noverlap, win, par = par)
  sft = np.real(sft * np.conjugate(sft))
  sft /= np.sum(np.abs(win)**2)
  freqs = np.fft.fftfreq(sft.shape[1], 1./rate)
  
  # Since the input is real, the result will be symmetric, and thus we can throw away
  # the negative frequencies.
  nfreq = nfft // 2
  assert (freqs[nfreq-1] > 0) and (freqs[nfreq] < 0)
  freqs = freqs[nfreq - 1 : 0 : -1]
  sft = sft[:, nfreq - 1 : 0 : -1]

  if sample_times is None:
    times = time_idx * (1./rate)
  else:
    times = sample_times[time_idx]
  
  return freqs, np.asarray(sft, dtype = 'float32'), times

Source File: features.py From dcase2018_task1 with MIT License

5 votes

def __init__(self, sample_rate, window_size, overlap, mel_bins):
        
        self.window_size = window_size
        self.overlap = overlap
        self.ham_win = np.hamming(window_size)
        
        self.melW = librosa.filters.mel(sr=sample_rate, 
                                        n_fft=window_size, 
                                        n_mels=mel_bins, 
                                        fmin=50., 
                                        fmax=sample_rate // 2).T

Source File: swhear.py From diyECG-1opAmp with MIT License

5 votes

def FFT(data,rate):
    """given some data points and a rate, return [freq,power]"""
    data=data*np.hamming(len(data))
    fft=np.fft.fft(data)
    fft=10*np.log10(np.abs(fft))
    freq=np.fft.fftfreq(len(fft),1/rate)
    return freq[:int(len(freq)/2)],fft[:int(len(fft)/2)]

Source File: freq_analysis.py From pykaldi2 with MIT License

5 votes

def stft(y, n_fft=2048, hop_length=None, win_length=None, window='hamming', center=False, do_dither=False, dtype=np.complex64):
    """
    Perform short-time Fourier transform to input signal.

    :param y: input signal, usually a 1D numpy array.
    :param n_fft: FFT size
    :param hop_length: window shift in terms of number of samples
    :param win_length: window size in terms of number of samples
    :param window: window type, default is 'hamming'
    :param center: whether to pad zeros to the beginning and ending of the input.
    :param do_dither: whether to add small amount of noise to signal to avoid absolute zero
    :param dtype: type of output data.
    :return:
    """
    # By default, use the entire frame
    if win_length is None:
        win_length = n_fft

    # Set the default hop, if it's not already specified
    if hop_length is None:
        hop_length = int(win_length / 3)

    if center:
        assert y.ndim == 1
        y = np.pad(y, win_length-hop_length, mode='constant')

    fft_window = _get_window(window, win_length)

    nrfft = int(n_fft // 2) + 1

    if do_dither:
        y = y + np.random.normal(loc=0.0, scale=1e-5, size=y.shape)

    y_frames = _enframe(y, hop_length, win_length, axis_t=0, newaxis_t=1, newaxis_b=0, copy=True)
    # RFFT and Conjugate here to match phase from DPWE code
    #stft_matrix = fft.fft(fft_window.reshape(1,win_length) * y_frames, n=n_fft, axis=1)[:, :nrfft].astype(dtype=dtype)
    stft_matrix = np.fft.fft(fft_window.reshape(1,win_length) * y_frames, n=n_fft, axis=1)[:, :nrfft].astype(dtype=dtype)
    return stft_matrix

Source File: helper_classes.py From rapidtide with Apache License 2.0

5 votes

def track(self, x, fs):

        self.freqs, self.times, thespectrogram = sp.signal.spectrogram(np.concatenate([np.zeros(int(self.nperseg // 2)), x, np.zeros(int(self.nperseg // 2))], axis=0),
                                                             fs=fs,
                                                             detrend='constant',
                                                             scaling='spectrum',
                                                             nfft=None,
                                                             window=np.hamming(self.nfft),
                                                             noverlap=(self.nperseg - 1))
        lowerliminpts = tide_util.valtoindex(self.freqs, self.lowerlim)
        upperliminpts = tide_util.valtoindex(self.freqs, self.upperlim)

        if self.debug:
            print(self.times.shape, self.freqs.shape, thespectrogram.shape)
            print(self.times)

        # intitialize the peak fitter
        thefitter = correlation_fitter(corrtimeaxis=self.freqs,
                                       lagmin=self.lowerlim,
                                       lagmax=self.upperlim,
                                       absmaxsigma=10.0,
                                       absminsigma=0.1,
                                       debug=self.debug,
                                       findmaxtype='gauss',
                                       zerooutbadfit=False,
                                       refine=True,
                                       useguess=False,
                                       fastgauss=False
                                       )

        peakfreqs = np.zeros((thespectrogram.shape[1] - 1), dtype=float)
        for i in range(0, thespectrogram.shape[1] - 1):
            maxindex, peakfreqs[i], maxval, maxsigma, maskval, failreason, peakstart, peakend  = thefitter.fit(thespectrogram[:, i])
            if not (lowerliminpts <= maxindex <= upperliminpts):
                peakfreqs[i] = -1.0

        return self.times[:-1], peakfreqs

Source File: backend_comparisons.py From hangar-py with Apache License 2.0

5 votes

def setup(self, backend):

        # self.method
        self.current_iter_number = 0
        self.backend_code = {
            'numpy_10': '10',
            'hdf5_00': '00',
            'hdf5_01': '01',
        }
        # self.num_samples

        self.sample_shape = (50, 50, 20)

        self.tmpdir = mkdtemp()
        self.repo = Repository(path=self.tmpdir, exists=False)
        self.repo.init('tester', 'foo@test.bar', remove_old=True)
        self.co = self.repo.checkout(write=True)

        component_arrays = []
        ndims = len(self.sample_shape)
        for idx, shape in enumerate(self.sample_shape):
            layout = [1 for i in range(ndims)]
            layout[idx] = shape
            component = np.hamming(shape).reshape(*layout) * 100
            component_arrays.append(component.astype(np.float32))
        self.arr = np.prod(component_arrays).astype(np.float32)

        try:
            self.aset = self.co.arraysets.init_arrayset(
                'aset', prototype=self.arr, backend_opts=self.backend_code[backend])
        except TypeError:
            try:
                self.aset = self.co.arraysets.init_arrayset(
                    'aset', prototype=self.arr, backend=self.backend_code[backend])
            except ValueError:
                raise NotImplementedError
        except ValueError:
            raise NotImplementedError
        except AttributeError:
            self.aset = self.co.add_ndarray_column(
                'aset', prototype=self.arr, backend=self.backend_code[backend])

Source File: base_orig.py From python_kaldi_features with MIT License

5 votes

def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,
          winfunc=lambda x:numpy.ones((x,))):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
        second return value is the energy in each frame (total energy, unwindowed)
    """
    highfreq= highfreq or samplerate/2
    signal = sigproc.preemphasis(signal,preemph)
    frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate, winfunc)
    pspec = sigproc.powspec(frames,nfft)
    energy = numpy.sum(pspec,1) # this stores the total energy in each frame
    energy = numpy.where(energy == 0,numpy.finfo(float).eps,energy) # if energy is zero, we get problems with log

    fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
    feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
    feat = numpy.where(feat == 0,numpy.finfo(float).eps,feat) # if feat is zero, we get problems with log

    return feat,energy

Source File: auga.py From pydiogment with BSD 3-Clause "New" or "Revised" License

5 votes

def fade_in_and_out(infile):
    """
    Add a fade in and out effect to the audio file.

    Args:
        - infile (str) : input filename/path.
    """
    # read input file
    fs, sig = read_file(filename=infile)
    window = np.hamming(len(sig))

    # construct file names
    output_file_path = os.path.dirname(infile)
    name_attribute = "_augmented_fade_in_out.wav"

    # fade in and out
    window = np.hamming(len(sig))
    augmented_sig = window * sig
    augmented_sig /= np.mean(np.abs(augmented_sig))

    # export data to file
    write_file(output_file_path=output_file_path,
               input_file_name=infile,
               name_attribute=name_attribute,
               sig=augmented_sig,
               fs=fs)

Source File: iqplot.py From iqtool with The Unlicense

5 votes

def plotPSD(data,fftWindow, Fs):

    assert fftWindow in ['rectangular', 'bartlett', 'blackman', 
                         'hamming', 'hanning']
    
    N = len(data)
    
    #Generate the selected window
    if fftWindow == "rectangular":
        window = np.ones(N)
    elif fftWindow == "bartlett":
        window = np.bartlett(N)
    elif args.fftWindow == "blackman":
        window = np.blackman(N)
    elif fftWindow == "hamming":
        window = np.hamming(N)
    elif fftWindow == "hanning":
         window = np.hanning(N)         
         
    dft = np.fft.fft(data*window)    
    
    if Fs == None:
        #If the sample rate is known then plot PSD as
        #Power/Freq in (dB/Hz)
        plt.psd(data*window, NFFT=N)
        
    else:
        #If sample rate is not known then plot PSD as
        #Power/Freq as (dB/rad/sample)
        plt.psd(data*window, NFFT=N, Fs=Fs)

    plt.show()

Source File: base.py From python_kaldi_features with MIT License

5 votes

def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=40,nfft=512,lowfreq=0,highfreq=None,dither=1.0,remove_dc_offset=True, preemph=0.97, 
          wintype='hamming'):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
     winfunc=lambda x:numpy.ones((x,))   
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
        second return value is the energy in each frame (total energy, unwindowed)
    """
    highfreq= highfreq or samplerate/2
    frames,raw_frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate, dither, preemph, remove_dc_offset, wintype)
    pspec = sigproc.powspec(frames,nfft) # nearly the same until this part
    energy = numpy.sum(raw_frames**2,1) # this stores the raw energy in each frame
    energy = numpy.where(energy == 0,numpy.finfo(float).eps,energy) # if energy is zero, we get problems with log

    fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
    feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
    feat = numpy.where(feat == 0,numpy.finfo(float).eps,feat) # if feat is zero, we get problems with log

    return feat,energy

Source File: base.py From python_kaldi_features with MIT License

5 votes

def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
         nfilt=23,nfft=512,lowfreq=20,highfreq=None,dither=1.0,remove_dc_offset=True,preemph=0.97,
         ceplifter=22,useEnergy=True,wintype='povey'):
    """Compute MFCC features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,dither,remove_dc_offset,preemph,wintype)
    feat = numpy.log(feat)
    feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
    feat = lifter(feat,ceplifter)
    if useEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
    return feat

Source File: dsp.py From music_led_strip_control with MIT License

5 votes

def __init__(self, config_lock):

        self._config_lock = config_lock

        # Initial config load.
        self._config = ConfigService.instance(self._config_lock).config
        
        # Initialise filters etc. I've no idea what most of these are for but i imagine i won't be getting rid of them soon 
        n_fft_bins = self._config["audio_config"]["N_FFT_BINS"]
        min_volume_threshold = self._config["audio_config"]["MIN_VOLUME_THRESHOLD"]
        fps = self._config["audio_config"]["FPS"]
        n_rolling_history = self._config["audio_config"]["N_ROLLING_HISTORY"]
        default_sample_rate = self._config["audio_config"]["DEFAULT_SAMPLE_RATE"]

        led_count = self._config["device_config"]["LED_Count"]

        self.fft_plot_filter = ExpFilter(np.tile(1e-1, n_fft_bins), alpha_decay=0.5, alpha_rise=0.99)
        self.mel_gain =        ExpFilter(np.tile(1e-1, n_fft_bins), alpha_decay=0.01, alpha_rise=0.99)
        self.mel_smoothing =   ExpFilter(np.tile(1e-1, n_fft_bins), alpha_decay=0.5, alpha_rise=0.99)
        self.gain =            ExpFilter(np.tile(0.01, n_fft_bins), alpha_decay=0.001, alpha_rise=0.99)
        self.r_filt =          ExpFilter(np.tile(0.01, led_count // 2), alpha_decay=0.2, alpha_rise=0.99)
        self.g_filt =          ExpFilter(np.tile(0.01, led_count // 2), alpha_decay=0.05, alpha_rise=0.3)
        self.b_filt =          ExpFilter(np.tile(0.01, led_count // 2), alpha_decay=0.1, alpha_rise=0.5)
        self.common_mode =     ExpFilter(np.tile(0.01, led_count // 2), alpha_decay=0.99, alpha_rise=0.01)
        self.p_filt =          ExpFilter(np.tile(1, (3, led_count // 2)), alpha_decay=0.1, alpha_rise=0.99)
        self.volume =          ExpFilter(min_volume_threshold, alpha_decay=0.02, alpha_rise=0.02)
        self.p =               np.tile(1.0, (3, led_count // 2))
        # Number of audio samples to read every time frame
        self.samples_per_frame = int(default_sample_rate / fps)
        # Array containing the rolling audio sample window
        self.y_roll = np.random.rand(n_rolling_history, self.samples_per_frame) / 1e16
        self.fft_window =      np.hamming(int(default_sample_rate / fps)\
                                         * n_rolling_history)

        self.samples = None
        self.mel_y = None
        self.mel_x = None
        self.melbank = Melbank()
        self.create_mel_bank()

Source File: dsp.py From music_led_strip_control with MIT License

5 votes

def __init__(self, config_lock):

        self._config_lock = config_lock

        # Initial config load.
        self._config = ConfigService.instance(self._config_lock).config
        
        # Initialise filters etc. I've no idea what most of these are for but i imagine i won't be getting rid of them soon 
        n_fft_bins = self._config["audio_config"]["N_FFT_BINS"]
        min_volume_threshold = self._config["audio_config"]["MIN_VOLUME_THRESHOLD"]
        frames_per_buffer = self._config["audio_config"]["FRAMES_PER_BUFFER"]
        n_rolling_history = self._config["audio_config"]["N_ROLLING_HISTORY"]

        led_count = self._config["device_config"]["LED_Count"]

        self.fft_plot_filter = ExpFilter(np.tile(1e-1, n_fft_bins), alpha_decay=0.5, alpha_rise=0.99)
        self.mel_gain =        ExpFilter(np.tile(1e-1, n_fft_bins), alpha_decay=0.01, alpha_rise=0.99)
        self.mel_smoothing =   ExpFilter(np.tile(1e-1, n_fft_bins), alpha_decay=0.5, alpha_rise=0.99)
        self.gain =            ExpFilter(np.tile(0.01, n_fft_bins), alpha_decay=0.001, alpha_rise=0.99)
        self.r_filt =          ExpFilter(np.tile(0.01, led_count // 2), alpha_decay=0.2, alpha_rise=0.99)
        self.g_filt =          ExpFilter(np.tile(0.01, led_count // 2), alpha_decay=0.05, alpha_rise=0.3)
        self.b_filt =          ExpFilter(np.tile(0.01, led_count // 2), alpha_decay=0.1, alpha_rise=0.5)
        self.common_mode =     ExpFilter(np.tile(0.01, led_count // 2), alpha_decay=0.99, alpha_rise=0.01)
        self.p_filt =          ExpFilter(np.tile(1, (3, led_count // 2)), alpha_decay=0.1, alpha_rise=0.99)
        self.volume =          ExpFilter(min_volume_threshold, alpha_decay=0.02, alpha_rise=0.02)
        self.p =               np.tile(1.0, (3, led_count // 2))
        # Number of audio samples to read every time frame
        #self.samples_per_frame = int(default_sample_rate / fps)
        self.samples_per_frame = int(frames_per_buffer)
        # Array containing the rolling audio sample window
        self.y_roll = np.random.rand(n_rolling_history, self.samples_per_frame) / 1e16
        #self.fft_window =      np.hamming(int(default_sample_rate / fps)\
        #                                 * n_rolling_history)
        self.fft_window =      np.hamming(int(frames_per_buffer)\
                                         * n_rolling_history)

        self.samples = None
        self.mel_y = None
        self.mel_x = None
        self.melbank = Melbank()
        self.create_mel_bank()

Python numpy.hamming() Examples