Python librosa.magphase() Examples
The following are 30 code examples of librosa.magphase(), collected from open-source projects. Each example lists its original project, source file, and license. You may also want to check out the other available functions and classes of the librosa module.
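As a quick refresher before the examples: librosa.magphase(D) factors a complex spectrogram D into a magnitude array S and a unit-magnitude phase array P such that D = S * P. A minimal sketch (the test tone and parameter values are for illustration only):

import numpy as np
import librosa

# One second of a 440 Hz test tone (illustrative input only).
sr = 22050
t = np.arange(sr) / sr
y = np.sin(2 * np.pi * 440 * t).astype(np.float32)

# magphase factors the complex STFT into magnitude and unit-norm phase.
D = librosa.stft(y)
S, P = librosa.magphase(D)

assert np.allclose(S * P, D)        # the factorization is exact
assert np.allclose(np.abs(P), 1.0)  # the phase component has unit magnitude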
Example #1
Source File: audio.py From Speech_emotion_recognition_BLSTM with MIT License | 6 votes |
def split_vocal(self, y):
    S_full, phase = librosa.magphase(librosa.stft(y))

    # To avoid being biased by local continuity, we constrain similar frames to be
    # separated by at least 1.2 seconds.
    S_filter = librosa.decompose.nn_filter(S_full,
                                           aggregate=np.median,
                                           metric='cosine',
                                           width=int(librosa.time_to_frames(self._constrained, sr=self._sr)))
    S_filter = np.minimum(S_full, S_filter)

    margin_v = 10
    power = 2

    mask_v = librosa.util.softmask(S_full - S_filter,
                                   margin_v * S_filter,
                                   power=power)

    S_foreground = mask_v * S_full
    foreground = griffinlim(S_foreground)

    return foreground
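Note: the griffinlim helper called here is defined elsewhere in the project. librosa >= 0.7 provides an equivalent reconstruction directly, so a minimal stand-in (an assumption, not the project's actual helper) would be:

def griffinlim(S):
    # Reconstruct a time-domain signal from a magnitude spectrogram
    # via librosa's built-in Griffin-Lim (available since librosa 0.7).
    return librosa.griffinlim(S)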
Example #2
Source File: irm_dataset.py From IRM-based-Speech-Enhancement-using-LSTM with MIT License | 6 votes |
def __getitem__(self, idx):
    clean_y, _ = librosa.load(self.clean_f_paths[idx], sr=16000)
    snr = random.choice(self.snr_list)
    noise_data = random.choice(self.all_noise_data)
    noise_name = noise_data["name"]
    noise_y = noise_data["y"]
    name = f"{str(idx).zfill(5)}_{noise_name}_{snr}"

    clean_y, noise_y, noisy_y = synthesis_noisy_y(clean_y, noise_y, snr)

    if self.mode == "train":
        clean_mag, _ = librosa.magphase(librosa.stft(clean_y, n_fft=320, hop_length=160, win_length=320))
        noise_mag, _ = librosa.magphase(librosa.stft(noise_y, n_fft=320, hop_length=160, win_length=320))
        noisy_mag, _ = librosa.magphase(librosa.stft(noisy_y, n_fft=320, hop_length=160, win_length=320))

        mask = np.sqrt(clean_mag ** 2 / (clean_mag + noise_mag) ** 2)
        n_frames = clean_mag.shape[-1]

        return noisy_mag, clean_mag, mask, n_frames
    elif self.mode == "validation":
        return noisy_y, clean_y, name
    else:
        return noisy_y, name
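The mask computed in the training branch is an ideal-ratio-mask (IRM) target. At inference time, a predicted mask would typically be applied to the noisy magnitude and recombined with the noisy phase, roughly as follows (a hedged sketch, not code from this repository; predicted_mask is hypothetical):

# Sketch: apply a predicted IRM and resynthesize (predicted_mask is hypothetical).
noisy_mag, noisy_phase = librosa.magphase(
    librosa.stft(noisy_y, n_fft=320, hop_length=160, win_length=320))
enhanced_y = librosa.istft(predicted_mask * noisy_mag * noisy_phase,
                           hop_length=160, win_length=320)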
Example #3
Source File: text2speech.py From OpenSeq2Seq with Apache License 2.0 | 6 votes |
def griffin_lim(magnitudes, n_iters=50, n_fft=1024):
    """
    Griffin-Lim algorithm to convert magnitude spectrograms to audio signals
    """
    phase = np.exp(2j * np.pi * np.random.rand(*magnitudes.shape))
    complex_spec = magnitudes * phase
    signal = librosa.istft(complex_spec)
    if not np.isfinite(signal).all():
        print("WARNING: audio was not finite, skipping audio saving")
        return np.array([0])

    for _ in range(n_iters):
        _, phase = librosa.magphase(librosa.stft(signal, n_fft=n_fft))
        complex_spec = magnitudes * phase
        signal = librosa.istft(complex_spec)
    return signal
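Hypothetical usage: discard the phase of a real signal and reconstruct audio from the magnitude alone.

# y is any mono float signal; values mirror the function's defaults.
mag, _ = librosa.magphase(librosa.stft(y, n_fft=1024))
audio = griffin_lim(mag, n_iters=50, n_fft=1024)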
Example #4
Source File: tts_infer.py From NeMo with Apache License 2.0 | 6 votes |
def griffin_lim(magnitudes, n_iters=50, n_fft=1024):
    """
    Griffin-Lim algorithm to convert magnitude spectrograms to audio signals
    """
    phase = np.exp(2j * np.pi * np.random.rand(*magnitudes.shape))
    complex_spec = magnitudes * phase
    signal = librosa.istft(complex_spec)
    if not np.isfinite(signal).all():
        logging.warning("audio was not finite, skipping audio saving")
        return np.array([0])

    for _ in range(n_iters):
        _, phase = librosa.magphase(librosa.stft(signal, n_fft=n_fft))
        complex_spec = magnitudes * phase
        signal = librosa.istft(complex_spec)
    return signal
Example #5
Source File: helpers.py From NeMo with Apache License 2.0 | 6 votes |
def griffin_lim(magnitudes, n_iters=50, n_fft=1024):
    """
    Griffin-Lim algorithm to convert magnitude spectrograms to audio signals
    """
    phase = np.exp(2j * np.pi * np.random.rand(*magnitudes.shape))
    complex_spec = magnitudes * phase
    signal = librosa.istft(complex_spec)
    if not np.isfinite(signal).all():
        logging.warning("audio was not finite, skipping audio saving")
        return np.array([0])

    for _ in range(n_iters):
        _, phase = librosa.magphase(librosa.stft(signal, n_fft=n_fft))
        complex_spec = magnitudes * phase
        signal = librosa.istft(complex_spec)
    return signal
Example #6
Source File: utils.py From magenta with Apache License 2.0 | 6 votes |
def griffin_lim(mag, phase_angle, n_fft, hop, num_iters):
    """Iterative algorithm for phase retrieval from a magnitude spectrogram.

    Args:
        mag: Magnitude spectrogram.
        phase_angle: Initial condition for phase.
        n_fft: Size of the FFT.
        hop: Stride of FFT. Defaults to n_fft/2.
        num_iters: Griffin-Lim iterations to perform.

    Returns:
        audio: 1-D array of float32 sound samples.
    """
    fft_config = dict(n_fft=n_fft, win_length=n_fft, hop_length=hop, center=True)
    ifft_config = dict(win_length=n_fft, hop_length=hop, center=True)
    complex_specgram = inv_magphase(mag, phase_angle)
    for i in range(num_iters):
        audio = librosa.istft(complex_specgram, **ifft_config)
        if i != num_iters - 1:
            complex_specgram = librosa.stft(audio, **fft_config)
            _, phase = librosa.magphase(complex_specgram)
            phase_angle = np.angle(phase)
            complex_specgram = inv_magphase(mag, phase_angle)
    return audio
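The inv_magphase helper is defined elsewhere in magenta's utils; a definition consistent with its use here (recombining a magnitude with a phase angle into a complex spectrogram) would be:

def inv_magphase(mag, phase_angle):
    # Inverse of magphase: rebuild the complex spectrogram from magnitude
    # and phase angle (assumed definition, based on the call sites above).
    phase = np.cos(phase_angle) + 1j * np.sin(phase_angle)
    return mag * phase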
Example #7
Source File: vocoder.py From DeepPavlov with Apache License 2.0 | 5 votes |
def griffin_lim(self, magnitudes):
    """Griffin-Lim algorithm to convert magnitude spectrograms to audio signals."""
    phase = np.exp(2j * np.pi * np.random.rand(*magnitudes.shape))
    complex_spec = magnitudes * phase
    signal = librosa.istft(complex_spec)

    for _ in range(self.n_iters):
        _, phase = librosa.magphase(librosa.stft(signal, n_fft=self.n_fft))
        complex_spec = magnitudes * phase
        signal = librosa.istft(complex_spec)
    return signal
Example #8
Source File: audio.py From Speech_emotion_recognition_BLSTM with MIT License | 5 votes |
def split_vocal_to_wav(self, filename, fp_foreground, fp_background=None):
    print(filename.split('/')[-1])
    y, sr = librosa.load(filename, sr=self._sr)
    S_full, phase = librosa.magphase(librosa.stft(y))

    # To avoid being biased by local continuity, we constrain similar frames to be
    # separated by at least 1.2 seconds.
    S_filter = librosa.decompose.nn_filter(S_full,
                                           aggregate=np.median,
                                           metric='cosine',
                                           width=int(librosa.time_to_frames(self._constrained, sr=self._sr)))
    S_filter = np.minimum(S_full, S_filter)

    margin_i, margin_v = 2, 10
    power = 2

    mask_i = librosa.util.softmask(S_filter,
                                   margin_i * (S_full - S_filter),
                                   power=power)
    mask_v = librosa.util.softmask(S_full - S_filter,
                                   margin_v * S_filter,
                                   power=power)

    S_foreground = mask_v * S_full
    S_background = mask_i * S_full

    foreground = griffinlim(S_foreground)
    fp_foreground += filename.split('/')[-1]
    sf.write(fp_foreground, foreground, sr, 'PCM_16')

    if fp_background is not None:
        background = griffinlim(S_background)
        fp_background += filename.split('/')[-1]
        sf.write(fp_background, background, sr, 'PCM_16')
Example #9
Source File: data_loader.py From pytorch-nlp with MIT License | 5 votes |
def parse_audio(self, audio_path):
    if self.augment:
        y = load_randomly_augmented_audio(audio_path, self.sample_rate)
    else:
        y = load_audio(audio_path)
    if self.noiseInjector:
        add_noise = np.random.binomial(1, self.noise_prob)
        if add_noise:
            y = self.noiseInjector.inject_noise(y)
    n_fft = int(self.sample_rate * self.window_size)
    win_length = n_fft
    hop_length = int(self.sample_rate * self.window_stride)
    # STFT
    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                     win_length=win_length, window=self.window)
    spect, phase = librosa.magphase(D)
    # S = log(S + 1)
    spect = np.log1p(spect)
    spect = torch.FloatTensor(spect)
    if self.normalize:
        mean = spect.mean()
        std = spect.std()
        spect.add_(-mean)
        spect.div_(std)
    return spect
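The same log-magnitude feature pipeline, written as a standalone function for reference (parameter values here are illustrative, not this project's defaults):

def log_magnitude_spectrogram(y, sr=16000, window_size=0.02, window_stride=0.01):
    # log(1 + |STFT|) features, as in parse_audio above.
    n_fft = int(sr * window_size)
    hop_length = int(sr * window_stride)
    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                     win_length=n_fft, window='hamming')
    spect, _ = librosa.magphase(D)
    return np.log1p(spect)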
Example #10
Source File: helpers.py From NeMo with Apache License 2.0 | 5 votes |
def waveglow_log_to_tb_func(
    swriter, tensors, step, tag="train", log_images=False, log_images_freq=1,
    n_fft=1024, hop_length=256, window="hann", mel_fb=None,
):
    loss, audio_pred, spec_target, mel_length = tensors
    if loss:
        swriter.add_scalar("loss", loss, step)
    if log_images and step % log_images_freq == 0:
        mel_length = mel_length[0]
        spec_target = spec_target[0].data.cpu().numpy()[:, :mel_length]
        swriter.add_image(
            f"{tag}_mel_target",
            plot_spectrogram_to_numpy(spec_target),
            step,
            dataformats="HWC",
        )
        if mel_fb is not None:
            mag, _ = librosa.core.magphase(
                librosa.core.stft(
                    np.nan_to_num(audio_pred[0].cpu().detach().numpy()),
                    n_fft=n_fft,
                    hop_length=hop_length,
                    window=window,
                )
            )
            mel_pred = np.matmul(mel_fb.cpu().numpy(), mag).squeeze()
            log_mel_pred = np.log(np.clip(mel_pred, a_min=1e-5, a_max=None))
            swriter.add_image(
                f"{tag}_mel_predicted",
                plot_spectrogram_to_numpy(log_mel_pred[:, :mel_length]),
                step,
                dataformats="HWC",
            )
Example #11
Source File: datautils.py From panotti with MIT License | 5 votes |
def make_phase_gram(mono_sig, sr, n_bins=128):
    stft = librosa.stft(mono_sig)  #, n_fft = (2*n_bins)-1)
    magnitude, phase = librosa.magphase(stft)  # we don't need magnitude

    # resample the phase array to match n_bins
    phase = np.resize(phase, (n_bins, phase.shape[1]))[np.newaxis, :, :, np.newaxis]
    return phase

# turn multichannel audio as multiple melgram layers
Example #12
Source File: spectrogram.py From cocktail-party with MIT License | 5 votes |
def griffin_lim(magnitude, n_fft, hop_length, n_iterations):
    """Iterative algorithm for phase retrieval from a magnitude spectrogram."""
    phase_angle = np.pi * np.random.rand(*magnitude.shape)
    D = invert_magnitude_phase(magnitude, phase_angle)
    signal = librosa.istft(D, hop_length=hop_length)

    for i in range(n_iterations):
        D = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
        _, phase = librosa.magphase(D)
        phase_angle = np.angle(phase)

        D = invert_magnitude_phase(magnitude, phase_angle)
        signal = librosa.istft(D, hop_length=hop_length)

    return signal
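invert_magnitude_phase is the same magnitude-times-unit-phase product seen in the other Griffin-Lim examples; a definition consistent with this file (an assumption) would be:

def invert_magnitude_phase(magnitude, phase_angle):
    # Recombine magnitude with a unit-magnitude phase term.
    return magnitude * np.exp(1j * phase_angle)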
Example #13
Source File: data_loader.py From training with Apache License 2.0 | 5 votes |
def parse_audio(self, audio_path):
    if self.augment:
        y = load_randomly_augmented_audio(audio_path, self.sample_rate)
    else:
        y = load_audio(audio_path)
    if self.noiseInjector:
        add_noise = np.random.binomial(1, self.noise_prob)
        if add_noise:
            y = self.noiseInjector.inject_noise(y)
    n_fft = int(self.sample_rate * self.window_size)
    win_length = n_fft
    hop_length = int(self.sample_rate * self.window_stride)
    # STFT
    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                     win_length=win_length, window=self.window)
    spect, phase = librosa.magphase(D)
    # S = log(S + 1)
    spect = np.log1p(spect)
    spect = torch.FloatTensor(spect)
    if self.normalize:
        mean = spect.mean()
        std = spect.std()
        spect.add_(-mean)
        spect.div_(std)
    return spect
Example #14
Source File: data_tools.py From Speech-enhancement with MIT License | 5 votes |
def audio_to_magnitude_db_and_phase(n_fft, hop_length_fft, audio):
    """Convert an audio signal into a spectrogram; returns the magnitude
    in dB and the phase."""
    stftaudio = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length_fft)
    stftaudio_magnitude, stftaudio_phase = librosa.magphase(stftaudio)

    stftaudio_magnitude_db = librosa.amplitude_to_db(
        stftaudio_magnitude, ref=np.max)

    return stftaudio_magnitude_db, stftaudio_phase
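A hedged sketch of the inverse path, from (magnitude in dB, phase) back to audio; the function name is hypothetical. Note that amplitude_to_db(..., ref=np.max) normalizes by the peak, so the absolute scale is only recoverable if that reference value is stored separately:

def magnitude_db_and_phase_to_audio(hop_length_fft, mag_db, phase):
    # Undo amplitude_to_db, recombine with phase, and invert the STFT.
    mag = librosa.db_to_amplitude(mag_db, ref=1.0)
    return librosa.istft(mag * phase, hop_length=hop_length_fft)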
Example #15
Source File: audio_dataset.py From OpenNMT-py with MIT License | 5 votes |
def extract_features(self, audio_path):
    # torchaudio loading options recently changed. It's probably
    # straightforward to rewrite the audio handling to make use of
    # up-to-date torchaudio, but in the meantime there is a legacy
    # method which uses the old defaults
    sound, sample_rate_ = torchaudio.legacy.load(audio_path)
    if self.truncate and self.truncate > 0:
        if sound.size(0) > self.truncate:
            sound = sound[:self.truncate]

    assert sample_rate_ == self.sample_rate, \
        'Sample rate of %s != -sample_rate (%d vs %d)' \
        % (audio_path, sample_rate_, self.sample_rate)

    sound = sound.numpy()
    if len(sound.shape) > 1:
        if sound.shape[1] == 1:
            sound = sound.squeeze()
        else:
            sound = sound.mean(axis=1)  # average multiple channels

    n_fft = int(self.sample_rate * self.window_size)
    win_length = n_fft
    hop_length = int(self.sample_rate * self.window_stride)
    # STFT
    d = librosa.stft(sound, n_fft=n_fft, hop_length=hop_length,
                     win_length=win_length, window=self.window)
    spect, _ = librosa.magphase(d)
    spect = np.log1p(spect)
    spect = torch.FloatTensor(spect)
    if self.normalize_audio:
        mean = spect.mean()
        std = spect.std()
        spect.add_(-mean)
        spect.div_(std)
    return spect
Example #16
Source File: utils.py From Speaker-Diarization with Apache License 2.0 | 5 votes |
def load_data(path, win_length=400, sr=16000, hop_length=160, n_fft=512,
              spec_len=250, mode='train'):
    wav = load_wav(path, sr=sr, mode=mode)
    linear_spect = lin_spectogram_from_wav(wav, hop_length, win_length, n_fft)
    mag, _ = librosa.magphase(linear_spect)  # magnitude
    mag_T = mag.T
    freq, time = mag_T.shape
    if mode == 'train':
        randtime = np.random.randint(0, time - spec_len)
        spec_mag = mag_T[:, randtime:randtime + spec_len]
    else:
        spec_mag = mag_T

    # preprocessing: subtract mean, divide by time-wise standard deviation
    mu = np.mean(spec_mag, 0, keepdims=True)
    std = np.std(spec_mag, 0, keepdims=True)
    return (spec_mag - mu) / (std + 1e-5)
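lin_spectogram_from_wav (sic) is a small helper from the same project. Judging from the transposes in load_data (mag.T yields a (freq, time) array), it likely returns the transposed STFT:

def lin_spectogram_from_wav(wav, hop_length, win_length, n_fft=1024):
    # Likely definition, inferred from how load_data transposes the result.
    linear = librosa.stft(wav, n_fft=n_fft, win_length=win_length,
                          hop_length=hop_length)
    return linear.T  # (time, freq)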
Example #17
Source File: preprocess.py From Speaker-Diarization with Apache License 2.0 | 5 votes |
def load_data(path, split=False, win_length=400, sr=16000, hop_length=160,
              n_fft=512, min_slice=720):
    wav = load_wav(path, sr=sr)
    linear_spect = lin_spectogram_from_wav(wav, hop_length, win_length, n_fft)
    mag, _ = librosa.magphase(linear_spect)  # magnitude
    mag_T = mag.T
    freq, time = mag_T.shape
    spec_mag = mag_T

    utterances_spec = []
    if split:
        # The minimum number of timesteps for each slice of the spectrum.
        minSpec = min_slice // (1000 // (sr // hop_length))
        randStarts = np.random.randint(0, time, 10)  # generate 10 slices at most
        for start in randStarts:
            if time - start <= minSpec:
                continue
            randDuration = np.random.randint(minSpec, time - start)
            spec_mag = mag_T[:, start:start + randDuration]

            # preprocessing: subtract mean, divide by time-wise standard deviation
            mu = np.mean(spec_mag, 0, keepdims=True)
            std = np.std(spec_mag, 0, keepdims=True)
            spec_mag = (spec_mag - mu) / (std + 1e-5)
            utterances_spec.append(spec_mag)
    else:
        # preprocessing: subtract mean, divide by time-wise standard deviation
        mu = np.mean(spec_mag, 0, keepdims=True)
        std = np.std(spec_mag, 0, keepdims=True)
        spec_mag = (spec_mag - mu) / (std + 1e-5)
        utterances_spec.append(spec_mag)

    return utterances_spec
Example #18
Source File: speakerDiarization.py From Speaker-Diarization with Apache License 2.0 | 5 votes |
def load_data(path, win_length=400, sr=16000, hop_length=160, n_fft=512,
              embedding_per_second=0.5, overlap_rate=0.5):
    wav, intervals = load_wav(path, sr=sr)
    linear_spect = lin_spectogram_from_wav(wav, hop_length, win_length, n_fft)
    mag, _ = librosa.magphase(linear_spect)  # magnitude
    mag_T = mag.T
    freq, time = mag_T.shape
    spec_mag = mag_T

    spec_len = sr / hop_length / embedding_per_second
    spec_hop_len = spec_len * (1 - overlap_rate)

    cur_slide = 0.0
    utterances_spec = []

    while True:  # sliding window
        if cur_slide + spec_len > time:
            break
        spec_mag = mag_T[:, int(cur_slide + 0.5):int(cur_slide + spec_len + 0.5)]

        # preprocessing: subtract mean, divide by time-wise standard deviation
        mu = np.mean(spec_mag, 0, keepdims=True)
        std = np.std(spec_mag, 0, keepdims=True)
        spec_mag = (spec_mag - mu) / (std + 1e-5)
        utterances_spec.append(spec_mag)

        cur_slide += spec_hop_len

    return utterances_spec, intervals
Example #19
Source File: pre_processing.py From audio-source-separation with MIT License | 5 votes |
def process(file_path, direc, destination_path, phase_bool, destination_phase_path):
    t1, t2 = librosa.load(file_path, sr=None)
    duration = librosa.get_duration(t1, t2)
    regex = re.compile(r'\d+')
    index = regex.findall(direc)

    for start in range(30, 200):
        wave_array, fs = librosa.load(file_path, sr=44100, offset=start * 0.3,
                                      duration=0.3)
        # Note: center expects a bool; the original passed the string 'True'.
        mag, phase = librosa.magphase(librosa.stft(wave_array, n_fft=1024,
                                                   hop_length=256,
                                                   window='hann', center=True))
        if not os.path.exists(destination_path):
            os.makedirs(destination_path)

        # magnitude stored as a tensor, phase as a NumPy array
        torch.save(torch.from_numpy(np.expand_dims(mag, axis=0)),
                   os.path.join(destination_path,
                                index[0] + "_" + str(start) + '_m.pt'))
        if phase_bool:
            if not os.path.exists(destination_phase_path):
                os.makedirs(destination_phase_path)
            np.save(os.path.join(destination_phase_path,
                                 index[0] + "_" + str(start) + '_p.npy'),
                    phase)
    return

# --------- training data -------------------------------------
Example #20
Source File: audio_dataset.py From OpenNMT-kpg-release with MIT License | 5 votes |
def extract_features(self, audio_path):
    # torchaudio loading options recently changed. It's probably
    # straightforward to rewrite the audio handling to make use of
    # up-to-date torchaudio, but in the meantime there is a legacy
    # method which uses the old defaults
    sound, sample_rate_ = torchaudio.legacy.load(audio_path)
    if self.truncate and self.truncate > 0:
        if sound.size(0) > self.truncate:
            sound = sound[:self.truncate]

    assert sample_rate_ == self.sample_rate, \
        'Sample rate of %s != -sample_rate (%d vs %d)' \
        % (audio_path, sample_rate_, self.sample_rate)

    sound = sound.numpy()
    if len(sound.shape) > 1:
        if sound.shape[1] == 1:
            sound = sound.squeeze()
        else:
            sound = sound.mean(axis=1)  # average multiple channels

    n_fft = int(self.sample_rate * self.window_size)
    win_length = n_fft
    hop_length = int(self.sample_rate * self.window_stride)
    # STFT
    d = librosa.stft(sound, n_fft=n_fft, hop_length=hop_length,
                     win_length=win_length, window=self.window)
    spect, _ = librosa.magphase(d)
    spect = np.log1p(spect)
    spect = torch.FloatTensor(spect)
    if self.normalize_audio:
        mean = spect.mean()
        std = spect.std()
        spect.add_(-mean)
        spect.div_(std)
    return spect
Example #21
Source File: spectrogram.py From cocktail-party with MIT License | 5 votes |
def signal_to_mel_spectrogram(self, audio_signal, log=True, get_phase=False):
    signal = audio_signal.get_data(channel_index=0)
    D = librosa.core.stft(signal, n_fft=self._N_FFT, hop_length=self._HOP_LENGTH)
    magnitude, phase = librosa.core.magphase(D)

    mel_spectrogram = np.dot(self._MEL_FILTER, magnitude)
    mel_spectrogram = mel_spectrogram ** 2

    if log:
        mel_spectrogram = librosa.power_to_db(mel_spectrogram)

    if get_phase:
        return mel_spectrogram, phase
    else:
        return mel_spectrogram
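The _MEL_FILTER matrix applied to the magnitude is presumably a mel filter bank; with librosa it would be built once along these lines (sample rate, FFT size, and band count here are assumptions, not the project's values):

# Assumed construction of the mel filter bank (values are placeholders).
MEL_FILTER = librosa.filters.mel(sr=16000, n_fft=640, n_mels=80)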
Example #22
Source File: audio_dataset.py From encoder-agnostic-adaptation with MIT License | 5 votes |
def extract_features(self, audio_path):
    # torchaudio loading options recently changed. It's probably
    # straightforward to rewrite the audio handling to make use of
    # up-to-date torchaudio, but in the meantime there is a legacy
    # method which uses the old defaults
    sound, sample_rate_ = torchaudio.legacy.load(audio_path)
    if self.truncate and self.truncate > 0:
        if sound.size(0) > self.truncate:
            sound = sound[:self.truncate]

    assert sample_rate_ == self.sample_rate, \
        'Sample rate of %s != -sample_rate (%d vs %d)' \
        % (audio_path, sample_rate_, self.sample_rate)

    sound = sound.numpy()
    if len(sound.shape) > 1:
        if sound.shape[1] == 1:
            sound = sound.squeeze()
        else:
            sound = sound.mean(axis=1)  # average multiple channels

    n_fft = int(self.sample_rate * self.window_size)
    win_length = n_fft
    hop_length = int(self.sample_rate * self.window_stride)
    # STFT
    d = librosa.stft(sound, n_fft=n_fft, hop_length=hop_length,
                     win_length=win_length, window=self.window)
    spect, _ = librosa.magphase(d)
    spect = np.log1p(spect)
    spect = torch.FloatTensor(spect)
    if self.normalize_audio:
        mean = spect.mean()
        std = spect.std()
        spect.add_(-mean)
        spect.div_(std)
    return spect
Example #23
Source File: data_loader.py From LipReading with MIT License | 5 votes |
def parse_audio(self, audio_path):
    if self.augment:
        y = load_randomly_augmented_audio(audio_path, self.sample_rate)
    else:
        y = load_audio(audio_path)
    if self.noiseInjector:
        add_noise = np.random.binomial(1, self.noise_prob)
        if add_noise:
            y = self.noiseInjector.inject_noise(y)
    n_fft = int(self.sample_rate * self.window_size)
    win_length = n_fft
    hop_length = int(self.sample_rate * self.window_stride)
    # STFT
    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                     win_length=win_length, window=self.window)
    spect, phase = librosa.magphase(D)
    # S = log(S + 1)
    spect = np.log1p(spect)
    spect = torch.FloatTensor(spect)
    if self.normalize:
        mean = spect.mean()
        std = spect.std()
        spect.add_(-mean)
        spect.div_(std)
    return spect
Example #24
Source File: audio_dataset.py From ITDD with MIT License | 5 votes |
def extract_features(audio_path, sample_rate, truncate, window_size,
                     window_stride, window, normalize_audio):
    global torchaudio, librosa, np
    import torchaudio
    import librosa
    import numpy as np

    sound, sample_rate_ = torchaudio.load(audio_path)
    if truncate and truncate > 0:
        if sound.size(0) > truncate:
            sound = sound[:truncate]

    assert sample_rate_ == sample_rate, \
        'Sample rate of %s != -sample_rate (%d vs %d)' \
        % (audio_path, sample_rate_, sample_rate)

    sound = sound.numpy()
    if len(sound.shape) > 1:
        if sound.shape[1] == 1:
            sound = sound.squeeze()
        else:
            sound = sound.mean(axis=1)  # average multiple channels

    n_fft = int(sample_rate * window_size)
    win_length = n_fft
    hop_length = int(sample_rate * window_stride)
    # STFT
    d = librosa.stft(sound, n_fft=n_fft, hop_length=hop_length,
                     win_length=win_length, window=window)
    spect, _ = librosa.magphase(d)
    spect = np.log1p(spect)
    spect = torch.FloatTensor(spect)
    if normalize_audio:
        mean = spect.mean()
        std = spect.std()
        spect.add_(-mean)
        spect.div_(std)
    return spect
Example #25
Source File: compute_features.py From laughter-detection with MIT License | 5 votes |
def compute_mfcc_features(y, sr):
    mfcc_feat = librosa.feature.mfcc(y, sr, n_mfcc=12, n_mels=12,
                                     hop_length=int(sr / 100),
                                     n_fft=int(sr / 40)).T
    S, phase = librosa.magphase(librosa.stft(y, hop_length=int(sr / 100)))
    rms = librosa.feature.rms(S=S).T
    return np.hstack([mfcc_feat, rms])
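Hypothetical usage; the result stacks 12 MFCCs with a frame-wise RMS column, both computed at a 10 ms hop:

y, sr = librosa.load('clip.wav', sr=None)  # 'clip.wav' is a placeholder path
feats = compute_mfcc_features(y, sr)       # shape: (n_frames, 13) = 12 MFCCs + RMS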
Example #26
Source File: data_loader.py From end2end-asr-pytorch with MIT License | 5 votes |
def parse_audio(self, audio_path):
    if self.augment:
        y = load_randomly_augmented_audio(audio_path, self.sample_rate)
    else:
        y = load_audio(audio_path)

    if self.noiseInjector:
        logging.info("inject noise")
        add_noise = np.random.binomial(1, self.noise_prob)
        if add_noise:
            y = self.noiseInjector.inject_noise(y)

    n_fft = int(self.sample_rate * self.window_size)
    win_length = n_fft
    hop_length = int(self.sample_rate * self.window_stride)

    # Short-time Fourier transform (STFT)
    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                     win_length=win_length, window=self.window)
    spect, phase = librosa.magphase(D)

    # S = log(S + 1)
    spect = np.log1p(spect)
    spect = torch.FloatTensor(spect)

    if self.normalize:
        mean = spect.mean()
        std = spect.std()
        spect.add_(-mean)
        spect.div_(std)

    return spect
Example #27
Source File: fft.py From pumpp with ISC License | 5 votes |
def transform_audio(self, y):
    '''Compute the STFT magnitude and phase.

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT magnitude
        data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    D = stft(y, hop_length=self.hop_length, n_fft=self.n_fft)
    D = fix_length(D, n_frames)

    mag, phase = magphase(D)
    if self.log:
        mag = amplitude_to_db(mag, ref=np.max)

    return {'mag': to_dtype(mag.T[self.idx], self.dtype),
            'phase': to_dtype(np.angle(phase.T)[self.idx], self.dtype)}
Example #28
Source File: fft.py From pumpp with ISC License | 5 votes |
def transform_audio(self, y):
    '''Compute the STFT magnitude and phase differential.

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT magnitude
        data['dphase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    D = stft(y, hop_length=self.hop_length, n_fft=self.n_fft)
    D = fix_length(D, n_frames)

    mag, phase = magphase(D)
    if self.log:
        mag = amplitude_to_db(mag, ref=np.max)

    phase = phase_diff(np.angle(phase.T)[self.idx], self.conv)

    return {'mag': to_dtype(mag.T[self.idx], self.dtype),
            'dphase': to_dtype(phase, self.dtype)}
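phase_diff comes from pumpp's utilities; conceptually it is a first-order difference of the unwrapped phase along the frame axis (the conv argument only adjusts layout for convolutional models). A rough stand-in, not pumpp's exact code:

def phase_diff_sketch(phase):
    # First frame keeps its absolute phase; later frames store the
    # frame-to-frame change of the unwrapped phase (approximation).
    dphase = np.zeros_like(phase)
    dphase[0] = phase[0]
    dphase[1:] = np.diff(np.unwrap(phase, axis=0), axis=0)
    return dphase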
Example #29
Source File: cqt.py From pumpp with ISC License | 5 votes |
def transform_audio(self, y):
    '''Compute the CQT

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape = (n_frames, n_bins)
            The CQT magnitude
        data['phase'] : np.ndarray, shape = mag.shape
            The CQT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
            fmin=self.fmin,
            n_bins=(self.n_octaves * self.over_sample * 12),
            bins_per_octave=(self.over_sample * 12))

    C = fix_length(C, n_frames)

    cqtm, phase = magphase(C)
    if self.log:
        cqtm = amplitude_to_db(cqtm, ref=np.max)

    return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
            'phase': to_dtype(np.angle(phase).T[self.idx], self.dtype)}
Example #30
Source File: cqt.py From pumpp with ISC License | 5 votes |
def transform_audio(self, y):
    '''Compute the CQT

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape = (n_frames, n_bins)
            The CQT magnitude
        data['phase'] : np.ndarray, shape = mag.shape
            The CQT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
            fmin=self.fmin,
            n_bins=(self.n_octaves * self.over_sample * 12),
            bins_per_octave=(self.over_sample * 12))

    C = fix_length(C, n_frames)

    cqtm, phase = magphase(C)
    if self.log:
        cqtm = amplitude_to_db(cqtm, ref=np.max)

    dphase = phase_diff(np.angle(phase).T[self.idx], self.conv)

    return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
            'dphase': to_dtype(dphase, self.dtype)}