Python librosa.get_duration() Examples
The following are 30 code examples of librosa.get_duration().
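All of the snippets below revolve around the same small API, so here is a minimal sketch of the three common ways to call librosa.get_duration(): from an in-memory buffer, from a precomputed spectrogram, and straight from a file header. The file path is hypothetical, and keyword names vary slightly across librosa versions (newer releases rename filename= to path=).

import librosa
import numpy as np

# 1) From an audio buffer: duration = len(y) / sr
y, sr = librosa.load("example.wav", sr=None)   # hypothetical file
dur_buf = librosa.get_duration(y=y, sr=sr)

# 2) From a precomputed spectrogram: the frame count is mapped back to
#    seconds, so n_fft and hop_length must match the STFT parameters
S = np.abs(librosa.stft(y, n_fft=2048, hop_length=512))
dur_spec = librosa.get_duration(S=S, sr=sr, n_fft=2048, hop_length=512)

# 3) From the file itself, without keeping the decoded signal around
dur_file = librosa.get_duration(filename="example.wav")  # path= in librosa >= 0.10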
Example #1
Source File: Audio.py From mugen with MIT License
def __init__(self, file: str, *, sample_rate: int = 44100):
    """
    Parameters
    ----------
    file
        Audio file to load
    """
    self.file = file
    self.samples, self.sample_rate = librosa.load(file, sr=sample_rate)
    self.duration = librosa.get_duration(y=self.samples, sr=self.sample_rate)
Example #2
Source File: data_tools.py From Speech-enhancement with MIT License
def audio_files_to_numpy(audio_dir, list_audio_files, sample_rate,
                         frame_length, hop_length_frame, min_duration):
    """This function takes the audio files of a directory and merges them
    into a numpy matrix of size (nb_frame, frame_length) for a sliding
    window of size hop_length_frame"""

    list_sound_array = []

    for file in list_audio_files:
        # open the audio file
        y, sr = librosa.load(os.path.join(audio_dir, file), sr=sample_rate)
        total_duration = librosa.get_duration(y=y, sr=sr)

        if total_duration >= min_duration:
            list_sound_array.append(audio_to_audio_frame_stack(
                y, frame_length, hop_length_frame))
        else:
            print(
                f"The following file {os.path.join(audio_dir, file)} is below the min duration")

    return np.vstack(list_sound_array)
Example #3
Source File: test_core.py From muda with ISC License
def test_save(jam_in, audio_file, strict, fmt):

    jam = muda.load_jam_audio(jam_in, audio_file)

    _, jamfile = tempfile.mkstemp(suffix='.jams')
    _, audfile = tempfile.mkstemp(suffix='.wav')

    muda.save(audfile, jamfile, jam, strict=strict, fmt=fmt)

    jam2 = muda.load_jam_audio(jamfile, audfile, fmt=fmt)
    jam2_raw = jams.load(jamfile, fmt=fmt)

    os.unlink(audfile)
    os.unlink(jamfile)

    assert hasattr(jam2.sandbox, 'muda')
    assert '_audio' in jam2.sandbox.muda
    assert '_audio' not in jam2_raw.sandbox.muda

    duration = librosa.get_duration(**jam2.sandbox.muda['_audio'])
    assert jam2.file_metadata.duration == duration
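The muda tests here and below all read the track length with get_duration(**jam.sandbox.muda['_audio']). muda stashes the decoded audio on the JAMS sandbox as a dict whose keys ('y' and 'sr') line up with get_duration()'s keyword arguments, so the ** unpacking is just get_duration(y=..., sr=...). A standalone sketch of the idiom, with a synthetic buffer standing in for muda's sandbox:

import librosa
import numpy as np

# Stand-in for jam.sandbox.muda['_audio']: one second of silence at 22.05 kHz
audio = {'y': np.zeros(22050, dtype=np.float32), 'sr': 22050}

# Equivalent to librosa.get_duration(y=audio['y'], sr=audio['sr'])
duration = librosa.get_duration(**audio)
print(duration)  # -> 1.0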
Example #4
Source File: test_deformers.py From muda with ISC License
def test_ir_convolution(ir_files, jam_fixture, n_fft, rolloff_value):
    D = muda.deformers.IRConvolution(ir_files=ir_files, n_fft=n_fft,
                                     rolloff_value=rolloff_value)
    jam_orig = deepcopy(jam_fixture)
    orig_duration = librosa.get_duration(**jam_orig.sandbox.muda['_audio'])

    for jam_new in D.transform(jam_orig):
        # Verify that the original jam reference hasn't changed
        assert jam_new is not jam_orig

        # Testing with shifted impulse
        __test_shifted_impulse(jam_orig, jam_new, ir_files, orig_duration,
                               n_fft=n_fft, rolloff_value=rolloff_value)

        # Verify that the state and history objects are intact
        __test_deformer_history(D, jam_new.sandbox.muda.history[-1])

    # Serialization test
    D2 = muda.deserialize(muda.serialize(D))
    __test_params(D, D2)
Example #5
Source File: rhythm.py From pumpp with ISC License
def transform_audio(self, y):
    '''Compute the tempogram

    Parameters
    ----------
    y : np.ndarray
        Audio buffer

    Returns
    -------
    data : dict
        data['tempogram'] : np.ndarray, shape=(n_frames, win_length)
            The tempogram
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    tgram = tempogram(y=y, sr=self.sr,
                      hop_length=self.hop_length,
                      win_length=self.win_length)

    tgram = to_dtype(fix_length(tgram, n_frames), self.dtype)
    return {'tempogram': tgram.T[self.idx]}
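The pumpp extractors in this and several later examples share one pattern: get_duration() fixes a target frame count up front, and fix_length() pads or trims every feature matrix to it so all features stay time-aligned. A rough sketch of that duration-to-frames step, using public librosa helpers in place of pumpp's internal n_frames() (whose exact rounding may differ):

import librosa
import numpy as np

sr, hop_length = 22050, 512
y = np.zeros(5 * sr, dtype=np.float32)  # five seconds of silence as a stand-in signal

duration = librosa.get_duration(y=y, sr=sr)
# Convert seconds to a frame count for the given hop length
n_frames = librosa.time_to_frames(duration, sr=sr, hop_length=hop_length)

S = np.abs(librosa.stft(y, hop_length=hop_length))
# size= is keyword-only in recent librosa; older versions also accept it positionally
S = librosa.util.fix_length(S, size=n_frames)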
Example #6
Source File: test_deformers.py From muda with ISC License
def test_colorednoise(n_samples, color, weight_min, weight_max, jam_test_silence):
    D = muda.deformers.ColoredNoise(n_samples=n_samples, color=color,
                                    weight_min=weight_min,
                                    weight_max=weight_max, rng=0)
    jam_orig = deepcopy(jam_test_silence)
    orig_duration = librosa.get_duration(**jam_orig.sandbox.muda['_audio'])

    n_out = 0
    for jam_new in D.transform(jam_orig):
        assert jam_new is not jam_test_silence
        __test_effect(jam_orig, jam_test_silence)

        assert not np.allclose(jam_orig.sandbox.muda['_audio']['y'],
                               jam_new.sandbox.muda['_audio']['y'])

        # Verify that duration hasn't changed
        assert librosa.get_duration(**jam_new.sandbox.muda['_audio']) == orig_duration

        # Verify that the state and history objects are intact
        __test_deformer_history(D, jam_new.sandbox.muda.history[-1])
        __test_effect(jam_orig, jam_new)

        # Verify the colored noise has the desired slope for its log-log
        # scale power spectrum
        color = jam_new.sandbox.muda.history[-1]['state']['color']
        __test_color_slope(jam_orig, jam_new, color)
        n_out += 1

    assert n_out == n_samples

    # Serialization test
    D2 = muda.deserialize(muda.serialize(D))
    __test_params(D, D2)
Example #7
Source File: labeled_example.py From speechless with MIT License
def duration_in_s(self) -> float:
    try:
        return librosa.get_duration(filename=str(self.audio_file))
    except Exception as e:
        log("Failed to get duration of {}: {}".format(self.audio_file, e))
        return 0
Example #8
Source File: speech_cls_task.py From delta with Apache License 2.0
def get_duration(self, filename, sr):  # pylint: disable=invalid-name
    ''' time in seconds '''
    if filename.endswith('.npy'):
        nframe = np.load(filename).shape[0]
        return librosa.frames_to_time(
            nframe, hop_length=self._winstep * sr, sr=sr)
    if filename.endswith('.wav'):
        return librosa.get_duration(filename=filename)
    raise ValueError("filename suffix not .npy or .wav: {}".format(
        os.path.splitext(filename)[-1]))
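The .npy branch above recovers a duration from a saved feature matrix rather than from audio: frames_to_time() multiplies the frame count by the hop length (here, a window step in seconds scaled by the sample rate to get samples) and divides by the sample rate. A self-contained sketch with illustrative numbers (winstep is a hypothetical stand-in for self._winstep):

import librosa

sr = 16000
winstep = 0.010   # hypothetical 10 ms frame step
n_frames = 500    # e.g., rows of a saved feature matrix

# frames_to_time(n, sr, hop_length) == n * hop_length / sr
duration = librosa.frames_to_time(n_frames, sr=sr, hop_length=int(winstep * sr))
print(duration)   # -> 5.0 seconds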
Example #9
Source File: get_hi-mia_data.py From NeMo with Apache License 2.0
def process_single_line(line: str):
    line = line.strip()
    y, sr = librosa.load(line, sr=None)
    if sr != 16000:
        y, sr = librosa.load(line, sr=16000)
        librosa.output.write_wav(line, y, sr)
    dur = librosa.get_duration(y=y, sr=sr)
    if 'test' in line.split("/"):
        speaker = line.split('/')[-1].split('.')[0].split('_')[0]
    else:
        speaker = line.split('/')[-2]
    speaker = list(speaker)
    speaker = ''.join(speaker)
    meta = {"audio_filepath": line, "duration": float(dur), "label": speaker}
    return meta
Example #10
Source File: scp_to_manifest.py From NeMo with Apache License 2.0
def main(scp, id, out, split=False):
    if os.path.exists(out):
        os.remove(out)
    scp_file = open(scp, 'r').readlines()

    lines = []
    speakers = []
    with open(out, 'w') as outfile:
        for line in tqdm(scp_file):
            line = line.strip()
            y, sr = l.load(line, sr=None)
            dur = l.get_duration(y=y, sr=sr)
            speaker = line.split('/')[id]
            speaker = list(speaker)
            speaker = ''.join(speaker)
            speakers.append(speaker)
            meta = {"audio_filepath": line, "duration": float(dur), "label": speaker}
            lines.append(meta)
            json.dump(meta, outfile)
            outfile.write("\n")

    path = os.path.dirname(out)
    if split:
        sss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
        for train_idx, test_idx in sss.split(speakers, speakers):
            logging.info(len(train_idx))

        out = os.path.join(path, 'train.json')
        write_file(out, lines, train_idx)
        out = os.path.join(path, 'dev.json')
        write_file(out, lines, test_idx)
Example #11
Source File: get_databaker_data.py From NeMo with Apache License 2.0
def __convert_waves(wavedir, converted_wavedir, wavename, sr):
    """
    Converts a wav file to target sample rate.
    """
    wavepath = os.path.join(wavedir, wavename)
    converted_wavepath = os.path.join(converted_wavedir, wavename)
    y, sr = librosa.load(wavepath, sr=sr)
    duration = librosa.get_duration(y=y, sr=sr)
    librosa.output.write_wav(converted_wavepath, y, sr)
    return wavename, round(duration, 2)
Example #12
Source File: test_deformers.py From muda with ISC License
def __test_time(jam_orig, jam_new, rate):
    # Test the track length
    ap_(librosa.get_duration(**jam_orig.sandbox.muda['_audio']),
        rate * librosa.get_duration(**jam_new.sandbox.muda['_audio']))

    # Test the metadata
    ap_(jam_orig.file_metadata.duration, rate * jam_new.file_metadata.duration)

    # Test each annotation
    for ann_orig, ann_new in zip(jam_orig.annotations, jam_new.annotations):
        # JAMS 0.2.1 support
        if hasattr(ann_orig, 'time'):
            ap_(ann_orig.time, rate * ann_new.time)
            ap_(ann_orig.duration, rate * ann_new.duration)

        assert len(ann_orig.data) == len(ann_new.data)
        for obs1, obs2 in zip(ann_orig, ann_new):
            ap_(obs1.time, rate * obs2.time)
            ap_(obs1.duration, rate * obs2.duration)

            if ann_orig.namespace == 'tempo':
                ap_(rate * obs1.value, obs2.value)
Example #13
Source File: test_deformers.py From muda with ISC License
def test_background(noise, n_samples, weight_min, weight_max, jam_fixture):
    D = muda.deformers.BackgroundNoise(files=noise,
                                       n_samples=n_samples,
                                       weight_min=weight_min,
                                       weight_max=weight_max)
    jam_orig = deepcopy(jam_fixture)
    orig_duration = librosa.get_duration(**jam_orig.sandbox.muda['_audio'])

    n_out = 0
    for jam_new in D.transform(jam_orig):
        assert jam_new is not jam_fixture
        __test_effect(jam_orig, jam_fixture)

        assert not np.allclose(jam_orig.sandbox.muda['_audio']['y'],
                               jam_new.sandbox.muda['_audio']['y'])

        d_state = jam_new.sandbox.muda.history[-1]['state']
        filename = d_state['filename']
        start = d_state['start']
        stop = d_state['stop']

        with psf.SoundFile(str(filename), mode='r') as soundf:
            max_index = len(soundf)
            noise_sr = soundf.samplerate

        assert 0 <= start < stop
        assert start < stop <= max_index
        assert ((stop - start) / float(noise_sr)) == orig_duration

        __test_effect(jam_orig, jam_new)
        n_out += 1

    assert n_out == n_samples

    # Serialization test
    D2 = muda.deserialize(muda.serialize(D))
    __test_params(D, D2)
Example #14
Source File: base.py From crema with BSD 2-Clause "Simplified" License
def predict(self, filename=None, y=None, sr=None, outputs=None):
    '''Predict annotations

    Parameters
    ----------
    filename : str (optional)
        Path to audio file

    y, sr : (optional)
        Audio buffer and sample rate

    outputs : (optional)
        Pre-computed model outputs as produced by `CremaModel.outputs`.
        If provided, then predictions are derived from these instead of
        `filename` or `(y, sr)`.

    .. note:: At least one of `filename`, `y, sr` must be provided.

    Returns
    -------
    jams.Annotation
        The predicted annotation
    '''
    # Pump the input features
    output_key = self.model.output_names[0]

    if outputs is None:
        outputs = self.outputs(filename=filename, y=y, sr=sr)

    # Invert the prediction.  This is always the first output layer.
    ann = self.pump[output_key].inverse(outputs[output_key])

    # Populate the metadata
    ann.annotation_metadata.version = self.version
    ann.annotation_metadata.annotation_tools = 'CREMA {}'.format(version)
    ann.annotation_metadata.data_source = 'program'
    ann.duration = librosa.get_duration(y=y, sr=sr, filename=filename)

    return ann
Example #15
Source File: test_deformers.py From muda with ISC License
def __test_duration(jam_orig, jam_shifted, orig_duration):
    # Verify that the total duration hasn't changed
    assert librosa.get_duration(**jam_shifted.sandbox.muda['_audio']) == orig_duration

    shifted_data = jam_shifted.search(namespace='chord')[0].data

    # Verify that the duration of the last delayed annotation is in the valid range:
    # expected duration of last annotation = duration - onset of last annotation
    ref_duration = orig_duration - shifted_data[-1][0]  # [-1][0] is the 'time' of the last observation

    # Deformed duration:
    deformed_duration = shifted_data[-1][1]  # [-1][1] is the 'duration' of the last observation

    isclose_(ref_duration, deformed_duration, rtol=1e-5, atol=1e-1)
Example #16
Source File: pre_processing.py From audio-source-separation with MIT License
def process(file_path, direc, destination_path, phase_bool, destination_phase_path):
    t1, t2 = librosa.load(file_path, sr=None)
    # (y, sr) passed positionally; newer librosa versions expect
    # get_duration(y=..., sr=...) as keyword arguments
    duration = librosa.get_duration(t1, t2)

    regex = re.compile(r'\d+')
    index = regex.findall(direc)
    # print(index)

    num_segments = 0
    # mean = np.zeros((513, 52))
    # var = np.zeros((513, 52))

    for start in range(30, int(200)):
        wave_array, fs = librosa.load(file_path, sr=44100,
                                      offset=start * 0.3, duration=0.3)
        # note: center='True' is a (truthy) string, so it behaves like center=True
        mag, phase = librosa.magphase(librosa.stft(wave_array, n_fft=1024,
                                                   hop_length=256,
                                                   window='hann', center='True'))
        # mean += mag
        # num_segments += 1

        if not os.path.exists(destination_path):
            os.makedirs(destination_path)

        # print(mag.shape)
        # print(torch.from_numpy(np.expand_dims(mag, axis=0)).shape)
        # magnitude stored as tensor, phase as np array
        # pickle.dump(torch.from_numpy(np.expand_dims(mag, axis=2)),
        #             open(os.path.join(destination_path, (index[0] + "_" + str(start) + '_m.pt')), 'wb'))
        torch.save(torch.from_numpy(np.expand_dims(mag, axis=0)),
                   os.path.join(destination_path, (index[0] + "_" + str(start) + '_m.pt')))

        if phase_bool:
            if not os.path.exists(destination_phase_path):
                os.makedirs(destination_phase_path)
            np.save(os.path.join(destination_phase_path,
                                 (index[0] + "_" + str(start) + '_p.npy')), phase)
    return

# --------- training data -------------------------------------
Example #17
Source File: audio.py From Multilingual_Text_to_Speech with MIT License
def duration(data):
    """Return duration of an audio signal in seconds."""
    # note: newer librosa versions expect the buffer as a keyword argument,
    # i.e. get_duration(y=data, sr=hp.sample_rate)
    return librosa.get_duration(data, sr=hp.sample_rate)
Example #18
Source File: utils.py From vadnet with GNU Lesser General Public License v3.0
def audio_dur(path, ext='', root=''):
    path = os.path.join(root, '{}{}'.format(path, ext))
    try:
        return lr.get_duration(filename=path)
    except Exception as ex:
        print_err('could not read {}\n{}'.format(path, ex))
        return 0
Example #19
Source File: eda_vlsp.py From automatic_speech_recognition with GNU General Public License v3.0
def stat_acoustic():
    print("\nAcoustic Data:")
    wav_folder = join(ROOT_FOLDER, "data", "vlsp", "wav")
    files = listdir(wav_folder)
    files = [join(wav_folder, file) for file in files]
    durations = [librosa.get_duration(filename=file) for file in files]
    durations = pd.Series(durations)
    print(f"Total: {durations.sum():.2f} seconds ({durations.sum() / 3600:.2f} hours)")
    print(durations.describe())
Example #20
Source File: mel.py From pumpp with ISC License
def transform_audio(self, y):
    '''Compute the Mel spectrogram

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape=(n_frames, n_mels)
            The Mel spectrogram
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    mel = np.sqrt(melspectrogram(y=y, sr=self.sr,
                                 n_fft=self.n_fft,
                                 hop_length=self.hop_length,
                                 n_mels=self.n_mels,
                                 fmax=self.fmax))

    mel = fix_length(mel, n_frames)

    if self.log:
        mel = amplitude_to_db(mel, ref=np.max)

    # Type convert
    mel = to_dtype(mel, self.dtype)

    return {'mag': mel.T[self.idx]}
Example #21
Source File: cqt.py From pumpp with ISC License
def transform_audio(self, y):
    '''Compute the CQT

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape = (n_frames, n_bins)
            The CQT magnitude

        data['phase']: np.ndarray, shape = mag.shape
            The CQT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
            fmin=self.fmin,
            n_bins=(self.n_octaves * self.over_sample * 12),
            bins_per_octave=(self.over_sample * 12))

    C = fix_length(C, n_frames)

    cqtm, phase = magphase(C)
    if self.log:
        cqtm = amplitude_to_db(cqtm, ref=np.max)

    dphase = phase_diff(np.angle(phase).T[self.idx], self.conv)

    return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
            'dphase': to_dtype(dphase, self.dtype)}
Example #22
Source File: cqt.py From pumpp with ISC License
def transform_audio(self, y):
    '''Compute the CQT

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape = (n_frames, n_bins)
            The CQT magnitude

        data['phase']: np.ndarray, shape = mag.shape
            The CQT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
            fmin=self.fmin,
            n_bins=(self.n_octaves * self.over_sample * 12),
            bins_per_octave=(self.over_sample * 12))

    C = fix_length(C, n_frames)

    cqtm, phase = magphase(C)
    if self.log:
        cqtm = amplitude_to_db(cqtm, ref=np.max)

    return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
            'phase': to_dtype(np.angle(phase).T[self.idx], self.dtype)}
Example #23
Source File: fft.py From pumpp with ISC License
def transform_audio(self, y):
    '''Compute the STFT magnitude and phase differential.

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT magnitude

        data['dphase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    D = stft(y, hop_length=self.hop_length, n_fft=self.n_fft)

    D = fix_length(D, n_frames)

    mag, phase = magphase(D)
    if self.log:
        mag = amplitude_to_db(mag, ref=np.max)

    phase = phase_diff(np.angle(phase.T)[self.idx], self.conv)

    return {'mag': to_dtype(mag.T[self.idx], self.dtype),
            'dphase': to_dtype(phase, self.dtype)}
Example #24
Source File: fft.py From pumpp with ISC License
def transform_audio(self, y):
    '''Compute the STFT magnitude and phase.

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT magnitude

        data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    D = stft(y, hop_length=self.hop_length, n_fft=self.n_fft)

    D = fix_length(D, n_frames)

    mag, phase = magphase(D)
    if self.log:
        mag = amplitude_to_db(mag, ref=np.max)

    return {'mag': to_dtype(mag.T[self.idx], self.dtype),
            'phase': to_dtype(np.angle(phase.T)[self.idx], self.dtype)}
Example #25
Source File: time.py From pumpp with ISC License
def transform_audio(self, y):
    '''Compute the time position encoding

    Parameters
    ----------
    y : np.ndarray
        Audio buffer

    Returns
    -------
    data : dict
        data['relative'] = np.ndarray, shape=(n_frames, 2)
        data['absolute'] = np.ndarray, shape=(n_frames, 2)

        Relative and absolute time positional encodings.
    '''
    duration = get_duration(y=y, sr=self.sr)
    n_frames = self.n_frames(duration)

    relative = np.zeros((n_frames, 2), dtype=np.float32)
    relative[:, 0] = np.cos(np.pi * np.linspace(0, 1, num=n_frames))
    relative[:, 1] = np.sin(np.pi * np.linspace(0, 1, num=n_frames))

    absolute = relative * np.sqrt(duration)

    return {'relative': to_dtype(relative[self.idx], self.dtype),
            'absolute': to_dtype(absolute[self.idx], self.dtype)}
Example #26
Source File: Input.py From vimss with GNU General Public License v3.0
def randomPositionInAudio(audio_path, duration):
    length = librosa.get_duration(filename=audio_path)
    if duration >= length:
        return 0.0, None
    else:
        offset = np.random.uniform() * (length - duration)
        return offset, duration
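The (offset, duration) pair produced above is shaped to feed librosa.load(), which can decode just an excerpt of a file; a duration of None means "read to the end", which is the right behavior when the requested crop is longer than the track. A brief usage sketch (the file path is hypothetical):

import librosa

offset, dur = randomPositionInAudio("example.wav", duration=3.0)
# dur is None for the too-short-file case, so load() reads the whole track
y, sr = librosa.load("example.wav", sr=None, offset=offset, duration=dur)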
Example #27
Source File: analyze.py From crema with BSD 2-Clause "Simplified" License
def analyze(filename=None, y=None, sr=None):
    '''Analyze a recording for all tasks.

    Parameters
    ----------
    filename : str, optional
        Path to audio file

    y : np.ndarray, optional
    sr : number > 0, optional
        Audio buffer and sampling rate

    .. note:: At least one of `filename` or `y, sr` must be provided.

    Returns
    -------
    jam : jams.JAMS
        a JAMS object containing all estimated annotations

    Examples
    --------
    >>> from crema.analyze import analyze
    >>> import librosa
    >>> jam = analyze(filename=librosa.util.example_audio_file())
    >>> jam
    <JAMS(file_metadata=<FileMetadata(...)>,
          annotations=[1 annotation],
          sandbox=<Sandbox(...)>)>
    >>> # Get the chord estimates
    >>> chords = jam.annotations['chord', 0]
    >>> chords.to_dataframe().head(5)
           time  duration  value  confidence
    0  0.000000  0.092880  E:maj    0.336977
    1  0.092880  0.464399    E:7    0.324255
    2  0.557279  1.021678  E:min    0.448759
    3  1.578957  2.693515  E:maj    0.501462
    4  4.272472  1.486077  E:min    0.287264
    '''
    _load_models()
    jam = jams.JAMS()

    # populate file metadata
    jam.file_metadata.duration = librosa.get_duration(y=y, sr=sr,
                                                      filename=filename)

    for model in __MODELS__:
        jam.annotations.append(model.predict(filename=filename, y=y, sr=sr))

    return jam
Example #28
Source File: speech_cls_task.py From delta with Apache License 2.0
def get_class_files_duration(self):
    ''' dirnames under the dataset are class names;
        all data_path entries have the same dirnames '''
    classes = None
    for root, dirnames, filenames in os.walk(self._data_path[0]):
        classes = dirnames
        break
    assert classes, 'can not access {}'.format(self._data_path[0])
    assert set(classes) == set(self._classes.keys()), '{} {}'.format(
        classes, self._classes.keys())

    def _get_class(path):
        ret = None
        for cls in self._classes:
            if cls in path:
                ret = cls
        return ret

    # to exclude some data under some dir
    excludes = []

    #pylint: disable=too-many-nested-blocks
    for data_path in self._data_path:
        logging.debug("data path: {}".format(data_path))
        for root, dirname, filenames in os.walk(data_path):
            del dirname
            for filename in filenames:
                if filename.endswith(self._file_suffix):
                    class_name = _get_class(root)  # 'conflict' or 'normal' str
                    assert class_name is not None

                    filename = os.path.join(root, filename)
                    if excludes:
                        for exclude in excludes:
                            if exclude in filename:
                                pass

                    duration = self.get_duration(
                        filename=filename, sr=self._sample_rate)
                    self._class_file[class_name].append(
                        (filename, duration, class_name))
                else:
                    pass

    if not self._class_file:
        logging.debug("class file: {}".format(self._class_file))
        logging.warn("maybe no files with suffix {} exist".format(
            self._file_suffix))
Example #29
Source File: convert.py From ZeroSpeech-TTS-without-T with MIT License
def encode_for_tacotron(target, trainer, seg_len, multi2idx_path, wav_path, result_path):
    wavs = sorted(glob.glob(os.path.join(wav_path, '*.wav')))
    print('[Converter] - Number of wav files to encode: ', len(wavs))

    names = []
    enc_outputs = []
    for wav_path in tqdm(wavs):
        name = wav_path.split('/')[-1].split('.')[0]
        s_id = name.split('_')[0]
        u_id = name.split('_')[1]
        if s_id != target:
            continue
        y, sr = librosa.load(wav_path)
        d = librosa.get_duration(y=y, sr=sr)
        if d > 25:
            # --> this filters out too-long utts, 3523/3533 for V001 and V002
            #     together in the english dataset
            continue
        _, spec = get_spectrograms(wav_path)
        encodings = encode(spec, trainer, seg_len, save=False)
        encodings = parse_encodings(encodings)
        enc_outputs.append(encodings)
        names.append((s_id, u_id))

    # build encodings-to-character mapping
    idx = 0
    multi2idx = {}
    print('[Converter] - Building encoding to symbol mapping...')
    for encodings in tqdm(enc_outputs):
        for encoding in encodings:
            if str(encoding) not in multi2idx:
                multi2idx[str(encoding)] = symbols[idx]
                idx += 1

    print('[Converter] - Number of unique discrete units: ', len(multi2idx))
    with open(multi2idx_path, 'w') as file:
        file.write(json.dumps(multi2idx))

    result_path = result_path.replace('target', target)
    print('[Converter] - Writing to meta file...')
    with open(result_path, 'w') as file:
        for i, encodings in enumerate(enc_outputs):
            file.write(str(names[i][0]) + '_' + str(names[i][1]) + '|')
            for encoding in encodings:
                file.write(multi2idx[str(encoding)])
            file.write('\n')
Example #30
Source File: cqt.py From pumpp with ISC License
def transform_audio(self, y):
    '''Compute the HCQT

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
            The CQT magnitude

        data['phase']: np.ndarray, shape = mag.shape
            The CQT phase
    '''
    cqtm, phase = [], []

    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    for h in self.harmonics:
        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin * h,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))

        C = fix_length(C, n_frames)

        C, P = magphase(C)
        if self.log:
            C = amplitude_to_db(C, ref=np.max)

        cqtm.append(C)
        phase.append(P)

    cqtm = to_dtype(np.asarray(cqtm), self.dtype)
    phase = np.angle(np.asarray(phase))

    dphase = to_dtype(phase_diff(self._index(phase), self.conv),
                      self.dtype)

    return {'mag': self._index(cqtm),
            'dphase': dphase}