Python librosa.time_to_frames() Examples
The following are 17 code examples of librosa.time_to_frames(), drawn from open-source projects. Each example notes its source file, originating project, and license. You may also want to check out the other available functions and classes of the librosa module.
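Before diving into the project code, a quick orientation: librosa.time_to_frames() converts timestamps in seconds to spectrogram frame indices, essentially floor(times * sr / hop_length). A minimal sketch, with the sample rate and hop length chosen arbitrarily for illustration:

import numpy as np
import librosa

# Map timestamps (seconds) to frame indices: floor(t * sr / hop_length)
times = np.array([0.0, 1.0, 2.5])
frames = librosa.time_to_frames(times, sr=22050, hop_length=512)
print(frames)  # -> [  0  43 107]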
Example #1
Source File: base.py From pumpp with ISC License
def n_frames(self, duration):
    '''Get the number of frames for a given duration

    Parameters
    ----------
    duration : number >= 0
        The duration, in seconds

    Returns
    -------
    n_frames : int >= 0
        The number of frames at this extractor's sampling rate and
        hop length
    '''
    return int(time_to_frames(duration, sr=self.sr,
                              hop_length=self.hop_length))
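A usage sketch with assumed values (not pumpp defaults): for an extractor with sr=22050 and hop_length=512, n_frames(5.0) returns int(time_to_frames(5.0, sr=22050, hop_length=512)) == 215, i.e. floor(5.0 * 22050 / 512); the int() cast turns librosa's integer result into a plain Python int.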
Example #2
Source File: audio.py From amen with BSD 2-Clause "Simplified" License
def _get_beats(self):
    """
    Gets beats using librosa's beat tracker.
    """
    _, beat_frames = librosa.beat.beat_track(
        y=self.analysis_samples, sr=self.analysis_sample_rate, trim=False)

    # pad beat times to full duration
    f_max = librosa.time_to_frames(self.duration, sr=self.analysis_sample_rate)
    beat_frames = librosa.util.fix_frames(beat_frames, x_min=0, x_max=f_max)

    # convert frames to times
    beat_times = librosa.frames_to_time(beat_frames, sr=self.analysis_sample_rate)

    # make the list of (start, duration) tuples that TimingList expects
    starts_durs = [(s, t - s) for (s, t) in zip(beat_times, beat_times[1:])]

    return starts_durs
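Design note: librosa.util.fix_frames pads the detected beat frames with frame 0 and the frame index of the full duration (obtained via time_to_frames), so the pairwise zip over consecutive beat times produces (start, duration) tuples that span the whole track rather than just the region between the first and last detected beats.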
Example #3
Source File: tags.py From pumpp with ISC License
def inverse(self, encoded, duration=None):
    '''Inverse transformation'''

    ann = jams.Annotation(namespace=self.namespace, duration=duration)
    for start, end, value in self.decode_intervals(encoded,
                                                   duration=duration,
                                                   transition=self.transition,
                                                   p_init=self.p_init,
                                                   p_state=self.p_state):
        # Map start:end to frames
        f_start, f_end = time_to_frames([start, end],
                                        sr=self.sr,
                                        hop_length=self.hop_length)

        confidence = np.mean(encoded[f_start:f_end+1, value])

        value_dec = self.encoder.inverse_transform(np.atleast_2d(value))[0]

        for vd in value_dec:
            ann.append(time=start,
                       duration=end - start,
                       value=vd,
                       confidence=confidence)

    return ann
Example #4
Source File: speech_cls_task.py From delta with Apache License 2.0
def feat_output_shape(config):
    ''' without batch_size'''
    if 'feature_shape' in config['task']['audio'] and \
       config['task']['audio']['feature_shape']:
        return config['task']['audio']['feature_shape']

    if config['task']['suffix'] == '.npy':
        input_channels = 3 if config['task']['audio']['add_delta_deltas'] else 1
        nframe = librosa.time_to_frames(
            config['task']['audio']['clip_size'],
            sr=config['task']['audio']['sr'],
            hop_length=config['task']['audio']['winstep'] *
            config['task']['audio']['sr'])
        feature_shape = [
            nframe, config['task']['audio']['feature_size'], input_channels
        ]
    else:
        feature_shape = [
            config['task']['audio']['sr'] * config['task']['audio']['clip_size']
        ]
    config['task']['audio']['feature_shape'] = feature_shape
    return feature_shape
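Note on the hop_length argument: winstep here is presumably a window step in seconds, so winstep * sr converts it to a hop length in samples, which is the unit time_to_frames expects. The same pattern recurs in Examples #6 and #7 below.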
Example #5
Source File: audio.py From Speech_emotion_recognition_BLSTM with MIT License
def split_vocal(self, y):
    S_full, phase = librosa.magphase(librosa.stft(y))

    # To avoid being biased by local continuity, we constrain similar frames
    # to be separated by at least 1.2 seconds.
    S_filter = librosa.decompose.nn_filter(
        S_full, aggregate=np.median, metric='cosine',
        width=int(librosa.time_to_frames(self._constrained, sr=self._sr)))
    S_filter = np.minimum(S_full, S_filter)

    margin_v = 10
    power = 2

    mask_v = librosa.util.softmask(S_full - S_filter,
                                   margin_v * S_filter,
                                   power=power)
    S_foreground = mask_v * S_full

    foreground = griffinlim(S_foreground)
    return foreground
Example #6
Source File: emotion_solver.py From delta with Apache License 2.0
def process_config(self, config):
    ''' preprocess config '''
    data_conf = config['data']

    class_vocab = data_conf['task']['classes']['vocab']
    assert len(class_vocab) == data_conf['task']['classes']['num']

    # add reverse_vocab, positive_id
    reverse_vocab = {val: key for key, val in class_vocab.items()}
    data_conf['task']['classes']['reverse_vocab'] = reverse_vocab

    # binary class
    pos_id = config['solver']['metrics']['pos_label']
    data_conf['task']['classes']['positive_id'] = pos_id
    data_conf['task']['classes']['positive'] = reverse_vocab[pos_id]

    # add feature shape, without batch_size
    if data_conf['task']['suffix'] == '.npy':
        input_channels = 3 if data_conf['task']['audio']['add_delta_deltas'] else 1
        nframe = librosa.time_to_frames(
            data_conf['task']['audio']['clip_size'],
            sr=data_conf['task']['audio']['sr'],
            hop_length=data_conf['task']['audio']['winstep'] *
            data_conf['task']['audio']['sr'])
        feature_shape = [
            nframe, data_conf['task']['audio']['feature_size'], input_channels
        ]
    else:
        feature_shape = [
            data_conf['task']['audio']['sr'] * data_conf['task']['audio']['clip_size']
        ]
    data_conf['task']['audio']['feature_shape'] = feature_shape

    return config
Example #7
Source File: speaker_solver.py From delta with Apache License 2.0
def process_config(self, config):
    data_conf = config['data']

    feature_shape = data_conf['task']['audio'].get('feature_shape', None)
    if not feature_shape:
        # add feature shape, without batch_size
        if data_conf['task']['suffix'] == '.npy':
            input_channels = 3 if data_conf['task']['audio']['add_delta_deltas'] else 1
            nframe = librosa.time_to_frames(
                data_conf['task']['audio']['clip_size'],
                sr=data_conf['task']['audio']['sr'],
                hop_length=data_conf['task']['audio']['winstep'] *
                data_conf['task']['audio']['sr'])
            feature_shape = [
                nframe, data_conf['task']['audio']['feature_size'], input_channels
            ]
        else:
            feature_shape = [
                data_conf['task']['audio']['sr'] * data_conf['task']['audio']['clip_size']
            ]
        data_conf['task']['audio']['feature_shape'] = feature_shape

    logging.info(f"FEATURE SHAPE: {feature_shape}")
    return config
Example #8
Source File: audio.py From Speech_emotion_recognition_BLSTM with MIT License
def split_vocal_to_wav(self, filename, fp_foreground, fp_background=None):
    print(filename.split('/')[-1])

    y, sr = librosa.load(filename, sr=self._sr)
    S_full, phase = librosa.magphase(librosa.stft(y))

    # To avoid being biased by local continuity, we constrain similar frames
    # to be separated by at least 1.2 seconds.
    S_filter = librosa.decompose.nn_filter(
        S_full, aggregate=np.median, metric='cosine',
        width=int(librosa.time_to_frames(self._constrained, sr=self._sr)))
    S_filter = np.minimum(S_full, S_filter)

    margin_i, margin_v = 2, 10
    power = 2

    mask_i = librosa.util.softmask(S_filter,
                                   margin_i * (S_full - S_filter),
                                   power=power)
    mask_v = librosa.util.softmask(S_full - S_filter,
                                   margin_v * S_filter,
                                   power=power)

    S_foreground = mask_v * S_full
    S_background = mask_i * S_full

    foreground = griffinlim(S_foreground)
    fp_foreground += filename.split('/')[-1]
    sf.write(fp_foreground, foreground, sr, 'PCM_16')

    if fp_background is not None:
        background = griffinlim(S_background)
        fp_background += filename.split('/')[-1]
        sf.write(fp_background, background, sr, 'PCM_16')
Example #9
Source File: 02-train.py From crema with BSD 2-Clause "Simplified" License
def val_sampler(max_duration, pump, seed):
    '''validation sampler'''
    n_frames = librosa.time_to_frames(max_duration,
                                      sr=pump['cqt'].sr,
                                      hop_length=pump['cqt'].hop_length)

    return pumpp.sampler.VariableLengthSampler(None, 32, n_frames,
                                               *pump.ops,
                                               random_state=seed)
Example #10
Source File: 02-train.py From crema with BSD 2-Clause "Simplified" License
def make_sampler(max_samples, duration, pump, seed):
    '''stochastic training sampler'''
    n_frames = librosa.time_to_frames(duration,
                                      sr=pump['cqt'].sr,
                                      hop_length=pump['cqt'].hop_length)

    return pump.sampler(max_samples, n_frames, random_state=seed)
Example #11
Source File: 02-train.py From crema with BSD 2-Clause "Simplified" License
def make_sampler(max_samples, duration, pump, seed):
    n_frames = librosa.time_to_frames(duration,
                                      sr=pump['mel'].sr,
                                      hop_length=pump['mel'].hop_length)[0]

    return pump.sampler(max_samples, n_frames, random_state=seed)
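Unlike the two samplers above, this variant indexes the result with [0], which suggests duration arrives here as a one-element sequence: time_to_frames vectorizes over array-like input and returns an array of frame indices.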
Example #12
Source File: base.py From msaf with MIT License
def read_ann_beats(self):
    """Reads the annotated beats if available.

    Returns
    -------
    times : np.array
        Times of annotated beats in seconds.
    frames : np.array
        Frame indices of annotated beats.
    """
    times, frames = (None, None)

    # Read annotations if they exist in correct folder
    if os.path.isfile(self.file_struct.ref_file):
        try:
            jam = jams.load(self.file_struct.ref_file)
        except TypeError:
            logging.warning("Can't read JAMS file %s. Maybe it's not "
                            "compatible with current JAMS version?" %
                            self.file_struct.ref_file)
            return times, frames
        beat_annot = jam.search(namespace="beat.*")

        # If beat annotations exist, get times and frames
        if len(beat_annot) > 0:
            beats_inters, _ = beat_annot[0].to_interval_values()
            times = beats_inters[:, 0]
            frames = librosa.time_to_frames(times, sr=self.sr,
                                            hop_length=self.hop_length)
    return times, frames
Example #13
Source File: key.py From pumpp with ISC License
def inverse(self, encoded, duration=None):
    '''Inverse transformation'''

    ann = jams.Annotation(self.namespace, duration=duration)
    for start, end, value in self.decode_intervals(encoded,
                                                   duration=duration,
                                                   multi=False,
                                                   sparse=self.sparse,
                                                   transition=self.transition,
                                                   p_init=self.p_init,
                                                   p_state=self.p_state):
        # Map start:end to frames
        f_start, f_end = time_to_frames([start, end],
                                        sr=self.sr,
                                        hop_length=self.hop_length)

        # Reverse the index
        if self.sparse:
            # Compute the confidence
            if encoded.shape[1] == 1:
                # This case is for full-confidence prediction (just the index)
                confidence = 1.
            else:
                confidence = np.mean(encoded[f_start:f_end+1, value])

            value_dec = self.encoder.inverse_transform(value)
        else:
            confidence = np.mean(encoded[f_start:f_end+1, np.argmax(value)])
            value_dec = self.encoder.inverse_transform(np.atleast_2d(value))

        for vd in value_dec:
            ann.append(time=start,
                       duration=end - start,
                       value=vd,
                       confidence=float(confidence))

    return ann
Example #14
Source File: beat.py From pumpp with ISC License
def inverse(self, encoded, downbeat=None, duration=None):
    '''Inverse transformation for beats and optional downbeats'''

    ann = jams.Annotation(namespace=self.namespace, duration=duration)

    beat_times = np.asarray([t for t, _ in self.decode_events(encoded,
                                                              transition=self.beat_transition,
                                                              p_init=self.beat_p_init,
                                                              p_state=self.beat_p_state) if _])
    beat_frames = time_to_frames(beat_times,
                                 sr=self.sr,
                                 hop_length=self.hop_length)

    if downbeat is not None:
        downbeat_times = set([t for t, _ in self.decode_events(downbeat,
                                                               transition=self.down_transition,
                                                               p_init=self.down_p_init,
                                                               p_state=self.down_p_state)
                              if _])
        pickup_beats = len([t for t in beat_times
                            if t < min(downbeat_times)])
    else:
        downbeat_times = set()
        pickup_beats = 0

    value = - pickup_beats - 1
    for beat_t, beat_f in zip(beat_times, beat_frames):
        if beat_t in downbeat_times:
            value = 1
        else:
            value += 1

        confidence = encoded[beat_f]
        ann.append(time=beat_t,
                   duration=0,
                   value=value,
                   confidence=confidence)

    return ann
Example #15
Source File: base.py From pumpp with ISC License
def encode_events(self, duration, events, values, dtype=np.bool):
    '''Encode labeled events as a time-series matrix.

    Parameters
    ----------
    duration : number
        The duration of the track

    events : ndarray, shape=(n,)
        Time index of the events

    values : ndarray, shape=(n, m)
        Values array. Must have the same first index as `events`.

    dtype : numpy data type

    Returns
    -------
    target : ndarray, shape=(n_frames, n_values)
    '''
    frames = time_to_frames(events, sr=self.sr,
                            hop_length=self.hop_length)

    n_total = int(time_to_frames(duration, sr=self.sr,
                                 hop_length=self.hop_length))

    n_alloc = n_total
    if np.any(frames):
        n_alloc = max(n_total, 1 + int(frames.max()))

    target = np.empty((n_alloc, values.shape[1]), dtype=dtype)
    target.fill(fill_value(dtype))
    values = values.astype(dtype)
    for column, event in zip(values, frames):
        target[event] += column

    return target[:n_total]
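Design note: the target matrix is allocated with n_alloc = max(n_total, 1 + frames.max()) rows so that an event whose frame index lands at or past the nominal track length still has a row to accumulate into, and the matrix is truncated back to n_total frames on return. Also note that dtype=np.bool reflects older NumPy; the np.bool alias was removed in NumPy 1.24, where plain bool (or np.bool_) is the equivalent.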
Example #16
Source File: chord.py From pumpp with ISC License
def inverse(self, encoded, duration=None):
    '''Inverse transformation'''

    ann = jams.Annotation(self.namespace, duration=duration)
    for start, end, value in self.decode_intervals(encoded,
                                                   duration=duration,
                                                   multi=False,
                                                   sparse=self.sparse,
                                                   transition=self.transition,
                                                   p_init=self.p_init,
                                                   p_state=self.p_state):
        # Map start:end to frames
        f_start, f_end = time_to_frames([start, end],
                                        sr=self.sr,
                                        hop_length=self.hop_length)

        # Reverse the index
        if self.sparse:
            # Compute the confidence
            if encoded.shape[1] == 1:
                # This case is for full-confidence prediction (just the index)
                confidence = 1.
            else:
                confidence = np.mean(encoded[f_start:f_end+1, value])

            value_dec = self.encoder.inverse_transform(value)
        else:
            confidence = np.mean(encoded[f_start:f_end+1, np.argmax(value)])
            value_dec = self.encoder.inverse_transform(np.atleast_2d(value))

        for vd in value_dec:
            ann.append(time=start,
                       duration=end - start,
                       value=vd,
                       confidence=float(confidence))

    return ann
Example #17
Source File: base.py From pumpp with ISC License
def transform(self, jam, query=None):
    '''Transform jam object to make data for this task

    Parameters
    ----------
    jam : jams.JAMS
        The jams container object

    query : string, dict, or callable [optional]
        An optional query to narrow the elements of `jam.annotations`
        to be considered.

        If not provided, all annotations are considered.

    Returns
    -------
    data : dict
        A dictionary of transformed annotations.

        All annotations which can be converted to the target namespace
        will be converted.
    '''
    anns = []
    if query:
        results = jam.search(**query)
    else:
        results = jam.annotations

    # Find annotations that can be coerced to our target namespace
    for ann in results:
        try:
            anns.append(jams.nsconvert.convert(ann, self.namespace))
        except jams.NamespaceError:
            pass

    duration = jam.file_metadata.duration

    # If none, make a fake one
    if not anns:
        anns = [self.empty(duration)]

    # Apply transformations
    results = []
    for ann in anns:
        results.append(self.transform_annotation(ann, duration))
        # If the annotation range is None, it spans the entire track
        if ann.time is None or ann.duration is None:
            valid = [0, duration]
        else:
            valid = [ann.time, ann.time + ann.duration]

        results[-1]['_valid'] = time_to_frames(valid, sr=self.sr,
                                               hop_length=self.hop_length)

    # Prefix and collect
    return self.merge(results)