Python vggish_slim.define_vggish_slim() Examples
The following are 3 code examples of vggish_slim.define_vggish_slim(), drawn from open-source projects. The link above each example points to the original project and source file.
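All three examples share the same pattern: define the VGGish graph in inference mode, restore the pretrained checkpoint into a session, then feed log-mel examples to the input tensor and read embeddings from the output tensor. As a quick orientation, here is a minimal sketch of that pattern; the checkpoint path and WAV filename are placeholders, and the vggish_* modules are from the AudioSet release in tensorflow/models.

import tensorflow as tf
import vggish_input
import vggish_params
import vggish_slim

with tf.Graph().as_default(), tf.Session() as sess:
    # Build the VGGish architecture in inference mode.
    vggish_slim.define_vggish_slim(training=False)
    # Restore the pretrained weights into the freshly defined graph.
    vggish_slim.load_vggish_slim_checkpoint(sess, 'vggish_model.ckpt')

    features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
    embedding_tensor = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)

    # Convert a WAV file into log-mel examples of shape [num_examples, 96, 64].
    examples = vggish_input.wavfile_to_examples('some_audio.wav')
    [embeddings] = sess.run([embedding_tensor],
                            feed_dict={features_tensor: examples})
    print(embeddings.shape)  # (num_examples, 128)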
Example #1
Source File: audio_feature_extractor.py from Tensorflow-Audio-Classification, Apache License 2.0
def __init__(self, checkpoint, pca_params, input_tensor_name, output_tensor_name):
    """Create a new Graph and a new Session for every VGGishExtractor object."""
    super(VGGishExtractor, self).__init__()

    self.graph = tf.Graph()
    with self.graph.as_default():
        vggish_slim.define_vggish_slim(training=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    self.sess = tf.Session(graph=self.graph, config=sess_config)
    vggish_slim.load_defined_vggish_slim_checkpoint(self.sess, checkpoint)

    # use the self.sess to init others
    self.input_tensor = self.graph.get_tensor_by_name(input_tensor_name)
    self.output_tensor = self.graph.get_tensor_by_name(output_tensor_name)

    # postprocessor
    self.postprocess = vggish_postprocess.Postprocessor(pca_params)
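For context, here is a hypothetical way to use the extractor above. The tensor names are the standard values from the AudioSet vggish_params (INPUT_TENSOR_NAME and OUTPUT_TENSOR_NAME); the file paths and WAV name are placeholders, not part of the original project.

# Hypothetical usage sketch; paths are placeholders and the tensor names
# assume the stock AudioSet vggish_params values.
extractor = VGGishExtractor(
    checkpoint='vggish_model.ckpt',
    pca_params='vggish_pca_params.npz',
    input_tensor_name='vggish/input_features:0',
    output_tensor_name='vggish/embedding:0')

examples = vggish_input.wavfile_to_examples('some_audio.wav')
raw_embeddings = extractor.sess.run(
    extractor.output_tensor,
    feed_dict={extractor.input_tensor: examples})
embeddings = extractor.postprocess.postprocess(raw_embeddings)  # PCA + quantization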
Example #2
Source File: audio_transfer_learning.py from sklearn-audio-transfer-learning, ISC License
def extract_vggish_features(paths, path2gt, model):
    """Extracts VGGish features and their corresponding ground_truth and
    identifiers (the path).

    VGGish features are extracted from non-overlapping audio patches of
    0.96 seconds, where each audio patch covers 64 mel bands and 96 frames
    of 10 ms each. We repeat ground_truth and identifiers to fit the number
    of extracted VGGish features.
    """
    # 1) Extract log-mel spectrograms
    first_audio = True
    for p in paths:
        if first_audio:
            input_data = vggish_input.wavfile_to_examples(config['audio_folder'] + p)
            ground_truth = np.repeat(path2gt[p], input_data.shape[0], axis=0)
            identifiers = np.repeat(p, input_data.shape[0], axis=0)
            first_audio = False
        else:
            tmp_in = vggish_input.wavfile_to_examples(config['audio_folder'] + p)
            input_data = np.concatenate((input_data, tmp_in), axis=0)
            tmp_gt = np.repeat(path2gt[p], tmp_in.shape[0], axis=0)
            ground_truth = np.concatenate((ground_truth, tmp_gt), axis=0)
            tmp_id = np.repeat(p, tmp_in.shape[0], axis=0)
            identifiers = np.concatenate((identifiers, tmp_id), axis=0)

    # 2) Load Tensorflow model to extract VGGish features
    with tf.Graph().as_default(), tf.Session() as sess:
        vggish_slim.define_vggish_slim(training=False)
        vggish_slim.load_vggish_slim_checkpoint(sess, 'vggish_model.ckpt')
        features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
        embedding_tensor = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)
        extracted_feat = sess.run([embedding_tensor],
                                  feed_dict={features_tensor: input_data})
        feature = np.squeeze(np.asarray(extracted_feat))

    return [feature, ground_truth, identifiers]
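Since this project feeds VGGish features into scikit-learn models, a plausible follow-up is to train a classifier on the returned patch-level features. This is a hypothetical sketch: train_paths and test_paths are placeholders, and None is passed for the model argument, which the snippet above does not use.

from sklearn.linear_model import LogisticRegression

# Hypothetical train/test split over audio paths.
x_train, y_train, _ = extract_vggish_features(train_paths, path2gt, model=None)
x_test, y_test, _ = extract_vggish_features(test_paths, path2gt, model=None)

clf = LogisticRegression(max_iter=1000)
clf.fit(x_train, y_train)
print('Patch-level accuracy: {:.3f}'.format(clf.score(x_test, y_test)))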
Example #3
Source File: extract_audioset_embedding.py from audioset_classification, MIT License
def extract_audioset_embedding():
    """Extract log mel spectrogram features."""
    # Arguments & parameters
    mel_bins = vggish_params.NUM_BANDS
    sample_rate = vggish_params.SAMPLE_RATE
    input_len = vggish_params.NUM_FRAMES
    embedding_size = vggish_params.EMBEDDING_SIZE

    '''You may modify the EXAMPLE_HOP_SECONDS in vggish_params.py to change
    the hop size.'''

    # Paths
    audio_path = 'appendixes/01.wav'
    checkpoint_path = os.path.join('vggish_model.ckpt')
    pca_params_path = os.path.join('vggish_pca_params.npz')

    if not os.path.isfile(checkpoint_path):
        raise Exception('Please download vggish_model.ckpt from '
                        'https://storage.googleapis.com/audioset/vggish_model.ckpt '
                        'and put it in the root of this codebase.')

    if not os.path.isfile(pca_params_path):
        raise Exception('Please download vggish_pca_params.npz from '
                        'https://storage.googleapis.com/audioset/vggish_pca_params.npz '
                        'and put it in the root of this codebase.')

    # Load model
    sess = tf.Session()

    vggish_slim.define_vggish_slim(training=False)
    vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)

    features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
    embedding_tensor = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)

    pproc = vggish_postprocess.Postprocessor(pca_params_path)

    # Read audio
    (audio, _) = read_audio(audio_path, target_fs=sample_rate)

    # Extract log mel feature
    logmel = vggish_input.waveform_to_examples(audio, sample_rate)

    # Extract embedding feature
    [embedding_batch] = sess.run([embedding_tensor],
                                 feed_dict={features_tensor: logmel})

    # PCA
    postprocessed_batch = pproc.postprocess(embedding_batch)

    print('Audio length: {}'.format(len(audio)))
    print('Log mel shape: {}'.format(logmel.shape))
    print('Embedding feature shape: {}'.format(postprocessed_batch.shape))
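Note that read_audio is not defined in this snippet; it comes from the project's own utilities. A minimal stand-in, assuming librosa is available, could look like this:

import librosa

def read_audio(path, target_fs=None):
    # Load as mono float samples; librosa resamples to target_fs when given.
    audio, fs = librosa.load(path, sr=target_fs, mono=True)
    return audio, fs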