Python pydub.AudioSegment.from_file() Examples
The following are 30 code examples of pydub.AudioSegment.from_file(), collected from open-source projects. The originating project, source file, and license are noted above each example. You may also want to check out the other available functions and classes of the pydub.AudioSegment module.
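
Before the project examples, here is a minimal usage sketch of AudioSegment.from_file() itself. The file names are placeholders, and the snippet assumes ffmpeg (or libav) is installed for non-wav formats:

from pydub import AudioSegment

# Load a file; pydub can infer the format from the extension,
# or it can be passed explicitly via the format argument.
song = AudioSegment.from_file("example.mp3", format="mp3")

print(song.frame_rate, song.channels, song.sample_width, song.duration_seconds)

# AudioSegment objects are immutable: conversions return new segments.
mono_16k = song.set_channels(1).set_frame_rate(16000)
mono_16k.export("example_16k_mono.wav", format="wav")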
Example #1
Source File: audio.py, from parallel-wavenet-vocoder (MIT License), 8 votes

def prepro_audio(source_path, target_path, format=None, sr=None, db=None):
    """
    Read a wav, change sample rate, format, and average decibel and write to target path.
    :param source_path: source wav file path
    :param target_path: target wav file path
    :param sr: sample rate.
    :param format: output audio format.
    :param db: decibel.
    """
    sound = AudioSegment.from_file(source_path, format)
    if sr:
        sound = sound.set_frame_rate(sr)
    if db:
        change_dBFS = db - sound.dBFS
        sound = sound.apply_gain(change_dBFS)
    sound.export(target_path, 'wav')
Example #2
Source File: data_maker.py, from AudioNet (MIT License), 7 votes

def makechunks(path):
    folders = glob.glob(path + '*')
    for folder in folders:
        waves = glob.glob(folder + '/' + '*.wav')
        print('w', waves)
        if len(waves) == 0:
            return 10
        for i in waves:
            w = i
            myaudio = AudioSegment.from_file(i, 'wav')
            chunk_length_ms = 20000
            chunks = make_chunks(myaudio, chunk_length_ms)
            print(chunks)
            for i, chunk in enumerate(chunks):
                chunk_name = w.split('.')[0] + "chunk{0}.wav".format(i)
                print(chunk_name)
                print("exporting", chunk_name)
                chunk.export(folder + '/' + chunk_name, format="wav")
Example #3
Source File: audioBasicIO.py, from pyAudioAnalysis (Apache License 2.0), 7 votes

def read_audio_generic(input_file):
    """
    Function to read audio files with the following extensions
    [".mp3", ".wav", ".au", ".ogg"]
    """
    sampling_rate = -1
    signal = np.array([])
    try:
        audiofile = AudioSegment.from_file(input_file)
        data = np.array([])
        if audiofile.sample_width == 2:
            data = numpy.fromstring(audiofile._data, numpy.int16)
        elif audiofile.sample_width == 4:
            data = numpy.fromstring(audiofile._data, numpy.int32)

        if data.size > 0:
            sampling_rate = audiofile.frame_rate
            temp_signal = []
            for chn in list(range(audiofile.channels)):
                temp_signal.append(data[chn::audiofile.channels])
            signal = numpy.array(temp_signal).T
    except:
        print("Error: file not found or other I/O error. (DECODING FAILED)")
    return sampling_rate, signal
Example #4
Source File: testing.py, from Jamais-Vu (MIT License), 6 votes

def get_length_audio(audiopath, extension):
    """
    Returns length of audio in seconds.
    Returns None if format isn't supported or in case of error.
    """
    try:
        audio = AudioSegment.from_file(audiopath, extension.replace(".", ""))
    except:
        print "Error in get_length_audio(): %s" % traceback.format_exc()
        return None
    return int(len(audio) / 1000.0)
Example #5
Source File: utils.py, from bard (GNU General Public License v3.0), 6 votes

def decodeAudio(filething):
    if hasattr(filething, 'seek'):
        filething.seek(0)
        filecontents = filething.read()
        data, properties = bard_audiofile.decode(data=filecontents)
    else:
        data, properties = bard_audiofile.decode(path=filething)

    if config['enable_internal_checks']:
        FILES_PYDUB_CANT_DECODE_RIGHT = \
            ['/mnt/DD8/media/mp3/id13/k3/software_libre-hq.ogg']
        if hasattr(filething, 'seek'):
            filething.seek(0)
        audio_segment = AudioSegment.from_file(filething)

        if (audio_segment.raw_data != data
                and filething not in FILES_PYDUB_CANT_DECODE_RIGHT):
            with open('/tmp/decoded-song-pydub.raw', 'wb') as f:
                f.write(audio_segment.raw_data)
            with open('/tmp/decoded-song-bard_audiofile.raw', 'wb') as f:
                f.write(data)
            raise Exception('DECODED AUDIO IS DIFFERENT BETWEEN '
                            'BARD_AUDIOFILE AND PYDUB')

        print('bard_audiofile/pydub decode check ' +
              TerminalColors.Ok + 'OK' + TerminalColors.ENDC)

    return data, DecodedAudioPropertiesTupleFromDict(properties)
Example #6
Source File: audio.py, from deep-voice-conversion (MIT License), 6 votes

def prepro_audio(source_path, target_path, format=None, sr=None, db=None):
    """
    Read a wav, change sample rate, format, and average decibel and write to target path.
    :param source_path: source wav file path
    :param target_path: target wav file path
    :param sr: sample rate.
    :param format: output audio format.
    :param db: decibel.
    """
    sound = AudioSegment.from_file(source_path, format)
    if sr:
        sound = sound.set_frame_rate(sr)
    if db:
        change_dBFS = db - sound.dBFS
        sound = sound.apply_gain(change_dBFS)
    sound.export(target_path, 'wav')
Example #7
Source File: song.py, from bard (GNU General Public License v3.0), 6 votes

def calculateSilences(self, threshold=None, min_length=None):
    try:
        audio_segment = AudioSegment.from_file(self.path())
    except PydubException as exc:
        print('Error processing:', self.path(), ':', exc)
        raise
    self._audioSha256sum = calculateSHA256_data(audio_segment.raw_data)
    thr = threshold or Song.silence_threshold
    minlen = min_length or Song.min_silence_length
    silences = detect_silence_at_beginning_and_end(audio_segment,
                                                   min_silence_len=minlen,
                                                   silence_thresh=thr)
    if silences:
        silence1, silence2 = silences
        self._silenceAtStart = (silence1[1] - silence1[0]) / 1000
        self._silenceAtEnd = (silence2[1] - silence2[0]) / 1000
Example #8
Source File: VinylBackgroundSpectrogramGenerator.py, from crnn-lid (GNU General Public License v3.0), 6 votes

def audioToSpectrogram(self, file, pixel_per_sec, height):
    noise_file_index = random.randint(1, len(NOISE_FILES_LENGTH))
    noise_file_name = "vinyl_noise/normalized-noise{}.wav".format(noise_file_index)

    with tempfile.NamedTemporaryFile(suffix='.wav') as noisy_speech_file:
        noise = AudioSegment.from_file(noise_file_name)
        speech = AudioSegment.from_file(file)
        speech.apply_gain(noise.dBFS - speech.dBFS)
        noisy_speech = speech.overlay(noise - 10, loop=True)
        noisy_speech.export(noisy_speech_file.name, format="wav")
        # shutil.copyfile(noisy_speech_file.name, os.path.join("/extra/tom/news2/debug", "mixed_" + os.path.basename(noisy_speech_file.name)))

        with tempfile.NamedTemporaryFile(suffix='.png') as image_file:
            command = "{} -n remix 1 rate 10k spectrogram -y {} -X {} -m -r -o {}".format(
                noisy_speech_file.name, height, pixel_per_sec, image_file.name)
            sox.core.sox([command])
            # spectrogram can be inspected at image_file.name
            image = Image.open(image_file.name)
            return np.array(image)
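
A note on the speech.apply_gain(...) call above: AudioSegment objects are immutable, so apply_gain() returns a new, gain-adjusted segment rather than modifying speech in place, and that line discards the result as written. If the intent is to match the speech level to the noise level before overlaying, the return value would need to be assigned back, roughly:

speech = speech.apply_gain(noise.dBFS - speech.dBFS)  # keep the returned segment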
Example #9
Source File: MusicBackgroundSpectrogramGenerator.py, from crnn-lid (GNU General Public License v3.0), 6 votes

def audioToSpectrogram(self, file, pixel_per_sec, height):
    noise_file_index = random.randint(1, 190)
    noise_file_name = "/data/tom/backgroundmusic/normalized-background_{}.wav".format(noise_file_index)

    with tempfile.NamedTemporaryFile(suffix='.wav') as noisy_speech_file:
        noise = AudioSegment.from_file(noise_file_name)
        speech = AudioSegment.from_file(file)
        # speech.apply_gain(noise.dBFS - speech.dBFS)
        noisy_speech = speech.overlay(noise - 5, loop=True)
        noisy_speech.export(noisy_speech_file.name, format="wav")
        # shutil.copyfile(noisy_speech_file.name, os.path.join("/extra/tom/news2/debug", "mixed_" + os.path.basename(noisy_speech_file.name)))

        with tempfile.NamedTemporaryFile(suffix='.png') as image_file:
            command = "{} -n remix 1 rate 10k spectrogram -y {} -X {} -m -r -o {}".format(
                noisy_speech_file.name, height, pixel_per_sec, image_file.name)
            sox.core.sox([command])
            # spectrogram can be inspected at image_file.name
            image = Image.open(image_file.name)
            return np.array(image)
Example #10
Source File: fft_analyser.py, from code-jam-5 (MIT License), 6 votes

def reset_media(self):
    """Resets the media to the currently playing song."""
    audio_file = self.player.currentMedia().canonicalUrl().path()
    if os.name == 'nt' and audio_file.startswith('/'):
        audio_file = audio_file[1:]
    if audio_file:
        try:
            self.song = AudioSegment.from_file(audio_file).set_channels(1)
        except PermissionError:
            self.start_animate = False
        else:
            self.samples = np.array(self.song.get_array_of_samples())
            self.max_sample = self.samples.max()
            self.maximum_amp = self.max_sample // 4
            self.points = np.zeros(self.resolution)
            self.start_animate = True
    else:
        self.start_animate = False
Example #11
Source File: prepro.py, from voice-vector (MIT License), 6 votes

def prepro_audio(source_path, target_path, format=None, sr=None, db=None):
    """
    Read a wav, change sample rate, format, and average decibel and write to target path.
    :param source_path: source wav file path
    :param target_path: target wav file path
    :param sr: sample rate.
    :param format: output audio format.
    :param db: decibel.
    """
    sound = AudioSegment.from_file(source_path, format)
    if sr:
        sound = sound.set_frame_rate(sr)
    if db:
        change_dBFS = db - sound.dBFS
        sound = sound.apply_gain(change_dBFS)
    sound.export(target_path, 'wav')
Example #12
Source File: audio.py, from voice-vector (MIT License), 6 votes

def prepro_audio(source_path, target_path, format=None, sr=None, db=None):
    """
    Read a wav, change sample rate, format, and average decibel and write to target path.
    :param source_path: source wav file path
    :param target_path: target wav file path
    :param sr: sample rate.
    :param format: output audio format.
    :param db: decibel.
    """
    sound = AudioSegment.from_file(source_path, format)
    if sr:
        sound = sound.set_frame_rate(sr)
    if db:
        change_dBFS = db - sound.dBFS
        sound = sound.apply_gain(change_dBFS)
    sound.export(target_path, 'wav')
Example #13
Source File: decoder.py, from Jamais-Vu (MIT License), 6 votes

def read(filename, limit=None):
    """
    Reads any file supported by pydub (ffmpeg) and returns the data contained
    within. If file reading fails due to input being a 24-bit wav file,
    wavio is used as a backup.

    Can be optionally limited to a certain amount of seconds from the start
    of the file by specifying the `limit` parameter. This is the amount of
    seconds from the start of the file.

    returns: (channels, samplerate)
    """
    # pydub does not support 24-bit wav files, use wavio when this occurs
    try:
        audiofile = AudioSegment.from_file(filename)

        if limit:
            audiofile = audiofile[:limit * 1000]

        data = np.fromstring(audiofile._data, np.int16)

        channels = []
        for chn in xrange(audiofile.channels):
            channels.append(data[chn::audiofile.channels])

        fs = audiofile.frame_rate
    except audioop.error:
        fs, _, audiofile = wavio.readwav(filename)

        if limit:
            audiofile = audiofile[:limit * 1000]

        audiofile = audiofile.T
        audiofile = audiofile.astype(np.int16)

        channels = []
        for chn in audiofile:
            channels.append(chn)

    return channels, audiofile.frame_rate, unique_hash(filename)
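
This example (like Examples #14 and #23 below) converts pydub's raw bytes with np.fromstring, which newer NumPy releases have deprecated. A minimal sketch of the same per-channel split using np.frombuffer, assuming 16-bit samples and a placeholder path:

from pydub import AudioSegment
import numpy as np

audiofile = AudioSegment.from_file("track.mp3")  # placeholder path
data = np.frombuffer(audiofile._data, dtype=np.int16)  # assumes sample_width == 2
channels = [data[chn::audiofile.channels] for chn in range(audiofile.channels)]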
Example #14
Source File: reader_file.py, from audio-fingerprint-identifying-python (MIT License), 6 votes

def parse_audio(self):
    limit = None
    # limit = 10
    songname, extension = os.path.splitext(os.path.basename(self.filename))

    try:
        audiofile = AudioSegment.from_file(self.filename)

        if limit:
            audiofile = audiofile[:limit * 1000]

        data = np.fromstring(audiofile._data, np.int16)

        channels = []
        for chn in xrange(audiofile.channels):
            channels.append(data[chn::audiofile.channels])

        fs = audiofile.frame_rate
    except audioop.error:
        print('audioop.error')
        pass
        # fs, _, audiofile = wavio.readwav(filename)
        # if limit:
        #     audiofile = audiofile[:limit * 1000]
        # audiofile = audiofile.T
        # audiofile = audiofile.astype(np.int16)
        # channels = []
        # for chn in audiofile:
        #     channels.append(chn)

    return {
        "songname": songname,
        "extension": extension,
        "channels": channels,
        "Fs": audiofile.frame_rate,
        "file_hash": self.parse_file_hash()
    }
Example #15
Source File: test_accuracy.py, from nyumaya_audio_recognition (Apache License 2.0), 6 votes

def load_audio_file(filename, resize=False):
    sound = None
    try:
        if filename.endswith('.mp3') or filename.endswith('.MP3'):
            sound = AudioSegment.from_mp3(filename)
        elif filename.endswith('.wav') or filename.endswith('.WAV'):
            sound = AudioSegment.from_wav(filename)
        elif filename.endswith('.ogg'):
            sound = AudioSegment.from_ogg(filename)
        elif filename.endswith('.flac'):
            sound = AudioSegment.from_file(filename, "flac")
        elif filename.endswith('.3gp'):
            sound = AudioSegment.from_file(filename, "3gp")
        elif filename.endswith('.3g'):
            sound = AudioSegment.from_file(filename, "3gp")

        sound = sound.set_frame_rate(samplerate)
        sound = sound.set_channels(1)
        sound = sound.set_sample_width(2)
        duration = sound.duration_seconds
    except:
        print("Couldn't load file")
        return None, None

    return sound, duration
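
As a side note, the per-extension dispatch above can usually be collapsed, since from_file() accepts all of these formats and can typically infer the format from the file extension. A rough sketch, with samplerate standing in for the module-level constant used above:

sound = AudioSegment.from_file(filename)  # mp3/wav/ogg/flac/3gp alike
sound = sound.set_frame_rate(samplerate).set_channels(1).set_sample_width(2)
duration = sound.duration_seconds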
Example #16
Source File: io.py, from auditok (MIT License), 5 votes

def get_audio_source(input=None, **kwargs):
    """
    Create and return an AudioSource from input.

    Parameters:

    `input` : str, bytes, "-" or None
        Source to read audio data from. If str, it should be a path to a
        valid audio file. If bytes, it is interpreted as raw audio data.
        If equals to "-", raw data will be read from stdin. If None, read
        audio data from microphone using PyAudio.
    """
    if input == "-":
        return StdinAudioSource(*_get_audio_parameters(kwargs))

    if isinstance(input, bytes):
        return BufferAudioSource(input, *_get_audio_parameters(kwargs))

    # read data from a file
    if input is not None:
        return from_file(filename=input, **kwargs)

    # read data from microphone via pyaudio
    else:
        frames_per_buffer = kwargs.get("frames_per_buffer", 1024)
        input_device_index = kwargs.get("input_device_index")
        return PyAudioSource(
            *_get_audio_parameters(kwargs),
            frames_per_buffer=frames_per_buffer,
            input_device_index=input_device_index
        )
Example #17
Source File: voice.py, from voice-corpus-tool (Mozilla Public License 2.0), 5 votes

def augment_sample(augmentation):
    index, src_file, dst_file, overlays, gain = augmentation
    orig_seg = AudioSegment.from_file(src_file, format="wav")
    aug_seg = AudioSegment.silent(duration=len(orig_seg))
    for overlay in overlays:
        offset, overlay_file = overlay
        overlay_seg = AudioSegment.from_file(overlay_file, format="wav")
        if offset < 0:
            overlay_seg = overlay_seg[-offset:]
            offset = 0
        aug_seg = aug_seg.overlay(overlay_seg, position=offset)
    aug_seg = aug_seg + (orig_seg.dBFS - aug_seg.dBFS + gain)
    orig_seg = orig_seg.overlay(aug_seg)
    orig_seg.export(dst_file, format="wav")
    return (index, dst_file)
Example #18
Source File: voice.py, from voice-corpus-tool (Mozilla Public License 2.0), 5 votes

def _compr(self, kbit):
    def add_compr(s):
        with tempfile.TemporaryFile() as f:
            seg = s.read_audio_segment()
            seg.export(f, format='mp3', bitrate='%dk' % kbit)
            f.seek(0, 0)
            s.write_audio_segment(AudioSegment.from_file(f, format='mp3'))
    self._map('Adding compression artifacts...', self.samples, add_compr, worker_count=1)
    log('Applied compression artifacts to %d samples in buffer.' % len(self.samples))
Example #19
Source File: voice.py, from voice-corpus-tool (Mozilla Public License 2.0), 5 votes

def read_audio_segment(self):
    self.write()
    return AudioSegment.from_file(self.file.filename, format="wav")
Example #20
Source File: io.py, from auditok (MIT License), 5 votes

def _load_with_pydub(filename, audio_format):
    """
    Open compressed audio file using pydub. If a video file is passed, its
    audio track(s) are extracted and loaded. This function should not be
    called directely, use :func:`from_file` instead.

    :Parameters:

    `filename`: path to audio file.
    `audio_format`: string, audio file format (e.g. raw, webm, wav, ogg)
    """
    func_dict = {
        "mp3": AudioSegment.from_mp3,
        "ogg": AudioSegment.from_ogg,
        "flv": AudioSegment.from_flv,
    }
    open_function = func_dict.get(audio_format, AudioSegment.from_file)
    segment = open_function(filename)
    return BufferAudioSource(
        data=segment.raw_data,
        sampling_rate=segment.frame_rate,
        sample_width=segment.sample_width,
        channels=segment.channels,
    )
Example #21
Source File: feature.py, from mgc-django (GNU General Public License v2.0), 5 votes

def extract(file):
    """
    Extracts audio from a given file
    First the audio is converted into wav format
    """
    s = file.split('.')
    file_format = s[len(s) - 1]

    try:
        song = AudioSegment.from_file(file, file_format)
        # song = AudioSegment.from_mp3(file)
        song = song[:30 * 1000]
        song.export(file[:-3] + "wav", format="wav")
        file = file[:-3] + "wav"
    except Exception as e:
        print(e)

    try:
        (rate, data) = scipy.io.wavfile.read(file)
        mfcc_feat = mfcc(data, rate)
        # redusing mfcc dimension to 104
        mm = np.transpose(mfcc_feat)
        mf = np.mean(mm, axis=1)
        cf = np.cov(mm)
        ff = mf
        # ff is a vector of size 104
        for i in range(mm.shape[0]):
            ff = np.append(ff, np.diag(cf, i))

        if file_format != 'wav':
            os.remove(file)
        return ff.reshape(1, -1)
    except Exception as e:
        print(e)
Example #22
Source File: utils.py, from bard (GNU General Public License v3.0), 5 votes

def calculateAudioTrackSHA256_pyav(path):
    data, properties = audioSamplesFromAudioFile(path)
    audioSha256sum = calculateSHA256_data(data)
    # print('size:', len(audio_segment.raw_data))

    if config['enable_internal_checks']:
        if hasattr(path, 'seek'):
            path.seek(0)
        audio_segment = AudioSegment.from_file(path)
        pydubAudioSha256sum = calculateSHA256_data(audio_segment.raw_data)

        if audio_segment.raw_data != data or \
                pydubAudioSha256sum != audioSha256sum:
            raise Exception('SHA256sum IS DIFFERENT BETWEEN PYAV AND PYDUB')

        print('pyav/pydub decode check ' + TerminalColors.Ok + 'OK' +
              TerminalColors.ENDC)

    return audioSha256sum, data, properties
Example #23
Source File: filereader.py, from shazam-demo (MIT License), 5 votes

def parse_audio(self):
    limit = None
    songname, extension = os.path.splitext(os.path.basename(self.filename))

    try:
        audiofile = AudioSegment.from_file(self.filename)

        if limit:
            audiofile = audiofile[:limit * 1000]

        data = np.fromstring(audiofile._data, np.int16)

        channels = []
        for chn in xrange(audiofile.channels):
            channels.append(data[chn::audiofile.channels])

        fs = audiofile.frame_rate
    except audioop.error:
        print('audioop.error')
        pass

    return {
        "songname": songname,
        "extension": extension,
        "channels": channels,
        "Fs": audiofile.frame_rate,
        "file_hash": self.parse_file_hash()
    }
Example #24
Source File: utils.py, from bard (GNU General Public License v3.0), 5 votes

def calculateAudioTrackSHA256_pydub(path):
    audio_segment = AudioSegment.from_file(path)
    audioSha256sum = calculateSHA256_data(audio_segment.raw_data)
    # print('size:', len(audio_segment.raw_data))
    return audioSha256sum
Example #25
Source File: backup.py, from bard (GNU General Public License v3.0), 5 votes

def remoteFileAudioSha256Sum(path, sftp):
    data = remoteFile(path, sftp)
    if not data:
        return None
    audio_segment = AudioSegment.from_file(data)
    return calculateSHA256_data(audio_segment.raw_data)
Example #26
Source File: audiotools.py, from kur (Apache License 2.0), 5 votes

def get_mime_type(filename):
    """ Returns the MIME type associated with a particular audio file.
    """
    try:
        import magic
    except ImportError:
        if get_mime_type.warn:
            logger.warning('Python package "magic" could not be loaded, '
                'possibly because system library "libmagic" could not be '
                'found. We are falling back on our own heuristics.')
            get_mime_type.warn = False
        ext = os.path.splitext(filename)[1].lower()
        return {
            '.wav' : 'audio/x-wav',
            '.mp3' : 'audio/mpeg',
            '.flac' : 'audio/x-flac'
        }.get(ext, 'unknown')
    else:
        # Read off magic numbers and return MIME types
        mime_magic = magic.Magic(mime=True)
        ftype = mime_magic.from_file(filename)
        if isinstance(ftype, bytes):
            ftype = ftype.decode('utf-8')

        # If we are dealing with a symlink, read the link
        # and try again with the target file. We do this in
        # a while loop to cover the case of symlinks which
        # point to other symlinks.
        current_filename = filename
        while ftype == 'inode/symlink':
            current_filename = os.readlink(current_filename)
            ftype = mime_magic.from_file(current_filename)
            ftype = ftype.decode('utf-8') if isinstance(ftype, bytes) else ftype

        return ftype
Example #27
Source File: audiotools.py, from kur (Apache License 2.0), 5 votes

def load_pydub(filename):
    """ Loads an MP3 or FLAC file.
    """
    try:
        from pydub import AudioSegment
        data = AudioSegment.from_file(filename)
    except ImportError:
        logger.exception('"pydub" is a required Python dependency for '
            'handling this audio file: %s.', filename)
        raise
    except FileNotFoundError:
        if os.path.isfile(filename):
            raise DependencyError()
        else:
            raise

    if data.channels > 1:
        data = functools.reduce(
            lambda x, y: x.overlay(y),
            data.split_to_mono()
        )

    raw = data.get_array_of_samples()
    raw = numpy.frombuffer(raw, dtype=raw.typecode)

    return {
        'signal' : raw,
        'sample_rate' : data.frame_rate,
        'sample_width' : data.sample_width * 8,
        'channels' : data.channels
    }
Example #28
Source File: audio.py, from Speech_emotion_recognition_BLSTM (MIT License), 5 votes

def extract_audio_track(self):
    for video in glob.glob(self._video_path + '*.mp4'):
        wav_filename = self._out_path + os.path.splitext(os.path.basename(video))[0] + '.wav'
        AudioSegment.from_file(video).export(wav_filename, format='wav')
Example #29
Source File: pydub_utils.py, from nnabla (Apache License 2.0), 5 votes

def auread(path, channel_first=False, raw_format_param=None):
    """
    Read audio with pydub module.

    Args:
        path (str or 'file object'): File path or object to read from.
            Currently only support .wav format audio, .raw format audio
            could be read only when additional params are provided.
        channel_first (bool):
            This argument specifies the shape of audio is whether
            (samples, channels) or (channels, samples).
            Default value is False, which means the audio shape shall
            be (samples, channels).
        raw_format_param (object):
            If audio is raw format, user should provide this object,
            example:
                {
                    'sample_width': 2,
                    'channels': 2,
                    'frame_rate': 44100
                }

    Returns:
        numpy.ndarray
    """
    _auread_before(path, raw_format_param)
    filepath = path if isinstance(path, str) else path.name
    audio_format = os.path.splitext(filepath)[-1][1:]
    if audio_format == 'raw':
        audio = AudioSegment.from_file(
            path, format=audio_format, **raw_format_param)
    else:
        audio = AudioSegment.from_file(path, format=audio_format)
    audio_arr = get_nparray_from_pydub(audio)
    if audio_arr.dtype.itemsize == 1 and audio_format == 'wav':
        # 8-bit wav file value should in uint8 range, but pydub read it as int8
        audio_arr = audio_arr.astype(np.uint8)
    return _auread_after(audio_arr, channel_first)
Example #30
Source File: utils.py, from fake-voice-detection (Apache License 2.0), 5 votes

def convert_to_flac(dir_path):
    for file_path in os.listdir(dir_path):
        if file_path.split('.')[-1] != "flac":
            read_file = AudioSegment.from_file(os.path.join(dir_path, file_path),
                                               file_path.split('.')[-1])
            os.remove(os.path.join(dir_path, file_path))
            base_name = file_path.split('.')[:-1]
            # read_file = read_file.set_channels(8)
            # base_name = ".".join(base_name)
            read_file.export(os.path.join(dir_path, f"{base_name[0]}.flac"), format="flac")