Python torch.hamming_window() Examples
The following are 16 code examples of torch.hamming_window().
You can go to the original project or source file by following the link above each example,
or check out all available functions and classes of the torch module.
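Before the project examples, here is a minimal standalone sketch of the function itself. The window length, signal, and STFT parameters are arbitrary illustration values, and return_complex=True assumes a reasonably recent PyTorch release:

import torch

# Symmetric window (periodic=False) of length N:
#   w[n] = alpha - beta * cos(2*pi*n / (N - 1)), with alpha=0.54, beta=0.46 by default.
N = 400
w_sym = torch.hamming_window(N, periodic=False)

# The periodic variant (the default) is meant for spectral analysis; it equals the
# symmetric window of length N + 1 with the last sample dropped.
w_per = torch.hamming_window(N)
assert torch.allclose(w_per, torch.hamming_window(N + 1, periodic=False)[:-1])

# Typical use: pass the window to torch.stft.
signal = torch.randn(16000)
spec = torch.stft(signal, n_fft=512, hop_length=160, win_length=N,
                  window=w_sym, return_complex=True)
print(spec.shape)  # (n_fft // 2 + 1, num_frames)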
Example #1
Source File: kaldi.py From audio with BSD 2-Clause "Simplified" License | 6 votes |
def _feature_window_function(window_type: str,
                             window_size: int,
                             blackman_coeff: float,
                             device: torch.device,
                             dtype: int,
                             ) -> Tensor:
    r"""Returns a window function with the given type and size
    """
    if window_type == HANNING:
        return torch.hann_window(window_size, periodic=False, device=device, dtype=dtype)
    elif window_type == HAMMING:
        return torch.hamming_window(window_size, periodic=False, alpha=0.54, beta=0.46, device=device, dtype=dtype)
    elif window_type == POVEY:
        # like hanning but goes to zero at edges
        return torch.hann_window(window_size, periodic=False, device=device, dtype=dtype).pow(0.85)
    elif window_type == RECTANGULAR:
        return torch.ones(window_size, device=device, dtype=dtype)
    elif window_type == BLACKMAN:
        a = 2 * math.pi / (window_size - 1)
        window_function = torch.arange(window_size, device=device, dtype=dtype)
        # can't use torch.blackman_window as they use different coefficients
        return (blackman_coeff - 0.5 * torch.cos(a * window_function) +
                (0.5 - blackman_coeff) * torch.cos(2 * a * window_function)).to(device=device, dtype=dtype)
    else:
        raise Exception('Invalid window type ' + window_type)
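The HAMMING branch above relies on torch.hamming_window(..., periodic=False, alpha=0.54, beta=0.46) reproducing the textbook symmetric Hamming window. A quick check of that identity, independent of the torchaudio code and using an arbitrary window size:

import math
import torch

window_size = 400
n = torch.arange(window_size, dtype=torch.float64)
manual = 0.54 - 0.46 * torch.cos(2 * math.pi * n / (window_size - 1))
builtin = torch.hamming_window(window_size, periodic=False,
                               alpha=0.54, beta=0.46, dtype=torch.float64)
assert torch.allclose(manual, builtin)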
Example #2
Source File: audio_preprocessing.py From NeMo with Apache License 2.0 | 6 votes |
def __init__(self, win_length, hop_length):
    super().__init__()
    self.win_length = win_length
    self.hop_length = hop_length
    self.disable_casts = self._opt_level == Optimization.mxprO1
    self.torch_windows = {
        'hann': torch.hann_window,
        'hamming': torch.hamming_window,
        'blackman': torch.blackman_window,
        'bartlett': torch.bartlett_window,
        'ones': torch.ones,
        None: torch.ones,
    }
Example #3
Source File: modules.py From ddsp_pytorch with GNU General Public License v3.0 | 6 votes |
def get_window(name, window_length, squared=False):
    """
    Returns a windowing function.

    Arguments:
    ----------
        window (str)        : name of the window, currently only 'hann' is available
        window_length (int) : length of the window
        squared (bool)      : if true, square the window

    Returns:
    ----------
        torch.FloatTensor   : window of size `window_length`
    """
    if name == "hann":
        window = torch.hann_window(window_length)
    elif name == "hamming":
        window = torch.hamming_window(window_length)
    elif name == "blackman":
        window = torch.blackman_window(window_length)
    else:
        raise ValueError("Invalid window name {}".format(name))
    if squared:
        window *= window
    return window
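A minimal usage sketch for the helper above; the window length, hop size, and random input are illustrative values rather than settings from the project, and return_complex=True assumes a recent PyTorch:

import torch

window = get_window("hamming", 1024)   # equivalent to torch.hamming_window(1024)
audio = torch.randn(1, 44100)          # hypothetical batch of one second at 44.1 kHz
stft = torch.stft(audio, n_fft=1024, hop_length=256, win_length=1024,
                  window=window, return_complex=True)
print(stft.shape)                      # (1, 513, num_frames)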
Example #4
Source File: test_transforms.py From audio with BSD 2-Clause "Simplified" License | 5 votes |
def test_mel2(self):
    top_db = 80.
    s2db = transforms.AmplitudeToDB('power', top_db)

    waveform = self.waveform.clone()  # (1, 16000)
    waveform_scaled = self.scale(waveform)  # (1, 16000)
    mel_transform = transforms.MelSpectrogram()
    # check defaults
    spectrogram_torch = s2db(mel_transform(waveform_scaled))  # (1, 128, 321)
    self.assertTrue(spectrogram_torch.dim() == 3)
    self.assertTrue(spectrogram_torch.ge(spectrogram_torch.max() - top_db).all())
    self.assertEqual(spectrogram_torch.size(1), mel_transform.n_mels)
    # check correctness of filterbank conversion matrix
    self.assertTrue(mel_transform.mel_scale.fb.sum(1).le(1.).all())
    self.assertTrue(mel_transform.mel_scale.fb.sum(1).ge(0.).all())
    # check options
    kwargs = {'window_fn': torch.hamming_window, 'pad': 10, 'win_length': 500,
              'hop_length': 125, 'n_fft': 800, 'n_mels': 50}
    mel_transform2 = transforms.MelSpectrogram(**kwargs)
    spectrogram2_torch = s2db(mel_transform2(waveform_scaled))  # (1, 50, 513)
    self.assertTrue(spectrogram2_torch.dim() == 3)
    self.assertTrue(spectrogram_torch.ge(spectrogram_torch.max() - top_db).all())
    self.assertEqual(spectrogram2_torch.size(1), mel_transform2.n_mels)
    self.assertTrue(mel_transform2.mel_scale.fb.sum(1).le(1.).all())
    self.assertTrue(mel_transform2.mel_scale.fb.sum(1).ge(0.).all())
    # check on multi-channel audio
    filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
    x_stereo, sr_stereo = torchaudio.load(filepath)  # (2, 278756), 44100
    spectrogram_stereo = s2db(mel_transform(x_stereo))  # (2, 128, 1394)
    self.assertTrue(spectrogram_stereo.dim() == 3)
    self.assertTrue(spectrogram_stereo.size(0) == 2)
    self.assertTrue(spectrogram_torch.ge(spectrogram_torch.max() - top_db).all())
    self.assertEqual(spectrogram_stereo.size(1), mel_transform.n_mels)
    # check filterbank matrix creation
    fb_matrix_transform = transforms.MelScale(
        n_mels=100, sample_rate=16000, f_min=0., f_max=None, n_stft=400)
    self.assertTrue(fb_matrix_transform.fb.sum(1).le(1.).all())
    self.assertTrue(fb_matrix_transform.fb.sum(1).ge(0.).all())
    self.assertEqual(fb_matrix_transform.fb.size(), (400, 100))
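Distilled from the kwargs used in the test above, this is roughly what a Hamming-windowed mel spectrogram pipeline looks like outside the test harness. The random waveform is a stand-in for real audio, and the exact frame count depends on the input length:

import torch
import torchaudio

waveform = torch.randn(1, 16000)  # hypothetical one second of audio at 16 kHz
mel = torchaudio.transforms.MelSpectrogram(
    sample_rate=16000, n_fft=800, win_length=500, hop_length=125,
    pad=10, n_mels=50, window_fn=torch.hamming_window)
to_db = torchaudio.transforms.AmplitudeToDB('power', top_db=80.)
mel_db = to_db(mel(waveform))
print(mel_db.shape)  # (1, 50, num_frames)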
Example #5
Source File: functional_cpu_test.py From audio with BSD 2-Clause "Simplified" License | 5 votes |
def test_istft_is_inverse_of_stft3(self):
    # hamming_window, centered, normalized, not onesided
    kwargs3 = {
        'n_fft': 15,
        'hop_length': 3,
        'win_length': 11,
        'window': torch.hamming_window(11),
        'center': True,
        'pad_mode': 'constant',
        'normalized': True,
        'onesided': False,
    }
    _test_istft_is_inverse_of_stft(kwargs3)
Example #6
Source File: functional_cpu_test.py From audio with BSD 2-Clause "Simplified" License | 5 votes |
def test_istft_is_inverse_of_stft4(self):
    # hamming_window, not centered, not normalized, onesided
    # window same size as n_fft
    kwargs4 = {
        'n_fft': 5,
        'hop_length': 2,
        'win_length': 5,
        'window': torch.hamming_window(5),
        'center': False,
        'pad_mode': 'constant',
        'normalized': False,
        'onesided': True,
    }
    _test_istft_is_inverse_of_stft(kwargs4)
Example #7
Source File: functional_cpu_test.py From audio with BSD 2-Clause "Simplified" License | 5 votes |
def test_istft_is_inverse_of_stft5(self):
    # hamming_window, not centered, not normalized, not onesided
    # window same size as n_fft
    kwargs5 = {
        'n_fft': 3,
        'hop_length': 2,
        'win_length': 3,
        'window': torch.hamming_window(3),
        'center': False,
        'pad_mode': 'reflect',
        'normalized': False,
        'onesided': False,
    }
    _test_istft_is_inverse_of_stft(kwargs5)
Example #8
Source File: functional_cpu_test.py From audio with BSD 2-Clause "Simplified" License | 5 votes |
def test_linearity_of_istft4(self):
    # hamming_window, not centered, not normalized, onesided
    kwargs4 = {
        'n_fft': 12,
        'window': torch.hamming_window(12),
        'center': False,
        'pad_mode': 'constant',
        'normalized': False,
        'onesided': True,
    }
    data_size = (2, 7, 3, 2)
    self._test_linearity_of_istft(data_size, kwargs4, atol=1e-5, rtol=1e-8)
Example #9
Source File: features.py From training with Apache License 2.0 | 5 votes |
def __init__(self, sample_rate=8000, window_size=0.02, window_stride=0.01,
             n_fft=None, window="hamming", normalize="per_feature",
             log=True, center=True, dither=constant, pad_to=8,
             max_duration=16.7, frame_splicing=1):
    super(SpectrogramFeatures, self).__init__()
    torch_windows = {
        'hann': torch.hann_window,
        'hamming': torch.hamming_window,
        'blackman': torch.blackman_window,
        'bartlett': torch.bartlett_window,
        'none': None,
    }

    self.win_length = int(sample_rate * window_size)
    self.hop_length = int(sample_rate * window_stride)
    self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length))

    window_fn = torch_windows.get(window, None)
    window_tensor = window_fn(self.win_length,
                              periodic=False) if window_fn else None
    self.window = window_tensor

    self.normalize = normalize
    self.log = log
    self.center = center
    self.dither = dither
    self.pad_to = pad_to
    self.frame_splicing = frame_splicing

    max_length = 1 + math.ceil(
        (max_duration * sample_rate - self.win_length) / self.hop_length
    )
    max_pad = 16 - (max_length % 16)
    self.max_length = max_length + max_pad
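Worked through with the default arguments above, the derived STFT parameters come out as follows (a small illustration, not part of the source file):

import math
import torch

sample_rate, window_size, window_stride = 8000, 0.02, 0.01
win_length = int(sample_rate * window_size)                # 160 samples (20 ms frames)
hop_length = int(sample_rate * window_stride)              # 80 samples (10 ms hop)
n_fft = 2 ** math.ceil(math.log2(win_length))              # 256, next power of two >= 160
window = torch.hamming_window(win_length, periodic=False)  # what torch_windows['hamming'] yields
print(win_length, hop_length, n_fft, window.shape)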
Example #10
Source File: feature.py From KoSpeech with Apache License 2.0 | 5 votes |
def __call__(self, signal):
    spectrogram = torch.stft(
        torch.FloatTensor(signal), self.n_fft,
        hop_length=self.hop_length,
        win_length=self.n_fft, window=torch.hamming_window(self.n_fft),
        center=False, normalized=False, onesided=True
    )
    spectrogram = (spectrogram[:, :, 0].pow(2) + spectrogram[:, :, 1].pow(2)).pow(0.5)
    spectrogram = np.log1p(spectrogram.numpy())
    return spectrogram
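The indexing into spectrogram[:, :, 0] and [:, :, 1] assumes the older real-valued torch.stft output with a trailing real/imaginary dimension. On more recent PyTorch releases (roughly 1.8 and later) the same feature can be computed from a complex STFT; a sketch under that assumption, with hypothetical n_fft and hop_length values standing in for the instance attributes:

import numpy as np
import torch

def log_spectrogram(signal, n_fft=320, hop_length=160):
    # .abs() on the complex STFT replaces the manual sqrt(real**2 + imag**2) above.
    spec = torch.stft(torch.FloatTensor(signal), n_fft,
                      hop_length=hop_length, win_length=n_fft,
                      window=torch.hamming_window(n_fft),
                      center=False, normalized=False, onesided=True,
                      return_complex=True)
    return np.log1p(spec.abs().numpy())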
Example #11
Source File: features.py From training with Apache License 2.0 | 4 votes |
def __init__(self, sample_rate=8000, window_size=0.02, window_stride=0.01,
             window="hamming", normalize="per_feature", n_fft=None,
             preemph=0.97, nfilt=64, lowfreq=0, highfreq=None, log=True,
             dither=constant, pad_to=8, max_duration=16.7,
             frame_splicing=1):
    super(FilterbankFeatures, self).__init__()
    # print("PADDING: {}".format(pad_to))

    torch_windows = {
        'hann': torch.hann_window,
        'hamming': torch.hamming_window,
        'blackman': torch.blackman_window,
        'bartlett': torch.bartlett_window,
        'none': None,
    }

    self.win_length = int(sample_rate * window_size)  # frame size
    self.hop_length = int(sample_rate * window_stride)
    self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length))

    self.normalize = normalize
    self.log = log
    self.dither = dither
    self.frame_splicing = frame_splicing
    self.nfilt = nfilt
    self.preemph = preemph
    self.pad_to = pad_to
    highfreq = highfreq or sample_rate / 2

    window_fn = torch_windows.get(window, None)
    window_tensor = window_fn(self.win_length,
                              periodic=False) if window_fn else None
    filterbanks = torch.tensor(
        librosa.filters.mel(sample_rate, self.n_fft, n_mels=nfilt,
                            fmin=lowfreq, fmax=highfreq),
        dtype=torch.float).unsqueeze(0)
    # self.fb = filterbanks
    # self.window = window_tensor
    self.register_buffer("fb", filterbanks)
    self.register_buffer("window", window_tensor)

    # Calculate maximum sequence length (# frames)
    max_length = 1 + math.ceil(
        (max_duration * sample_rate - self.win_length) / self.hop_length
    )
    max_pad = 16 - (max_length % 16)
    self.max_length = max_length + max_pad
Example #12
Source File: dnn_models.py From SincNet with MIT License | 4 votes |
def __init__(self, out_channels, kernel_size, sample_rate=16000, in_channels=1,
             stride=1, padding=0, dilation=1, bias=False, groups=1,
             min_low_hz=50, min_band_hz=50):

    super(SincConv_fast, self).__init__()

    if in_channels != 1:
        # msg = (f'SincConv only support one input channel '
        #        f'(here, in_channels = {in_channels:d}).')
        msg = "SincConv only support one input channel (here, in_channels = {%i})" % (in_channels)
        raise ValueError(msg)

    self.out_channels = out_channels
    self.kernel_size = kernel_size

    # Forcing the filters to be odd (i.e, perfectly symmetrics)
    if kernel_size % 2 == 0:
        self.kernel_size = self.kernel_size + 1

    self.stride = stride
    self.padding = padding
    self.dilation = dilation

    if bias:
        raise ValueError('SincConv does not support bias.')
    if groups > 1:
        raise ValueError('SincConv does not support groups.')

    self.sample_rate = sample_rate
    self.min_low_hz = min_low_hz
    self.min_band_hz = min_band_hz

    # initialize filterbanks such that they are equally spaced in Mel scale
    low_hz = 30
    high_hz = self.sample_rate / 2 - (self.min_low_hz + self.min_band_hz)

    mel = np.linspace(self.to_mel(low_hz),
                      self.to_mel(high_hz),
                      self.out_channels + 1)
    hz = self.to_hz(mel)

    # filter lower frequency (out_channels, 1)
    self.low_hz_ = nn.Parameter(torch.Tensor(hz[:-1]).view(-1, 1))

    # filter frequency band (out_channels, 1)
    self.band_hz_ = nn.Parameter(torch.Tensor(np.diff(hz)).view(-1, 1))

    # Hamming window
    # self.window_ = torch.hamming_window(self.kernel_size)
    n_lin = torch.linspace(0, (self.kernel_size / 2) - 1,
                           steps=int(self.kernel_size / 2))  # computing only half of the window
    self.window_ = 0.54 - 0.46 * torch.cos(2 * math.pi * n_lin / self.kernel_size)

    # (1, kernel_size/2)
    n = (self.kernel_size - 1) / 2.0
    # Due to symmetry, I only need half of the time axes
    self.n_ = 2 * math.pi * torch.arange(-n, 0).view(1, -1) / self.sample_rate
Example #13
Source File: features.py From inference with Apache License 2.0 | 4 votes |
def __init__(self, sample_rate=8000, window_size=0.02, window_stride=0.01,
             window="hamming", normalize="per_feature", n_fft=None,
             preemph=0.97, nfilt=64, lowfreq=0, highfreq=None, log=True,
             dither=constant, pad_to=8, max_duration=16.7,
             frame_splicing=1):
    super(FilterbankFeatures, self).__init__()
    # print("PADDING: {}".format(pad_to))

    torch_windows = {
        'hann': torch.hann_window,
        'hamming': torch.hamming_window,
        'blackman': torch.blackman_window,
        'bartlett': torch.bartlett_window,
        'none': None,
    }

    self.win_length = int(sample_rate * window_size)  # frame size
    self.hop_length = int(sample_rate * window_stride)
    self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length))

    self.normalize = normalize
    self.log = log
    self.dither = dither
    self.frame_splicing = frame_splicing
    self.nfilt = nfilt
    self.preemph = preemph
    self.pad_to = pad_to
    # For now, always enable this.
    # See https://docs.google.com/presentation/d/1IVC3J-pHB-ipJpKsJox_SqmDHYdkIaoCXTbKmJmV2-I/edit?usp=sharing for elaboration
    self.use_deterministic_dithering = True
    highfreq = highfreq or sample_rate / 2

    window_fn = torch_windows.get(window, None)
    window_tensor = window_fn(self.win_length,
                              periodic=False) if window_fn else None
    filterbanks = torch.tensor(
        librosa.filters.mel(sample_rate, self.n_fft, n_mels=nfilt,
                            fmin=lowfreq, fmax=highfreq),
        dtype=torch.float).unsqueeze(0)
    # self.fb = filterbanks
    # self.window = window_tensor
    self.register_buffer("fb", filterbanks)
    self.register_buffer("window", window_tensor)

    # Calculate maximum sequence length (# frames)
    max_length = 1 + math.ceil(
        (max_duration * sample_rate - self.win_length) / self.hop_length
    )
    max_pad = 16 - (max_length % 16)
    self.max_length = max_length + max_pad
Example #14
Source File: neural_networks.py From pase with MIT License | 4 votes |
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, bias=False, groups=1, sample_rate=16000,
             min_low_hz=50, min_band_hz=50):

    super(SincConv_fast, self).__init__()

    if in_channels != 1:
        # msg = (f'SincConv only support one input channel '
        #        f'(here, in_channels = {in_channels:d}).')
        msg = "SincConv only support one input channel (here, in_channels = {%i})" % (in_channels)
        raise ValueError(msg)

    self.out_channels = out_channels
    self.kernel_size = kernel_size

    # Forcing the filters to be odd (i.e, perfectly symmetrics)
    if kernel_size % 2 == 0:
        self.kernel_size = self.kernel_size + 1

    self.stride = stride
    self.padding = padding
    self.dilation = dilation

    if bias:
        raise ValueError('SincConv does not support bias.')
    if groups > 1:
        raise ValueError('SincConv does not support groups.')

    self.sample_rate = sample_rate
    self.min_low_hz = min_low_hz
    self.min_band_hz = min_band_hz

    # initialize filterbanks such that they are equally spaced in Mel scale
    low_hz = 30
    high_hz = self.sample_rate / 2 - (self.min_low_hz + self.min_band_hz)

    mel = np.linspace(self.to_mel(low_hz),
                      self.to_mel(high_hz),
                      self.out_channels + 1)
    hz = self.to_hz(mel)

    # filter lower frequency (out_channels, 1)
    self.low_hz_ = nn.Parameter(torch.Tensor(hz[:-1]).view(-1, 1))

    # filter frequency band (out_channels, 1)
    self.band_hz_ = nn.Parameter(torch.Tensor(np.diff(hz)).view(-1, 1))

    # Hamming window
    # self.window_ = torch.hamming_window(self.kernel_size)
    n_lin = torch.linspace(0, (self.kernel_size / 2) - 1,
                           steps=int(self.kernel_size / 2))  # computing only half of the window
    self.window_ = 0.54 - 0.46 * torch.cos(2 * math.pi * n_lin / self.kernel_size)

    # (kernel_size, 1)
    n = (self.kernel_size - 1) / 2.0
    # Due to symmetry, I only need half of the time axes
    self.n_ = 2 * math.pi * torch.arange(-n, 0).view(1, -1) / self.sample_rate
Example #15
Source File: neural_networks.py From pase with MIT License | 4 votes |
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, bias=False, groups=1, sample_rate=16000,
             min_low_hz=50, min_band_hz=50):

    super(SincConv, self).__init__()

    if in_channels != 1:
        # msg = (f'SincConv only support one input channel '
        #        f'(here, in_channels = {in_channels:d}).')
        msg = "SincConv only support one input channel (here, in_channels = {%i})" % (in_channels)
        raise ValueError(msg)

    self.out_channels = out_channels
    self.kernel_size = kernel_size

    # Forcing the filters to be odd (i.e, perfectly symmetrics)
    if kernel_size % 2 == 0:
        self.kernel_size = self.kernel_size + 1

    self.stride = stride
    self.padding = padding
    self.dilation = dilation

    if bias:
        raise ValueError('SincConv does not support bias.')
    if groups > 1:
        raise ValueError('SincConv does not support groups.')

    self.sample_rate = sample_rate
    self.min_low_hz = min_low_hz
    self.min_band_hz = min_band_hz

    # initialize filterbanks such that they are equally spaced in Mel scale
    low_hz = 30
    high_hz = self.sample_rate / 2 - (self.min_low_hz + self.min_band_hz)

    mel = np.linspace(self.to_mel(low_hz),
                      self.to_mel(high_hz),
                      self.out_channels + 1)
    hz = self.to_hz(mel) / self.sample_rate

    # filter lower frequency (out_channels, 1)
    self.low_hz_ = nn.Parameter(torch.Tensor(hz[:-1]).view(-1, 1))

    # filter frequency band (out_channels, 1)
    self.band_hz_ = nn.Parameter(torch.Tensor(np.diff(hz)).view(-1, 1))

    # Hamming window
    # self.window_ = torch.hamming_window(self.kernel_size)
    n_lin = torch.linspace(0, self.kernel_size, steps=self.kernel_size)
    self.window_ = 0.54 - 0.46 * torch.cos(2 * math.pi * n_lin / self.kernel_size)

    # (kernel_size, 1)
    n = (self.kernel_size - 1) / 2
    self.n_ = torch.arange(-n, n + 1).view(1, -1) / self.sample_rate
Example #16
Source File: modules.py From pase with MIT License | 4 votes |
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding='VALID', pad_mode='reflect', dilation=1, bias=False,
             groups=1, sample_rate=16000, min_low_hz=50, min_band_hz=50):

    super(SincConv_fast, self).__init__()

    if in_channels != 1:
        # msg = (f'SincConv only support one input channel '
        #        f'(here, in_channels = {in_channels:d}).')
        msg = "SincConv only support one input channel (here, in_channels = {%i})" % (in_channels)
        raise ValueError(msg)

    self.out_channels = out_channels
    self.kernel_size = kernel_size

    # Forcing the filters to be odd (i.e, perfectly symmetrics)
    if kernel_size % 2 == 0:
        self.kernel_size = self.kernel_size + 1

    self.stride = stride
    self.padding = padding
    self.pad_mode = pad_mode
    self.dilation = dilation

    if bias:
        raise ValueError('SincConv does not support bias.')
    if groups > 1:
        raise ValueError('SincConv does not support groups.')

    self.sample_rate = sample_rate
    self.min_low_hz = min_low_hz
    self.min_band_hz = min_band_hz

    # initialize filterbanks such that they are equally spaced in Mel scale
    low_hz = 30
    high_hz = self.sample_rate / 2 - (self.min_low_hz + self.min_band_hz)

    mel = np.linspace(self.to_mel(low_hz),
                      self.to_mel(high_hz),
                      self.out_channels + 1)
    hz = self.to_hz(mel)

    # filter lower frequency (out_channels, 1)
    self.low_hz_ = nn.Parameter(torch.Tensor(hz[:-1]).view(-1, 1))

    # filter frequency band (out_channels, 1)
    self.band_hz_ = nn.Parameter(torch.Tensor(np.diff(hz)).view(-1, 1))

    # Hamming window
    # self.window_ = torch.hamming_window(self.kernel_size)
    n_lin = torch.linspace(0, (self.kernel_size / 2) - 1,
                           steps=int(self.kernel_size / 2))  # computing only half of the window
    self.window_ = 0.54 - 0.46 * torch.cos(2 * math.pi * n_lin / self.kernel_size)

    # (kernel_size, 1)
    n = (self.kernel_size - 1) / 2.0
    # Due to symmetry, I only need half of the time axes
    self.n_ = 2 * math.pi * torch.arange(-n, 0).view(1, -1) / self.sample_rate