Source code for vak.transforms.functional

import numpy as np
import torch

__all__ = [
    "pad_to_window",
    "standardize_spect",
    "to_floattensor",
    "to_longtensor",
    "view_as_window_batch",
]


[docs] def standardize_spect(spect, mean_freqs, std_freqs, non_zero_std): """standardize spectrogram by subtracting off mean and dividing by standard deviation. Parameters ---------- spect : numpy.ndarray with shape (frequencies, time bins) mean_freqs : numpy.ndarray vector of mean values for each frequency bin across the fit set of spectrograms std_freqs : numpy.ndarray vector of standard deviations for each frequency bin across the fit set of spectrograms non_zero_std : numpy.ndarray boolean, indicates where std_freqs has non-zero values. Used to avoid divide-by-zero errors. Returns ------- transformed : numpy.ndarray with same shape as spect but with (approximately) zero mean and unit standard deviation (mean and standard devation will still vary by batch). """ tfm = spect - mean_freqs[:, np.newaxis] # need axis for broadcasting # keep any stds that are zero from causing NaNs tfm[non_zero_std, :] = ( tfm[non_zero_std, :] / std_freqs[non_zero_std, np.newaxis] ) return tfm
[docs] def pad_to_window(arr, window_size, padval=0.0, return_padding_mask=True): """pad a 1d or 2d array so that it can be reshaped into consecutive windows of specified size Parameters ---------- arr : numpy.ndarray with 1 or 2 dimensions, e.g. a vector of labeled timebins or a spectrogram. window_size : int width of window in number of elements. padval : float value to pad with. Added to end of array, the "right side" if 2-dimensional. return_padding_mask : bool if True, return a boolean vector to use for cropping back down to size before padding. padding_mask has size equal to width of padded array, i.e. original size plus padding at the end, and has values of 1 where columns in padded are from the original array, and values of 0 where columns were added for padding. Returns ------- padded : numpy.ndarray Padded with ``padval`` padding_mask : numpy.ndarray Boolean vector with size equal to width of padded, i.e. original size plus padding at the end. Has values of ``True`` where columns in ``padded`` are from the original array, and values of ``False`` where columns were added for padding. Only returned if ``return_padding_mask`` is ``True``. """ if arr.ndim == 1: width = arr.shape[0] elif arr.ndim == 2: height, width = arr.shape else: raise ValueError( f"input array must be 1d or 2d but number of dimensions was: {arr.ndim}" ) target_width = int(np.ceil(width / window_size) * window_size) if arr.ndim == 1: padded = np.ones((target_width,)) * padval padded[:width] = arr elif arr.ndim == 2: padded = np.ones((height, target_width)) * padval padded[:, :width] = arr if return_padding_mask: padding_mask = np.zeros((target_width,), dtype=bool) padding_mask[:width] = True return padded, padding_mask else: return padded
[docs] def view_as_window_batch(arr, window_width): """return view of a 1d or 2d array as a batch of non-overlapping windows Parameters ---------- arr : numpy.ndarray with 1 or 2 dimensions, e.g. a vector of labeled timebins or a 2-d array representing a spectrogram. If the array has 2-d dimensions, the returned array will have dimensions (batch, height of array, window width) window_width : int width of window in number of elements. Returns ------- batch_windows : numpy.ndarray with shape (batch size, window_size) if array is 1d, or with shape (batch size, height, window_size) if array is 2d. Batch size will be arr.shape[-1] // window_width. Window width must divide arr.shape[-1] evenly. To pad the array so it can be divided into windows of the specified width, use the `pad_to_window` transform Notes ----- adapted from skimage.util.view_as_blocks https://github.com/scikit-image/scikit-image/blob/f1b7cf60fb80822849129cb76269b75b8ef18db1/skimage/util/shape.py#L9 """ if not isinstance(window_width, int) or window_width < 1: raise ValueError( f"`window_width` must be a positive integer, but was: {window_width}" ) if arr.ndim == 1: window_shape = (window_width,) elif arr.ndim == 2: height, _ = arr.shape window_shape = (height, window_width) else: raise ValueError( f"input array must be 1d or 2d but number of dimensions was: {arr.ndim}" ) window_shape = np.array(window_shape) arr_shape = np.array(arr.shape) if (arr_shape % window_shape).sum() != 0: raise ValueError( "'window_width' does not divide evenly into with 'arr' shape. " "Use 'pad_to_window' transform to pad array so it can be windowed." ) new_shape = tuple(arr_shape // window_shape) + tuple(window_shape) new_strides = tuple(arr.strides * window_shape) + arr.strides batch_windows = np.lib.stride_tricks.as_strided( arr, shape=new_shape, strides=new_strides ) # TODO: figure out if there's a better way to do this where we don't need to squeeze # The current version always add an initial dim of size 1 batch_windows = np.squeeze(batch_windows, axis=0) # By squeezing just that first axis, we always end up with (batch, freq. bins, time bins) for a spectrogram return batch_windows
[docs] def to_floattensor(arr): """convert Numpy array to torch.FloatTensor. Parameters ---------- arr : numpy.ndarray Returns ------- float_tensor with dtype 'float32' """ return torch.from_numpy(arr).float()
[docs] def to_longtensor(arr): """convert Numpy array to torch.LongTensor. Parameters ---------- arr : numpy.ndarray Returns ------- long_tensor : torch.Tensor with dtype 'float64' """ return torch.from_numpy(arr).long()
[docs] def add_channel(input, channel_dim=0): """Add a "channel" dimension to a tensor. Transform that makes it easy to treat a spectrogram as an image, by adding a dimension with a single 'channel', analogous to grayscale. In this way the tensor can be fed to e.g. convolutional layers. Parameters ---------- input : torch.Tensor channel_dim : int dimension where "channel" is added. Default is 0. """ return torch.unsqueeze(input, dim=channel_dim)