[docs]deffind_audio_fname(spect_path:str|pathlib.Path,audio_ext:str|None=None):"""finds name of audio file in a path to a spectrogram file, if one is present. Checks for any extension that is a valid audio file format and returns path up to and including that extension, i.e. with the spectrogram file extension removed. Parameters ---------- spect_path : str, Path path to a spectrogram file audio_ext : str extension associated with an audio file format, used to find audio file name in spect_path. Default is None. If None, search for any valid audio format (as defined by vak.config.constants.VALID_AUDIO_FORMATS) Returns ------- audio_fname : str name of audio file found in spect_path """ifaudio_extisNone:audio_ext=constants.VALID_AUDIO_FORMATSeliftype(audio_ext)isstr:audio_ext=[audio_ext]else:raiseTypeError(f"invalid type for audio_ext: {type(audio_ext)}")# We force spect_path to be a pathlib.Path and only use name attribute# so we don't have to worry about handling whitespace elsewhere in the pathspect_fname=pathlib.Path(spect_path).nameaudio_fnames=[]forextinaudio_ext:audio_fnames.append(find_fname(spect_fname,ext))# remove Nonesaudio_fnames=[fnameforfnameinaudio_fnamesiffnameisnotNone]iflen(audio_fnames)==1:returnaudio_fnames[0]else:raiseValueError(f"unable to determine filename of audio file from: {spect_path}")
def load(spect_path: str | pathlib.Path, spect_format: str | None = None):
    """Load a spectrogram and its related arrays from a file.

    The result is returned as an object that provides
    Python dictionary-like access to the arrays.

    Parameters
    ----------
    spect_path : str, Path
        Path to an array file.
    spect_format : str
        Format of the file; must be a key of
        ``constants.SPECT_FORMAT_LOAD_FUNCTION_MAP``.
        Default is None, in which case the extension of the file is used.

    Returns
    -------
    spect_dict : dict-like
        Either a dictionary or dictionary-like object that provides access
        to arrays from the file via keys, e.g. ``spect_dict['s']`` for the
        spectrogram. See docstring for ``vak.audio.to_spect`` for default
        keys for spectrogram array files that function creates.
    """
    path = pathlib.Path(spect_path)

    if spect_format is not None:
        fmt = spect_format
    else:
        # the suffix includes the period, so remove it to get the bare format name
        fmt = path.suffix.replace(".", "")

    load_function = constants.SPECT_FORMAT_LOAD_FUNCTION_MAP[fmt]
    return load_function(path)
def timebin_dur(
    spect_path: str | pathlib.Path,
    spect_format: str,
    timebins_key: str = "t",
    n_decimals_trunc: int = 5,
):
    """Get the duration of time bins from a spectrogram file.

    Parameters
    ----------
    spect_path : str, Path
        Path to spectrogram file.
    spect_format : str
        Format of file containing spectrogram. One of {'mat', 'npz'}.
    timebins_key : str
        Key for accessing vector of time bins in files. Default is 't'.
    n_decimals_trunc : int
        Number of decimal places to keep when truncating the time bin
        duration calculated from the vector of time bins. Default is 5.

    Returns
    -------
    timebin_dur : float
        Duration of a time bin, as computed by ``timebin_dur_from_vec``
        from the vector stored under ``timebins_key``.
    """
    spect_dict = load(pathlib.Path(spect_path), spect_format)
    return timebin_dur_from_vec(spect_dict[timebins_key], n_decimals_trunc)
def is_valid_set_of_spect_files(
    spect_paths,
    spect_format,
    freqbins_key="f",
    timebins_key="t",
    spect_key="s",
    n_decimals_trunc=5,
):
    """Validate a set of spectrogram files that will be used as a dataset.

    Validates that:
      - all files contain a spectrogram array that can be accessed
        with the specified key
      - the length of the frequency bin array in each file equals
        the number of rows in the spectrogram array
      - the frequency bins are the same across all files
      - the length of the time bin array in each file equals
        the number of columns in the spectrogram array
      - the duration of a spectrogram time bin is the same across all files

    Progress is reported through the module-level ``logger``.

    Parameters
    ----------
    spect_paths : list
        of strings or pathlib.Path objects; paths to spectrogram files.
    spect_format : str
        Format of files containing spectrograms. One of {'mat', 'npz'}.
    freqbins_key : str
        Key for accessing vector of frequency bins in files. Default is 'f'.
    timebins_key : str
        Key for accessing vector of time bins in files. Default is 't'.
    spect_key : str
        Key for accessing spectrogram in files. Default is 's'.
    n_decimals_trunc : int
        Number of decimal places to keep when truncating the time bin
        duration calculated from the vector of time bins. Default is 5.

    Returns
    -------
    True
        If all validation checks pass. If not, an error is raised.

    Raises
    ------
    KeyError
        If a file has no entry for ``spect_key``.
    ValueError
        If ``spect_paths`` is empty, if the bin vectors of a file do not
        match the shape of its spectrogram, or if frequency bins or time
        bin durations differ across files.
    """
    spect_paths = [pathlib.Path(spect_path) for spect_path in spect_paths]
    if not spect_paths:
        # fail early with a clear message instead of an opaque error
        # from stacking an empty list of arrays further down
        raise ValueError(
            "spect_paths is empty; "
            "at least one spectrogram file is required for validation"
        )

    def _validate(spect_path):
        """Validate one spectrogram file, then return its path, frequency
        bin array, and time bin duration, so that those can be validated
        across all files."""
        spect_dict = load(spect_path, spect_format)

        if spect_key not in spect_dict:
            raise KeyError(
                f"Did not find a spectrogram in file '{spect_path.name}' "
                f"using spect_key '{spect_key}'."
            )

        # Access each array exactly once: some dict-like containers
        # (e.g. numpy's NpzFile) read the array from disk on every key access.
        freq_bins = spect_dict[freqbins_key]
        time_bins = spect_dict[timebins_key]
        timebin_dur = timebin_dur_from_vec(time_bins, n_decimals_trunc)
        spect = spect_dict[spect_key]

        # number of freq. bins should equal number of rows
        if freq_bins.shape[-1] != spect.shape[0]:
            raise ValueError(
                f"length of frequency bins in {spect_path.name} "
                "does not match number of rows in spectrogram"
            )
        # number of time bins should equal number of columns
        if time_bins.shape[-1] != spect.shape[1]:
            raise ValueError(
                f"length of time_bins in {spect_path.name} "
                "does not match number of columns in spectrogram"
            )
        return spect_path, freq_bins, timebin_dur

    spect_paths_bag = db.from_sequence(spect_paths)

    logger.info("validating set of spectrogram files")

    with ProgressBar():
        path_freqbins_timebin_dur_tups = list(spect_paths_bag.map(_validate))

    freq_bins_per_file = [tup[1] for tup in path_freqbins_timebin_dur_tups]
    # Vectors of differing shape cannot be stacked; report that as the
    # validation failure it is rather than letting np.stack raise.
    freq_bins_shapes = {freq_bins.shape for freq_bins in freq_bins_per_file}
    if len(freq_bins_shapes) != 1:
        raise ValueError(
            "Found more than one frequency bin vector across files. "
            "Shapes of frequency bin vectors found were: "
            f"{sorted(freq_bins_shapes)}"
        )

    all_freq_bins = np.stack(freq_bins_per_file)
    uniq_freq_bins = np.unique(all_freq_bins, axis=0)
    if len(uniq_freq_bins) != 1:
        raise ValueError(
            "Found more than one frequency bin vector across files. "
            f"Instead found {len(uniq_freq_bins)}"
        )

    timebin_durs = [tup[2] for tup in path_freqbins_timebin_dur_tups]
    uniq_durs = np.unique(timebin_durs)
    if len(uniq_durs) != 1:
        raise ValueError(
            "Found more than one duration for time bins across spectrogram files. "
            f"Durations found were: {uniq_durs}"
        )

    return True