Source code for vak.common.files.files

from __future__ import annotations

import fnmatch
import pathlib
import re


def find_fname(fname: str, ext: str) -> str | None:
    """Find a filename with a given extension inside another filename.

    Useful to find e.g. names of audio files in names of spectrogram files.

    Parameters
    ----------
    fname : str
        Filename to search for another filename with a specific extension.
    ext : str
        Extension to search for in ``fname``, with or without
        the leading period. Matched literally and case-sensitively.

    Returns
    -------
    sub_fname : str or None
        The longest leading part of ``fname`` that ends with ``.{ext}``,
        or None if ``fname`` does not contain such a part.

    Examples
    --------
    >>> vak.files.find_fname(fname='llb3_0003_2018_04_23_14_18_54.wav.mat', ext='wav')
    'llb3_0003_2018_04_23_14_18_54.wav'
    """
    if ext.startswith("."):
        ext = ext[1:]

    # Require the literal period that separates a name from its extension,
    # so that e.g. ext='wav' does not match the start of 'wave_recording.mat'.
    # ``re.escape`` keeps characters such as '+' or '.' in the extension from
    # being interpreted as regex syntax. The greedy ``*`` selects the longest
    # match when the extension occurs more than once.
    m = re.match(rf"[\S ]*\.{re.escape(ext)}", fname)
    if m is None:
        return None
    return m.group()
def from_dir(dir_path: str | pathlib.Path, ext: str) -> list[str]:
    """Get all files with a given extension from a directory
    or its sub-directories.

    :func:`vak.files.from_dir` is case-insensitive.
    For example, if you specify the extension as ``'wav'``
    then it will return files that end in ``'.wav'`` or ``'.WAV'``.
    Similarly, if you specify ``'TextGrid'`` as the extension,
    it will return files that end in ``.textgrid`` and ``.TextGrid``.

    If no files with the specified extension are found in the directory,
    then the function looks in all directories within ``dir_path``
    and returns any files with the extension in those directories.
    The function does not look any deeper than one level below ``dir_path``.

    Parameters
    ----------
    dir_path : str or pathlib.Path
        Path to target directory.
    ext : str
        File extension to search for, with or without the leading period.
        E.g., ``'.wav'``.

    Returns
    -------
    files : list
        List of strings, paths to files with specified file extension,
        in sorted order.

    Raises
    ------
    NotADirectoryError
        If ``dir_path`` is not an existing directory.
    FileNotFoundError
        If no file with the extension is found in ``dir_path``
        or in its immediate sub-directories.

    Notes
    -----
    This function is used by :func:`vak.io.audio.files_from_dir`
    and :func:`vak.annotation.files_from_dir`.
    """
    dir_path = pathlib.Path(dir_path)
    if not dir_path.is_dir():
        raise NotADirectoryError(
            f"dir_path not recognized as a directory: {dir_path}"
        )

    if ext.startswith("."):
        ext = ext[1:]

    # use fnmatch + re to make search case-insensitive
    # adopted from:
    # https://gist.github.com/techtonik/5694830
    # https://jdhao.github.io/2019/06/24/python_glob_case_sensitivity/
    glob_pat = f"*.{ext}"
    rule = re.compile(fnmatch.translate(glob_pat), re.IGNORECASE)

    def matching_files(directory: pathlib.Path) -> list[pathlib.Path]:
        """Return the regular files directly inside ``directory`` that match ``rule``."""
        return [
            entry
            for entry in directory.iterdir()
            if entry.is_file() and rule.match(entry.name)
        ]

    files = matching_files(dir_path)

    if not files:
        # if we don't find any files with the extension, look in sub-directories
        # (only one level below ``dir_path``)
        for subdir in dir_path.iterdir():
            if subdir.is_dir():
                files.extend(matching_files(subdir))

    if not files:
        raise FileNotFoundError(
            f"No files with extension {ext} found in "
            f"{dir_path} or immediate sub-directories"
        )

    # ``iterdir`` yields entries in arbitrary order, so sort
    # to make the result deterministic across runs and filesystems.
    # TODO: use / return Path instead of strings
    return [str(file) for file in sorted(files)]