Source code for vak.config.validators
"""validators used by attrs-based classes and by vak.parse.parse_config"""
from pathlib import Path
import toml
from .. import models
from ..common import constants
[docs]
def is_a_directory(instance, attribute, value):
"""check if given path is a directory"""
if not Path(value).is_dir():
raise NotADirectoryError(
f"Value specified for {attribute.name} of {type(instance)} not recognized as a directory:\n"
f"{value}"
)
[docs]
def is_a_file(instance, attribute, value):
"""check if given path is a file"""
if not Path(value).is_file():
raise FileNotFoundError(
f"Value specified for {attribute.name} of {type(instance)} not recognized as a file:\n"
f"{value}"
)
[docs]
def is_valid_model_name(instance, attribute, value: str) -> None:
"""Validate model name."""
if value not in models.registry.MODEL_NAMES:
raise ValueError(
f"Invalid model name: {value}.\nValid model names are: {models.registry.MODEL_NAMES}"
)
[docs]
def is_audio_format(instance, attribute, value):
"""check if valid audio format"""
if value not in constants.VALID_AUDIO_FORMATS:
raise ValueError(f"{value} is not a valid format for audio files")
[docs]
def is_annot_format(instance, attribute, value):
"""check if valid annotation format"""
if value not in constants.VALID_ANNOT_FORMATS:
raise ValueError(
f"{value} is not a valid format for annotation files.\n"
f"Valid formats are: {constants.VALID_ANNOT_FORMATS}"
)
[docs]
def is_spect_format(instance, attribute, value):
"""check if valid format for spectrograms"""
if value not in constants.VALID_SPECT_FORMATS:
raise ValueError(
f"{value} is not a valid format for spectrogram files.\n"
f"Valid formats are: {constants.VALID_SPECT_FORMATS}"
)
CONFIG_DIR = Path(__file__).parent
VALID_TOML_PATH = CONFIG_DIR.joinpath("valid.toml")
with VALID_TOML_PATH.open("r") as fp:
VALID_DICT = toml.load(fp)
VALID_SECTIONS = list(VALID_DICT.keys())
VALID_OPTIONS = {
section: list(options.keys()) for section, options in VALID_DICT.items()
}
[docs]
def are_sections_valid(config_dict, toml_path=None):
sections = list(config_dict.keys())
from ..cli.cli import CLI_COMMANDS # avoid circular import
cli_commands_besides_prep = [
command for command in CLI_COMMANDS if command != "prep"
]
sections_that_are_commands_besides_prep = [
section
for section in sections
if section.lower() in cli_commands_besides_prep
]
if len(sections_that_are_commands_besides_prep) == 0:
raise ValueError(
"did not find a section related to a vak command in config besides `prep`.\n"
f"Sections in config were: {sections}"
)
if len(sections_that_are_commands_besides_prep) > 1:
raise ValueError(
"found multiple sections related to a vak command in config besides `prep`.\n"
f"Those sections are: {sections_that_are_commands_besides_prep}. "
f"Please use just one command besides `prep` per .toml configuration file"
)
MODEL_NAMES = list(models.registry.MODEL_NAMES)
# add model names to valid sections so users can define model config in sections
valid_sections = VALID_SECTIONS + MODEL_NAMES
for section in sections:
if (
section not in valid_sections
and f"{section}Model" not in valid_sections
):
if toml_path:
err_msg = (
f"section defined in {toml_path} is not valid: {section}"
)
else:
err_msg = (
f"section defined in toml config is not valid: {section}"
)
raise ValueError(err_msg)
[docs]
def are_options_valid(config_dict, section, toml_path=None):
user_options = set(config_dict[section].keys())
valid_options = set(VALID_OPTIONS[section])
if not user_options.issubset(valid_options):
invalid_options = user_options - valid_options
if toml_path:
err_msg = (
f"the following options from {section} section in "
f"the config file '{toml_path.name}' are not valid:\n{invalid_options}"
)
else:
err_msg = (
f"the following options from {section} section in "
f"the toml config are not valid:\n{invalid_options}"
)
raise ValueError(err_msg)