Source code for vak.datasets.parametric_umap.metadata

"""A dataclass that represents metadata
associated with a dimensionality reduction dataset,
as generated by
:func:`vak.core.prep.frame_classification.prep_dimensionality_reduction_dataset`"""

from __future__ import annotations

import json
import pathlib
from typing import ClassVar

import attr


def is_valid_dataset_csv_filename(instance, attribute, value):
    """Validate that ``value`` looks like a dataset csv filename.

    Has the ``(instance, attribute, value)`` signature of an ``attrs``
    validator; only ``value`` is inspected.

    Parameters
    ----------
    instance : object
        Instance being validated. Unused.
    attribute : attr.Attribute
        Attribute being validated. Unused.
    value : str
        Filename to check.

    Raises
    ------
    ValueError
        If ``value`` does not contain the string ``"_prep_"``
        or does not end with the extension ``.csv``.
    """
    valid = "_prep_" in value and value.endswith(".csv")
    if not valid:
        # Each fragment ends with a space so that the sentences
        # do not run together once the literals are concatenated.
        raise ValueError(
            f"Invalid dataset csv filename: {value}. "
            f'Filename should contain the string "_prep_" '
            f"and end with the extension .csv. "
            f"Valid filenames are generated by "
            f"vak.core.prep.generate_dataset_csv_filename"
        )
def is_valid_audio_format(instance, attribute, value):
    """Validate that ``value`` is one of the recognized audio formats.

    Has the ``(instance, attribute, value)`` signature of an ``attrs``
    validator; only ``value`` is inspected.

    Parameters
    ----------
    instance : object
        Instance being validated. Unused.
    attribute : attr.Attribute
        Attribute being validated. Unused.
    value
        Audio format to check for membership in
        ``vak.common.constants.VALID_AUDIO_FORMATS``.

    Raises
    ------
    ValueError
        If ``value`` is not in ``VALID_AUDIO_FORMATS``.
    """
    # Imported when the validator runs rather than at module import time.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    import vak.common.constants

    valid_formats = vak.common.constants.VALID_AUDIO_FORMATS
    if value in valid_formats:
        return
    raise ValueError(
        f"Not a valid audio format: {value}. Valid audio formats are: {valid_formats}"
    )
def is_valid_spect_format(instance, attribute, value):
    """Validate that ``value`` is one of the recognized spectrogram formats.

    Has the ``(instance, attribute, value)`` signature of an ``attrs``
    validator; only ``value`` is inspected.

    Parameters
    ----------
    instance : object
        Instance being validated. Unused.
    attribute : attr.Attribute
        Attribute being validated. Unused.
    value
        Spectrogram format to check for membership in
        ``vak.common.constants.VALID_SPECT_FORMATS``.

    Raises
    ------
    ValueError
        If ``value`` is not in ``VALID_SPECT_FORMATS``.
    """
    # Imported when the validator runs rather than at module import time.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    import vak.common.constants

    valid_formats = vak.common.constants.VALID_SPECT_FORMATS
    if value in valid_formats:
        return
    raise ValueError(
        f"Not a valid spectrogram format: {value}. "
        f"Valid spectrogram formats are: {valid_formats}"
    )
@attr.define
class Metadata:
    """A dataclass that represents metadata
    associated with a dataset that was
    generated by :func:`vak.core.prep.prep`.

    Attributes
    ----------
    dataset_csv_filename : str
        Name of csv file representing the source files in the dataset.
        Csv file will be located in root of directory representing dataset,
        so only the filename is given.
        Converted with ``str`` and checked by
        :func:`is_valid_dataset_csv_filename`.
    shape : tuple
        Tuple of positive integers. Any iterable passed in
        is converted to a tuple before validation.
        NOTE(review): presumably the shape of a single sample
        in the dataset -- confirm against the code that builds it.
    audio_format : str, optional
        Audio format. When not None, it is converted with ``str``
        and checked by :func:`is_valid_audio_format`.
        Default is None.
    """

    # declare this as a constant to avoid
    # needing to remember this in multiple places, and to use in unit tests
    METADATA_JSON_FILENAME: ClassVar = "metadata.json"

    dataset_csv_filename: str = attr.field(
        converter=str, validator=is_valid_dataset_csv_filename
    )

    shape: tuple = attr.field(converter=tuple)

    @shape.validator
    def is_valid_shape(self, attribute, value):
        """Validate that ``shape`` is a tuple of positive integers.

        Raises
        ------
        TypeError
            If ``value`` is not a tuple.
        ValueError
            If any element of ``value`` is not an integer greater than zero.
        """
        # The ``tuple`` converter on the field normally guarantees this;
        # the check still guards direct calls to the validator.
        if not isinstance(value, tuple):
            raise TypeError(
                f"`shape` should be a tuple but type was: {type(value)}"
            )
        # ``isinstance`` is tested first so that ``val > 0`` is never
        # evaluated for a non-integer element.
        if not all(isinstance(val, int) and val > 0 for val in value):
            raise ValueError(
                f"All values of `shape` should be positive integers but values were: {value}"
            )

    audio_format: str | None = attr.field(
        converter=attr.converters.optional(str),
        validator=attr.validators.optional(is_valid_audio_format),
        default=None,
    )

    @classmethod
    def from_path(cls, json_path: str | pathlib.Path) -> Metadata:
        """Load dataset metadata from a json file.

        Class method that returns an instance of :class:`Metadata`.

        Parameters
        ----------
        json_path : string, pathlib.Path
            Path to a 'metadata.json' file created by
            :func:`vak.core.prep.prep` when generating
            a dataset.

        Returns
        -------
        metadata : Metadata
            Instance of :class:`Metadata`
            with metadata loaded from json file.
            The top-level keys of the json object are passed
            to the class as keyword arguments.
        """
        json_path = pathlib.Path(json_path)
        # JSON text is UTF-8 by specification; do not depend on the locale.
        with json_path.open("r", encoding="utf-8") as fp:
            metadata_json = json.load(fp)
        return cls(**metadata_json)

    @classmethod
    def from_dataset_path(cls, dataset_path: str | pathlib.Path) -> Metadata:
        """Load dataset metadata from the root directory of a dataset.

        Looks for the file named by ``METADATA_JSON_FILENAME``
        directly inside ``dataset_path`` and loads it with
        :meth:`from_path`.

        Parameters
        ----------
        dataset_path : string, pathlib.Path
            Path to the directory that contains the metadata file.

        Returns
        -------
        metadata : Metadata
            Instance of :class:`Metadata`
            with metadata loaded from json file.

        Raises
        ------
        NotADirectoryError
            If ``dataset_path`` does not exist or is not a directory.
        FileNotFoundError
            If the metadata file does not exist in ``dataset_path``.
        """
        dataset_path = pathlib.Path(dataset_path)
        if not dataset_path.exists() or not dataset_path.is_dir():
            raise NotADirectoryError(
                f"`dataset_path` not found or not recognized as a directory: {dataset_path}"
            )

        metadata_json_path = dataset_path / cls.METADATA_JSON_FILENAME
        if not metadata_json_path.exists():
            raise FileNotFoundError(
                f"Metadata file not found: {metadata_json_path}"
            )

        return cls.from_path(metadata_json_path)

    def to_json(self, dataset_path: str | pathlib.Path) -> None:
        """Dump dataset metadata to a json file.

        Writes the attributes of this instance to the file named by
        ``METADATA_JSON_FILENAME`` inside ``dataset_path``.

        Parameters
        ----------
        dataset_path : string, pathlib.Path
            Path to root of a directory representing a dataset,
            where the 'metadata.json' file should be saved.

        Raises
        ------
        NotADirectoryError
            If ``dataset_path`` does not exist or is not a directory.
        """
        dataset_path = pathlib.Path(dataset_path)
        if not dataset_path.exists() or not dataset_path.is_dir():
            raise NotADirectoryError(
                f"dataset_path not recognized as a directory: {dataset_path}"
            )

        # JSON has no tuple type, so ``shape`` is written as an array;
        # the ``tuple`` converter restores it when the file is loaded.
        json_dict = attr.asdict(self)
        json_path = dataset_path / self.METADATA_JSON_FILENAME
        with json_path.open("w", encoding="utf-8") as fp:
            json.dump(json_dict, fp, indent=4)