"""Functions dealing with ``tensorboard``"""from__future__importannotationsfrompathlibimportPathimportpandasaspdfromtensorboard.backend.event_processing.event_accumulatorimport(EventAccumulator,)fromtorch.utils.tensorboardimportSummaryWriterfrom..common.typingimportPathLike
def get_summary_writer(log_dir: PathLike, filename_suffix: str) -> SummaryWriter:
    """Get an instance of ``tensorboard.SummaryWriter``,
    to use with a vak.Model during training.

    Parameters
    ----------
    log_dir : str, pathlib.Path
        directory where event file will be written
    filename_suffix : str
        suffix added to events file name

    Returns
    -------
    summary_writer : torch.utils.tensorboard.SummaryWriter

    Examples
    --------
    >>> summary_writer = vak.tensorboard.get_summary_writer(log_dir='./experiments')
    >>> tweety_net_model.summary_writer = summary_writer  # set attribute equal to instance we just made
    >>> tweety_net_model.train()  # now events during training will be logged with that summary writer
    """
    # Thin wrapper: forward both arguments straight to torch's SummaryWriter.
    return SummaryWriter(log_dir=log_dir, filename_suffix=filename_suffix)
# Default ``size_guidance`` passed to ``EventAccumulator`` when loading an
# events file: keep only a single item per tag for histogram/image tag types,
# but keep every scalar event (a value of 0 means "load all").
DEFAULT_SIZE_GUIDANCE = {
    "compressedHistograms": 1,
    "images": 1,
    "scalars": 0,  # 0 means load all
    "histograms": 1,
}
def events2df(
    events_path: PathLike,
    size_guidance: dict | None = None,
    drop_wall_time: bool = True,
) -> pd.DataFrame:
    """Convert :mod:`tensorboard` events file to pandas.DataFrame

    Events files are created by SummaryWriter from PyTorch or Tensorflow.

    Parameters
    ----------
    events_path : str, pathlib.Path
        Path to either a log directory or a specific events file
        saved by a SummaryWriter in a log directory.
        By default, ``vak`` saves logs in a directory with the model name
        inside a ``results`` directory generated at the start of training.
    size_guidance : dict
        Argument passed to the ``EventAccumulator`` class from ``tensorboard``
        that is used to load the events file. Information on how much data
        the EventAccumulator should store in memory. Dict that maps a
        `tagType` string to an integer representing the number of items
        to keep per tag for items of that `tagType`. If the size is 0,
        all events are stored. Default is None, in which case
        ``vak.tensorboard.DEFAULT_SIZE_GUIDANCE`` is used. For more information see
        https://github.com/tensorflow/tensorboard/blob/master/tensorboard/backend/event_processing/event_accumulator.py
    drop_wall_time : bool
        If True, drop wall times logged in events file. Default is True.

    Returns
    -------
    df : pandas.DataFrame
        With index 'step' and one column per scalar tag from the events file.
        Returns an empty DataFrame if the events file contains no scalars
        (other than an ``'epoch'`` tag, which is always excluded).

    Examples
    --------
    >>> events_path = 'tweetynet/results_210322_103904/train_dur_6s/replicate_2/TweetyNet/'
    >>> events_df = vak.tensorboard.events2df(events_path)
    >>> events_df
          loss/train  avg_acc/val  avg_levenshtein/val  avg_character_error_rate/val  avg_loss/val
    step
    0       2.479142          NaN                  NaN                           NaN           NaN
    1       2.458833          NaN                  NaN                           NaN           NaN
    ...          ...          ...                  ...                           ...           ...
    1000         NaN     0.902475                 42.0                      0.880533      0.310385
    [1001 rows x 5 columns]
    """
    # EventAccumulator expects a string path; str() accepts str, pathlib.Path,
    # or any other os.PathLike, so convert unconditionally.
    events_path = str(events_path)

    if size_guidance is None:
        size_guidance = DEFAULT_SIZE_GUIDANCE

    ea = EventAccumulator(path=events_path, size_guidance=size_guidance)
    ea.Reload()  # load all data written so far

    scalar_tags = ea.Tags()["scalars"]  # list of tags for values written to scalar

    # Make a dataframe for each tag, then concatenate using 'step' as the index,
    # so that pandas fills in NaNs for any scalar not measured on every step.
    # The 'epoch' tag is excluded from the result, so skip it up front rather
    # than building its dataframe only to discard it at concat time.
    tag_dfs = []
    for scalar_tag in scalar_tags:
        if scalar_tag == "epoch":
            continue
        tag_df = pd.DataFrame(
            [
                (scalar.wall_time, scalar.step, scalar.value)
                for scalar in ea.Scalars(scalar_tag)
            ],
            columns=["wall_time", "step", scalar_tag],
        ).set_index("step")
        if drop_wall_time:
            tag_df = tag_df.drop(columns="wall_time")
        tag_dfs.append(tag_df)

    if not tag_dfs:
        # no scalars logged: return an empty frame instead of letting
        # ``pd.concat`` raise ``ValueError`` on an empty sequence
        return pd.DataFrame()

    return pd.concat(tag_dfs, axis=1)