def levenshtein(source, target):
    """Compute the Levenshtein (edit) distance between two strings.

    The distance is the number of deletions, insertions, or substitutions
    required to convert the ``source`` string into the ``target`` string.

    Parameters
    ----------
    source, target : str

    Returns
    -------
    distance : torch.Tensor
        Zero-dimensional tensor with dtype ``torch.int32`` holding the
        number of deletions, insertions, or substitutions required to
        convert ``source`` into ``target``. Every code path, including
        the shortcuts for equal or empty inputs, returns this same type.

    Notes
    -----
    Adapted from https://github.com/toastdriven/pylev/blob/master/pylev.py
    to fix issues with the Numpy implementation in
    https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python
    """
    if source == target:
        return torch.tensor(0, dtype=torch.int32)

    # We call tuple() to force strings to be used as sequences
    # ('c', 'a', 't', 's') - numpy uses them as values by default.
    source = np.array(tuple(source))
    target = np.array(tuple(target))
    len_source = source.size
    len_target = target.size

    # Converting to or from an empty sequence costs one edit per element
    # of the other sequence.
    if len_source == 0:
        return torch.tensor(len_target, dtype=torch.int32)
    if len_target == 0:
        return torch.tensor(len_source, dtype=torch.int32)

    # The distance is symmetric, so swapping the arguments does not change
    # the result; after the swap ``source`` is never the longer sequence.
    if len_source > len_target:
        source, target = target, source
        len_source, len_target = len_target, len_source

    # We use a dynamic programming algorithm, but with the
    # added optimization that we only need the last two rows
    # of the matrix.
    d0 = np.arange(len_target + 1)
    d1 = np.arange(len_target + 1)

    for i in range(len_source):
        d1[0] = i + 1
        for j in range(len_target):
            # Diagonal move: free when the elements match, else one edit.
            substitution = d0[j] + int(source[i] != target[j])
            insertion = d1[j] + 1
            deletion = d0[j + 1] + 1
            d1[j + 1] = min(substitution, insertion, deletion)
        # The row just filled becomes the "previous" row of the next pass.
        d0, d1 = d1, d0

    return torch.tensor(d0[-1], dtype=torch.int32)
def character_error_rate(y_pred, y_true):
    """Levenshtein edit distance normalized by length of true sequence.

    Also known as word error rate; here applied to other vocalizations
    in addition to speech.

    Parameters
    ----------
    y_pred : str
        predicted labels for a series of songbird syllables
    y_true : str
        ground truth labels for a series of songbird syllables

    Returns
    -------
    rate : torch.Tensor
        Zero-dimensional tensor with dtype ``torch.float32`` equal to
        Levenshtein distance / len(y_true). When both strings are empty
        the rate is defined as 0.

    Raises
    ------
    TypeError
        If either ``y_true`` or ``y_pred`` is not a ``str``.
    ValueError
        If ``y_true`` is empty but ``y_pred`` is not, since the rate
        would require dividing by zero.
    """
    if not isinstance(y_true, str) or not isinstance(y_pred, str):
        raise TypeError("Both `y_true` and `y_pred` must be of type `str`")

    # handle divide by zero edge cases
    if len(y_true) == 0:
        if len(y_pred) == 0:
            # Nothing to predict and nothing predicted: no errors.
            return torch.tensor(0.0, dtype=torch.float32)
        raise ValueError(
            "character error rate is undefined when length of y_true is zero"
        )

    # ``levenshtein`` may hand back a plain int or a zero-dimensional tensor;
    # converting to int first means ``torch.tensor`` is always given a Python
    # float rather than an existing tensor (which PyTorch warns about).
    distance = int(levenshtein(y_pred, y_true))
    rate = distance / len(y_true)
    return torch.tensor(rate, dtype=torch.float32)