Calculates the edit or Hamming distance between this String and the pattern, comparing characters through the ambiguity map. Extends String. Returns -1 if the maximum allowed distance is exceeded.
# File lib/cassiopee.rb, line 24
# Ambiguity-aware distance between this string and +pattern+.
#
# When +edit+ equals 0 the work is delegated to computeHammingAmbiguous with
# +hamming+ as the bound; for any other value it is delegated to
# computeLevenshteinAmbiguous with +edit+ as the bound. The result of the
# selected method is returned unchanged.
def computeAmbiguousDistance(pattern, hamming, edit, ambiguous)
  return computeLevenshteinAmbiguous(pattern, edit, ambiguous) unless edit == 0

  computeHammingAmbiguous(pattern, hamming, ambiguous)
end
Calculates the edit or Hamming distance between this String and the pattern. Extends String. Returns -1 if the maximum allowed distance is exceeded.
# File lib/cassiopee.rb, line 12
# Distance between this string and +pattern+.
#
# An +edit+ of 0 selects computeHamming (bounded by +hamming+); any other
# value selects computeLevenshtein (bounded by +edit+). The chosen method's
# result is passed straight back to the caller.
def computeDistance(pattern, hamming, edit)
  edit == 0 ? computeHamming(pattern, hamming) : computeLevenshtein(pattern, edit)
end
Calculates the number of substitutions between this String and the pattern. Extends String. Returns -1 if the maximum allowed number is exceeded.
# File lib/cassiopee.rb, line 51
# Counts the positions at which this string and +pattern+ differ.
#
# Only the positions of this string are visited: a position that lies past
# the end of +pattern+ counts as a substitution, and characters of +pattern+
# beyond the length of this string are not examined.
#
# pattern:: object indexed with [] at each position of this string
# hamming:: maximum number of substitutions allowed (converted with to_i)
#
# Returns the number of substitutions, or -1 as soon as that number becomes
# greater than the bound.
def computeHamming(pattern, hamming)
  # The bound does not change while scanning, so convert it once up front.
  limit = hamming.to_i
  mismatches = 0
  length.times do |pos|
    next if pattern[pos] == self[pos]

    mismatches += 1
    return -1 if mismatches > limit
  end
  mismatches
end
Computes the Hamming distance using a mapping of alphabet ambiguity to compare characters. Returns -1 if the maximum allowed number of substitutions is exceeded.
# File lib/cassiopee.rb, line 34
# Counts the positions at which this string and +pattern+ differ, treating
# two characters as equal when isAmbiguousEqual says so.
#
# Only the positions of this string are visited. A position that lies past
# the end of +pattern+ is counted as a substitution without consulting
# isAmbiguousEqual, which matches what computeHamming does and avoids calling
# the comparison with a nil character.
#
# pattern::   object indexed with [] at each position of this string
# hamming::   maximum number of substitutions allowed (converted with to_i)
# ambiguous:: ambiguity mapping handed to isAmbiguousEqual (may be nil)
#
# Returns the number of substitutions, or -1 as soon as that number becomes
# greater than the bound.
def computeHammingAmbiguous(pattern, hamming, ambiguous)
  # The bound does not change while scanning, so convert it once up front.
  limit = hamming.to_i
  mismatches = 0
  length.times do |pos|
    expected = pattern[pos]
    next if !expected.nil? && isAmbiguousEqual(expected, self[pos], ambiguous)

    mismatches += 1
    return -1 if mismatches > limit
  end
  mismatches
end
Calculates the edit distance between this String and the pattern. Extends String. Returns -1 if the maximum allowed distance is exceeded.
# File lib/cassiopee.rb, line 70
# Edit distance between this string and +pattern+, as computed by
# Text::Levenshtein.distance.
#
# Returns the distance, or -1 when the distance is greater than +edit+.
def computeLevenshtein(pattern, edit)
  measured = Text::Levenshtein.distance(self, pattern)
  measured > edit ? -1 : measured
end
Computes the Levenshtein distance using a mapping of alphabet ambiguity to compare characters. The code comes from the Text gem (Text::Levenshtein.distance), adapted for ambiguity comparison. Returns -1 if the maximum allowed distance is exceeded.
# File lib/cassiopee.rb, line 85
# Levenshtein (edit) distance between this string and +pattern+, where two
# symbols are considered identical when isAmbiguousEqual says so. The
# algorithm is the single-row dynamic programme of Text::Levenshtein.distance
# from the Text gem, adapted for the ambiguity comparison.
#
# pattern::   string to compare with
# edit::      maximum distance allowed
# ambiguous:: ambiguity mapping handed to isAmbiguousEqual (may be nil)
#
# Returns the distance, or -1 when the distance is greater than +edit+. The
# bound is applied on every path, including the one where either string is
# empty.
def computeLevenshteinAmbiguous(pattern, edit, ambiguous)
  # Split both strings into comparable symbols. With encoding support the
  # symbols are UTF-8 characters, so the comparison helper receives real
  # characters rather than numeric codepoints. Without it (legacy
  # interpreters) fall back to unpacking, as the Text gem did.
  to_symbols =
    if "ruby".respond_to?(:encoding)
      lambda { |str| str.encode(Encoding::UTF_8).chars.to_a }
    else
      rule = $KCODE.match(/^U/) ? "U*" : "C*"
      lambda { |str| str.unpack(rule) }
    end
  source, target = [self, pattern].map(&to_symbols)
  n = source.length
  m = target.length

  distance =
    if n.zero?
      m
    elsif m.zero?
      n
    else
      # row[j] holds the distance between the processed prefix of source and
      # the first j symbols of target; it is updated in place row by row.
      row = (0..m).to_a
      current = nil
      n.times do |i|
        left = i + 1
        m.times do |j|
          cost = isAmbiguousEqual(source[i], target[j], ambiguous) ? 0 : 1
          current = [
            row[j + 1] + 1, # insertion
            left + 1,       # deletion
            row[j] + cost   # substitution
          ].min
          row[j] = left
          left = current
        end
        row[m] = current
      end
      current
    end

  distance > edit ? -1 : distance
end
Checks whether two characters are equal under the ambiguity rules.
ambiguous is a Hash mapping a character to an Array of the characters it may stand for.
# File lib/cassiopee.rb, line 130
# Tells whether symbols +a+ and +b+ are equal, optionally through the
# +ambiguous+ mapping.
#
# a, b::      symbols to compare; each may be a String (its first character
#             is used for the lookup), an Integer codepoint, or nil
# ambiguous:: Hash of character => collection of characters responding to
#             +index+ (for example an Array of characters), or nil when no
#             ambiguity applies
#
# Returns true when +a+ == +b+, or when the mapping lists one symbol among
# the alternatives of the other (in either direction); false otherwise. A nil
# symbol only ever equals another nil.
def isAmbiguousEqual(a, b, ambiguous)
  return true if a == b
  # Without a mapping, or with a missing symbol, plain equality is all there is.
  return false if ambiguous.nil? || a.nil? || b.nil?

  # Integer codepoints are turned into UTF-8 characters with pack("U"), which
  # also works above 255 where a bare Integer#chr raises RangeError.
  to_key = lambda { |sym| sym.is_a?(Integer) ? [sym].pack("U") : sym.chr }
  key_a = to_key.call(a)
  key_b = to_key.call(b)

  maps_to = lambda do |from, to|
    alternatives = ambiguous[from]
    !alternatives.nil? && !alternatives.index(to).nil?
  end
  maps_to.call(key_a, key_b) || maps_to.call(key_b, key_a)
end