module Cassiopee

Public Instance Methods

computeAmbiguousDistance(pattern,hamming,edit,ambiguous) click to toggle source

Calculates the edit or Hamming distance between the String and pattern. Extends String. Returns -1 if the maximum is exceeded.

# File lib/cassiopee.rb, line 24
# Dispatches to the ambiguity-aware distance routine selected by +edit+.
#
# When +edit+ equals 0 the result of computeHammingAmbiguous (called with
# +hamming+) is returned; otherwise the result of
# computeLevenshteinAmbiguous (called with +edit+) is returned.
# +pattern+ and +ambiguous+ are forwarded unchanged in both cases.
def computeAmbiguousDistance(pattern,hamming,edit,ambiguous)
  return computeLevenshteinAmbiguous(pattern, edit, ambiguous) unless edit == 0

  computeHammingAmbiguous(pattern, hamming, ambiguous)
end
computeDistance(pattern,hamming,edit) click to toggle source

Calculates the edit or Hamming distance between the String and pattern. Extends String. Returns -1 if the maximum is exceeded.

# File lib/cassiopee.rb, line 12
# Dispatches to the distance routine selected by +edit+.
#
# When +edit+ equals 0 the result of computeHamming (called with
# +hamming+) is returned; otherwise the result of computeLevenshtein
# (called with +edit+) is returned.
def computeDistance(pattern,hamming,edit)
  return computeLevenshtein(pattern, edit) unless edit == 0

  computeHamming(pattern, hamming)
end
computeHamming(pattern,hamming) click to toggle source

Calculates the number of substitutions between the string and pattern. Extends String. Returns -1 if the maximum is exceeded.

# File lib/cassiopee.rb, line 51
# Counts the positions at which the receiver and +pattern+ differ.
#
# Positions are walked over the receiver's length only. As soon as the
# running count exceeds +hamming+ (converted with +to_i+) the method
# returns -1; otherwise it returns the number of mismatches found.
def computeHamming(pattern,hamming)
  mismatches = 0
  self.length.times do |pos|
    next if pattern[pos] == self[pos]

    mismatches += 1
    # Bail out early once the allowed number of substitutions is exceeded.
    return -1 if mismatches > hamming.to_i
  end
  mismatches
end
computeHammingAmbiguous(pattern,hamming,ambiguous) click to toggle source

Compute Hamming distance but using a mapping matrix of alphabet ambiguity

# File lib/cassiopee.rb, line 34
# Counts the positions at which the receiver and +pattern+ differ, using
# isAmbiguousEqual (with the +ambiguous+ mapping) to compare each pair.
#
# Positions are walked over the receiver's length only. As soon as the
# running count exceeds +hamming+ (converted with +to_i+) the method
# returns -1; otherwise it returns the number of mismatches found.
def computeHammingAmbiguous(pattern,hamming,ambiguous)
  mismatches = 0
  self.length.times do |pos|
    next if isAmbiguousEqual(pattern[pos], self[pos], ambiguous)

    mismatches += 1
    # Bail out early once the allowed number of substitutions is exceeded.
    return -1 if mismatches > hamming.to_i
  end
  mismatches
end
computeLevenshtein(pattern,edit) click to toggle source

Calculates the edit distance between the string and pattern. Extends String. Returns -1 if the maximum is exceeded.

# File lib/cassiopee.rb, line 70
# Computes the edit distance between the receiver and +pattern+ via
# Text::Levenshtein.distance.
#
# Returns -1 when the distance is greater than +edit+, otherwise the
# distance itself.
def computeLevenshtein(pattern,edit)
  cost = Text::Levenshtein.distance(self, pattern)
  cost > edit ? -1 : cost
end
computeLevenshteinAmbiguous(pattern, edit, ambiguous) click to toggle source

Computes the Levenshtein distance using a mapping matrix of alphabet ambiguity. The code comes from the Text gem (Text::Levenshtein.distance), adapted for ambiguity comparison.

# File lib/cassiopee.rb, line 85
  # Computes the Levenshtein (edit) distance between the receiver and
  # +pattern+, comparing symbols with isAmbiguousEqual so that the
  # +ambiguous+ mapping can make two different characters count as equal.
  #
  # Returns -1 when the distance is greater than +edit+, otherwise the
  # distance. The threshold is also applied when either string is empty.
  def computeLevenshteinAmbiguous(pattern, edit, ambiguous)
    to_symbols =
      if "ruby".respond_to?(:encoding)
        # Split into characters rather than integer code points:
        # isAmbiguousEqual calls #chr on its operands, and Integer#chr
        # cannot produce a usable lookup key for non-ASCII code points.
        lambda { |str| str.encode(Encoding::UTF_8).chars.to_a }
      else
        rule = $KCODE.match(/^U/) ? "U*" : "C*"
        lambda { |str| str.unpack(rule) }
      end

    s, t = [self, pattern].map(&to_symbols)
    n = s.length
    m = t.length

    # With an empty side the distance is the length of the other side;
    # it must still respect the maximum like every other result.
    if n.zero? || m.zero?
      trivial = [n, m].max
      return trivial > edit ? -1 : trivial
    end

    # d holds the previous row of the dynamic-programming matrix and is
    # overwritten in place as the current row is computed.
    d = (0..m).to_a
    x = nil

    (0...n).each do |i|
      e = i + 1
      (0...m).each do |j|
        cost = isAmbiguousEqual(s[i], t[j], ambiguous) ? 0 : 1
        x = [
          d[j + 1] + 1, # insertion
          e + 1,        # deletion
          d[j] + cost   # substitution
        ].min
        d[j] = e
        e = x
      end
      d[m] = x
    end

    x > edit ? -1 : x
  end
isAmbiguousEqual(a,b,ambiguous) click to toggle source

Checks whether two characters are equal under the ambiguity rules.

  • ambiguous is a Hash mapping a char to an Array of chars

# File lib/cassiopee.rb, line 130
# Tells whether +a+ and +b+ are equal, optionally taking an ambiguity
# mapping into account.
#
# +ambiguous+ is either nil or a Hash-like object looked up with the
# +chr+ of each operand; the value found, if any, is searched with
# +index+ for the +chr+ of the other operand. Two symbols are equal when
# they compare equal with ==, or when either one lists the other in the
# mapping.
#
# A nil operand (for instance an index past the end of a shorter string)
# is compared with plain == instead of raising on +nil.chr+.
#
# Returns true or false.
def isAmbiguousEqual(a,b,ambiguous)
  # No mapping, or nothing that can be looked up: plain equality.
  return a == b if ambiguous.nil? || a.nil? || b.nil?
  return true if a == b

  first = a.chr
  second = b.chr
  # The relation is symmetric: try a -> b, then b -> a.
  [[first, second], [second, first]].any? do |key, candidate|
    alternatives = ambiguous[key]
    !alternatives.nil? && !alternatives.index(candidate).nil?
  end
end