Sunday, April 22, 2012

Edit Distance

[Question]

Given two words word1 and word2, find the minimum number of steps required to convert word1 to word2. (each operation is counted as 1 step.)
You have the following 3 operations permitted on a word:
a) Insert a character
b) Delete a character
c) Replace a character

[Analysis]
The problem is a classic example of Dynamic Programming. It is called Levenshtein Distance. There is an excellent analysis in wiki: http://en.wikipedia.org/wiki/Levenshtein_distance.

Suppose word1 is A[0]...A[m] and word2 is B[0]...B[n]. The Edit Distance E[m,n] can be derived as follow:
    E[i, j] = min (E[i-1,j]+1, E[i,j-1]+1, E[i-1, j-1]+1) if A[i]!=B[j];
    E[i, j] = min (E[i-1,j]+1, E[i,j-1]+1, E[i-1, j-1]) if A[i]==B[j];

When A and B are both empty, the Edit Distance is 0. To simplify the coding, we can add one extra column and row in E. E takes (m+1) x (n+1) space; this can be simplified to linear (m+1).
     
[Solution]
//--- C++ ---
class Solution {
public:
    int minDistance(string word1, string word2) {
        if (word1.size()==0) return word2.size();
        if (word2.size()==0) return word1.size();
       
        vector<vector<int>> E (word2.size()+1, vector<int>(word1.size()+1, 0));

        for (int i=0; i<word1.size()+1; i++)
            E[0][i] = i;
           
        for (int j=0; j<word2.size()+1; j++)
            E[j][0] = j;
           
        for (int i=1; i<word2.size()+1; i++) {
            for (int j=1; j<word1.size()+1; j++) { //note: j needs to start from index 1.
                int tmp = min (E[i-1][j]+1, E[i][j-1]+1);
                mtx[i][j] =  min( tmp, (word2[i-1]==word1[j-1])?E[i-1][j-1] : E[i-1][j-1]+1 );
            }
        }
        return E.back().back();
    }
};

//--- Java ---
public class Solution {
    public int minDistance(String word1, String word2) {
        if (word1.length()==0) return word2.length();
        if (word2.length()==0) return word1.length();
     
        int [] dist = new int[word1.length()+1];
     
        for (int i=0; i<dist.length; i++)
            dist[i] = i;
         
        for (int j=0; j<word2.length(); j++) {
            int d0= dist[0];
            int d1= 0;
            dist[0] = j+1;
         
            for (int i=1; i<dist.length; i++) {
                d1 = dist[i];
                if (word2.charAt(j) == word1.charAt(i-1)) {
                    dist[i] = d0;
                } else {
                    dist[i] = Math.min(Math.min(d0+1, d1+1), dist[i-1]+1);
                }
                d0=d1;              
            }
        }
        return dist[dist.length-1];
    }
}

No comments:

Post a Comment