Thursday, April 26, 2012

Regular Expression Matching


[Question]

Implement regular expression matching with support for '.' and '*'.
'.' Matches any single character.
'*' Matches zero or more of the preceding element.

The matching should cover the entire input string (not partial).

The function prototype should be:
bool isMatch(const char *s, const char *p)

Some examples:
isMatch("aa","a") → false
isMatch("aa","aa") → true
isMatch("aaa","aa") → false
isMatch("aa", "a*") → true
isMatch("aa", ".*") → true
isMatch("ab", ".*") → true
isMatch("aab", "c*a*b") → true

[Analysis]
This problem can be solved in a similar way with Edit Distance. According to its rules, there are four kinds of patterns in this regular expression matching:

"." - Matches any single character.
"c*" - Matches zero or more of character 'c';
".*" - Matches zero or more of any characters;
"c" - Matches a single character 'c';

First, make a match matrix between the word and patterns: e.g. pattern is ".*bc*." and the word is "abbcc"

     .*   b   c*   .
a    0   1   1    0
b    0   0   1    0
b    0   0   1    0
c    0   1   0    0
c    0   1   0    0

"0" in (i,j) represents a possible match between the pattern p[j] and character w[i].
"1" in (i,j) represents no match at all between the pattern p[j] and character w[i].

If there is a match between the pattern and word, there is a path from [1,1] to [m, n] (m is the length of the word and n is the number of patterns), which composed of a series of "0" and the following actions:
1) [i,j] -> [i+1,j+1]  -- end the pattern match at [i,j] and move to next pattern match to [i+1, j+1];
2) [i,j] -> [i,j+1]  -- skip the current pattern match at [i,j] and use next pattern (j+1) on character (i);
3) [i,j]->  [i+1, j] -- continue use current pattern at letter (i) and try to apply to (i+1);

2) 3) are operations allowed only for pattern columns "c*" and ".*".
1) is the only operation allowed for pattern columns "c" and ".".

So back to the above example, we found a path (a match) from [1,1] to [5,4] (marked by red color).


     .*   b   c*   .
   0   1   1    0
   0   0   1    0
   0   0   1    0
c    0   1   0    0
c    0   1   0    0 



[Solution]
//-------------
//--- C++ ---
//-------------
class Solution {
public:
    bool isMatch(const char *s, const char *p) {
        string s1 (s);
        string p1 (p);
     
        if ( p1.empty()) return s1 == "";
     
        vector<vector<bool>> map (s1.size()+1, vector<bool>(p1.size()+1, false));
     
        map[0][0] = true;
        for (int i=1; i<p1.size()+1; i++) {
            if (p1[i-1] == '*') map[0][i] = map[0][i-2];
        }
     
        for (int i=1; i<s1.size()+1; i++) {
            for (int j=1; j<p1.size()+1; j++) {
                if (p1[j-1] == '.') {
                    map[i][j] = map[i-1][j-1];
                }
                else if (p1[j-1] == '*') {
                    map[i][j] = map[i][j-2]     //'*' is counted as zero
                            || map[i][j-1]      //'*' is counted as 1
                            || p1[j-2] == '.' && map[i-1][j]  //'.*'
                            || s1[i-1] == p1[j-2] && map[i-1][j];
                }
                else
                    map[i][j] = (s1[i-1] == p1[j-1])? map[i-1][j-1] : false;
            }
        }
        return map.back().back();
    }
};

//---------------
//--- JAVA ---
//---------------
import java.util.*;

public class Solution {
    public static boolean isMatch(String s, String p) {

        List<String> pat = new ArrayList<String> ();
        char[] ap = p.toCharArray();
     
        if (s.length() == 0) return (p.length()==0 || p.equals(".*"));
     
        if (ap.length<=1) {
            if (ap.length==0 && s.length() ==1) return true;
            if (ap.length==0 && ap[0]=='.' && s.length() == 1) return true;
            return false;
        }
     
        // -- parse and generate patterns sub-strings.
        int i = 0;
        while(i<ap.length-1) {
            char a = ap[i];
            char b = ap[i+1];
            if (a == '.') {
                if (b=='*') {
                    pat.add(new String("") + a+b);
                    i++;
                } else {
                    pat.add(new String("") +a);
                }
            }
            else if (a != '*') {
                if (b=='*') {
                    pat.add(new String("") + a +b);
                    i++;
                } else {
                    pat.add(new String("") +a);
                }
            } else { // a=='*'
                return false;
            }
            i++;
        }
        if (i<ap.length) {
        pat.add(new String("") + ap[i]);
        }
     
        // -- generate the matrix.
        int[][] matrix;
        int rows = s.length();
        int cols = pat.size();      
        matrix = new int[rows][cols];

        for (i=0; i<rows; i++) {
            for (int j=0; j<cols; j++) {
                char cs = s.charAt(i);
                matrix[i][j] = 1;
                if (pat.get(j).length() == 2){
                    if ( pat.get(j).equals(".*") ){
                        matrix[i][j] = 0;
                    }
                    else { //"c*"
                        if (pat.get(j).charAt(0) == cs) {
                            matrix[i][j] = 0;
                        }
                    }
                }
                else {
                    if (pat.get(j).equals(".") || pat.get(j).charAt(0) == cs) {
                        matrix[i][j] = 0;
                    }
                }
                System.out.println(matrix[i][j]);
            }
            System.out.println("");
        }
     
       // -- to find a path/match.
        if (matrix[rows-1][cols-1] != 0) return false;
     
        return validateMatch(matrix, 0, 0, pat);
    }
 
    public static boolean validateMatch(int[][] m, int i, int j, List<String> pat) {
        int rows = m.length;
        int cols = m[0].length;
     
        if (i== rows-1 && j==cols-1) return true;

        boolean ret = false;
        if (pat.get(j).length()==2) {
            if (j+1<cols && m[i][j+1] == 0) {
                ret = ret || validateMatch(m, i, j+1, pat);
            }
            if (i+1<rows && m[i+1][j] ==0) {
                ret = ret || validateMatch(m, i+1, j, pat);
            }
        }
     
        if (j+1<cols && i+1<rows && m[i+1][j+1] ==0) {
                ret = ret || validateMatch(m, i+1, j+1, pat);
        }
     
        return ret;  
    }
 
public static void main(String args[])
{
String s = "aasdfasdfasdfasdfas";
String p =  "aasdf.*asdf.*asdf.*asdf.*s";

boolean a = Solution.isMatch(s, p);
System.out.println("a:" + a);
}
}


No comments:

Post a Comment