[Question]
Implement regular expression matching with support for '.'
and '*'
.
'.' Matches any single character.
'*' Matches zero or more of the preceding element.
The matching should cover the entire input string (not partial).
The function prototype should be:
bool isMatch(const char *s, const char *p)
Some examples:
isMatch("aa","a") → false
isMatch("aa","aa") → true
isMatch("aaa","aa") → false
isMatch("aa", "a*") → true
isMatch("aa", ".*") → true
isMatch("ab", ".*") → true
isMatch("aab", "c*a*b") → true
[Analysis]
This problem can be solved in a similar way with Edit Distance. According to its rules, there are four kinds of patterns in this regular expression matching:
"." - Matches any single character.
"c*" - Matches zero or more of character 'c';
".*" - Matches zero or more of any characters;
"c" - Matches a single character 'c';
First, make a match matrix between the word and patterns: e.g. pattern is ".*bc*." and the word is "abbcc"
.* b c* .
a 0 1 1 0
b 0 0 1 0
b 0 0 1 0
c 0 1 0 0
c 0 1 0 0
"0" in (i,j) represents a possible match between the pattern p[j] and character w[i].
"1" in (i,j) represents no match at all between the pattern p[j] and character w[i].
If there is a match between the pattern and word, there is a path from [1,1] to [m, n] (m is the length of the word and n is the number of patterns), which composed of a series of "0" and the following actions:
1) [i,j] -> [i+1,j+1] -- end the pattern match at [i,j] and move to next pattern match to [i+1, j+1];
2) [i,j] -> [i,j+1] -- skip the current pattern match at [i,j] and use next pattern (j+1) on character (i);
3) [i,j]-> [i+1, j] -- continue use current pattern at letter (i) and try to apply to (i+1);
2) 3) are operations allowed only for pattern columns "c*" and ".*".
1) is the only operation allowed for pattern columns "c" and ".".
So back to the above example, we found a path (a match) from [1,1] to [5,4] (marked by red color).
.* b c* .
a 0 1 1 0
b 0 0 1 0
b 0
0 1 0
c 0 1
0 0
c 0 1 0
0
[Solution]
//-------------
//--- C++ ---
//-------------
class Solution {
public:
bool isMatch(const char *s, const char *p) {
string s1 (s);
string p1 (p);
if ( p1.empty()) return s1 == "";
vector<vector<bool>> map (s1.size()+1, vector<bool>(p1.size()+1, false));
map[0][0] = true;
for (int i=1; i<p1.size()+1; i++) {
if (p1[i-1] == '*') map[0][i] = map[0][i-2];
}
for (int i=1; i<s1.size()+1; i++) {
for (int j=1; j<p1.size()+1; j++) {
if (p1[j-1] == '.') {
map[i][j] = map[i-1][j-1];
}
else if (p1[j-1] == '*') {
map[i][j] = map[i][j-2] //'*' is counted as zero
|| map[i][j-1] //'*' is counted as 1
|| p1[j-2] == '.' && map[i-1][j] //'.*'
|| s1[i-1] == p1[j-2] && map[i-1][j];
}
else
map[i][j] = (s1[i-1] == p1[j-1])? map[i-1][j-1] : false;
}
}
return map.back().back();
}
};
//---------------
//--- JAVA ---
//---------------
import java.util.*;
public class Solution {
public static boolean isMatch(String s, String p) {
List<String> pat = new ArrayList<String> ();
char[] ap = p.toCharArray();
if (s.length() == 0) return (p.length()==0 || p.equals(".*"));
if (ap.length<=1) {
if (ap.length==0 && s.length() ==1) return true;
if (ap.length==0 && ap[0]=='.' && s.length() == 1) return true;
return false;
}
// -- parse and generate patterns sub-strings.
int i = 0;
while(i<ap.length-1) {
char a = ap[i];
char b = ap[i+1];
if (a == '.') {
if (b=='*') {
pat.add(new String("") + a+b);
i++;
} else {
pat.add(new String("") +a);
}
}
else if (a != '*') {
if (b=='*') {
pat.add(new String("") + a +b);
i++;
} else {
pat.add(new String("") +a);
}
} else { // a=='*'
return false;
}
i++;
}
if (i<ap.length) {
pat.add(new String("") + ap[i]);
}
// -- generate the matrix.
int[][] matrix;
int rows = s.length();
int cols = pat.size();
matrix = new int[rows][cols];
for (i=0; i<rows; i++) {
for (int j=0; j<cols; j++) {
char cs = s.charAt(i);
matrix[i][j] = 1;
if (pat.get(j).length() == 2){
if ( pat.get(j).equals(".*") ){
matrix[i][j] = 0;
}
else { //"c*"
if (pat.get(j).charAt(0) == cs) {
matrix[i][j] = 0;
}
}
}
else {
if (pat.get(j).equals(".") || pat.get(j).charAt(0) == cs) {
matrix[i][j] = 0;
}
}
System.out.println(matrix[i][j]);
}
System.out.println("");
}
// -- to find a path/match.
if (matrix[rows-1][cols-1] != 0) return false;
return validateMatch(matrix, 0, 0, pat);
}
public static boolean validateMatch(int[][] m, int i, int j, List<String> pat) {
int rows = m.length;
int cols = m[0].length;
if (i== rows-1 && j==cols-1) return true;
boolean ret = false;
if (pat.get(j).length()==2) {
if (j+1<cols && m[i][j+1] == 0) {
ret = ret || validateMatch(m, i, j+1, pat);
}
if (i+1<rows && m[i+1][j] ==0) {
ret = ret || validateMatch(m, i+1, j, pat);
}
}
if (j+1<cols && i+1<rows && m[i+1][j+1] ==0) {
ret = ret || validateMatch(m, i+1, j+1, pat);
}
return ret;
}
public static void main(String args[])
{
String s = "aasdfasdfasdfasdfas";
String p = "aasdf.*asdf.*asdf.*asdf.*s";
boolean a = Solution.isMatch(s, p);
System.out.println("a:" + a);
}
}