/**
* Do pattern matching using KMP algorithm
*
* Runtime complexity - O(m + n) where m is length of text and n is length of pattern
* Space complexity - O(n)
*/
class SubstringSearch {
/**
* Slow method of pattern matching
*/
public boolean hasSubstring(char[] text, char[] pattern){
int i=0;
int j=0;
int k = 0;
while(i < text.length && j < pattern.length){
if(text[i] == pattern[j]){
i++;
j++;
}else{
j=0;
k++;
i = k;
}
}
if(j == pattern.length){
return true;
}
return false;
}
/**
* Compute temporary array to maintain size of suffix which is same as prefix
* Time/space complexity is O(size of pattern)
*/
private int[] computeTemporaryArray(char pattern[]){
int [] lps = new int[pattern.length];
int index =0;
for(int i=1; i < pattern.length;){
if(pattern[i] == pattern[index]){
lps[i] = index + 1;
index++;
i++;
}else{
if(index != 0){
index = lps[index-1];
}else{
lps[i] =0;
i++;
}
}
}
return lps;
}
/**
* KMP algorithm of pattern matching.
*/
public boolean KMP(char[] text, char[] pattern){
int lps[] = computeTemporaryArray(pattern);
int i=0;
int j=0;
while(i < text.length && j < pattern.length){
if(text[i] == pattern[j]){
i++;
j++;
}else{
if(j!=0){
j = lps[j-1];
}else{
i++;
}
}
}
if(j == pattern.length){
// return i - j; // if we need index where pattern found
// if we have multiple occurances of pattern then add (i - j) to list at this point
return true;
}
return false;
}
/**
* KMP algorithm of pattern matching.
* Limitations: We use text characters as array index so
* if it is unicode then we need array of size 144,697 to
* cover all characters.
*/
public boolean KMP2(String text, String pattern){
int i, j;
int[][] dfa = preComputeDFA(pattern);
int M = pattern.length();
for(i = 0, j = 0; i < text.length() && j < M;) {
j = dfa[text.charAt(i)][j];
if(j == M) {
// return i - M;
return true;
}
}
// return -1;
return false;
}
private int[][] preComputeDFA(String pat) {
int M = pat.length();
int[][] dfa = new int[128][M]; // assuming ASCII characters only
dfa[pat.charAt(0)][0] = 1;
for (int X = 0, j = 1; j < M; j++) {
for (int c = 0; c < 128; c++)
dfa[c][j] = dfa[c][X];
dfa[pat.charAt(j)][j] = j+1;
X = dfa[pat.charAt(j)][X];
}
return dfa;
}
public static void main(String args[]){
String str = "abcxabcdabcdabcy";
String subString = "abcdabcy";
SubstringSearch ss = new SubstringSearch();
boolean result = ss.KMP(str.toCharArray(), subString.toCharArray());
System.out.print("Does " + str + "have pattern: " + subString + "? " + result);
System.out.println("\n\nUsing DFA (Deterministic Finite State Machine)");
result = ss.KMP(str.toCharArray(), subString.toCharArray());
System.out.print("Does " + str + "have pattern: " + subString + "? " + result);
}
}
Output
Does abcxabcdabcdabcyhave pattern: abcdabcy? true
Using DFA (Deterministic Finite State Machine)
Does abcxabcdabcdabcyhave pattern: abcdabcy? true
Comments
Post a Comment