Knuth–Morris–Pratt(KMP) Pattern Matching

 
/**
 * Do pattern matching using KMP algorithm
 * 
 * Runtime complexity - O(m + n) where m is length of text and n is length of pattern
 * Space complexity - O(n)
 */
class SubstringSearch {

    /**
     * Slow method of pattern matching
     */
    public boolean hasSubstring(char[] text, char[] pattern){
        int i=0;
        int j=0;
        int k = 0;
        while(i < text.length && j < pattern.length){
            if(text[i] == pattern[j]){
                i++;
                j++;
            }else{
                j=0;
                k++;
                i = k;
            }
        }
        if(j == pattern.length){
            return true;
        }
        return false;
    }
    
    /**
     * Compute temporary array to maintain size of suffix which is same as prefix
     * Time/space complexity is O(size of pattern)
     */
    private int[] computeTemporaryArray(char pattern[]){
        int [] lps = new int[pattern.length];
        int index =0;
        for(int i=1; i < pattern.length;){
            if(pattern[i] == pattern[index]){
                lps[i] = index + 1;
                index++;
                i++;
            }else{
                if(index != 0){
                    index = lps[index-1];
                }else{
                    lps[i] =0;
                    i++;
                }
            }
        }
        return lps;
    }
    
    /**
     * KMP algorithm of pattern matching.
     */
    public boolean KMP(char[] text, char[] pattern){
        int lps[] = computeTemporaryArray(pattern);
        int i=0;
        int j=0;
        while(i < text.length && j < pattern.length){
            if(text[i] == pattern[j]){
                i++;
                j++;
            }else{
                if(j!=0){
                    j = lps[j-1];
                }else{
                    i++;
                }
            }
        }
        if(j == pattern.length){
            // return i - j; // if we need index where pattern found 
            // if we have multiple occurances of pattern then add (i - j) to list at this point
            return true;
        }
        return false;
    }
    
    /**
     * KMP algorithm of pattern matching.
     * Limitations: We use text characters as array index so 
     * if it is unicode then we need array of size 144,697 to
     * cover all characters.
     */
    public boolean KMP2(String text, String pattern){
        int i, j;
        int[][] dfa = preComputeDFA(pattern);
        int M = pattern.length();
        for(i = 0, j = 0; i < text.length() && j < M;) {
            j = dfa[text.charAt(i)][j];
            if(j == M) {
                // return i - M;
                return true;
            } 
        }
        
        // return -1;
        return false;
    }
    
    private int[][] preComputeDFA(String pat) {
        int M = pat.length();
        int[][] dfa = new int[128][M]; // assuming ASCII characters only
        dfa[pat.charAt(0)][0] = 1;
        for (int X = 0, j = 1; j < M; j++) {
            for (int c = 0; c < 128; c++)
                dfa[c][j] = dfa[c][X];
            dfa[pat.charAt(j)][j] = j+1;
            X = dfa[pat.charAt(j)][X];
        }
        return dfa;
    }
        
    public static void main(String args[]){
        String str = "abcxabcdabcdabcy";
        String subString = "abcdabcy";
        SubstringSearch ss = new SubstringSearch();
        boolean result = ss.KMP(str.toCharArray(), subString.toCharArray());
        System.out.print("Does " + str + "have pattern: " + subString + "? " + result);
        
        System.out.println("\n\nUsing DFA (Deterministic Finite State Machine)");
        result = ss.KMP(str.toCharArray(), subString.toCharArray());
        System.out.print("Does " + str + "have pattern: " + subString + "? " + result);
        
    }
}

Output
Does abcxabcdabcdabcyhave pattern: abcdabcy? true

Using DFA (Deterministic Finite State Machine)
Does abcxabcdabcdabcyhave pattern: abcdabcy? true

Comments

Popular posts from this blog

SQL basic interview question

gsutil Vs Storage Transfer Service Vs Transfer Appliance