"""
This is a final implementation of the KMP (Knuth-Morris-Pratt) algorithm,
including the overlap function computation.
"""

__author__ = "Marina Barsky, and Catherine Liu"

import sys


# general computation of an overlap function
def overlap_function(pattern):
    """
    General computation of an overlap function in linear time:
    the overlap function of position i is the length of the longest
    proper prefix of pattern[0:i+1] which is at the same time
    a suffix of pattern[0:i+1].

    :param pattern: string to be preprocessed
    :return: list with the values of an overlap function for each position
             (an empty list for an empty pattern)
    """
    ol_list = [0] * len(pattern)  # first is always zero
    overlap = 0  # overlap length of the prefix ending at the previous position

    for pos in range(1, len(pattern)):
        # The current border cannot be extended by pattern[pos]:
        # fall back to the next shorter border until one can be extended
        # or there is no border left.
        while overlap > 0 and pattern[pos] != pattern[overlap]:
            overlap = ol_list[overlap - 1]
        if pattern[pos] == pattern[overlap]:
            overlap += 1
        ol_list[pos] = overlap

    return ol_list


def kmp(T, P, of_list=None):
    """
    Find all (possibly overlapping) occurrences of pattern P in text T
    using the Knuth-Morris-Pratt algorithm.

    :param T: text to search in
    :param P: pattern to search for
    :param of_list: overlap function of P as returned by overlap_function(P);
                    when omitted (None) it is computed here
    :return: list of 0-based start positions of every occurrence of P in T,
             in increasing order; an empty list if P is empty
    """
    M = len(P)
    if M == 0:
        # An empty pattern has no meaningful occurrence positions.
        return []
    if of_list is None:
        of_list = overlap_function(P)

    matches = []
    j = 0  # number of pattern characters currently matched

    for i, char in enumerate(T):
        # On a mismatch, shrink the matched prefix using the overlap function.
        # The text position never moves backwards, and the current character
        # is always re-compared against the shorter prefix (down to P[0]).
        while j > 0 and char != P[j]:
            j = of_list[j - 1]
        if char == P[j]:
            j += 1
        if j == M:
            matches.append(i - M + 1)
            # Keep the longest border so that overlapping matches are found.
            j = of_list[j - 1]

    return matches


if __name__ == '__main__':
    # default demo values
    T = 'tictictictactictictic'
    P = 'tictic'

    # optional command line override: <text> <pattern>
    if len(sys.argv) > 2:
        T = sys.argv[1]
        P = sys.argv[2]

    of_list = overlap_function(P)
    matches = kmp(T, P, of_list)

    print("Search for pattern '" + P + "': results")
    if len(matches) > 0:
        print("Found match at position(s):", matches)
    else:
        print("No matches found.")