第一个是最大公共子序列,使用的是动态规划的技术
str1 = 'GTACCGTCA'
str2 = 'CATCGA'
def LCS_table(str1, str2):
"""
这部分主要使用了动态规划的技术,就是如果两个最大公共子序列相等的话,必然前面的也相等
"""
str_length1 = len(str1)
str_length2 = len(str2)
lcs_table = [ [0 for j in range(str_length2 + 1)] for i in range(str_length1 + 1)]
print(lcs_table)
for index1,char1 in enumerate(str1):
i = index1 + 1
for index2,char2 in enumerate(str2):
j = index2 + 1
print(i,j)
if char1 == char2:
lcs_table[i][j] = lcs_table[i-1][j-1] + 1
else:
lcs_table[i][j] = max(lcs_table[i][j-1], lcs_table[i-1][j])
return lcs_table
"""
这个是KMP算法,首先求出next_table,这里记录着要偏移的位数,通过这个减少判断量
"""
pattern = 'ABCDABD'
text = 'BBC ABCDAB ABCDABCDABDE'
def prefix(string):
"""
用于求前缀的函数
"""
prefix_set = set()
length = len(string)
if length == 1:
return prefix_set
for i in range(length):
substring = string[:i]
prefix_set.add(substring)
prefix_set.remove('')
return prefix_set
def suffiex(string):
"""
求后缀的函数
"""
suffiex_set = set()
length = len(string)
for i in range(length):
substring = string[i + 1:length]
suffiex_set.add(substring)
suffiex_set.remove('')
return suffiex_set
def next_table(string):
"""
求前缀和后缀的并集最大长度的长度,与对应的字符放在一块,这个就是部分匹配值
"""
next_state_table = []
for index,char in enumerate(string):
strd = index + 1
substring = string[:strd]
p_set = prefix(substring)
s_set = suffiex(substring)
interset = p_set & s_set
if interset:
interset_number = len(max(interset))
else:
interset_number = 0
next_state_table.append(interset_number)
return next_state_table
def KMP(pattern,text):
"""
"""
offset = 0#偏移位数
number = 0#模式的第几位
next_state = next_table(pattern)
for char in text:
if offset != 0:#用于跳转
offset -= 1
continue
if number == 7:#说明都匹配上了
print('success')
break
if char == pattern[number]:
number += 1
continue#这样可以一直匹配到不匹配
if char != pattern[number] and number != 0:
offset = (number) - next_state[number - 1]
number = 0