I have been looking at Paul Heckel's Diff Algorithm and I don't seem to understand it fully.
I copied steps 1-5 as shown in Python code but I can't get it to show the differences using the final step of the algorithm. I would be grateful if someone explained the final step along with Python code.
Also, I don't fully understand why you need a reference to the table lines in step 4 and 5, so an explanation of that would be amazing too!
Many thanks
Here's my current code:
def find_diff(current_file_as_list, different_file_as_list):
N = current_file_as_list
O = different_file_as_list
table = {}
OA = []
NA = []
for i in O:
OA.append(i)
for i in N:
NA.append(i)
# First pass
i = 0
for line in N:
if not line in table:
table[line] = {}
table[line]["NC"] = 1
else:
if table[line]["NC"] == 1:
table[line]["NC"] = 2
else:
table[line]["NC"] = "many"
NA[i] = table[line]
i += 1
# second pass
j = 0
for line in O:
if not line in table:
table[line] = {}
table[line]["OC"] = 1
else:
if not "OC" in table[line]:
table[line]["OC"] = 1
elif table[line]["OC"] == 1:
table[line]["OC"] = 2
else:
table[line]["OC"] = "many"
table[line]["OLNO"] = j # Gets overwritten with multiple occurrences.
# Check to see if this is the intended implementation.
# Maybe only relevant for "OC" == "NC" == 1
OA[j] = table[line]
j += 1
# third pass
i = 0
for i in range(0, len(NA)):
# Check if they appear in both files
if "OC" in NA[i] and "NC" in NA[i]:
# Check if they appear exactly once
if NA[i]["OC"] == NA[i]["NC"] == 1:
olno = NA[i]["OLNO"]
NA[i], OA[olno] = olno, i
i += 1
# fourth pass
# ascending
for i in range(0, len(NA)):
for j in range(0 , len(OA)):
if NA[i] == OA[j] and i + 1 < len(NA) and j + 1 < len(OA) and NA[i + 1] == OA[j + 1]:
OA[j + 1] = table[O[i + 1]]
NA[i + 1] = table[N[j + 1]]
# fifth pass
# descending
for i in range(len(NA) - 1, 0, -1):
for j in range(len(OA) - 1, 0, -1):
if NA[i] == OA[j] and i - 1 > 0 and j - 1 > 0 and NA[i - 1] == OA[j - 1]:
OA[j - 1] = table[O[i - 1]]
NA[i - 1] = table[N[j - 1]]
# final step implementation should go here but I'm not sure how to approach it but this is my current attempt (which I am certain is wrong):
k = 0
array = []
for i in range(0, len(NA)):
if isinstance(NA[i], int):
array.append("= " + str(N[i]))
k = NA[i] + 1
elif isinstance(NA[i], dict):
array.append("+ " + N[i])
for j in range(k, len(OA)):
k = j + 1
print("j - " + str(j))
if not isinstance(OA[j], int):
array.append("- " + O[j])
else:
break
You can pass any two string's or list of string's as input to the function eg. find_diff("hello", "hell")