-
Notifications
You must be signed in to change notification settings - Fork 0
/
spellcorrection.py
60 lines (43 loc) · 1.33 KB
/
spellcorrection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import sys
import numpy as np
def levenshtein_distance(word1, word2):
    """Return the Levenshtein edit distance between ``word1`` and ``word2``.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn ``word1`` into ``word2``.
    It is computed with a full ``(len(word1) + 1) x (len(word2) + 1)``
    dynamic-programming table.

    The table is a default (float) NumPy array, so the result is returned
    as a NumPy float such as ``3.0`` rather than a Python ``int``.
    """
    rows = len(word1) + 1
    cols = len(word2) + 1

    table = np.zeros((rows, cols))
    # Borders: turning a prefix into the empty string (or the reverse)
    # costs exactly as many edits as the prefix has elements.
    table[:, 0] = np.arange(rows)
    table[0, :] = np.arange(cols)

    # Fill column by column; cell (row, col) holds the distance between
    # the first ``row`` elements of word1 and the first ``col`` of word2.
    for col, target_char in enumerate(word2, start=1):
        for row, source_char in enumerate(word1, start=1):
            mismatch = 0 if source_char == target_char else 1
            deletion = table[row - 1, col] + 1
            insertion = table[row, col - 1] + 1
            substitution = table[row - 1, col - 1] + mismatch
            table[row, col] = min(deletion, insertion, substitution)

    # Bottom-right cell compares the two complete words.
    return table[-1, -1]
def spell_correction(data, k, train_set=None):
    """Suggest up to ``k`` known words closest to ``data``.

    Closeness is measured with ``levenshtein_distance``. Words are
    returned in order of increasing distance; words at the same distance
    keep their relative order from ``train_set``.

    Args:
        data: The word to check.
        k: Maximum number of suggestions; used as the stop of a slice
            over the ranked words.
        train_set: Optional iterable of known words. Defaults to the
            small built-in word list.

    Returns:
        A list of suggested words. The list is empty when ``data``
        exactly matches a known word (distance 0, nothing to correct)
        or when ``train_set`` is empty.
    """
    if train_set is None:
        train_set = ['sitting', 'meeting', 'kitchen',
                     'friend', 'meat']
    words = list(train_set)
    if not words:
        # Nothing to compare against; avoid taking min() of an empty array.
        return []

    # Build the distance vector in one pass instead of re-allocating the
    # array with np.hstack on every iteration.
    distances = np.array(
        [levenshtein_distance(word, data) for word in words]
    )

    # A distance of 0 means the input is already a known word.
    if distances.min() == 0:
        return []

    # A stable sort makes tie-breaking deterministic (word-list order).
    nearest = distances.argsort(kind='stable')[:k]
    return [words[i] for i in nearest]
if __name__ == '__main__':
    # Command-line entry point: print the 3 closest known words for the
    # word given as the first argument.
    if len(sys.argv) < 2:
        # Without this guard a missing argument surfaces as a bare
        # IndexError traceback; exit with a usage message instead.
        sys.exit('usage: {} WORD'.format(sys.argv[0]))
    out = spell_correction(sys.argv[1], 3)
    print(out)