forked from crystal-lang/crystal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
levenshtein.cr
111 lines (94 loc) · 2.6 KB
/
levenshtein.cr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Levensthein distance methods.
module Levenshtein
# Computes the [levenshtein distance](http://en.wikipedia.org/wiki/Levenshtein_distance) of two strings.
#
# ```
# require "levenshtein"
#
# Levenshtein.distance("algorithm", "altruistic") # => 6
# Levenshtein.distance("hello", "hallo") # => 1
# Levenshtein.distance("こんにちは", "こんちは") # => 1
# Levenshtein.distance("hey", "hey") # => 0
# ```
def self.distance(string1 : String, string2 : String) : Int32
return 0 if string1 == string2
s = string1.chars
t = string2.chars
s_size = s.size
t_size = t.size
return t_size if s_size == 0
return s_size if t_size == 0
# This is to allocate less memory
if t_size > s_size
t, s = s, t
t_size, s_size = s_size, t_size
end
v = Pointer(Int32).malloc(t_size + 1) { |i| i }
s_size.times do |i|
last_cost = i + 1
t_size.times do |j|
sub_cost = s[i] == t[j] ? 0 : 1
cost = Math.min(Math.min(last_cost + 1, v[j + 1] + 1), v[j] + sub_cost)
v[j] = last_cost
last_cost = cost
end
v[t_size] = last_cost
end
v[t_size]
end
# Finds the closest string to a given string amongst many strings.
#
# ```
# finder = Levenshtein::Finder.new "hallo"
# finder.test "hay"
# finder.test "hall"
# finder.test "hallo world"
#
# finder.best_match # => "hall"
# ```
class Finder
# :nodoc:
record Entry,
value : String,
distance : Int32
@tolerance : Int32
def initialize(@target : String, tolerance : Int? = nil)
@tolerance = tolerance || (target.size / 5.0).ceil.to_i
end
def test(name : String, value : String = name)
distance = Levenshtein.distance(@target, name)
if distance <= @tolerance
if best_entry = @best_entry
if distance < best_entry.distance
@best_entry = Entry.new(value, distance)
end
else
@best_entry = Entry.new(value, distance)
end
end
end
def best_match
@best_entry.try &.value
end
def self.find(name, tolerance = nil)
sn = new name, tolerance
yield sn
sn.best_match
end
def self.find(name, all_names, tolerance = nil)
find(name, tolerance) do |similar|
all_names.each do |a_name|
similar.test(a_name)
end
end
end
end
def self.find(name, tolerance = nil)
Finder.find(name, tolerance) do |sn|
yield sn
end
end
def self.find(name, all_names, tolerance = nil)
Finder.find(name, all_names, tolerance)
end
end