forked from Plachtaa/VITS-fast-fine-tuning
-
Notifications
You must be signed in to change notification settings - Fork 5
/
sanskrit.py
62 lines (58 loc) · 1.5 KB
/
sanskrit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import re
from indic_transliteration import sanscript
# Ordered (IAST pattern, IPA replacement) rewrite rules.
#
# Each entry is a (compiled regex, replacement string) pair; consumers apply
# the entries one after another to the same running text, so ORDER MATTERS:
#
# * Every aspirate rule (e.g. 'k⁼h' -> 'kʰ') matches the OUTPUT of the rule
#   directly before it ('k' -> 'k⁼') followed by a literal 'h', so each such
#   pair must stay adjacent and in this order.
# * The plain 't'/'d' rules must run BEFORE the dotted (retroflex) 'ṭ'/'ḍ'
#   rules. The dotted rules emit text that itself starts with a plain 't' or
#   'd' ('t`⁼', 'd`⁼'); if the plain rules ran afterwards they would rewrite
#   that output a second time and yield 't⁼`⁼' / 'd⁼`⁼'.
# * 'h' -> 'ɦ' comes after all aspirate rules so it only sees an 'h' that was
#   not consumed as part of an aspirate.
_iast_to_ipa = [(re.compile(iast), ipa) for iast, ipa in (
    # Vowels. Short 'a' is rewritten first; the later outputs that contain an
    # 'a' ('aː') are therefore never touched by this rule.
    ('a', 'ə'),
    ('ā', 'aː'),
    ('ī', 'iː'),
    ('ū', 'uː'),
    ('ṛ', 'ɹ`'),
    ('ṝ', 'ɹ`ː'),
    ('ḷ', 'l`'),
    ('ḹ', 'l`ː'),
    ('e', 'eː'),
    ('o', 'oː'),
    # k / g series, each followed by its aspirate rule.
    ('k', 'k⁼'),
    ('k⁼h', 'kʰ'),
    ('g', 'g⁼'),
    ('g⁼h', 'gʰ'),
    ('ṅ', 'ŋ'),
    # c / j series.
    ('c', 'ʧ⁼'),
    ('ʧ⁼h', 'ʧʰ'),
    ('j', 'ʥ⁼'),
    ('ʥ⁼h', 'ʥʰ'),
    ('ñ', 'n^'),
    # Plain t / d series -- deliberately placed before the dotted series
    # below (see the ordering note at the top of this table).
    ('t', 't⁼'),
    ('t⁼h', 'tʰ'),
    ('d', 'd⁼'),
    ('d⁼h', 'dʰ'),
    # Dotted ṭ / ḍ series.
    ('ṭ', 't`⁼'),
    ('t`⁼h', 't`ʰ'),
    ('ḍ', 'd`⁼'),
    ('d`⁼h', 'd`ʰ'),
    ('ṇ', 'n`'),
    # p / b series.
    ('p', 'p⁼'),
    ('p⁼h', 'pʰ'),
    ('b', 'b⁼'),
    ('b⁼h', 'bʰ'),
    # Remaining consonants. 'y' -> 'j' runs after the 'j' rule above, so the
    # 'j' produced here is left as is.
    ('y', 'j'),
    ('ś', 'ʃ'),
    ('ṣ', 's`'),
    ('r', 'ɾ'),
    ('l̤', 'l`'),
    ('h', 'ɦ'),
    # Apostrophes are removed; '~' and 'ṃ' both become '^'.
    ("'", ''),
    ('~', '^'),
    ('ṃ', '^'),
)]
def devanagari_to_ipa(text):
    """Convert Devanagari text to the IPA-style notation used by this module.

    Steps, in order:
      1. Replace the single glyph 'ॐ' with the spelled-out 'ओम्'.
      2. Turn danda punctuation into Latin punctuation: a '।' at the end of
         the text becomes '.', any other '।' becomes ', ', and '॥' becomes
         '.' (surrounding/leading whitespace is absorbed as the patterns
         specify).
      3. Transliterate Devanagari to IAST with ``sanscript.transliterate``.
      4. Apply every (pattern, replacement) rule in ``_iast_to_ipa``, in
         list order.
      5. Rewrite each 'ḥ': the 'ḥ' is dropped and replaced by 'h', a copy of
         the character captured before it (skipping any '`'/'ː' marks in
         between), and a trailing '*'.

    Args:
        text: Input string containing Devanagari text.

    Returns:
        The converted string.
    """
    def _expand_visarga(match):
        # match.group(0) ends with 'ḥ'; strip it, then append 'h', the
        # captured leading character, and the '*' marker.
        return match.group(0)[:-1] + 'h' + match.group(1) + '*'

    text = text.replace('ॐ', 'ओम्')

    # Order matters: the end-of-text danda rule must run before the general
    # danda rule, otherwise a final '।' would become ', ' instead of '.'.
    for punct_pattern, punct in (
        (r'\s*।\s*$', '.'),
        (r'\s*।\s*', ', '),
        (r'\s*॥', '.'),
    ):
        text = re.sub(punct_pattern, punct, text)

    text = sanscript.transliterate(text, sanscript.DEVANAGARI, sanscript.IAST)

    for pattern, ipa in _iast_to_ipa:
        text = pattern.sub(ipa, text)

    # Runs after the rule table, so the 'h' inserted here is not affected by
    # the table's own 'h' rule.
    return re.sub('(.)[`ː]*ḥ', _expand_visarga, text)