# -*- coding: utf-8 -*-
class transliterator(object):
def __init__(self):
self.trans = {}
for char in u"":
self.trans[char] = u"A"
for char in u"":
self.trans[char] = u""
self.trans[u""] = u""
self.trans[u""] = u"Ae"
self.trans[u""] = u"Aa"
for char in u"":
self.trans[char] = u"a"
for char in u"":
self.trans[char] = u""
self.trans[u""] = u""
self.trans[u""] = u"ae"
self.trans[u""] = u"aa"
for char in u"":
self.trans[char] = u"B"
for char in u"":
self.trans[char] = u"b"
for char in u"":
self.trans[char] = u"C"
for char in u"":
self.trans[char] = u"c"
self.trans[u""] = u""
self.trans[u""] = u""
self.trans[u""] = u"Dh"
self.trans[u""] = u"dh"
for char in u"":
self.trans[char] = u"D"
for char in u"":
self.trans[char] = u"d"
for char in u"E":
self.trans[char] = u"E"
for char in u"":
self.trans[char] = u""
for char in u"e":
self.trans[char] = u"e"
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u"F"
for char in u"":
self.trans[char] = u"f"
for char in u"":
self.trans[char] = u"G"
for char in u"":
self.trans[char] = u"g"
self.trans[u""] = u"Gx"
self.trans[u""] = u"gx"
for char in u"H":
self.trans[char] = u"H"
for char in u"":
self.trans[char] = u"h"
for char in u"I":
self.trans[char] = u"I"
for char in u"i":
self.trans[char] = u"i"
for char in u"J":
self.trans[char] = u"J"
for char in u"":
self.trans[char] = u"j"
for char in u"":
self.trans[char] = u"K"
for char in u"":
self.trans[char] = u"k"
for char in u"":
self.trans[char] = u"L"
for char in u"":
self.trans[char] = u"l"
for char in u"":
self.trans[char] = u"M"
for char in u"":
self.trans[char] = u"m"
for char in u"":
self.trans[char] = u"N"
for char in u"":
self.trans[char] = u"n"
for char in u"":
self.trans[char] = u"O"
for char in u"":
self.trans[char] = u"o"
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u"P"
for char in u"":
self.trans[char] = u"p"
self.trans[u""] = u"q"
for char in u"":
self.trans[char] = u"R"
for char in u"":
self.trans[char] = u"r"
for char in u"":
self.trans[char] = u"S"
for char in u"":
self.trans[char] = u"s"
self.trans[u""] = u"Sx"
self.trans[u""] = u"sx"
for char in u"":
self.trans[char] = u"T"
for char in u"":
self.trans[char] = u"t"
for char in u"":
self.trans[char] = u"U"
for char in u"":
self.trans[char] = u"u"
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u""
self.trans[u""] = u"Ux"
self.trans[u""] = u"ux"
self.trans[u""] = u""
self.trans[u""] = u""
self.trans[u""] = u""
self.trans[u""] = u""
self.trans[u""] = u""
self.trans[u""] = u""
for char in u"":
self.trans[char] = u"V"
for char in u"":
self.trans[char] = u"v"
for char in u"":
self.trans[char] = u"W"
for char in u"":
self.trans[char] = u"w"
for char in u"":
self.trans[char] = u"X"
for char in u"":
self.trans[char] = u"x"
for char in u"":
self.trans[char] = u"Y"
for char in u"":
self.trans[char] = u"y"
for char in u"":
self.trans[char] = u"Z"
for char in u"":
self.trans[char] = u"z"
self.trans[u""] = u"zv"
# Latin: extended Latin alphabet
self.trans[u""] = u"a"
for char in u"":
self.trans[char] = u"AE"
for char in u"":
self.trans[char] = u"ae"
self.trans[u""] = u"Dh"
self.trans[u""] = u"dh"
for char in u"":
self.trans[char] = u"E"
for char in u"":
self.trans[char] = u"e"
for char in u"":
self.trans[char] = u"G"
for char in u"":
self.trans[char] = u"g"
self.trans[u""] = u"H"
self.trans[u""] = u"h"
self.trans[u""] = u"Wh"
self.trans[u""] = u"wh"
self.trans[u""] = u"I"
self.trans[u""] = u"i"
self.trans[u""] = u"Ng"
self.trans[u""] = u"ng"
self.trans[u""] = u"OE"
self.trans[u""] = u"oe"
self.trans[u""] = u"O"
self.trans[u""] = u"o"
self.trans[u""] = u"Ou"
self.trans[u""] = u"ou"
self.trans[u""] = u"Q"
for char in u"":
self.trans[char] = u"q"
self.trans[u""] = u"qp"
self.trans[u""] = u"r"
self.trans[u""] = u"s"
self.trans[u""] = u"ss"
self.trans[u""] = u"Sh"
for char in u"":
self.trans[char] = u"sh"
self.trans[u""] = u"U"
self.trans[u""] = u"u"
self.trans[u""] = u"V"
self.trans[u""] = u"v"
for char in u"":
self.trans[char] = u"W"
for char in u"":
self.trans[char] = u"w"
self.trans[u""] = u"Y"
self.trans[u""] = u"y"
self.trans[u""] = u"IJ"
self.trans[u""] = u"ij"
self.trans[u""] = u"Z"
for char in u"":
self.trans[char] = u"z"
self.trans[u""] = u"Zh"
self.trans[u""] = u"zh"
self.trans[u""] = u"Dzh"
self.trans[u""] = u"dzh"
for char in u"":
self.trans[char] = u"'"
for char in u"":
self.trans[char] = u"Th"
for char in u"":
self.trans[char] = u"th"
for char in u"C":
self.trans[char] = u"!"
#Punctuation and typography
for char in u"":
self.trans[char] = u'"'
for char in u"":
self.trans[char] = u"'"
self.trans[u""] = u"*"
self.trans[u"@"] = u"(at)"
self.trans[u""] = u"$"
self.trans[u""] = u"c"
self.trans[u""] = u"E"
self.trans[u""] = u"L"
self.trans[u""] = u"yen"
self.trans[u""] = u"+"
self.trans[u""] = u"++"
self.trans[u""] = u":"
self.trans[u""] = u"!"
self.trans[u""] = u"?"
self.trans[u""] = u"o/oo"
self.trans[u""] = u"o/ooo"
for char in u"":
self.trans[char] = u">"
for char in u"":
self.trans[char] = u"..."
for char in u"":
self.trans[char] = u"-"
for char in u"":
self.trans[char] = u" "
self.trans[u""] = u"|"
self.trans[u""] = u"***"
self.trans[u""] = u"<>"
self.trans[u""] = u"?!"
self.trans[u""] = u";-)"
self.trans[u""] = u"1"
self.trans[u""] = u"2"
self.trans[u""] = u"3"
# Cyrillic
self.trans.update({u"" : u"A", u"" : u"a", u"" : u"B", u"" : u"b",
u"" : u"V", u"" : u"v", u"" : u"G", u"" : u"g",
u"" : u"D", u"" : u"d", u"" : u"E", u"" : u"e",
u"" : u"Zh", u"" : u"zh", u"" : u"Z", u"" : u"z",
u"" : u"I", u"" : u"i", u"" : u"J", u"" : u"j",
u"" : u"K", u"" : u"k", u"" : u"L", u"" : u"l",
u"" : u"M", u"" : u"m", u"" : u"N", u"" : u"n",
u"" : u"O", u"" : u"o", u"" : u"P", u"" : u"p",
u"" : u"R", u"" : u"r", u"" : u"S", u"" : u"s",
u"" : u"T", u"" : u"t", u"" : u"U", u"" : u"u",
u"" : u"F", u"" : u"f", u"" : u"kh", u"" : u"C",
u"" : u"c", u"" : u"Ch", u"" : u"ch", u"" : u"Sh",
u"" : u"sh", u"" : u"Shch", u"" : u"shch", u"" : u"'",
u"" : "'", u"" : u'"', u"" : '"', u"" : u"Yu",
u"" : u"yu", u"" : u"Ya", u"" : u"ya", u"" : u"Kh",
u"" : u"Kh"})
# Additional Cyrillic letters, most occurring in only one or a few languages
self.trans.update({u"" : u"Y", u"" : u"y", u"" : u"", u"" : u"",
u"" : u"", u"" : u"", u"" : u"", u"" : u"",
u"" : u"I", u"" : u"i", u"" : u"Ji", u"" : u"ji",
u"" : u"Je", u"" : u"je", u"" : u"G", u"" : u"G",
u"" : u"g", u"" : u"g", u"" : u"Dj", u"" : u"dj",
u"" : u"Y", u"" : u"y", u"" : u"Lj", u"" : u"lj",
u"" : u"Nj", u"" : u"nj", u"" : u"Cj", u"" : u"cj",
u"" : u"Zhj", u"" : u"zhj", u"" : u"Gj", u"" : u"gj",
u"" : u"Kj", u"" : u"kj", u"" : u"Ii", u"" : u"ii",
u"" : u"U", u"" : u"u", u"" : u"H", u"" : u"h",
u"" : u"Dz",u"" : u"dz", u"" :u"", u"" : u"",
u"" : u"", u"" : u"", u"": u"Y", u"": u"y", u"": u"H",
u"": u"h", u"": u"AE", u"": u"AE", u"": u"ae",
u"": u"", u"": u"", u"": u"", u"": u"", u"": u"Zhj",
u"": u"zhj", u"": u"U", u"": u"u", u"": u"", u"": u"",
u"": u"", u"": u"", u"": u"A", u"": u"a", u"": u"",
u"": u"", u"" : u"Ts", u"": u"Ts", u"": u"ts", u"": u"ts",
u"": u"Dh", u"": u"dh", u"": u"", u"": u"", u"": u"L",
u"": u"l", u"": u"M", u"": u"m", u"": u"", u"": u"",
u"": u"u", u"": u"u", u"": u"Ph", u"": u"ph", u"": u"R",
u"": u"r", u"": u"Th", u"": u"th", u"": u"T", u"": u"t",
u"": u"", u"": u"", u"": u"U", u"": u"U", u"": u"u",
u"": u"u", u"": u"Tts", u"": u"tts", u"": u"Ch", u"": u"ch"})
for char in u"":
self.trans[char] = u"J"
for char in u"":
self.trans[char] = u"j"
for char in u"":
self.trans[char] = u"Dzh"
for char in u"":
self.trans[char] = u"dzh"
for char in u"":
self.trans[char] = u"Dz"
for char in u"":
self.trans[char] = u"dz"
for char in u"":
self.trans[char] = u"G"
for char in u"":
self.trans[char] = u"g"
for char in u"":
self.trans[char] = u"Q"
for char in u"":
self.trans[char] = u"q"
for char in u"":
self.trans[char] = u"Ng"
for char in u"":
self.trans[char] = u"ng"
for char in u"":
self.trans[char] = u"E"
for char in u"":
self.trans[char] = u"e"
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u""
# Archaic Cyrillic letters
self.trans.update({u"": u"Ou", u"": u"ou", u"": u"O", u"": u"O", u"": u"o",
u"": u"o", u"": u"Ot", u"": u"ot", u"": u"E", u"": u"e",
u"": u"Ei", u"": u"Ei", u"": u"ei", u"": u"ei", u"": u"Ai",
u"": u"ai", u"": u"X", u"": u"x", u"": u"Ps", u"": u"ps",
u"": u"Th", u"": u"th", u"": u"", u"": u"", u"": u""})
# Hebrew alphabet
for char in u"":
self.trans[char] = u"'"
self.trans[u""] = u"b"
self.trans[u""] = u"g"
self.trans[u""] = u"d"
self.trans[u""] = u"h"
self.trans[u""] = u"v"
self.trans[u""] = u"z"
self.trans[u""] = u"kh"
self.trans[u""] = u"t"
self.trans[u""] = u"y"
for char in u"":
self.trans[char] = u"k"
self.trans[u""] = u"l"
for char in u"":
self.trans[char] = u"m"
for char in u"":
self.trans[char] = u"n"
self.trans[u""] = u"s"
for char in u"":
self.trans[char] = u"ph"
for char in u"":
self.trans[char] = u"ts"
self.trans[u""] = u"q"
self.trans[u""] = u"r"
self.trans[u""] = u"sh"
self.trans[u""] = u"th"
# Arabic alphabet
for char in u"":
self.trans[char] = u"a"
for char in u"":
self.trans[char] = u"b"
for char in u"":
self.trans[char] = u"t"
for char in u"":
self.trans[char] = u"th"
for char in u"":
self.trans[char] = u"g"
for char in u"":
self.trans[char] = u"h"
for char in u"":
self.trans[char] = u"kh"
for char in u"":
self.trans[char] = u"d"
for char in u"":
self.trans[char] = u"dh"
for char in u"":
self.trans[char] = u"r"
for char in u"":
self.trans[char] = u"z"
for char in u"":
self.trans[char] = u"s"
for char in u"":
self.trans[char] = u"sh"
for char in u"":
self.trans[char] = u"s"
for char in u"":
self.trans[char] = u"d"
for char in u"":
self.trans[char] = u"t"
for char in u"":
self.trans[char] = u"z"
for char in u"":
self.trans[char] = u"'"
for char in u"":
self.trans[char] = u"gh"
for char in u"":
self.trans[char] = u"f"
for char in u"":
self.trans[char] = u"q"
for char in u"":
self.trans[char] = u"k"
for char in u"":
self.trans[char] = u"l"
for char in u"":
self.trans[char] = u"m"
for char in u"":
self.trans[char] = u"n"
for char in u"":
self.trans[char] = u"h"
for char in u"":
self.trans[char] = u"w"
for char in u"":
self.trans[char] = u"y"
# Arabic - additional letters, modified letters and ligatures
self.trans[u""] = u"'"
for char in u"":
self.trans[char] = u"'a"
for char in u"":
self.trans[char] = u"th"
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u"y"
self.trans[u""] = u"?"
# Arabic - ligatures
for char in u"":
self.trans[char] = u"la"
self.trans[u""] = u"llah"
for char in u"":
self.trans[char] = u"a'"
self.trans[u""] = u"w'"
self.trans[u""] = u"y'"
for char in u"":
self.trans[char] = u"" # indicates absence of vowels
# Arabic vowels
self.trans[u""] = u"a"
self.trans[u""] = u"u"
self.trans[u""] = u"i"
self.trans[u""] = u"a"
self.trans[u""] = u"ay"
self.trans[u""] = u"ay"
self.trans[u""] = u"u"
self.trans[u""] = u"iy"
# Arabic numerals
for char in u"":
self.trans[char] = u"0"
for char in u"":
self.trans[char] = u"1"
for char in u"":
self.trans[char] = u"2"
for char in u"":
self.trans[char] = u"3"
for char in u"":
self.trans[char] = u"4"
for char in u"":
self.trans[char] = u"5"
for char in u"":
self.trans[char] = u"6"
for char in u"":
self.trans[char] = u"7"
for char in u"":
self.trans[char] = u"8"
for char in u"":
self.trans[char] = u"9"
# Perso-Arabic
for char in u"":
self.trans[char] = u"p"
for char in u"":
self.trans[char] = u"ch"
for char in u"":
self.trans[char] = u"zh"
for char in u"":
self.trans[char] = u"g"
# Greek
self.trans.update({u"": u"A", u"": u"a", u"": u"B", u"": u"b", u"": u"G",
u"": u"g", u"": u"D", u"": u"d", u"": u"E", u"": u"e",
u"": u"Z", u"": u"z", u"": u"I", u"": u"i", u"": u"th",
u"": u"Th", u"": u"I", u"": u"i", u"": u"K", u"": u"k",
u"": u"L", u"": u"l", u"": u"M", u"": u"m", u"": u"N",
u"": u"n", u"": u"X", u"": u"x", u"": u"O", u"": u"o",
u"": u"P", u"": u"p", u"": u"R", u"": u"r", u"": u"S",
u"": u"s", u"": u"s", u"": u"T", u"": u"t", u"": u"Y",
u"": u"y", u"": u"F", u"": u"f", u"": u"Ps", u"": u"ps",
u"": u"O", u"": u"o", u"": u"&", u"": u"St", u"": u"st",
u"": u"Q", u"": u"Q", u"": u"q", u"": u"q", u"": u"S",
u"": u"s", u"": u"Ss", u"": u"ss", u"": u"Sh", u"": u"sh",
u"": u":", u"": u"", u"": u"", u"": u"", u"": u"",
u"": u"", u"": u"", u"": u"", u"": u"", u"": u"",
u"": u"", u"": u"", u"": u"", u"": u"", u"": u"",
u"": u"", u"": u"Y", u"": u"", u"": u"", u"": u"",
u"": u""})
# Japanese (katakana and hiragana)
for char in u"":
self.trans[char] = u"a"
for char in u"":
self.trans[char] = u"i"
for char in u"":
self.trans[char] = u"u"
for char in u"":
self.trans[char] = u"e"
for char in u"":
self.trans[char] = u"o"
for char in u"":
self.trans[char] = u"ya"
for char in u"":
self.trans[char] = u"yu"
for char in u"":
self.trans[char] = u"yo"
for char in u"":
self.trans[char] = u"ka"
for char in u"":
self.trans[char] = u"ki"
for char in u"":
self.trans[char] = u"ku"
for char in u"":
self.trans[char] = u"ke"
for char in u"":
self.trans[char] = u"ko"
for char in u"":
self.trans[char] = u"sa"
for char in u"":
self.trans[char] = u"shi"
for char in u"":
self.trans[char] = u"su"
for char in u"":
self.trans[char] = u"se"
for char in u"":
self.trans[char] = u"so"
for char in u"":
self.trans[char] = u"ta"
for char in u"":
self.trans[char] = u"chi"
for char in u"":
self.trans[char] = u"tsu"
for char in u"":
self.trans[char] = u"te"
for char in u"":
self.trans[char] = u"to"
for char in u"":
self.trans[char] = u"na"
for char in u"":
self.trans[char] = u"ni"
for char in u"":
self.trans[char] = u"nu"
for char in u"":
self.trans[char] = u"ne"
for char in u"":
self.trans[char] = u"no"
for char in u"":
self.trans[char] = u"ha"
for char in u"":
self.trans[char] = u"hi"
for char in u"":
self.trans[char] = u"fu"
for char in u"":
self.trans[char] = u"he"
for char in u"":
self.trans[char] = u"ho"
for char in u"":
self.trans[char] = u"ma"
for char in u"":
self.trans[char] = u"mi"
for char in u"":
self.trans[char] = u"mu"
for char in u"":
self.trans[char] = u"me"
for char in u"":
self.trans[char] = u"mo"
for char in u"":
self.trans[char] = u"ra"
for char in u"":
self.trans[char] = u"ri"
for char in u"":
self.trans[char] = u"ru"
for char in u"":
self.trans[char] = u"re"
for char in u"":
self.trans[char] = u"ro"
for char in u"":
self.trans[char] = u"wa"
for char in u"":
self.trans[char] = u"wi"
for char in u"":
self.trans[char] = u"we"
for char in u"":
self.trans[char] = u"wo"
for char in u"":
self.trans[char] = u"n"
for char in u"":
self.trans[char] = u"ga"
for char in u"":
self.trans[char] = u"gi"
for char in u"":
self.trans[char] = u"gu"
for char in u"":
self.trans[char] = u"ge"
for char in u"":
self.trans[char] = u"go"
for char in u"":
self.trans[char] = u"za"
for char in u"":
self.trans[char] = u"ji"
for char in u"":
self.trans[char] = u"zu"
for char in u"":
self.trans[char] = u"ze"
for char in u"":
self.trans[char] = u"zo"
for char in u"":
self.trans[char] = u"da"
for char in u"":
self.trans[char] = u"dji"
for char in u"":
self.trans[char] = u"dzu"
for char in u"":
self.trans[char] = u"de"
for char in u"":
self.trans[char] = u"do"
for char in u"":
self.trans[char] = u"ba"
for char in u"":
self.trans[char] = u"bi"
for char in u"":
self.trans[char] = u"bu"
for char in u"":
self.trans[char] = u"be"
for char in u"":
self.trans[char] = u"bo"
for char in u"":
self.trans[char] = u"pa"
for char in u"":
self.trans[char] = u"pi"
for char in u"":
self.trans[char] = u"pu"
for char in u"":
self.trans[char] = u"pe"
for char in u"":
self.trans[char] = u"po"
for char in u"":
self.trans[char] = u"vu"
self.trans[u""] = u"va"
self.trans[u""] = u"vi"
self.trans[u""] = u"ve"
self.trans[u""] = u"vo"
# Japanese and Chinese punctuation and typography
for char in u"":
self.trans[char] = u" "
for char in u"":
self.trans[char] = u'"'
for char in u"":
self.trans[char] = u"'"
for char in u"":
self.trans[char] = u"("
for char in u"":
self.trans[char] = u")"
for char in u"":
self.trans[char] = u"["
for char in u"":
self.trans[char] = u"]"
for char in u"":
self.trans[char] = u"{"
for char in u"":
self.trans[char] = u"}"
for char in u"":
self.trans[char] = u":"
for char in u"":
self.trans[char] = u"h"
for char in u"":
self.trans[char] = u"'"
for char in u"":
self.trans[char] = u"p"
for char in u"":
self.trans[char] = u". "
for char in u"":
self.trans[char] = u", "
for char in u"":
self.trans[char] = u" "
for char in u"":
self.trans[char] = u"shime"
for char in u"":
self.trans[char] = u"-"
for char in u"":
self.trans[char] = u"..."
for char in u"":
self.trans[char] = u".."
for char in u"":
self.trans[char] = u"months"
for char in u"":
self.trans[char] = u"_"
for char in u"":
self.trans[char] = u"*"
for char in u"":
self.trans[char] = u"(X)"
for char in u"":
self.trans[char] = u"(Y)"
for char in u"":
self.trans[char] = u"!"
for char in u"":
self.trans[char] = u"?"
for char in u"":
self.trans[char] = u";"
for char in u"":
self.trans[char] = u":"
for char in u"":
self.trans[char] = u"."
for char in u"":
self.trans[char] = u","
# Georgian
for char in u"":
self.trans[char] = u"a"
for char in u"":
self.trans[char] = u"b"
for char in u"":
self.trans[char] = u"g"
for char in u"":
self.trans[char] = u"d"
for char in u"":
self.trans[char] = u"e"
for char in u"":
self.trans[char] = u"v"
for char in u"":
self.trans[char] = u"z"
for char in u"":#
self.trans[char] = u"th"
for char in u"":
self.trans[char] = u"i"
for char in u"":#
self.trans[char] = u"k"
for char in u"":
self.trans[char] = u"l"
for char in u"":
self.trans[char] = u"m"
for char in u"":
self.trans[char] = u"n"
for char in u"":
self.trans[char] = u"o"
for char in u"":#
self.trans[char] = u"p"
for char in u"":#
self.trans[char] = u"zh"
for char in u"":
self.trans[char] = u"r"
for char in u"":
self.trans[char] = u"s"
for char in u"":#
self.trans[char] = u"t"
for char in u"":
self.trans[char] = u"u"
for char in u"":#
self.trans[char] = u"ph"
for char in u"":#
self.trans[char] = u"q"
for char in u"":#
self.trans[char] = u"gh"
for char in u"":#
self.trans[char] = u"q'"
for char in u"":
self.trans[char] = u"sh"
for char in u"":
self.trans[char] = u"ch"
for char in u"":
self.trans[char] = u"ts"
for char in u"":
self.trans[char] = u"dz"
for char in u"":#
self.trans[char] = u"ts'"
for char in u"":#
self.trans[char] = u"ch'"
for char in u"":
self.trans[char] = u"kh"
for char in u"":#
self.trans[char] = u"j"
for char in u"":
self.trans[char] = u"h"
for char in u"":
self.trans[char] = u"w"
for char in u"":
self.trans[char] = u"o"
for char in u"":
self.trans[char] = u"f"
# Devanagari
for char in u"":
self.trans[char] = u"p"
for char in u"":
self.trans[char] = u"a"
for char in u"":
self.trans[char] = u"aa"
for char in u"":
self.trans[char] = u"pa"
for char in u"":
self.trans[char] = u"i"
for char in u"":
self.trans[char] = u"ii"
for char in u"":
self.trans[char] = u"u"
for char in u"":
self.trans[char] = u"uu"
for char in u"":
self.trans[char] = u"e"
for char in u"":
self.trans[char] = u"ai"
for char in u"":
self.trans[char] = u"o"
for char in u"":
self.trans[char] = u"au"
for char in u"":
self.trans[char] = u"r"
for char in u"":
self.trans[char] = u"rr"
for char in u"":
self.trans[char] = u"l"
for char in u"":
self.trans[char] = u"ll"
for char in u"":
self.trans[char] = u"k"
for char in u"":
self.trans[char] = u"kh"
for char in u"":
self.trans[char] = u"g"
for char in u"":
self.trans[char] = u"gh"
for char in u"":
self.trans[char] = u"ng"
for char in u"":
self.trans[char] = u"c"
for char in u"":
self.trans[char] = u"ch"
for char in u"":
self.trans[char] = u"j"
for char in u"":
self.trans[char] = u"jh"
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u"t"
for char in u"":
self.trans[char] = u"th"
for char in u"":
self.trans[char] = u"d"
for char in u"":
self.trans[char] = u"dh"
for char in u"":
self.trans[char] = u"n"
for char in u"":
self.trans[char] = u"ph"
for char in u"":
self.trans[char] = u"b"
for char in u"":
self.trans[char] = u"bh"
for char in u"":
self.trans[char] = u"m"
for char in u"":
self.trans[char] = u"y"
for char in u"":
self.trans[char] = u"v"
for char in u"":
self.trans[char] = u"sh"
for char in u"":
self.trans[char] = u"s"
for char in u"":
self.trans[char] = u"h"
for char in u"":
self.trans[char] = u"x"
for char in u"":
self.trans[char] = u"tr"
for char in u"":
self.trans[char] = u"gj"
for char in u"":
self.trans[char] = u"q"
for char in u"":
self.trans[char] = u"f"
for char in u"":
self.trans[char] = u"hh"
for char in u"H":
self.trans[char] = u"gh"
for char in u"":
self.trans[char] = u"z"
for char in u"":
self.trans[char] = u"r"
# Devanagari ligatures (possibly incomplete and/or incorrect)
for char in u"":
self.trans[char] = u"khn"
for char in u"":
self.trans[char] = u"tn"
for char in u"":
self.trans[char] = u"dn"
for char in u"":
self.trans[char] = u"cn"
for char in u"":
self.trans[char] = u"fn"
for char in u"":
self.trans[char] = u"m"
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u"0"
for char in u"":
self.trans[char] = u"1"
for char in u"":
self.trans[char] = u"2"
for char in u"":
self.trans[char] = u"3"
for char in u"":
self.trans[char] = u"4"
for char in u"":
self.trans[char] = u"5"
for char in u"":
self.trans[char] = u"6"
for char in u"":
self.trans[char] = u"7"
for char in u"":
self.trans[char] = u"8"
for char in u"":
self.trans[char] = u"9"
# Armenian
for char in u"":
self.trans[char] = u"A"
for char in u"":
self.trans[char] = u"a"
for char in u"":
self.trans[char] = u"B"
for char in u"":
self.trans[char] = u"b"
for char in u"":
self.trans[char] = u"G"
for char in u"":
self.trans[char] = u"g"
for char in u"":
self.trans[char] = u"D"
for char in u"":
self.trans[char] = u"d"
for char in u"":
self.trans[char] = u"Je"
for char in u"":
self.trans[char] = u"e"
for char in u"":
self.trans[char] = u"Z"
for char in u"":
self.trans[char] = u"z"
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u"Th"
for char in u"":
self.trans[char] = u"th"
for char in u"":
self.trans[char] = u"Zh"
for char in u"":
self.trans[char] = u"zh"
for char in u"":
self.trans[char] = u"I"
for char in u"":
self.trans[char] = u"i"
for char in u"":
self.trans[char] = u"L"
for char in u"":
self.trans[char] = u"l"
for char in u"":
self.trans[char] = u"Ch"
for char in u"":
self.trans[char] = u"ch"
for char in u"":
self.trans[char] = u"Ts"
for char in u"":
self.trans[char] = u"ts"
for char in u"":
self.trans[char] = u"K"
for char in u"":
self.trans[char] = u"k"
for char in u"":
self.trans[char] = u"H"
for char in u"":
self.trans[char] = u"h"
for char in u"":
self.trans[char] = u"Dz"
for char in u"":
self.trans[char] = u"dz"
for char in u"":
self.trans[char] = u"R"
for char in u"":
self.trans[char] = u"r"
for char in u"":
self.trans[char] = u"Cz"
for char in u"":
self.trans[char] = u"cz"
for char in u"":
self.trans[char] = u"M"
for char in u"":
self.trans[char] = u"m"
for char in u"":
self.trans[char] = u"J"
for char in u"":
self.trans[char] = u"j"
for char in u"":
self.trans[char] = u"N"
for char in u"":
self.trans[char] = u"n"
for char in u"":
self.trans[char] = u"S"
for char in u"":
self.trans[char] = u"s"
for char in u"":
self.trans[char] = u"Vo"
for char in u"":
self.trans[char] = u"o"
for char in u"":
self.trans[char] = u"Tsh"
for char in u"":
self.trans[char] = u"tsh"
for char in u"":
self.trans[char] = u"P"
for char in u"":
self.trans[char] = u"p"
for char in u"":
self.trans[char] = u"Dz"
for char in u"":
self.trans[char] = u"dz"
for char in u"":
self.trans[char] = u"R"
for char in u"":
self.trans[char] = u"r"
for char in u"":
self.trans[char] = u"S"
for char in u"":
self.trans[char] = u"s"
for char in u"":
self.trans[char] = u"V"
for char in u"":
self.trans[char] = u"v"
for char in u"":
self.trans[char] = u"T'"
for char in u"":
self.trans[char] = u"t'"
for char in u"":
self.trans[char] = u"R"
for char in u"":
self.trans[char] = u"r"
for char in u"":
self.trans[char] = u"Tsh"
for char in u"":
self.trans[char] = u"tsh"
for char in u"":
self.trans[char] = u"V"
for char in u"":
self.trans[char] = u"v"
for char in u"":
self.trans[char] = u"Ph"
for char in u"":
self.trans[char] = u"ph"
for char in u"":
self.trans[char] = u"Kh"
for char in u"":
self.trans[char] = u"kh"
for char in u"":
self.trans[char] = u"O"
for char in u"":
self.trans[char] = u"o"
for char in u"":
self.trans[char] = u"F"
for char in u"":
self.trans[char] = u"f"
for char in u"":
self.trans[char] = u"&"
for char in u"":
self.trans[char] = u"."
for char in u"":
self.trans[char] = u"?"
for char in u"":
self.trans[char] = u";"
for char in u"":
self.trans[char] = u""
# Tamil
for char in u"":
self.trans[char] = u"k"
for char in u"":
self.trans[char] = u"n"
for char in u"":
self.trans[char] = u"c"
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u"th"
for char in u"":
self.trans[char] = u"t"
for char in u"":
self.trans[char] = u"p"
for char in u"":
self.trans[char] = u"m"
for char in u"":
self.trans[char] = u"y"
for char in u"":
self.trans[char] = u"r"
for char in u"":
self.trans[char] = u"l"
for char in u"":
self.trans[char] = u"v"
for char in u"":
self.trans[char] = u"j"
for char in u"":
self.trans[char] = u"sh"
for char in u"":
self.trans[char] = u"s"
for char in u"":
self.trans[char] = u"h"
for char in u"":
self.trans[char] = u"x"
for char in u"":
self.trans[char] = u"a"
for char in u"":
self.trans[char] = u"aa"
for char in u"":
self.trans[char] = u"i"
for char in u"":
self.trans[char] = u"ii"
for char in u"":
self.trans[char] = u"u"
for char in u"":
self.trans[char] = u"uu"
for char in u"":
self.trans[char] = u"e"
for char in u"":
self.trans[char] = u"ee"
for char in u"":
self.trans[char] = u"ai"
for char in u"":
self.trans[char] = u"o"
for char in u"":
self.trans[char] = u"oo"
for char in u"":
self.trans[char] = u"au"
for char in u"":
self.trans[char] = ""
# Bengali
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u"a"
for char in u"":
self.trans[char] = u"i"
for char in u"":
self.trans[char] = u"u"
for char in u"":
self.trans[char] = u"ri"
for char in u"":
self.trans[char] = u"e"
for char in u"":
self.trans[char] = u"oi"
for char in u"":
self.trans[char] = u"o"
for char in u"":
self.trans[char] = "ou"
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u"t"
for char in u"":
self.trans[char] = u"n"
for char in u"":
self.trans[char] = u"h"
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u"k"
for char in u"":
self.trans[char] = u"kh"
for char in u"":
self.trans[char] = u"g"
for char in u"":
self.trans[char] = u"gh"
for char in u"":
self.trans[char] = u"ng"
for char in u"":
self.trans[char] = u"ch"
for char in u"":
self.trans[char] = u"chh"
for char in u"":
self.trans[char] = u"j"
for char in u"":
self.trans[char] = u"jh"
for char in u"":
self.trans[char] = u"n"
for char in u"":
self.trans[char] = u"t"
for char in u"":
self.trans[char] = u"th"
for char in u"":
self.trans[char] = u"d"
for char in u"":
self.trans[char] = u"dh"
for char in u"":
self.trans[char] = u"n"
for char in u"":
self.trans[char] = u"p"
for char in u"":
self.trans[char] = u"ph"
for char in u"":
self.trans[char] = u"b"
for char in u"":
self.trans[char] = u"bh"
for char in u"":
self.trans[char] = u"m"
for char in u"":
self.trans[char] = u"dzh"
for char in u"":
self.trans[char] = u"r"
for char in u"":
self.trans[char] = u"l"
for char in u"":
self.trans[char] = u"s"
for char in u"":
self.trans[char] = u"h"
for char in u"":
self.trans[char] = u"-"
for char in u"":
self.trans[char] = u"r"
for char in u"":
self.trans[char] = u"rh"
for char in u"":
self.trans[char] = u"0"
for char in u"":
self.trans[char] = u"1"
for char in u"":
self.trans[char] = u"2"
for char in u"":
self.trans[char] = u"3"
for char in u"":
self.trans[char] = u"4"
for char in u"":
self.trans[char] = u"5"
for char in u"":
self.trans[char] = u"6"
for char in u"":
self.trans[char] = u"7"
for char in u"":
self.trans[char] = u"8"
for char in u"":
self.trans[char] = u"9"
# Thai (because of complications of the alphabet, transliterations
# are very imprecise here)
for char in u"":
self.trans[char] = u"k"
for char in u"":
self.trans[char] = u"kh"
for char in u"":
self.trans[char] = u"ng"
for char in u"":
self.trans[char] = u"ch"
for char in u"":
self.trans[char] = u"s"
for char in u"":
self.trans[char] = u"y"
for char in u"":
self.trans[char] = u"d"
for char in u"":
self.trans[char] = u"t"
for char in u"":
self.trans[char] = u"th"
for char in u"":
self.trans[char] = u"n"
for char in u"":
self.trans[char] = u"b"
for char in u"":
self.trans[char] = u"p"
for char in u"":
self.trans[char] = u"ph"
for char in u"":
self.trans[char] = u"f"
for char in u"":
self.trans[char] = u"m"
for char in u"":
self.trans[char] = u"r"
for char in u"":
self.trans[char] = u"rue"
for char in u"":
self.trans[char] = u":"
for char in u"":
self.trans[char] = u"l"
for char in u"":
self.trans[char] = u"lue"
for char in u"":
self.trans[char] = u"w"
for char in u"":
self.trans[char] = u"h"
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u"ua"
for char in u"":
self.trans[char] = u"o"
for char in u"":
self.trans[char] = u"a"
for char in u"":
self.trans[char] = u"u"
for char in u"":
self.trans[char] = u"am"
for char in u"":
self.trans[char] = u"i"
for char in u"":
self.trans[char] = u"i:"
for char in u"":
self.trans[char] = u"ue"
for char in u"":
self.trans[char] = u"ue:"
for char in u"":
self.trans[char] = u"u"
for char in u"":
self.trans[char] = u"u:"
for char in u"":
self.trans[char] = u"e"
for char in u"":
self.trans[char] = u"ae"
for char in u"":
self.trans[char] = u"ai"
for char in u"":
self.trans[char] = u""
for char in u"":
self.trans[char] = u"."
for char in u"":
self.trans[char] = u"(2)"
# Korean (Revised Romanization system where possible, incomplete)
for char in u"":
self.trans[char] = u"guk"
for char in u"":
self.trans[char] = u"myeong"
for char in u"":
self.trans[char] = u"geom"
for char in u"":
self.trans[char] = u"ta"
for char in u"":
self.trans[char] = u"bun"
for char in u"":
self.trans[char] = u"sa"
for char in u"":
self.trans[char] = u"ryu"
for char in u"":
self.trans[char] = u"po"
for char in u"":
self.trans[char] = u"reu"
for char in u"":
self.trans[char] = u"tu"
for char in u"":
self.trans[char] = u"gal"
for char in u"":
self.trans[char] = u"eo"
for char in u"":
self.trans[char] = u"no"
for char in u"":
self.trans[char] = u"we"
for char in u"":
self.trans[char] = u"i"
for char in u"":
self.trans[char] = u"ra"
for char in u"":
self.trans[char] = u"tin"
for char in u"":
self.trans[char] = u"ru"
for char in u"":
self.trans[char] = u"ma"
for char in u"":
self.trans[char] = u"ni"
for char in u"":
self.trans[char] = u"a"
for char in u"":
self.trans[char] = u"dok"
for char in u"":
self.trans[char] = u"il"
for char in u"":
self.trans[char] = u"mo"
for char in u"":
self.trans[char] = u"keu"
for char in u"":
self.trans[char] = u"sya"
for char in u"":
self.trans[char] = u"yeong"
for char in u"":
self.trans[char] = u"bul"
for char in u"":
self.trans[char] = u"ga"
for char in u"":
self.trans[char] = u"ri"
for char in u"":
self.trans[char] = u"geu"
for char in u"":
self.trans[char] = u"ji"
for char in u"":
self.trans[char] = u"ya"
for char in u"":
self.trans[char] = u"ba"
for char in u"":
self.trans[char] = u"syu"
for char in u"":
self.trans[char] = u"ki"
for char in u"":
self.trans[char] = u"peu"
for char in u"":
self.trans[char] = u"rang"
for char in u"":
self.trans[char] = u"seu"
for char in u"":
self.trans[char] = u"ro"
for char in u"":
self.trans[char] = u"me"
def transliterate(self, char, default="?", prev="-", next="-"):
    """Return the replacement text for a single character.

    The table ``self.trans`` is consulted first.  A character that is not
    in the table falls through to a few context-dependent rules that reuse
    the neighbouring values, and finally to ``default``.

    :param char: the character to transliterate.
    :param default: value returned when neither the table nor any rule
        applies.
    :param prev: value returned by the Arabic rule and by the last
        Japanese rule (presumably the transliteration of the preceding
        character -- confirm against callers).
    :param next: the following character; the first Japanese rule returns
        the first character of its transliteration.  The name shadows the
        builtin but is kept because callers may pass it by keyword.
    :return: the replacement string, which may be empty.

    NOTE(review): in this copy of the file the three literals tested below
    are empty strings.  As written, the Arabic test therefore matches only
    an empty ``char`` and the two Japanese tests can never be reached.
    Presumably the original non-ASCII characters were lost in an encoding
    step -- restore them from a clean copy before relying on these rules.
    """
    if char in self.trans:
        return self.trans[char]
    # Arabic
    if char == u"":
        return prev
    # Japanese
    if char == u"":
        # Slice instead of indexing: the table maps some characters to an
        # empty string, and ``""[0]`` would raise IndexError.
        return self.transliterate(next)[:1]
    if char in u"":
        return prev
    return default
|