greek_classical.yml 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587
  1. general:
  2. name: Classical Greek (ancient and medieval)
  3. notes:
  4. - Compiled based on https://www.loc.gov/catdir/cpso/romanization/greek.pdf
  5. parents:
  6. - _ignore_base
  7. script_to_roman:
  8. hooks:
  9. begin_input_token:
  10. -
  11. - greek.parse_numeral
  12. normalize:
  13. # Alpha
  14. "\u03B1":
  15. - "\u1F00"
  16. - "\u1F02"
  17. - "\u1F04"
  18. - "\u1F06"
  19. - "\u1F70"
  20. - "\u1F71"
  21. "\u0391":
  22. - "\u1F08"
  23. - "\u1F0A"
  24. - "\u1F0C"
  25. # Rough alpha
  26. "\u03B1\u0314":
  27. - "\u1F01"
  28. - "\u1F03"
  29. - "\u1F05"
  30. - "\u1F07"
  31. "\u0391\u0314":
  32. - "\u1F09"
  33. - "\u1F0B"
  34. - "\u1F0D"
  35. - "\u1F0F"
  36. # Epsilon
  37. "\u03B5":
  38. - "\u1F10"
  39. - "\u1F12"
  40. - "\u1F14"
  41. - "\u1F72"
  42. - "\u1F73"
  43. "\u0395":
  44. - "\u1F18"
  45. - "\u1F1A"
  46. - "\u1F1C"
  47. # Rough epsilon
  48. "\u03B5\u0314":
  49. - "\u1F11"
  50. - "\u1F13"
  51. - "\u1F15"
  52. "\u0395\u0314":
  53. - "\u1F19"
  54. - "\u1F1B"
  55. - "\u1F1D"
  56. # Eta
  57. "\u03B7":
  58. - "\u1F20"
  59. - "\u1F22"
  60. - "\u1F24"
  61. - "\u1F26"
  62. - "\u1F74"
  63. - "\u1F75"
  64. "\u0397":
  65. - "\u1F28"
  66. - "\u1F2A"
  67. - "\u1F2C"
  68. - "\u1F2E"
  69. # Rough eta
  70. "\u03B7\u0314":
  71. - "\u1F21"
  72. - "\u1F23"
  73. - "\u1F25"
  74. - "\u1F27"
  75. "\u0397\u0314":
  76. - "\u1F29"
  77. - "\u1F2B"
  78. - "\u1F2D"
  79. - "\u1F2F"
  80. # Iota
  81. "\u03B9":
  82. - "\u1F30"
  83. - "\u1F32"
  84. - "\u1F34"
  85. - "\u1F36"
  86. - "\u1F76"
  87. - "\u1F77"
  88. "\u0399":
  89. - "\u1F38"
  90. - "\u1F3A"
  91. - "\u1F3C"
  92. - "\u1F3E"
  93. # Rough iota
  94. "\u03B9\u0314":
  95. - "\u1F31"
  96. - "\u1F33"
  97. - "\u1F35"
  98. - "\u1F37"
  99. "\u0399\u0314":
  100. - "\u1F39"
  101. - "\u1F3B"
  102. - "\u1F3D"
  103. - "\u1F3F"
  104. # ὶ
  105. # Omicron
  106. "\u03BF":
  107. - "\u1F40"
  108. - "\u1F42"
  109. - "\u1F44"
  110. - "\u1F78"
  111. - "\u1F79"
  112. "\u039F":
  113. - "\u1F48"
  114. - "\u1F4A"
  115. - "\u1F4C"
  116. # Rough Omicron
  117. "\u03BF\u0314":
  118. - "\u1F41"
  119. - "\u1F43"
  120. - "\u1F45"
  121. "\u039F\u0314":
  122. - "\u1F49"
  123. - "\u1F4B"
  124. - "\u1F4D"
  125. # Rho
  126. "\u03C1":
  127. - "\u1FE4"
  128. "\u03C1\u0314":
  129. - "\u1FE5"
  130. # Rough Rho
  131. "\u03A1\u0314":
  132. - "\u1FEC"
  133. # Upsilon
  134. "\u03C5":
  135. - "\u03CD"
  136. - "\u1F50"
  137. - "\u1F52"
  138. - "\u1F54"
  139. - "\u1F56"
  140. - "\u1FE6"
  141. - "\u1F7A"
  142. - "\u1F7B"
  143. #"\u03A5":
  144. # NOTE: Unicode has no precomposed capital upsilon with psili (U+1F58, U+1F5A, U+1F5C and U+1F5E are unassigned), so there is no smooth-breathing capital upsilon entry.
  145. # Rough Upsilon
  146. "\u03C5\u0314":
  147. - "\u1F51"
  148. - "\u1F53"
  149. - "\u1F55"
  150. - "\u1F57"
  151. "\u03A5\u0314":
  152. - "\u1F59"
  153. - "\u1F5B"
  154. - "\u1F5D"
  155. - "\u1F5F"
  156. # Omega
  157. "\u03C9":
  158. - "\u1F60"
  159. - "\u1F62"
  160. - "\u1F64"
  161. - "\u1F66"
  162. - "\u1F7C"
  163. - "\u1F7D"
  164. "\u03A9":
  165. - "\u1F68"
  166. - "\u1F6A"
  167. - "\u1F6C"
  168. - "\u1F6E"
  169. # Rough omega
  170. "\u03C9\u0314":
  171. - "\u1F61"
  172. - "\u1F63"
  173. - "\u1F65"
  174. - "\u1F67"
  175. "\u03A9\u0314":
  176. - "\u1F69"
  177. - "\u1F6B"
  178. - "\u1F6D"
  179. - "\u1F6F"
  180. # TODO: Combined vowels with Vrachy, Makron, Dialytika, Prosgegrammeni, Ypogegrammeni.
  181. # Remove all remaining diacritics, both combining and stand-alone (spacing) forms, except for dasia, by mapping them to the empty string.
  182. "":
  183. - "\u0342"
  184. - "\u0343"
  185. - "\u0345"
  186. - "\u037A"
  187. - "\u1FBD"
  188. - "\u1FBE"
  189. - "\u1FBF"
  190. - "\u1FC0"
  191. - "\u1FC1"
  192. - "\u1FCD"
  193. - "\u1FCE"
  194. - "\u1FCF"
  195. - "\u1FED"
  196. - "\u1FEE"
  197. - "\u1FFD"
  198. map:
  199. "\u201C": "\"\u0332"
  200. "\u201D": "\"\u0333"
  201. "\u2018": "'\u0332"
  202. "\u2019": "'\u0333"
  203. "\u2116": "No\u0332"
  204. # "\u0300": ""
  205. # "\u0301": ""
  206. # "\u0302": ""
  207. # "\u0313": ""
  208. "\u0370": "H\u0323"
  209. "\u0371": "h\u0323"
  210. "\u0372": "S\uFE20\u0332S\uFE21\u0332"
  211. "\u0373": "s\uFE20\u0332s\uFE21\u0332"
  212. "\u0374": "\u02B9"
  213. "\u0375": "\u0326"
  214. "\u0376": "W\u0323"
  215. "\u0377": "w\u0323"
  216. # \u0378 reserved
  217. # \u0379 reserved
  218. "\u037A": "\u0328"
  219. "\u037B": "|)"
  220. "\u037C": "(."
  221. "\u037D": ".)"
  222. "\u037E": "?\u0333"
  223. "\u037F": "J"
  224. # \u0380 reserved
  225. # \u0381 reserved
  226. # \u0382 reserved
  227. # \u0383 reserved
  228. "\u0384": "\u0020\u0301"
  229. "\u0385": "\u0020\u0308\u0301"
  230. "\u0386\u0314": "Ha\u0301"
  231. "\u0386": "A\u0301"
  232. "\u0387": ";\u0333"
  233. "\u0388\u0314": "He\u0301"
  234. "\u0388": "E\u0301"
  235. "\u0389\u0314": "He\u0304\u0301"
  236. "\u0389": "E\u0304\u0301"
  237. "\u038A\u0314": "Hi\u0301"
  238. "\u038A": "I\u0301"
  239. # \u038B reserved
  240. "\u038C\u0314": "Ho\u0301"
  241. "\u038C": "O\u0301"
  242. # \u038D reserved
  243. "\u038E\u0314": "Hy\u0301"
  244. "\u038E": "Y\u0301"
  245. "\u038F\u0314": "Ho\u0304\u0301"
  246. "\u038F": "O\u0304\u0301"
  247. "\u0390": "i\u0308\u0301"
  248. "\u0391\u0314": "Ha"
  249. "\u0391\u03C5": "Au"
  250. "\u0391": "A"
  251. "\u0392": "B"
  252. "\u0393": "G"
  253. "\u0394": "D"
  254. "\u0395\u0314": "He"
  255. "\u0395\u03C5": "Eu"
  256. "\u0395": "E"
  257. "\u0396": "Z"
  258. "\u0397\u0314": "He\u0304"
  259. "\u0397": "E\u0304"
  260. "\u0398": "Th"
  261. "\u0399\u0314": "Hi"
  262. "\u0399\u03C5": "Iu"
  263. "\u0399": "I"
  264. "\u039A": "K"
  265. "\u039B": "L"
  266. "\u039C\u03C0%": "B"
  267. "\u039C": "M"
  268. # "\u039D\u03C4%": "D\u0332"
  269. "\u039D": "N"
  270. "\u039E": "X"
  271. "\u039F\u0314": "Ho"
  272. "\u039F\u03C5": "Ou"
  273. "\u039F": "O"
  274. "\u03A0": "P"
  275. "\u03A1\u0314": "Rh"
  276. "\u03A1": "R"
  277. # \u03A2 reserved
  278. "\u03A3": "S"
  279. "\u03A4": "T"
  280. "\u03A5\u0314": "Hy"
  281. "\u03A5": "Y"
  282. "\u03A6": "Ph"
  283. "\u03A7": "Ch"
  284. "\u03A8": "Ps"
  285. "\u03A9\u0314": "Ho\u0304"
  286. "\u03A9": "O\u0304"
  287. "\u03AA": "I\u0308"
  288. "\u03AB": "Y\u0308"
  289. "\u03AC\u0314": "ha\u0301"
  290. "\u03AC": "a\u0301"
  291. "\u03AD\u0314": "he\u0301"
  292. "\u03AD": "e\u0301"
  293. "\u03AE\u0314": "he\u0304\u0301"
  294. "\u03AE": "e\u0304\u0301"
  295. "\u03AF\u0314": "hi\u0301"
  296. "\u03AF": "i\u0301"
  297. "\u03B0": "y\u0308\u0301"
  298. "\u03B1\u0314": "ha"
  299. "\u03B1\u03C5": "au"
  300. "\u03B1": "a"
  301. "\u03B2": "b"
  302. "\u03B3\u03B3": "ng"
  303. "%\u03B3\u03BA%": "nk"
  304. "\u03B3\u03BE": "nx"
  305. "\u03B3\u03C7": "nch"
  306. "\u03B3": "g"
  307. "\u03B4": "d"
  308. "\u03B5\u0314": "he"
  309. "\u03B5\u03C5": "eu"
  310. "\u03B5": "e"
  311. "\u03B6": "z"
  312. "\u03B7\u0314": "he\u0304"
  313. "\u03B7": "e\u0304"
  314. "\u03B8": "th"
  315. "\u03B9\u0314": "hi"
  316. "\u03B9\u03C5": "iu"
  317. "\u03B9": "i"
  318. "\u03BA": "k"
  319. "\u03BB": "l"
  320. "\u03BC\u03C0%": "b"
  321. "\u03BC": "m"
  322. # "\u03BD\u03C4%": "d\u0332"
  323. "\u03BD": "n"
  324. "\u03BE": "x"
  325. "\u03BF\u0314": "ho"
  326. "\u03BF\u03C5": "ou"
  327. "\u03BF": "o"
  328. "\u03C0": "p"
  329. "\u03C1\u0314": "rh"
  330. "\u03C1": "r"
  331. "\u03C2": "s"
  332. "\u03C3": "s"
  333. "\u03C4": "t"
  334. "\u03C5\u0314": "hy"
  335. "\u03C5": "y"
  336. "\u03C6": "ph"
  337. "\u03C7": "ch"
  338. "\u03C8": "ps"
  339. "\u03C9\u0314": "ho\u0304"
  340. "\u03C9": "o\u0304"
  341. "\u03CA": "i\u0308"
  342. "\u03CB": "y\u0308"
  343. "\u03CC": "o\u0301"
  344. "\u03CD": "y\u0301"
  345. "\u03CE": "o\u0304\u0301"
  346. "\u03CF": "K\u0326"
  347. "\u03D0": "b\u0333"
  348. "\u03D1": "t\u0333h\u0333"
  349. "\u03D2": "Y\u0333"
  350. "\u03D3": "Y\u0301\u0333"
  351. "\u03D4": "Y\u0308\u0333"
  352. "\u03D5": "p\u0333h\u0333"
  353. "\u03D6": "p\u0333"
  354. "\u03D7": "k\u0326"
  355. "\u03D8": "Q"
  356. "\u03D9": "q"
  357. "\u03DA": "6\u0333"
  358. "\u03DB": "6\u0332"
  359. "\u03DC": "G\u0332"
  360. "\u03DD": "g\u0332"
  361. "\u03DE": "K\u0324"
  362. "\u03DF": "k\u0324"
  363. "\u03E0": "s\uFE20s\uFE21"
  364. "\u03E1": "S\uFE20S\uFE21"
  365. "\u03E2": "S\u030C"
  366. "\u03E3": "s\u030C"
  367. "\u03E4": "F"
  368. "\u03E5": "f"
  369. "\u03E6": "H\u0332"
  370. "\u03E7": "h\u0332"
  371. "\u03E8": "H\u0307"
  372. "\u03E9": "h\u0307"
  373. "\u03EA": "C\u030C"
  374. "\u03EB": "c\u030C"
  375. "\u03EC": "K\u0323y"
  376. "\u03ED": "k\u0323y"
  377. "\u03EE": "T\u0323i"
  378. "\u03EF": "t\u0323i"
  379. "\u03F0": "k\u0332"
  380. "\u03F1": "r\u0332"
  381. "\u03F2": "s\u0332"
  382. "\u03F3": "j"
  383. "\u03F4": "T\u0333H\u0333"
  384. "\u03F5": "e\u0332"
  385. "\u03F6": "e\u0333"
  386. "\u03F7": "S\uFE20H\uFE21"
  387. "\u03F8": "s\uFE20h\uFE21"
  388. "\u03F9": "S\u0332"
  389. "\u03FA": "S\u0323"
  390. "\u03FB": "s\u0323"
  391. "\u03FC": "r\u0333"
  392. "\u03FD": "|)\u0333"
  393. "\u03FE": "(.\u0333"
  394. "\u03FF": ".)\u0333"
  395. roman_to_script:
  396. map:
  397. ".)\u0333": "\u03FF"
  398. ".)": "\u037D"
  399. "?\u0333": "\u037E"
  400. "\"\u0332": "\u201C"
  401. "\"\u0333": "\u201D"
  402. "'\u0332": "\u2018"
  403. "'\u0333": "\u2019"
  404. "(.\u0333": "\u03FE"
  405. "(.": "\u037C"
  406. "|)\u0333": "\u03FD"
  407. "|)": "\u037B"
  408. # Left pointing double angle quotation mark
  409. "\u003C\u003C": "\u00AB"
  410. # Right pointing double angle quotation mark
  411. "\u003E\u003E": "\u00BB"
  412. "6\u0332": "\u03DB"
  413. "6\u0333": "\u03DA"
  414. "Au": "\u0391\u03C5"
  415. "au": "\u03B1\u03C5"
  416. "a\u0301": "\u03AC"
  417. "B": "\u0392"
  418. "b": "\u03B2"
  419. "b\u0333": "\u03D0"
  420. "Ch": "\u03A7"
  421. "ch": "\u03C7"
  422. "c\u030C": "\u03EB"
  423. "D\u0332": "\u039D\u03C4"
  424. "d\u0332": "\u03BD\u03C4"
  425. "D": "\u0394"
  426. "d": "\u03B4"
  427. "Eu": "\u0395\u03C5"
  428. "eu": "\u03B5\u03C5"
  429. "E\u0301": "\u0388"
  430. "e\u0301": "\u03AD"
  431. "He\u0304\u0301": "\u0389\u0314"
  432. "e\u0304\u0301": "\u03AE"
  433. "E\u0304\u0301": "\u0389"
  434. "E\u0304": "\u0397"
  435. "e\u0304": "\u03B7"
  436. "e\u0332": "\u03F5"
  437. "e\u0333": "\u03F6"
  438. "F": "\u03E4"
  439. "f": "\u03E5"
  440. "G\u0332": "\u03DC"
  441. "g\u0332": "\u03DD"
  442. "Ha\u0301": "\u0386\u0314"
  443. "ha\u0301": "\u03AC\u0314"
  444. "Ha": "\u0391\u0314"
  445. "ha": "\u03B1\u0314"
  446. "A": "\u0391"
  447. "a": "\u03B1"
  448. "he\u0304\u0301": "\u03AE\u0314"
  449. "He\u0301": "\u0388\u0314"
  450. "he\u0301": "\u03AD\u0314"
  451. "He\u0304": "\u0397\u0314"
  452. "he\u0304": "\u03B7\u0314"
  453. "He": "\u0395\u0314"
  454. "he": "\u03B5\u0314"
  455. "E": "\u0395"
  456. "e": "\u03B5"
  457. "Hi\u0301": "\u038A\u0314"
  458. "hi\u0301": "\u03AF\u0314"
  459. "Hi": "\u0399\u0314"
  460. "hi": "\u03B9\u0314"
  461. "Ho\u0304\u0301": "\u038F\u0314"
  462. "Ho\u0301": "\u038C\u0314"
  463. "ho\u0304": "\u03C9\u0314"
  464. "Ho\u0304": "\u03A9\u0314"
  465. "Ho": "\u039F\u0314"
  466. "ho": "\u03BF\u0314"
  467. "H\u0307": "\u03E8"
  468. "h\u0307": "\u03E9"
  469. "H\u0323": "\u0370"
  470. "h\u0323": "\u0371"
  471. "H\u0332": "\u03E6"
  472. "h\u0332": "\u03E7"
  473. "Hy\u0301": "\u038E\u0314"
  474. "Hy": "\u03A5\u0314"
  475. "hy": "\u03C5\u0314"
  476. "Iu": "\u0399\u03C5"
  477. "iu": "\u03B9\u03C5"
  478. "I\u0301": "\u038A"
  479. "i\u0301": "\u03AF"
  480. "I\u0308": "\u03AA"
  481. "i\u0308\u0301": "\u0390"
  482. "i\u0308": "\u03CA"
  483. "J": "\u037F"
  484. "j": "\u03F3"
  485. "K\u0323y": "\u03EC"
  486. "k\u0323y": "\u03ED"
  487. "K\u0324": "\u03DE"
  488. "k\u0324": "\u03DF"
  489. "K\u0326": "\u03CF"
  490. "k\u0326": "\u03D7"
  491. "k\u0332": "\u03F0"
  492. "L": "\u039B"
  493. "l": "\u03BB"
  494. "M": "\u039C"
  495. "m": "\u03BC"
  496. "nch": "\u03B3\u03C7"
  497. "ng": "\u03B3\u03B3"
  498. "%nk%": "\u03B3\u03BA"
  499. "nx": "\u03B3\u03BE"
  500. "No\u0332": "\u2116"
  501. "N": "\u039D"
  502. "n": "\u03BD"
  503. "K": "\u039A"
  504. "k": "\u03BA"
  505. "G": "\u0393"
  506. "g": "\u03B3"
  507. "Ou": "\u039F\u03C5"
  508. "ou": "\u03BF\u03C5"
  509. "O\u0301": "\u038C"
  510. "o\u0301": "\u03CC"
  511. "O\u0304\u0301": "\u038F"
  512. "o\u0304\u0301": "\u03CE"
  513. "O\u0304": "\u03A9"
  514. "o\u0304": "\u03C9"
  515. "O": "\u039F"
  516. "o": "\u03BF"
  517. "Ph": "\u03A6"
  518. "ph": "\u03C6"
  519. "Ps": "\u03A8"
  520. "ps": "\u03C8"
  521. "p\u0333h\u0333": "\u03D5"
  522. "p\u0333": "\u03D6"
  523. "P": "\u03A0"
  524. "p": "\u03C0"
  525. "Q": "\u03D8"
  526. "q": "\u03D9"
  527. "Rh": "\u03A1\u0314"
  528. "rh": "\u03C1\u0314"
  529. "r\u0332": "\u03F1"
  530. "r\u0333": "\u03FC"
  531. "R": "\u03A1"
  532. "r": "\u03C1"
  533. "S\uFE20\u0332S\uFE21\u0332": "\u0372"
  534. "s\uFE20\u0332s\uFE21\u0332": "\u0373"
  535. "S\uFE20H\uFE21": "\u03F7"
  536. "s\uFE20h\uFE21": "\u03F8"
  537. "S\uFE20S\uFE21": "\u03E1"
  538. "s\uFE20s\uFE21": "\u03E0"
  539. "S\u030C": "\u03E2"
  540. "s\u030C": "\u03E3"
  541. "S\u0323": "\u03FA"
  542. "s\u0323": "\u03FB"
  543. "S\u0332": "\u03F9"
  544. "s\u0332": "\u03F2"
  545. "S": "\u03A3"
  546. "%s": "\u03C2"
  547. "s": "\u03C3"
  548. "T\u0333H\u0333": "\u03F4"
  549. "t\u0333h\u0333": "\u03D1"
  550. "Th": "\u0398"
  551. "th": "\u03B8"
  552. "T\u0323i": "\u03EE"
  553. "t\u0323i": "\u03EF"
  554. "T": "\u03A4"
  555. "t": "\u03C4"
  556. "I": "\u0399"
  557. "i": "\u03B9"
  558. "\u0020\u0301": "\u0384"
  559. "\u0020\u0308\u0301": "\u0385"
  560. ";\u0333": "\u0387"
  561. "\u02B9": "\u0374"
  562. "\u0326": "\u0375"
  563. "\u0328": "\u037A"
  564. "V": "\u0392"
  565. "v": "\u03B2"
  566. "W\u0323": "\u0376"
  567. "w\u0323": "\u0377"
  568. "X": "\u039E"
  569. "x": "\u03BE"
  570. "Y\u0301\u0333": "\u03D3"
  571. "Y\u0301": "\u038E"
  572. "y\u0301": "\u03CD"
  573. "Y\u0308\u0333": "\u03D4"
  574. "y\u0308\u0301": "\u03B0"
  575. "Y\u0308": "\u03AB"
  576. "y\u0308": "\u03CB"
  577. "Y\u0333": "\u03D2"
  578. "Y": "\u03A5"
  579. "y": "\u03C5"
  580. "Z": "\u0396"
  581. "z": "\u03B6"