armenian.yml 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  # General section: identifies this transliteration table by name.
  general:
    name: Armenian
  # Roman -> Armenian script.
  #   ignore: literal phrases listed here are the "ignore" patterns for this
  #           direction (cataloguing boilerplate and multi-letter Roman numerals).
  #   map:    Roman key -> Armenian replacement, written as \u escapes.
  roman_to_script:
    ignore:
      - "at head of title"
      - "colophon"
      - "date of publication not identified"
      - "place of publication not identified"
      - "publisher not identified"
      # NOTE: Ignoring the words below is ambiguous, because a Roman numeral
      # cannot always be told apart from ordinary Roman text. The
      # single-character numerals are left out on purpose.
      # Ideally the source editors would use the dedicated code points
      # U+2160-U+216F (uppercase Roman numerals) and/or U+2170-U+217F
      # (lowercase Roman numerals) to avoid this ambiguity.
      # TODO: implement regular expressions for ignore patterns, then replace
      # the literal numeral list with the patterns sketched here.
      #- re: "I{2,3}"
      #- re: "I(V|X)"
      #- re: "LI{,3}"
      #- re: "LI?(V|X)"
      #- re: "L(V|X{1,3})I{,3}"
      #- re: "LX{1,3}I?V"
      #- re: "LX{1,3}VI{,3}"
      #- re: "(V|X{1,3})I{,3}"
      #- re: "X{1,3}I{,3}"
      #- re: "X{1,3}I(V|X)"
      #- re: "X{1,3}VI{,3}"
      - "II"
      - "III"
      - "IV"
      - "IX"
      - "LI"
      - "LII"
      - "LIII"
      - "LIV"
      - "LIX"
      - "LV"
      - "LVI"
      - "LVII"
      - "LVIII"
      - "LX"
      - "LXI"
      - "LXII"
      - "LXIII"
      - "LXIV"
      - "LXIX"
      - "LXV"
      - "LXVI"
      - "LXVII"
      - "LXVIII"
      - "LXX"
      - "LXXI"
      - "LXXII"
      - "LXXIII"
      - "LXXIV"
      - "LXXIX"
      - "LXXV"
      - "LXXVI"
      - "LXXVII"
      - "LXXVIII"
      - "LXXX"
      - "LXXXI"
      - "LXXXII"
      - "LXXXIII"
      - "LXXXIV"
      - "LXXXIX"
      - "LXXXV"
      - "LXXXVI"
      - "LXXXVII"
      - "LXXXVIII"
      - "VI"
      - "VII"
      - "VIII"
      - "XI"
      - "XII"
      - "XIII"
      - "XIV"
      - "XIX"
      - "XL"
      - "XLI"
      - "XLII"
      - "XLIII"
      - "XLIV"
      - "XLIX"
      - "XLV"
      - "XLVI"
      - "XLVII"
      - "XLVIII"
      - "XV"
      - "XVI"
      - "XVII"
      - "XVIII"
      - "XX"
      - "XXI"
      - "XXII"
      - "XXIII"
      - "XXIV"
      - "XXIX"
      - "XXV"
      - "XXVI"
      - "XXVII"
      - "XXVIII"
      - "XXX"
      - "XXXI"
      - "XXXII"
      - "XXXIII"
      - "XXXIV"
      - "XXXIX"
      - "XXXV"
      - "XXXVI"
      - "XXXVII"
      - "XXXVIII"
      - "and one other"
      #- re: "and ([a-z0-9]+ )?others"
      - "et al."
    # Multi-character keys (digraphs, letter + diacritic, letter + ayn) are
    # listed ahead of the single letters they begin with.
    # NOTE(review): presumably so the longer sequence is tried first - confirm
    # against the table loader before reordering anything.
    # All-caps and title-case digraphs share one uppercase target.
    map:
      "A": "\u0531"
      "a": "\u0561"
      "B": "\u0532"
      "b": "\u0562"
      # GH / Gh / gh digraph
      "GH": "\u0542"
      "Gh": "\u0542"
      "gh": "\u0572"
      "G": "\u0533"
      "g": "\u0563"
      # DZ / Dz / dz digraph -> the single letter U+0541 / U+0571
      # (no trailing U+0566 "z": that would yield "dzz" on the way back).
      "DZ": "\u0541"
      "Dz": "\u0541"
      "dz": "\u0571"
      "D": "\u0534"
      "d": "\u0564"
      # E / e with combining macron (U+0304)
      "E\u0304": "\u0537"
      "e\u0304": "\u0567"
      # E / e with combining caron (U+030C)
      "E\u030C": "\u0538"
      "e\u030C": "\u0568"
      # EW / Ew / ew combination
      "EW": "\u0535\u0582"
      "Ew": "\u0535\u0582"
      "ew": "\u0565\u0582"
      # EV / Ev / ev combination; the capitalised forms start with uppercase
      # U+0535, mirroring the EW rows.
      "EV": "\u0535\u057E"
      "Ev": "\u0535\u057E"
      "ev": "\u0565\u057E"
      "E": "\u0535"
      "e": "\u0565"
      # T / t followed by ayn (U+02BB)
      "T\u02BB": "\u0539"
      "t\u02BB": "\u0569"
      # ZH / Zh / zh digraph
      "ZH": "\u053A"
      "Zh": "\u053A"
      "zh": "\u056A"
      "Z": "\u0536"
      "z": "\u0566"
      "I": "\u053B"
      "i": "\u056B"
      "L": "\u053C"
      "l": "\u056C"
      # KH / Kh / kh digraph
      "KH": "\u053D"
      "Kh": "\u053D"
      "kh": "\u056D"
      # TS / Ts / ts followed by ayn
      "TS\u02BB": "\u0551"
      "Ts\u02BB": "\u0551"
      "ts\u02BB": "\u0581"
      # TS / Ts / ts digraph
      "TS": "\u053E"
      "Ts": "\u053E"
      "ts": "\u056E"
      # K / k followed by ayn
      "K\u02BB": "\u0554"
      "k\u02BB": "\u0584"
      "K": "\u053F"
      "k": "\u056F"
      # CH / Ch / ch followed by ayn
      "CH\u02BB": "\u0549"
      "Ch\u02BB": "\u0549"
      "ch\u02BB": "\u0579"
      # CH / Ch / ch digraph
      "CH": "\u0543"
      "Ch": "\u0543"
      "ch": "\u0573"
      "M": "\u0544"
      "m": "\u0574"
      "Y": "\u0545"
      "y": "\u0575"
      "N": "\u0546"
      "n": "\u0576"
      # SH / Sh / sh digraph
      "SH": "\u0547"
      "Sh": "\u0547"
      "sh": "\u0577"
      "H": "\u0540"
      "h": "\u0570"
      # O / o with combining macron (U+0304)
      "O\u0304": "\u0555"
      "o\u0304": "\u0585"
      "O": "\u0548"
      "o": "\u0578"
      # P / p followed by ayn
      "P\u02BB": "\u0553"
      "p\u02BB": "\u0583"
      # Plain P / p (U+054A / U+057A); previously absent from the table.
      "P": "\u054A"
      "p": "\u057A"
      "J": "\u054B"
      "j": "\u057B"
      # R / r with combining dot below (U+0323)
      "R\u0323": "\u054C"
      "r\u0323": "\u057C"
      "S": "\u054D"
      "s": "\u057D"
      "V": "\u054E"
      "v": "\u057E"
      "T": "\u054F"
      "t": "\u057F"
      "R": "\u0550"
      "r": "\u0580"
      "W": "\u0552"
      "w": "\u0582"
      # U / u is written with two Armenian letters.
      "U": "\u0548\u0582"
      "u": "\u0578\u0582"
      "F": "\u0556"
      "f": "\u0586"
      # U+02B9 maps to the empty string, i.e. it is dropped from the output.
      "\u02B9": ""
  # Armenian script -> Roman.
  #   map: Armenian key (written as \u escapes) -> Roman replacement.
  script_to_roman:
    map:
      # Two-letter sequences whose letter-by-letter romanization would spell
      # one of the digraph romanizations (Kh, Zh, Ts, Dz, Gh, Sh). U+02B9 is
      # placed between the two Roman letters to keep them apart.
      "\u053F\u0540": "K\u02B9H"
      "\u053F\u0570": "K\u02B9h"
      "\u056F\u0570": "k\u02B9h"
      "\u0536\u0540": "Z\u02B9H"
      "\u0536\u0570": "Z\u02B9h"
      "\u0566\u0570": "z\u02B9h"
      "\u054F\u054D": "T\u02B9S"
      "\u054F\u057D": "T\u02B9s"
      "\u057F\u057D": "t\u02B9s"
      "\u0534\u0536": "D\u02B9Z"
      "\u0534\u0566": "D\u02B9z"
      "\u0564\u0566": "d\u02B9z"
      "\u0533\u0540": "G\u02B9H"
      "\u0533\u0570": "G\u02B9h"
      "\u0563\u0570": "g\u02B9h"
      "\u054D\u0540": "S\u02B9H"
      "\u054D\u0570": "S\u02B9h"
      "\u057D\u0570": "s\u02B9h"
      "\u0531": "A"
      "\u0561": "a"
      "\u0532": "B"
      "\u0562": "b"
      # Gh / gh
      "\u0542": "Gh"
      "\u0572": "gh"
      "\u0533": "G"
      "\u0563": "g"
      # Dz / dz: keyed on the single letter U+0541 / U+0571, so the letter is
      # romanized wherever it occurs, not only when U+0566 follows it.
      "\u0541": "Dz"
      "\u0571": "dz"
      "\u0534": "D"
      "\u0564": "d"
      # E / e with combining macron (U+0304)
      "\u0537": "E\u0304"
      "\u0567": "e\u0304"
      # E / e with combining caron (U+030C)
      "\u0538": "E\u030C"
      "\u0568": "e\u030C"
      # Ew / ew combination
      "\u0535\u0582": "Ew"
      "\u0565\u0582": "ew"
      # Ev / ev combination; the capital row is keyed on uppercase U+0535 so
      # the two rows no longer share one key.
      "\u0535\u057E": "Ev"
      "\u0565\u057E": "ev"
      "\u0535": "E"
      "\u0565": "e"
      # T / t followed by ayn (U+02BB)
      "\u0539": "T\u02BB"
      "\u0569": "t\u02BB"
      # Zh / zh
      "\u053A": "Zh"
      "\u056A": "zh"
      "\u0536": "Z"
      "\u0566": "z"
      "\u053B": "I"
      "\u056B": "i"
      "\u053C": "L"
      "\u056C": "l"
      # Kh / kh
      "\u053D": "Kh"
      "\u056D": "kh"
      # Ts / ts followed by ayn
      "\u0551": "Ts\u02BB"
      "\u0581": "ts\u02BB"
      # Ts / ts
      "\u053E": "Ts"
      "\u056E": "ts"
      # K / k followed by ayn
      "\u0554": "K\u02BB"
      "\u0584": "k\u02BB"
      "\u053F": "K"
      "\u056F": "k"
      # Ch / ch followed by ayn
      "\u0549": "Ch\u02BB"
      "\u0579": "ch\u02BB"
      # Ch / ch
      "\u0543": "Ch"
      "\u0573": "ch"
      "\u0544": "M"
      "\u0574": "m"
      "\u0545": "Y"
      "\u0575": "y"
      "\u0546": "N"
      "\u0576": "n"
      # Sh / sh
      "\u0547": "Sh"
      "\u0577": "sh"
      "\u0540": "H"
      "\u0570": "h"
      # O / o with combining macron (U+0304)
      "\u0555": "O\u0304"
      "\u0585": "o\u0304"
      # U / u: the two-letter sequence that the Roman-to-script table produces
      # for U / u; listed ahead of the single U+0548 / U+0578 rows.
      "\u0548\u0582": "U"
      "\u0578\u0582": "u"
      "\u0548": "O"
      "\u0578": "o"
      # P / p followed by ayn
      "\u0553": "P\u02BB"
      "\u0583": "p\u02BB"
      # Plain P / p (U+054A / U+057A); previously absent from the table.
      "\u054A": "P"
      "\u057A": "p"
      "\u054B": "J"
      "\u057B": "j"
      # R / r with combining dot below (U+0323)
      "\u054C": "R\u0323"
      "\u057C": "r\u0323"
      "\u054D": "S"
      "\u057D": "s"
      "\u054E": "V"
      "\u057E": "v"
      "\u054F": "T"
      "\u057F": "t"
      "\u0550": "R"
      "\u0580": "r"
      "\u0552": "W"
      "\u0582": "w"
      "\u0556": "F"
      "\u0586": "f"