123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394 |
- general:
- name: Armenian
- roman_to_script:
- ignore:
- - "at head of title"
- - "colophon"
- - "date of publication not identified"
- - "place of publication not identified"
- - "publisher not identified"
- # NOTE There is ambiguity about ignoring these
- # words. Note that the single-character Roman
- # numerals are not included on purpose.
- # Ideally the source editors should use the
- # dedicated U+2160÷U+216F (uppercase Roman
- # numerals) and/or U+2170÷U+217F (lower case Roman
- # numerals) ranges to avoid this ambiguity.
- # TODO implement regular expressions for ignore patterns.
- #- re: "I{2,3}"
- #- re: "I(V|X)"
- #- re: "LI{,3}"
- #- re: "LI?(V|X)"
- #- re: "L(V|X{1,3})I{,3}"
- #- re: "LX{1,3}I?V"
- #- re: "LX{1,3}VI{,3}"
- #- re: "(V|X{1,3})I{,3}"
- #- re: "X{1,3}I{,3}"
- #- re: "X{1,3}I(V|X)"
- #- re: "X{1,3}VI{,3}"
- - "II"
- - "III"
- - "IV"
- - "IX"
- - "LI"
- - "LII"
- - "LIII"
- - "LIV"
- - "LIX"
- - "LV"
- - "LVI"
- - "LVII"
- - "LVIII"
- - "LX"
- - "LXI"
- - "LXII"
- - "LXIII"
- - "LXIV"
- - "LXIX"
- - "LXV"
- - "LXVI"
- - "LXVII"
- - "LXVIII"
- - "LXX"
- - "LXXI"
- - "LXXII"
- - "LXXIII"
- - "LXXIV"
- - "LXXIX"
- - "LXXV"
- - "LXXVI"
- - "LXXVII"
- - "LXXVIII"
- - "LXXX"
- - "LXXXI"
- - "LXXXII"
- - "LXXXIII"
- - "LXXXIV"
- - "LXXXIX"
- - "LXXXV"
- - "LXXXVI"
- - "LXXXVII"
- - "LXXXVIII"
- - "VI"
- - "VII"
- - "VIII"
- - "XI"
- - "XII"
- - "XIII"
- - "XIV"
- - "XIX"
- - "XL"
- - "XLI"
- - "XLII"
- - "XLIII"
- - "XLIV"
- - "XLIX"
- - "XLV"
- - "XLVI"
- - "XLVII"
- - "XLVIII"
- - "XV"
- - "XVI"
- - "XVII"
- - "XVIII"
- - "XX"
- - "XXI"
- - "XXII"
- - "XXIII"
- - "XXIV"
- - "XXIX"
- - "XXV"
- - "XXVI"
- - "XXVII"
- - "XXVIII"
- - "XXX"
- - "XXXI"
- - "XXXII"
- - "XXXIII"
- - "XXXIV"
- - "XXXIX"
- - "XXXV"
- - "XXXVI"
- - "XXXVII"
- - "XXXVIII"
- - "and one other"
- #- re: "and ([a-z0-9]+ )?others"
- - "et al."
- map:
- "A": "\u0531"
- "a": "\u0561"
- "B": "\u0532"
- "b": "\u0562"
- # GH combination
- "GH": "\u0542"
- # Gh combination
- "Gh": "\u0542"
- # gh combination
- "gh": "\u0572"
- "G": "\u0533"
- "g": "\u0563"
- # DZ combination: romanization of the single letter ja (U+0541).
- # No za (U+0566) is appended; that would add an extra "z" letter.
- "DZ": "\u0541"
- # Dz combination
- "Dz": "\u0541"
- # dz combination: romanization of the single letter ja (U+0571)
- "dz": "\u0571"
- "D": "\u0534"
- "d": "\u0564"
- # E uppercase with macron
- "E\u0304": "\u0537"
- # e lowercase with macron
- "e\u0304": "\u0567"
- # E uppercase with caron
- "E\u030C": "\u0538"
- # e lowercase with caron
- "e\u030C": "\u0568"
- # EW combination
- "EW": "\u0535\u0582"
- # Ew combination
- "Ew": "\u0535\u0582"
- # ew combination
- "ew": "\u0565\u0582"
- # EV combination: capital ech (U+0535) + vew, same casing rule as
- # the EW entries, which also produce a capital ech.
- "EV": "\u0535\u057E"
- # Ev combination
- "Ev": "\u0535\u057E"
- # ev combination: lowercase ech (U+0565) + vew
- "ev": "\u0565\u057E"
- "E": "\u0535"
- "e": "\u0565"
- # T uppercase with ayn
- "T\u02BB": "\u0539"
- # t lowercase with ayn
- "t\u02BB": "\u0569"
- # ZH combination
- "ZH": "\u053A"
- # Zh combination
- "Zh": "\u053A"
- # zh combination
- "zh": "\u056A"
- "Z": "\u0536"
- "z": "\u0566"
- "I": "\u053B"
- "i": "\u056B"
- "L": "\u053C"
- "l": "\u056C"
- # KH combination
- "KH": "\u053D"
- # Kh combination
- "Kh": "\u053D"
- # kh combination
- "kh": "\u056D"
- # TS + ayn combination
- "TS\u02BB": "\u0551"
- # Ts + ayn combination
- "Ts\u02BB": "\u0551"
- # ts + ayn combination
- "ts\u02BB": "\u0581"
- # TS combination
- "TS": "\u053E"
- # Ts combination
- "Ts": "\u053E"
- # ts combination
- "ts": "\u056E"
- # K + ayn
- "K\u02BB": "\u0554"
- # k + ayn
- "k\u02BB": "\u0584"
- "K": "\u053F"
- "k": "\u056F"
- # CH + ayn combination
- "CH\u02BB": "\u0549"
- # Ch + ayn combination
- "Ch\u02BB": "\u0549"
- # ch + ayn combination
- "ch\u02BB": "\u0579"
- # CH combination
- "CH": "\u0543"
- # Ch combination
- "Ch": "\u0543"
- # ch combination
- "ch": "\u0573"
- "M": "\u0544"
- "m": "\u0574"
- "Y": "\u0545"
- "y": "\u0575"
- "N": "\u0546"
- "n": "\u0576"
- # SH combinatiomn
- "SH": "\u0547"
- # Sh combination
- "Sh": "\u0547"
- #sh combination
- "sh": "\u0577"
- "H": "\u0540"
- "h": "\u0570"
- # O uppercase with combining macron
- "O\u0304": "\u0555"
- # o lowercase with combining macron
- "o\u0304": "\u0585"
- "O": "\u0548"
- "o": "\u0578"
- # P uppercase + ayn
- "P\u02BB": "\u0553"
- # p lowercase + ayn
- "p\u02BB": "\u0583"
- "J": "\u054B"
- "j": "\u057B"
- # R uppercase with combining dot below
- "R\u0323": "\u054C"
- # r lowercase with combining dot below
- "r\u0323": "\u057C"
- "S": "\u054D"
- "s": "\u057D"
- "V": "\u054E"
- "v": "\u057E"
- "T": "\u054F"
- "t": "\u057F"
- "R": "\u0550"
- "r": "\u0580"
- "W": "\u0552"
- "w": "\u0582"
- "U": "\u0548\u0582"
- "u": "\u0578\u0582"
- "F": "\u0556"
- "f": "\u0586"
- "\u02B9": ""
- script_to_roman:
- map:
- # Two-letter sequences whose plain letter-by-letter romanization
- # would be indistinguishable from one of the digraphs defined
- # further down in this map (Kh, Zh, Ts, Dz, Gh, Sh). A modifier
- # prime (U+02B9) is inserted between the two Roman letters to keep
- # them apart. The roman_to_script map has an entry turning U+02B9
- # into the empty string.
- # Each pair is listed in three casings: upper+upper, upper+lower,
- # lower+lower.
- # NOTE(review): presumably these must stay ahead of the
- # single-letter entries so that they are matched first; confirm
- # against the engine's key-ordering rules.
- # K + H (would otherwise read as Kh)
- "\u053F\u0540": "K\u02B9H"
- "\u053F\u0570": "K\u02B9h"
- "\u056F\u0570": "k\u02B9h"
- # Z + H (would otherwise read as Zh)
- "\u0536\u0540": "Z\u02B9H"
- "\u0536\u0570": "Z\u02B9h"
- "\u0566\u0570": "z\u02B9h"
- # T + S (would otherwise read as Ts)
- "\u054F\u054D": "T\u02B9S"
- "\u054F\u057D": "T\u02B9s"
- "\u057F\u057D": "t\u02B9s"
- # D + Z (would otherwise read as Dz)
- "\u0534\u0536": "D\u02B9Z"
- "\u0534\u0566": "D\u02B9z"
- "\u0564\u0566": "d\u02B9z"
- # G + H (would otherwise read as Gh)
- "\u0533\u0540": "G\u02B9H"
- "\u0533\u0570": "G\u02B9h"
- "\u0563\u0570": "g\u02B9h"
- # S + H (would otherwise read as Sh)
- "\u054D\u0540": "S\u02B9H"
- "\u054D\u0570": "S\u02B9h"
- "\u057D\u0570": "s\u02B9h"
- "\u0531": "A"
- "\u0561": "a"
- "\u0532": "B"
- "\u0562": "b"
- # Gh combination
- "\u0542": "Gh"
- # gh combination
- "\u0572": "gh"
- "\u0533": "G"
- "\u0563": "g"
- # Dz: the single letter ja (U+0541). The key is the letter alone,
- # so that a ja not followed by za is still romanized.
- "\u0541": "Dz"
- # dz: the single letter ja (U+0571)
- "\u0571": "dz"
- "\u0534": "D"
- "\u0564": "d"
- # E uppercase with macron
- "\u0537": "E\u0304"
- # e lowercase with macron
- "\u0567": "e\u0304"
- # E uppercase with caron
- "\u0538": "E\u030C"
- # e lowercase with caron
- "\u0568": "e\u030C"
- # Ew combination
- "\u0535\u0582": "Ew"
- # ew combination
- "\u0565\u0582": "ew"
- # Ev combination: capital ech (U+0535) + vew. The key must differ
- # from the lowercase one below, otherwise the two entries collide.
- "\u0535\u057E": "Ev"
- # ev combination: lowercase ech (U+0565) + vew
- "\u0565\u057E": "ev"
- "\u0535": "E"
- "\u0565": "e"
- # T uppercase with ayn
- "\u0539": "T\u02BB"
- # t lowercase with ayn
- "\u0569": "t\u02BB"
- # Zh combination
- "\u053A": "Zh"
- # zh combination
- "\u056A": "zh"
- "\u0536": "Z"
- "\u0566": "z"
- "\u053B": "I"
- "\u056B": "i"
- "\u053C": "L"
- "\u056C": "l"
- # Kh combination
- "\u053D": "Kh"
- # kh combination
- "\u056D": "kh"
- # Ts + ayn combination
- "\u0551": "Ts\u02BB"
- # ts + ayn combination
- "\u0581": "ts\u02BB"
- # Ts combination
- "\u053E": "Ts"
- # ts combination
- "\u056E": "ts"
- # K + ayn
- "\u0554": "K\u02BB"
- # k + ayn
- "\u0584": "k\u02BB"
- "\u053F": "K"
- "\u056F": "k"
- # Ch + ayn combination
- "\u0549": "Ch\u02BB"
- # ch + ayn combination
- "\u0579": "ch\u02BB"
- # Ch combination
- "\u0543": "Ch"
- # ch combination
- "\u0573": "ch"
- "\u0544": "M"
- "\u0574": "m"
- "\u0545": "Y"
- "\u0575": "y"
- "\u0546": "N"
- "\u0576": "n"
- # Sh combination
- "\u0547": "Sh"
- # sh combination
- "\u0577": "sh"
- "\u0540": "H"
- "\u0570": "h"
- # O uppercase with combining macron
- "\u0555": "O\u0304"
- # o lowercase with combining macron
- "\u0585": "o\u0304"
- "\u0548": "O"
- "\u0578": "o"
- # P uppercase + ayn
- "\u0553": "P\u02BB"
- # p lowercase + ayn
- "\u0583": "p\u02BB"
- "\u054B": "J"
- "\u057B": "j"
- # R uppercase with combining dot below
- "\u054C": "R\u0323"
- # r lowercase with combining dot below
- "\u057C": "r\u0323"
- "\u054D": "S"
- "\u057D": "s"
- "\u054E": "V"
- "\u057E": "v"
- "\u054F": "T"
- "\u057F": "t"
- "\u0550": "R"
- "\u0580": "r"
- "\u0552": "W"
- "\u0582": "w"
- "\u0556": "F"
- "\u0586": "f"
|