---
# Cyrillic base transliteration table.
#
# Top-level sections:
#   general          - descriptive metadata (name, notes).
#   roman_to_script  - `ignore`: Roman-script strings listed for exclusion
#                      (see the NOTE inside the list); `map`: romanized
#                      token -> Cyrillic character.
#   script_to_roman  - `map`: Cyrillic character -> romanized token.
#
# All non-ASCII characters are written as "\uXXXX" escapes, so every key
# and value that uses them must stay double-quoted.

general:
  name: Cyrillic base
  notes: >
    copied from Russian .cfg file and stripped
    off language-specific tokens. Russian ignore list
    has been left here on purpose, assuming it's valid
    for all child languages.

roman_to_script:
  ignore:
    - "at head of title"
    - "colophon"
    - "date of publication not identified"
    - "place of publication not identified"
    - "publisher not identified"
    # NOTE There is ambiguity about ignoring these
    # words. Note that the single-character Roman
    # numerals are not included on purpose.
    # Ideally the source editors should use the
    # dedicated U+2160÷U+216F (uppercase Roman
    # numerals) and/or U+2170÷U+217F (lower case Roman
    # numerals) ranges to avoid this ambiguity.
    #- re: "I{2,3}"
    #- re: "I(V|X)"
    #- re: "LI{,3}"
    #- re: "LI?(V|X)"
    #- re: "L(V|X{1,3})I{,3}"
    #- re: "LX{1,3}I?V"
    #- re: "LX{1,3}VI{,3}"
    #- re: "(V|X{1,3})I{,3}"
    #- re: "X{1,3}I{,3}"
    #- re: "X{1,3}I(V|X)"
    #- re: "X{1,3}VI{,3}"
    - "II"
    - "III"
    - "IV"
    - "IX"
    - "LI"
    - "LII"
    - "LIII"
    - "LIV"
    - "LIX"
    - "LV"
    - "LVI"
    - "LVII"
    - "LVIII"
    - "LX"
    - "LXI"
    - "LXII"
    - "LXIII"
    - "LXIV"
    - "LXIX"
    - "LXV"
    - "LXVI"
    - "LXVII"
    - "LXVIII"
    - "LXX"
    - "LXXI"
    - "LXXII"
    - "LXXIII"
    - "LXXIV"
    - "LXXIX"
    - "LXXV"
    - "LXXVI"
    - "LXXVII"
    - "LXXVIII"
    - "LXXX"
    - "LXXXI"
    - "LXXXII"
    - "LXXXIII"
    - "LXXXIV"
    - "LXXXIX"
    - "LXXXV"
    - "LXXXVI"
    - "LXXXVII"
    - "LXXXVIII"
    - "VI"
    - "VII"
    - "VIII"
    - "XI"
    - "XII"
    - "XIII"
    - "XIV"
    - "XIX"
    - "XL"
    - "XLI"
    - "XLII"
    - "XLIII"
    - "XLIV"
    - "XLIX"
    - "XLV"
    - "XLVI"
    - "XLVII"
    - "XLVIII"
    - "XV"
    - "XVI"
    - "XVII"
    - "XVIII"
    - "XX"
    - "XXI"
    - "XXII"
    - "XXIII"
    - "XXIV"
    - "XXIX"
    - "XXV"
    - "XXVI"
    - "XXVII"
    - "XXVIII"
    - "XXX"
    - "XXXI"
    - "XXXII"
    - "XXXIII"
    - "XXXIV"
    - "XXXIX"
    - "XXXV"
    - "XXXVI"
    - "XXXVII"
    - "XXXVIII"
    - "and one other"
    #- re: "and ([a-z]+ )?others"
    - "et al."
  map:
    # NOTE(review): several "this conversion shouldn't be needed" comments
    # below sit directly above another comment or above a single-letter
    # entry. They look like leftovers from entries that were stripped out
    # of the Russian source, so they may no longer describe the entry that
    # follows them - verify before relying on them.
    "A": "\u0410"
    "a": "\u0430"
    "B": "\u0411"
    "b": "\u0431"
    "V": "\u0412"
    "v": "\u0432"
    "D": "\u0414"
    "d": "\u0434"
    "E": "\u0415"
    "e": "\u0435"
    # this conversion shouldn't be needed, but does no harm
    "Z": "\u0417"
    "z": "\u0437"
    "I\u0306": "\u0419"
    # this conversion shouldn't be needed, but does no harm
    "I\uFE20U\uFE21": "\u042E"
    # this conversion shouldn't be needed, but does no harm
    "I\uFE20u\uFE21": "\u042E"
    "I\uFE20A\uFE21": "\u042F"
    # this conversion shouldn't be needed, but does no harm
    "I\uFE20a\uFE21": "\u042F"
    "i\u0306": "\u0439"
    "i\uFE20u\uFE21": "\u044E"
    "i\uFE20a\uFE21": "\u044F"
    # this conversion shouldn't be needed, but does no harm
    "KH": "\u0425"
    "Kh": "\u0425"
    "K": "\u041A"
    "kh": "\u0445"
    "k": "\u043A"
    "L": "\u041B"
    "l": "\u043B"
    "M": "\u041C"
    "m": "\u043C"
    "N": "\u041D"
    "n": "\u043D"
    "O": "\u041E"
    "o": "\u043E"
    "P": "\u041F"
    "p": "\u043F"
    "R": "\u0420"
    "r": "\u0440"
    # this conversion shouldn't be needed, but does no harm
    # this conversion shouldn't be needed, but does no harm
    "SH": "\u0428"
    "Sh": "\u0428"
    "S": "\u0421"
    "sh": "\u0448"
    "s": "\u0441"
    # this conversion shouldn't be needed, but does no harm
    "T": "\u0422"
    "t": "\u0442"
    "U": "\u0423"
    "u": "\u0443"
    "F": "\u0424"
    "f": "\u0444"
    # this conversion shouldn't be needed, but does no harm
    "CH": "\u0427"
    "Ch": "\u0427"
    "ch": "\u0447"
    # The next two entries map the combining ligature halves (U+FE20,
    # U+FE21) to an empty string.
    # this conversion shouldn't be needed, but does no harm
    "\uFE20": ""
    # this conversion shouldn't be needed, but does no harm
    "\uFE21": ""
    # this conversion is ambiguous - \u042C is also theoretically possible
    "\u02B9": "\u044C"

script_to_roman:
  map:
    # NOTE(review): \u0404 and \u0407 appear here without their lowercase
    # counterparts (\u0454, \u0457), and \u0433 appears further down
    # without uppercase \u0413. Presumably the missing halves are supplied
    # by the language-specific tables that build on this base - confirm.
    "\u0404": "I\uFE20E\uFE21"
    "\u0407": "I\u0308"
    "\u0410": "A"
    "\u0411": "B"
    "\u0412": "V"
    "\u0414": "D"
    "\u0415": "E"
    "\u0417": "Z"
    "\u0419": "I\u0306"
    "\u041A": "K"
    "\u041B": "L"
    "\u041C": "M"
    "\u041D": "N"
    "\u041E": "O"
    "\u041F": "P"
    "\u0420": "R"
    "\u0421": "S"
    "\u0422": "T"
    "\u0423": "U"
    "\u0424": "F"
    "\u0425": "Kh"
    "\u0427": "Ch"
    "\u0428": "Sh"
    "\u0429": "Shch"
    "\u042C": "\u02B9"
    "\u042E": "I\uFE20U\uFE21"
    "\u042F": "I\uFE20A\uFE21"
    "\u0430": "a"
    "\u0431": "b"
    "\u0432": "v"
    "\u0433": "h"
    "\u0434": "d"
    "\u0435": "e"
    "\u0437": "z"
    "\u0439": "i\u0306"
    "\u043A": "k"
    "\u043B": "l"
    "\u043C": "m"
    "\u043D": "n"
    "\u043E": "o"
    "\u043F": "p"
    "\u0440": "r"
    "\u0441": "s"
    "\u0442": "t"
    "\u0443": "u"
    "\u0444": "f"
    "\u0445": "kh"
    "\u0447": "ch"
    "\u0448": "sh"
    "\u0449": "shch"
    "\u044C": "\u02B9"
    "\u044E": "i\uFE20u\uFE21"
    "\u044F": "i\uFE20a\uFE21"