---
# Shared base table for Cyrillic-script transliteration.
#
# Three top-level sections:
#   general          - descriptive metadata (name and free-text notes).
#   roman_to_script  - Roman -> Cyrillic: an ignore list plus a token map.
#   script_to_roman  - Cyrillic -> Roman: a token map.
#
# Map keys and values use \uXXXX escapes, hence the double quotes throughout.
# Combining marks that appear below:
#   U+0306  combining breve
#   U+0308  combining diaeresis
#   U+FE20  combining ligature left half
#   U+FE21  combining ligature right half
#   U+02B9  modifier letter prime

general:
  name: Cyrillic base
  notes: >
    copied from Russian .cfg file and stripped off language-specific tokens.
    Russian ignore list has been left here on purpose, assuming it's valid
    for all child languages.

roman_to_script:
  # Roman-script strings listed under the `ignore` key.
  # NOTE(review): presumably these are passed through untransliterated by
  # the consuming tool - confirm against the loader.
  ignore:
    # Stock cataloguing phrases.
    - "at head of title"
    - "colophon"
    - "date of publication not identified"
    - "place of publication not identified"
    - "publisher not identified"

    # Roman numerals.
    #
    # Whether these should be ignored is ambiguous. The one-letter numerals
    # are deliberately left out of the list. The clean solution would be for
    # source editors to use the dedicated Unicode ranges U+2160-U+216F
    # (upper-case Roman numerals) and/or U+2170-U+217F (lower-case Roman
    # numerals), which would remove the ambiguity altogether.
    #
    # Disabled regular-expression alternatives, retained for reference:
    #
    # - re: "I{2,3}"
    # - re: "I(V|X)"
    # - re: "LI{,3}"
    # - re: "LI?(V|X)"
    # - re: "L(V|X{1,3})I{,3}"
    # - re: "LX{1,3}I?V"
    # - re: "LX{1,3}VI{,3}"
    # - re: "(V|X{1,3})I{,3}"
    # - re: "X{1,3}I{,3}"
    # - re: "X{1,3}I(V|X)"
    # - re: "X{1,3}VI{,3}"
    #
    # Literal list actually in effect: every multi-letter numeral from
    # II (2) through LXXXIX (89), in alphabetical order.
    - "II"
    - "III"
    - "IV"
    - "IX"
    - "LI"
    - "LII"
    - "LIII"
    - "LIV"
    - "LIX"
    - "LV"
    - "LVI"
    - "LVII"
    - "LVIII"
    - "LX"
    - "LXI"
    - "LXII"
    - "LXIII"
    - "LXIV"
    - "LXIX"
    - "LXV"
    - "LXVI"
    - "LXVII"
    - "LXVIII"
    - "LXX"
    - "LXXI"
    - "LXXII"
    - "LXXIII"
    - "LXXIV"
    - "LXXIX"
    - "LXXV"
    - "LXXVI"
    - "LXXVII"
    - "LXXVIII"
    - "LXXX"
    - "LXXXI"
    - "LXXXII"
    - "LXXXIII"
    - "LXXXIV"
    - "LXXXIX"
    - "LXXXV"
    - "LXXXVI"
    - "LXXXVII"
    - "LXXXVIII"
    - "VI"
    - "VII"
    - "VIII"
    - "XI"
    - "XII"
    - "XIII"
    - "XIV"
    - "XIX"
    - "XL"
    - "XLI"
    - "XLII"
    - "XLIII"
    - "XLIV"
    - "XLIX"
    - "XLV"
    - "XLVI"
    - "XLVII"
    - "XLVIII"
    - "XV"
    - "XVI"
    - "XVII"
    - "XVIII"
    - "XX"
    - "XXI"
    - "XXII"
    - "XXIII"
    - "XXIV"
    - "XXIX"
    - "XXV"
    - "XXVI"
    - "XXVII"
    - "XXVIII"
    - "XXX"
    - "XXXI"
    - "XXXII"
    - "XXXIII"
    - "XXXIV"
    - "XXXIX"
    - "XXXV"
    - "XXXVI"
    - "XXXVII"
    - "XXXVIII"

    # "And others" phrases.
    - "and one other"
    # Disabled regular-expression alternative:
    # - re: "and ([a-z]+ )?others"
    - "et al."

  # Roman token -> Cyrillic replacement. Entry order is kept exactly as
  # authored (multi-letter tokens such as "KH" precede their one-letter
  # prefix "K").
  #
  # Several entries carry the remark "should not be needed, but harmless".
  # NOTE(review): the remarks placed before "Z" and "T", and the one before
  # "SH" (which appeared twice in a row), have no obvious target; they may
  # be leftovers from entries removed when this table was derived from the
  # Russian one - confirm which mapping each was meant for.
  map:
    "A": "\u0410"  # А
    "a": "\u0430"  # а
    "B": "\u0411"  # Б
    "b": "\u0431"  # б
    "V": "\u0412"  # В
    "v": "\u0432"  # в
    "D": "\u0414"  # Д
    "d": "\u0434"  # д
    "E": "\u0415"  # Е
    "e": "\u0435"  # е
    # Should not be needed, but harmless.
    "Z": "\u0417"  # З
    "z": "\u0437"  # з
    "I\u0306": "\u0419"  # I + breve -> Й
    # Should not be needed, but harmless.
    "I\uFE20U\uFE21": "\u042E"  # ligatured IU -> Ю
    # Should not be needed, but harmless.
    "I\uFE20u\uFE21": "\u042E"  # ligatured Iu -> Ю
    "I\uFE20A\uFE21": "\u042F"  # ligatured IA -> Я
    # Should not be needed, but harmless.
    "I\uFE20a\uFE21": "\u042F"  # ligatured Ia -> Я
    "i\u0306": "\u0439"  # i + breve -> й
    "i\uFE20u\uFE21": "\u044E"  # ligatured iu -> ю
    "i\uFE20a\uFE21": "\u044F"  # ligatured ia -> я
    # Should not be needed, but harmless.
    "KH": "\u0425"  # Х
    "Kh": "\u0425"  # Х
    "K": "\u041A"  # К
    "kh": "\u0445"  # х
    "k": "\u043A"  # к
    "L": "\u041B"  # Л
    "l": "\u043B"  # л
    "M": "\u041C"  # М
    "m": "\u043C"  # м
    "N": "\u041D"  # Н
    "n": "\u043D"  # н
    "O": "\u041E"  # О
    "o": "\u043E"  # о
    "P": "\u041F"  # П
    "p": "\u043F"  # п
    "R": "\u0420"  # Р
    "r": "\u0440"  # р
    # Should not be needed, but harmless.
    "SH": "\u0428"  # Ш
    "Sh": "\u0428"  # Ш
    "S": "\u0421"  # С
    "sh": "\u0448"  # ш
    "s": "\u0441"  # с
    # Should not be needed, but harmless.
    "T": "\u0422"  # Т
    "t": "\u0442"  # т
    "U": "\u0423"  # У
    "u": "\u0443"  # у
    "F": "\u0424"  # Ф
    "f": "\u0444"  # ф
    # Should not be needed, but harmless.
    "CH": "\u0427"  # Ч
    "Ch": "\u0427"  # Ч
    "ch": "\u0447"  # ч
    # Should not be needed, but harmless: a ligature left half that was not
    # consumed by a longer token above is mapped to the empty string.
    "\uFE20": ""
    # Should not be needed, but harmless: same for the ligature right half.
    "\uFE21": ""
    # Ambiguous: the prime could in theory also stand for upper-case U+042C
    # (Ь); the lower-case soft sign is what is emitted.
    "\u02B9": "\u044C"  # ь

script_to_roman:
  # Cyrillic code point -> Roman replacement.
  #
  # NOTE(review): U+0433 (г) is mapped but its upper-case U+0413 is not, and
  # U+0404 / U+0407 are mapped without their lower-case forms U+0454 /
  # U+0457 - confirm these gaps are intentional (e.g. filled by child
  # tables).
  # NOTE(review): U+0429 / U+0449 map to "Shch" / "shch", but
  # roman_to_script has no "Shch" token for the reverse direction - confirm.
  map:
    "\u0404": "I\uFE20E\uFE21"  # Є -> ligatured IE
    "\u0407": "I\u0308"  # Ї -> I + diaeresis
    "\u0410": "A"  # А
    "\u0411": "B"  # Б
    "\u0412": "V"  # В
    "\u0414": "D"  # Д
    "\u0415": "E"  # Е
    "\u0417": "Z"  # З
    "\u0419": "I\u0306"  # Й -> I + breve
    "\u041A": "K"  # К
    "\u041B": "L"  # Л
    "\u041C": "M"  # М
    "\u041D": "N"  # Н
    "\u041E": "O"  # О
    "\u041F": "P"  # П
    "\u0420": "R"  # Р
    "\u0421": "S"  # С
    "\u0422": "T"  # Т
    "\u0423": "U"  # У
    "\u0424": "F"  # Ф
    "\u0425": "Kh"  # Х
    "\u0427": "Ch"  # Ч
    "\u0428": "Sh"  # Ш
    "\u0429": "Shch"  # Щ
    "\u042C": "\u02B9"  # Ь -> modifier prime
    "\u042E": "I\uFE20U\uFE21"  # Ю -> ligatured IU
    "\u042F": "I\uFE20A\uFE21"  # Я -> ligatured IA
    "\u0430": "a"  # а
    "\u0431": "b"  # б
    "\u0432": "v"  # в
    "\u0433": "h"  # г
    "\u0434": "d"  # д
    "\u0435": "e"  # е
    "\u0437": "z"  # з
    "\u0439": "i\u0306"  # й -> i + breve
    "\u043A": "k"  # к
    "\u043B": "l"  # л
    "\u043C": "m"  # м
    "\u043D": "n"  # н
    "\u043E": "o"  # о
    "\u043F": "p"  # п
    "\u0440": "r"  # р
    "\u0441": "s"  # с
    "\u0442": "t"  # т
    "\u0443": "u"  # у
    "\u0444": "f"  # ф
    "\u0445": "kh"  # х
    "\u0447": "ch"  # ч
    "\u0448": "sh"  # ш
    "\u0449": "shch"  # щ
    "\u044C": "\u02B9"  # ь -> modifier prime
    "\u044E": "i\uFE20u\uFE21"  # ю -> ligatured iu
    "\u044F": "i\uFE20a\uFE21"  # я -> ligatured ia