# Common ignore list.
#
# `roman_to_script.ignore` is a flat list of literal strings. Judging by the
# key names, these are tokens to be left untouched when converting Roman text
# to another script -- NOTE(review): confirm against the consuming code.

general:
  name: Common ignore list.

roman_to_script:
  ignore:
    # Bibliographic boilerplate phrases.
    - "at head of title"
    - "colophon"
    - "date of publication not identified"
    - "place of publication not identified"
    - "publisher not identified"

    # NOTE There is ambiguity about ignoring these words. Note that the
    # single-character Roman numerals are not included on purpose.
    # Ideally the source editors should use the dedicated U+2160–U+216F
    # (uppercase Roman numerals) and/or U+2170–U+217F (lowercase Roman
    # numerals) ranges to avoid this ambiguity.
    #
    # TODO implement regular expressions for ignore patterns.
    # - re: "I{2,3}"
    # - re: "I(V|X)"
    # - re: "LI{,3}"
    # - re: "LI?(V|X)"
    # - re: "L(V|X{1,3})I{,3}"
    # - re: "LX{1,3}I?V"
    # - re: "LX{1,3}VI{,3}"
    # - re: "(V|X{1,3})I{,3}"
    # - re: "X{1,3}I{,3}"
    # - re: "X{1,3}I(V|X)"
    # - re: "X{1,3}VI{,3}"
    - "II"
    - "III"
    - "IV"
    - "IX"
    - "LI"
    - "LII"
    - "LIII"
    - "LIV"
    - "LIX"
    - "LV"
    - "LVI"
    - "LVII"
    - "LVIII"
    - "LX"
    - "LXI"
    - "LXII"
    - "LXIII"
    - "LXIV"
    - "LXIX"
    - "LXV"
    - "LXVI"
    - "LXVII"
    - "LXVIII"
    - "LXX"
    - "LXXI"
    - "LXXII"
    - "LXXIII"
    - "LXXIV"
    - "LXXIX"
    - "LXXV"
    - "LXXVI"
    - "LXXVII"
    - "LXXVIII"
    - "LXXX"
    - "LXXXI"
    - "LXXXII"
    - "LXXXIII"
    - "LXXXIV"
    - "LXXXIX"
    - "LXXXV"
    - "LXXXVI"
    - "LXXXVII"
    - "LXXXVIII"
    - "VI"
    - "VII"
    - "VIII"
    - "XI"
    - "XII"
    - "XIII"
    - "XIV"
    - "XIX"
    - "XL"
    - "XLI"
    - "XLII"
    - "XLIII"
    - "XLIV"
    - "XLIX"
    - "XLV"
    - "XLVI"
    - "XLVII"
    - "XLVIII"
    - "XV"
    - "XVI"
    - "XVII"
    - "XVIII"
    - "XX"
    - "XXI"
    - "XXII"
    - "XXIII"
    - "XXIV"
    - "XXIX"
    - "XXV"
    - "XXVI"
    - "XXVII"
    - "XXVIII"
    - "XXX"
    - "XXXI"
    - "XXXII"
    - "XXXIII"
    - "XXXIV"
    - "XXXIX"
    - "XXXV"
    - "XXXVI"
    - "XXXVII"
    - "XXXVIII"

    # Multiple-author abbreviations.
    - "and one other"
    # - re: "and ([a-z0-9]+ )?others"
    - "et al."