---
# _ignore_base.yml
#
# Common ignore list: the strings enumerated under `roman_to_script.ignore`.
#
# NOTE(review): the nesting used here (`name` under `general`, `ignore`
# under `roman_to_script`) is inferred from the order of the keys; confirm
# it against the code that loads this file.

general:
  name: Common ignore list.

roman_to_script:
  ignore:
    # Fixed phrases.
    - "at head of title"
    - "colophon"
    - "date of publication not identified"
    - "place of publication not identified"
    - "publisher not identified"

    # Roman numerals.
    #
    # NOTE: Ignoring these words is ambiguous, because they are written
    # with ordinary Latin letters. The single-character Roman numerals
    # (I, V, X, L) are left out on purpose. Ideally the source editors
    # should use the dedicated Unicode ranges U+2160–U+216F (uppercase
    # Roman numerals) and/or U+2170–U+217F (lowercase Roman numerals) to
    # avoid this ambiguity.
    #
    # The active entries below spell out every multi-character numeral
    # from II (2) to LXXXIX (89), sorted alphabetically.
    #
    # TODO: Implement regular expressions for ignore patterns; the
    # commented-out `re:` entries are the patterns intended for that.
    # - re: "I{2,3}"
    # - re: "I(V|X)"
    # - re: "LI{,3}"
    # - re: "LI?(V|X)"
    # - re: "L(V|X{1,3})I{,3}"
    # - re: "LX{1,3}I?V"
    # - re: "LX{1,3}VI{,3}"
    # - re: "(V|X{1,3})I{,3}"
    # - re: "X{1,3}I{,3}"
    # - re: "X{1,3}I(V|X)"
    # - re: "X{1,3}VI{,3}"
    - "II"
    - "III"
    - "IV"
    - "IX"
    - "LI"
    - "LII"
    - "LIII"
    - "LIV"
    - "LIX"
    - "LV"
    - "LVI"
    - "LVII"
    - "LVIII"
    - "LX"
    - "LXI"
    - "LXII"
    - "LXIII"
    - "LXIV"
    - "LXIX"
    - "LXV"
    - "LXVI"
    - "LXVII"
    - "LXVIII"
    - "LXX"
    - "LXXI"
    - "LXXII"
    - "LXXIII"
    - "LXXIV"
    - "LXXIX"
    - "LXXV"
    - "LXXVI"
    - "LXXVII"
    - "LXXVIII"
    - "LXXX"
    - "LXXXI"
    - "LXXXII"
    - "LXXXIII"
    - "LXXXIV"
    - "LXXXIX"
    - "LXXXV"
    - "LXXXVI"
    - "LXXXVII"
    - "LXXXVIII"
    - "VI"
    - "VII"
    - "VIII"
    - "XI"
    - "XII"
    - "XIII"
    - "XIV"
    - "XIX"
    - "XL"
    - "XLI"
    - "XLII"
    - "XLIII"
    - "XLIV"
    - "XLIX"
    - "XLV"
    - "XLVI"
    - "XLVII"
    - "XLVIII"
    - "XV"
    - "XVI"
    - "XVII"
    - "XVIII"
    - "XX"
    - "XXI"
    - "XXII"
    - "XXIII"
    - "XXIV"
    - "XXIX"
    - "XXV"
    - "XXVI"
    - "XXVII"
    - "XXVIII"
    - "XXX"
    - "XXXI"
    - "XXXII"
    - "XXXIII"
    - "XXXIV"
    - "XXXIX"
    - "XXXV"
    - "XXXVI"
    - "XXXVII"
    - "XXXVIII"

    # Abbreviated author lists.
    - "and one other"
    # - re: "and ([a-z0-9]+ )?others"
    - "et al."