# _ignore_base.yml
  # General metadata for this table.
  general:
    # Descriptive name of this ignore list.
    name: Common ignore list.
  roman_to_script:
    # Regular expression patterns to ignore.
    # NOTE(review): "\<" and "\>" are GNU-style start/end-of-word anchors.
    # Confirm that the consuming regex engine supports them; Python's "re"
    # module, for one, reads them as a literal "<" and ">" ("\b" is its
    # word-boundary anchor).
    ignore_ptn:
      # Roman numerals.
      # NOTE: There is ambiguity about ignoring these words. The
      # single-character Roman numerals (I, V, X, L) are not included on
      # purpose, so every pattern below requires at least two characters.
      # Ideally the source editors should use the dedicated ranges
      # U+2160 to U+216F (uppercase Roman numerals) and/or U+2170 to U+217F
      # (lowercase Roman numerals) to avoid this ambiguity.
      - "\\<I{2,3}\\>"  # II, III
      - "\\<I(V|X)\\>"  # IV, IX
      - "\\<VI{1,3}\\>"  # VI to VIII
      - "\\<X{2,3}\\>"  # XX, XXX
      - "\\<X{1,3}I{1,3}\\>"  # XI to XIII, XXI to XXIII, XXXI to XXXIII
      - "\\<X{1,3}I(V|X)\\>"  # XIV, XIX, XXIV, XXIX, XXXIV, XXXIX
      - "\\<X{1,3}VI{,3}\\>"  # XV to XVIII, XXV to XXVIII, XXXV to XXXVIII
      - "\\<XLI{,3}\\>"  # XL to XLIII
      - "\\<XLI(V|X)\\>"  # XLIV, XLIX
      - "\\<XLVI{,3}\\>"  # XLV to XLVIII
      - "\\<LI{1,3}\\>"  # LI to LIII
      - "\\<LI?(V|X)\\>"  # LIV, LV, LIX, LX
      - "\\<L(V|X{1,3})I{,3}\\>"  # LV to LVIII, LX to LXIII, ... LXXXIII
      - "\\<LX{1,3}I(V|X)\\>"  # LXIV, LXIX, LXXIV, LXXIX, LXXXIV, LXXXIX
      - "\\<LX{1,3}VI{,3}\\>"  # LXV to LXVIII, ... LXXXV to LXXXVIII
      # Phrases such as "and others" or "and three others".
      - "\\<and ([a-z]+ )?others\\>"
    # Fixed strings to ignore.
    ignore:
      - "at head of title"
      - "colophon"
      - "date of publication not identified"
      - "place of publication not identified"
      - "publisher not identified"
      # The literal Roman numerals below are disabled; they appear to have
      # been superseded by the Roman numeral patterns under ignore_ptn and
      # are kept for reference.
      # - "II"
      # - "III"
      # - "IV"
      # - "IX"
      # - "LI"
      # - "LII"
      # - "LIII"
      # - "LIV"
      # - "LIX"
      # - "LV"
      # - "LVI"
      # - "LVII"
      # - "LVIII"
      # - "LX"
      # - "LXI"
      # - "LXII"
      # - "LXIII"
      # - "LXIV"
      # - "LXIX"
      # - "LXV"
      # - "LXVI"
      # - "LXVII"
      # - "LXVIII"
      # - "LXX"
      # - "LXXI"
      # - "LXXII"
      # - "LXXIII"
      # - "LXXIV"
      # - "LXXIX"
      # - "LXXV"
      # - "LXXVI"
      # - "LXXVII"
      # - "LXXVIII"
      # - "LXXX"
      # - "LXXXI"
      # - "LXXXII"
      # - "LXXXIII"
      # - "LXXXIV"
      # - "LXXXIX"
      # - "LXXXV"
      # - "LXXXVI"
      # - "LXXXVII"
      # - "LXXXVIII"
      # - "VI"
      # - "VII"
      # - "VIII"
      # - "XI"
      # - "XII"
      # - "XIII"
      # - "XIV"
      # - "XIX"
      # - "XL"
      # - "XLI"
      # - "XLII"
      # - "XLIII"
      # - "XLIV"
      # - "XLIX"
      # - "XLV"
      # - "XLVI"
      # - "XLVII"
      # - "XLVIII"
      # - "XV"
      # - "XVI"
      # - "XVII"
      # - "XVIII"
      # - "XX"
      # - "XXI"
      # - "XXII"
      # - "XXIII"
      # - "XXIV"
      # - "XXIX"
      # - "XXV"
      # - "XXVI"
      # - "XXVII"
      # - "XXVIII"
      # - "XXX"
      # - "XXXI"
      # - "XXXII"
      # - "XXXIII"
      # - "XXXIV"
      # - "XXXIX"
      # - "XXXV"
      # - "XXXVI"
      # - "XXXVII"
      # - "XXXVIII"
      - "and one other"
      - "et al."