---
# _ignore_base.yml
#
# Common ignore list. Defines one `ignore` list per direction:
# `roman_to_script` and `script_to_roman`.
# NOTE(review): presumably the listed strings are left untouched during
# conversion and other configuration files build on this base; confirm
# against the consumer of this file.

general:
  name: Common ignore list.

roman_to_script:
  ignore:
    - " "
    - "at head of title"
    - "colophon"
    - "date of publication not identified"
    - "place of publication not identified"
    - "publisher not identified"
    # NOTE There is ambiguity about ignoring these words. The
    # single-character Roman numerals (e.g. I, V, X, L) are left out on
    # purpose. Ideally the source editors should use the dedicated Unicode
    # ranges U+2160–U+216F (uppercase Roman numerals) and/or U+2170–U+217F
    # (lowercase Roman numerals) to avoid this ambiguity.
    #
    # The literal entries below spell out every multi-character numeral
    # from II (2) to LXXXIX (89), sorted alphabetically.
    #
    # TODO implement regular expressions for ignore patterns.
    # Candidate patterns, kept commented out until then:
    # - re: "I{2,3}"
    # - re: "I(V|X)"
    # - re: "LI{,3}"
    # - re: "LI?(V|X)"
    # - re: "L(V|X{1,3})I{,3}"
    # - re: "LX{1,3}I?V"
    # - re: "LX{1,3}VI{,3}"
    # - re: "(V|X{1,3})I{,3}"
    # - re: "X{1,3}I{,3}"
    # - re: "X{1,3}I(V|X)"
    # - re: "X{1,3}VI{,3}"
    - "II"
    - "III"
    - "IV"
    - "IX"
    - "LI"
    - "LII"
    - "LIII"
    - "LIV"
    - "LIX"
    - "LV"
    - "LVI"
    - "LVII"
    - "LVIII"
    - "LX"
    - "LXI"
    - "LXII"
    - "LXIII"
    - "LXIV"
    - "LXIX"
    - "LXV"
    - "LXVI"
    - "LXVII"
    - "LXVIII"
    - "LXX"
    - "LXXI"
    - "LXXII"
    - "LXXIII"
    - "LXXIV"
    - "LXXIX"
    - "LXXV"
    - "LXXVI"
    - "LXXVII"
    - "LXXVIII"
    - "LXXX"
    - "LXXXI"
    - "LXXXII"
    - "LXXXIII"
    - "LXXXIV"
    - "LXXXIX"
    - "LXXXV"
    - "LXXXVI"
    - "LXXXVII"
    - "LXXXVIII"
    - "VI"
    - "VII"
    - "VIII"
    - "XI"
    - "XII"
    - "XIII"
    - "XIV"
    - "XIX"
    - "XL"
    - "XLI"
    - "XLII"
    - "XLIII"
    - "XLIV"
    - "XLIX"
    - "XLV"
    - "XLVI"
    - "XLVII"
    - "XLVIII"
    - "XV"
    - "XVI"
    - "XVII"
    - "XVIII"
    - "XX"
    - "XXI"
    - "XXII"
    - "XXIII"
    - "XXIV"
    - "XXIX"
    - "XXV"
    - "XXVI"
    - "XXVII"
    - "XXVIII"
    - "XXX"
    - "XXXI"
    - "XXXII"
    - "XXXIII"
    - "XXXIV"
    - "XXXIX"
    - "XXXV"
    - "XXXVI"
    - "XXXVII"
    - "XXXVIII"
    - "and one other"
    # - re: "and ([a-z0-9]+ )?others"
    - "et al."

script_to_roman:
  ignore:
    - " "