# _ignore_base.yml
  1. ---
  2. general:
  3. name: Common ignore list.
  4. roman_to_script:
  5. ignore:
  6. - "at head of title"
  7. - "colophon"
  8. - "date of publication not identified"
  9. - "place of publication not identified"
  10. - "publisher not identified"
  11. - "and one other"
  12. - "et al."
  13. ignore_ptn:
  14. - "and ([a-z0-9]+ )?others"
  15. # Incorrectly entered (but frequently found) Roman numerals.
  16. # NOTE There is ambiguity about ignoring these
  17. # words. Note that the single-character Roman
  18. # numerals are not included on purpose.
  19. # Ideally the source editors should use the
  20. # dedicated U+2160÷U+216F (uppercase Roman
  21. # numerals) and/or U+2170÷U+217F (lower case Roman
  22. # numerals) ranges to avoid this ambiguity.
  23. - "I{2,3}\\b"
  24. - "I(V|X)\\b"
  25. - "LI{,3}\\b"
  26. - "LI?(V|X)\\b"
  27. - "L(V|X{1,3})I{,3}\\b"
  28. - "LX{1,3}I?V\\b"
  29. - "LX{1,3}VI{,3}\\b"
  30. - "(V|X{1,3})I{,3}\\b"
  31. - "X{1,3}I{,3}\\b"
  32. - "X{1,3}I(V|X)\\b"
  33. - "X{1,3}VI{,3}\\b"
  34. # MARC sub-field markers.
  35. - "[\u2021\u01C2\\$][0-9a-z]\\b"