---
# Shared ignore list: literal phrases and regular-expression patterns
# grouped under the `roman_to_script` key.
general:
  name: Common ignore list.

roman_to_script:
  # Fixed phrases.
  # NOTE(review): presumably compared literally by the consumer - confirm.
  ignore:
    - "at head of title"
    - "colophon"
    - "date of publication not identified"
    - "place of publication not identified"
    - "publisher not identified"
    - "and one other"
    - "et al."

  # Regular-expression patterns. The double quotes are significant: inside
  # them `\\` yields one backslash (so `\\b` is the regex `\b`) and `\uXXXX`
  # yields the corresponding code point.
  ignore_ptn:
    - "and ([a-z0-9]+ )?others"

    # Incorrectly entered (but frequently found) Roman numerals.
    # NOTE: Ignoring these words is ambiguous, since they can also be
    # ordinary text. Single-character Roman numerals are left out on
    # purpose. Ideally the source editors should use the dedicated ranges
    # U+2160–U+216F (upper-case Roman numerals) and/or U+2170–U+217F
    # (lower-case Roman numerals) to avoid this ambiguity.
    # NOTE(review): `{,3}` means "0 to 3" only in regex engines that accept
    # an omitted lower bound (e.g. Python `re`); confirm the consumer's
    # engine before reusing these patterns elsewhere.
    # NOTE(review): with `{,3}` read as 0–3, patterns such as `LI{,3}\b`,
    # `(V|X{1,3})I{,3}\b` and `X{1,3}I{,3}\b` can match a lone `L`, `V` or
    # `X`, which seems at odds with the single-character exclusion above -
    # verify whether that is intended.
    - "I{2,3}\\b"
    - "I(V|X)\\b"
    - "LI{,3}\\b"
    - "LI?(V|X)\\b"
    - "L(V|X{1,3})I{,3}\\b"
    - "LX{1,3}I?V\\b"
    - "LX{1,3}VI{,3}\\b"
    - "(V|X{1,3})I{,3}\\b"
    - "X{1,3}I{,3}\\b"
    - "X{1,3}I(V|X)\\b"
    - "X{1,3}VI{,3}\\b"

    # MARC sub-field markers: one of U+2021, U+01C2 or `$`, followed by a
    # single digit or lower-case letter.
    - "[\u2021\u01C2\\$][0-9a-z]\\b"