Forráskód Böngészése

Merge branch 'main' into test

scossu 2 hónapja
szülő
commit
2b9b5be220
68 módosított fájl, 1897 hozzáadás és 1497 törlés
  1. 3 3
      doc/supported_scripts.md
  2. 36 0
      scriptshifter/hooks/general/__init__.py
  3. 4 12
      scriptshifter/hooks/hebrew/dicta_api.py
  4. 18 15
      scriptshifter/hooks/korean/romanizer.py
  5. 1 9
      scriptshifter/hooks/yiddish_/__init__.py
  6. 0 133
      scriptshifter/tables/data/_cyrillic_base.yml
  7. 23 16
      scriptshifter/tables/data/_ignore_base.yml
  8. 1 1
      scriptshifter/tables/data/abkhaz_cyrillic.yml
  9. 1 1
      scriptshifter/tables/data/altai_cyrillic.yml
  10. 7 4
      scriptshifter/tables/data/arabic.yml
  11. 1 1
      scriptshifter/tables/data/azerbaijani_cyrillic.yml
  12. 1 1
      scriptshifter/tables/data/bashkir_cyrillic.yml
  13. 1 1
      scriptshifter/tables/data/belarusian.yml
  14. 18 38
      scriptshifter/tables/data/bulgarian.yml
  15. 1 2
      scriptshifter/tables/data/buriat_cyrillic.yml
  16. 1 1
      scriptshifter/tables/data/chukchi_cyrillic.yml
  17. 1 1
      scriptshifter/tables/data/church_slavonic.yml
  18. 1 1
      scriptshifter/tables/data/chuvash_cyrillic.yml
  19. 705 700
      scriptshifter/tables/data/cyrillic_generic.yml
  20. 1 1
      scriptshifter/tables/data/dungan_cyrillic.yml
  21. 1 1
      scriptshifter/tables/data/even-evenki_cyrillic.yml
  22. 1 1
      scriptshifter/tables/data/gagauz_cyrillic.yml
  23. 207 209
      scriptshifter/tables/data/greek_classical.yml
  24. 18 0
      scriptshifter/tables/data/greek_modern.yml
  25. 1 1
      scriptshifter/tables/data/kalmyk_cyrillic.yml
  26. 1 1
      scriptshifter/tables/data/kara-kalpak_cyrillic.yml
  27. 1 1
      scriptshifter/tables/data/karachay-balkar_cyrillic.yml
  28. 1 1
      scriptshifter/tables/data/karelian_cyrillic.yml
  29. 1 1
      scriptshifter/tables/data/kazakh_cyrillic.yml
  30. 1 1
      scriptshifter/tables/data/khakass_cyrillic.yml
  31. 1 1
      scriptshifter/tables/data/khanty_cyrillic.yml
  32. 1 1
      scriptshifter/tables/data/komi_cyrillic.yml
  33. 1 1
      scriptshifter/tables/data/koryak_cyrillic.yml
  34. 1 1
      scriptshifter/tables/data/kyrgyz_cyrillic.yml
  35. 1 1
      scriptshifter/tables/data/lithuanian_cyrillic.yml
  36. 62 16
      scriptshifter/tables/data/macedonian.yml
  37. 191 0
      scriptshifter/tables/data/manchu.yml
  38. 1 1
      scriptshifter/tables/data/mansi_cyrillic.yml
  39. 1 1
      scriptshifter/tables/data/mari_cyrillic.yml
  40. 1 1
      scriptshifter/tables/data/moldovan_cyrillic.yml
  41. 1 1
      scriptshifter/tables/data/mongolian_cyrillic.yml
  42. 1 1
      scriptshifter/tables/data/mordvin_cyrillic.yml
  43. 1 1
      scriptshifter/tables/data/nenets_cyrillic.yml
  44. 1 1
      scriptshifter/tables/data/ossetic_cyrillic.yml
  45. 1 1
      scriptshifter/tables/data/romani_cyrillic.yml
  46. 1 1
      scriptshifter/tables/data/russian.yml
  47. 1 1
      scriptshifter/tables/data/serbian.yml
  48. 1 1
      scriptshifter/tables/data/shor_cyrillic.yml
  49. 1 1
      scriptshifter/tables/data/syriac_cyrillic.yml
  50. 1 1
      scriptshifter/tables/data/tajik_cyrillic.yml
  51. 1 1
      scriptshifter/tables/data/tatar-kryashen_cyrillic.yml
  52. 1 1
      scriptshifter/tables/data/tatar_cyrillic.yml
  53. 275 0
      scriptshifter/tables/data/tod_mongolian.yml
  54. 1 1
      scriptshifter/tables/data/turkmen_cyrillic.yml
  55. 1 1
      scriptshifter/tables/data/tuvinian_cyrillic.yml
  56. 1 1
      scriptshifter/tables/data/udmurt_cyrillic.yml
  57. 1 1
      scriptshifter/tables/data/uighur_cyrillic.yml
  58. 1 1
      scriptshifter/tables/data/ukrainian.yml
  59. 1 1
      scriptshifter/tables/data/uzbek_cyrillic.yml
  60. 1 1
      scriptshifter/tables/data/yakut_cyrillic.yml
  61. 1 1
      scriptshifter/tables/data/yuit_cyrillic.yml
  62. 33 14
      scriptshifter/tables/index.yml
  63. 0 9
      scriptshifter/tools.py
  64. 10 3
      scriptshifter/trans.py
  65. 176 186
      test/data/script_samples/sea.csv
  66. 26 5
      test/data/script_samples/tibetan.csv
  67. 30 19
      test/integration.py
  68. 8 59
      test/unittest/test02_transliteration.py

+ 3 - 3
doc/supported_scripts.md

@@ -63,7 +63,7 @@ third-party library.
 |  [macedonian](../scriptshifter/tables/data/macedonian.yml)  |  Macedonian  |  Y  |  Y  |  stable  |  
 |  [malayalam](../scriptshifter/tables/data/malayalam.yml)  |  Malayalam  |  Y  |  Y  |    |  s-to-r lacks capitalization
 |  [mansi_cyrillic](../scriptshifter/tables/data/mansi_cyrillic.yml)  |  Mansi (Cyrillic)  |  Y  |  Y  |  stable  |  
-|  [marathi](../scriptshifter/tables/data/marathi.yml)  |  Marathi  |  Y  |  Y  |    |  s-to-r lacks capitalization
+|  [marathi](../scriptshifter/tables/data/marathi_devanagari.yml)  |  Marathi  |  Y  |  Y  |    |  s-to-r lacks capitalization
 |  [mari_cyrillic](../scriptshifter/tables/data/mari_cyrillic.yml)  |  Mari (Cyrillic)  |  Y  |  Y  |  stable  |  
 |  [moldovan_cyrillic](../scriptshifter/tables/data/moldovan_cyrillic.yml)  |  Moldovan (Cyrillic)  |  Y  |  Y  |  stable  |  
 |  [mongolian_cyrillic](../scriptshifter/tables/data/mongolian_cyrillic.yml)  |  Mongolian (Cyrillic)  |  Y  |  Y  |  stable  |  
@@ -86,13 +86,13 @@ third-party library.
 |  [sanskrit_devanagari](../scriptshifter/tables/data/sanskrit_devanagari.yml)  |  Sanskrit (Devanagari)  |  Y  |  Y  |    |  s-to-r lacks capitalization
 |  [serbian](../scriptshifter/tables/data/serbian.yml)  |  Serbian  |  Y  |  Y  |  stable  |  
 |  [shor_cyrillic](../scriptshifter/tables/data/shor_cyrillic.yml)  |  Shor (Cyrillic)  |  Y  |  Y  |  stable  |  
-|  [sinhalese_sinhala](../scriptshifter/tables/data/sinhalese_sinhala.yml)  |  Sinhalese (Sinhala)  |  Y  |  Y  |    |  s-to-r lacks capitalization
+|  [sinhalese_sinhala](../scriptshifter/tables/data/sinhalese.yml)  |  Sinhalese (Sinhala)  |  Y  |  Y  |    |  s-to-r lacks capitalization
 |  [syriac_cyrillic](../scriptshifter/tables/data/syriac_cyrillic.yml)  |  Syriac (Cyrillic)  |  Y  |  Y  |  stable  |  
 |  [tajik_cyrillic](../scriptshifter/tables/data/tajik_cyrillic.yml)  |  Tajik (Cyrillic)  |  Y  |  Y  |  stable  |  
 |  [tamil](../scriptshifter/tables/data/tamil.yml)  |  Tamil  |  Y  |  Y  |  beta  |  
 |  [tamil_brahmi](../scriptshifter/tables/data/tamil_brahmi.yml)  |  Tamil Brahmi  |  Y  |  Y  |    |  
 |  [tamil_extended](../scriptshifter/tables/data/tamil_extended.yml)  |  Tamil (extended)  |  Y  |  Y  |    |  
-|  [tatar-kryashen_cyrillic](../scriptshifter/tables/data/tatar.yml)  |  Tatar-Kryashen (Cyrillic)  |  Y  |  Y  |  stable  |  
+|  [tatar-kryashen_cyrillic](../scriptshifter/tables/data/tatar-kryashen_cyrillic.yml)  |  Tatar-Kryashen (Cyrillic)  |  Y  |  Y  |  stable  |  
 |  [tatar_cyrillic](../scriptshifter/tables/data/tatar_cyrillic.yml)  |  Tatar (Cyrillic)  |  Y  |  Y  |  stable  |  
 |  [telugu](../scriptshifter/tables/data/telugu.yml)  |  Telugu  |  Y  |  Y  |    |  s-to-r lacks capitalization
 |  [thai](../scriptshifter/tables/data/thai.yml)  |  Thai  |  Y  |  Y  |    |  

+ 36 - 0
scriptshifter/hooks/general/__init__.py

@@ -27,6 +27,24 @@ NORM8_RE = compile(r"([.,;:\(\[\{\)\]}])\s+([.,;:\(\[\{\)\]}])")
 logger = getLogger(__name__)
 
 
+def capitalize_pre_assembly(ctx):
+    """
+    Apply the user's "capitalize" option to the pending result token list.
+
+    The capitalized list is stored back on `ctx.dest_ls`; nothing is returned.
+    """
+    mode = ctx.options.get("capitalize")
+    ctx.dest_ls = _capitalize(ctx.dest_ls, mode)
+
+
+def capitalize_post_assembly(ctx):
+    """
+    Apply the user's "capitalize" option to the assembled result string.
+
+    `ctx.dest` is split on single spaces, capitalized token by token, and
+    joined back with single spaces. The new string is returned; `ctx.dest`
+    itself is not modified here.
+    """
+    tokens = _capitalize(ctx.dest.split(" "), ctx.options.get("capitalize"))
+
+    return " ".join(tokens)
+
+
 def normalize_spacing_post_assembly(ctx):
     """
     Remove duplicate and unwanted whitespace around punctuation.
@@ -53,3 +71,21 @@ def normalize_spacing_post_assembly(ctx):
     # norm = NORM8_RE.sub(r"\1\2", norm)
 
     return norm
+
+
+def _capitalize(src, which):
+    """
+    Capitalize the first token only, or every token, of a list of tokens.
+
+    Only the first character of a token is upper-cased; the remainder is left
+    untouched, so casing already produced by a hook (e.g. "(Chu)") is kept.
+    Empty lists and empty tokens (as produced by splitting on consecutive
+    spaces) are passed through unchanged.
+
+    NOTE: this function is only used for capitalizing hook-generated
+    transliterations, which are not normally processed. Double cap rules are
+    not applicable here.
+
+    src: list of string tokens. With `which == "first"` the list is modified
+        in place; with `which == "all"` a new list is built.
+    which: "first", "all", or any other value for no capitalization.
+
+    Returns the list of (possibly capitalized) tokens.
+    """
+    if which == "first":
+        # Slicing instead of indexing keeps this safe on an empty token, and
+        # avoids str.capitalize(), which would lower-case the rest of it.
+        if src:
+            src[0] = src[0][:1].upper() + src[0][1:]
+        return src
+
+    if which == "all":
+        return [tk[:1].upper() + tk[1:] for tk in src]
+
+    return src

+ 4 - 12
scriptshifter/hooks/hebrew/dicta_api.py

@@ -3,7 +3,7 @@ from os import environ
 from requests import post
 
 from scriptshifter.exceptions import BREAK, UpstreamError
-from scriptshifter.tools import capitalize
+from scriptshifter.hooks.general import capitalize_post_assembly
 
 EP = environ.get("TXL_DICTA_EP")
 DEFAULT_GENRE = "rabbinic"
@@ -25,16 +25,8 @@ def s2r_post_config(ctx):
     except Exception:
         raise UpstreamError("Error received from Dicta service.")
 
-    rom = rsp.json().get("transliteration")
-
-    if rom:
-        if ctx.options["capitalize"] == "all":
-            rom = capitalize(rom)
-        elif ctx.options["capitalize"] == "first":
-            rom = rom[0].upper() + rom[1:]
-    else:
-        ctx.warnings.append("Upstream service returned empty result.")
-
-    ctx.dest = rom
+    ctx.dest = rsp.json().get("transliteration")
+    if ctx.dest:
+        ctx.dest = capitalize_post_assembly(ctx)
 
     return BREAK

+ 18 - 15
scriptshifter/hooks/korean/romanizer.py

@@ -28,7 +28,7 @@ from os import path
 
 from scriptshifter.exceptions import BREAK
 from scriptshifter.hooks.korean import KCONF
-from scriptshifter.tools import capitalize
+from scriptshifter.hooks.general import capitalize_post_assembly
 
 
 PWD = path.dirname(path.realpath(__file__))
@@ -62,6 +62,12 @@ def s2r_nonames_post_config(ctx):
     ctx.dest, ctx.warnings = _romanize_nonames(
             ctx.src, ctx.options)
 
+    if ctx.dest:
+        # FKR042: Capitalize all first letters
+        # FKR043: Capitalize the first letter
+        logger.debug(f"Before capitalization: {ctx.dest}")
+        ctx.dest = capitalize_post_assembly(ctx)
+
     return BREAK
 
 
@@ -74,6 +80,12 @@ def s2r_names_post_config(ctx):
     """
     ctx.dest, ctx.warnings = _romanize_names(ctx.src, ctx.options)
 
+    if ctx.dest:
+        # FKR042: Capitalize all first letters
+        # FKR043: Capitalize the first letter
+        logger.debug(f"Before capitalization: {ctx.dest}")
+        ctx.dest = capitalize_post_assembly(ctx)
+
     return BREAK
 
 
@@ -105,19 +117,9 @@ def _romanize_nonames(src, options):
 
     rom = _romanize_oclc_auto(kor)
 
-    logger.debug(f"Before capitalization: {rom}")
-    # FKR042: Capitalize all first letters
-    if options["capitalize"] == "all":
-        rom = capitalize(rom)
-    # FKR043: Capitalize the first letter
-    elif options["capitalize"] == "first":
-        rom = rom[0].upper() + rom[1:]
-
     # FKR044: Ambiguities
     ambi = re.sub("[,.\";: ]+", " ", rom)
 
-    # TODO Decide what to do with these. There is no facility for outputting
-    # warnings or notes to the user yet.
     warnings = []
     _fkr_log(45)
     for exp, warn in KCONF["fkr045"].items():
@@ -308,10 +310,11 @@ def _kor_corp_name_rom(src):
         src = src[:-4]
         yu = "R"
 
-    rom_tok = []
-    for tok in src.split(" "):
-        rom_tok.append(_romanize_oclc_auto(tok))
-    rom = capitalize(" ".join(rom_tok))
+    rom_tok = [
+        _romanize_oclc_auto(tok)
+        for tok in src.split(" ")
+    ]
+    rom = " ".join(rom_tok)
 
     if chu == "L":
         rom = "(Chu) " + rom

+ 1 - 9
scriptshifter/hooks/yiddish_/__init__.py

@@ -16,24 +16,16 @@ external package name.
 from yiddish import detransliterate, transliterate
 
 from scriptshifter.exceptions import BREAK
-from scriptshifter.tools import capitalize
 
 
 def s2r_post_config(ctx):
     """
     Script to Roman.
     """
-    rom = transliterate(
+    ctx.dest = transliterate(
             ctx.src, loc=True,
             loshn_koydesh=ctx.options.get("loshn_koydesh"))
 
-    if ctx.options["capitalize"] == "all":
-        rom = capitalize(rom)
-    elif ctx.options["capitalize"] == "first":
-        rom = rom[0].upper() + rom[1:]
-
-    ctx.dest = rom
-
     return BREAK
 
 

+ 0 - 133
scriptshifter/tables/data/_cyrillic_base.yml

@@ -1,133 +0,0 @@
-general:
-  name: Cyrillic base
-  parents:
-    - _ignore_base
-  notes: >
-    copied from Russian .cfg file and stripped
-    off language-specific tokens. Russian ignore list
-    has been left here on purpose, assuming it's valid
-    for all child languages.
-
-roman_to_script:
-  map:
-    "A": "\u0410"
-    "a": "\u0430"
-    "B": "\u0411"
-    "b": "\u0431"
-    "V": "\u0412"
-    "v": "\u0432"
-    "D": "\u0414"
-    "d": "\u0434"
-    "E": "\u0415"
-    "e": "\u0435"
-    # this conversion shouldn't be needed, but does no harm
-    "Z": "\u0417"
-    "z": "\u0437"
-    "I\u0306": "\u0419"
-    # this conversion shouldn't be needed, but does no harm
-    "I\uFE20U\uFE21": "\u042E"
-    # this conversion shouldn't be needed, but does no harm
-    "I\uFE20u\uFE21": "\u042E"
-    "I\uFE20A\uFE21": "\u042F"
-    # this conversion shouldn't be needed, but does no harm
-    "I\uFE20a\uFE21": "\u042F"
-    "i\u0306": "\u0439"
-    "i\uFE20u\uFE21": "\u044E"
-    "i\uFE20a\uFE21": "\u044F"
-    # this conversion shouldn't be needed, but does no harm
-    "KH": "\u0425"
-    "Kh": "\u0425"
-    "K": "\u041A"
-    "kh": "\u0445"
-    "k": "\u043A"
-    "L": "\u041B"
-    "l": "\u043B"
-    "M": "\u041C"
-    "m": "\u043C"
-    "N": "\u041D"
-    "n": "\u043D"
-    "O": "\u041E"
-    "o": "\u043E"
-    "P": "\u041F"
-    "p": "\u043F"
-    "R": "\u0420"
-    "r": "\u0440"
-    # this conversion shouldn't be needed, but does no harm
-    # this conversion shouldn't be needed, but does no harm
-    "SH": "\u0428"
-    "Sh": "\u0428"
-    "S": "\u0421"
-    "sh": "\u0448"
-    "s": "\u0441"
-    # this conversion shouldn't be needed, but does no harm
-    "T": "\u0422"
-    "t": "\u0442"
-    "U": "\u0423"
-    "u": "\u0443"
-    "F": "\u0424"
-    "f": "\u0444"
-    # this conversion shouldn't be needed, but does no harm
-    "CH": "\u0427"
-    "Ch": "\u0427"
-    "ch": "\u0447"
-    # this conversion shouldn't be needed, but does no harm
-    "\uFE20": ""
-    # this conversion shouldn't be needed, but does no harm
-    "\uFE21": ""
-    # this conversion is ambiguous - \u042C is also theoretically possible
-    "\u02B9": "\u044C"
-
-script_to_roman:
-  map:
-    "\u0404": "I\uFE20E\uFE21"
-    "\u0407": "I\u0308"
-    "\u0410": "A"
-    "\u0411": "B"
-    "\u0412": "V"
-    "\u0414": "D"
-    "\u0415": "E"
-    "\u0417": "Z"
-    "\u0419": "I\u0306"
-    "\u041A": "K"
-    "\u041B": "L"
-    "\u041C": "M"
-    "\u041D": "N"
-    "\u041E": "O"
-    "\u041F": "P"
-    "\u0420": "R"
-    "\u0421": "S"
-    "\u0422": "T"
-    "\u0423": "U"
-    "\u0424": "F"
-    "\u0425": "Kh"
-    "\u0427": "Ch"
-    "\u0428": "Sh"
-    "\u0429": "Shch"
-    "\u042C": "\u02B9"
-    "\u042E": "I\uFE20U\uFE21"
-    "\u042F": "I\uFE20A\uFE21"
-    "\u0430": "a"
-    "\u0431": "b"
-    "\u0432": "v"
-    "\u0434": "d"
-    "\u0435": "e"
-    "\u0437": "z"
-    "\u0439": "i\u0306"
-    "\u043A": "k"
-    "\u043B": "l"
-    "\u043C": "m"
-    "\u043D": "n"
-    "\u043E": "o"
-    "\u043F": "p"
-    "\u0440": "r"
-    "\u0441": "s"
-    "\u0442": "t"
-    "\u0443": "u"
-    "\u0444": "f"
-    "\u0445": "kh"
-    "\u0447": "ch"
-    "\u0448": "sh"
-    "\u0449": "shch"
-    "\u044C": "\u02B9"
-    "\u044E": "i\uFE20u\uFE21"
-    "\u044F": "i\uFE20a\uFE21"

+ 23 - 16
scriptshifter/tables/data/_ignore_base.yml

@@ -9,6 +9,12 @@ roman_to_script:
     - "date of publication not identified"
     - "place of publication not identified"
     - "publisher not identified"
+    - "and one other"
+    - "et al."
+  ignore_ptn:
+    - "and ([a-z0-9]+ )?others"
+
+    # Incorrectly entered (but frequently found) Roman numerals.
     # NOTE There is ambiguity about ignoring these
     # words. Note that the single-character Roman
     # numerals are not included on purpose.
@@ -16,23 +22,24 @@ roman_to_script:
     # dedicated U+2160÷U+216F (uppercase Roman
     # numerals) and/or U+2170÷U+217F (lower case Roman
     # numerals) ranges to avoid this ambiguity.
-    - "and one other"
-    - "et al."
-  ignore_ptn:
-    - "and ([a-z0-9]+ )?others"
-    - "I{2,3}"
-    - "I(V|X)"
-    - "LI{,3}"
-    - "LI?(V|X)"
-    - "L(V|X{1,3})I{,3}"
-    - "LX{1,3}I?V"
-    - "LX{1,3}VI{,3}"
-    - "(V|X{1,3})I{,3}"
-    - "X{1,3}I{,3}"
-    - "X{1,3}I(V|X)"
-    - "X{1,3}VI{,3}"
+    - "\\bI{2,3}\\b"
+    - "\\bI(V|X)\\b"
+    - "\\bLI{,3}\\b"
+    - "\\bLI?(V|X)\\b"
+    - "\\bL(V|X{1,3})I{,3}\\b"
+    - "\\bLX{1,3}I?V\\b"
+    - "\\bLX{1,3}VI{,3}\\b"
+    - "\\b(V|X{1,3})I{,3}\\b"
+    - "\\bX{1,3}I{,3}\\b"
+    - "\\bX{1,3}I(V|X)\\b"
+    - "\\bX{1,3}VI{,3}\\b"
+
+    # MARC sub-field markers.
+    - "\\b[\u2021$][0-9a-z]\\b"
 
 script_to_roman:
   ignore:
     - " "
-
+  ignore_ptn:
+    # MARC sub-field markers.
+    - "\\b[\u2021$][0-9a-z]\\b"

+ 1 - 1
scriptshifter/tables/data/abkhaz_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Abkhaz (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/altai_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Altai (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 7 - 4
scriptshifter/tables/data/arabic.yml

@@ -15,11 +15,12 @@ general:
 roman_to_script:
   map:
 
-    # Original table by David Bucknum
-    # Last updated 25 January 2019
+    # Original table by David Bucknum, 5 April 2010
+    # Updated, 25 January 2019
     # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO
     # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin
     # Scripts Conceptually"
+    # Updated, 26 March 2025 by Randall K. Barry to reverse truncation marks for ScriptShifter
 
     # Punctuation marks:
     "*": "\u066D"
@@ -134,11 +135,11 @@ roman_to_script:
     "fi\u0304-": "\u0641\u064A"
     "ka-": "\u0643"
 
-    # Vowels and vowel/consonant combinations
+    # Vowels and vowel/consonant combinations - ta-marbutah at end of word
     "ah%": "\u0629"
     "at%": "\u0629"
 
-    # tanwin
+    # tanwin at end of word
     "an%": "\u0627"
 
     # ayn-alif combo
@@ -148,6 +149,8 @@ roman_to_script:
     "\u02BBA\u0304": "\u0639\u0627"
     "\u02BBa\u0304": "\u0639\u0627"
 
+    "\u02BBI\u0304Y": "\u0639\u064A"
+    "\u02BBi\u0304y": "\u0639\u064A"
     "\u02BBI\u0304": "\u0639\u064A"
     "\u02BBi\u0304": "\u0639\u064A"
 

+ 1 - 1
scriptshifter/tables/data/azerbaijani_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Azerbaijani (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/bashkir_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Bashkir (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/belarusian.yml

@@ -1,7 +1,7 @@
 general:
   name: Belarusian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 18 - 38
scriptshifter/tables/data/bulgarian.yml

@@ -1,58 +1,38 @@
 general:
   name: Bulgarian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:
-    "G": "\u0413"
-    "g": "\u0433"
-    # this conversion shouldn't be needed, but does no harm
-    "ZH": "\u0416"
-    "Zh": "\u0416"
-    "zh": "\u0436"
-    "I\uFE20E\uFE21": "\u0462"
-    # this conversion shouldn't be needed, but does no harm
-    "I\uFE20e\uFE21": "\u0462"
-    # this conversion shouldn't be needed, but does no harm
-    # this conversion shouldn't be needed, but does no harm
-    "I": "\u0418"
-    "i\uFE20e\uFE21": "\u0463"
-    "i": "\u0438"
-    # this conversion shouldn't be needed, but does no harm
     "SHT": "\u0429"
     "Sht": "\u0429"
     "sht": "\u0449"
-    "T\uFE20S\uFE21": "\u0426"
-    # this conversion shouldn't be needed, but does no harm
-    "T\uFE20s\uFE21": "\u0426"
-    "t\uFE20s\uFE21": "\u0446"
-    "U\u0310": "\u046A"
+    "U\u0306": "\u042A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u016C": "\u042A"
     "u\u0306": "\u044A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u016D": "\u044A"
+    "U\u0310": "\u046A"
     "u\u0310": "\u046B"
     # this conversion is ambiguous - \u042A is also theoretically possible
     "\u02BA": "\u044A"
+    # upper case hard sign is unlikely to occur
+    "\u02BA\u0332": "\u042A"
 
 script_to_roman:
   map:
-    "\u044C": ""
-    "\u042C": ""
-    "\u044A": ""
-    "\u042A%": ""  # Final
-    "\u042A": "u\u0306"
-    "\u0413": "G"
-    "\u0433": "g"
-    "\u0416": "Zh"
-    "\u0436": "zh"
-    "\u0462": "I\uFE20E\uFE21"
-    "\u0418": "I"
-    "\u0463": "i\uFE20e\uFE21"
-    "\u0438": "i"
     "\u0429": "Sht"
+    "\u042A": "U\u0306"
+    # Capital letter hard sign at the end of a word (rare)
+    "\u042A%": "\u02BA\u0332"
+    "\u042C": "\u02B9\u0332"
     "\u0449": "sht"
-    "\u0426": "T\uFE20S\uFE21"
-    "\u0446": "t\uFE20s\uFE21"
+    "\u044A": "u\u0306"
+    # Small letter hard sign at the end of a word (rare)
+    "\u044A%": "\u02BA"
+    "\u044C": "\u02B9"
     "\u046A": "U\u0310"
     "\u046B": "u\u0310"
-    "\u042A": "u\u016C"
-    "\u044A": "u\u016D"
+    

+ 1 - 2
scriptshifter/tables/data/buriat_cyrillic.yml

@@ -1,8 +1,7 @@
 general:
   name: Buriat (Cyrillic)
   parents:
-    - _cyrillic_base
-    - _ignore_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/chukchi_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Chukchi (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/church_slavonic.yml

@@ -1,7 +1,7 @@
 general:
   name: Church Slavonic
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/chuvash_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Chuvash (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 705 - 700
scriptshifter/tables/data/asian_cyrillic.yml → scriptshifter/tables/data/cyrillic_generic.yml

@@ -1,700 +1,705 @@
-general:
-  name: Asian (Cyrillic)
-  parents:
-    - _cyrillic_base
-
-# COMMON COMBINING CHARACTERS (always follow a base letter):
-# combining grave \u0300
-# combining acute \u0301
-# combining circumflex \u0302
-# combining tilde \u0303
-# combining macron \u0304
-# combining breve \u0306
-# combining dot above \u0307
-# combining diaeresis \u0308
-# combining ring above \u030A
-# combining double acute \u030B
-# combining caron (hachek) \u030C
-# combining candrabindu \u0310
-# combining dot below \u0323
-# combining dieresis below \u0324
-# combining comma below \u0326 (Romanian, Latvian, Livonian)
-# combining cedilla \u0327 (French, Turkish, Azeri)
-# combining ogonek (hook) \u0328 (Polish, Lithuanian)
-# combining low line \u0332
-# combining double low line \u0333
-# combining left ligature \uFE20 (Cyrillic transliteration)
-# combining right ligature \uFE21 (Cyrillic transliteration)
-# soft sign/prime (spacing) \u02B9(Cyrillic transliteration)
-# hard sign/double prime (spacing) \u02BA (Cyrillic transliteration)
-# ayn(spacing) \u02BB (Semitic and Caucasian languages)
-# alif (spacing) \u02BC (Semitic languages)
-# middle dot (space) \u00B7) (Catalan)
-
-roman_to_script:
-  map:
-    "A\uFE20E\uFE21": "\u04D4"
-    "A\uFE20e\uFE21": "\u04D4"
-    "a\uFE20e\uFE21": "\u04D5"
-    "A\u0306\u0323": "\u04D0"
-    "a\u0306\u0323": "\u04D1"
-    "\u00C6": "\u04D4"
-    "\u00E6": "\u04D5"
-    "A\u0306": "\u04D8"
-    "a\u0306": "\u04D9"
-    "A\u030B": "\u04DA"
-    "a\u030B": "\u04DB"
-    "A\u0308": "\u04D2"
-    "a\u0308": "\u04D3"
-    "A\u0310": "\u0518"
-    "a\u0310": "\u0519"
-    
-    "B": "\u0411"
-    "b": "\u0431"
-    
-    "C\u0301h\u0301": "\u04BE"
-    "c\u0301h\u0301": "\u04BF"
-    "C\u0301h": "\u04BC"
-    "c\u0301h": "\u04BD"
-    "C\u0301": "\u040B"
-    "c\u0301": "\u045B"
-    "C\u0308h": "\u04F4"
-    "c\u0308h": "\u04F5"
-    "C\u0323h": "\u04CB"
-    "c\u0323h": "\u04CC"
-    
-    "D\u0301": "\u0502"
-    "d\u0301": "\u0503"
-    "D\u0307": "\u0500"
-    "d\u0307": "\u0501"
-    "D\uFE20c\uFE21h": "\u052C"
-    "d\uFE20c\uFE21h": "\u052D"
-    "D\uFE20z\uFE21h": "\u052A"
-    "d\uFE20z\uFE21h": "\u052B"
-    "D\uFE20Z\uFE21": "\u04E0"
-    "d\uFE20z\uFE21": "\u04E1"
-    "Dz\u030C": "\u040F"
-    "dz\u030C": "\u045F"
-    "D": "\u0414"
-    "d": "\u0434"
-    
-    "E\u0300": "\u0400"
-    "e\u0300": "\u0450"
-    "E\u0304": "\u0404"
-    "e\u0304": "\u0454"
-    "E\u0306": "\u04D6"
-    "e\u0306": "\u04D7"
-    "E\u0306\u0323": "\u048C"
-    "e\u0306\u0323": "\u048D"
-    "E\u0307": "\u042D"
-    "e\u0307": "\u044D"
-    "E\u0308\u0323": "\u04EC"
-    "e\u0308\u0323": "\u04ED"
-    "E\u0308": "\u0401"
-    "e\u0308": "\u0451"
-    "E\u0328": "\u0466"
-    "e\u0328": "\u0467"
-    
-    "F\u0307": "\u0472"
-    "f\u0307": "\u0473"
-    "F": "\u0424"
-    "f": "\u0444"
-    
-    "Gh\u0327": "\u04FA"
-    "gh\u0327": "\u04FB"
-    "Gh": "\u0492"
-    "gh": "\u0493"
-    "G\u0301": "\u0403"
-    "g\u0301": "\u0453"
-    "G\u0306": "\u0490"
-    "g\u0306": "\u0491"
-    "G\u0307": "\u049C"
-    "g\u0307": "\u049D"
-    "G\u0323": "\u04F6"
-    "g\u0323": "\u04F7"
-    "G\u0327": "\u0494"
-    "g\u0327": "\u0495"
-    
-    "H\u0304": "\u04FE"
-    "h\u0304": "\u04FF"
-    "H\u0327": "\u04FC"
-    "h\u0327": "\u04FD"
-    "H\u0307": "\u04BA"
-    "h\u0307": "\u04BB"
-    "H\u0308": "\u04C0"
-    "h\u0308": "\u04CF"
-    
-    "I\u0300": "\u040D"
-    "i\u0300": "\u045D"
-    "I\u0304\u0323": "\u04E2"
-    "i\u0304\u0323": "\u04E3"
-    "I\u0304": "\u0406"
-    "i\u0304": "\u0456"
-    "I\u0306\u0323": "\u048A"
-    "i\u0306\u0323": "\u048B"
-    "I\u0306": "\u0419"
-    "i\u0306": "\u0439"
-    "I\u0308\u0323": "\u04E4"
-    "i\u0308\u0323": "\u04E5"
-    "I\u0308": "\u0407"
-    "i\u0308": "\u0457"
-    "I\u0310": "\u0408"
-    "i\u0310": "\u0458"
-    
-    "I\uFE20A\uFE21": "\u042F"
-    "i\uFE20a\uFE21": "\u044F"
-    "A": "\u0410"
-    "a": "\u0430"
-    
-    "I\uFE20E\uFE21\u0304": "\u0464"
-    "i\uFE20e\uFE21\u0304": "\u0465"
-    "I\uFE20E\uFE21\u0328": "\u0468"
-    "i\uFE20e\uFE21\u0328": "\u0469"
-    "I\uFE20E\uFE21": "\u0462"
-    "i\uFE20e\uFE21": "\u0463"
-    "E": "\u0415"
-    "e": "\u0435"
-    
-    "I\uFE20O\uFE21\u0328": "\u046C"
-    "i\uFE20o\uFE21\u0328": "\u046D"
-    "I\uFE20U\uFE21": "\u042E"
-    "i\uFE20u\uFE21": "\u044E"
-    "I": "\u0418"
-    "i": "\u0438"
-    
-    "J\u0304": "\u04B8"
-    "j\u0304": "\u04B9"
-    "J\u0306": "\u04C1"
-    "j\u0306": "\u04C2"
-    "J\u0302": "\u04B6"
-    "j\u0302": "\u04B7"
-    "J\u0308": "\u04DC"
-    "j\u0308": "\u04DD"
-    
-    "K\u0300": "\u051E"
-    "k\u0300": "\u051F"
-    "K\u0301": "\u040C"
-    "k\u0301": "\u045C"
-    "K\uFE20H\uFE21": "\u04B2"
-    "k\uFE20h\uFE21": "\u04B3"
-    "Kh": "\u0425"
-    "kh": "\u0445"
-    "K\uFE20S\uFE21": "\u046E"
-    "k\uFE20s\uFE21": "\u046F"
-    "K": "\u041A"
-    "k": "\u043A"
-    
-    "Lj": "\u0409"
-    "lj": "\u0459"
-    "Lkh\u0307": "\u0514"
-    "lkh\u0307": "\u0515"
-    "L\u0301": "\u0508"
-    "l\u0301": "\u0509"
-    "L\u0321": "\u04C5"
-    "l\u0326": "\u04C6"
-    "L\u0323": "\u052E"
-    "l\u0323": "\u052F"
-    "L\u0327": "\u0512"
-    "l\u0327": "\u0513"
-    "L\u0324": "\u0520"
-    "l\u0324": "\u0521"
-    "L": "\u041B"
-    "l": "\u043B"
-    
-    "M\u0323": "\u04CD"
-    "m\u0323": "\u04CE"
-    "M": "\u041C"
-    "m": "\u043C"
-    
-    "Nj": "\u040A"
-    "nj": "\u045A"
-    "N\u0301G\u0300": "\u04A4"
-    "n\u0301g\u0300": "\u04A5"
-    "N\u0301": "\u050A"
-    "n\u0301": "\u050B"
-    "N\u0326": "\u0528"
-    "n\u0326": "\u0529"
-    "N\u0327": "\u0522"
-    "n\u0327": "\u0523"
-    "N\uFE20\u0323G\uFE21": "\u04C9"
-    "n\uFE20\u0323g\uFE21": "\u04CA"
-    "N\uFE20\u0327G\uFE21": "\u04C7"
-    "n\uFE20\u0327g\uFE21": "\u04C8"
-    "N\uFE20G\uFE21": "\u04A2"
-    "n\uFE20g\uFE21": "\u04A3"
-    "No\u0332": "\u2116"
-    "N": "\u041D"
-    "n": "\u043D"
-    
-    "G": "\u0413"
-    "g": "\u0433"
-    
-    "J": "\u0496"
-    "j": "\u0497"
-    
-    "O\u0303": "\u047C"
-    "o\u0303": "\u047D"
-    "O\u0304\u0323": "\u047A"
-    "o\u0304\u0323": "\u047B"
-    "O\u0304\uFE20T\uFE21": "\u047E"
-    "o\u0304\uFE20t\uFE21": "\u047F"
-    "O\u0304\u0324": "\u0460"
-    "o\u0304\u0324": "\u0461"
-    "O\u0304": "\u04EA"
-    "o\u0304": "\u04EB"
-    "O\u0307": "\u04E8"
-    "o\u0307": "\u04E9"
-    "O\u0308": "\u04E6"
-    "o\u0308": "\u04E7"
-    "O\u0328": "\u046A"
-    "o\u0328": "\u046B"
-    "O\uFE20u\uFE21": "\u0478"
-    "o\uFE20u\uFE21": "\u0479"
-    "O": "\u041E"
-    "o": "\u043E"
-    
-    "Ph": "\u04A6"
-    "ph": "\u04A7"
-    "P\u0323": "\u0524"
-    "p\u0323": "\u0525"
-    "P\uFE20S\uFE21": "\u0470"
-    "p\uFE20s\uFE21": "\u0471"
-    "P": "\u041F"
-    "p": "\u043F"
-    
-    "Q\u0300": "\u04A0"
-    "q\u0300": "\u04A1"
-    "Q\u0302": "\u0480"
-    "q\u0302": "\u0481"
-    "Q\u0304": "\u049E"
-    "q\u0304": "\u049F"
-    "Q\u0307": "\u04C3"
-    "q\u0307": "\u04C4"
-    "Q\u0308": "\u051A"
-    "q\u0308": "\u051B"
-    "Q": "\u049A"
-    "q": "\u049B"
-    
-    "Rkh\u0307": "\u0516"
-    "rkh\u0307": "\u0517"
-    "R\u0306": "\u048E"
-    "r\u0306": "\u048F"
-    "R": "\u0420"
-    "r": "\u0440"
-    
-    "Shch": "\u0429"
-    "shch": "\u0449"
-    "Sh\u0323": "\u0526"
-    "sh\u0323": "\u0527"
-    "Sh": "\u0428"
-    "sh": "\u0448"
-    "S\u0301": "\u050C"
-    "s\u0301": "\u050D"
-    "S\u0307": "\u0405"
-    "s\u0307": "\u0455"
-    
-    "Ch": "\u0427"
-    "ch": "\u0447"
-    "C": "\u0426"
-    "c": "\u0446"
-    
-    "Th": "\u04AA"
-    "th": "\u04AB"
-    "T\u0301": "\u050E"
-    "t\u0301": "\u050F"
-    "T\u0327": "\u04AC"
-    "t\u0327": "\u04AD"
-    "T\uFE20H\uFE21": "\u0498"
-    "t\uFE20h\uFE21": "\u0499"
-    "T\uFE20S\uFE21": "\u0426"
-    "t\uFE20s\uFE21": "\u0446"
-    "T\uFE20S\uFE21\u0307": "\u04B4"
-    "t\uFE20s\uFE21\u0307": "\u04B5"
-    
-    "S": "\u0421"
-    "s": "\u0441"
-    
-    "T": "\u0422"
-    "t": "\u0442"
-    
-    "U\u0302": "\u04B0"
-    "u\u0302": "\u04B1"
-    "U\u0304": "\u04EE"
-    "u\u0304": "\u04EF"
-    "U\u0306": "\u040E"
-    "u\u0306": "\u045E"
-    "U\u0307": "\u04AE"
-    "u\u0307": "\u04AF"
-    "U\u0308": "\u04F0"
-    "u\u0308": "\u04F1"
-    "U\u030B": "\u04F2"
-    "u\u030B": "\u04F3"
-    "U": "\u0423"
-    "u": "\u0443"
-    
-    "V\u0307": "\u0474"
-    "v\u0307": "\u0475"
-    "V\u0308": "\u0476"
-    "v\u0308": "\u0477"
-    "V": "\u0412"
-    "v": "\u0432"
-    
-    "W\u0308": "\u051C"
-    "w\u0308": "\u051D"
-    "W": "\u04A8"
-    "w": "\u04A9"
-    
-    "X": "\u0058"
-    "x": "\u0078"
-    
-    "Y\u0307": "\u0474"
-    "y\u0307": "\u0475"
-    "Y\u0308": "\u04F8"
-    "y\u0308": "\u04F9"
-    "Y": "\u042B"
-    "y": "\u044B"
-    
-    "Zh": "\u0416"
-    "zh": "\u0436"
-    "Z\u0301": "\u0504"
-    "z\u0301": "\u0505"
-    "Z\u0307": "\u0510"
-    "z\u0307": "\u0511"
-    "Z\u0308": "\u04DE"
-    "z\u0308": "\u04DF"
-    "Z\u0327": "\u0506"
-    "z\u0327": "\u0507"
-    "Z": "\u0417"
-    "z": "\u0437"
-    
-    "H": "\u0413"
-    "h": "\u0433"
-    
-    "\u0110": "\u0402"
-    "\u0111": "\u0452"
-    "\u02B9\u0333": "\u042C"
-    "\u02B9": "\u044C"
-    "\u02BA\u0333": "\u042A"
-    "\u02BA": "\u044A"
-    "\u0303": "\u0487"
-    "\u0311": "\u0484"
-    "\u0313": "\u0486"
-    "\u0314": "\u0485"
-    "\u007E": "\u0483"
-    "(|)": "\u0482"
-    "(^)": "\u0488"
-    "(')": "\u0489"
-    
-    "\u003C\u003C": "\u00AB"
-    "\u003E\u003E": "\u00BB"
-
-script_to_roman:
-  map:
-    
-    "\u00AB": "\""
-    "\u00BB": "\""
-    "\u2116": "No\u0332"
-    "\u0400": "E\u0300"
-    "\u0401": "E\u0308"
-    "\u0402": "\u0110"
-    "\u0403": "G\u0301"
-    "\u0404": "E\u0304"
-    "\u0405": "S\u0307"
-    "\u0406": "I\u0304"
-    "\u0407": "I\u0308"
-    "\u0408": "I\u0310"
-    "\u0409": "Lj"
-    "\u040A": "Nj"
-    "\u040B": "C\u0301"
-    "\u040C": "K\u0301"
-    "\u040D": "I\u0300"
-    "\u040E": "U\u0306"
-    "\u040F": "Dz\u030C"
-    "\u0410": "A"
-    "\u0411": "B"
-    "\u0412": "V"
-    "\u0413": "G"
-    "\u0414": "D"
-    "\u0415": "E"
-    "\u0416": "Zh"
-    "\u0417": "Z"
-    "\u0418": "I"
-    "\u0419": "I\u0306"
-    "\u041A": "K"
-    "\u041B": "L"
-    "\u041C": "M"
-    "\u041D": "N"
-    "\u041E": "O"
-    "\u041F": "P"
-    "\u0420": "R"
-    "\u0421": "S"
-    "\u0422": "T"
-    "\u0423": "U"
-    "\u0424": "F"
-    "\u0425": "Kh"
-    "\u0426": "T\uFE20S\uFE21"
-    "\u0427": "Ch"
-    "\u0428": "Sh"
-    "\u0429": "Shch"
-    "\u042A": "\u02BA\u0333"
-    "\u042B": "Y"
-    "\u042C": "\u02B9\u0333"
-    "\u042D": "E\u0307"
-    "\u042E": "I\uFE20U\uFE21"
-    "\u042F": "I\uFE20A\uFE21"
-    "\u0430": "a"
-    "\u0431": "b"
-    "\u0432": "v"
-    "\u0433": "g"
-    "\u0434": "d"
-    "\u0435": "e"
-    "\u0436": "zh"
-    "\u0437": "z"
-    "\u0438": "i"
-    "\u0439": "i\u0306"
-    "\u043A": "k"
-    "\u043B": "l"
-    "\u043C": "m"
-    "\u043D": "n"
-    "\u043E": "o"
-    "\u043F": "p"
-    "\u0440": "r"
-    "\u0441": "s"
-    "\u0442": "t"
-    "\u0443": "u"
-    "\u0444": "f"
-    "\u0445": "kh"
-    "\u0446": "t\uFE20s\uFE21"
-    "\u0447": "ch"
-    "\u0448": "sh"
-    "\u0449": "shch"
-    "\u044A": "\u02BA"
-    "\u044B": "y"
-    "\u044C": "\u02B9"
-    "\u044D": "e\u0307"
-    "\u044E": "i\uFE20u\uFE21"
-    "\u044F": "i\uFE20a\uFE21"
-    "\u0450": "e\u0300"
-    "\u0451": "e\u0308"
-    "\u0452": "\u0111"
-    "\u0453": "g\u0301"
-    "\u0454": "e\u0304"
-    "\u0455": "s\u0307"
-    "\u0456": "i\u0304"
-    "\u0457": "i\u0308"
-    "\u0458": "i\u0310"
-    "\u0459": "lj"
-    "\u045A": "nj"
-    "\u045B": "c\u0301"
-    "\u045C": "k\u0301"
-    "\u045D": "i\u0300"
-    "\u045E": "u\u0306"
-    "\u045F": "dz\u030C"
-    "\u0460": "O\u0304\u0324"
-    "\u0461": "o\u0304\u0324"
-    "\u0462": "I\uFE20E\uFE21"
-    "\u0463": "i\uFE20e\uFE21"
-    "\u0464": "I\uFE20E\uFE21\u0304"
-    "\u0465": "i\uFE20e\uFE21\u0304"
-    "\u0466": "E\u0328"
-    "\u0467": "e\u0328"
-    "\u0468": "I\uFE20E\uFE21\u0328"
-    "\u0469": "i\uFE20e\uFE21\u0328"
-    "\u046A": "O\u0328"
-    "\u046B": "o\u0328"
-    "\u046C": "I\uFE20O\uFE21\u0328"
-    "\u046D": "i\uFE20o\uFE21\u0328"
-    "\u046E": "K\uFE20S\uFE21"
-    "\u046F": "k\uFE20s\uFE21"
-    "\u0470": "P\uFE20S\uFE21"
-    "\u0471": "p\uFE20s\uFE21"
-    "\u0472": "F\u0307"
-    "\u0473": "f\u0307"
-    "\u0474": "V\u0307"
-    "\u0475": "v\u0307"
-    "\u0476": "V\u0308"
-    "\u0477": "v\u0308"
-    "\u0478": "O\uFE20u\uFE21"
-    "\u0479": "o\uFE20u\uFE21"
-    "\u047A": "O\u0304\u0323"
-    "\u047B": "o\u0304\u0323"
-    "\u047C": "O\u0303"
-    "\u047D": "o\u0303"
-    "\u047E": "O\u0304\uFE20T\uFE21"
-    "\u047F": "o\u0304\uFE20t\uFE21"
-    "\u0480": "Q\u0302"
-    "\u0481": "q\u0302"
-    "\u0482": "(|)"
-    "\u0483": "\u007E"
-    "\u0484": "\u0311"
-    "\u0485": "\u0314"
-    "\u0486": "\u0313"
-    "\u0487": "\u0303"
-    "\u0488": "(^)"
-    "\u0489": "(')"
-    "\u048A": "I\u0306\u0323"
-    "\u048B": "i\u0306\u0323"
-    "\u048C": "E\u0306\u0323"
-    "\u048D": "e\u0306\u0323"
-    "\u048E": "R\u0306"
-    "\u048F": "r\u0306"
-    "\u0490": "G\u0306"
-    "\u0491": "g\u0306"
-    "\u0492": "Gh"
-    "\u0493": "gh"
-    "\u0494": "G\u0327"
-    "\u0495": "g\u0327"
-    "\u0496": "J"
-    "\u0497": "j"
-    "\u0498": "T\uFE20H\uFE21"
-    "\u0499": "t\uFE20h\uFE21"
-    "\u049A": "Q"
-    "\u049B": "q"
-    "\u049C": "G\u0307"
-    "\u049D": "g\u0307"
-    "\u049E": "Q\u0304"
-    "\u049F": "q\u0304"
-    "\u04A0": "Q\u0300"
-    "\u04A1": "q\u0300"
-    "\u04A2": "N\uFE20G\uFE21"
-    "\u04A3": "n\uFE20g\uFE21"
-    "\u04A4": "N\u0301G\u0300"
-    "\u04A5": "n\u0301g\u0300"
-    "\u04A6": "Ph"
-    "\u04A7": "ph"
-    "\u04A8": "W"
-    "\u04A9": "w"
-    "\u04AA": "Th"
-    "\u04AB": "th"
-    "\u04AC": "T\u0327"
-    "\u04AD": "t\u0327"
-    "\u04AE": "U\u0307"
-    "\u04AF": "u\u0307"
-    "\u04B0": "U\u0302"
-    "\u04B1": "u\u0302"
-    "\u04B2": "K\uFE20H\uFE21"
-    "\u04B3": "k\uFE20h\uFE21"
-    "\u04B4": "T\uFE20S\uFE21\u0307"
-    "\u04B5": "t\uFE20s\uFE21\u0307"
-    "\u04B6": "J\u0302"
-    "\u04B7": "j\u0302"
-    "\u04B8": "J\u0304"
-    "\u04B9": "j\u0304"
-    "\u04BA": "H\u0307"
-    "\u04BB": "h\u0307"
-    "\u04BC": "C\u0301h"
-    "\u04BD": "c\u0301h"
-    "\u04BE": "C\u0301h\u0301"
-    "\u04BF": "c\u0301h\u0301"
-    "\u04C0": "H\u0308"
-    "\u04C1": "J\u0306"
-    "\u04C2": "j\u0306"
-    "\u04C3": "Q\u0307"
-    "\u04C4": "q\u0307"
-    "\u04C5": "L\u0326"
-    "\u04C6": "l\u0326"
-    "\u04C7": "N\uFE20\u0327G\uFE21"
-    "\u04C8": "n\uFE20\u0327g\uFE21"
-    "\u04C9": "N\uFE20\u0323G\uFE21"
-    "\u04CA": "n\uFE20\u0323g\uFE21"
-    "\u04CB": "C\u0323h"
-    "\u04CC": "c\u0323h"
-    "\u04CD": "M\u0323"
-    "\u04CE": "m\u0323"
-    "\u04CF": "h\u0308"
-    "\u04D0": "A\u0306\u0323"
-    "\u04D1": "a\u0306\u0323"
-    "\u04D2": "A\u0308"
-    "\u04D3": "a\u0308"
-    "\u04D4": "\u00C6"
-    "\u04D5": "\u00E6"
-    "\u04D6": "E\u0306"
-    "\u04D7": "e\u0306"
-    "\u04D8": "A\u0306"
-    "\u04D9": "a\u0306"
-    "\u04DA": "A\u030B"
-    "\u04DB": "a\u030B"
-    "\u04DC": "J\u0308"
-    "\u04DD": "j\u0308"
-    "\u04DE": "Z\u0308"
-    "\u04DF": "z\u0308"
-    "\u04E0": "D\uFE20Z\uFE21"
-    "\u04E1": "d\uFE20z\uFE21"
-    "\u04E2": "I\u0304\u0323"
-    "\u04E3": "i\u0304\u0323"
-    "\u04E4": "I\u0308\u0323"
-    "\u04E5": "i\u0308\u0323"
-    "\u04E6": "O\u0308"
-    "\u04E7": "o\u0308"
-    "\u04E8": "O\u0307"
-    "\u04E9": "o\u0307"
-    "\u04EA": "O\u0304"
-    "\u04EB": "o\u0304"
-    "\u04EC": "E\u0308\u0323"
-    "\u04ED": "e\u0308\u0323"
-    "\u04EE": "U\u0304"
-    "\u04EF": "u\u0304"
-    "\u04F0": "U\u0308"
-    "\u04F1": "u\u0308"
-    "\u04F2": "U\u030B"
-    "\u04F3": "u\u030B"
-    "\u04F4": "C\u0308h"
-    "\u04F5": "c\u0308h"
-    "\u04F6": "G\u0323"
-    "\u04F7": "g\u0323"
-    "\u04F8": "Y\u0308"
-    "\u04F9": "y\u0308"
-    "\u04FA": "Gh\u0327"
-    "\u04FB": "gh\u0327"
-    "\u04FC": "H\u0327"
-    "\u04FD": "h\u0327"
-    "\u04FE": "H\u0304"
-    "\u04FF": "h\u0304"
-    "\u0500": "D\u0307"
-    "\u0501": "d\u0307"
-    "\u0502": "D\u0301"
-    "\u0503": "d\u0301"
-    "\u0504": "Z\u0301"
-    "\u0505": "z\u0301"
-    "\u0506": "Z\u0327"
-    "\u0507": "z\u0327"
-    "\u0508": "L\u0301"
-    "\u0509": "l\u0301"
-    "\u050A": "N\u0301"
-    "\u050B": "n\u0301"
-    "\u050C": "S\u0301"
-    "\u050D": "s\u0301"
-    "\u050E": "T\u0301"
-    "\u050F": "t\u0301"
-    "\u0510": "Z\u0307"
-    "\u0511": "z\u0307"
-    "\u0512": "L\u0327"
-    "\u0513": "l\u0327"
-    "\u0514": "Lkh\u0307"
-    "\u0515": "lkh\u0307"
-    "\u0516": "Rkh\u0307"
-    "\u0517": "rkh\u0307"
-    "\u0518": "A\u0310"
-    "\u0519": "a\u0310"
-    "\u051A": "Q\u0308"
-    "\u051B": "q\u0308"
-    "\u051C": "W\u0308"
-    "\u051D": "w\u0308"
-    "\u051E": "K\u0300"
-    "\u051F": "k\u0300"
-    "\u0520": "L\u0324"
-    "\u0521": "l\u0324"
-    "\u0522": "N\u0327"
-    "\u0523": "n\u0327"
-    "\u0524": "P\u0323"
-    "\u0525": "p\u0323"
-    "\u0526": "Sh\u0323"
-    "\u0527": "sh\u0323"
-    "\u0528": "N\u0326"
-    "\u0529": "n\u0326"
-    "\u052A": "D\uFE20z\uFE21h"
-    "\u052B": "d\uFE20z\uFE21h"
-    "\u052C": "D\uFE20c\uFE21h"
-    "\u052D": "d\uFE20c\uFE21h"
-    "\u052E": "L\u0323"
-    "\u052F": "l\u0323"
+---
+general:
+  name: Cyrillic (Generic)
+  parents:
+    - _ignore_base
+
+# COMMON COMBINING CHARACTERS (always follow a base letter):
+# combining grave \u0300
+# combining acute \u0301
+# combining circumflex \u0302
+# combining tilde \u0303
+# combining macron \u0304
+# combining breve \u0306
+# combining dot above \u0307
+# combining diaeresis \u0308
+# combining ring above \u030A
+# combining double acute \u030B
+# combining caron (hachek) \u030C
+# combining candrabindu \u0310
+# combining dot below \u0323
+# combining diaeresis below \u0324
+# combining comma below \u0326 (Romanian, Latvian, Livonian)
+# combining cedilla \u0327 (French, Turkish, Azeri)
+# combining ogonek (hook) \u0328 (Polish, Lithuanian)
+# combining low line \u0332
+# combining double low line \u0333
+# combining left ligature \uFE20 (Cyrillic transliteration)
+# combining right ligature \uFE21 (Cyrillic transliteration)
+# soft sign/prime (spacing) \u02B9 (Cyrillic transliteration)
+# hard sign/double prime (spacing) \u02BA (Cyrillic transliteration)
+# ayn (spacing) \u02BB (Semitic and Caucasian languages)
+# alif (spacing) \u02BC (Semitic languages)
+# middle dot (spacing) \u00B7 (Catalan)
+
+roman_to_script:
+  map:
+    "A\uFE20E\uFE21": "\u04D4"
+    "A\uFE20e\uFE21": "\u04D4"
+    "a\uFE20e\uFE21": "\u04D5"
+    "A\u0306\u0323": "\u04D0"
+    "a\u0306\u0323": "\u04D1"
+    "\u00C6": "\u04D4"
+    "\u00E6": "\u04D5"
+    "A\u0306": "\u04D8"
+    "a\u0306": "\u04D9"
+    "A\u030B": "\u04DA"
+    "a\u030B": "\u04DB"
+    "A\u0308": "\u04D2"
+    "a\u0308": "\u04D3"
+    "A\u0310": "\u0518"
+    "a\u0310": "\u0519"
+
+    "B": "\u0411"
+    "b": "\u0431"
+
+    "C\u0301h\u0301": "\u04BE"
+    "c\u0301h\u0301": "\u04BF"
+    "C\u0301h": "\u04BC"
+    "c\u0301h": "\u04BD"
+    "C\u0301": "\u040B"
+    "c\u0301": "\u045B"
+    "C\u0308h": "\u04F4"
+    "c\u0308h": "\u04F5"
+    "C\u0323h": "\u04CB"
+    "c\u0323h": "\u04CC"
+
+    "D\u0301": "\u0502"
+    "d\u0301": "\u0503"
+    "D\u0307": "\u0500"
+    "d\u0307": "\u0501"
+    "D\uFE20c\uFE21h": "\u052C"
+    "d\uFE20c\uFE21h": "\u052D"
+    "D\uFE20z\uFE21h": "\u052A"
+    "d\uFE20z\uFE21h": "\u052B"
+    "D\uFE20Z\uFE21": "\u04E0"
+    "d\uFE20z\uFE21": "\u04E1"
+    "Dz\u030C": "\u040F"
+    "dz\u030C": "\u045F"
+    "D": "\u0414"
+    "d": "\u0434"
+
+    "E\u0300": "\u0400"
+    "e\u0300": "\u0450"
+    "E\u0304": "\u0404"
+    "e\u0304": "\u0454"
+    "E\u0306": "\u04D6"
+    "e\u0306": "\u04D7"
+    "E\u0306\u0323": "\u048C"
+    "e\u0306\u0323": "\u048D"
+    "E\u0307": "\u042D"
+    "e\u0307": "\u044D"
+    "E\u0308\u0323": "\u04EC"
+    "e\u0308\u0323": "\u04ED"
+    "E\u0308": "\u0401"
+    "e\u0308": "\u0451"
+    "E\u0328": "\u0466"
+    "e\u0328": "\u0467"
+
+    "F\u0307": "\u0472"
+    "f\u0307": "\u0473"
+    "F": "\u0424"
+    "f": "\u0444"
+
+    "Gh\u0327": "\u04FA"
+    "gh\u0327": "\u04FB"
+    "Gh": "\u0492"
+    "gh": "\u0493"
+    "G\u0301": "\u0403"
+    "g\u0301": "\u0453"
+    "G\u0306": "\u0490"
+    "g\u0306": "\u0491"
+    "G\u0307": "\u049C"
+    "g\u0307": "\u049D"
+    "G\u0323": "\u04F6"
+    "g\u0323": "\u04F7"
+    "G\u0327": "\u0494"
+    "g\u0327": "\u0495"
+
+    "H\u0304": "\u04FE"
+    "h\u0304": "\u04FF"
+    "H\u0327": "\u04FC"
+    "h\u0327": "\u04FD"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
+    "H\u0308": "\u04C0"
+    "h\u0308": "\u04CF"
+
+    "I\u0300": "\u040D"
+    "i\u0300": "\u045D"
+    "I\u0304\u0323": "\u04E2"
+    "i\u0304\u0323": "\u04E3"
+    "I\u0304": "\u0406"
+    "i\u0304": "\u0456"
+    "I\u0306\u0323": "\u048A"
+    "i\u0306\u0323": "\u048B"
+    "I\u0306": "\u0419"
+    "i\u0306": "\u0439"
+    "I\u0308\u0323": "\u04E4"
+    "i\u0308\u0323": "\u04E5"
+    "I\u0308": "\u0407"
+    "i\u0308": "\u0457"
+    "I\u0310": "\u0408"
+    "i\u0310": "\u0458"
+
+    "I\uFE20A\uFE21": "\u042F"
+    "i\uFE20a\uFE21": "\u044F"
+    "A": "\u0410"
+    "a": "\u0430"
+
+    "I\uFE20E\uFE21\u0304": "\u0464"
+    "i\uFE20e\uFE21\u0304": "\u0465"
+    "I\uFE20E\uFE21\u0328": "\u0468"
+    "i\uFE20e\uFE21\u0328": "\u0469"
+    "I\uFE20E\uFE21": "\u0462"
+    "i\uFE20e\uFE21": "\u0463"
+    "E": "\u0415"
+    "e": "\u0435"
+
+    "I\uFE20O\uFE21\u0328": "\u046C"
+    "i\uFE20o\uFE21\u0328": "\u046D"
+    "I\uFE20U\uFE21": "\u042E"
+    "i\uFE20u\uFE21": "\u044E"
+    "I": "\u0418"
+    "i": "\u0438"
+
+    "J\u0304": "\u04B8"
+    "j\u0304": "\u04B9"
+    "J\u0306": "\u04C1"
+    "j\u0306": "\u04C2"
+    "J\u0302": "\u04B6"
+    "j\u0302": "\u04B7"
+    "J\u0308": "\u04DC"
+    "j\u0308": "\u04DD"
+
+    "K\u0300": "\u051E"
+    "k\u0300": "\u051F"
+    "K\u0301": "\u040C"
+    "k\u0301": "\u045C"
+    "K\uFE20H\uFE21": "\u04B2"
+    "k\uFE20h\uFE21": "\u04B3"
+    "Kh": "\u0425"
+    "kh": "\u0445"
+    "K\uFE20S\uFE21": "\u046E"
+    "k\uFE20s\uFE21": "\u046F"
+    "K": "\u041A"
+    "k": "\u043A"
+
+    "Lj": "\u0409"
+    "lj": "\u0459"
+    "Lkh\u0307": "\u0514"
+    "lkh\u0307": "\u0515"
+    "L\u0301": "\u0508"
+    "l\u0301": "\u0509"
+    "L\u0326": "\u04C5"
+    "l\u0326": "\u04C6"
+    "L\u0323": "\u052E"
+    "l\u0323": "\u052F"
+    "L\u0327": "\u0512"
+    "l\u0327": "\u0513"
+    "L\u0324": "\u0520"
+    "l\u0324": "\u0521"
+    "L": "\u041B"
+    "l": "\u043B"
+
+    "M\u0323": "\u04CD"
+    "m\u0323": "\u04CE"
+    "M": "\u041C"
+    "m": "\u043C"
+
+    "Nj": "\u040A"
+    "nj": "\u045A"
+    "N\u0301G\u0300": "\u04A4"
+    "n\u0301g\u0300": "\u04A5"
+    "N\u0301": "\u050A"
+    "n\u0301": "\u050B"
+    "N\u0326": "\u0528"
+    "n\u0326": "\u0529"
+    "N\u0327": "\u0522"
+    "n\u0327": "\u0523"
+    "N\uFE20\u0323G\uFE21": "\u04C9"
+    "n\uFE20\u0323g\uFE21": "\u04CA"
+    "N\uFE20\u0327G\uFE21": "\u04C7"
+    "n\uFE20\u0327g\uFE21": "\u04C8"
+    "N\uFE20G\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"
+    "No\u0332": "\u2116"
+    "N": "\u041D"
+    "n": "\u043D"
+
+    "G": "\u0413"
+    "g": "\u0433"
+
+    "J": "\u0496"
+    "j": "\u0497"
+
+    "O\u0303": "\u047C"
+    "o\u0303": "\u047D"
+    "O\u0304\u0323": "\u047A"
+    "o\u0304\u0323": "\u047B"
+    "O\u0304\uFE20T\uFE21": "\u047E"
+    "o\u0304\uFE20t\uFE21": "\u047F"
+    "O\u0304\u0324": "\u0460"
+    "o\u0304\u0324": "\u0461"
+    "O\u0304": "\u04EA"
+    "o\u0304": "\u04EB"
+    "O\u0307": "\u04E8"
+    "o\u0307": "\u04E9"
+    "O\u0308": "\u04E6"
+    "o\u0308": "\u04E7"
+    "O\u0328": "\u046A"
+    "o\u0328": "\u046B"
+    "O\uFE20u\uFE21": "\u0478"
+    "o\uFE20u\uFE21": "\u0479"
+    "O": "\u041E"
+    "o": "\u043E"
+
+    "Ph": "\u04A6"
+    "ph": "\u04A7"
+    "P\u0323": "\u0524"
+    "p\u0323": "\u0525"
+    "P\uFE20S\uFE21": "\u0470"
+    "p\uFE20s\uFE21": "\u0471"
+    "P": "\u041F"
+    "p": "\u043F"
+
+    "Q\u0300": "\u04A0"
+    "q\u0300": "\u04A1"
+    "Q\u0302": "\u0480"
+    "q\u0302": "\u0481"
+    "Q\u0304": "\u049E"
+    "q\u0304": "\u049F"
+    "Q\u0307": "\u04C3"
+    "q\u0307": "\u04C4"
+    "Q\u0308": "\u051A"
+    "q\u0308": "\u051B"
+    "Q": "\u049A"
+    "q": "\u049B"
+
+    "Rkh\u0307": "\u0516"
+    "rkh\u0307": "\u0517"
+    "R\u0306": "\u048E"
+    "r\u0306": "\u048F"
+    "R": "\u0420"
+    "r": "\u0440"
+
+    "Shch": "\u0429"
+    "shch": "\u0449"
+    "Sh\u0323": "\u0526"
+    "sh\u0323": "\u0527"
+    "Sh": "\u0428"
+    "sh": "\u0448"
+    "S\u0301": "\u050C"
+    "s\u0301": "\u050D"
+    "S\u0307": "\u0405"
+    "s\u0307": "\u0455"
+
+    "Ch": "\u0427"
+    "ch": "\u0447"
+    "C": "\u0426"
+    "c": "\u0446"
+
+    "Th": "\u04AA"
+    "th": "\u04AB"
+    "T\u0301": "\u050E"
+    "t\u0301": "\u050F"
+    "T\u0327": "\u04AC"
+    "t\u0327": "\u04AD"
+    "T\uFE20H\uFE21": "\u0498"
+    "t\uFE20h\uFE21": "\u0499"
+    "T\uFE20S\uFE21": "\u0426"
+    "t\uFE20s\uFE21": "\u0446"
+    "T\uFE20S\uFE21\u0307": "\u04B4"
+    "t\uFE20s\uFE21\u0307": "\u04B5"
+
+    "S": "\u0421"
+    "s": "\u0441"
+
+    "T": "\u0422"
+    "t": "\u0442"
+
+    "U\u0302": "\u04B0"
+    "u\u0302": "\u04B1"
+    "U\u0304": "\u04EE"
+    "u\u0304": "\u04EF"
+    "U\u0306": "\u040E"
+    "u\u0306": "\u045E"
+    "U\u0307": "\u04AE"
+    "u\u0307": "\u04AF"
+    "U\u0308": "\u04F0"
+    "u\u0308": "\u04F1"
+    "U\u030B": "\u04F2"
+    "u\u030B": "\u04F3"
+    "U": "\u0423"
+    "u": "\u0443"
+
+    "V\u0307": "\u0474"
+    "v\u0307": "\u0475"
+    "V\u0308": "\u0476"
+    "v\u0308": "\u0477"
+    "V": "\u0412"
+    "v": "\u0432"
+
+    "W\u0308": "\u051C"
+    "w\u0308": "\u051D"
+    "W": "\u04A8"
+    "w": "\u04A9"
+
+    "X": "\u0058"
+    "x": "\u0078"
+
+    "Y\u0307": "\u0474"
+    "y\u0307": "\u0475"
+    "Y\u0308": "\u04F8"
+    "y\u0308": "\u04F9"
+    "Y": "\u042B"
+    "y": "\u044B"
+
+    "Zh": "\u0416"
+    "zh": "\u0436"
+    "Z\u0301": "\u0504"
+    "z\u0301": "\u0505"
+    "Z\u0307": "\u0510"
+    "z\u0307": "\u0511"
+    "Z\u0308": "\u04DE"
+    "z\u0308": "\u04DF"
+    "Z\u0327": "\u0506"
+    "z\u0327": "\u0507"
+    "Z": "\u0417"
+    "z": "\u0437"
+
+    "H": "\u0413"
+    "h": "\u0433"
+
+    "\u0110": "\u0402"
+    "\u0111": "\u0452"
+    "\u02B9\u0333": "\u042C"
+    "\u02B9": "\u044C"
+    "\u02BA\u0333": "\u042A"
+    "\u02BA": "\u044A"
+    "\u0303": "\u0487"
+    "\u0311": "\u0484"
+    "\u0313": "\u0486"
+    "\u0314": "\u0485"
+    "\u007E": "\u0483"
+    "(|)": "\u0482"
+    "(^)": "\u0488"
+    "(')": "\u0489"
+
+    # Two Less-than signs mapped to Left-pointing double angle quotation mark
+    "\u003C\u003C": "\u00AB"
+    # Two Greater-than signs mapped to Right-pointing double angle quotation mark
+    "\u003E\u003E": "\u00BB"
+
+script_to_roman:
+  map:
+
+    # Left-pointing double angle quotation mark mapped to Two Less-than signs
+    "\u00AB": "\u003C\u003C"
+    # Right-pointing double angle quotation mark mapped to Two Greater-than signs
+    "\u00BB": "\u003E\u003E"
+    "\u2116": "No\u0332"
+    "\u0400": "E\u0300"
+    "\u0401": "E\u0308"
+    "\u0402": "\u0110"
+    "\u0403": "G\u0301"
+    "\u0404": "E\u0304"
+    "\u0405": "S\u0307"
+    "\u0406": "I\u0304"
+    "\u0407": "I\u0308"
+    "\u0408": "I\u0310"
+    "\u0409": "Lj"
+    "\u040A": "Nj"
+    "\u040B": "C\u0301"
+    "\u040C": "K\u0301"
+    "\u040D": "I\u0300"
+    "\u040E": "U\u0306"
+    "\u040F": "Dz\u030C"
+    "\u0410": "A"
+    "\u0411": "B"
+    "\u0412": "V"
+    "\u0413": "G"
+    "\u0414": "D"
+    "\u0415": "E"
+    "\u0416": "Zh"
+    "\u0417": "Z"
+    "\u0418": "I"
+    "\u0419": "I\u0306"
+    "\u041A": "K"
+    "\u041B": "L"
+    "\u041C": "M"
+    "\u041D": "N"
+    "\u041E": "O"
+    "\u041F": "P"
+    "\u0420": "R"
+    "\u0421": "S"
+    "\u0422": "T"
+    "\u0423": "U"
+    "\u0424": "F"
+    "\u0425": "Kh"
+    "\u0426": "T\uFE20S\uFE21"
+    "\u0427": "Ch"
+    "\u0428": "Sh"
+    "\u0429": "Shch"
+    "\u042A": "\u02BA\u0333"
+    "\u042B": "Y"
+    "\u042C": "\u02B9\u0333"
+    "\u042D": "E\u0307"
+    "\u042E": "I\uFE20U\uFE21"
+    "\u042F": "I\uFE20A\uFE21"
+    "\u0430": "a"
+    "\u0431": "b"
+    "\u0432": "v"
+    "\u0433": "g"
+    "\u0434": "d"
+    "\u0435": "e"
+    "\u0436": "zh"
+    "\u0437": "z"
+    "\u0438": "i"
+    "\u0439": "i\u0306"
+    "\u043A": "k"
+    "\u043B": "l"
+    "\u043C": "m"
+    "\u043D": "n"
+    "\u043E": "o"
+    "\u043F": "p"
+    "\u0440": "r"
+    "\u0441": "s"
+    "\u0442": "t"
+    "\u0443": "u"
+    "\u0444": "f"
+    "\u0445": "kh"
+    "\u0446": "t\uFE20s\uFE21"
+    "\u0447": "ch"
+    "\u0448": "sh"
+    "\u0449": "shch"
+    "\u044A": "\u02BA"
+    "\u044B": "y"
+    "\u044C": "\u02B9"
+    "\u044D": "e\u0307"
+    "\u044E": "i\uFE20u\uFE21"
+    "\u044F": "i\uFE20a\uFE21"
+    "\u0450": "e\u0300"
+    "\u0451": "e\u0308"
+    "\u0452": "\u0111"
+    "\u0453": "g\u0301"
+    "\u0454": "e\u0304"
+    "\u0455": "s\u0307"
+    "\u0456": "i\u0304"
+    "\u0457": "i\u0308"
+    "\u0458": "i\u0310"
+    "\u0459": "lj"
+    "\u045A": "nj"
+    "\u045B": "c\u0301"
+    "\u045C": "k\u0301"
+    "\u045D": "i\u0300"
+    "\u045E": "u\u0306"
+    "\u045F": "dz\u030C"
+    "\u0460": "O\u0304\u0324"
+    "\u0461": "o\u0304\u0324"
+    "\u0462": "I\uFE20E\uFE21"
+    "\u0463": "i\uFE20e\uFE21"
+    "\u0464": "I\uFE20E\uFE21\u0304"
+    "\u0465": "i\uFE20e\uFE21\u0304"
+    "\u0466": "E\u0328"
+    "\u0467": "e\u0328"
+    "\u0468": "I\uFE20E\uFE21\u0328"
+    "\u0469": "i\uFE20e\uFE21\u0328"
+    "\u046A": "O\u0328"
+    "\u046B": "o\u0328"
+    "\u046C": "I\uFE20O\uFE21\u0328"
+    "\u046D": "i\uFE20o\uFE21\u0328"
+    "\u046E": "K\uFE20S\uFE21"
+    "\u046F": "k\uFE20s\uFE21"
+    "\u0470": "P\uFE20S\uFE21"
+    "\u0471": "p\uFE20s\uFE21"
+    "\u0472": "F\u0307"
+    "\u0473": "f\u0307"
+    "\u0474": "V\u0307"
+    "\u0475": "v\u0307"
+    "\u0476": "V\u0308"
+    "\u0477": "v\u0308"
+    "\u0478": "O\uFE20u\uFE21"
+    "\u0479": "o\uFE20u\uFE21"
+    "\u047A": "O\u0304\u0323"
+    "\u047B": "o\u0304\u0323"
+    "\u047C": "O\u0303"
+    "\u047D": "o\u0303"
+    "\u047E": "O\u0304\uFE20T\uFE21"
+    "\u047F": "o\u0304\uFE20t\uFE21"
+    "\u0480": "Q\u0302"
+    "\u0481": "q\u0302"
+    "\u0482": "(|)"
+    "\u0483": "\u007E"
+    "\u0484": "\u0311"
+    "\u0485": "\u0314"
+    "\u0486": "\u0313"
+    "\u0487": "\u0303"
+    "\u0488": "(^)"
+    "\u0489": "(')"
+    "\u048A": "I\u0306\u0323"
+    "\u048B": "i\u0306\u0323"
+    "\u048C": "E\u0306\u0323"
+    "\u048D": "e\u0306\u0323"
+    "\u048E": "R\u0306"
+    "\u048F": "r\u0306"
+    "\u0490": "G\u0306"
+    "\u0491": "g\u0306"
+    "\u0492": "Gh"
+    "\u0493": "gh"
+    "\u0494": "G\u0327"
+    "\u0495": "g\u0327"
+    "\u0496": "J"
+    "\u0497": "j"
+    "\u0498": "T\uFE20H\uFE21"
+    "\u0499": "t\uFE20h\uFE21"
+    "\u049A": "Q"
+    "\u049B": "q"
+    "\u049C": "G\u0307"
+    "\u049D": "g\u0307"
+    "\u049E": "Q\u0304"
+    "\u049F": "q\u0304"
+    "\u04A0": "Q\u0300"
+    "\u04A1": "q\u0300"
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04A4": "N\u0301G\u0300"
+    "\u04A5": "n\u0301g\u0300"
+    "\u04A6": "Ph"
+    "\u04A7": "ph"
+    "\u04A8": "W"
+    "\u04A9": "w"
+    "\u04AA": "Th"
+    "\u04AB": "th"
+    "\u04AC": "T\u0327"
+    "\u04AD": "t\u0327"
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04B0": "U\u0302"
+    "\u04B1": "u\u0302"
+    "\u04B2": "K\uFE20H\uFE21"
+    "\u04B3": "k\uFE20h\uFE21"
+    "\u04B4": "T\uFE20S\uFE21\u0307"
+    "\u04B5": "t\uFE20s\uFE21\u0307"
+    "\u04B6": "J\u0302"
+    "\u04B7": "j\u0302"
+    "\u04B8": "J\u0304"
+    "\u04B9": "j\u0304"
+    "\u04BA": "H\u0307"
+    "\u04BB": "h\u0307"
+    "\u04BC": "C\u0301h"
+    "\u04BD": "c\u0301h"
+    "\u04BE": "C\u0301h\u0301"
+    "\u04BF": "c\u0301h\u0301"
+    "\u04C0": "H\u0308"
+    "\u04C1": "J\u0306"
+    "\u04C2": "j\u0306"
+    "\u04C3": "Q\u0307"
+    "\u04C4": "q\u0307"
+    "\u04C5": "L\u0326"
+    "\u04C6": "l\u0326"
+    "\u04C7": "N\uFE20\u0327G\uFE21"
+    "\u04C8": "n\uFE20\u0327g\uFE21"
+    "\u04C9": "N\uFE20\u0323G\uFE21"
+    "\u04CA": "n\uFE20\u0323g\uFE21"
+    "\u04CB": "C\u0323h"
+    "\u04CC": "c\u0323h"
+    "\u04CD": "M\u0323"
+    "\u04CE": "m\u0323"
+    "\u04CF": "h\u0308"
+    "\u04D0": "A\u0306\u0323"
+    "\u04D1": "a\u0306\u0323"
+    "\u04D2": "A\u0308"
+    "\u04D3": "a\u0308"
+    "\u04D4": "\u00C6"
+    "\u04D5": "\u00E6"
+    "\u04D6": "E\u0306"
+    "\u04D7": "e\u0306"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u04DA": "A\u030B"
+    "\u04DB": "a\u030B"
+    "\u04DC": "J\u0308"
+    "\u04DD": "j\u0308"
+    "\u04DE": "Z\u0308"
+    "\u04DF": "z\u0308"
+    "\u04E0": "D\uFE20Z\uFE21"
+    "\u04E1": "d\uFE20z\uFE21"
+    "\u04E2": "I\u0304\u0323"
+    "\u04E3": "i\u0304\u0323"
+    "\u04E4": "I\u0308\u0323"
+    "\u04E5": "i\u0308\u0323"
+    "\u04E6": "O\u0308"
+    "\u04E7": "o\u0308"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"
+    "\u04EA": "O\u0304"
+    "\u04EB": "o\u0304"
+    "\u04EC": "E\u0308\u0323"
+    "\u04ED": "e\u0308\u0323"
+    "\u04EE": "U\u0304"
+    "\u04EF": "u\u0304"
+    "\u04F0": "U\u0308"
+    "\u04F1": "u\u0308"
+    "\u04F2": "U\u030B"
+    "\u04F3": "u\u030B"
+    "\u04F4": "C\u0308h"
+    "\u04F5": "c\u0308h"
+    "\u04F6": "G\u0323"
+    "\u04F7": "g\u0323"
+    "\u04F8": "Y\u0308"
+    "\u04F9": "y\u0308"
+    "\u04FA": "Gh\u0327"
+    "\u04FB": "gh\u0327"
+    "\u04FC": "H\u0327"
+    "\u04FD": "h\u0327"
+    "\u04FE": "H\u0304"
+    "\u04FF": "h\u0304"
+    "\u0500": "D\u0307"
+    "\u0501": "d\u0307"
+    "\u0502": "D\u0301"
+    "\u0503": "d\u0301"
+    "\u0504": "Z\u0301"
+    "\u0505": "z\u0301"
+    "\u0506": "Z\u0327"
+    "\u0507": "z\u0327"
+    "\u0508": "L\u0301"
+    "\u0509": "l\u0301"
+    "\u050A": "N\u0301"
+    "\u050B": "n\u0301"
+    "\u050C": "S\u0301"
+    "\u050D": "s\u0301"
+    "\u050E": "T\u0301"
+    "\u050F": "t\u0301"
+    "\u0510": "Z\u0307"
+    "\u0511": "z\u0307"
+    "\u0512": "L\u0327"
+    "\u0513": "l\u0327"
+    "\u0514": "Lkh\u0307"
+    "\u0515": "lkh\u0307"
+    "\u0516": "Rkh\u0307"
+    "\u0517": "rkh\u0307"
+    "\u0518": "A\u0310"
+    "\u0519": "a\u0310"
+    "\u051A": "Q\u0308"
+    "\u051B": "q\u0308"
+    "\u051C": "W\u0308"
+    "\u051D": "w\u0308"
+    "\u051E": "K\u0300"
+    "\u051F": "k\u0300"
+    "\u0520": "L\u0324"
+    "\u0521": "l\u0324"
+    "\u0522": "N\u0327"
+    "\u0523": "n\u0327"
+    "\u0524": "P\u0323"
+    "\u0525": "p\u0323"
+    "\u0526": "Sh\u0323"
+    "\u0527": "sh\u0323"
+    "\u0528": "N\u0326"
+    "\u0529": "n\u0326"
+    "\u052A": "D\uFE20z\uFE21h"
+    "\u052B": "d\uFE20z\uFE21h"
+    "\u052C": "D\uFE20c\uFE21h"
+    "\u052D": "d\uFE20c\uFE21h"
+    "\u052E": "L\u0323"
+    "\u052F": "l\u0323"

+ 1 - 1
scriptshifter/tables/data/dungan_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Dungan (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/even-evenki_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Even/Evenki (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/gagauz_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Gagauz (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 207 - 209
scriptshifter/tables/data/greek_classical.yml

@@ -52,7 +52,7 @@ script_to_roman:
       - "\u1F8E"  # ᾎ 	Greek Capital Letter Alpha With Psili And Perispomeni And Prosgegrammeni
     # Rough alpha
     "\u1F01":  # ἁ 	Greek Small Letter Alpha With Dasia
-      - "\u03B1\u0314" # Small alpha + combining reversed comma above
+      - "\u03B1\u0314"  # Small alpha + combining reversed comma above
       - "\u1F81"  # ᾁ 	Greek Small Letter Alpha With Dasia And Ypogegrammeni
     "\u1F03":  # ἃ 	Greek Small Letter Alpha With Dasia And Varia
       - "\u1F05"  # ἅ 	Greek Small Letter Alpha With Dasia And Oxia
@@ -61,7 +61,7 @@ script_to_roman:
       - "\u1F85"  # ᾅ 	Greek Small Letter Alpha With Dasia And Oxia And Ypogegrammeni
       - "\u1F87"  # ᾇ 	Greek Small Letter Alpha With Dasia And Perispomeni And Ypogegrammeni
     "\u1F09":  # Ἁ 	Greek Capital Letter Alpha With Dasia
-      - "\u0391\u0314" # Capital alpha + combining reversed comma above
+      - "\u0391\u0314"  # Capital alpha + combining reversed comma above
       - "\u1F89"  # ᾉ 	Greek Capital Letter Alpha With Dasia And Prosgegrammeni
     "\u1F0B":  # Ἃ 	Greek Capital Letter Alpha With Dasia And Varia
       - "\u1F8B"  # ᾋ 	Greek Capital Letter Alpha With Dasia And Varia And Prosgegrammeni
@@ -226,7 +226,7 @@ script_to_roman:
       - "\u1FE3"  # ΰ 	Greek Small Letter Upsilon With Dialytika And Oxia
       - "\u1FE7"  # ῧ 	Greek Small Letter Upsilon With Dialytika And Perispomeni
     "\u03A5":
-        # NOTE: Capital upsilon + psili seems to be absent from Unicode table.
+      # NOTE: Capital upsilon + psili seems to be absent from Unicode table.
       - "\u03AB"  # Ϋ 	Greek Capital Letter Upsilon With Dialytika
       - "\u1F59"  # Ὑ 	Greek Capital Letter Upsilon With Dasia
       - "\u1FE8"  # Ῠ 	Greek Capital Letter Upsilon With Vrachy
@@ -297,7 +297,7 @@ script_to_roman:
       - "\u1FAD"  # ᾭ 	Greek Capital Letter Omega With Dasia And Oxia And Prosgegrammeni
       - "\u1FAF"  # ᾯ 	Greek Capital Letter Omega With Dasia And Perispomeni And Prosgegrammeni
 
-    # Remove all combining diacritics.
+    # Remove combining diacritics irrelevant to transliteration.
     "":
       - "\u0314"
       - "\u0342"
@@ -323,7 +323,7 @@ script_to_roman:
     "\u201C": "\"\u0332"
     "\u201D": "\"\u0333"
     "\u2018": "'\u0332"
-    #"\u2019": "'\u0333"
+    # "\u2019": "'\u0333"
     "\u2116": "No\u0332"
     # "\u0300": ""
     # "\u0301": ""
@@ -421,7 +421,6 @@ script_to_roman:
     # \u03A2 reserved
     "\u03A3": "S"
     "\u03A4": "T"
-    "\u1F59": "Hy"
     "\u03A5": "Y"
     "\u03A5\u03B9": "Ui"
     "\u03A5\u1F31": "Hui"
@@ -518,7 +517,6 @@ script_to_roman:
     "\u1F51": "hy"
     "\u1F59": "Hy"
     "\u03C5": "y"
-    "\u03CB": "y"
     "\u03C5\u03B9": "ui"
     "\u03C5\u1F31": "hui"
     "\u03C6": "ph"
@@ -551,9 +549,9 @@ script_to_roman:
     "\u03D9": "ḳ"
     "\u03DA": "6\u0333"
     "\u03DB": "6\u0332"
-    #"\u03DC": "G\u0332"
+    # "\u03DC": "G\u0332"
     "\u03DC": "W"
-    #"\u03DD": "g\u0332"
+    # "\u03DD": "g\u0332"
     "\u03DD": "w"
     "\u03DE": "K\u0324"
     "\u03DF": "k\u0324"
@@ -591,203 +589,203 @@ script_to_roman:
     "\u03FF": ".)\u0333"
 
 roman_to_script:
-    map:
-      ".)\u0333": "\u03FF"
-      ".)": "\u037D"
-      "?\u0333": "\u037E"
-      "?": "\u037E"
-      "\"\u0332": "\u201C"
-      "\"\u0333": "\u201D"
-      "'\u0332": "\u2018"
-      "'\u0333": "\u2019"
-      "(.\u0333": "\u03FE"
-      "(.": "\u037C"
-      "|)\u0333": "\u03FD"
-      "|)": "\u037B"
-      # Left pointing double angle quotation mark
-      "\u003C\u003C": "\u00AB"
-      # Right pointing double angle quotation mark
-      "\u003E\u003E": "\u00BB"
-      "6\u0332": "\u03DB"
-      "6\u0333": "\u03DA"
-      "Au": "\u0391\u03C5"
-      "au": "\u03B1\u03C5"
-      "a\u0301": "\u03AC"
-      "B": "\u0392"
-      "b": "\u03B2"
-      "b\u0333": "\u03D0"
-      "Ch": "\u03A7"
-      "ch": "\u03C7"
-      "c\u030C": "\u03EB"
-      "\u1E0E": "\u039D\u03C4"
-      "\u1E0F": "\u03BD\u03C4"
-      "D": "\u0394"
-      "d": "\u03B4"
-      "Eu": "\u0395\u03C5"
-      "eu": "\u03B5\u03C5"
-      "E\u0301": "\u0388"
-      "e\u0301": "\u03AD"
-      "\u0112\u0301": "\u0389\u0314"
-      "\u0113\u0301": "\u03AE"
-      "\u0112\u0301": "\u0389"
-      "\u0112": "\u0397"
-      "\u0112u": "\u0397\u03C5"
-      "\u0113": "\u03B7"
-      "\u0113u": "\u03B7\u03C5"
-      "h\u0113u": "\u03B7\u1F51"
-      "e\u0332": "\u03F5"
-      "e\u0333": "\u03F6"
-      "F": "\u03E4"
-      "f": "\u03E5"
-      #"G\u0332": "\u03DC"
-      "W": "\u03DC"
-      #"g\u0332": "\u03DD"
-      "w": "\u03DD"
-      "Ha\u0301": "\u0386\u0314"
-      "ha\u0301": "\u03AC\u0314"
-      "Ha": "\u1F09"
-      "ha": "\u03B1\u0314"
-      "A": "\u0391"
-      "a": "\u03B1"
-      "h\u0113\u0301": "\u03AE\u0314"
-      "He\u0301": "\u0388\u0314"
-      "he\u0301": "\u03AD\u0314"
-      "H\u0113": "\u1F29"
-      "H\u0113u": "\u1F29\u03C5"
-      "h\u0113": "\u1F21"
-      "h\u0113u": "\u1F21\u13C5"
-      "He": "\u1F19"
-      "he": "\u1F11"
-      "E": "\u0395"
-      "e": "\u03B5"
-      "Hi\u0301": "\u038A\u0314"
-      "hi\u0301": "\u03AF\u0314"
-      "Hi": "\u1F39"
-      "hi": "\u1F31"
-      "Ho\u0301": "\u038F\u0314"
-      "Ho\u0301": "\u038C\u0314"
-      "h\u014D": "\u1F61"
-      "H\u014D": "\u1F69"
-      "Ho": "\u1F49"
-      "ho": "\u1F41"
-      "H\u0307": "\u03E8"
-      "h\u0307": "\u03E9"
-      "H\u0323": "\u0370"
-      "h\u0323": "\u0371"
-      "H\u0332": "\u03E6"
-      "h\u0332": "\u03E7"
-      "Hy\u0301": "\u038E\u0314"
-      "Hy": "\u1F59"
-      "hy": "\u1F51"
-      "Iu": "\u0399\u03C5"
-      "iu": "\u03B9\u03C5"
-      "I\u0301": "\u038A"
-      "i\u0301": "\u03AF"
-      "I\u0308": "\u03AA"
-      "i\u0308\u0301": "\u0390"
-      "i\u0308": "\u03CA"
-      "J": "\u037F"
-      "j": "\u03F3"
-      "K\u0323y": "\u03EC"
-      "k\u0323y": "\u03ED"
-      "K\u0332": "\u03DE"
-      "k\u0332": "\u03DF"
-      "K\u0326": "\u03CF"
-      "k\u0326": "\u03D7"
-      "k\u0332": "\u03F0"
-      "L": "\u039B"
-      "l": "\u03BB"
-      "M": "\u039C"
-      "m": "\u03BC"
-      "nch": "\u03B3\u03C7"
-      "ng": "\u03B3\u03B3"
-      "%nk%": "\u03B3\u03BA"
-      "nx": "\u03B3\u03BE"
-      "No\u0332": "\u2116"
-      "N": "\u039D"
-      "n": "\u03BD"
-      "K": "\u039A"
-      "k": "\u03BA"
-      "G": "\u0393"
-      "g": "\u03B3"
-      "Ou": "\u039F\u03C5"
-      "ou": "\u03BF\u03C5"
-      "O\u0301": "\u038C"
-      "o\u0301": "\u03CC"
-      "\u014C\u0301": "\u038F"
-      "\u014D\u0301": "\u03CE"
-      "\u014C": "\u03A9"
-      "\u014Cu": "\u03A9\u03C5"
-      "\u014D": "\u03C9"
-      "\u014Du": "\u03D9\u03C5"
-      "O": "\u039F"
-      "o": "\u03BF"
-      "Ph": "\u03A6"
-      "ph": "\u03C6"
-      "Ps": "\u03A8"
-      "ps": "\u03C8"
-      "p\u0333h\u0333": "\u03D5"
-      "p\u0333": "\u03D6"
-      "P": "\u03A0"
-      "p": "\u03C0"
-      "Ḳ": "\u03D8"
-      "ḳ": "\u03D9"
-      "Rh": "\u1FEC"
-      "rh": "\u1FE5"
-      "r\u0332": "\u03F1"
-      "r\u0333": "\u03FC"
-      "R": "\u03A1"
-      "r": "\u03C1"
-      "S\uFE20\u0332S\uFE21\u0332": "\u0372"
-      "s\uFE20\u0332s\uFE21\u0332": "\u0373"
-      "S\uFE20H\uFE21": "\u03F7"
-      "s\uFE20h\uFE21": "\u03F8"
-      "S\uFE20S\uFE21": "\u03E1"
-      "s\uFE20s\uFE21": "\u03E0"
-      "S\u030C": "\u03E2"
-      "s\u030C": "\u03E3"
-      "S\u0323": "\u03FA"
-      "s\u0323": "\u03FB"
-      "S": "\u03F9"
-      "s": "\u03F2"
-      "S": "\u03A3"
-      "%s": "\u03C2"
-      "s": "\u03C3"
-      "T\u0333H\u0333": "\u03F4"
-      "t\u0333h\u0333": "\u03D1"
-      "Th": "\u0398"
-      "th": "\u03B8"
-      "T\u0323i": "\u03EE"
-      "t\u0323i": "\u03EF"
-      "T": "\u03A4"
-      "t": "\u03C4"
-      "I": "\u0399"
-      "i": "\u03B9"
-      "\u0020\u0301": "\u0384"
-      "\u0020\u0308\u0301": "\u0385"
-      ";\u0333": "\u0387"
-      "\u02B9": "\u0374"
-      "\u0326": "\u0375"
-      "\u0328": "\u037A"
-      "V": "\u0392"
-      "v": "\u03B2"
-      "W\u0323": "\u0376"
-      "w\u0323": "\u0377"
-      "X": "\u039E"
-      "x": "\u03BE"
-      "Y\u0301\u0333": "\u03D3"
-      "Y\u0301": "\u038E"
-      "y\u0301": "\u03CD"
-      "Y\u0308\u0333": "\u03D4"
-      "y\u0308\u0301": "\u03B0"
-      "Y\u0308": "\u03AB"
-      "y\u0308": "\u03CB"
-      "Y\u0333": "\u03D2"
-      "Y": "\u03A5"
-      "Ui": "\u03A5\u03B9"
-      "Hui": "\u03A5\u1F31"
-      "y": "\u03C5"
-      "ui": "\u03C5\u03B9"
-      "hui": "\u03C5\u1F31"
-      "Z": "\u0396"
-      "z": "\u03B6"
+  map:
+    ".)\u0333": "\u03FF"
+    ".)": "\u037D"
+    "?\u0333": "\u037E"
+    "?": "\u037E"
+    "\"\u0332": "\u201C"
+    "\"\u0333": "\u201D"
+    "'\u0332": "\u2018"
+    "'\u0333": "\u2019"
+    "(.\u0333": "\u03FE"
+    "(.": "\u037C"
+    "|)\u0333": "\u03FD"
+    "|)": "\u037B"
+    # Left pointing double angle quotation mark
+    "\u003C\u003C": "\u00AB"
+    # Right pointing double angle quotation mark
+    "\u003E\u003E": "\u00BB"
+    "6\u0332": "\u03DB"
+    "6\u0333": "\u03DA"
+    "Au": "\u0391\u03C5"
+    "au": "\u03B1\u03C5"
+    "a\u0301": "\u03AC"
+    "B": "\u0392"
+    "b": "\u03B2"
+    "b\u0333": "\u03D0"
+    "Ch": "\u03A7"
+    "ch": "\u03C7"
+    "c\u030C": "\u03EB"
+    "\u1E0E": "\u039D\u03C4"
+    "\u1E0F": "\u03BD\u03C4"
+    "D": "\u0394"
+    "d": "\u03B4"
+    "Eu": "\u0395\u03C5"
+    "eu": "\u03B5\u03C5"
+    "E\u0301": "\u0388"
+    "e\u0301": "\u03AD"
+    "\u0113\u0301": "\u03AE"
+    # "\u0112\u0301": "\u0389\u0314"
+    "\u0112\u0301": "\u0389"
+    "\u0112": "\u0397"
+    "\u0112u": "\u0397\u03C5"
+    "\u0113": "\u03B7"
+    "\u0113u": "\u03B7\u03C5"
+    "e\u0332": "\u03F5"
+    "e\u0333": "\u03F6"
+    "F": "\u03E4"
+    "f": "\u03E5"
+    # "G\u0332": "\u03DC"
+    "W": "\u03DC"
+    # "g\u0332": "\u03DD"
+    "w": "\u03DD"
+    "Ha\u0301": "\u0386\u0314"
+    "ha\u0301": "\u03AC\u0314"
+    "Ha": "\u1F09"
+    "ha": "\u03B1\u0314"
+    "A": "\u0391"
+    "a": "\u03B1"
+    "h\u0113\u0301": "\u03AE\u0314"
+    "He\u0301": "\u0388\u0314"
+    "he\u0301": "\u03AD\u0314"
+    "H\u0113": "\u1F29"
+    "H\u0113u": "\u1F29\u03C5"
+    "h\u0113": "\u1F21"
+    "h\u0113u": "\u03B7\u1F51"
+    # "h\u0113u": "\u1F21\u13C5"  # FIXME this looks wrong.
+    "He": "\u1F19"
+    "he": "\u1F11"
+    "E": "\u0395"
+    "e": "\u03B5"
+    "Hi\u0301": "\u038A\u0314"
+    "hi\u0301": "\u03AF\u0314"
+    "Hi": "\u1F39"
+    "hi": "\u1F31"
+    # "Ho\u0301": "\u038F\u0314"
+    "Ho\u0301": "\u038C\u0314"
+    "h\u014D": "\u1F61"
+    "H\u014D": "\u1F69"
+    "Ho": "\u1F49"
+    "ho": "\u1F41"
+    "H\u0307": "\u03E8"
+    "h\u0307": "\u03E9"
+    "H\u0323": "\u0370"
+    "h\u0323": "\u0371"
+    "H\u0332": "\u03E6"
+    "h\u0332": "\u03E7"
+    "Hy\u0301": "\u038E\u0314"
+    "Hy": "\u1F59"
+    "hy": "\u1F51"
+    "Iu": "\u0399\u03C5"
+    "iu": "\u03B9\u03C5"
+    "I\u0301": "\u038A"
+    "i\u0301": "\u03AF"
+    "I\u0308": "\u03AA"
+    "i\u0308\u0301": "\u0390"
+    "i\u0308": "\u03CA"
+    "J": "\u037F"
+    "j": "\u03F3"
+    "K\u0323y": "\u03EC"
+    "k\u0323y": "\u03ED"
+    "K\u0326": "\u03CF"
+    "k\u0326": "\u03D7"
+    "K\u0332": "\u03DE"
+    # "k\u0332": "\u03DF"  # FIXME ambiguous.
+    "k\u0332": "\u03F0"
+    "L": "\u039B"
+    "l": "\u03BB"
+    "M": "\u039C"
+    "m": "\u03BC"
+    "nch": "\u03B3\u03C7"
+    "ng": "\u03B3\u03B3"
+    "%nk%": "\u03B3\u03BA"
+    "nx": "\u03B3\u03BE"
+    "No\u0332": "\u2116"
+    "N": "\u039D"
+    "n": "\u03BD"
+    "K": "\u039A"
+    "k": "\u03BA"
+    "G": "\u0393"
+    "g": "\u03B3"
+    "Ou": "\u039F\u03C5"
+    "ou": "\u03BF\u03C5"
+    "O\u0301": "\u038C"
+    "o\u0301": "\u03CC"
+    "\u014C\u0301": "\u038F"
+    "\u014D\u0301": "\u03CE"
+    "\u014C": "\u03A9"
+    "\u014Cu": "\u03A9\u03C5"
+    "\u014D": "\u03C9"
+    "\u014Du": "\u03C9\u03C5"
+    "O": "\u039F"
+    "o": "\u03BF"
+    "Ph": "\u03A6"
+    "ph": "\u03C6"
+    "Ps": "\u03A8"
+    "ps": "\u03C8"
+    "p\u0333h\u0333": "\u03D5"
+    "p\u0333": "\u03D6"
+    "P": "\u03A0"
+    "p": "\u03C0"
+    "Ḳ": "\u03D8"
+    "ḳ": "\u03D9"
+    "Rh": "\u1FEC"
+    "rh": "\u1FE5"
+    "r\u0332": "\u03F1"
+    "r\u0333": "\u03FC"
+    "R": "\u03A1"
+    "r": "\u03C1"
+    "S\uFE20\u0332S\uFE21\u0332": "\u0372"
+    "s\uFE20\u0332s\uFE21\u0332": "\u0373"
+    "S\uFE20H\uFE21": "\u03F7"
+    "s\uFE20h\uFE21": "\u03F8"
+    "S\uFE20S\uFE21": "\u03E1"
+    "s\uFE20s\uFE21": "\u03E0"
+    "S\u030C": "\u03E2"
+    "s\u030C": "\u03E3"
+    "S\u0323": "\u03FA"
+    "s\u0323": "\u03FB"
+    # "S": "\u03F9"  # FIXME ambiguous.
+    "S": "\u03A3"
+    # "s": "\u03F2"  # FIXME ambiguous.
+    "%s": "\u03C2"
+    "s": "\u03C3"
+    "T\u0333H\u0333": "\u03F4"
+    "t\u0333h\u0333": "\u03D1"
+    "Th": "\u0398"
+    "th": "\u03B8"
+    "T\u0323i": "\u03EE"
+    "t\u0323i": "\u03EF"
+    "T": "\u03A4"
+    "t": "\u03C4"
+    "I": "\u0399"
+    "i": "\u03B9"
+    "\u0020\u0301": "\u0384"
+    "\u0020\u0308\u0301": "\u0385"
+    ";\u0333": "\u0387"
+    "\u02B9": "\u0374"
+    "\u0326": "\u0375"
+    "\u0328": "\u037A"
+    "V": "\u0392"
+    "v": "\u03B2"
+    "W\u0323": "\u0376"
+    "w\u0323": "\u0377"
+    "X": "\u039E"
+    "x": "\u03BE"
+    "Y\u0301\u0333": "\u03D3"
+    "Y\u0301": "\u038E"
+    "y\u0301": "\u03CD"
+    "Y\u0308\u0333": "\u03D4"
+    "y\u0308\u0301": "\u03B0"
+    "Y\u0308": "\u03AB"
+    "y\u0308": "\u03CB"
+    "Y\u0333": "\u03D2"
+    "Y": "\u03A5"
+    "Ui": "\u03A5\u03B9"
+    "Hui": "\u03A5\u1F31"
+    "y": "\u03C5"
+    "ui": "\u03C5\u03B9"
+    "hui": "\u03C5\u1F31"
+    "Z": "\u0396"
+    "z": "\u03B6"

+ 18 - 0
scriptshifter/tables/data/greek_modern.yml

@@ -12,3 +12,21 @@ roman_to_script:
   map:
     "V": "\u0392"
     "v": "\u03B2"
+    "Ha": "\u0391"
+    "ha": "\u03B1"
+    "He": "\u0395"
+    "he": "\u03B5"
+    "H\u0113": "\u0397"
+    "h\u0113": "\u03B7"
+    "Hi": "\u0399"
+    "hi": "\u03B9"
+    "Ho": "\u039F"
+    "ho": "\u03BF"
+    "Hou": "\u039F\u03C5"
+    "hou": "\u03BF\u03C5"
+    "H\u014D": "\u03A9"
+    "h\u014D": "\u03C9"
+    "Hy": "\u03A5"
+    "Hui": "\u03A5\u03B9"
+    "hy": "\u03C5"
+    "hui": "\u03C5\u03B9"

+ 1 - 1
scriptshifter/tables/data/kalmyk_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Kalmyk (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/kara-kalpak_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Kara-Kalpak (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/karachay-balkar_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Karachay-Balkar (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/karelian_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Karelian (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/kazakh_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Kazakh (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/khakass_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Khakass (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/khanty_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Khanty (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/komi_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Komi (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/koryak_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Koryak (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/kyrgyz_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Kyrgyz (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/lithuanian_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Lithuanian (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 62 - 16
scriptshifter/tables/data/macedonian.yml

@@ -1,50 +1,99 @@
 general:
   name: Macedonian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:
     "G\u0301": "\u0403"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F4": "\u0403"
     "G": "\u0413"
     "g\u0301": "\u0453"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F5": "\u0453"
     "g": "\u0433"
     "\u0110": "\u0402"
-    # this conversion shouldn't be needed, but does no harm
-    "DZ\u030C": "\u040F"
-    # this conversion shouldn't be needed, but does no harm
-    "DZ": "\u0405"
-    "Dz\u030C": "\u040F"
-    "Dz": "\u0405"
+    "D\uFE20Z\u030C\uFE21": "\u040F"
+    "D\uFE20z\u030C\uFE21": "\u040F"
+    "d\uFE20Z\u030C\uFE21": "\u040F"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C4": "\u040F"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C5": "\u040F"
+    "d\uFE20z\u030C\uFE21": "\u045F"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C6": "\u045F"
+    "D\uFE20Z\uFE21": "\u0405"
+    "D\uFE20z\uFE21": "\u0405"
+    "d\uFE20Z\uFE21": "\u0405"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F1": "\u0405"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F2": "\u0405"
+    "d\uFE20z\uFE21": "\u0455"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F3": "\u0455"
     "\u0111": "\u0452"
     "dz\u030C": "\u045F"
     "dz": "\u0455"
     "Z\u030C": "\u0416"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u017D": "\u0416"
     "z\u030C": "\u0436"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u017E": "\u0436"
     "z": "\u0437"
     "I": "\u0418"
     "i": "\u0438"
     "J": "\u0408"
     "j": "\u0458"
     "K\u0301": "\u040C"
-    "H": "\u0425"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u1E30": "\u040C"
     "k\u0301": "\u045C"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u1E31": "\u045C"
+    "H": "\u0425"
     "h": "\u0445"
-    # this conversion shouldn't be needed, but does no harm
     "LJ": "\u0409"
     "Lj": "\u0409"
+    "lJ": "\u0409"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C7": "\u0409"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C8": "\u0409"
     "lj": "\u0459"
-    # this conversion shouldn't be needed, but does no harm
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C9": "\u0459"
     "NJ": "\u040A"
     "Nj": "\u040A"
+    "nJ": "\u040A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01CA": "\u040A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01CB": "\u040A"
     "nj": "\u045A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01CC": "\u045A"
     "S\u030C": "\u0428"
+    "\u0160": "\u0428"
     "s\u030C": "\u0448"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u0161": "\u0448"
     "C\u0301": "\u040B"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u0106": "\u040B"
     "C\u030C": "\u0427"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u010C": "\u0427"
     "C": "\u0426"
     "c\u0301": "\u045B"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u0107": "\u045B"
     "c\u030C": "\u0447"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u010D": "\u0447"
     "c": "\u0446"
 
 script_to_roman:
@@ -57,8 +106,8 @@ script_to_roman:
     "\u0452": "\u0111"
     "\u0416": "Z\u030C"
     "\u0436": "z\u030C"
-    "\u0405": "Dz"
-    "\u0455": "dz"
+    "\u0405": "D\uFE20Z\uFE21"
+    "\u0455": "d\uFE20z\uFE21"
     "\u0418": "I"
     "\u0438": "i"
     "\u0408": "J"
@@ -79,9 +128,6 @@ script_to_roman:
     "\u0446": "c"
     "\u0427": "C\u030C"
     "\u0447": "c\u030C"
-    "\u040F": "Dz\u030C"
-    "\u045F": "dz\u030C"
-    "\u1029": "D\uFE20Z\uFE21"
-    "\u0455": "d\uFE20z\uFE21"
     "\u040F": "D\uFE20Z\u030C\uFE21"
     "\u045F": "d\uFE20z\u030C\uFE21"
+

+ 191 - 0
scriptshifter/tables/data/manchu.yml

@@ -0,0 +1,191 @@
+---
+general:
+  name: Manchu
+  parents:
+    - _ignore_base
+  case_sensitive: false
+  
+roman_to_script:
+
+  map:
+    # Generates Narrow No-Break Space
+    "\u002D": "\u202F"
+    "Ai": "\u1820\u1873"
+    "ai": "\u1820\u1873"
+    "A": "\u1820"
+    "a": "\u1820"
+    "E": "\u185D"
+    "e": "\u185D"
+    "O": "\u1823"
+    "o": "\u1823"
+    "U\u0304": "\u1861"
+    "u\u0304": "\u1861"
+    "U\u0308": "\u1861"
+    "u\u0308": "\u1861"
+    "U": "\u1860"
+    "u": "\u1860"
+    "I": "\u1873"
+    "i": "\u1873"
+    "B": "\u182A"
+    "b": "\u182A"
+    "Cy": "\u1871\u1873"
+    "cy": "\u1871\u1873"
+    "C": "\u1834"
+    "c": "\u1834"
+    "DZ": "\u186F"
+    "Dz": "\u186F"
+    "dz": "\u186F"
+    "D": "\u1869"
+    "d": "\u1869"
+    "Fa": "\u1876\u1820"
+    "fa": "\u1876\u1820"
+    "Fe": "\u1876\u185D"
+    "fe": "\u1876\u185D"
+    "Fi": "\u1838\u185E"
+    "fi": "\u1838\u185E"
+    "Fo": "\u1838\u1823"
+    "fo": "\u1838\u1823"
+    "Fu\u0304": "\u1838\u1861"
+    "fu\u0304": "\u1838\u1861"
+    "Fu\u0308": "\u1838\u1861"
+    "fu\u0308": "\u1838\u1861"
+    "Fu": "\u1838\u1860"
+    "fu": "\u1838\u1860"
+    "F": "\u1838"
+    "f": "\u1838"
+    "G\u0027": "\u186C"
+    "g\u0027": "\u186C"
+    "G": "\u1864"
+    "g": "\u1864"
+    "H\u0027": "\u186D"
+    "h\u0027": "\u186D"
+    "H": "\u1865"
+    "h": "\u1865"
+    "Jy": "\u1877\u1873"
+    "jy": "\u1877\u1873"
+    "J": "\u1835"
+    "j": "\u1835"
+    "K\u0027": "\u183A"
+    "k\u0027": "\u183A"
+    "K": "\u1874"
+    "k": "\u1874"
+    "L": "\u182F"
+    "l": "\u182F"
+    "M": "\u182E"
+    "m": "\u182E"
+    "NG": "\u1829"
+    "ng": "\u1829"
+    "N": "\u1828"
+    "n": "\u1828"
+    "P": "\u1866"
+    "p": "\u1866"
+    "R": "\u1875"
+    "r": "\u1875"
+    "Sy": "\u186E\u185F"
+    "sy": "\u186E\u185F"
+    "S\u030C": "\u1867"
+    "s\u030C": "\u1867"
+    "S": "\u1830"
+    "s": "\u1830"
+    "TS": "\u186E"
+    "Ts": "\u186E"
+    "ts": "\u186E"
+    "T": "\u1868"
+    "t": "\u1868"
+    "W": "\u1838"
+    "w": "\u1838"
+    "Y": "\u1836"
+    "y": "\u1836"
+    "ZH": "\u1877"
+    "Zh": "\u1877"
+    "zh": "\u1877"
+    "Z\u030C": "\u1870"
+    "z\u030C": "\u1870"
+    "Z": "\u1853"
+    "z": "\u1853"
+    "...": "\u1801"
+    "..": "\u1803"
+    ".": "\u180A"
+    ",": "\u1802"
+    ":": "\u1804"
+    # Left pointing double angle quotation mark
+    "\u003C\u003C": "\u300A"
+    # Right pointing double angle quotation mark
+    "\u003E\u003E": "\u300B"
+    "0": "\u1810"
+    "1": "\u1811"
+    "2": "\u1812"
+    "3": "\u1813"
+    "4": "\u1814"
+    "5": "\u1815"
+    "6": "\u1816"
+    "7": "\u1817"
+    "8": "\u1818"
+    "9": "\u1819"
+    
+script_to_roman:
+
+  map:
+    # Narrow No-Break Space is romanized as a hyphen
+    "\u202F": "\u002D"
+    "\u1801": "..."
+    "\u1802": ","
+    "\u1803": ".."
+    "\u1804": ":"
+    "\u180A": "."
+    "\u180E": "-"
+    "\u1810": "0"
+    "\u1811": "1"
+    "\u1812": "2"
+    "\u1813": "3"
+    "\u1814": "4"
+    "\u1815": "5"
+    "\u1816": "6"
+    "\u1817": "7"
+    "\u1818": "8"
+    "\u1819": "9"
+    "\u1820": "a"
+    "\u1822\u1822": "i"
+    "\u1822": "i"
+    "\u1823": "o"
+    "\u1828": "n"
+    "\u1829": "ng"
+    "\u182A": "b"
+    "\u182E": "m"
+    "\u182F": "l"
+    "\u1830": "s"
+    "\u1834": "c"
+    "\u1835": "j"
+    "\u1836": "y"
+    "\u1838": "w"
+    "\u183A": "k\u0027"
+    "\u1853": "z"
+    "\u1856": "v"
+    "\u185D": "e"
+    "\u1860": "u"
+    "\u1861": "u\u0304"
+    "\u1862": "ng"
+    "\u1864": "g"
+    "\u1865": "h"
+    "\u1866": "p"
+    "\u1867": "s\u030C"
+    "\u1868": "t"
+    "\u1869": "d"
+    "\u186A": "j"
+    "\u186C": "g\u0027"
+    "\u186D": "h\u0027"
+    "\u186E\u185F": "sy"
+    "\u186E": "ts"
+    "\u186F": "dz"
+    "\u1870": "z\u030C"
+    "\u1871\u1873": "cy"
+    "\u1877\u1873": "jy"
+    "\u1873": "i"
+    "\u1874": "k"
+    "\u1875": "r"
+    "\u1876": "f"
+    "\u202F": "\u002D"
+    # Left pointing double angle quotation mark
+    "\u300A": "\u003C\u003C"
+    # Right pointing double angle quotation mark
+    "\u300B": "\u003E\u003E"

+ 1 - 1
scriptshifter/tables/data/mansi_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Mansi (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/mari_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Mari (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/moldovan_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Moldovan (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/mongolian_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Mongolian (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/mordvin_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Mordvin (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/nenets_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Nenets (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/ossetic_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Ossetic (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/romani_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Romani (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/russian.yml

@@ -1,7 +1,7 @@
 general:
   name: Russian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/serbian.yml

@@ -1,7 +1,7 @@
 general:
   name: Serbian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/shor_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Shor (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/syriac_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Syriac (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/tajik_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Tajik (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/tatar-kryashen_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Tatar-Kryashen (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/tatar_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Tatar (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 275 - 0
scriptshifter/tables/data/tod_mongolian.yml

@@ -0,0 +1,275 @@
+---
+general:
+  name: Tod (Mongolian)
+  parents:
+    - _ignore_base
+  case_sensitive: false
+  
+roman_to_script:
+
+  map:
+    # Generates Narrow No-Break Space
+    "\u002Daca": "\u202F\u1820\u1854\u1820"
+    "\u002DA": "\u180E\u1820"
+    "\u002Da": "\u180E\u1820"
+    "A": "\u1820"
+    "a": "\u1820"
+    "\u002Dece": "\u202F\u1844\u1854\u1844"
+    "\u002DE": "\u180E\u1844"
+    "\u002De": "\u180E\u1844"
+    # Generates Narrow No-Break Space
+    "\u002D": "\u202F"
+    "E": "\u1844"
+    "e": "\u1844"
+    "I": "\u1845"
+    "i": "\u1845"
+    "O\u0308": "\u1848"
+    "o\u0308": "\u1848"
+    "O": "\u1846"
+    "o": "\u1846"
+    "U\u0308": "\u1849"
+    "u\u0308": "\u1849"
+    "U": "\u1847"
+    "u": "\u1847"
+    # n followed by a g with dot
+    "ng\u0307": "\u1828\u184E"
+    # this conversion shouldn't be needed, but does no harm
+    "nG": "\u184A"
+    "ng": "\u184A"
+    "N\u0303": "\u185B"
+    "n\u0303": "\u185B"
+    "N": "\u1828"
+    "n": "\u1828"
+    "B": "\u184B"
+    "b": "\u184B"
+    "P": "\u184C"
+    "p": "\u184C"
+    "Q": "\u184E"
+    "q": "\u184E"
+    "KH": "\u183B"
+    "Kh": "\u183B"
+    # this conversion shouldn't be needed, but does no harm
+    "kH": "\u183B"
+    "kh": "\u183B"
+    "K\u0307": "\u1857"
+    "k\u0307": "\u1857"
+    "Ka": "\u1857\u1820"
+    "ka": "\u1857\u1820"
+    "Ke": "\u184D\u1844"
+    "ke": "\u184D\u1844"
+    "Ki": "\u184D\u1845"
+    "ki": "\u184D\u1845"
+    "Ko\u0308": "\u184D\u1848"
+    "ko\u0308": "\u184D\u1848"
+    "Ko": "\u1857\u1846"
+    "ko": "\u1857\u1846"
+    "Ku\u0308": "\u184D\u1849"
+    "ku\u0308": "\u184D\u1849"
+    "Ku": "\u1857\u1847"
+    "ku": "\u1857\u1847"
+    "K": "\u1857"
+    "k": "\u1857"
+    "G\u0307": "\u184E"
+    "g\u0307": "\u184E"
+    "G": "\u184E"
+    "g": "\u184E"
+    "M": "\u184F"
+    "m": "\u184F"
+    "LH": "\u1840"
+    "Lh": "\u1840"
+    # this conversion shouldn't be needed, but does no harm
+    "lH": "\u1840"
+    "lh": "\u1840"
+    "L": "\u182F"
+    "l": "\u182F"
+    "TS\u0307": "\u1854"
+    # this conversion shouldn't be needed, but does no harm
+    "Ts\u0307": "\u1854"
+    # this conversion shouldn't be needed, but does no harm
+    "tS\u0307": "\u1854"
+    "ts\u0307": "\u1854"
+    "S\u0301": "\u1831"
+    "s\u0301": "\u1831"
+    "S": "\u1830"
+    "s": "\u1830"
+    "T": "\u1850"
+    "t": "\u1850"
+    "D": "\u1851"
+    "d": "\u1851"
+    "J\u0301": "\u185A"
+    "j\u0301": "\u185A"
+    "J": "\u1853"
+    "j": "\u1853"
+    "Y": "\u1855"
+    "y": "\u1855"
+    "V": "\u1856"
+    "v": "\u1856"
+    "W": "\u1856"
+    "w": "\u1856"
+    "F": "\u1839"
+    "f": "\u1839"
+    "Xa": "\u184D\u1820"
+    "xa": "\u184D\u1820"
+    "Xe": "\u184D\u1844"
+    "xe": "\u184D\u1844"
+    "Xi": "\u184D\u1845"
+    "xi": "\u184D\u1845"
+    "Xo\u0308": "\u184D\u1848"
+    "xo\u0308": "\u184D\u1848"
+    "Xo": "\u184D\u1846"
+    "xo": "\u184D\u1846"
+    "Xu\u0308": "\u184D\u1849"
+    "xu\u0308": "\u184D\u1849"
+    "Xu": "\u184D\u1847"
+    "xu": "\u184D\u1847"
+    "X": "\u184D"
+    "x": "\u184D"
+    "Z\u0301": "\u183F"
+    "z\u0301": "\u183F"
+    "ZR": "\u183F"
+    # this conversion shouldn't be needed, but does no harm
+    "Zr": "\u183F"
+    # this conversion shouldn't be needed, but does no harm
+    "zR": "\u183F"
+    "zr": "\u183F"
+    "R": "\u1837"
+    "r": "\u1837"
+    "ZH": "\u1841"
+    "Zh": "\u1841"
+    # this conversion shouldn't be needed, but does no harm
+    "zH": "\u1841"
+    "zh": "\u1841"
+    "CH": "\u1842"
+    "Ch": "\u1842"
+    # this conversion shouldn't be needed, but does no harm
+    "cH": "\u1842"
+    "ch": "\u1842"
+    "C\u0307": "\u1878"
+    "c\u0307": "\u1878"
+    "C\u0301": "\u183C"
+    "c\u0301": "\u183C"
+    "C": "\u1852"
+    "c": "\u1852"
+    "H": "\u183E"
+    "h": "\u183E"
+    "Z": "\u1834"
+    "z": "\u1834"
+    "...": "\u1801"
+    "..": "\u1803"
+    ".": "\u180A"
+    ",": "\u1802"
+    ":": "\u1804"
+    # Left pointing double angle quotation mark
+    "\u003C\u003C": "\u300A"
+    # Right pointing double angle quotation mark
+    "\u003E\u003E": "\u300B"
+    "0": "\u1810"
+    "1": "\u1811"
+    "2": "\u1812"
+    "3": "\u1813"
+    "4": "\u1814"
+    "5": "\u1815"
+    "6": "\u1816"
+    "7": "\u1817"
+    "8": "\u1818"
+    "9": "\u1819"
+    "\u0304": "\u1843"
+
+script_to_roman:
+
+  map:
+    "\u184E\u1820": "g\u0307a"
+    "\u184E\u1846": "g\u0307o"
+    "\u184E\u1847": "g\u0307u"
+    "\u1820\u184E": "aq"
+    "\u1846\u184E": "oq"
+    "\u1847\u184E": "uq"
+    "\u184E\u1844": "ge"
+    "\u184E\u1845": "gi"
+    "\u184E\u1848": "go\u0308"
+    "\u184E\u1849": "gu\u0308"
+    "\u1844\u184E": "eq"
+    "\u1845\u184E": "iq"
+    "\u1848\u184E": "o\u0308q"
+    "\u1849\u184E": "u\u0308q"
+    "\u184D\u1820": "xa"
+    "\u184D\u1846": "xo"
+    "\u184D\u1847": "xu"
+    "\u184D\u1844": "ke"
+    "\u184D\u1845": "ki"
+    "\u184D\u1848": "ko\u0308"
+    "\u184D\u1849": "ku\u0308"
+    "\u180E\u1820": "\u002Da"
+    "\u180E\u1844": "\u002De"
+    "\u180E\u1845": "\u002Di"
+    "\u180E": "\u002D"
+    "\u202F": "\u002D"
+    "\u1801": "..."
+    "\u1803": "."
+    "\u1802": ","
+    "\u1804": ":"
+    "\u1810": "0"
+    "\u1811": "1"
+    "\u1812": "2"
+    "\u1813": "3"
+    "\u1814": "4"
+    "\u1815": "5"
+    "\u1816": "6"
+    "\u1817": "7"
+    "\u1818": "8"
+    "\u1819": "9"
+    "\u1820": "a"
+    "\u1828": "n"
+    "\u182F": "l"
+    "\u1830": "s"
+    "\u1831": "s\u0301"
+    "\u1834": "z"
+    "\u1837": "r"
+    "\u1839": "f"
+    "\u183C": "c\u0301"
+    "\u183E": "h"
+    "\u183F": "z\u0301"
+    "\u1843": "\u0304"
+    "\u1844": "e"
+    "\u1845": "i"
+    "\u1846": "o"
+    "\u1847": "u"
+    "\u1848": "o\u0308"
+    "\u1849": "u\u0308"
+    "\u184A": "ng"
+    "\u184B": "b"
+    "\u184C": "p"
+    "\u184D\u1820": "xa"
+    "\u184D\u1844": "xe"
+    "\u184D\u1845": "xi"
+    "\u184D\u1848": "xo\u0308"
+    "\u184D\u1846": "xo"
+    "\u184D\u1849": "xu\u0308"
+    "\u184D\u1847": "xu"
+    "\u184D": "q"
+    "\u184E": "g"
+    "\u184F": "m"
+    "\u1850": "t"
+    "\u1851": "d"
+    "\u1852": "c"
+    "\u1853": "j"
+    "\u1854": "ts"
+    "\u1855": "y"
+    "\u1856": "v"
+    "\u1857\u1820": "ka"
+    "\u1857\u1844": "ke"
+    "\u1857\u1845": "ki"
+    "\u1857\u1846": "ko"
+    "\u1857\u1847": "ku"
+    "\u1857\u1848": "ko\u0308"
+    "\u1857\u1849": "ku\u0308"
+    "\u1857": "k"
+    "\u1858": "g"
+    "\u1859": "h"
+    "\u185A": "j\u0301"
+    "\u185B": "k\u0307"
+    "\u185C": "j"
+    "\u00AB": "\u003C\u003C"
+    "\u00BB": "\u003E\u003E"
+    "\u300A": "\u0022"
+    "\u300B": "\u0022"

+ 1 - 1
scriptshifter/tables/data/turkmen_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Turkmen (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/tuvinian_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Tuvinian (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/udmurt_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Udmurt (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/uighur_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Uighur (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/ukrainian.yml

@@ -1,7 +1,7 @@
 general:
   name: Ukrainian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/uzbek_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Uzbek (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/yakut_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Yakut (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/yuit_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Yuit (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 33 - 14
scriptshifter/tables/index.yml

@@ -23,20 +23,20 @@ arabic:
 armenian:
   marc_code: arm
   name: Armenian
-asian_cyrillic:
-  description: >
-    Multi-purpose transliteration for non-Slavic Cyrillic scripts: Abaza,
-    Abkhaz, Adygei, Aisor, Altai, Avar, Azeri, Balkar, Bashkir, Buryat,
-    Chechen, Chukchi, Chuvash, Dargwa, Dungan, Eskimo, Even, Evenki, Gagauz,
-    Ingush, Inuit, Kabardian, Kalmyk, Karachay, Karachay-Balkar, Karakalpak,
-    Karelian, Khakass, Khanty, Komi, Komi-Permyak, Koryak, Kumyk, Lak, Lapp,
-    Lezghian, Lithuanian, Mansi, Mari, Moldovan, Molodstov, Mordvin, Nanai,
-    Nenets, Nivkh, Nogai, Ossetic, Permyak, Romanian, Romany, Selkup, Shor,
-    Tabasaran, Tat, Tuva, Udekhe, Udmurt, Yakut.
-  marc_code: >
-    abk, ady, alt, ava, bak, che, chv, dar, ale, esk, kbd, xal, krc, kaa,
-    krl, kom, kum, lez, lit, chm, nog, oss, rum, rom, sel, udm, sah
-  name: Asian Cyrillic
+# asian_cyrillic:
+#   description: >
+#     Multi-purpose transliteration for non-Slavic Cyrillic scripts: Abaza,
+#     Abkhaz, Adygei, Aisor, Altai, Avar, Azeri, Balkar, Bashkir, Buryat,
+#     Chechen, Chukchi, Chuvash, Dargwa, Dungan, Eskimo, Even, Evenki, Gagauz,
+#     Ingush, Inuit, Kabardian, Kalmyk, Karachay, Karachay-Balkar, Karakalpak,
+#     Karelian, Khakass, Khanty, Komi, Komi-Permyak, Koryak, Kumyk, Lak, Lapp,
+#     Lezghian, Lithuanian, Mansi, Mari, Moldovan, Molodstov, Mordvin, Nanai,
+#     Nenets, Nivkh, Nogai, Ossetic, Permyak, Romanian, Romany, Selkup, Shor,
+#     Tabasaran, Tat, Tuva, Udekhe, Udmurt, Yakut.
+#   marc_code: >
+#     abk, ady, alt, ava, bak, che, chv, dar, ale, esk, kbd, xal, krc, kaa,
+#     krl, kom, kum, lez, lit, chm, nog, oss, rum, rom, sel, udm, sah
+#   name: Asian Cyrillic
 assamese:
   name: assamese
 azerbaijani_cyrillic:
@@ -71,6 +71,17 @@ church_slavonic:
 chuvash_cyrillic:
   marc_code: chv
   name: Chuvash (Cyrillic)
+cyrillic_generic:
+  description: 'Multi-purpose transliteration for most languages that use the Cyrillic script:
+    Abaza, Abkhaz, Adygei, Aisor, Altai, Avar, Azeri, Balkar, Bashkir, Belarusian, Bulgarian,
+    Buryat, Chechen, Chukchi, Chuvash, Dargwa, Dungan, Eskimo, Even, Evenki, Gagauz, Ingush,
+    Inuit, Kabardian, Kalmyk, Karachay, Karachay-Balkar, Karakalpak, Karelian, Khakass, Khanty,
+    Komi, Komi-Permyak, Koryak, Kumyk, Lak, Lapp, Lezghian, Lithuanian, Macedonian, Mansi, Mari,
+    Moldovan, Molodtsov, Mordvin, Nanai, Nenets, Nivkh, Nogai, Ossetic, Permyak, Romanian, Romany,
+    Russian, Selkup, Serbian, Shor, Tabasaran, Tat, Tuva, Udekhe, Udmurt, Ukrainian, Yakut.'
+  marc_code: abk, ady, alt, ava, bak, bel, bul, che, chm, chv, dar, ale, esk, kbd, xal, krc, kaa,
+    krl, kom, kum, lez, lit, mac, nog, oss, rum, rom, sah, sel, srp, udm, ukr
+  name: Cyrillic (Generic)
 devanagari:
   marc_code: hin, san
   name: Devanagari
@@ -167,6 +178,9 @@ macedonian:
 malayalam:
   marc_code: mal
   name: Malayalam
+manchu:
+  marc_code: mnc
+  name: Manchu
 mansi_cyrillic:
   name: Mansi (Cyrillic)
 marathi_devanagari:
@@ -266,6 +280,9 @@ thai:
 tibetan:
   marc_code: tib
   name: Tibetan
+tod_mongolian:
+  marc_code: xal
+  name: Tod Mongolian
 turkmen_cyrillic:
   marc_code: tuk
   name: Turkmen (Cyrillic)
@@ -298,3 +315,5 @@ yiddish:
   name: Yiddish
 yuit_cyrillic:
   name: Yuit (Cyrillic)
+  
+

+ 0 - 9
scriptshifter/tools.py

@@ -1,9 +0,0 @@
-__doc__ = """ Common tools for core and hooks. """
-
-
-def capitalize(src):
-    """ Only capitalize first word and words preceded by space."""
-    orig_ls = src.split(" ")
-    cap_ls = [orig[0].upper() + orig[1:] for orig in orig_ls]
-
-    return " ".join(cap_ls)

+ 10 - 3
scriptshifter/trans.py

@@ -113,7 +113,7 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
             )
 
         # Normalize case before post_config and rule-based normalization.
-        if not ctx.general["case_sensitive"]:
+        if t_dir == FEAT_R2S and not ctx.general["case_sensitive"]:
             ctx._src = ctx.src.lower()
 
         # This hook may take over the whole transliteration process or delegate
@@ -271,7 +271,10 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
                     # A match is found. Stop scanning tokens, append result,
                     # and proceed scanning the source.
 
-                    # Capitalization.
+                    # Capitalization. This applies double capitalization
+                    # rules. The standalone capitalize() helper used for
+                    # non-table languages (no longer in scriptshifter.tools,
+                    # which has been removed) does not.
                     if (
                         (ctx.options["capitalize"] == "first" and ctx.cur == 0)
                         or
@@ -348,7 +351,11 @@ def _normalize_src(ctx, norm_rules):
     # In using diacritics, LC standards prefer the decomposed form (combining
     # diacritic + base character) to the pre-composed form (single Unicode
     # symbol for the letter with diacritic).
-    ctx._src = precomp_normalize("NFD", ctx.src)
+    #
+    # Note: only safe for R2S.
+    if ctx.t_dir == FEAT_R2S:
+        logger.debug("Normalizing pre-composed symbols.")
+        ctx._src = precomp_normalize("NFD", ctx.src)
 
     for nk, nv in norm_rules.items():
         ctx._src = ctx.src.replace(nk, nv)

+ 176 - 186
test/data/script_samples/sea.csv

@@ -1,187 +1,177 @@
-"assamese","Nāthadharma Nāthayogī, Trailokya Mohana Nātha","নাথধৰ্ম নাথয়োগী, ত্ৰৈলোক্য মোহন নাথ","r2s"
-"assamese","Nadīdvīpa Mājulī : Mājulī samvandhīẏa ekhana aitihāsika grantha","নদীদ্vঈপ মাজুলী : মাজুলী সম্vঅন্ধীয় এখন ঐতিহাসিক গ্ৰন্থ","r2s"
-"assamese","Ḍa. Himanta Biśva Śarmā Carakārara aniruddha yātrāra duṭā bachara","ড হিমন্ত বিশ্vঅ শৰ্মা চৰকাৰৰ অনিৰুদ্ধ যাত্ৰাৰ দুটা বছৰ","r2s"
-"assamese","ādhunika asamīẏā sāhityara camu ābhāsa","আধুনিক অসমীয়া সাহিত্যৰ চমু আভাস","r2s"
-"assamese","svādhikāra spr̥hā : ḍimācā jātisattāra ātmapratishṭhāra saṃgrāma","স্vআধিকাৰ স্পৃহা : ডিমাচা জাতিসত্তাৰ আত্মপ্ৰতিষ্ঠাৰ সঙ্গ্ৰাম","r2s"
-"bengali","Duniẏā / Ābdullāha Āla Muktādira","দুনিয়া / আব্দুল্লাহ আল মুক্তাদির","r2s"
-"bengali","kaẏekajana ardheka mānusha / rāsela rāẏahāna","কয়েকজন অর্ধেক মানুষ / রাসেল রায়হান","r2s"
-"bengali","māẏāphulera bana / hāsāna māhabuba","মায়াফুলের বন / হাসান মাহবুব","r2s"
-"bengali","deśabhāga o udvāstujībanera galpa / sampādanā, hāmida kāẏasāra","দেশভাগ ও উদ্বাস্তুজীবনের গল্প / সম্পাদনা, হামিদ কায়সার","r2s"
-"bengali","sileṭera tāmraśāsana","সিলেটের তাম্রশাসন","r2s"
-"bengali","Prabāsī hāoẏā","প্রবাসী হাওয়া","r2s"
-"bengali","jātīẏa granthapañjī, bāṃlā bibhāga","জাতীয় গ্রন্থপঞ্জী, বাংলা বিভাগ","r2s"
-"bengali","bhānu samagra, bhānu bandyopādhyāẏera samasta racanā o dushprāpya ārṭapleṭa","ভানু সমগ্র, ভানু বন্দ্যোপাধ্যায়ের সমস্ত রচনা ও দুষ্প্রাপ্য আর্টপ্লেট","r2s"
-"bengali","tathẏera āloke rāja āmale tripurā, prathama o dvitīẏa khaṇḍa ekatre","তথ্যের আলোকে রাজ আমলে ত্রিপুরা, প্রথম ও দ্বিতীয় খণ্ড একত্রে","r2s"
-"bengali","pratna-ratna bhāṇḍāra pilāka","প্রত্ন-রত্ন ভাণ্ডার পিলাক","r2s"
-"bengali","bimala caudhurīra śreshṭha galpa","বিমল চৌধুরীর শ্রেষ্ঠ গল্প","r2s"
-"bengali","Tripurāra rājanīti o tāra ghaṭanāprabāha","ত্রিপুরার রাজনীতি ও তার ঘটনাপ্রবাহ","r2s"
-"bengali","kr̥shṇapakshera kālo megha","কৃষ্ণপক্ষের কালো মেঘ","r2s"
-"dogri_devanagari","Chandamukta ḍogarī kavitā : śilpa te śailī","छंदमुक्त डोगरी कविता : शिल्प ते शैली","r2s"
-"dogri_devanagari","ḍogarī de trai muṇḍhale nāṭaka","डोगरी दे त्रै मुंढले नाटक","r2s"
-"dogri_devanagari","che rūpaka : ḍogarī bhāśā, sāhitya te saṃskr̥ti para","छे रूपक : डोगरी भाशा, साहित्य ते संस्कृति पर","r2s"
-"dogri_devanagari","nāṭaka rāheṃ bhāśāvijñāna","नाटक राहें भाशाविज्ञान","r2s"
-"dogri_devanagari","jammū pādaśahī : jammū dā sampūrṇa itehāsa","जम्मू पादशाही : जम्मू दा सम्पूर्ण इतेहास","r2s"
-"gujarati","Kacchadharānī vismr̥ta virāsata Selora-Vāva sthāpatya","કચ્છધરાની વિસ્મૃત વિરાસત સેલોર-વાવ સ્થાપત્ય","r2s"
-"gujarati","bhāratīya kalāsaundaryanā upāsaka śrī vāsudeva smārta","ભારતીય કલાસૌન્દર્યના ઉપાસક શ્રી વાસુદેવ સ્માર્ત","r2s"
-"gujarati","Jagadguru bhagavāna śaṅkarācārya praṇīta saundaryalaharīnāṃ mantra-rahasyo","જગદ્ગુરુ ભગવાન શઙ્કરાચાર્ય પ્રણીત સૌન્દર્યલહરીનાં મન્ત્ર-રહસ્યો","r2s"
-"gujarati","sarvocca sanmānita sāhityasarjako","સર્વોચ્ચ સન્માનિત સાહિત્યસર્જકો","r2s"
-"gujarati","maherajātinī lokasaṃskr̥ti ane kalāo ","મહેરજાતિની લોકસંસ્કૃતિ અને કલાઓ","r2s"
-"hindi","iśqa meṃ māṭī sonā ","इश्क़ में माटी सोना","r2s"
-"hindi","yātrā sāhitya vidhā : śāstra aura itihāsa ","यात्रा साहित्य विधा : शास्त्र और इतिहास ","r2s"
-"hindi","mānavādhikāra aura samakālīna kavitā","मानवाधिकार और समकालीन कविता","r2s"
-"hindi","hindī kī pahalī ādhunika kavitā : pāṭha evaṃ mūlyāṅkana","हिन्दी की पहली आधुनिक कविता : पाठ एवं मूल्याङ्कन","r2s"
-"hindi","adhyāpaka, adhyāpana, aura adhyāpaka śikshā : nītiyām̐, bahaseṃ, aura anubhava","अध्यापक, अध्यापन, और अध्यापक शिक्षा : नीतिाँ, बहसें, और अनुभव","r2s"
-"hindi","sāṭhottarī hindī kavitā meṃ sāṃskr̥tika-cetanā","साठोत्तरी हिन्दी कविता में सांस्कृतिक-चेतना","r2s"
-"hindi","saṅgīta kī devī latā maṅgeśakara kā rūhānī ahasāsa","संगीत की देवी लता मंगेशकर का रूहानी अहसास","r2s"
-"hindi","prācīna bhārata meṃ kr̥shi evaṃ jala saṃsādhana","प्राचीन भारत में कृषि एवं जल संसाधन","r2s"
-"hindi","jilā starīya loka śikāyata nivāraṇa tantra : saṃracanā, vyavahāra evaṃ cunautiyām̐","जिला स्तरीय लोक शिकायत निवारण तन्त्र : संरचना, व्यवहार एवं चुनौतियाँ","r2s"
-"hindi","kaśmīra kā sāṃskr̥tika avabodha aura samakālīna vimarśa","कश्मीर का सांस्कृतिक अवबोध और समकालीन विमर्श","r2s"
-"hindi","antima daśaka kī hindī kahāniyām̐ : parivāra, nārī, dāmpatya jīvana aura mānavīya sambandha ke viśesha sandarbha meṃ...","अन्तिम दशक की हिन्दी कहानियाँ : परिवार, नारी, दाम्पत्य जीवन और मानवीय सम्बन्ध के विशेष सन्दर्भ में","r2s"
-"hindi","naī sadī kī lekhikāoṃ kī kahāniyoṃ meṃ citrita purusha pātra","नई सदी की लेखिकाओं की कहानियों में चित्रित पुरुष पात्र","r2s"
-"hindi","kedāranātha siṃha : kavitā ke deśa meṃ ","केदारनाथ सिंह : कविता के देश में","r2s"
-"hindi","grāmīṇa pariveśa aura śivamūrti kā racanā saṃsāra","ग्रामीण परिवेश और शिवमूर्ति का रचना संसार","r2s"
-"hindi","lokaraṅga : mahārāshṭra kī lokakathāeṃ evaṃ saṃskr̥ti- eka paricaya","लोकरंग : महाराष्ट्र की लोककथाएं एवं संस्कृति- एक परिचय","r2s"
-"hindi","maujūdā samāja evaṃ mīḍiyā meṃ bāla mānasa","मौजूदा समाज एवं मीडिया में बाल मानस","r2s"
-"hindi","bhāratīya samāja meṃ mahilā saśaktīkaraṇa sampūrṇa samāja kā utthāna","भारतीय समाज में महिला सशक्तीकरण सम्पूर्ण समाज का उत्थान","r2s"
-"hindi","rājā mahendra pratāpa siṃha krāntigāthā samagra","राजा महेन्द्र प्रताप सिंह क्रान्तिगाथा समग्र","r2s"
-"hindi","bhārata meṃ kisāna āndolana aura usake netā","भारत में किसान आन्दोलन और उसके नेता","r2s"
-"hindi","kalākāroṃ kī nazara meṃ kāśī ke ghāṭa","कलाकारों की नज़र में काशी के घाट","r2s"
-"hindi","अन्तिम दशक की हिन्दी कहानियाँ : परिवार, नारी, दाम्पत्य जीवन और मानवीय सम्बन्ध के विशेष सन्दर्भ में","antima daśaka kī hindī kahāniyām̐ : parivāra, nārī, dāmpatya jīvana aura mānavīya sambandha ke viśesha sandarbha meṃ","s2r"
-"hindi","नई सदी की लेखिकाओं की कहानियों में चित्रित पुरुष पात्र","na sadī kī lekhikāoṃ kī kahāniyoṃ meṃ citrita purusha pātra","s2r"
-"hindi","केदारनाथ सिंह : कविता के देश में","kedāranātha siṃha : kavitā ke deśa meṃ","s2r"
-"hindi","ग्रामीण परिवेश और शिवमूर्ति का रचना संसार","grāmīṇa pariveśa aura śivamūrti kā racanā saṃsāra","s2r"
-"hindi","लोकरंग : महाराष्ट्र की लोककथाएं एवं संस्कृति- एक परिचय","lokaraṅga : mahārāṭra kī lokakathāeṃ evaṃ saṃskr̥ti- eka paricaya","s2r"
-"hindi","मौजूदा समाज एवं मीडिया में बाल मानस","maujūdā samāja evaṃ mīḍiyā meṃ bāla mānasa","s2r"
-"hindi","भारतीय समाज में महिला सशक्तीकरण सम्पूर्ण समाज का उत्थान","bhāratīya samāja meṃ mahilā saśaktīkaraṇa sampūrṇa samāja kā utthāna","s2r"
-"hindi","राजा महेन्द्र प्रताप सिंह क्रान्तिगाथा समग्र","rājā mahendra pratāpa siṃha krāntigāthā samagra","s2r"
-"hindi","भारत में किसान आन्दोलन और उसके नेता","bhārata meṃ kisāna āndolana aura usake netā","s2r"
-"hindi","कलाकारों की नज़र में काशी के घाट","kalākāroṃ kī nazara meṃ kāśī ke ghāṭa","s2r"
-"kannada","arebhāṣe-kannaḍa gādegaḷa kōśa","ಅರೆಭಾಷೆ-ಕನ್ನಡ ಗಾದೆಗಳ ಕೋಶ","r2s"
-"kannada","byāri-kannaḍa gāde, nuḍigaṭṭu mattu ogaṭugaḷa kōśa","ಬ್ಯಾರಿ-ಕನ್ನಡ ಗಾದೆ, ನುಡಿಗಟ್ಟು ಮತ್ತು ಒಗಟುಗಳ ಕೋಶ","r2s"
-"kannada","śō śuḍ go ān je. lōkēś","ಶೋ ಶುಡ್ ಗೊ ಆನ್ ಜೆ. ಲೋಕೇಶ್","r2s"
-"kannada","saṅgamēśara śr̥ṇgāra śāyiri","ಸಂಗಮೇಶರ ಶೃಂಗಾರ ಶಾಯಿರಿ","r2s"
-"kannada","neladāseya nakṣatragaḷu","ನೆಲದಾಸೆಯ ನಕ್ಷತ್ರಗಳು","r2s"
-"kannada","mahādāni doḍḍabasappa (appaṇṇa) baśeṭṭeppa taṭṭi dampatigaḷa sārthaka baduku","ಮಹಾದಾನಿ ದೊಡ್ಡಬಸಪ್ಪ (ಅಪ್ಪಣ್ಣ) ಬಶೆಟ್ಟೆಪ್ಪ ತಟ್ಟಿ ದಂಪತಿಗಳ ಸಾರ್ಥಕ ಬದುಕು","r2s"
-"kannada","malenāḍina yakṣacētanagaḷu","ಮಲೆನಾಡಿನ ಯಕ್ಷಚೇತನಗಳು","r2s"
-"kannada","ceñcu buḍakaṭṭu saṃskr̥ti","ಚೆಂಚು ಬುಡಕಟ್ಟು ಸಂಸ್ಕೃತಿ","r2s"
-"kannada","hāralu āgasa, ījalu sāgara","ಹಾರಲು ಆಗಸ, ಈಜಲು ಸಾಗರ","r2s"
-"kannada","karunāḍa siḍilu beḷavāḍi rāṇi mallamma","ಕರುನಾಡ ಸಿಡಿಲು ಬೆಳವಾಡಿ ರಾಣಿ ಮಲ್ಲಮ್ಮ","r2s"
-"malayalam","തെരഞ്ഞെടുത്ത പ്രബന്ധങ്ങൾ","teraññeṭutta pr̲abandhaṅṅaḷ",
-"malayalam","മലയാളഭാഷയുടെ അടിവേരുകൾ","malayāḷabhāṣayuṭe aṭivērukaḷ",
-"malayalam","ഒരു നീണ്ട രാത്രിയുടെ ഓർമ്മക്കായി","Oru nīṇṭa rātr̲iyuṭe ōrmmaykkāyi",
-"malayalam","അക്കപ്പോരിന്റെ ഇരുപതു നസ്രാണി വർഷങ്ങൾ","akkappōrint̲e irupat nasrāṇi varṣaṅṅaḷ",
-"malayalam","പെണ്ണും ചെറുക്കനും","peṇṇuṃ cer̲ukkanuṃ",
-"malayalam","വൈശികതന്ത്ര്̲അ / വ്യാഖ്യാനം, എൻ. ഗോപിനാഥന്നായർ","Vaiśikatantr̲aṃ / vyākhyānaṃ, en. gōpināthannāyar",
-"malayalam","നാരായണഗുരു : ചിതർ̲ഇയ ആൾക്കണ്ണാടി / ബിനീഷ് പുതുപ്പണṃ","Nārāyaṇaguru : citar̲iya āḷkkaṇṇāṭi / Binīṣ Putuppaṇaṃ",
-"malayalam","ബുദ്ധിജീവികളെക്കൊണ്ടു് എന്തുപ്ര്̲അയോജനṃ? / സക്കർ̲ഇയ","Buddhijīvikaḷekkoṇṭȧ entupr̲ayōjanaṃ? /  Sakkar̲iya",
-"malayalam","മലയാളപ്പച്ച : കെർ̄അളത്തിന്ത്̲എ ഋതുഭംഗികളിലൂടെ ഒരു യാത്ര്̲അ / പി. സുരേന്ദ്രൻ","Malayāḷappacca : Ker̄aḷattint̲e r̥tubhaṅgikaḷilūṭe oru yātr̲a / Pi. Surēndran",
-"malayalam","തെരഞ്ഞെടുത്ത പ്രബന്ധങ്ങൾ","teraññeṭutta pr̲abandhaṅṅaḷ",
-"malayalam","മലയാളഭാഷയുടെ അടിവേരുകൾ","malayāḷabhāṣayuṭe aṭivērukaḷ",
-"malayalam","ഒരു നീണ്ട രാത്രിയുടെ ഓർമ്മക്കായി","Oru nīṇṭa rātr̲iyuṭe ōrmmaykkāyi",
-"malayalam","അക്കപ്പോരിന്റെ ഇരുപതു നസ്രാണി വർഷങ്ങൾ","akkappōrint̲e irupat nasrāṇi varṣaṅṅaḷ",
-"malayalam","പെണ്ണും ചെറുക്കനും","peṇṇuṃ cer̲ukkanuṃ",
-"malayalam","വൈശികതന്ത്രം /വ്യാഖ്യാനം, എൻ. ഗോപിനാഥൻനായർ","vaiśikatantraṃ / vyākhyānaṃ, en. gōpināthannāyar",
-"malayalam","നാരായണഗുരു: ചിതറിയ ആൽക്കണ്ണാടി / ബിനീഷ് പുതുപ്പണം","nārāyaṇaguru: citaṟiya ālkkaṇṇāṭi / binīṣ putuppaṇaṃ",
-"malayalam","ബുദ്ധിജീവികളെക്കൊണ്ടു് എന്തുപ്ര്̲അയോജനṃ? / സക്കർ̲ഇയ","Buddhijīvikaḷekkoṇṭȧ entupr̲ayōjanaṃ? / Sakkar̲iya",
-"malayalam","മലയാളപ്പച്ച : കെർ̄അളത്തിന്ത്̲എ ഋതുഭംഗികളിലൂടെ ഒരു യാത്ര്̲അ / പി. സുരേന്ദ്രൻ","Malayāḷappacca : Ker̄aḷattint̲e r̥tubhaṅgikaḷilūṭe oru yātr̲a / Pi. Surēndran",
-"marathi_devanagari","kshatriya marāṭhyāñcī 96 kuḷe / saṅkalana","क्षत्रिय मराठ्यांची ९६ कुळे / संकलन, अशोक सातपुते","r2s"
-"marathi_devanagari","mahārāshṭrāce śilpakāra ḍô bābāsāheba āmbeḍakara ","महाराष्ट्राचे शिल्पकार डॉ बाबासाहेब आंबेडकर ","r2s"
-"marathi_devanagari","anuvādātūna anusarjanākaḍe : sāhityika anuvādāce tantra ani mantra ","अनुवादातून अनुसर्जनाकडे : साहित्यिक अनुवादाचे तंत्र आणि मंत्र  ","r2s"
-"marathi_devanagari","āra e kumbhāra","आर ए कुंभार","r2s"
-"marathi_devanagari","thora itihāsa sãśodhaka rāvabahādūra dattātreya baḷavanta pārasanīsa :  caritra va kārya / ḍô. surendra śrīkr̥shṇa pārasanīsa","थोर इतिहास संशोधक रावबहादूर दत्तात्रेय बळवंत पारसनीस : चरित्र व कार्य / डॉ सुरेन्द्र श्रीकृष्ण पारसनीस","r2s"
-"marathi_devanagari","pūrvakāḷātīla shaṭkarmī-dvivedī ","पूर्वकाळातील षटकर्मी-द्विवेदी ","r2s"
-"marathi_devanagari","mehatā pabliśiṅga hāūsa yāñcī ","मेहता पब्लिशिंग हाऊस यांची","r2s"
-"marathi_devanagari","rājarshī śāhū chatrapatīñce ","राजर्षी शाहू छत्रपतींचे ","r2s"
-"marathi_devanagari","āmacyā lagnācẽ khatakhatẽ : dona aṅkī Koṅkaṇī nāṭaka","आमच्या लग्नाचें खतखतें : दोन अंकी कोंकणी नाटक","r2s"
+"assamese","nāthadharma nāthayogī, trailokya mohana nātha","নাথধর্ম নাথযোগী, ত্রৈলোক্য মোহন নাথ   ","r2s",,
+"assamese","nadīdvīpa mājulī : mājulī samvandhīẏa ekhana aitihāsika grantha","নবদ্বীপ মাজুলী : মাজুলি সম্বন্ধীয় এখন ঐতিহাসিক গ্রন্থ ","r2s",,
+"assamese","ḍa. himanta biśva śarmā carakārara aniruddha yātrāra duṭā bachara","ডাঃ হিমন্ত বিশ্ব শর্মা চৰকাৰৰ অনিৰুদ্ধ যাত্ৰাৰ দুটা বছৰ","r2s",,
+"assamese","ādhunika asamīẏā sāhityara camu ābhāsa","আধুনিক অসমীয়া সাহিত্যৰ চমু আভাস","r2s",,
+"assamese","svādhikāra spr̥hā : ḍimācā jātisattāra ātmapratishṭhāra saṃgrāma","স্বাধিকার স্পৃহা : ডিমাচা জাতিসত্তাৰ আত্মপ্ৰতিষ্ঠাৰ সংগ্রাম ","r2s",,
+"bengali","duniẏā / ābdullāha āla muktādira","দুনিয়া / আব্দুল্লাহ আল মুক্তাদির","r2s",,
+"bengali","kaẏekajana ardheka mānusha / rāsela rāẏahāna","কয়েকজন অর্ধেক মানুষ / রাসেল রায়হান","r2s",,
+"bengali","māẏāphulera bana / hāsāna māhabuba","মায়াফুলের বন / হাসান মাহবুব","r2s",,
+"bengali","deśabhāga o udvāstujībanera galpa / sampādanā, hāmida kāẏasāra","দেশভাগ ও উদ্বাস্তুজীবনের গল্প / সম্পাদনা, হামিদ কায়সার","r2s",,
+"bengali","sileṭera tāmraśāsana","সিলেটের তাম্রশাসন","r2s",,
+"bengali","Prabāsī hāoẏā","প্রবাসী হাওয়া","r2s","{""capitalize"": ""first""}",
+"bengali","jātīẏa granthapañjī, bāṃlā bibhāga","জাতীয় গ্রন্থপঞ্জী, বাংলা বিভাগ","r2s",,
+"bengali","bhānu samagra, bhānu bandyopādhyāẏera samasta racanā o dushprāpya ārṭapleṭa","ভানু সমগ্র, ভানু বন্দ্যোপাধ্যায়ের সমস্ত রচনা ও দুষ্প্রাপ্য আর্টপ্লেট","r2s",,
+"bengali","tathẏera āloke rāja āmale tripurā, prathama o dvitīẏa khaṇḍa ekatre","তথ্যের আলোকে রাজ আমলে ত্রিপুরা, প্রথম ও দ্বিতীয় খণ্ড একত্রে","r2s",,
+"bengali","pratna-ratna bhāṇḍāra pilāka","প্রত্ন-রত্ন ভাণ্ডার পিলাক","r2s",,
+"bengali","bimala caudhurīra śreshṭha galpa","বিমল চৌধুরীর শ্রেষ্ঠ গল্প","r2s",,
+"bengali","Tripurāra rājanīti o tāra ghaṭanāprabāha","ত্রিপুরার রাজনীতি ও তার ঘটনাপ্রবাহ","r2s","{""capitalize"": ""first""}",
+"bengali","kr̥shṇapakshera kālo megha","কৃষ্ণপক্ষের কালো মেঘ","r2s",,
+"dogri_devanagari","Chandamukta ḍogarī kavitā : śilpa te śailī","छंदमुक्त डोगरी कविता : शिल्प ते शैली","r2s",,
+"dogri_devanagari","ḍogarī de trai muṇḍhale nāṭaka","डोगरी दे त्रै मुंढले नाटक","r2s",,
+"dogri_devanagari","che rūpaka : ḍogarī bhāśā, sāhitya te saṃskr̥ti para","छे रूपक : डोगरी भाशा, साहित्य ते संस्कृति पर","r2s",,
+"dogri_devanagari","nāṭaka rāheṃ bhāśāvijñāna","नाटक राहें भाशाविज्ञान","r2s",,
+"dogri_devanagari","jammū pādaśahī : jammū dā sampūrṇa itehāsa","जम्मू पादशाही : जम्मू दा सम्पूर्ण इतेहास","r2s",,
+"gujarati","Kacchadharānī vismr̥ta virāsata Selora-Vāva sthāpatya","કચ્છધરાની વિસ્મૃત વિરાસત સેલોર-વાવ સ્થાપત્ય","r2s","{""capitalize"": ""first""}",
+"gujarati","bhāratīya kalāsaundaryanā upāsaka śrī vāsudeva smārta","ભારતીય કલાસૌન્દર્યના ઉપાસક શ્રી વાસુદેવ સ્માર્ત","r2s",,
+"gujarati","Jagadguru bhagavāna śaṅkarācārya praṇīta saundaryalaharīnāṃ mantra-rahasyo","જગદ્ગુરુ ભગવાન શઙ્કરાચાર્ય પ્રણીત સૌન્દર્યલહરીનાં મન્ત્ર-રહસ્યો","r2s","{""capitalize"": ""first""}",
+"gujarati","sarvocca sanmānita sāhityasarjako","સર્વોચ્ચ સન્માનિત સાહિત્યસર્જકો","r2s",,
+"gujarati","maherajātinī lokasaṃskr̥ti ane kalāo ","મહેરજાતિની લોકસંસ્કૃતિ અને કલાઓ","r2s",,
+"hindi_devanagari","iśqa meṃ māṭī sonā ","इश्क़ में माटी सोना","r2s",,
+"hindi_devanagari","yātrā sāhitya vidhā : śāstra aura itihāsa ","यात्रा साहित्य विधा : शास्त्र और इतिहास ","r2s",,
+"hindi_devanagari","mānavādhikāra aura samakālīna kavitā","मानवाधिकार और समकालीन कविता","r2s",,
+"hindi_devanagari","hindī kī pahalī ādhunika kavitā : pāṭha evaṃ mūlyāṅkana","हिन्दी की पहली आधुनिक कविता : पाठ एवं मूल्यांकन","r2s",,
+"hindi_devanagari","adhyāpaka, adhyāpana, aura adhyāpaka śikshā : nītiyām̐, bahaseṃ, aura anubhava","अध्यापक, अध्यापन, और अध्यापक शिक्षा : नीतियां, बहसें, और अनुभव","r2s",,
+"hindi_devanagari","sāṭhottarī hindī kavitā meṃ sāṃskr̥tika-cetanā","साठोत्तरी हिन्दी कविता में सांस्कृतिक-चेतना","r2s",,
+"hindi_devanagari","saṅgīta kī devī latā maṅgeśakara kā rūhānī ahasāsa","संगीत की देवी लता मंगेशकर का रूहानी अहसास","r2s",,
+"hindi_devanagari","prācīna bhārata meṃ kr̥shi evaṃ jala saṃsādhana","प्राचीन भारत में कृषि एवं जल संसाधन","r2s",,
+"hindi_devanagari","jilā starīya loka śikāyata nivāraṇa tantra : saṃracanā, vyavahāra evaṃ cunautiyām̐","जिला स्तरीय लोक शिकायत निवारण तन्त्र : संरचना, व्यवहार एवं चुनौतियाँ","r2s",,
+"hindi_devanagari","kaśmīra kā sāṃskr̥tika avabodha aura samakālīna vimarśa","कश्मीर का सांस्कृतिक अवबोध और समकालीन विमर्श","r2s",,
+"hindi_devanagari","antima daśaka kī hindī kahāniyām̐ : parivāra, nārī, dāmpatya jīvana aura mānavīya sambandha ke viśesha sandarbha meṃ","अन्तिम दशक की हिन्दी कहानियाँ : परिवार, नारी, दाम्पत्य जीवन और मानवीय सम्बन्ध के विशेष सन्दर्भ में","r2s",,"This had an ellipsis in the Roman part."
+"hindi_devanagari","naī sadī kī lekhikāoṃ kī kahāniyoṃ meṃ citrita purusha pātra","नई सदी की लेखिकाओं की कहानियों में चित्रित पुरुष पात्र","r2s",,
+"hindi_devanagari","kedāranātha siṃha : kavitā ke deśa meṃ ","केदारनाथ सिंह : कविता के देश में","r2s",,
+"hindi_devanagari","grāmīṇa pariveśa aura śivamūrti kā racanā saṃsāra","ग्रामीण परिवेश और शिवमूर्ति का रचना संसार","r2s",,
+"hindi_devanagari","lokaraṅga : mahārāshṭra kī lokakathāeṃ evaṃ saṃskr̥ti- eka paricaya","लोकरंग : महाराष्ट्र की लोककथाएं एवं संस्कृति- एक परिचय","r2s",,
+"hindi_devanagari","maujūdā samāja evaṃ mīḍiyā meṃ bāla mānasa","मौजूदा समाज एवं मीडिया में बाल मानस","r2s",,
+"hindi_devanagari","bhāratīya samāja meṃ mahilā saśaktīkaraṇa sampūrṇa samāja kā utthāna","भारतीय समाज में महिला सशक्तीकरण सम्पूर्ण समाज का उत्थान","r2s",,
+"hindi_devanagari","rājā mahendra pratāpa siṃha krāntigāthā samagra","राजा महेन्द्र प्रताप सिंह क्रान्तिगाथा समग्र","r2s",,
+"hindi_devanagari","bhārata meṃ kisāna āndolana aura usake netā","भारत में किसान आन्दोलन और उसके नेता","r2s",,
+"hindi_devanagari","kalākāroṃ kī nazara meṃ kāśī ke ghāṭa","कलाकारों की नज़र में काशी के घाट","r2s",,
+"hindi_devanagari","अन्तिम दशक की हिन्दी कहानियाँ : परिवार, नारी, दाम्पत्य जीवन और मानवीय सम्बन्ध के विशेष सन्दर्भ में","antima daśaka kī hindī kahāniyām̐ : parivāra, nārī, dāmpatya jīvana aura mānavīya sambandha ke viśesha sandarbha meṃ","s2r",,
+"hindi_devanagari","नई सदी की लेखिकाओं की कहानियों में चित्रित पुरुष पात्र","na sadī kī lekhikāoṃ kī kahāniyoṃ meṃ citrita purusha pātra","s2r",,
+"hindi_devanagari","केदारनाथ सिंह : कविता के देश में","kedāranātha siṃha : kavitā ke deśa meṃ","s2r",,
+"hindi_devanagari","ग्रामीण परिवेश और शिवमूर्ति का रचना संसार","grāmīṇa pariveśa aura śivamūrti kā racanā saṃsāra","s2r",,
+"hindi_devanagari","लोकरंग : महाराष्ट्र की लोककथाएं एवं संस्कृति- एक परिचय","lokaraṅga : mahārāshṭra kī lokakathāeṃ evaṃ saṃskr̥ti- eka paricaya","s2r",,
+"hindi_devanagari","मौजूदा समाज एवं मीडिया में बाल मानस","maujūdā samāja evaṃ mīḍiyā meṃ bāla mānasa","s2r",,
+"hindi_devanagari","भारतीय समाज में महिला सशक्तीकरण सम्पूर्ण समाज का उत्थान","bhāratīya samāja meṃ mahilā saśaktīkaraṇa sampūrṇa samāja kā utthāna","s2r",,
+"hindi_devanagari","राजा महेन्द्र प्रताप सिंह क्रान्तिगाथा समग्र","rājā mahendra pratāpa siṃha krāntigāthā samagra","s2r",,
+"hindi_devanagari","भारत में किसान आन्दोलन और उसके नेता","bhārata meṃ kisāna āndolana aura usake netā","s2r",,
+"hindi_devanagari","कलाकारों की नज़र में काशी के घाट","kalākāroṃ kī nazara meṃ kāśī ke ghāṭa","s2r",,
+"kannada","arebhāṣe-kannaḍa gādegaḷa kōśa","ಅರೆಭಾಷೆ-ಕನ್ನಡ ಗಾದೆಗಳ ಕೋಶ","r2s",,
+"kannada","byāri-kannaḍa gāde, nuḍigaṭṭu mattu ogaṭugaḷa kōśa","ಬ್ಯಾರಿ-ಕನ್ನಡ ಗಾದೆ, ನುಡಿಗಟ್ಟು ಮತ್ತು ಒಗಟುಗಳ ಕೋಶ","r2s",,
+"kannada","śō śuḍ go ān je. lōkēś","ಶೋ ಶುಡ್ ಗೊ ಆನ್ ಜೆ. ಲೋಕೇಶ್","r2s",,
+"kannada","saṅgamēśara śr̥ṇgāra śāyiri","ಸಂಗಮೇಶರ ಶೃಂಗಾರ ಶಾಯಿರಿ","r2s",,
+"kannada","neladāseya nakṣatragaḷu","ನೆಲದಾಸೆಯ ನಕ್ಷತ್ರಗಳು","r2s",,
+"kannada","mahādāni doḍḍabasappa (appaṇṇa) baśeṭṭeppa taṭṭi dampatigaḷa sārthaka baduku","ಮಹಾದಾನಿ ದೊಡ್ಡಬಸಪ್ಪ (ಅಪ್ಪಣ್ಣ) ಬಶೆಟ್ಟೆಪ್ಪ ತಟ್ಟಿ ದಂಪತಿಗಳ ಸಾರ್ಥಕ ಬದುಕು","r2s",,
+"kannada","malenāḍina yakṣacētanagaḷu","ಮಲೆನಾಡಿನ ಯಕ್ಷಚೇತನಗಳು","r2s",,
+"kannada","ceñcu buḍakaṭṭu saṃskr̥ti","ಚೆಂಚು ಬುಡಕಟ್ಟು ಸಂಸ್ಕೃತಿ","r2s",,
+"kannada","hāralu āgasa, ījalu sāgara","ಹಾರಲು ಆಗಸ, ಈಜಲು ಸಾಗರ","r2s",,
+"kannada","karunāḍa siḍilu beḷavāḍi rāṇi mallamma","ಕರುನಾಡ ಸಿಡಿಲು ಬೆಳವಾಡಿ ರಾಣಿ ಮಲ್ಲಮ್ಮ","r2s",,
+"malayalam","തെരഞ്ഞെടുത്ത പ്രബന്ധങ്ങൾ","teraññeṭutta pr̲abandhaṅṅaḷ",,,
+"malayalam","മലയാളഭാഷയുടെ അടിവേരുകൾ","malayāḷabhāṣayuṭe aṭivērukaḷ",,,
+"malayalam","ഒരു നീണ്ട രാത്രിയുടെ ഓർമ്മക്കായി","Oru nīṇṭa rātr̲iyuṭe ōrmmaykkāyi",,"{""capitalize"": ""first""}",
+"malayalam","അക്കപ്പോരിന്റെ ഇരുപതു നസ്രാണി വർഷങ്ങൾ","akkappōrint̲e irupat nasrāṇi varṣaṅṅaḷ",,,
+"malayalam","പെണ്ണും ചെറുക്കനും","peṇṇuṃ cer̲ukkanuṃ",,,
+"malayalam","വൈശികതന്ത്ര്̲അ / വ്യാഖ്യാനം, എൻ. ഗോപിനാഥന്നായർ","Vaiśikatantr̲aṃ / vyākhyānaṃ, en. gōpināthannāyar",,"{""capitalize"": ""first""}",
+"malayalam","നാരായണഗുരു : ചിതർ̲ഇയ ആൾക്കണ്ണാടി / ബിനീഷ് പുതുപ്പണṃ","nārāyaṇaguru : citar̲iya āḷkkaṇṇāṭi / binīṣ putuppaṇaṃ",,,
+"malayalam","ബുദ്ധിജീവികളെക്കൊണ്ടു് എന്തുപ്ര്̲അയോജനṃ? / സക്കർ̲ഇയ","buddhijīvikaḷekkoṇṭȧ entupr̲ayōjanaṃ? /  sakkar̲iya",,,
+"malayalam","മലയാളപ്പച്ച : കെർ̄അളത്തിന്ത്̲എ ഋതുഭംഗികളിലൂടെ ഒരു യാത്ര്̲അ / പി. സുരേന്ദ്രൻ","malayāḷappacca : ker̄aḷattint̲e r̥tubhaṅgikaḷilūṭe oru yātr̲a / pi. surēndran",,,
+"malayalam","തെരഞ്ഞെടുത്ത പ്രബന്ധങ്ങൾ","teraññeṭutta pr̲abandhaṅṅaḷ",,,
+"malayalam","മലയാളഭാഷയുടെ അടിവേരുകൾ","malayāḷabhāṣayuṭe aṭivērukaḷ",,,
+"malayalam","ഒരു നീണ്ട രാത്രിയുടെ ഓർമ്മക്കായി","Oru nīṇṭa rātr̲iyuṭe ōrmmaykkāyi",,"{""capitalize"": ""first""}",
+"malayalam","അക്കപ്പോരിന്റെ ഇരുപതു നസ്രാണി വർഷങ്ങൾ","akkappōrint̲e irupat nasrāṇi varṣaṅṅaḷ",,,
+"malayalam","പെണ്ണും ചെറുക്കനും","peṇṇuṃ cer̲ukkanuṃ",,,
+"malayalam","വൈശികതന്ത്രം /വ്യാഖ്യാനം, എൻ. ഗോപിനാഥൻനായർ","vaiśikatantraṃ / vyākhyānaṃ, en. gōpināthannāyar",,,
+"malayalam","നാരായണഗുരു: ചിതറിയ ആൽക്കണ്ണാടി / ബിനീഷ് പുതുപ്പണം","nārāyaṇaguru: citaṟiya ālkkaṇṇāṭi / binīṣ putuppaṇaṃ",,,
+"malayalam","ബുദ്ധിജീവികളെക്കൊണ്ടു് എന്തുപ്ര്̲അയോജനṃ? / സക്കർ̲ഇയ","buddhijīvikaḷekkoṇṭȧ entupr̲ayōjanaṃ? / sakkar̲iya",,,
+"malayalam","മലയാളപ്പച്ച : കെർ̄അളത്തിന്ത്̲എ ഋതുഭംഗികളിലൂടെ ഒരു യാത്ര്̲അ / പി. സുരേന്ദ്രൻ","malayāḷappacca : ker̄aḷattint̲e r̥tubhaṅgikaḷilūṭe oru yātr̲a / pi. surēndran",,,
+"marathi_devanagari","kshatriya marāṭhyāñcī 96 kuḷe / saṅkalana","क्षत्रिय मराठ्यांची ९६ कुळे / संकलन, अशोक सातपुते","r2s",,
+"marathi_devanagari","mahārāshṭrāce śilpakāra ḍô bābāsāheba āmbeḍakara ","महाराष्ट्राचे शिल्पकार डॉ बाबासाहेब आंबेडकर ","r2s",,
+"marathi_devanagari","anuvādātūna anusarjanākaḍe : sāhityika anuvādāce tantra ani mantra ","अनुवादातून अनुसर्जनाकडे : साहित्यिक अनुवादाचे तंत्र आणि मंत्र  ","r2s",,
+"marathi_devanagari","āra e kumbhāra","आर ए कुंभार","r2s",,
+"marathi_devanagari","thora itihāsa sãśodhaka rāvabahādūra dattātreya baḷavanta pārasanīsa :  caritra va kārya / ḍô. surendra śrīkr̥shṇa pārasanīsa","थोर इतिहास संशोधक रावबहादूर दत्तात्रेय बळवंत पारसनीस : चरित्र व कार्य / डॉ सुरेन्द्र श्रीकृष्ण पारसनीस","r2s",,
+"marathi_devanagari","pūrvakāḷātīla shaṭkarmī-dvivedī ","पूर्वकाळातील षटकर्मी-द्विवेदी ","r2s",,
+"marathi_devanagari","mehatā pabliśiṅga hāūsa yāñcī ","मेहता पब्लिशिंग हाऊस यांची","r2s",,
+"marathi_devanagari","rājarshī śāhū chatrapatīñce ","राजर्षी शाहू छत्रपतींचे ","r2s",,
+"marathi_devanagari","āmacyā lagnācẽ khatakhatẽ : dona aṅkī Koṅkaṇī nāṭaka","आमच्या लग्नाचें खतखतें : दोन अंकी कोंकणी नाटक","r2s",,
 "marathi_devanagari","saṅgīta sãśaya kallōḷa 
-raṅgamañca digdarśaka jayanta ","संगीत संशय कल्लोळ   रंगमंच दिग्दर्शक जयंत  ","r2s"
-"marathi_devanagari","samāja-sãvāda : viśvaśāntī ","समाज-संवाद : विश्वशांती","r2s"
-"marathi_devanagari","kr̥shṇakanyā ","कृष्णकन्या","r2s"
-"marathi_devanagari","sãvāda gānagurũ̄śi  / śabdāṅkana","संवाद  गानगुरुंशी / शब्दांकन","r2s"
-"marathi_devanagari","śrī gopīnātha taḷavalakara  ","श्री गोपीनाथ तळवलकर  ","r2s"
-"marathi_devanagari","śāntatā! korṭa cālū āhe!","शांतता! कोर्ट चालू आहे!","r2s"
-"marathi_devanagari","mr̥gayākutūhala, athavā, śrīmanta mahārāja gāyakavāḍa yāñcī paṭāita vāghācī pahilī śikāra  / vishṇu govinda cipaḷoṇakara  mumbaī  ","मृगयाकुतूहल, अथवा, श्रीमंत महाराज गायकवाड यांची पटाइत वाघाची पहिली शिकार / विष्णु गोविंद चिपळोणकर मुंबई  ","r2s"
-"marathi_devanagari","śrīmanta nāmadāra jagannātha śaṅkaraśeṭa urpha nānāśaṅkaraśeṭa hyāñcẽ caritra / vināyakarāva mādhavarāva pitaḷe","श्रीमंत नामदार जगन्नाथ शंकरशेट उर्फ नानाशंकरशेट ह्यांचे चरित्र / विनायकराव माधवराव पितळे","r2s"
-"marathi_devanagari","śantanurāva kirloskara, vyaktī āṇi kārya, 1903-2003","शंतनुराव किर्लोस्कर, व्यक्ती आणि कार्य, १९०३-२००३","r2s"
-"marathi_devanagari","dhārānr̥tya / maṅgeśa pāḍagāvakara","धारानृत्य / मंगेश पाडगांवकर","r2s"
-"marathi_devanagari","kshamā    vyaṅkojī      āṭhavaṇītalã","क्षमा  व्यंकोजी आठवणीतलं","r2s"
-"marathi_devanagari","ज्ञानेश्वर ते आंबेडकर /   डॉ  श्रीपाल सबनीस","jñāneśvara te āmbeḍakara / ḍô śrīpāla sabanīsa","s2r"
-"nepali_devanagari","nepālī saṃskr̥tiko vivecanā / vishṇu prabhāta","नेपाली संस्कृतिको विवेचना / विष्णु प्रभात","r2s"
-"nepali_devanagari","baiṅkiṅa anuśāsana : digo vikāsakā lāgi / bīena ghartī","बैंकिङ अनुशासन : दिगो विकासका लागि / बीएन घर्ती","r2s"
-"nepali_devanagari","nepālī laghukathākā pravr̥ttiharū / ḍā pushkararāja bhaṭṭa","नेपाली लघुकथाका प्रवृत्तिहरू / डा पुस्ह्करराज भट्ट","r2s"
-"nepali_devanagari","sirjanaśīla kiśora pahāḍī / sampādaka, ḍā. śekharakumāra śreshṭha","सिर्जनशील किशोर पहाडी / सम्पादक, डा शेखरकुमार श्रेष्ठ","r2s"
-"nepali_devanagari","viśveśvara prasāda koirālā : cetanā, cintana ra rājanīti / kr̥shṇa khanāla","विश्वेश्वर प्रसाद कोइराला : चेतना, चिन्तन र राजनीति / कृष्ण खनाल","r2s"
-"nepali_devanagari","rāshṭra-pararāshṭra : ekatantradekhi gaṇatantrasamma / ḍā. bheshabahādura thāpā, sāthamā, haribahādura thāpā","  राष्ट्र- परराष्ट्र : एकतन्त्रदेखि गणतन्त्रसम्म / डा भेषबहादुर थापा, साथमा, हरिबहादुर थापा","r2s"
-"nepali_devanagari","madhyayugīna maithilī nāṭya ra gīti kr̥tiharūko bhāshāvaijñānika varṇana-viśleshaṇa / prā ḍā rāmāvatāra yādava ; sampādaka dhīrendra premarshi","मध्ययुगीन मैथिली नाट्य र गीति कृतिहरूको भाषावैज्ञानिक वर्णन-विश्लेषण / प्रा डा रामावतार यादव ; सम्पादक धीरेन्द्र प्रेमर्षि","r2s"
-"nepali_devanagari","siddhi-setulī smr̥ti pratishṭhāna","सिद्धि-सेतुली स्मृति प्रतिष्ठान","r2s"
-"nepali_devanagari","nepālī krāntiko ān̐khījhyāla / kr̥shṇa kesī  kāṭhamāḍaum̐ : jāgaraṇa buka hāusa","नेपाली क्रान्तिको आँखीझ्याल / कृष्ण केसी काठमाडौँ : जागरण बुक हाउस","r2s"
-"nepali_devanagari","buddhakālīna samāja : pāli tripiṭakako sākshyamā / umākānta pauḍyāla    ","बुद्धकालीन समाज : पालि त्रिपिटकको साक्ष्यमा / उमाकान्त पौड्याल","r2s"
-"nepali_devanagari","pala palako mr̥tyu / saṃsmaraṇa / sudana kirātī   kāṭhamāḍauṃ : ghosṭa rāiṭiṅa nepāla","पल पलको मृत्यु / संस्मरण / सुदन किराती काठमाडौं : घोस्ट राइटिङ नेपाल","r2s"
-"nepali_devanagari","smr̥tibimba : paṇḍita ṭekanātha  bhaṭṭarāī /  sampādana, ḍā tulasī bhaṭṭarāī","स्मृतिबिम्ब : पण्डित टेकनाथ भट्टराई / सम्पादन, डा तुलसी भट्टराई","r2s"
-"nepali_devanagari","पल पलको मृत्यु / संस्मरण / सुदन किराती काठमाडौं : घोस्ट राइटिङ नेपाल","pala palako mr̥tyu / saṃsmaraṇa / sudana kirātī kāṭhamāḍauṃ : ghosṭa rāiṭiṅa nepāla","s2r"
-"nepali_devanagari","स्मृतिबिम्ब : पण्डित टेकनाथ भट्टराई","smr̥tibimba : paṇḍita ṭekanātha bhaṭṭarāī","s2r"
-"panjabi_gurmukhi","ਅਨੁਪਮ ਗਾਥਾ ਫਫੜੇ ਭਾਈ ਕੇ ਨਗਰ ਦੀ","Anupama gāthā Phaphaṛe Bhāī Ke nagara dī",
-"panjabi_gurmukhi","ਭਾਖਾ ਦੇ ਮਾਮਲਿਆਂ ਬਾਰੇ ਕੌਮਾਨ੍ਤਰੀ ਖੋਜ : ਮਾਤ ਭਾਖਾ ਖੋਲ੍ਹਦੀ ਏ ਸਿੱਖਿਆ, ਗਿਆਨ, ਅਤੇ ਅੰਗਰੇਜੀ ਦੇ ਬੂਹੇ","Bhākhā de māmaliāṃ bāre kaumāntarī khoja : ‡b māta bhākhā kholhadī e sikkhiā, giāna, ate Aṅgarejī de būhe",
-"panjabi_gurmukhi","ਗੁਰਬਾਣੀ ਦੀਆਂ ਗੁਹਜ ਰਮਜ਼ਾਂ : ਵਿਆਖਿਆ ਤੇ ਵਿਚਾਰ / ਪ੍ਰੋਫ਼ੇਸਰ ਰਾਮ ਸਿੰਘ","Gurabāṇī dīāṃ guhaja ramazāṃ : wiākhiā te wicāra / profesara rāma siṅgha",
-"panjabi_gurmukhi","ਮੁੱਢਲਾ ਪੰਜਾਬੀ ਨਾਵਲ : ਪੁਨਰ-ਮੁਲਾਙ੍ਕਣ : ਪ੍ਰਭਾ, ਪ੍ਰੇਮ ਲਗਨ ਅਤੇ ਵਗਦੀ ਸੀ ਰਾਵੀ ਦੇ ਪ੍ਰਸੰਗ ਵਿਚ / ਡਾ। ਰੇਖਾ ਰਾਣੀ","Muḍḍhalā Pañjābī nāwala : punara-mulāṅkaṇa : Prabhā, Prema lagana ate Wagadī sī Rāwī de prasaṅga wica / Ḍā. Rekhā Rāṇī.",
-"panjabi_gurmukhi","ਨੋ ਮੈਨਜ਼-ਲੈਣ੍ਡ ਤੋਂ ਸ਼ਕੀਲਾ... : ਗਲਪ ਰਚਨਾ","No mainaza-laiṇḍa toṃ Shakīlā... : galapa racanā",
-"panjabi_gurmukhi","ਮੈਂ, ਬੱਚਾ ਤੇ ਕੁੱਤਾ : ਘੁੰਮਦਿਆਂ ਫਿਰਦਿਆਂ","Maiṃ, baccā te kuttā : ‡b ghum̆madiāṃ phiradiāṃ",
-"panjabi_gurmukhi","ਇਤਿਹਾਸ ਬਾਬਾ ਬੁੱਡਾ ਜੀ","Itihāsa Bābā Buḍḍā jī",
-"panjabi_gurmukhi","ਆਸ਼ਿਕ ਅਜ਼ਾਦੀ ਦੇ : ਜੁਝਾਰੂਆਂ ਦੀ ਗਾਥਾ : ਪ੍ਰਸੰਗ ਅਤੇ ਵਾਰਾਂ","āshika azādī de : ‡b jujhārūāṃ dī gāthā : prasaṅga ate wārāṃ",
-"panjabi_gurmukhi","ਗ਼ਦਰੀ ਕਿਰਪਾ ਸਿੰਘ ਲੰਗ ਮਜਾਰੀ (ਮੀਰਪੁਰ) : ਜਿਹੜੇ ਝੁੱਕੇ ਨਹੀਂ","G̲h̲adarī Kirapā Siṅgha Laṅga Majārī (Mīrapura) : jihaṛe jhukke nahīṃ",
-"panjabi_gurmukhi","ਜੇਲ੍ਹ ਦੀਆਂ ਭੁੱਲੀਆਂ ਵਿੱਸਰੀਆਂ ਯਾਦਾਂ","Jelha dīāṃ bhullīāṃ wissarīāṃ yādāṃ",
-"panjabi_gurmukhi","ਅਨੁਪਮ ਗਾਥਾ ਫਫੜੇ ਭਾਈ ਕੇ ਨਗਰ ਦੀ","Anupama gāthā Phaphaṛe Bhāī Ke nagara dī",
-"panjabi_gurmukhi","ਭਾਖਾ ਦੇ ਮਾਮਲਿਆਂ ਬਾਰੇ ਕੌਮਾਨ੍ਤਰੀ ਖੋਜ : ਮਾਤ ਭਾਖਾ ਖੋਲ੍ਹਦੀ ਏ ਸਿੱਖਿਆ, ਗਿਆਨ, ਅਤੇ ਅੰਗਰੇਜੀ ਦੇ ਬੂਹੇ","Bhākhā de māmaliāṃ bāre kaumāntarī khoja : māta bhākhā kholhadī e sikkhiā, giāna, ate Aṅgarejī de būhe",
-"panjabi_gurmukhi","ਗੁਰਬਾਣੀ ਦੀਆਂ ਗੁਹਜ ਰਮਜ਼ਾਂ : ਵਿਆਖਿਆ ਤੇ ਵਿਚਾਰ / ਪ੍ਰੋਫ਼ੇਸਰ ਰਾਮ ਸਿੰਘ","Gurabāṇī dīāṃ guhaja ramazāṃ : wiākhiā te wicāra / profesara rāma siṅgha",
-"panjabi_gurmukhi","ਮੁੱਢਲਾ ਸਿੰਘ ਨਾਵਲ : ਪੁਨਰ- ਮੁਲਾਂਕਣ : ਪ੍ਰਭਾ, ਪ੍ਰੇਮ ਲਗਨ ਅਤੇ ਵਗਦੀ ਸੀ ਰਾਵੀ ਦੇ ਪ੍ਰਸੰਗ ਵਿਚ / ਡਾ. ਰੇਖਾ ਰਾਣੀ","Muḍḍhalā Pañjābī nāwala : punara-mulāṅkaṇa : Prabhā, Prema lagana ate Wagadī sī Rāwī de prasaṅga wica / ḍā. Rekhā Rāṇī.",
-"panjabi_gurmukhi","ਨੋ ਮੈਨਜ਼-ਲੈਂਡ ਤੋਂ ਸ਼ਕੀਲਾ... : ਗਲਪ ਰਚਨਾ","No mainaza-laiṇḍa toṃ Shakīlā... : galapa racanā",
-"panjabi_gurmukhi","ਮੈਂ, ਬੱਚਾ ਤੇ ਕੁੱਤਾ : ਘੁੰਮਦਿਆਂ ਫਿਰਦਿਆਂ","Maiṃ, baccā te kuttā : ghum̆madiāṃ phiradiāṃ",
-"panjabi_gurmukhi","ਇਤਿਹਾਸ ਬਾਬਾ ਬੁੱਡਾ ਜੀ","Itihāsa Bābā Buḍḍā jī",
-"panjabi_gurmukhi","ਆਸ਼ਿਕ ਅਜ਼ਾਦੀ ਦੇ : ਜੁਝਾਰੂਆਂ ਦੀ ਗਾਥਾ : ਪ੍ਰਸੰਗ ਅਤੇ ਵਾਰਾਂ","āshika azādī de : jujhārūāṃ dī gāthā : prasaṅga ate wārāṃ",
-"panjabi_gurmukhi","ਗ਼ਦਰੀ ਕਿਰਪਾ ਸਿੰਘ ਲੰਗ ਮਜਾਰੀ (ਮੀਰਪੁਰ) : ਜਿਹੜੇ ਝੁੱਕੇ ਨਹੀਂ","G̲h̲adarī Kirapā Siṅgha Laṅga Majārī (Mīrapura) : jihaṛe jhukke nahīṃ",
-"panjabi_gurmukhi","ਜੇਲ੍ਹ ਦੀਆਂ ਭੁੱਲੀਆਂ ਵਿੱਸਰੀਆਂ ਯਾਦਾਂ","Jelha dīāṃ bhullīāṃ wissarīāṃ yādāṃ",
-"sanskrit_devanagari","अद्वैतवेदान्ते श्री श्री सच्चिदानन्देन्द्रसरस्वतीमहास्वामिनां योगदानम्","advaitavedānte śrī śrī saccidānandendrasarasvatīmahāsvāmināṃ yogadānam",
-"sanskrit_devanagari","अनुसन्धानसम्पादनप्रविधिः","anusandhānasampādanapravidhiḥ",
-"sanskrit_devanagari","जयसिंहगुणवर्णनम्, अपरनाम, जयप्रशस्ति","jayasiṃhaguṇavarṇanam, aparanāma, jayapraśasti",
-"sanskrit_devanagari","भाष्यपरम्परा ज्ञानप्रवाहश्च","bhāṣyaparamparā jñānapravāhaśca",
-"sanskrit_devanagari","दशग्रीवराक्षसवधचरितम्","Daśagrīvarākṣasavadhacaritam",
-"sinhalese","සිංහල භා ෂා  විකා ශය සහ ශිලා ලේඛන විමර්ශන","siṃhala bhāṣā vikāśaya saha śilālēkhana vimarśana",
-"sinhalese","සමා ජ හා  මා නව විද්යා ත්මක ලිපි","Samāja hā mānava vidyātmaka lipi",
-"sinhalese","ගම් රටා ව : පැ රණි ගමක කෘ ෂි තා ක්ෂණික සමා ජ උරුමය","Gam raṭāva : păraṇi gamaka kr̥ṣi tākṣaṇika samāja urumaya ",
-"sinhalese","පෞ  ද්ගලික විශ්වවිද්යා ල: විලා සිතා ව හා යථා ර්ථය","Paudgalika viśvavidyāla : vilāsitāva hā yathārthaya",
-"sinhalese","ස්වයං චරිතා පදා නය : ආනන්දනීයමතක සටහන්","Svayaṃ caritāpadānaya : ānandanīyamataka saṭahan",
-"sinhalese","මා ධ්ය විමර්ශන හා 2010 ජනා ධිපතිවරණ ඇගයීම","Mādhya vimarśana hā 2010 janādhipativaraṇa ăgayīma",
-"sinhalese","රටක් කැ ලැ ඹු පුවතක අලුත් කතා ව","Raṭak kălămbū puvataka alut katāva",
-"sinhalese","නව කවි සලකුණ : සමකා ලීන කවිය පිළිබඳ න්යා යික සහ සනිදර්ශනා ත්මක සම්භා ෂණයක්","Nava kavi salakuṇa : samakālīna kaviya piḷibanda nyāyika saha sanidarśanātmaka sambhāṣaṇayak",
-"sinhalese","1908 දී හම්බන්තො ට හද්දා පිටිසර ගමක් වූ බද්දේගමට ආ සුද්දා","1908 dī Hambantoṭa haddā piṭisara gamak vū Baddēgamaṭa ā suddā",
-"sinhalese","ශ්රී ලංකා වේ භික්ෂුණි පර්ෂදය පිලිබඳ විමර්ශනයක්","śrī laṃkāvē bhiksuṇi parṣadaya pilibanda vimarśanayak  ",
-"sinhalese","20 සියවසේ කලා දිව්ය අප්සරා වී රද්දො ළුවේ ජෙන්නෝ නා : නා ඩගම් ගී ගා යිකා ව හා  නළඟනගේ කතා ව (ක්රි.ව. 1890 - 1975)","20 siyavasē kalā divya apsarāvī Raddoḷuvē Jennōnā : nāḍagam gī gāyikāva hā naḷaṅganagē katāva (kri.va. 1890 - 1975)",
-"sinhalese","පො තට ළැ දි නන්දා ගො ඩගේ : නන්දා ගො ඩගේ ගෞ  රව ප්රණා ම","Potaṭa ḷădi Nandā Goḍagē : Nandā Goḍagē gaurava praṇāma",
-"sinhalese","අබය මුද්රා ව : ඩබ්ලියු.ඒ. අබේසිoහගේ සමා ජ සා හිත්ය මෙහෙවර","Abaya mudrāva : Ḍabliyu.Ē. Abēsimhagē samāja sāhitya mehevara",
-"sinhalese","නන්දිකඩා ල් දක්වා   පා ර කැ පුවේ කවුරුන්ද?","Nandikaḍāl dakvā pāra kăpuvē kavurunda?",
-"sinhalese","පුරා විද්යා ස්ථා න නමා වලිය : කෑ ගල්ල දිස්ත්රික්කය","Purāvidyā sthāna namāvaliya : Kâgalla distrikkaya",
-"tamil","மணிக்கொடி இதழ் தொகுப்ப","Maṇikkoṭi ital̲ tokuppu",
-"tamil","சுந்தர ராமசாமி சிறுகதைகள்","Cuntara Rāmacāmi cir̲ukataikaḷ",
-"tamil","வானம் வசப்படும்","Vān̲am vacappaṭum",
-"tamil","சிவப்புக் கழுத்துடன் ஒரு பச்சைப்  பறவை","Civappuk kal̲uttutan̲ oru paccaip par̲avai",
-"tamil","தீண்டப்படாத முத்தம்","Tīṇṭappaṭāta muttam",
-"tamil","ஜி. நாகராஜன்̲ சிறுகதைகள்","Ji. Nākarājan̲ cir̲ukataikaḷ",
-"tamil","அவ்வை மண்ணில் பெண் எழுத்தாளர்கள் : பெண் எழுத்தாளர்கள் சந்திப்பு, 2000 கருத்தரங்கக் கட்டுரைகள்","Avvai maṇṇil peṇ el̲uttāḷarkaḷ : peṇ el̲uttāḷarkaḷ cantippu, 2000 karuttaraṅkak kaṭṭuraikaḷ",
-"tamil","ஐந்து வருட மௌனம் : சிறுகதைத் தொகுப்பு","Aintu varuṭa maun̲am : cir̲ukatait tokuppu",
-"tamil","உ. வே. சாமிநாதையர் கடிதக் கருவூலம்","U. Vē. Cāminātaiyar kaṭitak karuvūlam",
-"tamil","அர்ச்சுனனின் தமிழ்க் காதலிகள் : மகாபாரதம் பற்றிய நாட்டார் கதைகள்","Arccunan̲in̲ Tamil̲k kātalikaḷ : Makāpāratam par̲r̲iya nāṭṭār kataikaḷ",
-"telugu","హిందూత్వ మతోన్మాదం","hindūtva matōnmādaṃ ",
-"telugu","ఆకురాలిన చప్పుడు","ākurālina cappuḍu",
-"telugu","భాషా ప్రయుక్త సమైక్య రాష్ట్రాలు : సి. పి. ఐ. (ఎం) వైఖరి వివరించే ఆరు దశాబ్దాల పత్రాలు, 1954-2013.","bhāṣā prayukta samaikya rāṣṭrālu : si. pi. ai. (eṃ) vaikhari vivariñcē āru daśābdāla patrālu, 1954-2013.",
-"telugu","తెలుగాంగ్ల మిశ్రసమాసనిఘంటువు : పత్రికాభాషానిఘంటువు : వివరణాత్మకనిఘంటువు","telugāṅgla miśrasamāsanighaṇṭuvu :  patrikābhāṣānighaṇṭuvu : vivaraṇātmakanighaṇṭuvu",
-"telugu","స్వరజ్ఞాన వర్షిణీ!!! : సంగీత శాస్త్రంలో సరికొత్త సృష్టి","svarajñāna varṣiṇī!!! : saṅgīta śāstraṃlō sarikotta sr̥ṣṭi",
-"telugu","అఖిలకళా వైభవశ్రీ అడివి బాపిరాజు : సాహితి, చిత్రలేఖన, శిల్ప, నాట్య, సంపాదక, సినీకళా నైపుణ్యాల విశ్లేషణ","akhilakaḷā vaibhavaśrī aḍivi bāpirāju :  sāhiti, citralēkhana, śilpa, nāṭya, sampādaka, sinīkaḷā naipuṇyāla viślēṣaṇa",
-"telugu","శ్రీమత్త్యాగరాజ విజయం","śrīmattyāgarāja vijayaṃ",
-"telugu","తెలంగాణ ప్రముఖ కవులు - కావ్యాలు : క్రీ. శ. 941 నుంచి 1975 వరకు మధ్య జన్మించి, 2000లోపు కావ్యాలతో ఖ్యాతిగాంచిన కవుల సంబంధిత విశ్లేషణ","telaṅgāṇa pramukha kavulu - kāvyālu : krī. śa. 941 nuñci 1975 varaku madhya janmiñci, 2000lōpu kāvyālatō khyātigāñcina kavula sambandhita viślēṣaṇa",
-"telugu","తెలంగాణ కవితా వైభవం : పాల్కురికి నుండి గద్దర్ దాకా :  ఎఱుక","telaṅgāṇa kavitā vaibhavaṃ :  pālkuriki nuṇḍi gaddar dākā : er̲uka",
-"telugu","శ్రీమదాంధ్ర మహాభారత ప్రవచనము","śrīmadāndhra mahābhārata pravacanamu",
-"telugu","ద్రాక్షారం కథలు","drākṣāraṃ kathalu",
-"telugu","టి. దేవేందర్ గౌడ్ శాసనసభ ప్రసంగాలు","ṭi. dēvēndar gauḍ śāsanasabha prasaṅgālu",
-"telugu","విలక్షణ పి. వి. నరసింహారావు గారి జీవిత చరిత్ర ","vilakṣaṇa pi. vi. narasiṃhārāvu gāri jīvita caritra",
-"telugu","కాకతీయుల శాసనాలలో ఛందోవైవిధ్యం : వరంగల్ జిల్లా పరిధిలోనివి","kākatīyula śāsanālalō chandōvaividhyaṃ : varaṅgal jillā paridhilōnivi",
-"telugu","చీకటి వెన్నెల : స్త్రీల కథలు","cīkaṭi vennela : strīla kathalu",
+raṅgamañca digdarśaka jayanta ","संगीत संशय कल्लोळ   रंगमंच दिग्दर्शक जयंत  ","r2s",,
+"marathi_devanagari","samāja-sãvāda : viśvaśāntī ","समाज-संवाद : विश्वशांती","r2s",,
+"marathi_devanagari","kr̥shṇakanyā ","कृष्णकन्या","r2s",,
+"marathi_devanagari","sãvāda gānagurũ̄śi  / śabdāṅkana","संवाद  गानगुरुंशी / शब्दांकन","r2s",,
+"marathi_devanagari","śrī gopīnātha taḷavalakara  ","श्री गोपीनाथ तळवलकर  ","r2s",,
+"marathi_devanagari","śāntatā! korṭa cālū āhe!","शांतता! कोर्ट चालू आहे!","r2s",,
+"marathi_devanagari","mr̥gayākutūhala, athavā, śrīmanta mahārāja gāyakavāḍa yāñcī paṭāita vāghācī pahilī śikāra  / vishṇu govinda cipaḷoṇakara  mumbaī  ","मृगयाकुतूहल, अथवा, श्रीमंत महाराज गायकवाड यांची पटाइत वाघाची पहिली शिकार / विष्णु गोविंद चिपळोणकर मुंबई  ","r2s",,
+"marathi_devanagari","śrīmanta nāmadāra jagannātha śaṅkaraśeṭa urpha nānāśaṅkaraśeṭa hyāñcẽ caritra / vināyakarāva mādhavarāva pitaḷe","श्रीमंत नामदार जगन्नाथ शंकरशेट उर्फ नानाशंकरशेट ह्यांचे चरित्र / विनायकराव माधवराव पितळे","r2s",,
+"marathi_devanagari","śantanurāva kirloskara, vyaktī āṇi kārya, 1903-2003","शंतनुराव किर्लोस्कर, व्यक्ती आणि कार्य, १९०३-२००३","r2s",,
+"marathi_devanagari","dhārānr̥tya / maṅgeśa pāḍagāvakara","धारानृत्य / मंगेश पाडगांवकर","r2s",,
+"marathi_devanagari","kshamā    vyaṅkojī      āṭhavaṇītalã","क्षमा  व्यंकोजी आठवणीतलं","r2s",,
+"marathi_devanagari","ज्ञानेश्वर ते आंबेडकर /   डॉ  श्रीपाल सबनीस","jñāneśvara te āmbeḍakara / ḍô śrīpāla sabanīsa","s2r",,
+"nepali_devanagari","nepālī saṃskr̥tiko vivecanā / vishṇu prabhāta","नेपाली संस्कृतिको विवेचना / विष्णु प्रभात","r2s",,
+"nepali_devanagari","baiṅkiṅa anuśāsana : digo vikāsakā lāgi / bīena ghartī","बैंकिङ अनुशासन : दिगो विकासका लागि / बीएन घर्ती","r2s",,
+"nepali_devanagari","nepālī laghukathākā pravr̥ttiharū / ḍā pushkararāja bhaṭṭa","नेपाली लघुकथाका प्रवृत्तिहरू / डा पुष्करराज भट्ट","r2s",,
+"nepali_devanagari","sirjanaśīla kiśora pahāḍī / sampādaka, ḍā. śekharakumāra śreshṭha","सिर्जनशील किशोर पहाडी / सम्पादक, डा शेखरकुमार श्रेष्ठ","r2s",,
+"nepali_devanagari","viśveśvara prasāda koirālā : cetanā, cintana ra rājanīti / kr̥shṇa khanāla","विश्वेश्वर प्रसाद कोइराला : चेतना, चिन्तन र राजनीति / कृष्ण खनाल","r2s",,
+"nepali_devanagari","rāshṭra-pararāshṭra : ekatantradekhi gaṇatantrasamma / ḍā. bheshabahādura thāpā, sāthamā, haribahādura thāpā","  राष्ट्र- परराष्ट्र : एकतन्त्रदेखि गणतन्त्रसम्म / डा भेषबहादुर थापा, साथमा, हरिबहादुर थापा","r2s",,
+"nepali_devanagari","madhyayugīna maithilī nāṭya ra gīti kr̥tiharūko bhāshāvaijñānika varṇana-viśleshaṇa / prā ḍā rāmāvatāra yādava ; sampādaka dhīrendra premarshi","मध्ययुगीन मैथिली नाट्य र गीति कृतिहरूको भाषावैज्ञानिक वर्णन-विश्लेषण / प्रा डा रामावतार यादव ; सम्पादक धीरेन्द्र प्रेमर्षि","r2s",,
+"nepali_devanagari","siddhi-setulī smr̥ti pratishṭhāna","सिद्धि-सेतुली स्मृति प्रतिष्ठान","r2s",,
+"nepali_devanagari","nepālī krāntiko ān̐khījhyāla / kr̥shṇa kesī  kāṭhamāḍaum̐ : jāgaraṇa buka hāusa","नेपाली क्रान्तिको आँखीझ्याल / कृष्ण केसी काठमाडौँ : जागरण बुक हाउस","r2s",,
+"nepali_devanagari","buddhakālīna samāja : pāli tripiṭakako sākshyamā / umākānta pauḍyāla    ","बुद्धकालीन समाज : पालि त्रिपिटकको साक्ष्यमा / उमाकान्त पौड्याल","r2s",,
+"nepali_devanagari","pala palako mr̥tyu / saṃsmaraṇa / sudana kirātī   kāṭhamāḍauṃ : ghosṭa rāiṭiṅa nepāla","पल पलको मृत्यु / संस्मरण / सुदन किराती काठमाडौं : घोस्ट राइटिङ नेपाल","r2s",,
+"nepali_devanagari","smr̥tibimba : paṇḍita ṭekanātha  bhaṭṭarāī /  sampādana, ḍā tulasī bhaṭṭarāī","स्मृतिबिम्ब : पण्डित टेकनाथ भट्टराई / सम्पादन, डा तुलसी भट्टराई","r2s",,
+"nepali_devanagari","पल पलको मृत्यु / संस्मरण / सुदन किराती काठमाडौं : घोस्ट राइटिङ नेपाल","pala palako mr̥tyu / saṃsmaraṇa / sudana kirātī kāṭhamāḍauṃ : ghosṭa rāiṭiṅa nepāla","s2r",,
+"nepali_devanagari","स्मृतिबिम्ब : पण्डित टेकनाथ भट्टराई","smr̥tibimba : paṇḍita ṭekanātha bhaṭṭarāī","s2r",,
+"panjabi_gurmukhi","ਅਨੁਪਮ ਗਾਥਾ ਫਫੜੇ ਭਾਈ ਕੇ ਨਗਰ ਦੀ","anupama gāthā phaphaṛe bhāī Ke nagara dī",,,
+"panjabi_gurmukhi","ਭਾਖਾ ਦੇ ਮਾਮਲਿਆਂ ਬਾਰੇ ਕੌਮਾਨ੍ਤਰੀ ਖੋਜ : ਮਾਤ ਭਾਖਾ ਖੋਲ੍ਹਦੀ ਏ ਸਿੱਖਿਆ, ਗਿਆਨ, ਅਤੇ ਅੰਗਰੇਜੀ ਦੇ ਬੂਹੇ","bhākhā de māmaliāṃ bāre kaumāntarī khoja : māta bhākhā kholhadī e sikkhiā, giāna, ate aṅgarejī de būhe",,,
+"panjabi_gurmukhi","ਗੁਰਬਾਣੀ ਦੀਆਂ ਗੁਹਜ ਰਮਜ਼ਾਂ : ਵਿਆਖਿਆ ਤੇ ਵਿਚਾਰ / ਪ੍ਰੋਫ਼ੇਸਰ ਰਾਮ ਸਿੰਘ","Gurabāṇī dīāṃ guhaja ramazāṃ : wiākhiā te wicāra / profesara rāma siṅgha",,"{""capitalize"": ""first""}",
+"panjabi_gurmukhi","ਮੁੱਢਲਾ ਸਿੰਘ ਨਾਵਲ : ਪੁਨਰ- ਮੁਲਾਂਕਣ : ਪ੍ਰਭਾ, ਪ੍ਰੇਮ ਲਗਨ ਅਤੇ ਵਗਦੀ ਸੀ ਰਾਵੀ ਦੇ ਪ੍ਰਸੰਗ ਵਿਚ / ਡਾ. ਰੇਖਾ ਰਾਣੀ","muḍḍhalā pañjābī nāwala : punara-mulāṅkaṇa : prabhā, prema lagana ate wagadī sī rāwī de prasaṅga wica / ḍā. rekhā rāṇī.",,,
+"panjabi_gurmukhi","ਨੋ ਮੈਨਜ਼-ਲੈਂਡ ਤੋਂ ਸ਼ਕੀਲਾ... : ਗਲਪ ਰਚਨਾ","no mainaza-laiṇḍa toṃ shakīlā... : galapa racanā",,,
+"panjabi_gurmukhi","ਮੈਂ, ਬੱਚਾ ਤੇ ਕੁੱਤਾ : ਘੁੰਮਦਿਆਂ ਫਿਰਦਿਆਂ","maiṃ, baccā te kuttā : ghum̆madiāṃ phiradiāṃ",,,
+"panjabi_gurmukhi","ਇਤਿਹਾਸ ਬਾਬਾ ਬੁੱਡਾ ਜੀ","itihāsa bābā buḍḍā jī",,,
+"panjabi_gurmukhi","ਆਸ਼ਿਕ ਅਜ਼ਾਦੀ ਦੇ : ਜੁਝਾਰੂਆਂ ਦੀ ਗਾਥਾ : ਪ੍ਰਸੰਗ ਅਤੇ ਵਾਰਾਂ","āshika azādī de : jujhārūāṃ dī gāthā : prasaṅga ate wārāṃ",,,
+"panjabi_gurmukhi","ਗ਼ਦਰੀ ਕਿਰਪਾ ਸਿੰਘ ਲੰਗ ਮਜਾਰੀ (ਮੀਰਪੁਰ) : ਜਿਹੜੇ ਝੁੱਕੇ ਨਹੀਂ","gh̲adarī kirapā siṅgha laṅga majārī (mīrapura) : jihaṛe jhukke nahīṃ",,,
+"panjabi_gurmukhi","ਜੇਲ੍ਹ ਦੀਆਂ ਭੁੱਲੀਆਂ ਵਿੱਸਰੀਆਂ ਯਾਦਾਂ","Jelha dīāṃ bhullīāṃ wissarīāṃ yādāṃ",,"{""capitalize"": ""first""}",
+"sanskrit_devanagari","अद्वैतवेदान्ते श्री श्री सच्चिदानन्देन्द्रसरस्वतीमहास्वामिनां योगदानम्","advaitavedānte śrī śrī saccidānandendrasarasvatīmahāsvāmināṃ yogadānam",,,
+"sanskrit_devanagari","अनुसन्धानसम्पादनप्रविधिः","anusandhānasampādanapravidhiḥ",,,
+"sanskrit_devanagari","जयसिंहगुणवर्णनम्, अपरनाम, जयप्रशस्ति","jayasiṃhaguṇavarṇanam, aparanāma, jayapraśasti",,,
+"sanskrit_devanagari","भाष्यपरम्परा ज्ञानप्रवाहश्च","bhāṣyaparamparā jñānapravāhaśca",,,
+"sanskrit_devanagari","दशग्रीवराक्षसवधचरितम्","Daśagrīvarākṣasavadhacaritam",,"{""capitalize"": ""first""}",
+"sinhalese","සිංහල භා ෂා  විකා ශය සහ ශිලා ලේඛන විමර්ශන","siṃhala bhāṣā vikāśaya saha śilālēkhana vimarśana",,,
+"sinhalese","සමා ජ හා  මා නව විද්යා ත්මක ලිපි","Samāja hā mānava vidyātmaka lipi",,"{""capitalize"": ""first""}",
+"sinhalese","ගම් රටා ව : පැ රණි ගමක කෘ ෂි තා ක්ෂණික සමා ජ උරුමය","Gam raṭāva : păraṇi gamaka kr̥ṣi tākṣaṇika samāja urumaya ",,"{""capitalize"": ""first""}",
+"sinhalese","පෞ  ද්ගලික විශ්වවිද්යා ල: විලා සිතා ව හා යථා ර්ථය","Paudgalika viśvavidyāla : vilāsitāva hā yathārthaya",,"{""capitalize"": ""first""}",
+"sinhalese","ස්වයං චරිතා පදා නය : ආනන්දනීයමතක සටහන්","Svayaṃ caritāpadānaya : ānandanīyamataka saṭahan",,"{""capitalize"": ""first""}",
+"sinhalese","මා ධ්ය විමර්ශන හා 2010 ජනා ධිපතිවරණ ඇගයීම","Mādhya vimarśana hā 2010 janādhipativaraṇa ăgayīma",,"{""capitalize"": ""first""}",
+"sinhalese","රටක් කැ ලැ ඹු පුවතක අලුත් කතා ව","Raṭak kălămbū puvataka alut katāva",,"{""capitalize"": ""first""}",
+"sinhalese","නව කවි සලකුණ : සමකා ලීන කවිය පිළිබඳ න්යා යික සහ සනිදර්ශනා ත්මක සම්භා ෂණයක්","Nava kavi salakuṇa : samakālīna kaviya piḷibanda nyāyika saha sanidarśanātmaka sambhāṣaṇayak",,"{""capitalize"": ""first""}",
+"sinhalese","1908 දී හම්බන්තො ට හද්දා පිටිසර ගමක් වූ බද්දේගමට ආ සුද්දා","1908 dī hambantoṭa haddā piṭisara gamak vū baddēgamaṭa ā suddā",,,
+"sinhalese","ශ්රී ලංකා වේ භික්ෂුණි පර්ෂදය පිලිබඳ විමර්ශනයක්","śrī laṇkāvē bhiksuṇi parṣadaya pilibanda vimarśanayak",,,
+"sinhalese","20 සියවසේ කලා දිව්ය අප්සරා වී රද්දො ළුවේ ජෙන්නෝ නා : නා ඩගම් ගී ගා යිකා ව හා  නළඟනගේ කතා ව (ක්රි.ව. 1890 - 1975)","20 siyavasē kalā divya apsarāvī raddoḷuvē jennōnā : nāḍagam gī gāyikāva hā naḷaṅganagē katāva (kri.va. 1890 - 1975)",,,
+"sinhalese","පො තට ළැ දි නන්දා ගො ඩගේ : නන්දා ගො ඩගේ ගෞ  රව ප්රණා ම","potaṭa iădi nandā goḍagē : nandā goḍagē gaurava praṇāma",,,
+"sinhalese","අබය මුද්රා ව : ඩබ්ලියු.ඒ. අබේසිoහගේ සමා ජ සා හිත්ය මෙහෙවර","abaya mudrāva : ḍabliyu.ē. abēsimhagē samāja sāhitya mehevara",,,
+"sinhalese","නන්දිකඩා ල් දක්වා   පා ර කැ පුවේ කවුරුන්ද?","Nandikaḍāl dakvā pāra kăpuvē kavurunda?",,"{""capitalize"": ""first""}",
+"sinhalese","පුරා විද්යා ස්ථා න නමා වලිය : කෑ ගල්ල දිස්ත්රික්කය","purāvidyā sthāna namāvaliya : kâgalla distrikkaya",,,
+"tamil","மணிக்கொடி இதழ் தொகுப்பு","Maṇikkoṭi ital̲ tokuppu",,"{""capitalize"": ""first""}",
+"tamil","சுந்தர ராமசாமி சிறுகதைகள்","cuntara rāmacāmi cir̲ukataikaḷ",,,
+"tamil","வானம் வசப்படும்","Vān̲am vacappaṭum",,"{""capitalize"": ""first""}",
+"tamil","சிவப்புக் கழுத்துடன் ஒரு பச்சைப்  பறவை","Civappuk kal̲uttutan̲ oru paccaip par̲avai",,"{""capitalize"": ""first""}",
+"tamil","தீண்டப்படாத முத்தம்","Tīṇṭappaṭāta muttam",,"{""capitalize"": ""first""}",
+"tamil","ஜி. நாகராஜன்̲ சிறுகதைகள்","ji. nākarājan̲ cir̲ukataikaḷ",,,
+"tamil","அவ்வை மண்ணில் பெண் எழுத்தாளர்கள் : பெண் எழுத்தாளர்கள் சந்திப்பு, 2000 கருத்தரங்கக் கட்டுரைகள்","Avvai maṇṇil peṇ el̲uttāḷarkaḷ : peṇ el̲uttāḷarkaḷ cantippu, 2000 karuttaraṅkak kaṭṭuraikaḷ",,"{""capitalize"": ""first""}",
+"tamil","ஐந்து வருட மௌனம் : சிறுகதைத் தொகுப்பு","Aintu varuṭa maun̲am : cir̲ukatait tokuppu",,"{""capitalize"": ""first""}",
+"tamil","உ. வே. சாமிநாதையர் கடிதக் கருவூலம்","u. vē. cāminātaiyar kaṭitak karuvūlam",,,
+"tamil","அர்ச்சுனனின் தமிழ்க் காதலிகள் : மகாபாரதம் பற்றிய நாட்டார் கதைகள்","arccunan̲in̲ tamil̲k kātalikaḷ : makāpāratam par̲r̲iya nāṭṭār kataikaḷ",,,
+"telugu","హిందూత్వ మతోన్మాదం","hindūtva matōnmādaṃ ",,,
+"telugu","ఆకురాలిన చప్పుడు","ākurālina cappuḍu",,,
+"telugu","భాషా ప్రయుక్త సమైక్య రాష్ట్రాలు : సి. పి. ఐ. (ఎం) వైఖరి వివరించే ఆరు దశాబ్దాల పత్రాలు, 1954-2013.","bhāṣā prayukta samaikya rāṣṭrālu : si. pi. ai. (eṃ) vaikhari vivariñcē āru daśābdāla patrālu, 1954-2013.",,,
+"telugu","తెలుగాంగ్ల మిశ్రసమాసనిఘంటువు : పత్రికాభాషానిఘంటువు : వివరణాత్మకనిఘంటువు","telugāṅgla miśrasamāsanighaṇṭuvu :  patrikābhāṣānighaṇṭuvu : vivaraṇātmakanighaṇṭuvu",,,
+"telugu","స్వరజ్ఞాన వర్షిణీ!!! : సంగీత శాస్త్రంలో సరికొత్త సృష్టి","svarajñāna varṣiṇī!!! : saṅgīta śāstraṃlō sarikotta sr̥ṣṭi",,,
+"telugu","అఖిలకళా వైభవశ్రీ అడివి బాపిరాజు : సాహితి, చిత్రలేఖన, శిల్ప, నాట్య, సంపాదక, సినీకళా నైపుణ్యాల విశ్లేషణ","akhilakaḷā vaibhavaśrī aḍivi bāpirāju :  sāhiti, citralēkhana, śilpa, nāṭya, sampādaka, sinīkaḷā naipuṇyāla viślēṣaṇa",,,
+"telugu","శ్రీమత్త్యాగరాజ విజయం","śrīmattyāgarāja vijayaṃ",,,
+"telugu","తెలంగాణ ప్రముఖ కవులు - కావ్యాలు : క్రీ. శ. 941 నుంచి 1975 వరకు మధ్య జన్మించి, 2000లోపు కావ్యాలతో ఖ్యాతిగాంచిన కవుల సంబంధిత విశ్లేషణ","telaṅgāṇa pramukha kavulu - kāvyālu : krī. śa. 941 nuñci 1975 varaku madhya janmiñci, 2000lōpu kāvyālatō khyātigāñcina kavula sambandhita viślēṣaṇa",,,
+"telugu","తెలంగాణ కవితా వైభవం : పాల్కురికి నుండి గద్దర్ దాకా :  ఎఱుక","telaṅgāṇa kavitā vaibhavaṃ :  pālkuriki nuṇḍi gaddar dākā : er̲uka",,,
+"telugu","శ్రీమదాంధ్ర మహాభారత ప్రవచనము","śrīmadāndhra mahābhārata pravacanamu",,,
+"telugu","ద్రాక్షారం కథలు","drākṣāraṃ kathalu",,,
+"telugu","టి. దేవేందర్ గౌడ్ శాసనసభ ప్రసంగాలు","ṭi. dēvēndar gauḍ śāsanasabha prasaṅgālu",,,
+"telugu","విలక్షణ పి. వి. నరసింహారావు గారి జీవిత చరిత్ర ","vilakṣaṇa pi. vi. narasiṃhārāvu gāri jīvita caritra",,,
+"telugu","కాకతీయుల శాసనాలలో ఛందోవైవిధ్యం : వరంగల్ జిల్లా పరిధిలోనివి","kākatīyula śāsanālalō chandōvaividhyaṃ : varaṅgal jillā paridhilōnivi",,,
+"telugu","చీకటి వెన్నెల : స్త్రీల కథలు","cīkaṭi vennela : strīla kathalu",,,

+ 26 - 5
test/data/script_samples/tibetan.csv

@@ -1,5 +1,26 @@
-"tibetan","བྱང་ཕྱོགས་བསྟན་འགྲོའི་སྐྱབས་མགོན་ཐམས་ཅད་མཁྱེན་པ་ཁལ་ཁ་ཨེར་ཏེ་ནེ་ཁུ་ཐག་ཐུ་བློ་བཟང་བསྟན་འཛིན་རྒྱལ་མཚན་གྱིའི་གསུང་འབུམ།","Byang phyogs bstan ‘gro’i skyabs mgon Thams-cad-mkhyen-pa Khal-kha Er-te-ne Khu-thag-thu Blo-bzang-bstan-‘dzin-rgyal-mtshan gyi’i gsung ʼbum",,"{""capitalize"": ""first""}"
-"tibetan","རྗེ་བཙུན་དམ་པ་སྐུ་ཕྲེང་བརྒྱད་པའི་གསུང་འབུམ","Rje-btsun-dam-pa sku phreng brgyad paʼi gsung ʼbum",,"{""capitalize"": ""first""}"
-"tibetan","རྗེ་བཙུན་ཐམས་ཅད་མཁྱེན་པ་དགེ་འདུན་རྒྱ་མཚོའི་གསུང་འབུམ་བཞུགས་སོ་","Rje-btsun Thams-cad-mkhyen-pa Dge-ʼdun-rgya-mtshoʼi gsung ʼbum bzhugs so",,"{""capitalize"": ""first""}"
-"tibetan","སྒྲུབ་ཐབས་འདོད་འཇོའི་བུམ་བཟང་གི་བརྒྱུད་པའི་རིམ་པ་ཕྱོགས་གཅིག་ཏུ་བསྡེབས་པ་བཞུགས་སོ།","Sgrub thabs ʼdod ʼjoʼi bum bzang gi brgyud paʼi rim pa phyogs gcig tu bsdebs pa bzhugs so",,"{""capitalize"": ""first""}"
-"tibetan","བཀའ་གདམས་ཀྱི་སྐྱེས་བུ་དམ་པ་རྣམས་ཀྱི་གསུང་བགྲོས་ཐོར་བུ་རྣམས་བཞུགས་སོ།","Bkaʼ gdams kyi skyes bu dam pa rnams kyi gsung bgros thor bu rnams bzhugs so",,"{""capitalize"": ""first""}"
+"tibetan","བྱང་ཕྱོགས་བསྟན་འགྲོའི་སྐྱབས་མགོན་ཐམས་ཅད་མཁྱེན་པ་ཁལ་ཁ་ཨེར་ཏེ་ནེ་ཁུ་ཐག་ཐུ་བློ་བཟང་བསྟན་འཛིན་རྒྱལ་མཚན་གྱིའི་གསུང་འབུམ།","Byang phyogs bstan ‘gro’i skyabs mgon thams-cad-mkhyen-pa khal-kha er-te-ne khu-thag-thu blo-bzang-bstan-‘dzin-rgyal-mtshan gyi’i gsung ʼbum",,"{""capitalize"": ""first""}",
+"tibetan","རྗེ་བཙུན་དམ་པ་སྐུ་ཕྲེང་བརྒྱད་པའི་གསུང་འབུམ","Rje-btsun-dam-pa sku phreng brgyad paʼi gsung ʼbum",,"{""capitalize"": ""first""}",
+"tibetan","རྗེ་བཙུན་ཐམས་ཅད་མཁྱེན་པ་དགེ་འདུན་རྒྱ་མཚོའི་གསུང་འབུམ་བཞུགས་སོ་","Rje-btsun Thams-cad-mkhyen-pa dge-ʼdun-rgya-mtshoʼi gsung ʼbum bzhugs so",,"{""capitalize"": ""first""}",
+"tibetan","སྒྲུབ་ཐབས་འདོད་འཇོའི་བུམ་བཟང་གི་བརྒྱུད་པའི་རིམ་པ་ཕྱོགས་གཅིག་ཏུ་བསྡེབས་པ་བཞུགས་སོ།","Sgrub thabs ʼdod ʼjoʼi bum bzang gi brgyud paʼi rim pa phyogs gcig tu bsdebs pa bzhugs so",,"{""capitalize"": ""first""}",
+"tibetan","བཀའ་གདམས་ཀྱི་སྐྱེས་བུ་དམ་པ་རྣམས་ཀྱི་གསུང་བགྲོས་ཐོར་བུ་རྣམས་བཞུགས་སོ།","Bkaʼ gdams kyi skyes bu dam pa rnams kyi gsung bgros thor bu rnams bzhugs so",,"{""capitalize"": ""first""}",
+"tibetan","སྤྱི་སྨན་ཞབས་ཞུ་དང་བློ་ཡིད་ཚིམས་པའི་མི་ཚེའི་བྱུང་བ","spyi sman zhabs zhu dang blo yid tshims paʼi mi tsheʼi byung ba ",,,
+"tibetan","རྒྱལ་དབང་སྐུ་ཕྲེང་རིམ་བྱོན་གྱི་མཛད་རྣམ་","rgyal dbang sku phreng rim byon gyi mdzad rnam",,,
+"tibetan","དག་ཡིག་ངག་སྒྲོན་རྩ་འགྲེལ་","dag yig ngag sgron rtsa ʼgrel ",,,
+"tibetan","གྲུབ་མཐའི་རྣམ་བཤད་རང་གཞན་གྲུབ་མཐའ་ཀུན་དང་ཟབ་དོན་མཆོག་ཏུ་གསལ་བ་ཀུན་བཟང་ཞིང་གི་ཉི་མ་ལུང་རིགས་རྒྱ་མཚོ་སྐྱེ་དགུའི་རེ་བ་ཀུན་སྐོང་","grub mthaʼi rnam bshad rang gzhan grub mthaʼ kun dang zab don mchog tu gsal ba kun bzang zhing gi nyi ma lung rigs rgya mtsho skye dguʼi re ba kun skong ",,,
+"tibetan","རྗེ་བཙུན་དམ་པ་སྐུ་ཕྲེང་ལྔ་པ་དང་བདུན་པའི་གསུང་ཐོར་བུ་བཞུགས་སོ","rje btsun dam pa sku phreng lnga pa dang bdun paʼi gsung thor bu bzhugs so",,,
+"tibetan","བདེར་གཤེགས་བདུན་གྱི་མཆོད་པའི་ཆོག་བསྒྲིགས་ཡིད་བཞིན་དབང་རྒྱལ་ཞེས་བྱ་བ་བཞུགས་སོ","bder gshegs bdun gyi mchod paʼi chog bsgrigs yid bzhin dbang rgyal zhes bya ba bzhugs so",,,
+"tibetan","Tshe dbang rgya gar maʼi khrid dang sgrub pa bcas kyi gsung pod","ཚེ་དབང་རྒྱ་གར་མའི་ཁྲིད་དང་སྒྲུབ་པ་བཅས་ཀྱི་གསུང་པོད།",,,
+"tibetan","Tshe dbaṅ rgya gar maʼi khrid daṅ sgrub pa bcas kyi gsuṅ pod","ཚེ་དབང་རྒྱ་གར་མའི་ཁྲིད་དང་སྒྲུབ་པ་བཅས་ཀྱི་གསུང་པོད།","r2s","{""capitalize"": ""first""}","From Lobsang"
+"tibetan","Bde gshegs kun ʼdus kyi gtum moʼi khrid yig zab gnad gsal sgron zhes bya ba bzhugs so / $c [Smin-gling Lo-chen Dharma-śrī].","བདེ་གཤེགས་ཀུན་འདུས་ཀྱི་གཏུམ་མོའི་ཁྲིད་ཡིག་ཟབ་གནད་གསལ་སྒྲོན་ཞེས་བྱ་བ་བཞུགས་སོ། / $c སྨིན་གླིང་ལོ་ཆེན་དྷརྨ་ཤྲི།","r2s","{""capitalize"": ""first""}","From Lobsang"
+"tibetan","Bde gśegs kun ʼdus kyi gtum moʼi khrid yig zab gnad gsal sgron źes bya ba bźugs so","བདེ་གཤེགས་ཀུན་འདུས་ཀྱི་གཏུམ་མོའི་ཁྲིད་ཡིག་ཟབ་གནད་གསལ་སྒྲོན་ཞེས་བྱ་བ་བཞུགས་སོ།","r2s","{""capitalize"": ""first""}","From Lobsang"
+"tibetan","Bod kyi la gzhas gzhon nu ʼgugs paʼi lcags kyu / $c Kun-dgaʼ-rgyal-mtshan gyis bsdu sgrig byas","བོད་ཀྱི་ལ་གཞས་གཞོན་ནུ་འགུགས་པའི་ལྕགས་ཀྱུ། / ཀུན་དགའ་རྒྱལ་མཚན་གྱིས་བསྡུ་སྒྲིག་བྱས།","r2s","{""capitalize"": ""first""}","From Lobsang"
+"tibetan","Bod kyi la gźas gźon nu ʼgugs paʼi lcags kyu","བོད་ཀྱི་ལ་གཞས་གཞོན་ནུ་འགུགས་པའི་ལྕགས་ཀྱུ།","r2s","{""capitalize"": ""first""}","From Lobsang"
+"tibetan","Lhug rtsom pad maʼi zeʼu ʼbru","ལྷུག་རྩོམ་པད་མའི་ཟེའུ་འབྲུ།","r2s","{""capitalize"": ""first""}","From Lobsang"
+"tibetan","Sbrang char lo ʾkhor 20 dpe tshogs, 1981-2001","སྦྲང་ཆར་ལོ་འཁོར་༢༠་དཔེ་ཚོགས། ༡༩༨༡-༢༠༠༡","r2s","{""capitalize"": ""first""}","From Lobsang"
+"tibetan","""Sbrang char"" rtsom sgrig khang gis bsgrigs","སྦྲང་ཆར་རྩོམ་སྒྲིག་ཁང་གིས་བསྒྲིགས།","r2s","{""capitalize"": ""first""}","From Lobsang"
+"tibetan","Dpar thengs 1","དཔར་ཐེངས། ༡","r2s","{""capitalize"": ""first""}","From Lobsang"
+"tibetan","Zi-ling","ཟི་ལིང་།","r2s","{""capitalize"": ""first""}","From Lobsang"
+"tibetan","Mtsho-sngon mi rigs dpe skrun khang","མཚོ་སྔོན་མི་རིགས་དཔེ་སྐྲུན་ཁང་།","r2s","{""capitalize"": ""first""}","From Lobsang"
+"tibetan","Rgyud don rig-ʼdzin dgyes paʼi zhal lung","རྒྱུད་དོན་རིག་འཛིན་དགྱེས་པའི་ཞལ་ལུང་།","r2s","{""capitalize"": ""first""}","From Lobsang"
+"tibetan","Rgyud don rig-ʼdzin dgyes paʾi źal luṅ ","རྒྱུད་དོན་རིག་འཛིན་དགྱེས་པའི་ཞལ་ལུང་།","r2s","{""capitalize"": ""first""}","From Lobsang"
+"tibetan","Gnas brtan chen po bcu drug gi mchod pa rgyal bstan mdzad med nor bu zhes bya ba bzhugs so","གནས་བརྟན་ཆེན་པོ་བཅུ་དྲུག་གི་མཆོད་པ་རྒྱལ་བསྟན་མཛད་མེད་ནོར་བུ་ཞེས་བྱ་བ་བཞུགས་སོ།","r2s","{""capitalize"": ""first""}","From Lobsang"

+ 30 - 19
test/integration.py

@@ -10,7 +10,7 @@ from test import TEST_DATA_DIR
 logger = getLogger(__name__)
 
 
-def test_sample(dset):
+def test_sample(dset, report=True):
     """
     Test an individual sample set and produce a human-readable report.
 
@@ -18,8 +18,12 @@ def test_sample(dset):
 
     @param dset (str): sample set name (without the .csv extension) found in
     the `data/script_samples` directory.
+
+    @param report (bool): if True (the default), print fail/success ticks and
+    write out a report to file at the end. Otherwise, raise an exception on
+    the first error encountered.
     """
-    deltas = []
+    deltas = [] if report else None
     dset_fpath = path.join(TEST_DATA_DIR, "script_samples", dset + ".csv")
     log_fpath = path.join(TEST_DATA_DIR, "log", f"test_{dset}.log")
 
@@ -41,20 +45,21 @@ def test_sample(dset):
                 _trans(rom, lang, "r2s", opts, script, deltas)
             i += 1
 
-    with open(log_fpath, "w") as fh:
-        # If no deltas, just truncate the file.
-        for lang, script, delta in deltas:
-            fh.write(f"Language: {lang}\n")
-            fh.write(f"Original: {script}\nDiff (result vs. expected):\n")
-            for dline in delta:
-                fh.write(dline.strip() + "\n")
-            fh.write("\n\n")
+    if report:
+        with open(log_fpath, "w") as fh:
+            # If no deltas, just truncate the file.
+            for lang, script, delta in deltas:
+                fh.write(f"Language: {lang}\n")
+                fh.write(f"Original: {script}\nDiff (result vs. expected):\n")
+                for dline in delta:
+                    fh.write(dline.strip() + "\n")
+                fh.write("\n\n")
 
-    ct = len(deltas)
-    if ct > 0:
-        print(f"\n\n{ct} failed tests. See report at {log_fpath}")
-    else:
-        print("All tests passed.")
+        ct = len(deltas)
+        if ct > 0:
+            print(f"\n\n{ct} failed tests. See report at {log_fpath}")
+        else:
+            print("All tests passed.")
 
 
 def _trans(script, lang, t_dir, opts, rom, deltas):
@@ -62,8 +67,14 @@ def _trans(script, lang, t_dir, opts, rom, deltas):
     trans, warnings = transliterate(
             script, lang, t_dir=t_dir,
             capitalize=opts.get("capitalize"), options=opts)
-    if (trans == rom):
-        print(".", end="")
+    try:
+        assert trans == rom
+    except AssertionError as e:
+        if deltas is not None:
+            print("F", end="")
+            deltas.append((lang, script, ndiff([trans], [rom])))
+        else:
+            raise e
     else:
-        print("F", end="")
-        deltas.append((lang, script, ndiff([trans], [rom])))
+        if deltas:
+            print(".", end="")

+ 8 - 59
test/unittest/test02_transliteration.py

@@ -1,13 +1,10 @@
 import logging
 
-from unittest import TestCase, TestSuite, TextTestRunner
-from csv import reader
-from json import loads as jloads
-from os import environ, path, unlink
+from unittest import TestCase
+from os import environ, unlink
 
-from scriptshifter.trans import transliterate
-from scriptshifter.tables import get_language, init_db
-from test import TEST_DATA_DIR
+from scriptshifter.tables import init_db
+from test.integration import test_sample
 
 
 logger = logging.getLogger(__name__)
@@ -23,58 +20,10 @@ def tearDownModule():
 
 class TestTrans(TestCase):
     """
-    Test S2R transliteration.
+    Test transliteration.
 
-    Modified test case class to run independent tests for each CSV row.
-
-    TODO use a comprehensive sample table and report errors for unsupported
-    languages.
-    """
-    def sample(self):
-        """
-        Test transliteration for one CSV row.
-
-        This function name won't start with `test_` otherwise will be
-        automatically run without parameters.
-        """
-        config = get_language(self.tbl)
-        t_dir = self.options.get("t_dir", "s2r")
-        if (
-                t_dir == "s2r" and config["has_s2r"]
-                or t_dir == "r2s" and config["has_r2s"]):
-            txl = transliterate(
-                    self.script, self.tbl,
-                    t_dir=t_dir,
-                    capitalize=self.options.get("capitalize", False),
-                    options=self.options)[0]
-            self.assertEqual(
-                    txl, self.roman,
-                    f"S2R transliteration error for {self.tbl}!\n"
-                    f"Original: {self.script}")
-
-
-def make_suite():
+    Use "unittest" sample table.
     """
-    Build parametrized test cases.
-    """
-    suite = TestSuite()
-
-    with open(path.join(
-        TEST_DATA_DIR, "script_samples", "unittest.csv"
-    ), newline="") as fh:
-        csv = reader(fh)
-        for row in csv:
-            if len(row[0]):
-                # Inject transliteration info in the test case.
-                tcase = TestTrans("sample")
-                tcase.tbl = row[0]
-                tcase.script = row[1].strip()
-                tcase.roman = row[2].strip()
-                tcase.options = jloads(row[3]) if len(row[3]) else {}
-
-                suite.addTest(tcase)
-
-    return suite
-
 
-TextTestRunner().run(make_suite())
+    def test_integration_sample(self):
+        test_sample("unittest", False)