Bladeren bron

Merge pull request #190 from lcnetdev/greek_modern

Use monotonic modern Greek; resolve some ambiguities in classical Greek.
Stefano Cossu 2 maanden geleden
bovenliggende
commit
6cc7089fea
2 gewijzigde bestanden met toevoegingen van 223 en 209 verwijderingen
  1. 207 209
      scriptshifter/tables/data/greek_classical.yml
  2. 16 0
      scriptshifter/tables/data/greek_modern.yml

+ 207 - 209
scriptshifter/tables/data/greek_classical.yml

@@ -52,7 +52,7 @@ script_to_roman:
       - "\u1F8E"  # ᾎ 	Greek Capital Letter Alpha With Psili And Perispomeni And Prosgegrammeni
     # Rough alpha
     "\u1F01":  # ἁ 	Greek Small Letter Alpha With Dasia
-      - "\u03B1\u0314" # Small alpha + combining reversed comma above
+      - "\u03B1\u0314"  # Small alpha + combining reversed comma above
       - "\u1F81"  # ᾁ 	Greek Small Letter Alpha With Dasia And Ypogegrammeni
     "\u1F03":  # ἃ 	Greek Small Letter Alpha With Dasia And Varia
       - "\u1F05"  # ἅ 	Greek Small Letter Alpha With Dasia And Oxia
@@ -61,7 +61,7 @@ script_to_roman:
       - "\u1F85"  # ᾅ 	Greek Small Letter Alpha With Dasia And Oxia And Ypogegrammeni
       - "\u1F87"  # ᾇ 	Greek Small Letter Alpha With Dasia And Perispomeni And Ypogegrammeni
     "\u1F09":  # Ἁ 	Greek Capital Letter Alpha With Dasia
-      - "\u0391\u0314" # Capital alpha + combining reversed comma above
+      - "\u0391\u0314"  # Capital alpha + combining reversed comma above
       - "\u1F89"  # ᾉ 	Greek Capital Letter Alpha With Dasia And Prosgegrammeni
     "\u1F0B":  # Ἃ 	Greek Capital Letter Alpha With Dasia And Varia
       - "\u1F8B"  # ᾋ 	Greek Capital Letter Alpha With Dasia And Varia And Prosgegrammeni
@@ -226,7 +226,7 @@ script_to_roman:
       - "\u1FE3"  # ΰ 	Greek Small Letter Upsilon With Dialytika And Oxia
       - "\u1FE7"  # ῧ 	Greek Small Letter Upsilon With Dialytika And Perispomeni
     "\u03A5":
-        # NOTE: Capital upsilon + psili seems to be absent from Unicode table.
+      # NOTE: Capital upsilon + psili seems to be absent from Unicode table.
       - "\u03AB"  # Ϋ 	Greek Capital Letter Upsilon With Dialytika
       - "\u1F59"  # Ὑ 	Greek Capital Letter Upsilon With Dasia
       - "\u1FE8"  # Ῠ 	Greek Capital Letter Upsilon With Vrachy
@@ -297,7 +297,7 @@ script_to_roman:
       - "\u1FAD"  # ᾭ 	Greek Capital Letter Omega With Dasia And Oxia And Prosgegrammeni
       - "\u1FAF"  # ᾯ 	Greek Capital Letter Omega With Dasia And Perispomeni And Prosgegrammeni
 
-    # Remove all combining diacritics.
+    # Remove combining diacritics irrelevant to transliteration.
     "":
       - "\u0314"
       - "\u0342"
@@ -323,7 +323,7 @@ script_to_roman:
     "\u201C": "\"\u0332"
     "\u201D": "\"\u0333"
     "\u2018": "'\u0332"
-    #"\u2019": "'\u0333"
+    # "\u2019": "'\u0333"
     "\u2116": "No\u0332"
     # "\u0300": ""
     # "\u0301": ""
@@ -421,7 +421,6 @@ script_to_roman:
     # \u03A2 reserved
     "\u03A3": "S"
     "\u03A4": "T"
-    "\u1F59": "Hy"
     "\u03A5": "Y"
     "\u03A5\u03B9": "Ui"
     "\u03A5\u1F31": "Hui"
@@ -518,7 +517,6 @@ script_to_roman:
     "\u1F51": "hy"
     "\u1F59": "Hy"
     "\u03C5": "y"
-    "\u03CB": "y"
     "\u03C5\u03B9": "ui"
     "\u03C5\u1F31": "hui"
     "\u03C6": "ph"
@@ -551,9 +549,9 @@ script_to_roman:
     "\u03D9": "ḳ"
     "\u03DA": "6\u0333"
     "\u03DB": "6\u0332"
-    #"\u03DC": "G\u0332"
+    # "\u03DC": "G\u0332"
     "\u03DC": "W"
-    #"\u03DD": "g\u0332"
+    # "\u03DD": "g\u0332"
     "\u03DD": "w"
     "\u03DE": "K\u0324"
     "\u03DF": "k\u0324"
@@ -591,203 +589,203 @@ script_to_roman:
     "\u03FF": ".)\u0333"
 
 roman_to_script:
-    map:
-      ".)\u0333": "\u03FF"
-      ".)": "\u037D"
-      "?\u0333": "\u037E"
-      "?": "\u037E"
-      "\"\u0332": "\u201C"
-      "\"\u0333": "\u201D"
-      "'\u0332": "\u2018"
-      "'\u0333": "\u2019"
-      "(.\u0333": "\u03FE"
-      "(.": "\u037C"
-      "|)\u0333": "\u03FD"
-      "|)": "\u037B"
-      # Left pointing double angle quotation mark
-      "\u003C\u003C": "\u00AB"
-      # Right pointing double angle quotation mark
-      "\u003E\u003E": "\u00BB"
-      "6\u0332": "\u03DB"
-      "6\u0333": "\u03DA"
-      "Au": "\u0391\u03C5"
-      "au": "\u03B1\u03C5"
-      "a\u0301": "\u03AC"
-      "B": "\u0392"
-      "b": "\u03B2"
-      "b\u0333": "\u03D0"
-      "Ch": "\u03A7"
-      "ch": "\u03C7"
-      "c\u030C": "\u03EB"
-      "\u1E0E": "\u039D\u03C4"
-      "\u1E0F": "\u03BD\u03C4"
-      "D": "\u0394"
-      "d": "\u03B4"
-      "Eu": "\u0395\u03C5"
-      "eu": "\u03B5\u03C5"
-      "E\u0301": "\u0388"
-      "e\u0301": "\u03AD"
-      "\u0112\u0301": "\u0389\u0314"
-      "\u0113\u0301": "\u03AE"
-      "\u0112\u0301": "\u0389"
-      "\u0112": "\u0397"
-      "\u0112u": "\u0397\u03C5"
-      "\u0113": "\u03B7"
-      "\u0113u": "\u03B7\u03C5"
-      "h\u0113u": "\u03B7\u1F51"
-      "e\u0332": "\u03F5"
-      "e\u0333": "\u03F6"
-      "F": "\u03E4"
-      "f": "\u03E5"
-      #"G\u0332": "\u03DC"
-      "W": "\u03DC"
-      #"g\u0332": "\u03DD"
-      "w": "\u03DD"
-      "Ha\u0301": "\u0386\u0314"
-      "ha\u0301": "\u03AC\u0314"
-      "Ha": "\u1F09"
-      "ha": "\u03B1\u0314"
-      "A": "\u0391"
-      "a": "\u03B1"
-      "h\u0113\u0301": "\u03AE\u0314"
-      "He\u0301": "\u0388\u0314"
-      "he\u0301": "\u03AD\u0314"
-      "H\u0113": "\u1F29"
-      "H\u0113u": "\u1F29\u03C5"
-      "h\u0113": "\u1F21"
-      "h\u0113u": "\u1F21\u13C5"
-      "He": "\u1F19"
-      "he": "\u1F11"
-      "E": "\u0395"
-      "e": "\u03B5"
-      "Hi\u0301": "\u038A\u0314"
-      "hi\u0301": "\u03AF\u0314"
-      "Hi": "\u1F39"
-      "hi": "\u1F31"
-      "Ho\u0301": "\u038F\u0314"
-      "Ho\u0301": "\u038C\u0314"
-      "h\u014D": "\u1F61"
-      "H\u014D": "\u1F69"
-      "Ho": "\u1F49"
-      "ho": "\u1F41"
-      "H\u0307": "\u03E8"
-      "h\u0307": "\u03E9"
-      "H\u0323": "\u0370"
-      "h\u0323": "\u0371"
-      "H\u0332": "\u03E6"
-      "h\u0332": "\u03E7"
-      "Hy\u0301": "\u038E\u0314"
-      "Hy": "\u1F59"
-      "hy": "\u1F51"
-      "Iu": "\u0399\u03C5"
-      "iu": "\u03B9\u03C5"
-      "I\u0301": "\u038A"
-      "i\u0301": "\u03AF"
-      "I\u0308": "\u03AA"
-      "i\u0308\u0301": "\u0390"
-      "i\u0308": "\u03CA"
-      "J": "\u037F"
-      "j": "\u03F3"
-      "K\u0323y": "\u03EC"
-      "k\u0323y": "\u03ED"
-      "K\u0332": "\u03DE"
-      "k\u0332": "\u03DF"
-      "K\u0326": "\u03CF"
-      "k\u0326": "\u03D7"
-      "k\u0332": "\u03F0"
-      "L": "\u039B"
-      "l": "\u03BB"
-      "M": "\u039C"
-      "m": "\u03BC"
-      "nch": "\u03B3\u03C7"
-      "ng": "\u03B3\u03B3"
-      "%nk%": "\u03B3\u03BA"
-      "nx": "\u03B3\u03BE"
-      "No\u0332": "\u2116"
-      "N": "\u039D"
-      "n": "\u03BD"
-      "K": "\u039A"
-      "k": "\u03BA"
-      "G": "\u0393"
-      "g": "\u03B3"
-      "Ou": "\u039F\u03C5"
-      "ou": "\u03BF\u03C5"
-      "O\u0301": "\u038C"
-      "o\u0301": "\u03CC"
-      "\u014C\u0301": "\u038F"
-      "\u014D\u0301": "\u03CE"
-      "\u014C": "\u03A9"
-      "\u014Cu": "\u03A9\u03C5"
-      "\u014D": "\u03C9"
-      "\u014Du": "\u03D9\u03C5"
-      "O": "\u039F"
-      "o": "\u03BF"
-      "Ph": "\u03A6"
-      "ph": "\u03C6"
-      "Ps": "\u03A8"
-      "ps": "\u03C8"
-      "p\u0333h\u0333": "\u03D5"
-      "p\u0333": "\u03D6"
-      "P": "\u03A0"
-      "p": "\u03C0"
-      "Ḳ": "\u03D8"
-      "ḳ": "\u03D9"
-      "Rh": "\u1FEC"
-      "rh": "\u1FE5"
-      "r\u0332": "\u03F1"
-      "r\u0333": "\u03FC"
-      "R": "\u03A1"
-      "r": "\u03C1"
-      "S\uFE20\u0332S\uFE21\u0332": "\u0372"
-      "s\uFE20\u0332s\uFE21\u0332": "\u0373"
-      "S\uFE20H\uFE21": "\u03F7"
-      "s\uFE20h\uFE21": "\u03F8"
-      "S\uFE20S\uFE21": "\u03E1"
-      "s\uFE20s\uFE21": "\u03E0"
-      "S\u030C": "\u03E2"
-      "s\u030C": "\u03E3"
-      "S\u0323": "\u03FA"
-      "s\u0323": "\u03FB"
-      "S": "\u03F9"
-      "s": "\u03F2"
-      "S": "\u03A3"
-      "%s": "\u03C2"
-      "s": "\u03C3"
-      "T\u0333H\u0333": "\u03F4"
-      "t\u0333h\u0333": "\u03D1"
-      "Th": "\u0398"
-      "th": "\u03B8"
-      "T\u0323i": "\u03EE"
-      "t\u0323i": "\u03EF"
-      "T": "\u03A4"
-      "t": "\u03C4"
-      "I": "\u0399"
-      "i": "\u03B9"
-      "\u0020\u0301": "\u0384"
-      "\u0020\u0308\u0301": "\u0385"
-      ";\u0333": "\u0387"
-      "\u02B9": "\u0374"
-      "\u0326": "\u0375"
-      "\u0328": "\u037A"
-      "V": "\u0392"
-      "v": "\u03B2"
-      "W\u0323": "\u0376"
-      "w\u0323": "\u0377"
-      "X": "\u039E"
-      "x": "\u03BE"
-      "Y\u0301\u0333": "\u03D3"
-      "Y\u0301": "\u038E"
-      "y\u0301": "\u03CD"
-      "Y\u0308\u0333": "\u03D4"
-      "y\u0308\u0301": "\u03B0"
-      "Y\u0308": "\u03AB"
-      "y\u0308": "\u03CB"
-      "Y\u0333": "\u03D2"
-      "Y": "\u03A5"
-      "Ui": "\u03A5\u03B9"
-      "Hui": "\u03A5\u1F31"
-      "y": "\u03C5"
-      "ui": "\u03C5\u03B9"
-      "hui": "\u03C5\u1F31"
-      "Z": "\u0396"
-      "z": "\u03B6"
+  map:
+    ".)\u0333": "\u03FF"
+    ".)": "\u037D"
+    "?\u0333": "\u037E"
+    "?": "\u037E"
+    "\"\u0332": "\u201C"
+    "\"\u0333": "\u201D"
+    "'\u0332": "\u2018"
+    "'\u0333": "\u2019"
+    "(.\u0333": "\u03FE"
+    "(.": "\u037C"
+    "|)\u0333": "\u03FD"
+    "|)": "\u037B"
+    # Left pointing double angle quotation mark
+    "\u003C\u003C": "\u00AB"
+    # Right pointing double angle quotation mark
+    "\u003E\u003E": "\u00BB"
+    "6\u0332": "\u03DB"
+    "6\u0333": "\u03DA"
+    "Au": "\u0391\u03C5"
+    "au": "\u03B1\u03C5"
+    "a\u0301": "\u03AC"
+    "B": "\u0392"
+    "b": "\u03B2"
+    "b\u0333": "\u03D0"
+    "Ch": "\u03A7"
+    "ch": "\u03C7"
+    "c\u030C": "\u03EB"
+    "\u1E0E": "\u039D\u03C4"
+    "\u1E0F": "\u03BD\u03C4"
+    "D": "\u0394"
+    "d": "\u03B4"
+    "Eu": "\u0395\u03C5"
+    "eu": "\u03B5\u03C5"
+    "E\u0301": "\u0388"
+    "e\u0301": "\u03AD"
+    "\u0113\u0301": "\u03AE"
+    # "\u0112\u0301": "\u0389\u0314"
+    "\u0112\u0301": "\u0389"
+    "\u0112": "\u0397"
+    "\u0112u": "\u0397\u03C5"
+    "\u0113": "\u03B7"
+    "\u0113u": "\u03B7\u03C5"
+    "e\u0332": "\u03F5"
+    "e\u0333": "\u03F6"
+    "F": "\u03E4"
+    "f": "\u03E5"
+    # "G\u0332": "\u03DC"
+    "W": "\u03DC"
+    # "g\u0332": "\u03DD"
+    "w": "\u03DD"
+    "Ha\u0301": "\u0386\u0314"
+    "ha\u0301": "\u03AC\u0314"
+    "Ha": "\u1F09"
+    "ha": "\u03B1\u0314"
+    "A": "\u0391"
+    "a": "\u03B1"
+    "h\u0113\u0301": "\u03AE\u0314"
+    "He\u0301": "\u0388\u0314"
+    "he\u0301": "\u03AD\u0314"
+    "H\u0113": "\u1F29"
+    "H\u0113u": "\u1F29\u03C5"
+    "h\u0113": "\u1F21"
+    "h\u0113u": "\u03B7\u1F51"
+    # "h\u0113u": "\u1F21\u13C5"  # FIXME: \u13C5 is outside the Greek blocks; probably a typo for \u03C5.
+    "He": "\u1F19"
+    "he": "\u1F11"
+    "E": "\u0395"
+    "e": "\u03B5"
+    "Hi\u0301": "\u038A\u0314"
+    "hi\u0301": "\u03AF\u0314"
+    "Hi": "\u1F39"
+    "hi": "\u1F31"
+    # "Ho\u0301": "\u038F\u0314"
+    "Ho\u0301": "\u038C\u0314"
+    "h\u014D": "\u1F61"
+    "H\u014D": "\u1F69"
+    "Ho": "\u1F49"
+    "ho": "\u1F41"
+    "H\u0307": "\u03E8"
+    "h\u0307": "\u03E9"
+    "H\u0323": "\u0370"
+    "h\u0323": "\u0371"
+    "H\u0332": "\u03E6"
+    "h\u0332": "\u03E7"
+    "Hy\u0301": "\u038E\u0314"
+    "Hy": "\u1F59"
+    "hy": "\u1F51"
+    "Iu": "\u0399\u03C5"
+    "iu": "\u03B9\u03C5"
+    "I\u0301": "\u038A"
+    "i\u0301": "\u03AF"
+    "I\u0308": "\u03AA"
+    "i\u0308\u0301": "\u0390"
+    "i\u0308": "\u03CA"
+    "J": "\u037F"
+    "j": "\u03F3"
+    "K\u0323y": "\u03EC"
+    "k\u0323y": "\u03ED"
+    "K\u0326": "\u03CF"
+    "k\u0326": "\u03D7"
+    "K\u0332": "\u03DE"
+    # "k\u0332": "\u03DF"  # FIXME: ambiguous; same Roman key as the \u03F0 entry on the next line.
+    "k\u0332": "\u03F0"
+    "L": "\u039B"
+    "l": "\u03BB"
+    "M": "\u039C"
+    "m": "\u03BC"
+    "nch": "\u03B3\u03C7"
+    "ng": "\u03B3\u03B3"
+    "%nk%": "\u03B3\u03BA"
+    "nx": "\u03B3\u03BE"
+    "No\u0332": "\u2116"
+    "N": "\u039D"
+    "n": "\u03BD"
+    "K": "\u039A"
+    "k": "\u03BA"
+    "G": "\u0393"
+    "g": "\u03B3"
+    "Ou": "\u039F\u03C5"
+    "ou": "\u03BF\u03C5"
+    "O\u0301": "\u038C"
+    "o\u0301": "\u03CC"
+    "\u014C\u0301": "\u038F"
+    "\u014D\u0301": "\u03CE"
+    "\u014C": "\u03A9"
+    "\u014Cu": "\u03A9\u03C5"
+    "\u014D": "\u03C9"
+    "\u014Du": "\u03D9\u03C5"
+    "O": "\u039F"
+    "o": "\u03BF"
+    "Ph": "\u03A6"
+    "ph": "\u03C6"
+    "Ps": "\u03A8"
+    "ps": "\u03C8"
+    "p\u0333h\u0333": "\u03D5"
+    "p\u0333": "\u03D6"
+    "P": "\u03A0"
+    "p": "\u03C0"
+    "Ḳ": "\u03D8"
+    "ḳ": "\u03D9"
+    "Rh": "\u1FEC"
+    "rh": "\u1FE5"
+    "r\u0332": "\u03F1"
+    "r\u0333": "\u03FC"
+    "R": "\u03A1"
+    "r": "\u03C1"
+    "S\uFE20\u0332S\uFE21\u0332": "\u0372"
+    "s\uFE20\u0332s\uFE21\u0332": "\u0373"
+    "S\uFE20H\uFE21": "\u03F7"
+    "s\uFE20h\uFE21": "\u03F8"
+    "S\uFE20S\uFE21": "\u03E1"
+    "s\uFE20s\uFE21": "\u03E0"
+    "S\u030C": "\u03E2"
+    "s\u030C": "\u03E3"
+    "S\u0323": "\u03FA"
+    "s\u0323": "\u03FB"
+    # "S": "\u03F9"  # FIXME: ambiguous; same Roman key as the \u03A3 entry on the next line.
+    "S": "\u03A3"
+    # "s": "\u03F2"  # FIXME: ambiguous; same Roman key as the \u03C3 entry two lines below.
+    "%s": "\u03C2"
+    "s": "\u03C3"
+    "T\u0333H\u0333": "\u03F4"
+    "t\u0333h\u0333": "\u03D1"
+    "Th": "\u0398"
+    "th": "\u03B8"
+    "T\u0323i": "\u03EE"
+    "t\u0323i": "\u03EF"
+    "T": "\u03A4"
+    "t": "\u03C4"
+    "I": "\u0399"
+    "i": "\u03B9"
+    "\u0020\u0301": "\u0384"
+    "\u0020\u0308\u0301": "\u0385"
+    ";\u0333": "\u0387"
+    "\u02B9": "\u0374"
+    "\u0326": "\u0375"
+    "\u0328": "\u037A"
+    "V": "\u0392"
+    "v": "\u03B2"
+    "W\u0323": "\u0376"
+    "w\u0323": "\u0377"
+    "X": "\u039E"
+    "x": "\u03BE"
+    "Y\u0301\u0333": "\u03D3"
+    "Y\u0301": "\u038E"
+    "y\u0301": "\u03CD"
+    "Y\u0308\u0333": "\u03D4"
+    "y\u0308\u0301": "\u03B0"
+    "Y\u0308": "\u03AB"
+    "y\u0308": "\u03CB"
+    "Y\u0333": "\u03D2"
+    "Y": "\u03A5"
+    "Ui": "\u03A5\u03B9"
+    "Hui": "\u03A5\u1F31"
+    "y": "\u03C5"
+    "ui": "\u03C5\u03B9"
+    "hui": "\u03C5\u1F31"
+    "Z": "\u0396"
+    "z": "\u03B6"

+ 16 - 0
scriptshifter/tables/data/greek_modern.yml

@@ -12,3 +12,19 @@ roman_to_script:
   map:
     "V": "\u0392"
     "v": "\u03B2"
+    "Ha": "\u0391"
+    "ha": "\u03B1"
+    "He": "\u0395"
+    "he": "\u03B5"
+    "Hi": "\u0399"
+    "hi": "\u03B9"
+    "Ho": "\u039F"
+    "ho": "\u03BF"
+    "Hou": "\u039F\u03C5"
+    "hou": "\u03BF\u03C5"
+    "H\u014D": "\u03A9"
+    "h\u014D": "\u03C9"
+    "Hy": "\u03A5"
+    "Hui": "\u03A5\u03B9"
+    "hy": "\u03C5"
+    "hui": "\u03C5\u03B9"