scossu
/
scriptshifter


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
							private processNumbers(pinyinString: string, tag: string, code: string): string {
    let outputString = "";
    let useNumVersion = false;
    //useNumVersion is set in specific subfields where we definitely want to treat numbers as numbers
    if ((tag == "245" || tag == "830") && code == "n") {
       useNumVersion = true;
    }

    /*
     * The input string is split, with any space or punctuation character (except for #) as the delimiter.
     * The delimiters will be captured and included in the string of tokens.  Only the even-numbered
     * array elements are the true 'tokens', so the code for processing tokens is run only for even
     * values of j.
     */
    let tokens: string[] = pinyinString.split(new RegExp("([^\\P{P}#]|\\s)","u"));
    let numTokenPattern = "^([A-Za-z]+)#([0-9]*)$";
    let numToken_re = new RegExp(numTokenPattern);
    let n = tokens.length
    //this.alert.info(tokens.join("|"),{autoClose: false})
    for (let i = 0; i < n; i++) {
        let toki = tokens[i];
        if (toki.match(numToken_re)) {
            /*
             * When a numerical token (containing #) is reached, the inner loop consumes it and all consecutive numerical tokens
             * found after it.  Two versions of the string are maintained.  The textVersion is the original pinyin (minus the
             * # suffixes).  In the numVersion, characters representing numbers are converted to Arabic numerals.  When a
             * non-numerical token (or end of string) is encountered, the string of numerical tokens is evaluated to determine
             * which version should be used in the output string.  The outer loop then continues where the inner loop left off.
             */
            let textVersion = "";
            let numVersion = "";
            for (let j = i; j < n; j++) {
                let tokj = tokens[j];
                /* a token without # (or the end of string) is reached */
                if ((j % 2 == 0 && !tokj.match(numToken_re)) || j == n - 1) {
                    //If this runs, then we are on the last token and it is numeric. Add text after # (if present) to numerical version
                    let m = tokj.match(numToken_re);
                    if (m) {
                        textVersion += m[1]
                        if (m[2] == "") {
                            numVersion += m[1];
                        } else {
                            numVersion += m[2];
                        }
                    } else if (j == n - 1) {
                    //if last token is non-numerical, just tack it on.
                        textVersion += tokj;
                        numVersion += tokj;
                    } else if (textVersion.length > 0 && numVersion.length > 0) {
                    //if not at end of string yet and token is non-numerical, remove the last delimiter that was appended
                    //(outer loop will pick up at this point)
                        textVersion = textVersion.substring(0, textVersion.length - 1);
                        numVersion = numVersion.substring(0, numVersion.length - 1);
                    }
                    //evaluate numerical string that has been constructed so far
                    //use num version for ordinals and date strings
                    if (numVersion.match(/^di [0-9]/i) ||
                        numVersion.match(/[0-9] [0-9] [0-9] [0-9]/) ||
                        numVersion.match(/[0-9]+ nian [0-9]+ yue/i) ||
                        numVersion.match(/"[0-9]+ yue [0-9]+ ri/i) ||
                        useNumVersion
                       ) {
                        useNumVersion = true;
                        /*
                         * At this point, string may contain literal translations of Chinese numerals
                         * Convert these to Arabic numerals (for example "2 10 7" = "27").
                         */

                        while (numVersion.match(/[0-9] 10+/) || numVersion.match(/[1-9]0+ [1-9]/)) {
                            m = numVersion.match(/([0-9]+) ([1-9]0+)/);
                            if (m) {
                                let sum = Number(m[1]) * Number(m[2]);
                                numVersion = numVersion.replace(/[0-9]+ [1-9]0+/, String(sum));
                            } else {
                                let mb = numVersion.match(/([1-9]0+) ([0-9]+)/);
                                if (mb)
                                {
                                    let sumb = Number(mb[1]) + Number(mb[2]);
                                    numVersion = numVersion.replace(/[1-9]0+ [0-9]+/, String(sumb));
                                }
                                else
                                {
                                    break;
                                }
                            }
                        }

                        //A few other tweaks
                        numVersion = numVersion.replace(/([0-9]) ([0-9]) ([0-9]) ([0-9])/g, "$1$2$3$4");
                        if ((tag == "245" || tag == "830") && code == "n") {
                            while (numVersion.match(/[0-9] [0-9]/)) {
                                numVersion = numVersion.replace(/([0-9]) ([0-9])/, "$1$2");
                            }
                        }
                    }
                    if (useNumVersion)
                    {
                        outputString += numVersion;
                    }
                    else
                    {
                        outputString += textVersion;
                    }
                    //if the end of the string is not reached, backtrack to the delimiter after the last numerical token
                    //(i.e. two tokens ago)
                    if (j < n - 1)
                    {
                        i = j - 2;
                    }
                    else //we are at the end of the string, so we are done!
                    {
                        i = j;
                    }
                    break;
                }
                //this is run when we are not yet at the end of the string and have not yet reached a non-numerical token
                //This is identical to the code that is run above when the last token is numeric.
                if (j % 2 == 0)
                {
                    let m = tokj.match(numToken_re);
                    textVersion += m[1];
                    if (m[2]== "")
                    {
                        numVersion += m[1];
                    }
                    else
                    {
                        numVersion += m[2];
                    }
                }
                else //a delimiter, just tack it on.
                {
                    textVersion += tokj;
                    numVersion += tokj;
                }
            }
        }
        else // the outer loop has encountered a non-numeric token or delimiter, just tack it on.
        {
            outputString += toki;
        }
    }
    return outputString;
 }