123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144 |
/**
 * Resolves `#`-annotated number tokens in a pinyin string.
 *
 * A "numerical token" has the form `word#digits` or `word#` (letters, a `#`,
 * then optional digits). For each run of consecutive numerical tokens two
 * candidate renderings are built:
 *  - the text version: the pinyin words with the `#` suffixes removed;
 *  - the numeric version: the digits after `#` where present, otherwise the word.
 *
 * The numeric version is chosen when the run looks like an ordinal
 * ("di 3"), four separate digits, or a date ("N nian N yue", "N yue N ri"),
 * or when the subfield is 245$n / 830$n. Once the numeric version has been
 * chosen for one run it stays selected for every later run in the same call.
 *
 * @param pinyinString pinyin text in which number-related syllables carry a `#` suffix
 * @param tag field tag; "245" and "830" combined with code "n" force numeric output
 * @param code subfield code
 * @returns the input with every `#` annotation resolved to text or Arabic numerals
 */
private processNumbers(pinyinString: string, tag: string, code: string): string {
  const numTokenRe = /^([A-Za-z]+)#([0-9]*)$/;
  // A multiplier is always a power of ten (10, 100, 1000, ...), matching the loop guard below.
  const multiplyRe = /([0-9]+) (10+)/;
  const addRe = /([1-9]0+) ([0-9]+)/;

  // In these subfields numbers are always treated as numbers.
  const isNumberSubfield = (tag === "245" || tag === "830") && code === "n";
  let useNumVersion = isNumberSubfield;

  /*
   * Split on any whitespace or punctuation character except '#'. The delimiters are
   * captured, so even indexes hold the real tokens and odd indexes hold the
   * single-character delimiters between them.
   */
  const tokens: string[] = pinyinString.split(new RegExp("([^\\P{P}#]|\\s)", "u"));
  const n = tokens.length;
  let outputString = "";

  for (let i = 0; i < n; i++) {
    const toki = tokens[i] ?? "";
    if (!numTokenRe.test(toki)) {
      // Plain token or delimiter: copy through unchanged.
      outputString += toki;
      continue;
    }

    // Start of a run of numerical tokens; consume it and every numerical token after it.
    let textVersion = "";
    let numVersion = "";
    const appendNumToken = (match: RegExpMatchArray): void => {
      const word = match[1] ?? "";
      const digits = match[2] ?? "";
      textVersion += word;
      numVersion += digits === "" ? word : digits;
    };

    for (let j = i; j < n; j++) {
      const tokj = tokens[j] ?? "";
      const m = tokj.match(numTokenRe); // delimiters can never match, so this is null at odd j
      const isLast = j === n - 1;
      const runEnds = isLast || (j % 2 === 0 && m === null);

      if (!runEnds) {
        if (m) {
          appendNumToken(m);
        } else {
          // a delimiter inside the run, just tack it on
          textVersion += tokj;
          numVersion += tokj;
        }
        continue;
      }

      if (m) {
        // last token of the string and it is numerical
        appendNumToken(m);
      } else if (isLast) {
        // last token of the string is non-numerical, just tack it on
        textVersion += tokj;
        numVersion += tokj;
      } else {
        // A non-numerical token ended the run. Drop the delimiter appended just before it;
        // the outer loop resumes at that delimiter. Strip by the delimiter's real length so
        // a two-code-unit punctuation character is not cut in half.
        const trailing = (tokens[j - 1] ?? "").length;
        if (trailing > 0 && textVersion.length >= trailing && numVersion.length >= trailing) {
          textVersion = textVersion.substring(0, textVersion.length - trailing);
          numVersion = numVersion.substring(0, numVersion.length - trailing);
        }
      }

      // Use the numeric version for ordinals and date strings (or when already selected).
      const looksNumeric =
        /^di [0-9]/i.test(numVersion) ||
        /[0-9] [0-9] [0-9] [0-9]/.test(numVersion) ||
        /[0-9]+ nian [0-9]+ yue/i.test(numVersion) ||
        /[0-9]+ yue [0-9]+ ri/i.test(numVersion);

      if (useNumVersion || looksNumeric) {
        useNumVersion = true;
        /*
         * The string may still contain literal translations of Chinese numerals.
         * Fold them into Arabic numerals: "2 10 7" -> "20 7" -> "27".
         * Multiplication by a power of ten is applied before addition.
         */
        while (/[0-9] 10+/.test(numVersion) || /[1-9]0+ [1-9]/.test(numVersion)) {
          if (multiplyRe.test(numVersion)) {
            numVersion = numVersion.replace(multiplyRe, (_all: string, a: string, b: string) =>
              String(Number(a) * Number(b)));
          } else if (addRe.test(numVersion)) {
            numVersion = numVersion.replace(addRe, (_all: string, a: string, b: string) =>
              String(Number(a) + Number(b)));
          } else {
            break;
          }
        }
        // Four separately written digits (e.g. a year) are joined.
        numVersion = numVersion.replace(/([0-9]) ([0-9]) ([0-9]) ([0-9])/g, "$1$2$3$4");
        if (isNumberSubfield) {
          // In 245$n / 830$n every remaining space between digits is removed.
          while (/[0-9] [0-9]/.test(numVersion)) {
            numVersion = numVersion.replace(/([0-9]) ([0-9])/, "$1$2");
          }
        }
      }

      outputString += useNumVersion ? numVersion : textVersion;

      // If the string continues, back up so the outer loop's i++ lands on the delimiter
      // that follows the last numerical token; otherwise we are done.
      i = j < n - 1 ? j - 2 : j;
      break;
    }
  }
  return outputString;
}
|