|
@@ -0,0 +1,144 @@
|
|
|
+private processNumbers(pinyinString: string, tag: string, code: string): string {
|
|
|
+ let outputString = "";
|
|
|
+ let useNumVersion = false;
|
|
|
+ //useNumVersion is set in specific subfields where we definitely want to treat numbers as numbers
|
|
|
+ if ((tag == "245" || tag == "830") && code == "n") {
|
|
|
+ useNumVersion = true;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * The input string is split, with any space or punctuation character (except for #) as the delimiter.
|
|
|
+ * The delimiters will be captured and included in the string of tokens. Only the even-numbered
|
|
|
+ * array elements are the true 'tokens', so the code for processing tokens is run only for even
|
|
|
+ * values of j.
|
|
|
+ */
|
|
|
+ let tokens: string[] = pinyinString.split(new RegExp("([^\\P{P}#]|\\s)","u"));
|
|
|
+ let numTokenPattern = "^([A-Za-z]+)#([0-9]*)$";
|
|
|
+ let numToken_re = new RegExp(numTokenPattern);
|
|
|
+ let n = tokens.length
|
|
|
+ //this.alert.info(tokens.join("|"),{autoClose: false})
|
|
|
+ for (let i = 0; i < n; i++) {
|
|
|
+ let toki = tokens[i];
|
|
|
+ if (toki.match(numToken_re)) {
|
|
|
+ /*
|
|
|
+ * When a numerical token (containing #) is reached, the inner loop consumes it and all consecutive numerical tokens
|
|
|
+ * found after it. Two versions of the string are maintained. The textVersion is the original pinyin (minus the
|
|
|
+ * # suffixes). In the numVersion, characters representing numbers are converted to Arabic numerals. When a
|
|
|
+ * non-numerical token (or end of string) is encountered, the string of numerical tokens is evaluated to determine
|
|
|
+ * which version should be used in the output string. The outer loop then continues where the inner loop left off.
|
|
|
+ */
|
|
|
+ let textVersion = "";
|
|
|
+ let numVersion = "";
|
|
|
+ for (let j = i; j < n; j++) {
|
|
|
+ let tokj = tokens[j];
|
|
|
+ /* a token without # (or the end of string) is reached */
|
|
|
+ if ((j % 2 == 0 && !tokj.match(numToken_re)) || j == n - 1) {
|
|
|
+ //If this runs, then we are on the last token and it is numeric. Add text after # (if present) to numerical version
|
|
|
+ let m = tokj.match(numToken_re);
|
|
|
+ if (m) {
|
|
|
+ textVersion += m[1]
|
|
|
+ if (m[2] == "") {
|
|
|
+ numVersion += m[1];
|
|
|
+ } else {
|
|
|
+ numVersion += m[2];
|
|
|
+ }
|
|
|
+ } else if (j == n - 1) {
|
|
|
+ //if last token is non-numerical, just tack it on.
|
|
|
+ textVersion += tokj;
|
|
|
+ numVersion += tokj;
|
|
|
+ } else if (textVersion.length > 0 && numVersion.length > 0) {
|
|
|
+ //if not at end of string yet and token is non-numerical, remove the last delimiter that was appended
|
|
|
+ //(outer loop will pick up at this point)
|
|
|
+ textVersion = textVersion.substring(0, textVersion.length - 1);
|
|
|
+ numVersion = numVersion.substring(0, numVersion.length - 1);
|
|
|
+ }
|
|
|
+ //evaluate numerical string that has been constructed so far
|
|
|
+ //use num version for ordinals and date strings
|
|
|
+ if (numVersion.match(/^di [0-9]/i) ||
|
|
|
+ numVersion.match(/[0-9] [0-9] [0-9] [0-9]/) ||
|
|
|
+ numVersion.match(/[0-9]+ nian [0-9]+ yue/i) ||
|
|
|
+ numVersion.match(/"[0-9]+ yue [0-9]+ ri/i) ||
|
|
|
+ useNumVersion
|
|
|
+ ) {
|
|
|
+ useNumVersion = true;
|
|
|
+ /*
|
|
|
+ * At this point, string may contain literal translations of Chinese numerals
|
|
|
+ * Convert these to Arabic numerals (for example "2 10 7" = "27").
|
|
|
+ */
|
|
|
+
|
|
|
+ while (numVersion.match(/[0-9] 10+/) || numVersion.match(/[1-9]0+ [1-9]/)) {
|
|
|
+ m = numVersion.match(/([0-9]+) ([1-9]0+)/);
|
|
|
+ if (m) {
|
|
|
+ let sum = Number(m[1]) * Number(m[2]);
|
|
|
+ numVersion = numVersion.replace(/[0-9]+ [1-9]0+/, String(sum));
|
|
|
+ } else {
|
|
|
+ let mb = numVersion.match(/([1-9]0+) ([0-9]+)/);
|
|
|
+ if (mb)
|
|
|
+ {
|
|
|
+ let sumb = Number(mb[1]) + Number(mb[2]);
|
|
|
+ numVersion = numVersion.replace(/[1-9]0+ [0-9]+/, String(sumb));
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //A few other tweaks
|
|
|
+ numVersion = numVersion.replace(/([0-9]) ([0-9]) ([0-9]) ([0-9])/g, "$1$2$3$4");
|
|
|
+ if ((tag == "245" || tag == "830") && code == "n") {
|
|
|
+ while (numVersion.match(/[0-9] [0-9]/)) {
|
|
|
+ numVersion = numVersion.replace(/([0-9]) ([0-9])/, "$1$2");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (useNumVersion)
|
|
|
+ {
|
|
|
+ outputString += numVersion;
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ outputString += textVersion;
|
|
|
+ }
|
|
|
+ //if the end of the string is not reached, backtrack to the delimiter after the last numerical token
|
|
|
+ //(i.e. two tokens ago)
|
|
|
+ if (j < n - 1)
|
|
|
+ {
|
|
|
+ i = j - 2;
|
|
|
+ }
|
|
|
+ else //we are at the end of the string, so we are done!
|
|
|
+ {
|
|
|
+ i = j;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ //this is run when we are not yet at the end of the string and have not yet reached a non-numerical token
|
|
|
+ //This is identical to the code that is run above when the last token is numeric.
|
|
|
+ if (j % 2 == 0)
|
|
|
+ {
|
|
|
+ let m = tokj.match(numToken_re);
|
|
|
+ textVersion += m[1];
|
|
|
+ if (m[2]== "")
|
|
|
+ {
|
|
|
+ numVersion += m[1];
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ numVersion += m[2];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else //a delimiter, just tack it on.
|
|
|
+ {
|
|
|
+ textVersion += tokj;
|
|
|
+ numVersion += tokj;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else // the outer loop has encountered a non-numeric token or delimiter, just tack it on.
|
|
|
+ {
|
|
|
+ outputString += toki;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return outputString;
|
|
|
+ }
|