HEX
Server: Apache/2.4.65 (Debian)
System: Linux kubikelcreative 5.10.0-35-amd64 #1 SMP Debian 5.10.237-1 (2025-05-19) x86_64
User: www-data (33)
PHP: 8.4.13
Disabled: NONE
Upload Files
File: //usr/share/nodejs/iconv-lite/generation/research/gen-normalization.js
// This script generates unicode normalization data.

var utils = require("../utils"),
    errTo = require("errto"),
    async = require("async");

var baseUrl = "http://www.unicode.org/Public/6.3.0/ucd/";

async.parallel({
    data:       utils.getFile.bind(null, baseUrl + "UnicodeData.txt"),
    exclusions: utils.getFile.bind(null, baseUrl + "CompositionExclusions.txt")
}, errTo(console.log, function(data) {

    // Normalization data parsed from UnicodeData.txt, keyed by code point. Each entry holds:
    //   decomp         - code points from the decomposition field (undefined when that field is empty)
    //   canonical      - true for a canonical mapping, false when the mapping began with a tag
    //   combiningClass - parsed combining class field (0 when missing or unparsable)
    // Code points with neither a decomposition nor a non-zero combining class get no entry.
    var features = {};
    utils.parseText(data.data, ";").forEach(function(fields) {
        var codePoint = parseInt(fields[0], 16);
        var ccc = parseInt(fields[3], 10) || 0;
        var mapping = fields[5].trim();
        var isCanonical, parts;

        if (mapping.length > 0) {
            parts = mapping.split(" ").map(function(token) { return parseInt(token, 16); });
            // A leading tag is not parsable as hex, so it shows up as NaN: that marks a
            // 'compatibility decomposition' and the tag itself is dropped from the list.
            isCanonical = !isNaN(parts[0]);
            if (!isCanonical) {
                parts.shift();
            }
            //console.log(String.fromCharCode(codePoint), " -> ", parts.map(function(c) { return String.fromCharCode(c)}).join(" + "), isCanonical ? "canonical" : "compat");
        }

        // Nothing worth recording for plain characters.
        if (!parts && !ccc)
            return;

        features[codePoint] = {
            decomp: parts,
            canonical: isCanonical,
            combiningClass: ccc,
        };
    });

    // Process CompositionExclusions.txt: flag every listed code point as not composable.
    // A listed code point that has no entry in `features` (or a line whose first field is not
    // valid hex) is reported and skipped instead of aborting the script with a TypeError.
    utils.parseText(data.exclusions).forEach(function(a) {
        var ch = parseInt(a[0], 16);
        var feat = features[ch];
        if (!feat) {
            console.log("Composition exclusion without decomposition data:", a[0]);
            return;
        }
        feat.noCompose = true;
    });

    // Exclude Non-Starter Decompositions and Singleton Decompositions (CompositionExclusions.txt parts 3, 4).
    // These are derived from the parsed data: a canonical mapping is not composable when it has
    // a single code point, when the character itself has a non-zero combining class, or when
    // the first code point of its decomposition has one.
    Object.keys(features).forEach(function(key) {
        var entry = features[key];
        if (!entry.canonical)
            return;

        var isSingleton = entry.decomp.length === 1;
        var firstPart = features[entry.decomp[0]] || {};
        if (isSingleton || entry.combiningClass || firstPart.combiningClass) {
            //console.log("Excluded:", (+key).toString(16));
            entry.noCompose = true;
        }
    });

    // Add Jamo decompositions (see part 3.12 of http://www.unicode.org/versions/Unicode6.3.0/ch03.pdf).
    // Every syllable is computed arithmetically from the base/count constants below.
    var LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7, SBase = 0xAC00;
    var LCount = 19, VCount = 21, TCount = 28;
    var NCount = VCount * TCount;  // Number of syllables per value of l.

    // Registers a canonical two-part decomposition with combining class 0.
    var addSyllable = function(code, first, second) {
        features[code] = {
            decomp: [first, second],
            canonical: true,
            combiningClass: 0
        };
    };

    for (var l = 0; l < LCount; l++) {
        for (var v = 0; v < VCount; v++) {
            // LV syllable decomposes into its L and V parts.
            var lv = SBase + l * NCount + v * TCount;
            addSyllable(lv, LBase + l, VBase + v);

            // LVT syllables decompose into the LV syllable plus a T part; t = 0 is the LV syllable itself.
            for (var t = 1; t < TCount; t++) {
                addSyllable(lv + t, lv, TBase + t);
            }
        }
    }

    // -------------------------------------------------------------------------
    
    // Returns the normalization data recorded for a code point. Code points without an entry
    // get a fresh placeholder that has combining class 0 and no decomposition.
    function f(ch) {
        var known = features[ch];
        if (known) {
            return known;
        }
        return {combiningClass: 0};
    }
    // Formats a code point (number or numeric string) as lowercase hexadecimal, without a prefix.
    function hex(ch) {
        var code = +ch;
        return code.toString(16);
    }

    // Recursively expands a code point into its full decomposition.
    //   ch        - code point to expand.
    //   canonical - when truthy, only canonical mappings are followed; otherwise
    //               compatibility mappings are followed as well.
    // Returns a flat array of code points. A code point with nothing to expand is returned
    // unchanged as the single element [ch].
    function decompose(ch, canonical) {
        var feat = f(ch);
        var expandable = feat.decomp && (feat.canonical || !canonical);
        if (!expandable)
            return [ch];

        return feat.decomp.reduce(function(result, part) {
            return result.concat(decompose(part, canonical));
        }, []);
    }
    /*
    for (var ch in features) {
        [true, false].map(function(can) {
            var arr = decompose(ch, can);
            for (var i = 0; i < arr.length-1; i++)
                if (f(arr[i]).combiningClass > f(arr[i+1]).combiningClass)
                    console.log("Err", (+ch).toString(16), can, arr.map(function(ch) {return hex(ch)+"/"+f(ch).combiningClass;}));


        });
    }
    */
    // var asciiString = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'+
    //           ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f';


    // var encodings = require("../../encodings/sbcs-data-generated");
    // for (var encName in encodings) {
    //     var enc = encodings[encName];
    //     if (enc.chars) {
    //         if (enc.chars.length == 128)
    //             enc.chars = asciiString + enc.chars;

    //         var existChars = {};
    //         for (var i = 0; i < enc.chars.length; i++)
    //             existChars[enc.chars.charCodeAt(i)] = true;

    //         for (var i = 0; i < enc.chars.length; i++) {
    //             var charCode = enc.chars.charCodeAt(i);
    //             var feat = f(charCode);
    //             if (feat.decomp && feat.canonical && feat.decomp.length == 2) {
    //                 if (!existChars[feat.decomp[0]])
    //                     console.log("!!", encName, hex(enc.chars.charCodeAt(i)), "->", feat.decomp.map(hex));    
    //                 if (f(feat.decomp[0]).combiningClass != 0 || f(feat.decomp[1]).combiningClass == 0)
    //                     console.log("!!2", encName, hex(enc.chars.charCodeAt(i)), "->", feat.decomp.map(hex));    
    //             }

    //             var decomp = decompose(charCode, true);
    //             if (decomp.length > 2) {

    //                 console.log("!!3", encName, hex(enc.chars.charCodeAt(i)), "->", decomp.map(hex));

    //             }
    //         }


    //     }

    // }

    // Diagnostic pass over every canonical decomposition. Reports mappings of one or two code
    // points whose first code point has a combining class different from the character's own,
    // and any canonical mapping that is not one or two code points long.
    Object.keys(features).forEach(function(key) {
        var entry = f(key);
        if (!(entry.decomp && entry.canonical))
            return;

        var parts = entry.decomp;
        if (parts.length !== 1 && parts.length !== 2) {
            console.log("comp - not 1 or 2", hex(key));
            return;
        }

        // Same check for both lengths; only the report label differs.
        var label = parts.length === 1 ? "!!1" : "!!2";
        if (f(parts[0]).combiningClass !== entry.combiningClass) // || f(parts[1]).combiningClass == 0)
            console.log(label, hex(key), "->", parts.map(hex));
    });
}));