Hacker News new | ask | show | jobs
by voisine 6229 days ago
UTF-8 encoding in 6 lines, from ezxml:

    // Append the UTF-8 encoding of code point c at s, advancing s past the bytes written.
    // NOTE(review): c, s, b and d are declared outside this fragment; presumably c is a
    // non-negative integer code point and s points to byte-sized storage -- confirm.
    if (c < 0x80) *(s++) = c; // US-ASCII subset: stored as a single byte
    else { // multi-byte UTF-8 sequence
        for (b = 0, d = c; d; d /= 2) b++; // b = number of significant bits in c (d ends at 0)
        b = (b - 2) / 5; // number of payload (continuation) bytes: 8-11 bits -> 1, 12-16 -> 2, 17-21 -> 3, 22-26 -> 4, 27-31 -> 5
        *(s++) = (0xFF << (7 - b)) | (c >> (6 * b)); // head: low 8 bits are b+1 one-bits, a zero, then the top bits of c (assumes the store to *s keeps only the low byte)
        while (b) *(s++) = 0x80 | ((c >> (6 * --b)) & 0x3F); // payload: 10xxxxxx bytes, six bits of c each, most significant group first; leaves b == 0
    }