summaryrefslogtreecommitdiff
path: root/lib/utf8.c
blob: acc10c4627eb501aa03f570fbc023db6703a4b87 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#include <stdlib.h>
#include <stdio.h>
#include <memory.h>
#include "utf8.h"

/* Scratch buffer used by getUTF8(): it is cleared, filled with the encoded
   character and returned to the caller, so every call overwrites the
   previous result. */
static char utf8buf[16];

/* Encode charnum as a NUL-terminated UTF-8 style byte sequence in dest.
 *
 * charnum: value to encode; anything below 0x80000000 is accepted.
 * dest:    output buffer. Up to six encoded bytes plus the terminating
 *          NUL are written, so it must hold at least seven bytes.
 *
 * No range restriction beyond 31 bits is applied: surrogate values and the
 * 5- and 6-byte forms (not part of UTF-8 as defined by RFC 3629) are emitted
 * as-is. A charnum of 0 produces an empty string. For values of 0x80000000
 * and above a message is printed to stderr and dest becomes an empty string.
 */
void writeUTF8(unsigned int charnum, char*dest)
{
    /* One row per encoded length: values below `limit` start with a byte
       carrying the marker bits `lead`, followed by `trail` continuation
       bytes of the form 10xxxxxx. */
    static const struct {
	unsigned int limit;
	unsigned int lead;
	int trail;
    } forms[] = {
	{0x80,        0x00, 0}, /* 0xxxxxxx */
	{0x800,       0xc0, 1}, /* 110xxxxx 10xxxxxx */
	{0x10000,     0xe0, 2}, /* 1110xxxx 10xxxxxx 10xxxxxx */
	{0x200000,    0xf0, 3}, /* 11110xxx + 3 continuation bytes */
	{0x4000000,   0xf8, 4}, /* 111110xx + 4 continuation bytes */
	{0x80000000u, 0xfc, 5}  /* 1111110x + 5 continuation bytes */
    };
    const int numforms = (int)(sizeof(forms) / sizeof(forms[0]));
    int form = 0;
    int pos;

    /* pick the shortest form whose range contains charnum */
    while(form < numforms && charnum >= forms[form].limit) {
	form++;
    }

    if(form == numforms) {
	fprintf(stderr, "Illegal character: 0x%08x\n", charnum);
	dest[0] = 0;
	return;
    }

    /* Fill from the end: terminator first, then the continuation bytes
       (six payload bits each, least significant group last), and finally
       the lead byte with whatever bits remain. */
    pos = forms[form].trail;
    dest[pos + 1] = 0;
    for(; pos > 0; pos--) {
	dest[pos] = 0x80 | (charnum & 0x3f);
	charnum >>= 6;
    }
    dest[0] = forms[form].lead | charnum;
}

/* Return the encoding of charnum as a NUL-terminated string.
 *
 * The result lives in the file-level utf8buf, which is wiped and rewritten
 * on every call; callers that need to keep the string must copy it before
 * calling getUTF8() again.
 */
char* getUTF8(unsigned int charnum)
{
    char*result = utf8buf;

    /* start from an all-zero buffer so no bytes of an earlier, longer
       encoding remain behind the new terminator */
    memset(result, 0, sizeof(utf8buf));
    writeUTF8(charnum, result);

    return result;
}