]> git.gir.st - base65536.git/blob - base65536.c
Code Import
[base65536.git] / base65536.c
1 /* base65536 semi-library.
2 (C) 2016 Tobias Girstmair, http://isticktoit.net/
3 Released under the GNU GPL v3. See LICENSE for details. */
4
5 #include <stdlib.h>
6 #include <string.h>
7 #include "base65536.h"
8
9
/* b65k_tree: lookup table pairing a value (first member, read as .num by the
   lookup functions) with the first code point of the block assigned to it
   (second member, read as .start).
   The entries are stored as an implicit binary search tree: the children of
   the entry at position pos sit at 2*pos+1 and 2*pos+2, which is how
   get_block_by_index() and get_block_by_start() descend through it. The same
   layout is searched by either member.
   Besides the values 0..255 there is one extra entry for B65K_EOF.
   Do not reorder or edit by hand; regenerate instead (see note at the end). */
struct block b65k_tree[] = {
	{128, 132608},
	{64, 29952},
	{192, 148992},
	{32, 21760},
	{96, 38144},
	{160, 140800},
	{224, 157184},
	{16, 17408},
	{48, 25856},
	{80, 34048},
	{112, 73984},
	{144, 136704},
	{176, 144896},
	{208, 153088},
	{240, 161280},
	{8, 15360},
	{24, 19456},
	{40, 23808},
	{56, 27904},
	{72, 32000},
	{88, 36096},
	{104, 40192},
	{120, 92160},
	{136, 134656},
	{152, 138752},
	{168, 142848},
	{184, 146944},
	{200, 151040},
	{216, 155136},
	{232, 159232},
	{248, 163328},
	{4, 14336},
	{12, 16384},
	{20, 18432},
	{28, 20736},
	{36, 22784},
	{44, 24832},
	{52, 26880},
	{60, 28928},
	{68, 30976},
	{76, 33024},
	{84, 35072},
	{92, 37120},
	{100, 39168},
	{108, 41728},
	{116, 78336},
	{124, 131584},
	{132, 133632},
	{140, 135680},
	{148, 137728},
	{156, 139776},
	{164, 141824},
	{172, 143872},
	{180, 145920},
	{188, 147968},
	{196, 150016},
	{204, 152064},
	{212, 154112},
	{220, 156160},
	{228, 158208},
	{236, 160256},
	{244, 162304},
	{252, 164352},
	{2, 13824},
	{6, 14848},
	{10, 15872},
	{14, 16896},
	{18, 17920},
	{22, 18944},
	{26, 20224},
	{30, 21248},
	{34, 22272},
	{38, 23296},
	{42, 24320},
	{46, 25344},
	{50, 26368},
	{54, 27392},
	{58, 28416},
	{62, 29440},
	{66, 30464},
	{70, 31488},
	{74, 32512},
	{78, 33536},
	{82, 34560},
	{86, 35584},
	{90, 36608},
	{94, 37632},
	{98, 38656},
	{102, 39680},
	{106, 41216},
	{110, 67072},
	{114, 77824},
	{118, 82944},
	{122, 131072},
	{126, 132096},
	{130, 133120},
	{134, 134144},
	{138, 135168},
	{142, 136192},
	{146, 137216},
	{150, 138240},
	{154, 139264},
	{158, 140288},
	{162, 141312},
	{166, 142336},
	{170, 143360},
	{174, 144384},
	{178, 145408},
	{182, 146432},
	{186, 147456},
	{190, 148480},
	{194, 149504},
	{198, 150528},
	{202, 151552},
	{206, 152576},
	{210, 153600},
	{214, 154624},
	{218, 155648},
	{222, 156672},
	{226, 157696},
	{230, 158720},
	{234, 159744},
	{238, 160768},
	{242, 161792},
	{246, 162816},
	{250, 163840},
	{254, 164864},
	{0, 13312},
	{3, 14080},
	{5, 14592},
	{7, 15104},
	{9, 15616},
	{11, 16128},
	{13, 16640},
	{15, 17152},
	{17, 17664},
	{19, 18176},
	{21, 18688},
	{23, 19200},
	{25, 19968},
	{27, 20480},
	{29, 20992},
	{31, 21504},
	{33, 22016},
	{35, 22528},
	{37, 23040},
	{39, 23552},
	{41, 24064},
	{43, 24576},
	{45, 25088},
	{47, 25600},
	{49, 26112},
	{51, 26624},
	{53, 27136},
	{55, 27648},
	{57, 28160},
	{59, 28672},
	{61, 29184},
	{63, 29696},
	{65, 30208},
	{67, 30720},
	{69, 31232},
	{71, 31744},
	{73, 32256},
	{75, 32768},
	{77, 33280},
	{79, 33792},
	{81, 34304},
	{83, 34816},
	{85, 35328},
	{87, 35840},
	{89, 36352},
	{91, 36864},
	{93, 37376},
	{95, 37888},
	{97, 38400},
	{99, 38912},
	{101, 39424},
	{103, 39936},
	{105, 40448},
	{107, 41472},
	{109, 42240},
	{111, 73728},
	{113, 74240},
	{115, 78080},
	{117, 78592},
	{119, 83200},
	{121, 92416},
	{123, 131328},
	{125, 131840},
	{127, 132352},
	{129, 132864},
	{131, 133376},
	{133, 133888},
	{135, 134400},
	{137, 134912},
	{139, 135424},
	{141, 135936},
	{143, 136448},
	{145, 136960},
	{147, 137472},
	{149, 137984},
	{151, 138496},
	{153, 139008},
	{155, 139520},
	{157, 140032},
	{159, 140544},
	{161, 141056},
	{163, 141568},
	{165, 142080},
	{167, 142592},
	{169, 143104},
	{171, 143616},
	{173, 144128},
	{175, 144640},
	{177, 145152},
	{179, 145664},
	{181, 146176},
	{183, 146688},
	{185, 147200},
	{187, 147712},
	{189, 148224},
	{191, 148736},
	{193, 149248},
	{195, 149760},
	{197, 150272},
	{199, 150784},
	{201, 151296},
	{203, 151808},
	{205, 152320},
	{207, 152832},
	{209, 153344},
	{211, 153856},
	{213, 154368},
	{215, 154880},
	{217, 155392},
	{219, 155904},
	{221, 156416},
	{223, 156928},
	{225, 157440},
	{227, 157952},
	{229, 158464},
	{231, 158976},
	{233, 159488},
	{235, 160000},
	{237, 160512},
	{239, 161024},
	{241, 161536},
	{243, 162048},
	{245, 162560},
	{247, 163072},
	{249, 163584},
	{251, 164096},
	{253, 164608},
	{255, 165120},
	{B65K_EOF, 5376},
	{1, 13568}
}; /* autogenerated with generate_struct.c */
269
270 struct block* get_block_by_index (struct block* tree, int index, int len, int pos) {
271 if (pos >= len) return NULL;
272 if (tree[pos].num == index) return &tree[pos];
273 if (index < tree[pos].num) return get_block_by_index (tree, index, len, 2*pos+1);
274 if (index > tree[pos].num) return get_block_by_index (tree, index, len, 2*pos+2);
275
276 return NULL; //will never be reached
277 }
278
279 struct block* get_block_by_start (struct block* tree, int start, int len, int pos) {
280 if (pos >= len) return NULL;
281 if (tree[pos].start == start) return &tree[pos];
282 if (start < tree[pos].start) return get_block_by_start (tree, start, len, 2*pos+1);
283 if (start > tree[pos].start) return get_block_by_start (tree, start, len, 2*pos+2);
284
285 return NULL; //will never be reached
286 }
287
288 //convenience functions (hardcoded tree-struct):
289 int tree_find_index (int block) {
290 struct block* node = get_block_by_start (b65k_tree, block, B65536_TREE_SIZE, 0);
291 if (node == NULL) return NOT_FOUND;
292 return node->num;
293 }
294
295 int tree_find_block(int index) {
296 struct block* node = get_block_by_index (b65k_tree, index, B65536_TREE_SIZE, 0);
297 if (node == NULL) return NOT_FOUND;
298 return node->start;
299 }
300
301
302 /* base65536_encode_char: convert two bytes to base65536 (unicode-codepoint).
303 in_buf: bytes to encode casted to int. [1] may be EOF.
304 is_eof: if true, b2 will be B65K_EOF.
305 returns: number of bytes processed.
306 */
307 int base65536_encode_char (const int* in_buf) {
308 int b1 = in_buf[0];
309 int b2 = in_buf[1];
310
311 return tree_find_block (b2) + b1;
312 }
313
314 /* base65536_decode_char: decode base65536-codepoint to bytes.
315 out_buf: expects an array of size of two integers to write to.
316 returns: number of bytes processed, zero on error.
317 */
318 int base65536_decode_char (const int in_cp, int* out_buf) {
319 int b1, b2;
320
321 b1 = in_cp & ((1 << 8) -1);
322 b2 = tree_find_index (in_cp-b1);
323
324 out_buf[0] = b1;
325 out_buf[1] = b2;
326
327 if (b2 == NOT_FOUND) return 0;
328 return (b2 != B65K_EOF) + 1;
329 }
330
/* codepoint_to_utf8: utf8-encode given codepoint.
   unicode: codepoint to convert.
   buf: buffer to write to; expected to be of size 5. On success it holds
        the encoded bytes followed by a terminating '\0'. It is left
        untouched on error.
   returns: number of bytes written (not counting the '\0'), or zero if
            unicode cannot be encoded: negative, above 0x10ffff, or a
            UTF-16 surrogate (0xd800..0xdfff).
*/
int codepoint_to_utf8 (int unicode, char* buf) {
	if (unicode < 0 || unicode > 0x10ffff) {
		return 0; /* outside the unicode code space */
	}
	if (unicode >= 0xd800 && unicode < 0xe000) {
		return 0; /* surrogates are reserved for UTF-16 */
	}

	if (unicode < 0x80) {
		buf[0] = (char) unicode;
		buf[1] = '\0';
		return 1;
	}
	if (unicode < 0x800) {
		buf[0] = (char) (0xc0 | (unicode>> 6 & 0x1f));
		buf[1] = (char) (0x80 | (unicode     & 0x3f));
		buf[2] = '\0';
		return 2;
	}
	if (unicode < 0x10000) {
		buf[0] = (char) (0xe0 | (unicode>>12 & 0x0f));
		buf[1] = (char) (0x80 | (unicode>> 6 & 0x3f));
		buf[2] = (char) (0x80 | (unicode     & 0x3f));
		buf[3] = '\0';
		return 3;
	}

	buf[0] = (char) (0xf0 | (unicode>>18 & 0x07));
	buf[1] = (char) (0x80 | (unicode>>12 & 0x3f));
	buf[2] = (char) (0x80 | (unicode>> 6 & 0x3f));
	buf[3] = (char) (0x80 | (unicode     & 0x3f));
	buf[4] = '\0';
	return 4;
}
365
/* utf8_to_codepoint: decodes a utf-8 character to its unicode-codepoint.
   buf: buffer to read from, expected to be of size 5. Only the bytes of
        the first character are read (at most 4).
   returns: unicode codepoint, or -1 if the first byte is not a valid lead
            byte (a stray continuation byte, or one announcing more than 4
            bytes) or if a follow-up byte is not of the form 10xxxxxx.
   WARN: overlong encodings, surrogates and values above 0x10ffff are not
         rejected.
*/
int utf8_to_codepoint (char* buf) {
	/* work on unsigned bytes: plain char may be signed, and shifting or
	   masking negative values is not portable */
	const unsigned char* bytes = (const unsigned char*) buf;
	const unsigned char lead = bytes[0];
	int len = 0;
	int codepoint;

	/* count the consecutive 1 bits at the top of the lead byte; this is the
	   total length of the sequence. Stop at 5, which is already invalid. */
	while (len < 5 && (lead & (0x80 >> len))) len++;

	if (len == 0) return lead; /* ascii chars are 0xxxxxxx */
	if (len == 1 || len > 4) return -1; /* 10xxxxxx or 11111xxx lead */

	/* payload bits of the lead byte: everything below the length marker */
	codepoint = lead & (0x7f >> len);

	for (int i = 1; i < len; i++) {
		if ((bytes[i] & 0xc0) != 0x80) return -1; /* not a follow-up byte */
		/* append the lower 6 bits of every follow-up byte */
		codepoint = (codepoint << 6) | (bytes[i] & 0x3f);
	}
	return codepoint;
}
Imprint / Impressum