/* base65536 semi-library. (C) 2016 Tobias Girstmair, http://isticktoit.net/ Released under the GNU GPL v3. See LICENSE for details. */ #include #include #include "base65536.h" struct block b65k_tree[] = { {128, 132608}, {64, 29952}, {192, 148992}, {32, 21760}, {96, 38144}, {160, 140800}, {224, 157184}, {16, 17408}, {48, 25856}, {80, 34048}, {112, 73984}, {144, 136704}, {176, 144896}, {208, 153088}, {240, 161280}, {8, 15360}, {24, 19456}, {40, 23808}, {56, 27904}, {72, 32000}, {88, 36096}, {104, 40192}, {120, 92160}, {136, 134656}, {152, 138752}, {168, 142848}, {184, 146944}, {200, 151040}, {216, 155136}, {232, 159232}, {248, 163328}, {4, 14336}, {12, 16384}, {20, 18432}, {28, 20736}, {36, 22784}, {44, 24832}, {52, 26880}, {60, 28928}, {68, 30976}, {76, 33024}, {84, 35072}, {92, 37120}, {100, 39168}, {108, 41728}, {116, 78336}, {124, 131584}, {132, 133632}, {140, 135680}, {148, 137728}, {156, 139776}, {164, 141824}, {172, 143872}, {180, 145920}, {188, 147968}, {196, 150016}, {204, 152064}, {212, 154112}, {220, 156160}, {228, 158208}, {236, 160256}, {244, 162304}, {252, 164352}, {2, 13824}, {6, 14848}, {10, 15872}, {14, 16896}, {18, 17920}, {22, 18944}, {26, 20224}, {30, 21248}, {34, 22272}, {38, 23296}, {42, 24320}, {46, 25344}, {50, 26368}, {54, 27392}, {58, 28416}, {62, 29440}, {66, 30464}, {70, 31488}, {74, 32512}, {78, 33536}, {82, 34560}, {86, 35584}, {90, 36608}, {94, 37632}, {98, 38656}, {102, 39680}, {106, 41216}, {110, 67072}, {114, 77824}, {118, 82944}, {122, 131072}, {126, 132096}, {130, 133120}, {134, 134144}, {138, 135168}, {142, 136192}, {146, 137216}, {150, 138240}, {154, 139264}, {158, 140288}, {162, 141312}, {166, 142336}, {170, 143360}, {174, 144384}, {178, 145408}, {182, 146432}, {186, 147456}, {190, 148480}, {194, 149504}, {198, 150528}, {202, 151552}, {206, 152576}, {210, 153600}, {214, 154624}, {218, 155648}, {222, 156672}, {226, 157696}, {230, 158720}, {234, 159744}, {238, 160768}, {242, 161792}, {246, 162816}, {250, 163840}, {254, 164864}, {0, 13312}, {3, 14080}, {5, 14592}, {7, 15104}, {9, 15616}, {11, 16128}, {13, 16640}, {15, 17152}, {17, 17664}, {19, 18176}, {21, 18688}, {23, 19200}, {25, 19968}, {27, 20480}, {29, 20992}, {31, 21504}, {33, 22016}, {35, 22528}, {37, 23040}, {39, 23552}, {41, 24064}, {43, 24576}, {45, 25088}, {47, 25600}, {49, 26112}, {51, 26624}, {53, 27136}, {55, 27648}, {57, 28160}, {59, 28672}, {61, 29184}, {63, 29696}, {65, 30208}, {67, 30720}, {69, 31232}, {71, 31744}, {73, 32256}, {75, 32768}, {77, 33280}, {79, 33792}, {81, 34304}, {83, 34816}, {85, 35328}, {87, 35840}, {89, 36352}, {91, 36864}, {93, 37376}, {95, 37888}, {97, 38400}, {99, 38912}, {101, 39424}, {103, 39936}, {105, 40448}, {107, 41472}, {109, 42240}, {111, 73728}, {113, 74240}, {115, 78080}, {117, 78592}, {119, 83200}, {121, 92416}, {123, 131328}, {125, 131840}, {127, 132352}, {129, 132864}, {131, 133376}, {133, 133888}, {135, 134400}, {137, 134912}, {139, 135424}, {141, 135936}, {143, 136448}, {145, 136960}, {147, 137472}, {149, 137984}, {151, 138496}, {153, 139008}, {155, 139520}, {157, 140032}, {159, 140544}, {161, 141056}, {163, 141568}, {165, 142080}, {167, 142592}, {169, 143104}, {171, 143616}, {173, 144128}, {175, 144640}, {177, 145152}, {179, 145664}, {181, 146176}, {183, 146688}, {185, 147200}, {187, 147712}, {189, 148224}, {191, 148736}, {193, 149248}, {195, 149760}, {197, 150272}, {199, 150784}, {201, 151296}, {203, 151808}, {205, 152320}, {207, 152832}, {209, 153344}, {211, 153856}, {213, 154368}, {215, 154880}, {217, 155392}, {219, 155904}, {221, 156416}, {223, 156928}, {225, 157440}, {227, 157952}, {229, 158464}, {231, 158976}, {233, 159488}, {235, 160000}, {237, 160512}, {239, 161024}, {241, 161536}, {243, 162048}, {245, 162560}, {247, 163072}, {249, 163584}, {251, 164096}, {253, 164608}, {255, 165120}, {B65K_EOF, 5376}, {1, 13568} }; /* autogenerated with generate_struct.c */ struct block* get_block_by_index (struct block* tree, int index, int len, int pos) { if (pos >= len) return NULL; if (tree[pos].num == index) return &tree[pos]; if (index < tree[pos].num) return get_block_by_index (tree, index, len, 2*pos+1); if (index > tree[pos].num) return get_block_by_index (tree, index, len, 2*pos+2); return NULL; //will never be reached } struct block* get_block_by_start (struct block* tree, int start, int len, int pos) { if (pos >= len) return NULL; if (tree[pos].start == start) return &tree[pos]; if (start < tree[pos].start) return get_block_by_start (tree, start, len, 2*pos+1); if (start > tree[pos].start) return get_block_by_start (tree, start, len, 2*pos+2); return NULL; //will never be reached } //convenience functions (hardcoded tree-struct): int tree_find_index (int block) { struct block* node = get_block_by_start (b65k_tree, block, B65536_TREE_SIZE, 0); if (node == NULL) return NOT_FOUND; return node->num; } int tree_find_block(int index) { struct block* node = get_block_by_index (b65k_tree, index, B65536_TREE_SIZE, 0); if (node == NULL) return NOT_FOUND; return node->start; } /* base65536_encode_char: convert two bytes to base65536 (unicode-codepoint). in_buf: bytes to encode casted to int. [1] may be EOF. is_eof: if true, b2 will be B65K_EOF. returns: number of bytes processed. */ int base65536_encode_char (const int* in_buf) { int b1 = in_buf[0]; int b2 = in_buf[1]; return tree_find_block (b2) + b1; } /* base65536_decode_char: decode base65536-codepoint to bytes. out_buf: expects an array of size of two integers to write to. returns: number of bytes processed, zero on error. */ int base65536_decode_char (const int in_cp, int* out_buf) { int b1, b2; b1 = in_cp & ((1 << 8) -1); b2 = tree_find_index (in_cp-b1); out_buf[0] = b1; out_buf[1] = b2; if (b2 == NOT_FOUND) return 0; return (b2 != B65K_EOF) + 1; } /* codepoint_to_utf8: utf8-encode given codepoint. unicode: codepoint to covert. buf: buffer to write to; expected to be of size 5. returns: number of bytes written or zero on error. */ int codepoint_to_utf8 (int unicode, char* buf) { if (unicode < 0x80) { buf[0] = unicode; buf[1] = '\0'; return 1; } else if (unicode < 0x800) { buf[0] = 0xc0 | (unicode>> 6 & 0x1f); buf[1] = 0x80 | (unicode & 0x3f); buf[2] = '\0'; return 2; } else if (unicode >=0xd800 && unicode < 0x8000) { return 0; /*is invalid code block*/ } else if (unicode < 0x10000) { buf[0] = 0xe0 | (unicode>>12 & 0x0f); buf[1] = 0x80 | (unicode>> 6 & 0x3f); buf[2] = 0x80 | (unicode & 0x3f); buf[3] = '\0'; return 3; } else { buf[0] = 0xf0 | (unicode>>18 & 0x07); buf[1] = 0x80 | (unicode>>12 & 0x3f); buf[2] = 0x80 | (unicode>> 6 & 0x3f); buf[3] = 0x80 | (unicode & 0x3f); buf[4] = '\0'; return 4; } return 0; } /* utf8_to_codepoint: decodes a utf-8 character to its unicode-codepoint. buf: buffer to read from, expected to be of size 5. returns: unicode codepoint. WARN: will not detect non-utf8-sequence! */ int utf8_to_codepoint (char* buf) { int codepoint = 0; //cummulative int l = 0; for (char first = buf[0];;) { //this loop counts the number of consecutive binary 1s at the //start of the byte to determine, how many follow-up bytes //(l-1) will come. if (first>>7) l++; else break; first <<= 1; } if (l == 0) { //ascii chars are 01xxxxxx codepoint = buf[0]; } else { for (int i = l-1; i; i--) { //take lower 6bits of every byte and stick them end to end. codepoint += (buf[i] & ((1<<6)-1)) << ((l-1)-i)*6; } //add the bits from the first byte to the beginning, but not //the 1s that determine the number of follow-up-bytes codepoint += (buf[0] & ((1<<(7-l))-1)) << ((l-1))*6; } return codepoint; }