/* $Id: utf8.c,v 1.3 2013/06/09 20:11:30 steffen Exp $ */ /* utf8.c * Copyright (C) Steffen Pohle 2013 * * spOSMroute is free software: you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * spOSMroute is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program. If not, see . */ #include #include #include "system.h" #include "utf8.h" /* * 84218421 84218421 84218421 84218421 * ----------------------------------- * 00000000 00000000 00000000 0aaaaaaa = FF FF FF 80 > 0 * 00000000 00000000 00000bbb bbaaaaaa = FF FF F8 00 > 1 * 00000000 00000000 ccccbbbb bbaaaaaa = FF FF 00 00 > 2 * 00000000 000dddcc ccccbbbb bbaaaaaa = FF E0 00 00 > 3 * 000000ee ddddddcc ccccbbbb bbaaaaaa = FC 00 00 00 > 4 * 0feeeeee ddddddcc ccccbbbb bbaaaaaa > 5 */ void u8_encode (char *dest, uint32_t key) { uint8_t maskor = 0; uint8_t maskand = 0; int blocks = 0; if ((key & 0xFFFFFF80) == 0) { blocks = 0; maskand = 0x7F; maskor = 0x00; } else if ((key & 0xFFFFF800) == 0) { blocks = 1; maskand = 0x1F; maskor = 0xC0; } else if ((key & 0xFFFF0000) == 0) { blocks = 2; maskand = 0x0F; maskor = 0xE0; } else if ((key & 0xFFE00000) == 0) { blocks = 3; maskand = 0x07; maskor = 0xF0; } else if ((key & 0xFC000000) == 0) { blocks = 4; maskand = 0x03; maskor = 0xF8; } else { blocks = 5; maskand = 0x01; maskor = 0xFC; } dest[blocks+1] = 0; while (blocks>0) { dest[blocks] = (uint8_t)((key & 0x3F) | 0x80); key = key >> 6; blocks--; } *dest = (key & maskand) | maskor; }; uint32_t u8_decode (char *str, int *charsize) { uint32_t chr = 0; int block = 0; uint8_t c = 0; /* check how many elements */ c = *str; if ((c & 0x80) == 0) { block = 0; chr = c; } else if ((c & 0xE0) == 0xC0) { // (str[0] & 111x xxxx) == 110x xxxx block = 1; chr = c & 0x1F; } else if ((c & 0xF0) == 0xE0) { // (str[0] & 1111 xxxx) == 1110 xxxx block = 2; chr = c & 0x0F; } else if ((c & 0xF8) == 0xF0) { // (str[0] & 1111 1xxx) == 1111 0xxx block = 3; chr = c & 0x07; } else if ((c & 0xFC) == 0xF8) { // (str[0] & 1111 11xx) == 1111 10xx block = 4; chr = c & 0x03; } else if ((c & 0xFE) == 0xFC) { // (str[0] & 1111 111x) == 1111 110x block = 5; chr = c & 0x01; } else { /* error: no valid code */ if (charsize != NULL) *charsize = 0; return 0; } if (charsize != NULL) *charsize = block+1; /* return size in bytes for utf8 char */ str++; for (;block > 0; block--) { c = *str; if ((c & 0xC0) != 0X80) { /* error: no valid code */ *charsize = 0; return 0; } chr <<= 6; chr |= (c & 0x3F); str++; } return chr; }; /* return the size in chars not in bytes. */ int u8_strlen (char *str) { int cnt, i, j; int bytelen = strlen (str); for (cnt = 0, i = 0; i < bytelen;) { if (u8_decode (str+i, &j) == 0) { d_printf ("u8_strlen: something went wrong. str:%s i:%d str+i:%s", str, i, str+i); return 0; } cnt++; i += j; } return cnt; }; void u8_strcpy (char *dest, char *src, int pos, int len) { int curpos = 0; int size = 0; while (*src != 0 && curpos < pos+len) { u8_decode (src, &size); if (curpos >= pos) { memcpy (dest, src, size); dest += size; } curpos++; src += size; } *dest = 0; }; void u8_strncat (char *dest, int size, char *src) { int len; len = strlen (src)+1; if (len + strlen(dest) > size) len = size - strlen(dest) - 1; memcpy (dest+strlen(dest), src, len); dest[size-1] = 0; }; void u8_strdel (char *str, int start, int len) { int curpos = 0; char *nstr = str; int slen = u8_strlen (str); int size = 0; int clen = strlen (str); while (*nstr != 0 && curpos < slen) { u8_decode (nstr, &size); if (curpos >= start && curpos < start+len) memmove (nstr, nstr+size, clen-(nstr-str)); else nstr += size; curpos++; } *nstr = 0; }; void u8_strninsert (char *str, int size, char *text, int pos) { char *nstr = str; int curpos = 0; int chrsize = 0; int len = u8_strlen (str); int clen = strlen (str); while (curpos <= len && curpos <= pos) { // no need to move if (curpos == pos) { int cnt = strlen (text); if (nstr-str + cnt >= size) { memcpy (nstr, text, size-(nstr-str)-1); str[size-1] = 0; break; } else if (clen + strlen(text) >= size) { // need to move only part memmove (nstr+cnt, nstr, size-(nstr-str)-strlen(text)); str[size-1] = 0; } else { // no overrun memmove (nstr+cnt, nstr, clen-(nstr-str)); str[clen+cnt] = 0; } memcpy (nstr, text, strlen(text)); break; } u8_decode (nstr, &chrsize); curpos++; nstr += chrsize; } };