/* $Id: utf8.c,v 1.2 2013/06/06 21:53:06 steffen Exp $ */ /* utf8.c * Copyright (C) Steffen Pohle 2013 * * spOSMroute is free software: you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * spOSMroute is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program. If not, see . */ #include #include #include "system.h" #include "utf8.h" uint32_t u8_decode (char *str, int *charsize) { uint32_t chr = 0; int block = 0; uint8_t c = 0; /* check how many elements */ c = *str; if ((c & 0x80) == 0) { block = 0; chr = c; } else if ((c & 0xE0) == 0xC0) { // (str[0] & 111x xxxx) == 110x xxxx block = 1; chr = c & 0x1F; } else if ((c & 0xF0) == 0xE0) { // (str[0] & 1111 xxxx) == 1110 xxxx block = 2; chr = c & 0x0F; } else if ((c & 0xF8) == 0xF0) { // (str[0] & 1111 1xxx) == 1111 0xxx block = 3; chr = c & 0x07; } else if ((c & 0xFC) == 0xF8) { // (str[0] & 1111 11xx) == 1111 10xx block = 4; chr = c & 0x03; } else if ((c & 0xFE) == 0xFC) { // (str[0] & 1111 111x) == 1111 110x block = 5; chr = c & 0x01; } else { /* error: no valid code */ *charsize = 0; return 0; } *charsize = block+1; /* return size in bytes for utf8 char */ str++; for (;block > 0; block--) { c = *str; if ((c & 0xC0) != 0X80) { /* error: no valid code */ *charsize = 0; return 0; } chr <<= 6; chr |= (c & 0x3F); str++; } return chr; }; /* return the size in chars not in bytes. */ int u8_strlen (char *str) { int cnt, i, j; int bytelen = strlen (str); for (cnt = 0, i = 0; i < bytelen;) { u8_decode (str+i, &j); cnt++; i += j; } return cnt; }; void u8_strcpy (char *dest, char *src, int pos, int size);