You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
spOSMroute/base/utf8.c

97 lines
2.1 KiB

/* $Id: utf8.c,v 1.2 2013/06/06 21:53:06 steffen Exp $ */
/* utf8.c
* Copyright (C) Steffen Pohle 2013 <steffen@gulpe.de>
*
* spOSMroute is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* spOSMroute is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <strings.h>
#include "system.h"
#include "utf8.h"
/*
 * Decode the UTF-8 sequence that starts at str and return its code point.
 *
 * str:      points at the first byte of the sequence.
 * charsize: receives the number of bytes the sequence occupies (1..6),
 *           or 0 when the bytes do not form a valid sequence.
 *
 * Returns the decoded value, or 0 on error.  A NUL byte also decodes to 0,
 * so callers have to look at *charsize to tell the two cases apart.
 *
 * Only the lead byte pattern and the 10xxxxxx marker on every continuation
 * byte are checked; overlong forms and the 5- and 6-byte forms are accepted.
 */
uint32_t u8_decode (char *str, int *charsize) {
	uint8_t byte = (uint8_t) *str;
	uint8_t probe;
	uint32_t code;
	int ones = 0;
	int remaining;

	/* The number of leading 1 bits in the first byte selects the layout. */
	for (probe = 0x80; probe != 0 && (byte & probe) != 0; probe >>= 1)
		ones++;

	/* 10xxxxxx is a stray continuation byte; 0xFE and 0xFF match no layout. */
	if (ones == 1 || ones > 6) {
		*charsize = 0;
		return 0;
	}

	/* Keep only the payload bits that follow the length marker. */
	code = byte & (0x7F >> ones);
	remaining = (ones == 0) ? 0 : ones - 1;

	/* Size is published up front and reset to 0 if a continuation byte fails. */
	*charsize = remaining + 1;

	while (remaining-- > 0) {
		byte = (uint8_t) *++str;
		if ((byte & 0xC0) != 0x80) {
			/* Also catches a string that ends in the middle of a sequence. */
			*charsize = 0;
			return 0;
		}
		code = (code << 6) | (byte & 0x3F);
	}

	return code;
}
/*
 * Return the length of str in UTF-8 characters, not in bytes.
 *
 * str: NUL-terminated string; NULL is treated as an empty string.
 *
 * A byte that u8_decode rejects (it reports a size of 0 for it) is counted
 * as one character and skipped on its own.  Without that the scan position
 * would never advance and the loop would not terminate on malformed input.
 */
int u8_strlen (char *str) {
	int cnt = 0;
	int i = 0;
	int bytelen;

	if (str == NULL)
		return 0;

	bytelen = (int) strlen (str);
	while (i < bytelen) {
		int j = 0;

		u8_decode (str + i, &j);
		if (j <= 0)
			j = 1;	/* invalid byte: step over it so the scan makes progress */
		cnt++;
		i += j;
	}

	return cnt;
}
/* Declaration of u8_strcpy; its definition is not visible in this part of
 * the file.
 * NOTE(review): by analogy with u8_strlen, pos and size are presumably
 * counted in UTF-8 characters rather than bytes -- confirm against the
 * definition. */
void u8_strcpy (char *dest, char *src, int pos, int size);