You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
97 lines
2.1 KiB
97 lines
2.1 KiB
/* $Id: utf8.c,v 1.2 2013/06/06 21:53:06 steffen Exp $ */
|
|
/* utf8.c
|
|
* Copyright (C) Steffen Pohle 2013 <steffen@gulpe.de>
|
|
*
|
|
* spOSMroute is free software: you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License as published by the
|
|
* Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* spOSMroute is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
* See the GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
|
|
#include <stdlib.h>
|
|
#include <strings.h>
|
|
#include "system.h"
|
|
#include "utf8.h"
|
|
|
|
/*
 * u8_decode - decode the UTF-8 sequence that starts at str.
 *
 * str       first byte of the sequence. A NUL byte decodes as code point 0
 *           with a size of 1. A NULL pointer is treated as an invalid
 *           sequence.
 * charsize  receives the number of bytes the sequence occupies (1..6), or 0
 *           when the bytes do not form a valid sequence. May be NULL if the
 *           caller does not need the size.
 *
 * Returns the decoded code point, or 0 on error. Since 0 is also the result
 * for a NUL byte, errors can only be told apart through *charsize.
 *
 * The legacy 5- and 6-byte forms are accepted. Overlong encodings and
 * surrogate values are not rejected.
 */
uint32_t u8_decode (char *str, int *charsize) {
	/* lead-byte layouts; the index is the number of continuation bytes */
	static const struct {
		uint8_t mask;	/* bits that identify the form */
		uint8_t tag;	/* required value of those bits */
	} forms[] = {
		{ 0x80, 0x00 },	/* 0xxx xxxx */
		{ 0xE0, 0xC0 },	/* 110x xxxx */
		{ 0xF0, 0xE0 },	/* 1110 xxxx */
		{ 0xF8, 0xF0 },	/* 1111 0xxx */
		{ 0xFC, 0xF8 },	/* 1111 10xx */
		{ 0xFE, 0xFC }	/* 1111 110x */
	};
	const int form_count = (int) (sizeof forms / sizeof forms[0]);
	uint32_t chr = 0;
	uint8_t c;
	int tail = -1;	/* continuation bytes expected, -1 = unknown lead byte */
	int i;

	if (str == NULL) {
		goto invalid;
	}

	/* classify the lead byte and keep its payload bits */
	c = (uint8_t) *str;
	for (i = 0; i < form_count; i++) {
		if ((c & forms[i].mask) == forms[i].tag) {
			tail = i;
			chr = c & (uint8_t) ~forms[i].mask;
			break;
		}
	}
	if (tail < 0) {
		/* stray continuation byte (10xx xxxx), 0xFE or 0xFF */
		goto invalid;
	}

	/*
	 * Every following byte must look like 10xx xxxx. A NUL terminator fails
	 * this test as well, so a truncated sequence stops the scan at the end
	 * of the string instead of reading past it.
	 */
	for (i = 1; i <= tail; i++) {
		c = (uint8_t) str[i];
		if ((c & 0xC0) != 0x80) {
			goto invalid;
		}
		chr = (chr << 6) | (uint32_t) (c & 0x3F);
	}

	if (charsize != NULL) {
		*charsize = tail + 1;	/* size in bytes of the utf8 char */
	}
	return chr;

invalid:
	if (charsize != NULL) {
		*charsize = 0;
	}
	return 0;
}
|
|
|
|
|
|
/*
 * u8_strlen - return the length of the NUL-terminated string str in
 * characters, not in bytes.
 *
 * Each sequence accepted by u8_decode() counts as one character. A byte that
 * u8_decode() rejects (it reports a size of 0) is counted as one character
 * and skipped on its own; without that step the scan would never advance
 * past malformed input.
 *
 * Returns 0 for a NULL or empty string.
 */
int u8_strlen (char *str) {
	int count = 0;
	int step;

	if (str == NULL) {
		return 0;
	}

	/*
	 * Walk the string directly instead of calling strlen() first: a valid
	 * sequence never contains a NUL byte, so the cursor cannot jump over
	 * the terminator.
	 */
	while (*str != '\0') {
		step = 0;
		u8_decode (str, &step);
		str += (step > 0) ? step : 1;
		count++;
	}

	return count;
}
|
|
|
|
|
|
/*
 * Prototype only: no body for u8_strcpy is visible in this part of the file.
 * NOTE(review): the parameter names suggest copying `size` units of src,
 * starting at `pos`, into dest -- presumably counted in characters rather
 * than bytes. Confirm against the implementation.
 */
void u8_strcpy (char *dest, char *src, int pos, int size);
|