You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
239 lines
5.0 KiB
239 lines
5.0 KiB
/* $Id: utf8.c,v 1.3 2013/06/09 20:11:30 steffen Exp $ */
|
|
/* utf8.c
|
|
* Copyright (C) Steffen Pohle 2013 <steffen@gulpe.de>
|
|
*
|
|
* spOSMroute is free software: you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License as published by the
|
|
* Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* spOSMroute is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
* See the GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
|
|
#include <stdlib.h>
|
|
#include <strings.h>
|
|
#include "system.h"
|
|
#include "utf8.h"
|
|
|
|
|
|
/*
|
|
* 84218421 84218421 84218421 84218421
|
|
* -----------------------------------
|
|
* 00000000 00000000 00000000 0aaaaaaa = FF FF FF 80 > 0
|
|
* 00000000 00000000 00000bbb bbaaaaaa = FF FF F8 00 > 1
|
|
* 00000000 00000000 ccccbbbb bbaaaaaa = FF FF 00 00 > 2
|
|
* 00000000 000dddcc ccccbbbb bbaaaaaa = FF E0 00 00 > 3
|
|
* 000000ee ddddddcc ccccbbbb bbaaaaaa = FC 00 00 00 > 4
|
|
* 0feeeeee ddddddcc ccccbbbb bbaaaaaa > 5
|
|
*/
|
|
|
|
|
|
/*
 * Encode the code point 'key' as a NUL-terminated UTF-8 sequence in 'dest'.
 *
 * dest: output buffer; up to 7 bytes are written (6 sequence bytes plus
 *       the terminating NUL).
 * key:  value to encode. The sequence length is chosen from the highest
 *       set bit, following the layout table above (1 to 6 bytes).
 */
void u8_encode (char *dest, uint32_t key) {
	/* One row per sequence length: the bits that must be clear for the
	 * value to fit, the payload bits kept in the lead byte, and the lead
	 * byte marker. The last row matches everything. */
	static const struct {
		uint32_t toolarge;
		uint8_t  payload;
		uint8_t  marker;
	} layout[] = {
		{ 0xFFFFFF80, 0x7F, 0x00 },
		{ 0xFFFFF800, 0x1F, 0xC0 },
		{ 0xFFFF0000, 0x0F, 0xE0 },
		{ 0xFFE00000, 0x07, 0xF0 },
		{ 0xFC000000, 0x03, 0xF8 },
		{ 0x00000000, 0x01, 0xFC }
	};
	int tail = 0;	/* number of continuation bytes */
	int i;

	while ((key & layout[tail].toolarge) != 0)
		tail++;

	dest[tail + 1] = 0;

	/* continuation bytes are filled back to front, 6 bits each */
	for (i = tail; i > 0; i--) {
		dest[i] = (uint8_t)((key & 0x3F) | 0x80);
		key >>= 6;
	}

	/* whatever is left goes into the lead byte */
	dest[0] = (key & layout[tail].payload) | layout[tail].marker;
}
|
|
|
|
|
|
|
|
/*
 * Decode the UTF-8 sequence that starts at 'str'.
 *
 * str:      pointer to the first byte of the sequence.
 * charsize: optional (may be NULL). Receives the length of the sequence
 *           in bytes, or 0 if the bytes at 'str' are not a valid
 *           sequence (bad lead byte or bad continuation byte).
 *
 * Returns the decoded value, or 0 on error. A NUL byte also decodes to 0,
 * so callers that need to tell the two apart must look at *charsize
 * (1 for NUL, 0 for an error).
 */
uint32_t u8_decode (char *str, int *charsize) {
	uint32_t chr;
	int block;	/* number of continuation bytes that must follow */
	int i;
	uint8_t c = (uint8_t) *str;

	/* the lead byte tells how many continuation bytes follow */
	if ((c & 0x80) == 0x00) {		/* 0xxx xxxx */
		block = 0;
		chr = c;
	}
	else if ((c & 0xE0) == 0xC0) {		/* 110x xxxx */
		block = 1;
		chr = c & 0x1F;
	}
	else if ((c & 0xF0) == 0xE0) {		/* 1110 xxxx */
		block = 2;
		chr = c & 0x0F;
	}
	else if ((c & 0xF8) == 0xF0) {		/* 1111 0xxx */
		block = 3;
		chr = c & 0x07;
	}
	else if ((c & 0xFC) == 0xF8) {		/* 1111 10xx */
		block = 4;
		chr = c & 0x03;
	}
	else if ((c & 0xFE) == 0xFC) {		/* 1111 110x */
		block = 5;
		chr = c & 0x01;
	}
	else {
		/* error: not a valid lead byte */
		if (charsize != NULL) *charsize = 0;
		return 0;
	}

	for (i = 1; i <= block; i++) {
		c = (uint8_t) str[i];
		if ((c & 0xC0) != 0x80) {
			/* error: not a continuation byte (10xx xxxx). A
			 * terminating NUL ends up here as well, so a
			 * truncated sequence never reads past the string.
			 * charsize is optional and must be checked here
			 * just like everywhere else. */
			if (charsize != NULL) *charsize = 0;
			return 0;
		}
		chr = (chr << 6) | (c & 0x3F);
	}

	if (charsize != NULL) *charsize = block + 1;	/* size in bytes */
	return chr;
}
|
|
|
|
|
|
/*
 * Return the length of 'str' in characters, not in bytes.
 *
 * The string is decoded one character at a time with u8_decode. If
 * u8_decode returns 0 for any position before the end of the string, a
 * debug message is printed and 0 is returned.
 */
int u8_strlen (char *str) {
	int total = strlen (str);	/* length in bytes */
	int chars = 0;
	int offset = 0;
	int step;

	while (offset < total) {
		if (u8_decode (str+offset, &step) == 0) {
			d_printf ("u8_strlen: something went wrong. str:%s i:%d str+i:%s", str, offset, str+offset);
			return 0;
		}
		offset += step;
		chars++;
	}

	return chars;
}
|
|
|
|
|
|
/*
 * Copy a range of characters from 'src' into 'dest'.
 *
 * dest: output buffer, always NUL-terminated on return. No size is
 *       passed in, so the caller has to provide enough room.
 * src:  UTF-8 source string.
 * pos:  index (in characters) of the first character to copy.
 * len:  number of characters to copy.
 *
 * Copying stops at the end of 'src' or after character pos+len-1,
 * whichever comes first.
 */
void u8_strcpy (char *dest, char *src, int pos, int len) {
	int idx;		/* index of the character src points at */
	int nbytes = 0;		/* byte size of that character */

	for (idx = 0; *src != 0 && idx < pos+len; idx++) {
		u8_decode (src, &nbytes);
		if (idx >= pos) {
			/* inside the requested range: take all of its bytes */
			memcpy (dest, src, nbytes);
			dest += nbytes;
		}
		src += nbytes;
	}

	*dest = 0;
}
|
|
|
|
|
|
/*
 * Append 'src' to the string in 'dest' without writing more than 'size'
 * bytes (terminating NUL included) to 'dest'.
 *
 * dest: buffer of 'size' bytes holding a NUL-terminated string.
 * size: total size of the buffer in bytes.
 * src:  UTF-8 string to append.
 *
 * If 'src' does not fit completely it is cut at a character boundary, so
 * the result never ends in a partial multi-byte sequence. If 'dest' has
 * no terminator within 'size' bytes it is terminated at dest[size-1] and
 * nothing is appended. All length arithmetic is done in size_t after
 * 'size' has been validated; mixing int and size_t could produce a
 * negative length and with it a huge memcpy.
 */
void u8_strncat (char *dest, int size, char *src) {
	size_t cap;	/* buffer size in bytes */
	size_t used;	/* bytes of dest in use, without the NUL */
	size_t room;	/* bytes left for new text, without the NUL */
	size_t n;	/* bytes of src that get copied */

	if (dest == NULL || size <= 0) return;
	cap = (size_t) size;

	/* bounded scan, never look beyond the buffer */
	for (used = 0; used < cap && dest[used] != 0; used++)
		;
	if (used == cap) {
		dest[cap - 1] = 0;
		return;
	}
	if (src == NULL) return;

	room = cap - 1 - used;
	n = strlen (src);
	if (n > room) {
		n = room;
		/* src[n] is the first byte left out. If it is a
		 * continuation byte (10xx xxxx) the cut is inside a
		 * character: step back until the whole character is
		 * dropped. */
		while (n > 0 && ((unsigned char) src[n] & 0xC0) == 0x80)
			n--;
	}

	memcpy (dest + used, src, n);
	dest[used + n] = 0;
}
|
|
|
|
|
|
/*
 * Delete 'len' characters from 'str', beginning with character 'start'.
 *
 * str:   NUL-terminated UTF-8 string, modified in place.
 * start: index (in characters) of the first character to delete. A
 *        negative value is treated as 0 with 'len' shortened by the
 *        same amount.
 * len:   number of characters to delete. A range that runs past the end
 *        of the string is clipped to the end.
 *
 * The byte range is located first and then removed with one memmove that
 * covers exactly the remaining tail and its terminator, so nothing beyond
 * the terminator is read. If a malformed sequence is found while locating
 * the range, the string is left unchanged.
 */
void u8_strdel (char *str, int start, int len) {
	char *from;	/* first byte to remove */
	char *to;	/* first byte behind the removed range */
	int size = 0;
	int curpos;

	if (str == NULL) return;

	if (start < 0) {
		len += start;
		start = 0;
	}
	if (len <= 0) return;

	/* skip the characters in front of the range */
	from = str;
	for (curpos = 0; curpos < start; curpos++) {
		if (*from == 0) return;		/* range starts behind the end */
		u8_decode (from, &size);
		if (size <= 0) return;		/* malformed, do not touch */
		from += size;
	}

	/* find the end of the range */
	to = from;
	for (curpos = 0; curpos < len && *to != 0; curpos++) {
		u8_decode (to, &size);
		if (size <= 0) return;		/* malformed, do not touch */
		to += size;
	}

	/* close the gap, the +1 takes the terminating NUL along */
	if (to != from) memmove (from, to, strlen (to) + 1);
}
|
|
|
|
|
|
/*
 * Insert 'text' into 'str' in front of character number 'pos'.
 *
 * str:  NUL-terminated UTF-8 string in a buffer of 'size' bytes,
 *       modified in place.
 * size: total size of the buffer in bytes, terminating NUL included.
 * text: UTF-8 string to insert; must not overlap 'str'.
 * pos:  index in characters. pos equal to the length of 'str' appends;
 *       a negative pos or a pos behind the end inserts nothing.
 *
 * If the result does not fit into 'size' bytes:
 *  - when 'text' itself does not fit behind the insert position it is
 *    cut and the old text behind the insert position is dropped,
 *  - otherwise 'text' is inserted completely and the old text behind it
 *    is cut.
 * Every cut is moved back to a character boundary, so the buffer never
 * ends in a partial multi-byte sequence. Nothing is changed if 'str'
 * already exceeds the buffer or if a malformed sequence is found in
 * front of 'pos'.
 */
void u8_strninsert (char *str, int size, char *text, int pos) {
	size_t cap;	/* buffer size in bytes */
	size_t slen;	/* current length of str in bytes */
	size_t off;	/* byte offset of the insert position */
	size_t room;	/* bytes available from off on, without the NUL */
	size_t tlen;	/* bytes of text that get inserted */
	size_t keep;	/* bytes of the old tail that are kept */
	int chrsize = 0;
	int curpos;

	if (str == NULL || text == NULL || size <= 0 || pos < 0) return;

	cap = (size_t) size;
	slen = strlen (str);
	if (slen >= cap) return;

	/* convert the character index into a byte offset */
	off = 0;
	for (curpos = 0; curpos < pos; curpos++) {
		if (str[off] == 0) return;	/* pos is behind the end */
		u8_decode (str + off, &chrsize);
		if (chrsize <= 0) return;	/* malformed, do not touch */
		off += (size_t) chrsize;
	}

	room = cap - 1 - off;
	tlen = strlen (text);
	keep = slen - off;

	if (tlen > room) {
		/* text alone overruns the buffer: cut it and drop the tail.
		 * text[tlen] is the first byte left out; while it is a
		 * continuation byte (10xx xxxx) the cut is inside a
		 * character. */
		tlen = room;
		while (tlen > 0 && ((unsigned char) text[tlen] & 0xC0) == 0x80)
			tlen--;
		keep = 0;
	}
	else if (keep > room - tlen) {
		/* text fits, only a part of the old tail can stay */
		keep = room - tlen;
		while (keep > 0 && ((unsigned char) str[off + keep] & 0xC0) == 0x80)
			keep--;
	}

	/* make room first (regions overlap), then drop the text in */
	memmove (str + off + tlen, str + off, keep);
	memcpy (str + off, text, tlen);
	str[off + tlen + keep] = 0;
}
|
|
|