You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
spOSMroute/base/utf8.c

239 lines
5.0 KiB

/* $Id: utf8.c,v 1.3 2013/06/09 20:11:30 steffen Exp $ */
/* utf8.c
* Copyright (C) Steffen Pohle 2013 <steffen@gulpe.de>
*
* spOSMroute is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* spOSMroute is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "system.h"
#include "utf8.h"
/*
* 84218421 84218421 84218421 84218421
* -----------------------------------
* 00000000 00000000 00000000 0aaaaaaa = FF FF FF 80 > 0
* 00000000 00000000 00000bbb bbaaaaaa = FF FF F8 00 > 1
* 00000000 00000000 ccccbbbb bbaaaaaa = FF FF 00 00 > 2
* 00000000 000dddcc ccccbbbb bbaaaaaa = FF E0 00 00 > 3
* 000000ee ddddddcc ccccbbbb bbaaaaaa = FC 00 00 00 > 4
* 0feeeeee ddddddcc ccccbbbb bbaaaaaa > 5
*/
/*
 * Encode the code point `key` as a NUL-terminated UTF-8 style byte
 * sequence in `dest`.
 *
 * The number of continuation bytes (0..5) is chosen from the magnitude
 * of `key`, following the bit layout table above.  `dest` receives the
 * lead byte, the continuation bytes and a terminating 0, so the caller
 * has to provide room for up to 7 bytes.
 */
void u8_encode (char *dest, uint32_t key) {
    /* marker bits and payload mask of the lead byte, indexed by the
     * number of continuation bytes that follow it */
    static const uint8_t lead_mark[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
    static const uint8_t lead_bits[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
    int tail;
    int i;

    if (key < 0x80u) tail = 0;
    else if (key < 0x800u) tail = 1;
    else if (key < 0x10000u) tail = 2;
    else if (key < 0x200000u) tail = 3;
    else if (key < 0x4000000u) tail = 4;
    else tail = 5;

    dest[tail+1] = 0;

    /* continuation bytes are written back to front, 6 payload bits each */
    for (i = tail; i > 0; i--) {
        dest[i] = (uint8_t)((key & 0x3F) | 0x80);
        key >>= 6;
    }

    /* whatever is left of key goes into the lead byte */
    dest[0] = (key & lead_bits[tail]) | lead_mark[tail];
}
/*
 * Decode the UTF-8 sequence that starts at `str`.
 *
 * str      - points at the lead byte of the sequence.
 * charsize - optional (may be NULL); receives the length of the
 *            sequence in bytes (1..6), or 0 if the bytes at `str` do
 *            not form a valid sequence.
 *
 * Returns the decoded code point, or 0 on error.  Since a NUL byte also
 * decodes to 0 (with *charsize == 1), callers that need to tell both
 * cases apart have to look at *charsize.
 *
 * A terminating NUL inside a sequence is rejected as an invalid
 * continuation byte, so decoding never reads past the end of a
 * NUL-terminated string.
 */
uint32_t u8_decode (char *str, int *charsize) {
    uint32_t chr = 0;
    int block = 0;      /* number of continuation bytes to expect */
    int i;
    uint8_t c = (uint8_t) *str;

    /* the lead byte tells how many continuation bytes follow */
    if ((c & 0x80) == 0) {              // 0xxx xxxx
        block = 0;
        chr = c;
    }
    else if ((c & 0xE0) == 0xC0) {      // 110x xxxx
        block = 1;
        chr = c & 0x1F;
    }
    else if ((c & 0xF0) == 0xE0) {      // 1110 xxxx
        block = 2;
        chr = c & 0x0F;
    }
    else if ((c & 0xF8) == 0xF0) {      // 1111 0xxx
        block = 3;
        chr = c & 0x07;
    }
    else if ((c & 0xFC) == 0xF8) {      // 1111 10xx
        block = 4;
        chr = c & 0x03;
    }
    else if ((c & 0xFE) == 0xFC) {      // 1111 110x
        block = 5;
        chr = c & 0x01;
    }
    else {
        /* error: stray continuation byte or 0xFE/0xFF */
        if (charsize != NULL) *charsize = 0;
        return 0;
    }

    for (i = 1; i <= block; i++) {
        c = (uint8_t) str[i];
        if ((c & 0xC0) != 0x80) {
            /* error: not a continuation byte.  charsize is optional,
             * so it must be checked here as well. */
            if (charsize != NULL) *charsize = 0;
            return 0;
        }
        chr = (chr << 6) | (c & 0x3F);
    }

    if (charsize != NULL) *charsize = block+1; /* size in bytes of the utf8 char */
    return chr;
}
/*
 * Return the length of `str` in characters (not in bytes).
 *
 * The string is walked one UTF-8 sequence at a time.  As soon as
 * u8_decode() returns 0 for a position inside the string, a debug
 * message is printed and 0 is returned.
 */
int u8_strlen (char *str) {
    int total = strlen (str);   /* length in bytes */
    int offset = 0;             /* byte offset of the current character */
    int chars = 0;
    int step;                   /* byte size of the current character */

    while (offset < total) {
        if (u8_decode (str+offset, &step) == 0) {
            d_printf ("u8_strlen: something went wrong. str:%s i:%d str+i:%s", str, offset, str+offset);
            return 0;
        }
        offset += step;
        chars++;
    }

    return chars;
}
/*
 * Copy up to `len` characters of `src`, starting at character index
 * `pos`, into `dest` and NUL-terminate the result.
 *
 * `pos` and `len` count characters, not bytes.  Copying stops at the
 * end of `src` if it holds fewer than pos+len characters.  No size is
 * passed for `dest`; the caller has to provide a large enough buffer.
 */
void u8_strcpy (char *dest, char *src, int pos, int len) {
    int stop = pos+len;     /* first character index not copied anymore */
    int nbytes = 0;         /* byte size of the current character */
    int idx;

    for (idx = 0; *src != 0 && idx < stop; idx++) {
        u8_decode (src, &nbytes);
        if (idx >= pos) {
            memcpy (dest, src, nbytes);
            dest += nbytes;
        }
        src += nbytes;
    }

    *dest = 0;
}
/*
 * Append `src` to the string in `dest` without writing more than `size`
 * bytes (terminating NUL included) to `dest`.
 *
 * dest - NUL-terminated destination string
 * size - total size of the `dest` buffer in bytes
 * src  - NUL-terminated string to append
 *
 * If `src` does not fit completely it is truncated.  The cut is moved
 * back to the start of a UTF-8 sequence, so a multi-byte character is
 * either appended completely or not at all.
 *
 * Nothing is done if `dest` is NULL or `size` is <= 0.  If `dest` holds
 * no NUL within its first `size` bytes it is terminated at dest[size-1]
 * and nothing is appended.
 */
void u8_strncat (char *dest, int size, char *src) {
    size_t cap, used, room, n;
    char *end;

    if (dest == NULL || size <= 0) return;
    cap = (size_t) size;

    /* bounded search, never look beyond the buffer */
    end = memchr (dest, 0, cap);
    if (end == NULL) {
        dest[cap-1] = 0;
        return;
    }
    if (src == NULL) return;

    used = (size_t) (end - dest);
    room = cap - used - 1;      /* bytes left in front of the terminator */

    n = strlen (src);
    if (n > room) {
        n = room;
        /* src[n] is the first byte left out: as long as this is a
         * continuation byte (10xx xxxx) the cut is inside a character */
        while (n > 0 && ((unsigned char) src[n] & 0xC0) == 0x80) n--;
    }

    memcpy (end, src, n);
    end[n] = 0;
}
/*
 * Delete `len` characters from `str`, starting at character index
 * `start`.  Both values count characters, not bytes.  The deletion is
 * done in place, the tail of the string is moved forward.
 *
 * Character indexes outside of the string are ignored, so the range is
 * clipped to what is there.
 *
 * Note: if u8_strlen() reports 0 characters (empty string, or a string
 * it could not decode) the loop is skipped and str[0] is set to 0.
 */
void u8_strdel (char *str, int start, int len) {
    int curpos = 0;                     /* character index of nstr */
    int size = 0;                       /* byte size of the current character */
    int slen = u8_strlen (str);
    char *nstr = str;
    char *end = str + strlen (str);     /* always points at the terminating NUL */

    while (*nstr != 0 && curpos < slen) {
        u8_decode (nstr, &size);
        if (curpos >= start && curpos < start+len) {
            /* Close the gap: move everything behind this character,
             * terminator included.  The count is taken from the current
             * end of the string, so nothing behind the NUL is read. */
            memmove (nstr, nstr+size, (size_t) (end - (nstr+size)) + 1);
            end -= size;
        }
        else nstr += size;
        curpos++;
    }
    *nstr = 0;
}
/*
 * Insert `text` into `str` in front of the character with index `pos`.
 *
 * str  - NUL-terminated string, modified in place
 * size - size of the `str` buffer in bytes; the result is cut off so
 *        that it fits, str[size-1] is set to 0 in that case
 * text - NUL-terminated string to insert
 * pos  - character (not byte) index of the insert position
 *
 * Nothing is changed if `pos` is negative or larger than the number of
 * characters in `str`.
 */
void u8_strninsert (char *str, int size, char *text, int pos) {
    int nchars = u8_strlen (str);
    int nbytes = strlen (str);
    int step = 0;       /* byte size of the current character */
    int tlen;           /* byte length of text */
    int off;            /* byte offset of the insert position */
    int idx;
    char *at = str;

    if (pos < 0 || pos > nchars) return;

    /* find the byte position of character `pos` */
    for (idx = 0; idx < pos; idx++) {
        u8_decode (at, &step);
        at += step;
    }

    tlen = strlen (text);
    off = at - str;

    if (off + tlen >= size) {
        /* not even text fits: copy what fits, the old tail is lost */
        memcpy (at, text, size-off-1);
        str[size-1] = 0;
        return;
    }

    if (nbytes + tlen >= size) {
        /* text fits, but only a part of the old tail can be kept */
        memmove (at+tlen, at, size-off-tlen);
        str[size-1] = 0;
    }
    else {
        /* no overrun: move the complete tail */
        memmove (at+tlen, at, nbytes-off);
        str[nbytes+tlen] = 0;
    }
    memcpy (at, text, tlen);
}