2013-07-10 09:26:01 -04:00
|
|
|
/*******************************************************************************
|
2012-01-18 16:06:10 -05:00
|
|
|
|
2013-07-10 09:26:01 -04:00
|
|
|
Copyright(C) Jonas 'Sortie' Termansen 2012.
|
2012-01-18 16:06:10 -05:00
|
|
|
|
2013-07-10 09:26:01 -04:00
|
|
|
This file is part of Sortix.
|
2012-01-18 16:06:10 -05:00
|
|
|
|
2013-07-10 09:26:01 -04:00
|
|
|
Sortix is free software: you can redistribute it and/or modify it under the
|
|
|
|
terms of the GNU General Public License as published by the Free Software
|
|
|
|
Foundation, either version 3 of the License, or (at your option) any later
|
|
|
|
version.
|
2012-01-18 16:06:10 -05:00
|
|
|
|
2013-07-10 09:26:01 -04:00
|
|
|
Sortix is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
|
|
|
details.
|
2012-01-18 16:06:10 -05:00
|
|
|
|
2013-07-10 09:26:01 -04:00
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
|
|
Sortix. If not, see <http://www.gnu.org/licenses/>.
|
2012-01-18 16:06:10 -05:00
|
|
|
|
2013-07-10 09:26:01 -04:00
|
|
|
utf8.cpp
|
|
|
|
Encodes UTF-32 strings in UTF-8.
|
2012-01-18 16:06:10 -05:00
|
|
|
|
2013-07-10 09:26:01 -04:00
|
|
|
*******************************************************************************/
|
2012-01-18 16:06:10 -05:00
|
|
|
|
2012-03-21 19:52:29 -04:00
|
|
|
#include <sortix/kernel/platform.h>
|
2012-09-22 10:44:50 -04:00
|
|
|
#include <errno.h>
|
2012-01-18 16:06:10 -05:00
|
|
|
#include "utf8.h"
|
|
|
|
|
|
|
|
namespace Sortix
|
|
|
|
{
|
|
|
|
namespace UTF8
|
|
|
|
{
|
|
|
|
unsigned Encode(uint32_t unicode, char* dest)
|
|
|
|
{
|
|
|
|
uint8_t* buf = (uint8_t*) dest;
|
|
|
|
unsigned bytes = 1;
|
|
|
|
unsigned bits = 7;
|
|
|
|
if ( (1U<<7U) <= unicode ) { bytes = 2; bits = 11; }
|
|
|
|
if ( (1U<<11U) <= unicode ) { bytes = 3; bits = 16; }
|
|
|
|
if ( (1U<<16U) <= unicode ) { bytes = 4; bits = 21; }
|
|
|
|
if ( (1U<<21U) <= unicode ) { bytes = 5; bits = 26; }
|
|
|
|
if ( (1U<<26U) <= unicode ) { bytes = 6; bits = 31; }
|
2012-09-22 10:44:50 -04:00
|
|
|
if ( (1U<<31U) <= unicode ) { errno = EINVAL; return 0; }
|
2012-01-18 16:06:10 -05:00
|
|
|
|
|
|
|
uint8_t prefix;
|
|
|
|
unsigned prefixavai;
|
|
|
|
switch ( bytes )
|
|
|
|
{
|
|
|
|
case 1: prefixavai = 7; prefix = 0b0U << prefixavai; break;
|
|
|
|
case 2: prefixavai = 5; prefix = 0b110U << prefixavai; break;
|
|
|
|
case 3: prefixavai = 4; prefix = 0b1110U << prefixavai; break;
|
|
|
|
case 4: prefixavai = 3; prefix = 0b11110U << prefixavai; break;
|
|
|
|
case 5: prefixavai = 2; prefix = 0b111110U << prefixavai; break;
|
|
|
|
case 6: prefixavai = 1; prefix = 0b1111110U << prefixavai; break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Put the first bits in the unused area of the prefix.
|
|
|
|
prefix |= unicode >> (bits - prefixavai);
|
|
|
|
*buf++ = prefix;
|
|
|
|
unsigned bitsleft = bits - prefixavai;
|
|
|
|
|
|
|
|
while ( bitsleft )
|
|
|
|
{
|
|
|
|
bitsleft -= 6;
|
|
|
|
uint8_t elembits = (unicode>>bitsleft) & ((1U<<6U)-1U);
|
|
|
|
uint8_t elem = (0b10U<<6U) | elembits;
|
|
|
|
*buf++ = elem;
|
|
|
|
}
|
|
|
|
|
|
|
|
return bytes;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|