mirror of
				https://gitlab.com/sortix/sortix.git
				synced 2023-02-13 20:55:38 -05:00 
			
		
		
		
	Added a simple utf8 encoder in the kernel.
This commit is contained in:
		
							parent
							
								
									ac51e98cb8
								
							
						
					
					
						commit
						1f3f85b609
					
				
					 3 changed files with 114 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -66,6 +66,7 @@ descriptor_tables.o \
 | 
			
		|||
interrupt.o \
 | 
			
		||||
time.o \
 | 
			
		||||
log.o \
 | 
			
		||||
utf8.o \
 | 
			
		||||
panic.o \
 | 
			
		||||
keyboard.o \
 | 
			
		||||
scheduler.o \
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										76
									
								
								sortix/utf8.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								sortix/utf8.cpp
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,76 @@
 | 
			
		|||
/******************************************************************************
 | 
			
		||||
 | 
			
		||||
	COPYRIGHT(C) JONAS 'SORTIE' TERMANSEN 2012.
 | 
			
		||||
 | 
			
		||||
	This file is part of Sortix.
 | 
			
		||||
 | 
			
		||||
	Sortix is free software: you can redistribute it and/or modify it under the
 | 
			
		||||
	terms of the GNU General Public License as published by the Free Software
 | 
			
		||||
	Foundation, either version 3 of the License, or (at your option) any later
 | 
			
		||||
	version.
 | 
			
		||||
 | 
			
		||||
	Sortix is distributed in the hope that it will be useful, but WITHOUT ANY
 | 
			
		||||
	WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | 
			
		||||
	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 | 
			
		||||
	details.
 | 
			
		||||
 | 
			
		||||
	You should have received a copy of the GNU General Public License along
 | 
			
		||||
	with Sortix. If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
 | 
			
		||||
	utf8.cpp
 | 
			
		||||
	Encodes UTF-32 strings in UTF-8.
 | 
			
		||||
 | 
			
		||||
******************************************************************************/
 | 
			
		||||
 | 
			
		||||
#include "platform.h"
 | 
			
		||||
#include <libmaxsi/error.h>
 | 
			
		||||
#include "utf8.h"
 | 
			
		||||
 | 
			
		||||
using namespace Maxsi;
 | 
			
		||||
 | 
			
		||||
namespace Sortix
 | 
			
		||||
{
 | 
			
		||||
	namespace UTF8
 | 
			
		||||
	{
 | 
			
		||||
		unsigned Encode(uint32_t unicode, char* dest)
 | 
			
		||||
		{
 | 
			
		||||
			uint8_t* buf = (uint8_t*) dest;
 | 
			
		||||
			unsigned bytes = 1;
 | 
			
		||||
			unsigned bits = 7;
 | 
			
		||||
			if ( (1U<<7U) <= unicode ) { bytes = 2; bits = 11; }
 | 
			
		||||
			if ( (1U<<11U) <= unicode ) { bytes = 3; bits = 16; }
 | 
			
		||||
			if ( (1U<<16U) <= unicode ) { bytes = 4; bits = 21; }
 | 
			
		||||
			if ( (1U<<21U) <= unicode ) { bytes = 5; bits = 26; }
 | 
			
		||||
			if ( (1U<<26U) <= unicode ) { bytes = 6; bits = 31; }
 | 
			
		||||
			if ( (1U<<31U) <= unicode ) { Error::Set(EINVAL); return 0; }
 | 
			
		||||
 | 
			
		||||
			uint8_t prefix;
 | 
			
		||||
			unsigned prefixavai;
 | 
			
		||||
			switch ( bytes )
 | 
			
		||||
			{
 | 
			
		||||
			case 1: prefixavai = 7; prefix = 0b0U << prefixavai; break;
 | 
			
		||||
			case 2: prefixavai = 5; prefix = 0b110U << prefixavai; break;
 | 
			
		||||
			case 3: prefixavai = 4; prefix = 0b1110U << prefixavai; break;
 | 
			
		||||
			case 4: prefixavai = 3; prefix = 0b11110U << prefixavai; break;
 | 
			
		||||
			case 5: prefixavai = 2; prefix = 0b111110U << prefixavai; break;
 | 
			
		||||
			case 6: prefixavai = 1; prefix = 0b1111110U << prefixavai; break;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			// Put the first bits in the unused area of the prefix.
 | 
			
		||||
			prefix |= unicode >> (bits - prefixavai);
 | 
			
		||||
			*buf++ = prefix;
 | 
			
		||||
			unsigned bitsleft = bits - prefixavai;
 | 
			
		||||
 | 
			
		||||
			while ( bitsleft )
 | 
			
		||||
			{
 | 
			
		||||
				bitsleft -= 6;
 | 
			
		||||
				uint8_t elembits = (unicode>>bitsleft) & ((1U<<6U)-1U);
 | 
			
		||||
				uint8_t elem = (0b10U<<6U) | elembits;
 | 
			
		||||
				*buf++ = elem;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			return bytes;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										37
									
								
								sortix/utf8.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								sortix/utf8.h
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,37 @@
 | 
			
		|||
/******************************************************************************
 | 
			
		||||
 | 
			
		||||
	COPYRIGHT(C) JONAS 'SORTIE' TERMANSEN 2012.
 | 
			
		||||
 | 
			
		||||
	This file is part of Sortix.
 | 
			
		||||
 | 
			
		||||
	Sortix is free software: you can redistribute it and/or modify it under the
 | 
			
		||||
	terms of the GNU General Public License as published by the Free Software
 | 
			
		||||
	Foundation, either version 3 of the License, or (at your option) any later
 | 
			
		||||
	version.
 | 
			
		||||
 | 
			
		||||
	Sortix is distributed in the hope that it will be useful, but WITHOUT ANY
 | 
			
		||||
	WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | 
			
		||||
	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 | 
			
		||||
	details.
 | 
			
		||||
 | 
			
		||||
	You should have received a copy of the GNU General Public License along
 | 
			
		||||
	with Sortix. If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
 | 
			
		||||
	utf8.h
 | 
			
		||||
	Encodes UTF-32 strings in UTF-8.
 | 
			
		||||
 | 
			
		||||
******************************************************************************/
 | 
			
		||||
 | 
			
		||||
#ifndef SORTIX_UTF8_H
 | 
			
		||||
#define SORTIX_UTF8_H
 | 
			
		||||
 | 
			
		||||
namespace Sortix
 | 
			
		||||
{
 | 
			
		||||
	namespace UTF8
 | 
			
		||||
	{
 | 
			
		||||
		// Encodes the code point `unicode` as UTF-8 into `dest` and returns the
		// number of bytes written (1 to 6); no terminating NUL is added. If
		// `unicode` does not fit in 31 bits, nothing is written, the error is
		// set to EINVAL and 0 is returned.
		unsigned Encode(uint32_t unicode, char* dest);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue