1
0
Fork 0
mirror of https://gitlab.com/sortix/sortix.git synced 2023-02-13 20:55:38 -05:00

Fix mbrtowc accepting UTF-8-encoded surrogates (U+D800 through U+DFFF) instead of rejecting them with EILSEQ.

This commit is contained in:
Jonas 'Sortie' Termansen 2015-12-27 21:41:52 +01:00
parent 8fabfc5f7b
commit ffe59b9c7c

View file

@ -1,6 +1,6 @@
/******************************************************************************* /*******************************************************************************
Copyright(C) Jonas 'Sortie' Termansen 2012, 2014. Copyright(C) Jonas 'Sortie' Termansen 2012, 2014, 2015.
This file is part of the Sortix C Library. This file is part of the Sortix C Library.
@ -114,6 +114,11 @@ size_t utf8_mbrtowc(wchar_t* restrict pwc,
return errno = EILSEQ, (size_t) -1; return errno = EILSEQ, (size_t) -1;
#endif #endif
// The definition of UTF-8 prohibits encoding character numbers between
// U+D800 and U+DFFF, which are reserved for use with the UTF-16 encoding
// form (as surrogate pairs) and do not directly represent characters.
if ( 0xD800 <= ps->wch && ps->wch <= 0xDFFF )
return errno = EILSEQ, (size_t) -1;
// RFC 3629 limits UTF-8 to 0x0 through 0x10FFFF. // RFC 3629 limits UTF-8 to 0x0 through 0x10FFFF.
if ( 0x10FFFF <= ps->wch ) if ( 0x10FFFF <= ps->wch )
return errno = EILSEQ, (size_t) -1; return errno = EILSEQ, (size_t) -1;