1
0
Fork 0
mirror of https://gitlab.com/sortix/sortix.git synced 2023-02-13 20:55:38 -05:00
sortix--sortix/libc/stdio/vscanf_callback.c
Jonas 'Sortie' Termansen 0756a7ee96 Add scanf(3) %n support.
2016-09-25 22:28:18 +02:00

354 lines
8.8 KiB
C

/*
* Copyright (c) 2012, 2014, 2016 Jonas 'Sortie' Termansen.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* stdio/vscanf_callback.c
* Input format conversion.
*/
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
/* States of the format interpreter loop in vscanf_callback. */
enum scanmode
{
	MODE_INIT = 0,            /* Matching literal format text and whitespace. */
	MODE_CONVSPEC = 1,        /* Decoding the conversion that follows a '%'. */
	MODE_SCANINT = 2,         /* Resetting state before an integer conversion. */
	MODE_SCANINT_REAL = 3,    /* Consuming the characters of an integer. */
	MODE_SCANSTRING = 4,      /* Resetting state before a %c or %s conversion. */
	MODE_SCANSTRING_REAL = 5, /* Consuming the characters of a %c or %s. */
	MODE_SCANREPORT = 6,      /* Storing the consumed byte count for %n. */
};
/* Width of the integer destination, as chosen by a length modifier. */
enum scantype
{
	TYPE_SHORT = 0,      /* "h" */
	TYPE_SHORTSHORT = 1, /* "hh" */
	TYPE_INT = 2,        /* no length modifier */
	TYPE_LONG = 3,       /* "l" */
	TYPE_LONGLONG = 4,   /* "ll" or "L" */
	TYPE_SIZE = 5,       /* "z" */
	TYPE_PTRDIFF = 6,    /* "t" */
	TYPE_MAX = 7,        /* "j" */
};
/* Tells whether c is one of the length modifier letters h, j, l, L, t or z. */
static bool IsTypeModifier(char c)
{
	switch ( c )
	{
	case 'h':
	case 'j':
	case 'l':
	case 'L':
	case 't':
	case 'z':
		return true;
	default:
		return false;
	}
}
/*
 * Converts the digit character c to its numeric value in the given base.
 * Decimal digits and the letters a-f / A-F are recognized. Returns -1 if c is
 * not a digit or if its value does not fit in the base. The digit '0' always
 * yields 0, whatever the base is (including a base of 0).
 */
static int debase(char c, int base)
{
	int digit;

	if ( c == '0' )
		return 0;

	if ( '0' <= c && c <= '9' )
		digit = c - '0';
	else if ( 'a' <= c && c <= 'f' )
		digit = 10 + (c - 'a');
	else if ( 'A' <= c && c <= 'F' )
		digit = 10 + (c - 'A');
	else
		return -1;

	return digit < base ? digit : -1;
}
/*
 * Parses input according to a scanf-style format string, reading characters
 * through caller-supplied callbacks.
 *
 * fp      Opaque context handed unchanged to both callbacks.
 * fgetc   Returns the next input character, or EOF when there is none.
 * ungetc  Pushes a character back onto the input. This function also calls it
 *         with EOF, and up to UNDO_MAX times in a row when an integer
 *         conversion fails, so the callback is relied upon to handle both.
 * format  The format string. Supported conversions are %d %i %o %u %x %X %c
 *         %s %n and %%, optionally with the '*' assignment suppression flag
 *         and the h hh l ll L j t z length modifiers. A field width is only
 *         supported for %c and %s.
 * ap      Destination pointers for the conversions, in format order.
 *
 * Returns the number of conversions that were assigned. Suppressed conversions
 * and %n are not counted. Returns -1 with errno set to ENOTSUP if the format
 * contains an unsupported conversion or an integer conversion with a field
 * width.
 */
int vscanf_callback(void* fp,
                    int (*fgetc)(void*),
                    int (*ungetc)(int, void*),
                    const char* restrict format,
                    va_list ap)
{
	int matcheditems = 0;
	size_t fieldwidth = 0;
	bool escaped = false;
	bool discard = false;
	bool negint = false;
	bool intunsigned = false;
	bool leadingzero = false;
	bool hasprefix = false;
	bool string = false;
	size_t intparsed = 0;
	uintmax_t intvalue = 0;
	int ic;
	int base = 0;
	int cval;
	const size_t UNDO_MAX = 4;
	int undodata[UNDO_MAX];
	size_t undoable = 0;
	size_t strwritten = 0;
	char* strdest = NULL;
	char convc;
	int bytesparsed = 0;
	enum scantype scantype = TYPE_INT;
	enum scanmode scanmode = MODE_INIT;
	while ( true )
	{
		// Every iteration reads one character. States that do not consume it
		// push it back and undo the byte count before continuing.
		ic = fgetc(fp);
		if ( ic != EOF && bytesparsed != INT_MAX )
			bytesparsed++;
		unsigned char uc = ic; char c = uc;
		switch (scanmode)
		{
		case MODE_INIT:
			if ( !*format )
			{
				ungetc(ic, fp);
				if ( ic != EOF )
					bytesparsed--;
				goto break_loop;
			}
			// Whitespace in the format matches any amount of input whitespace.
			if ( isspace((unsigned char) *format) )
			{
				if ( isspace(ic) )
					continue;
				else
					do format++;
					while ( isspace((unsigned char) *format) );
			}
			if ( *format == '%' && !escaped )
			{
				format++;
				scanmode = MODE_CONVSPEC;
				ungetc(ic, fp);
				if ( ic != EOF )
					bytesparsed--;
				continue;
			}
			escaped = false;
			// Stop on a literal mismatch. The format may have ended after the
			// whitespace skip above, and EOF never matches a literal; checking
			// both keeps format from advancing past its terminating NUL and
			// keeps the truncated EOF value from matching a '\377' byte.
			if ( !*format || ic == EOF || *format != c )
			{
				ungetc(ic, fp);
				if ( ic != EOF )
					bytesparsed--;
				goto break_loop;
			}
			format++;
			break;
		case MODE_CONVSPEC:
			discard = false;
			if ( *format == '*' ) { discard = true; format++; }
			fieldwidth = 0;
			while ( '0'<= *format && *format <= '9' )
				fieldwidth = fieldwidth * 10 + *format++ - '0';
			scantype = TYPE_INT;
			while ( IsTypeModifier(*format) )
				switch ( *format++ )
				{
				case 'h': scantype = scantype == TYPE_SHORT ? TYPE_SHORTSHORT
				                                            : TYPE_SHORT; break;
				case 'j': scantype = TYPE_MAX; break;
				case 'l': scantype = scantype == TYPE_LONG ? TYPE_LONGLONG
				                                           : TYPE_LONG; break;
				case 'L': scantype = TYPE_LONGLONG; break;
				case 't': scantype = TYPE_PTRDIFF; break;
				case 'z': scantype = TYPE_SIZE; break;
				}
			switch ( (convc = *format++) )
			{
			case '%':
				// "%%" matches a literal percent sign: step back onto the
				// second '%' and let MODE_INIT match it as ordinary text.
				// Unlike ISO C, leading input whitespace is not skipped here.
				escaped = true;
				format--;
				scanmode = MODE_INIT;
				break;
			default:
				fprintf(stderr, "Warning: scanf does not support %c (%i)\n",
				        convc, convc);
				fprintf(stderr, "Bailing out to prevent problems.\n");
				errno = ENOTSUP;
				return -1;
			case 'd':
				base = 10; scanmode = MODE_SCANINT; intunsigned = false; break;
			case 'i':
				// Base 0 means the base is detected from the input prefix.
				base = 0; scanmode = MODE_SCANINT; intunsigned = false; break;
			case 'o':
				// Octal is always base 8. With base 0 a non-zero leading digit
				// would have switched the scan to decimal.
				base = 8; scanmode = MODE_SCANINT; intunsigned = true; break;
			case 'u':
				base = 10; scanmode = MODE_SCANINT; intunsigned = true; break;
			case 'x':
			case 'X':
				base = 16; scanmode = MODE_SCANINT; intunsigned = true; break;
			case 'c':
				string = false; scanmode = MODE_SCANSTRING; break;
			case 's':
				string = true; scanmode = MODE_SCANSTRING; break;
			case 'n':
				scanmode = MODE_SCANREPORT; break;
			}
			// The character read this iteration belongs to the next state.
			ungetc(ic, fp);
			if ( ic != EOF )
				bytesparsed--;
			continue;
		case MODE_SCANINT:
			intparsed = 0;
			intvalue = 0;
			leadingzero = false;
			negint = false;
			hasprefix = false;
			undoable = 0;
			scanmode = MODE_SCANINT_REAL;
			/* fallthrough */
		case MODE_SCANINT_REAL:
			if ( fieldwidth )
			{
				fprintf(stderr, "Error: field width not supported for integers in scanf.\n");
				errno = ENOTSUP;
				return -1;
			}
			// Skip leading whitespace (only before anything was consumed).
			if ( !undoable && isspace(ic) )
				continue;
			// Remember the first few characters so that a failed match such
			// as "-x" can be pushed back in full.
			if ( undoable < UNDO_MAX )
				undodata[undoable++] = ic;
			if ( !intparsed && c == '-' && !intunsigned && !negint )
			{
				negint = true;
				continue;
			}
			if ( !intparsed && c == '0' && !hasprefix &&
			     (!base || base == 8 || base == 16) && !leadingzero )
				leadingzero = true;
			if ( intparsed == 1 && (c == 'x' || c == 'X') && !hasprefix &&
			     (!base || base == 16) && leadingzero )
			{
				// "0x" prefix: restart digit counting in base 16.
				base = 16;
				leadingzero = false;
				hasprefix = true;
				intparsed = 0;
				continue;
			}
			else if ( intparsed == 1 && '1' <= c && c <= '7' && !hasprefix &&
			          (!base || base == 8) && leadingzero )
			{
				base = 8;
				hasprefix = true;
				leadingzero = false;
			}
			else if ( !intparsed && '0' <= c && c <= '9' && !hasprefix &&
			          (!base || base == 10) && !leadingzero )
			{
				base = 10;
				leadingzero = false;
				hasprefix = true;
			}
			cval = debase(c, base);
			if ( cval < 0 )
			{
				// Not a digit: either nothing matched (matching failure) or
				// the integer is complete.
				if ( !intparsed )
				{
					while ( undoable )
					{
						ungetc(undodata[--undoable], fp);
						bytesparsed--;
					}
					goto break_loop;
				}
				scanmode = MODE_INIT;
				undoable = 0;
				ungetc(ic, fp);
				if ( ic != EOF )
					bytesparsed--;
				if ( discard ) { discard = false; continue; }
				uintmax_t uintmaxval = intvalue;
				// Negate in unsigned arithmetic so that the magnitude of
				// INTMAX_MIN does not overflow a signed negation.
				intmax_t intmaxval =
					(intmax_t) (negint ? 0 - uintmaxval : uintmaxval);
				bool un = intunsigned;
				switch ( scantype )
				{
				case TYPE_SHORTSHORT:
					if ( un ) *va_arg(ap, unsigned char*) = uintmaxval;
					else *va_arg(ap, signed char*) = intmaxval;
					break;
				case TYPE_SHORT:
					if ( un ) *va_arg(ap, unsigned short*) = uintmaxval;
					else *va_arg(ap, signed short*) = intmaxval;
					break;
				case TYPE_INT:
					if ( un ) *va_arg(ap, unsigned int*) = uintmaxval;
					else *va_arg(ap, signed int*) = intmaxval;
					break;
				case TYPE_LONG:
					if ( un ) *va_arg(ap, unsigned long*) = uintmaxval;
					else *va_arg(ap, signed long*) = intmaxval;
					break;
				case TYPE_LONGLONG:
					if ( un ) *va_arg(ap, unsigned long long*) = uintmaxval;
					else *va_arg(ap, signed long long*) = intmaxval;
					break;
				case TYPE_PTRDIFF:
					*va_arg(ap, ptrdiff_t*) = intmaxval;
					break;
				case TYPE_SIZE:
					if ( un ) *va_arg(ap, size_t*) = uintmaxval;
					else *va_arg(ap, ssize_t*) = intmaxval;
					break;
				case TYPE_MAX:
					if ( un ) *va_arg(ap, uintmax_t*) = uintmaxval;
					else *va_arg(ap, intmax_t*) = intmaxval;
					break;
				}
				matcheditems++;
				continue;
			}
			intvalue = intvalue * (uintmax_t) base + (uintmax_t) cval;
			intparsed++;
			continue;
		case MODE_SCANSTRING:
			// Without an explicit width, %s is unbounded and %c reads one
			// character.
			if ( !fieldwidth )
				fieldwidth = string ? SIZE_MAX : 1;
			scanmode = MODE_SCANSTRING_REAL;
			strwritten = 0;
			strdest = discard ? NULL : va_arg(ap, char*);
			/* fallthrough */
		case MODE_SCANSTRING_REAL:
			// %s skips leading whitespace; %c does not.
			if ( string && !strwritten && isspace(ic) )
				continue;
			if ( string && strwritten &&
			     (ic == EOF || isspace(ic) || strwritten == fieldwidth) )
			{
				ungetc(ic, fp);
				if ( ic != EOF )
					bytesparsed--;
				// Only assigned conversions count towards the return value.
				if ( !discard )
				{
					strdest[strwritten] = '\0';
					matcheditems++;
				}
				scanmode = MODE_INIT;
				continue;
			}
			if ( !string && strwritten == fieldwidth )
			{
				ungetc(ic, fp);
				if ( ic != EOF )
					bytesparsed--;
				if ( !discard )
					matcheditems++;
				scanmode = MODE_INIT;
				continue;
			}
			if ( ic == EOF )
				goto break_loop;
			if ( !discard )
				strdest[strwritten] = c;
			// Count the character even when the assignment is suppressed, or
			// the termination checks above would never trigger for %*c / %*s.
			strwritten++;
			continue;
		case MODE_SCANREPORT:
			// %n consumes no input: undo this iteration's read first.
			ungetc(ic, fp);
			if ( ic != EOF )
				bytesparsed--;
			if ( !discard )
				*va_arg(ap, int*) = bytesparsed;
			scanmode = MODE_INIT;
			continue;
		}
	}
break_loop:
	return matcheditems;
}