Add regex(3).
This commit is contained in:
parent
5a3e181975
commit
75808c637d
|
@ -62,6 +62,10 @@ malloc/heap_init.o \
|
|||
malloc/__heap_lock.o \
|
||||
malloc/__heap_unlock.o \
|
||||
malloc/__heap_verify.o \
|
||||
regex/regcomp.o \
|
||||
regex/regerror.o \
|
||||
regex/regexec.o \
|
||||
regex/regfree.o \
|
||||
signal/sigaddset.o \
|
||||
signal/sigandset.o \
|
||||
signal/sigdelset.o \
|
||||
|
|
|
@ -0,0 +1,177 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Copyright(C) Jonas 'Sortie' Termansen 2014, 2015.
|
||||
|
||||
This file is part of the Sortix C Library.
|
||||
|
||||
The Sortix C Library is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The Sortix C Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
regex.h
|
||||
Regular expressions.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _REGEX_H
|
||||
#define _REGEX_H
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
|
||||
#include <sys/__/types.h>
|
||||
|
||||
#if defined(__is_sortix_libc)
|
||||
#include <pthread.h>
|
||||
#else
|
||||
#include <__/pthread.h>
|
||||
#endif
|
||||
|
||||
#ifndef __size_t_defined
|
||||
#define __size_t_defined
|
||||
#define __need_size_t
|
||||
#include <stddef.h>
|
||||
#endif
|
||||
|
||||
/* Signed offset type used to report match positions. */
typedef __ssize_t regoff_t;

/* Location of one matched substring. By the POSIX <regex.h> convention rm_so
   is the offset of the first matched byte and rm_eo the offset just past the
   last one. */
typedef struct
{
	regoff_t rm_so;
	regoff_t rm_eo;
} regmatch_t;
|
||||
|
||||
#if defined(__is_sortix_libc)
|
||||
/* Kind of a node in the compiled regular expression. The kind selects which
   member of the anonymous union in struct re is meaningful. */
enum re_type
{
	RE_TYPE_BOL,               /* '^' anchor. */
	RE_TYPE_EOL,               /* '$' anchor. */
	RE_TYPE_CHAR,              /* A single literal character (re_char). */
	RE_TYPE_ANY_CHAR,          /* '.' wildcard. */
	RE_TYPE_SET,               /* Bracket expression (re_set). */
	RE_TYPE_SUBEXPRESSION,     /* Opening of a group (re_subexpression). */
	RE_TYPE_SUBEXPRESSION_END, /* Closing of a group (re_subexpression). */
	RE_TYPE_ALTERNATIVE,       /* '|' branch point (re_split). */
	RE_TYPE_OPTIONAL,          /* Produced when expanding repetitions (re_split). */
	RE_TYPE_LOOP,              /* Produced when expanding repetitions (re_split). */
	RE_TYPE_REPETITION,        /* Parser-only node, expanded by regcomp. */
	/* TODO: Back-references. */
};
|
||||
|
||||
struct re;
|
||||
|
||||
/* Payload of a RE_TYPE_CHAR node: the literal character to match. */
struct re_char
{
	char c;
};
|
||||
|
||||
/* Payload of a RE_TYPE_SET node: a 256-bit membership bitmap indexed by
   unsigned char value (bit uc % 8 of byte uc / 8). */
struct re_set
{
	unsigned char set[32];
};
|
||||
|
||||
/* Payload of RE_TYPE_SUBEXPRESSION and RE_TYPE_SUBEXPRESSION_END nodes. */
struct re_subexpression
{
	struct re* re_owner; /* First node inside the group, or NULL if empty. */
	size_t index;        /* Group number as assigned by the parser. */
};
|
||||
|
||||
/* Payload of RE_TYPE_ALTERNATIVE, RE_TYPE_OPTIONAL and RE_TYPE_LOOP nodes. */
struct re_split
{
	struct re* re;       /* Second way out of the node; filled in by
	                        re_control_flow() in regcomp. */
	struct re* re_owner; /* Sub-chain attached to this node by the parser. */
};
|
||||
|
||||
/* Payload of a RE_TYPE_REPETITION node: repeat `re` between min and max
   times. The parser stores SIZE_MAX in max for an unbounded repetition. */
struct re_repetition
{
	struct re* re;
	size_t min;
	size_t max;
};
|
||||
|
||||
struct re
|
||||
{
|
||||
enum re_type re_type;
|
||||
union
|
||||
{
|
||||
struct re_char re_char;
|
||||
struct re_set re_set;
|
||||
struct re_subexpression re_subexpression;
|
||||
struct re_split re_split;
|
||||
struct re_repetition re_repetition;
|
||||
};
|
||||
struct re* re_next;
|
||||
struct re* re_next_owner;
|
||||
struct re* re_current_state_prev;
|
||||
struct re* re_current_state_next;
|
||||
struct re* re_upcoming_state_next;
|
||||
unsigned char re_is_currently_done;
|
||||
unsigned char re_is_current;
|
||||
unsigned char re_is_upcoming;
|
||||
regmatch_t* re_matches;
|
||||
};
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
size_t re_nsub;
|
||||
#if defined(__is_sortix_libc)
|
||||
pthread_mutex_t re_lock;
|
||||
struct re* re;
|
||||
regmatch_t* re_matches;
|
||||
size_t re_state_count;
|
||||
int re_cflags;
|
||||
#else
|
||||
__pthread_mutex_t __re_lock;
|
||||
void* __re;
|
||||
regmatch_t* __re_matches;
|
||||
size_t __re_state_count;
|
||||
int __re_cflags;
|
||||
#endif
|
||||
} regex_t;
|
||||
|
||||
/* Flags for the cflags parameter of regcomp. */
#define REG_EXTENDED (1 << 0)
#define REG_ICASE (1 << 1)
#define REG_NOSUB (1 << 2)
#define REG_NEWLINE (1 << 3)
|
||||
|
||||
/* Flags for the eflags parameter of regexec. */
#define REG_NOTBOL (1 << 0)
#define REG_NOTEOL (1 << 1)
|
||||
|
||||
/* Error codes returned by regcomp and regexec; zero means success. regerror
   translates each of these into a message. */
#define REG_NOMATCH 1
#define REG_BADPAT 2
#define REG_ECOLLATE 3
#define REG_ECTYPE 4
#define REG_EESCAPE 5
#define REG_ESUBREG 6
#define REG_EBRACK 7
#define REG_EPAREN 8
#define REG_EBRACE 9
#define REG_BADBR 10
#define REG_ERANGE 11
#define REG_ESPACE 12
#define REG_BADRPT 13
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int regcomp(regex_t* __restrict, const char* __restrict, int);
|
||||
size_t regerror(int, const regex_t* __restrict, char* __restrict, size_t);
|
||||
int regexec(const regex_t* __restrict, const char* __restrict, size_t,
|
||||
regmatch_t* __restrict, int);
|
||||
void regfree(regex_t*);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,727 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Copyright(C) Jonas 'Sortie' Termansen 2014, 2015.
|
||||
|
||||
This file is part of the Sortix C Library.
|
||||
|
||||
The Sortix C Library is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The Sortix C Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
regex/regcomp.cpp
|
||||
Regular expression compiler.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <inttypes.h>
|
||||
#include <limits.h>
|
||||
#include <regex.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Parser state saved when a group is opened and restored when it is closed.
struct re_parse_subexpr
{
	// Enclosing open group, or NULL for the outermost one.
	struct re_parse_subexpr* next;
	// Where the group's node was stored in the enclosing chain.
	struct re** prev_next_ptr;
	// Start of the enclosing alternative, where a later '|' gets inserted.
	struct re** primary_next_ptr;
};
|
||||
|
||||
// State shared across one run of the pattern parser.
struct re_parse
{
	// Stack of currently open groups, innermost first.
	struct re_parse_subexpr* subexpr;
	// Index to give the next group; regcomp starts it at 1.
	size_t subexpr_num;
};
|
||||
|
||||
// Whether a backslash followed by c is accepted in a basic regular
// expression: any of the operator characters, or a digit.
static inline bool re_basic_well_defined_escape(char c)
{
	if ( '0' <= c && c <= '9' )
		return true;
	switch ( c )
	{
	case '\\': case '(': case ')': case '{': case '}':
	case '.': case '*': case '[': case ']': case '^':
	case '$': case '+': case '?': case '|':
		return true;
	default:
		return false;
	}
}
|
||||
|
||||
// Whether a backslash followed by c is accepted in an extended regular
// expression: only the operator characters, not digits.
static inline bool re_extended_well_defined_escape(char c)
{
	switch ( c )
	{
	case '\\': case '(': case ')': case '{': case '}':
	case '.': case '*': case '[': case ']': case '^':
	case '$': case '+': case '?': case '|':
		return true;
	default:
		return false;
	}
}
|
||||
|
||||
static inline void re_free(struct re* re)
|
||||
{
|
||||
regex_t regex;
|
||||
memset(®ex, 0, sizeof(regex));
|
||||
pthread_mutex_init(®ex.re_lock, NULL);
|
||||
regex.re = re;
|
||||
regfree(®ex);
|
||||
}
|
||||
|
||||
static inline int re_parse(struct re_parse* parse,
|
||||
struct re** restrict prev_next_ptr,
|
||||
const char* restrict pattern,
|
||||
int cflags)
|
||||
{
|
||||
*prev_next_ptr = NULL;
|
||||
|
||||
bool is_extended = cflags & REG_EXTENDED;
|
||||
bool is_basic = !is_extended;
|
||||
|
||||
struct re** primary_next_ptr = prev_next_ptr;
|
||||
struct re* re;
|
||||
|
||||
size_t pattern_index = 0;
|
||||
//size_t alternative_begun_at = pattern_index;
|
||||
while ( true )
|
||||
{
|
||||
size_t c_pattern_index = pattern_index++;
|
||||
char c = pattern[c_pattern_index];
|
||||
|
||||
if ( c == '\0' )
|
||||
{
|
||||
if ( parse->subexpr )
|
||||
return REG_EPAREN;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool escaped = false;
|
||||
if ( c == '\\' )
|
||||
{
|
||||
c_pattern_index = pattern_index++;
|
||||
c = pattern[c_pattern_index];
|
||||
if ( c == '\0' )
|
||||
return REG_BADPAT;
|
||||
if ( is_basic && !re_basic_well_defined_escape(c) )
|
||||
return REG_BADPAT;
|
||||
if ( is_extended && !re_extended_well_defined_escape(c) )
|
||||
return REG_BADPAT;
|
||||
escaped = true;
|
||||
}
|
||||
|
||||
bool escaped_for_basic = (is_basic && escaped) ||
|
||||
(is_extended && !escaped);
|
||||
|
||||
if ( escaped_for_basic && c == ')' )
|
||||
{
|
||||
struct re_parse_subexpr* subexpr = parse->subexpr;
|
||||
if ( !subexpr )
|
||||
return REG_EPAREN;
|
||||
*prev_next_ptr = NULL;
|
||||
prev_next_ptr = subexpr->prev_next_ptr;
|
||||
primary_next_ptr = subexpr->primary_next_ptr;
|
||||
//alternative_begun_at = subexpr->alternative_begun_at;
|
||||
parse->subexpr = subexpr->next;
|
||||
free(subexpr);
|
||||
re = *prev_next_ptr;
|
||||
goto subexpression_done;
|
||||
}
|
||||
|
||||
// TODO: Properly reject anchors in the basic regular expression cases
|
||||
// where they aren't appropriate. Mind that we implement the
|
||||
// extension where all ERE features are available in BRE mode if
|
||||
// accessed through backslashes.
|
||||
//if ( !escaped && c == '^' &&
|
||||
// (0 < parse->subexpr_depth || c_pattern_index != alternative_begun_at) )
|
||||
// return REG_BADRPT;
|
||||
//if ( !escaped && c == '$' &&
|
||||
// (0 < parse->subexpr_depth || pattern[pattern_index] != '0') )
|
||||
// return REG_BADRPT;
|
||||
if ( !escaped && c == '*' )
|
||||
return REG_BADRPT;
|
||||
if ( escaped_for_basic && c == '{' )
|
||||
return REG_BADBR;
|
||||
if ( (is_basic && escaped && c == '+') ||
|
||||
(is_extended && !escaped && c == '+') )
|
||||
return REG_BADBR;
|
||||
if ( (is_basic && escaped && c == '?') ||
|
||||
(is_extended && !escaped && c == '?') )
|
||||
return REG_BADBR;
|
||||
|
||||
if ( !(re = (struct re*) calloc(1, sizeof(struct re))) )
|
||||
return REG_ESPACE;
|
||||
|
||||
if ( escaped_for_basic && c == '|' )
|
||||
{
|
||||
re->re_type = RE_TYPE_ALTERNATIVE;
|
||||
re->re_next_owner = *primary_next_ptr;
|
||||
re->re_split.re_owner = NULL;
|
||||
*primary_next_ptr = re;
|
||||
prev_next_ptr = primary_next_ptr = &re->re_split.re_owner;
|
||||
continue;
|
||||
}
|
||||
// TODO: Check if this anchor logic is the right one. This uses them as
|
||||
// special characters in BRE mode in cases they shouldn't be.
|
||||
else if ( !escaped && c == '^' )
|
||||
{
|
||||
re->re_type = RE_TYPE_BOL;
|
||||
*prev_next_ptr = re;
|
||||
prev_next_ptr = &re->re_next_owner;
|
||||
continue;
|
||||
}
|
||||
else if ( !escaped && c == '$' )
|
||||
{
|
||||
re->re_type = RE_TYPE_EOL;
|
||||
*prev_next_ptr = re;
|
||||
prev_next_ptr = &re->re_next_owner;
|
||||
continue;
|
||||
}
|
||||
else if ( escaped_for_basic && c == '(' )
|
||||
{
|
||||
re->re_type = RE_TYPE_SUBEXPRESSION;
|
||||
re->re_subexpression.index = parse->subexpr_num++;
|
||||
re->re_subexpression.re_owner = NULL;
|
||||
*prev_next_ptr = re;
|
||||
struct re* end = (struct re*) calloc(1, sizeof(struct re));
|
||||
if ( !end )
|
||||
return REG_ESPACE;
|
||||
end->re_type = RE_TYPE_SUBEXPRESSION_END;
|
||||
end->re_subexpression.index = re->re_subexpression.index;
|
||||
re->re_next_owner = end;
|
||||
struct re_parse_subexpr* subexpr = (struct re_parse_subexpr*)
|
||||
calloc(sizeof(struct re_parse_subexpr), 1);
|
||||
if ( !subexpr )
|
||||
return REG_ESPACE;
|
||||
subexpr->prev_next_ptr = prev_next_ptr;
|
||||
subexpr->primary_next_ptr = primary_next_ptr;
|
||||
//subexpr->alternative_begun_at = alternative_begun_at;
|
||||
subexpr->next = parse->subexpr;
|
||||
parse->subexpr = subexpr;
|
||||
prev_next_ptr = &re->re_subexpression.re_owner;
|
||||
primary_next_ptr = &re->re_subexpression.re_owner;
|
||||
//alternative_begun_at = pattern_index;
|
||||
continue;
|
||||
}
|
||||
// TODO: This is not properly implemented.
|
||||
// TODO: This is not properly unicode-aware.
|
||||
else if ( c == '[' )
|
||||
{
|
||||
re->re_type = RE_TYPE_SET;
|
||||
bool negate = false;
|
||||
if ( pattern[pattern_index] == '^' )
|
||||
{
|
||||
pattern_index += 1;
|
||||
negate = true;
|
||||
}
|
||||
while ( pattern[pattern_index] != ']' )
|
||||
{
|
||||
if ( pattern[pattern_index] == '\0' )
|
||||
return free(re), REG_EBRACK;
|
||||
// TODO: This is wrong and fragile.
|
||||
unsigned char c_from;
|
||||
unsigned char c_to;
|
||||
if ( pattern[pattern_index + 1] == '-' )
|
||||
{
|
||||
c_from = (unsigned char) pattern[pattern_index + 0];
|
||||
c_to = (unsigned char) pattern[pattern_index + 2];
|
||||
pattern_index += 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
c_from = (unsigned char) pattern[pattern_index + 0];
|
||||
c_to = (unsigned char) pattern[pattern_index + 0];
|
||||
pattern_index += 1;
|
||||
}
|
||||
for ( unsigned int uc = c_from; uc <= c_to; uc++ )
|
||||
{
|
||||
size_t byte_index = uc / 8;
|
||||
size_t bit_index = uc % 8;
|
||||
re->re_set.set[byte_index] |= (1 << bit_index);
|
||||
}
|
||||
}
|
||||
if ( negate )
|
||||
{
|
||||
for ( size_t i = 0; i < 32; i++ )
|
||||
re->re_set.set[i] = ~re->re_set.set[i];
|
||||
}
|
||||
if ( pattern[pattern_index++] != ']' )
|
||||
return free(re), REG_EBRACK;
|
||||
}
|
||||
else if ( escaped && ('0' <= c && c <= '9') )
|
||||
{
|
||||
// TODO: This isn't implemented yet (not part of ERE).
|
||||
return free(re), REG_BADPAT;
|
||||
}
|
||||
else if ( !escaped && c == '.' )
|
||||
re->re_type = RE_TYPE_ANY_CHAR;
|
||||
else
|
||||
{
|
||||
re->re_type = RE_TYPE_CHAR;
|
||||
re->re_char.c = c;
|
||||
}
|
||||
|
||||
*prev_next_ptr = re;
|
||||
|
||||
subexpression_done:
|
||||
if ( (is_basic && pattern[pattern_index + 0] == '\\' &&
|
||||
pattern[pattern_index + 1] == '{') ||
|
||||
(is_extended && pattern[pattern_index] == '{' ) )
|
||||
{
|
||||
pattern_index += is_extended ? 1 : 2;
|
||||
if ( pattern[pattern_index] < '0' ||
|
||||
pattern[pattern_index] > '9' )
|
||||
return REG_BADBR;
|
||||
uintmax_t repeat_min;
|
||||
uintmax_t repeat_max;
|
||||
const char* value;
|
||||
const char* value_end;
|
||||
int saved_errno = errno;
|
||||
value = (char*) (pattern + pattern_index);
|
||||
repeat_min = strtoumax((char*) value, (char**) &value_end, 10);
|
||||
int parse_errno = errno;
|
||||
errno = saved_errno;
|
||||
if ( parse_errno == ERANGE || SIZE_MAX < repeat_min )
|
||||
return REG_BADBR;
|
||||
pattern_index += value_end - value;
|
||||
if ( pattern[pattern_index] == ',' )
|
||||
{
|
||||
repeat_max = SIZE_MAX;
|
||||
pattern_index += 1;
|
||||
if ( pattern[pattern_index] >= '0' &&
|
||||
pattern[pattern_index] <= '9' )
|
||||
{
|
||||
saved_errno = errno;
|
||||
value = (char*) (pattern + pattern_index);
|
||||
repeat_max = strtoumax((char*) value, (char**) &value_end, 10);
|
||||
parse_errno = errno;
|
||||
errno = saved_errno;
|
||||
if ( parse_errno == ERANGE || SIZE_MAX < repeat_max )
|
||||
return REG_BADBR;
|
||||
if ( repeat_max < repeat_min )
|
||||
return REG_BADBR;
|
||||
pattern_index += value_end - value;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
repeat_max = repeat_min;
|
||||
}
|
||||
if ( (is_basic && pattern[pattern_index++] != '\\') ||
|
||||
pattern[pattern_index++] != '}' )
|
||||
return REG_BADBR;
|
||||
struct re* re_repetition = (struct re*) calloc(1, sizeof(struct re));
|
||||
if ( !re_repetition )
|
||||
return REG_ESPACE;
|
||||
re_repetition->re_type = RE_TYPE_REPETITION;
|
||||
re_repetition->re_repetition.re = re;
|
||||
re_repetition->re_repetition.min = (size_t) repeat_min;
|
||||
re_repetition->re_repetition.max = (size_t) repeat_max;
|
||||
*prev_next_ptr = re_repetition;
|
||||
re = re_repetition;
|
||||
}
|
||||
else if ( pattern[pattern_index] == '*' )
|
||||
{
|
||||
pattern_index += 1;
|
||||
struct re* re_repetition = (struct re*) calloc(1, sizeof(struct re));
|
||||
if ( !re_repetition )
|
||||
return REG_ESPACE;
|
||||
re_repetition->re_type = RE_TYPE_REPETITION;
|
||||
re_repetition->re_repetition.re = re;
|
||||
re_repetition->re_repetition.min = 0;
|
||||
re_repetition->re_repetition.max = SIZE_MAX;
|
||||
*prev_next_ptr = re_repetition;
|
||||
re = re_repetition;
|
||||
}
|
||||
else if ( (is_basic && pattern[pattern_index + 0] == '\\' &&
|
||||
pattern[pattern_index + 1] == '?') ||
|
||||
(is_extended && pattern[pattern_index] == '?' ) )
|
||||
{
|
||||
pattern_index += is_extended ? 1 : 2;
|
||||
struct re* re_repetition = (struct re*) calloc(1, sizeof(struct re));
|
||||
if ( !re_repetition )
|
||||
return REG_ESPACE;
|
||||
re_repetition->re_type = RE_TYPE_REPETITION;
|
||||
re_repetition->re_repetition.re = re;
|
||||
re_repetition->re_repetition.min = 0;
|
||||
re_repetition->re_repetition.max = 1;
|
||||
*prev_next_ptr = re_repetition;
|
||||
re = re_repetition;
|
||||
}
|
||||
else if ( (is_basic && pattern[pattern_index + 0] == '\\' &&
|
||||
pattern[pattern_index + 1] == '+') ||
|
||||
(is_extended && pattern[pattern_index] == '+' ) )
|
||||
{
|
||||
pattern_index += is_extended ? 1 : 2;
|
||||
struct re* re_repetition = (struct re*) calloc(1, sizeof(struct re));
|
||||
if ( !re_repetition )
|
||||
return REG_ESPACE;
|
||||
re_repetition->re_type = RE_TYPE_REPETITION;
|
||||
re_repetition->re_repetition.re = re;
|
||||
re_repetition->re_repetition.min = 1;
|
||||
re_repetition->re_repetition.max = SIZE_MAX;
|
||||
*prev_next_ptr = re_repetition;
|
||||
re = re_repetition;
|
||||
}
|
||||
|
||||
if ( re->re_type == RE_TYPE_SUBEXPRESSION )
|
||||
re = re->re_next_owner; // RE_TYPE_SUBEXPRESSION_END.
|
||||
|
||||
prev_next_ptr = &re->re_next_owner;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool re_duplicate(struct re* templ, struct re** re_ptr)
|
||||
{
|
||||
struct re* copy;
|
||||
struct re* parent_templ = NULL;
|
||||
struct re* parent_copy = NULL;
|
||||
while ( true )
|
||||
{
|
||||
if ( !templ )
|
||||
{
|
||||
if ( parent_templ )
|
||||
{
|
||||
templ = parent_templ;
|
||||
copy = parent_copy;
|
||||
parent_templ = templ->re_upcoming_state_next;
|
||||
parent_copy = copy->re_upcoming_state_next;
|
||||
templ = templ->re_next_owner;
|
||||
re_ptr = ©->re_next_owner;
|
||||
continue;
|
||||
}
|
||||
return *re_ptr = NULL, true;
|
||||
}
|
||||
if ( !(copy = (struct re*) calloc(1, sizeof(struct re))) )
|
||||
return false;
|
||||
*re_ptr = copy;
|
||||
copy->re_type = templ->re_type;
|
||||
if ( templ->re_type == RE_TYPE_BOL )
|
||||
;
|
||||
else if ( templ->re_type == RE_TYPE_BOL )
|
||||
;
|
||||
else if ( templ->re_type == RE_TYPE_CHAR )
|
||||
copy->re_char.c = templ->re_char.c;
|
||||
else if ( templ->re_type == RE_TYPE_ANY_CHAR )
|
||||
;
|
||||
else if ( templ->re_type == RE_TYPE_SET )
|
||||
memcpy(copy->re_set.set, templ->re_set.set, 32);
|
||||
else if ( templ->re_type == RE_TYPE_SUBEXPRESSION )
|
||||
{
|
||||
copy->re_subexpression.index = templ->re_subexpression.index;
|
||||
templ->re_upcoming_state_next = parent_templ;
|
||||
copy->re_upcoming_state_next = parent_copy;
|
||||
parent_templ = templ;
|
||||
parent_copy = copy;
|
||||
templ = templ->re_subexpression.re_owner;
|
||||
re_ptr = ©->re_subexpression.re_owner;
|
||||
continue;
|
||||
}
|
||||
else if ( templ->re_type == RE_TYPE_SUBEXPRESSION_END )
|
||||
copy->re_subexpression.index = templ->re_subexpression.index;
|
||||
else if ( templ->re_type == RE_TYPE_ALTERNATIVE ||
|
||||
templ->re_type == RE_TYPE_OPTIONAL ||
|
||||
templ->re_type == RE_TYPE_LOOP )
|
||||
{
|
||||
templ->re_upcoming_state_next = parent_templ;
|
||||
copy->re_upcoming_state_next = parent_copy;
|
||||
parent_templ = templ;
|
||||
parent_copy = copy;
|
||||
templ = templ->re_split.re_owner;
|
||||
re_ptr = ©->re_split.re_owner;
|
||||
continue;
|
||||
}
|
||||
else if ( templ->re_type == RE_TYPE_REPETITION )
|
||||
{
|
||||
copy->re_repetition.min = templ->re_repetition.min;
|
||||
copy->re_repetition.max = templ->re_repetition.max;
|
||||
templ->re_upcoming_state_next = parent_templ;
|
||||
copy->re_upcoming_state_next = parent_copy;
|
||||
parent_templ = templ;
|
||||
parent_copy = copy;
|
||||
templ = templ->re_split.re;
|
||||
re_ptr = ©->re_split.re;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
assert(false);
|
||||
templ = templ->re_next_owner;
|
||||
re_ptr = ©->re_next_owner;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool re_repetition(struct re* templ,
|
||||
struct re** re_ptr,
|
||||
size_t min,
|
||||
size_t max,
|
||||
struct re* after)
|
||||
{
|
||||
while ( true )
|
||||
{
|
||||
if ( !max )
|
||||
return *re_ptr = after, true;
|
||||
struct re* copy = (struct re*) calloc(1, sizeof(struct re));
|
||||
if ( !copy )
|
||||
return false;
|
||||
*re_ptr = copy;
|
||||
copy->re_type = templ->re_type;
|
||||
if ( templ->re_type == RE_TYPE_BOL )
|
||||
;
|
||||
else if ( templ->re_type == RE_TYPE_BOL )
|
||||
;
|
||||
else if ( templ->re_type == RE_TYPE_CHAR )
|
||||
copy->re_char.c = templ->re_char.c;
|
||||
else if ( templ->re_type == RE_TYPE_ANY_CHAR )
|
||||
;
|
||||
else if ( templ->re_type == RE_TYPE_SET )
|
||||
memcpy(copy->re_set.set, templ->re_set.set, 32);
|
||||
else if ( templ->re_type == RE_TYPE_SUBEXPRESSION )
|
||||
{
|
||||
copy->re_subexpression.index = templ->re_subexpression.index;
|
||||
if ( !re_duplicate(templ->re_subexpression.re_owner,
|
||||
©->re_subexpression.re_owner) )
|
||||
return false;
|
||||
struct re* templ_end = templ->re_next_owner;
|
||||
assert(templ_end && templ_end->re_type == RE_TYPE_SUBEXPRESSION_END);
|
||||
struct re* end = (struct re*) calloc(1, sizeof(struct re));
|
||||
if ( !end )
|
||||
return false;
|
||||
end->re_type = RE_TYPE_SUBEXPRESSION_END;
|
||||
end->re_subexpression.index = templ_end->re_subexpression.index;
|
||||
copy->re_next_owner = end;
|
||||
}
|
||||
else
|
||||
assert(false);
|
||||
if ( 1 <= min )
|
||||
{
|
||||
while ( copy->re_next_owner )
|
||||
copy = copy->re_next_owner;
|
||||
re_ptr = ©->re_next_owner;
|
||||
if ( max != SIZE_MAX )
|
||||
max--;
|
||||
min--;
|
||||
}
|
||||
else if ( max < SIZE_MAX )
|
||||
{
|
||||
struct re* wrap = (struct re*) calloc(1, sizeof(struct re));
|
||||
if ( !wrap )
|
||||
return false;
|
||||
wrap->re_type = RE_TYPE_OPTIONAL;
|
||||
wrap->re_split.re_owner = copy;
|
||||
*re_ptr = wrap;
|
||||
re_ptr = &wrap->re_next_owner;
|
||||
max--;
|
||||
}
|
||||
else
|
||||
{
|
||||
struct re* wrap = (struct re*) calloc(1, sizeof(struct re));
|
||||
if ( !wrap )
|
||||
return false;
|
||||
wrap->re_type = RE_TYPE_LOOP;
|
||||
wrap->re_split.re_owner = copy;
|
||||
*re_ptr = wrap;
|
||||
re_ptr = &wrap->re_next_owner;
|
||||
max = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool re_transform(struct re** re_ptr, size_t* state_count_ptr)
|
||||
{
|
||||
if ( !*re_ptr )
|
||||
{
|
||||
struct re* re;
|
||||
if ( !(re = (struct re*) calloc(1, sizeof(struct re))) )
|
||||
return false;
|
||||
re->re_type = RE_TYPE_BOL;
|
||||
*re_ptr = re;
|
||||
}
|
||||
|
||||
struct re** parent_ptr = NULL;
|
||||
while ( *re_ptr )
|
||||
{
|
||||
struct re* re = *re_ptr;
|
||||
|
||||
if ( re->re_type == RE_TYPE_REPETITION )
|
||||
{
|
||||
struct re* templ = re->re_repetition.re;
|
||||
size_t min = re->re_repetition.min;
|
||||
size_t max = re->re_repetition.max;
|
||||
struct re* after = re->re_next_owner;
|
||||
struct re* replacement = NULL;
|
||||
re->re_next_owner = NULL;
|
||||
re_repetition(templ, &replacement, min, max, after);
|
||||
re_free(re);
|
||||
*re_ptr = re = replacement;
|
||||
continue;
|
||||
}
|
||||
|
||||
(*state_count_ptr)++;
|
||||
|
||||
if ( re->re_type == RE_TYPE_SUBEXPRESSION &&
|
||||
re->re_subexpression.re_owner )
|
||||
{
|
||||
re->re_current_state_prev = (struct re*) parent_ptr;
|
||||
parent_ptr = re_ptr;
|
||||
re_ptr = &re->re_subexpression.re_owner;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( (re->re_type == RE_TYPE_ALTERNATIVE ||
|
||||
re->re_type == RE_TYPE_OPTIONAL ||
|
||||
re->re_type == RE_TYPE_LOOP) && re->re_split.re_owner )
|
||||
{
|
||||
re->re_current_state_prev = (struct re*) parent_ptr;
|
||||
parent_ptr = re_ptr;
|
||||
re_ptr = &re->re_split.re_owner;
|
||||
continue;
|
||||
}
|
||||
|
||||
re_ptr = &re->re_next_owner;
|
||||
while ( !*re_ptr && parent_ptr )
|
||||
{
|
||||
re_ptr = parent_ptr;
|
||||
parent_ptr = (struct re**) (*re_ptr)->re_current_state_prev;
|
||||
re_ptr = &(*re_ptr)->re_next_owner;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void re_control_flow(struct re* re,
|
||||
regmatch_t* matches,
|
||||
size_t matches_per_state,
|
||||
size_t* state_count_ptr)
|
||||
{
|
||||
struct re* parent = NULL;
|
||||
struct re* parent_link = NULL;
|
||||
while ( re )
|
||||
{
|
||||
size_t re_index = (*state_count_ptr)++;
|
||||
size_t offset = re_index * matches_per_state;
|
||||
re->re_matches = matches + offset;
|
||||
|
||||
if ( re->re_type == RE_TYPE_ALTERNATIVE )
|
||||
{
|
||||
if ( !re->re_split.re_owner )
|
||||
re->re_split.re = parent_link;
|
||||
if ( !re->re_next_owner )
|
||||
re->re_next = parent_link;
|
||||
if ( re->re_split.re_owner && re->re_next_owner )
|
||||
{
|
||||
re->re_next = re->re_next_owner;
|
||||
re->re_current_state_prev = parent;
|
||||
re->re_current_state_next = parent_link;
|
||||
re->re_upcoming_state_next = re->re_next_owner;
|
||||
parent = re;
|
||||
re = re->re_split.re = re->re_split.re_owner;
|
||||
}
|
||||
else if ( re->re_split.re_owner )
|
||||
re = re->re_split.re = re->re_split.re_owner;
|
||||
else if ( re->re_next_owner )
|
||||
re = re->re_next = re->re_next_owner;
|
||||
else if ( parent )
|
||||
{
|
||||
re = parent;
|
||||
parent = re->re_current_state_prev;
|
||||
parent_link = re->re_current_state_next;
|
||||
re = re->re_upcoming_state_next;
|
||||
}
|
||||
else
|
||||
re = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( !re->re_next_owner && parent_link )
|
||||
re->re_next = parent_link;
|
||||
else
|
||||
re->re_next = re->re_next_owner;
|
||||
|
||||
if ( re->re_type == RE_TYPE_LOOP || re->re_type == RE_TYPE_OPTIONAL )
|
||||
{
|
||||
struct re* inner = re->re_split.re_owner;
|
||||
struct re* after = re->re_next;
|
||||
re->re_split.re = after;
|
||||
re->re_next = inner;
|
||||
if ( re->re_next_owner )
|
||||
{
|
||||
re->re_current_state_prev = parent;
|
||||
re->re_current_state_next = parent_link;
|
||||
re->re_upcoming_state_next = after;
|
||||
parent = re;
|
||||
}
|
||||
if ( re->re_type == RE_TYPE_LOOP )
|
||||
parent_link = re;
|
||||
else
|
||||
parent_link = after;
|
||||
re = inner;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( re->re_type == RE_TYPE_SUBEXPRESSION )
|
||||
{
|
||||
if ( re->re_subexpression.re_owner )
|
||||
{
|
||||
re->re_current_state_prev = parent;
|
||||
re->re_current_state_next = parent_link;
|
||||
re->re_upcoming_state_next = re->re_next_owner;
|
||||
parent = re;
|
||||
parent_link = re->re_next;
|
||||
re->re_next = re->re_subexpression.re_owner;
|
||||
re = re->re_subexpression.re_owner;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if ( !re->re_next_owner && parent )
|
||||
{
|
||||
re = parent;
|
||||
parent = re->re_current_state_prev;
|
||||
parent_link = re->re_current_state_next;
|
||||
}
|
||||
|
||||
re = re->re_next_owner;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C"
|
||||
int regcomp(regex_t* restrict regex,
|
||||
const char* restrict pattern,
|
||||
int cflags)
|
||||
{
|
||||
// TODO: Verify cflags.
|
||||
// TODO: Implement REG_ICASE.
|
||||
// TODO: Implement REG_NOSUB.
|
||||
// TODO: Implement REG_NEWLINE.
|
||||
memset(regex, 0, sizeof(*regex));
|
||||
pthread_mutex_init(®ex->re_lock, NULL);
|
||||
regex->re_cflags = cflags;
|
||||
struct re_parse parse;
|
||||
memset(&parse, 0, sizeof(parse));
|
||||
parse.subexpr_num = 1;
|
||||
int ret = re_parse(&parse, ®ex->re, pattern, cflags);
|
||||
while ( parse.subexpr )
|
||||
{
|
||||
struct re_parse_subexpr* todelete = parse.subexpr;
|
||||
parse.subexpr = todelete->next;
|
||||
free(todelete);
|
||||
}
|
||||
if ( ret != 0 )
|
||||
return regfree(regex), ret;
|
||||
size_t state_count = 0;
|
||||
if ( !re_transform(®ex->re, &state_count) )
|
||||
return regfree(regex), REG_ESPACE;
|
||||
size_t matches_length;
|
||||
if ( __builtin_mul_overflow(parse.subexpr_num, state_count, &matches_length) )
|
||||
return regfree(regex), REG_ESPACE;
|
||||
regex->re_matches = (regmatch_t*)
|
||||
reallocarray(NULL, matches_length, sizeof(regmatch_t));
|
||||
if ( !regex->re_matches )
|
||||
return regfree(regex), REG_ESPACE;
|
||||
size_t state_recount = 0;
|
||||
re_control_flow(regex->re, regex->re_matches, parse.subexpr_num, &state_recount);
|
||||
assert(state_count == state_recount);
|
||||
if ( !(cflags & REG_NOSUB) )
|
||||
regex->re_nsub = parse.subexpr_num - 1;
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Copyright(C) Jonas 'Sortie' Termansen 2014, 2015.
|
||||
|
||||
This file is part of the Sortix C Library.
|
||||
|
||||
The Sortix C Library is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The Sortix C Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
regex/regerror.cpp
|
||||
Regular expression error reporting.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include <regex.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
extern "C"
|
||||
size_t regerror(int errnum,
|
||||
const regex_t* restrict regex,
|
||||
char* restrict errbuf,
|
||||
size_t errbuf_size)
|
||||
{
|
||||
(void) regex;
|
||||
const char* msg = "Unknown regular expression error";
|
||||
switch ( errnum )
|
||||
{
|
||||
case REG_NOMATCH: msg = "Regular expression does not match"; break;
|
||||
case REG_BADPAT: msg = "Invalid regular expression"; break;
|
||||
case REG_ECOLLATE: msg = "Invalid collating element referenced"; break;
|
||||
case REG_ECTYPE: msg = "Invalid character class type referenced"; break;
|
||||
case REG_EESCAPE: msg = "Trailing <backslash> character in pattern"; break;
|
||||
case REG_ESUBREG: msg = "Number in \\digit invalid or in error"; break;
|
||||
case REG_EBRACK: msg = "\"[]\" imbalance"; break;
|
||||
case REG_EPAREN: msg = "\"\\(\\)\" or \"()\" imbalance"; break;
|
||||
case REG_EBRACE: msg = "\"\\{\\}\" imbalance"; break;
|
||||
case REG_BADBR: msg = "Content of \"\\{\\}\" invalid: not a number, number too large, more than two numbers, first larger than second"; break;
|
||||
case REG_ERANGE: msg = "Invalid endpoint in range expression"; break;
|
||||
case REG_ESPACE: msg = "Out of memory"; break;
|
||||
case REG_BADRPT: msg = "'?', '*', or '+' not preceded by valid regular expression"; break;
|
||||
}
|
||||
if ( errbuf_size )
|
||||
strlcpy(errbuf, msg, errbuf_size);
|
||||
return strlen(msg) + 1;
|
||||
}
|
|
@ -0,0 +1,253 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Copyright(C) Jonas 'Sortie' Termansen 2014, 2015.
|
||||
|
||||
This file is part of the Sortix C Library.
|
||||
|
||||
The Sortix C Library is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The Sortix C Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
regex/regexec.cpp
|
||||
Regular expression execution.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include <assert.h>
|
||||
#include <regex.h>
|
||||
#include <pthread.h>
|
||||
|
||||
/*
 * QUEUE_CURRENT_STATE(new_state)
 *
 * Helper for regexec(): schedules new_state to be processed during the current
 * input position by linking it into the current-state list directly after
 * `state`.
 *
 * The macro relies on these names being in scope at the expansion site:
 * `state` (the node being processed), `match`, `current_states`,
 * `current_states_last` and `nmatch`.
 *
 * - If new_state is NULL, `match` is set to true. Every node queued after
 *   `state` in the current list is marked as no longer current and the list is
 *   cut off after `state`.
 * - Otherwise, unless new_state is both current and already processed for this
 *   position, it is unlinked from the current list (if it is in it), relinked
 *   right after `state`, flagged as current and not yet done, and given a copy
 *   of the first nmatch entries of state->re_matches.
 *
 * The body is wrapped in do { } while ( 0 ) so the macro acts as a single
 * statement, and the argument is parenthesized at every use.
 */
#define QUEUE_CURRENT_STATE(new_state) \
do \
{ \
	if ( !(new_state) ) \
	{ \
		match = true; \
		for ( struct re* re = state->re_current_state_next; \
		      re; \
		      re = re->re_current_state_next ) \
			re->re_is_current = 0; \
		state->re_current_state_next = NULL; \
		current_states_last = state; \
	} \
	else if ( !((new_state)->re_is_current && \
	            (new_state)->re_is_currently_done) ) \
	{ \
		if ( (new_state)->re_is_current ) \
		{ \
			if ( (new_state)->re_current_state_prev ) \
				(new_state)->re_current_state_prev->re_current_state_next = \
					(new_state)->re_current_state_next; \
			else \
				current_states = (new_state)->re_current_state_next; \
			if ( (new_state)->re_current_state_next ) \
				(new_state)->re_current_state_next->re_current_state_prev = \
					(new_state)->re_current_state_prev; \
			else \
				current_states_last = (new_state)->re_current_state_prev; \
		} \
		(new_state)->re_current_state_prev = state; \
		(new_state)->re_current_state_next = state->re_current_state_next; \
		if ( state->re_current_state_next ) \
			state->re_current_state_next->re_current_state_prev = (new_state); \
		else \
			current_states_last = (new_state); \
		state->re_current_state_next = (new_state); \
		(new_state)->re_is_currently_done = 0; \
		(new_state)->re_is_current = 1; \
		(new_state)->re_is_upcoming = 0; \
		for ( size_t m = 0; m < nmatch; m++ ) \
			(new_state)->re_matches[m] = state->re_matches[m]; \
	} \
} while ( 0 )
|
||||
|
||||
/*
 * QUEUE_UPCOMING_STATE(new_state)
 *
 * Helper for regexec(): schedules new_state to be processed at the next input
 * position by appending it to the upcoming-state list. It is used by the node
 * types that consume the current character.
 *
 * The macro relies on these names being in scope at the expansion site:
 * `state` (the node being processed), `match`, `consumed_char`,
 * `current_states_last`, `upcoming_states`, `upcoming_states_last` and
 * `nmatch`.
 *
 * - If new_state is NULL, both `consumed_char` and `match` are set to true.
 *   Every node queued after `state` in the current list is marked as no longer
 *   current and the list is cut off after `state`.
 * - Otherwise, if new_state is not already flagged as upcoming, it is appended
 *   to the upcoming list, flagged as upcoming, and given a copy of the first
 *   nmatch entries of state->re_matches. A node that is already upcoming is
 *   left untouched.
 *
 * The body is wrapped in do { } while ( 0 ) so the macro acts as a single
 * statement, and the argument is parenthesized at every use.
 */
#define QUEUE_UPCOMING_STATE(new_state) \
do \
{ \
	if ( !(new_state) ) \
	{ \
		consumed_char = true; \
		match = true; \
		for ( struct re* re = state->re_current_state_next; \
		      re; \
		      re = re->re_current_state_next ) \
			re->re_is_current = 0; \
		state->re_current_state_next = NULL; \
		current_states_last = state; \
	} \
	else if ( !(new_state)->re_is_upcoming ) \
	{ \
		if ( !upcoming_states ) \
			upcoming_states = (new_state); \
		if ( upcoming_states_last ) \
			upcoming_states_last->re_upcoming_state_next = (new_state); \
		upcoming_states_last = (new_state); \
		(new_state)->re_upcoming_state_next = NULL; \
		(new_state)->re_is_upcoming = 1; \
		for ( size_t m = 0; m < nmatch; m++ ) \
			(new_state)->re_matches[m] = state->re_matches[m]; \
	} \
} while ( 0 )
|
||||
|
||||
extern "C"
|
||||
int regexec(const regex_t* restrict regex_const,
|
||||
const char* restrict string,
|
||||
size_t nmatch,
|
||||
regmatch_t* restrict pmatch,
|
||||
int eflags)
|
||||
{
|
||||
// TODO: Sanitize eflags.
|
||||
|
||||
regex_t* regex = (regex_t*) regex_const;
|
||||
pthread_mutex_lock(®ex->re_lock);
|
||||
|
||||
if ( regex->re_cflags & REG_NOSUB )
|
||||
nmatch = 0;
|
||||
|
||||
for ( size_t i = 0; i < nmatch; i++ )
|
||||
{
|
||||
pmatch[i].rm_so = -1;
|
||||
pmatch[i].rm_eo = -1;
|
||||
}
|
||||
|
||||
if ( regex->re_nsub + 1 < nmatch )
|
||||
nmatch = regex->re_nsub + 1;
|
||||
|
||||
int result = REG_NOMATCH;
|
||||
|
||||
struct re* current_states = NULL;
|
||||
struct re* current_states_last = NULL;
|
||||
struct re* upcoming_states = NULL;
|
||||
struct re* upcoming_states_last = NULL;
|
||||
|
||||
regex->re->re_is_current = 0;
|
||||
|
||||
for ( size_t i = 0; true; i++ )
|
||||
{
|
||||
if ( !regex->re->re_is_current && result == REG_NOMATCH )
|
||||
{
|
||||
if ( current_states_last )
|
||||
current_states_last->re_current_state_next = regex->re;
|
||||
else
|
||||
current_states = regex->re;
|
||||
regex->re->re_current_state_prev = current_states_last;
|
||||
regex->re->re_current_state_next = NULL;
|
||||
current_states_last = regex->re;
|
||||
regex->re->re_is_currently_done = 0;
|
||||
regex->re->re_is_current = 1;
|
||||
regex->re->re_is_upcoming = 0;
|
||||
for ( size_t m = 0; m < nmatch; m++ )
|
||||
{
|
||||
regex->re->re_matches[m].rm_so = m == 0 ? i : -1;
|
||||
regex->re->re_matches[m].rm_eo = -1;
|
||||
}
|
||||
}
|
||||
char c = string[i];
|
||||
for ( struct re* state = current_states;
|
||||
state;
|
||||
state = state->re_current_state_next )
|
||||
{
|
||||
bool match = false;
|
||||
bool consumed_char = false;
|
||||
if ( state->re_type == RE_TYPE_BOL )
|
||||
{
|
||||
if ( !(eflags & REG_NOTBOL) )
|
||||
QUEUE_CURRENT_STATE(state->re_next);
|
||||
}
|
||||
else if ( state->re_type == RE_TYPE_EOL )
|
||||
{
|
||||
if ( !(eflags & REG_NOTEOL) && c == '\0' )
|
||||
QUEUE_CURRENT_STATE(state->re_next);
|
||||
}
|
||||
else if ( state->re_type == RE_TYPE_CHAR )
|
||||
{
|
||||
if ( c != '\0' && state->re_char.c == c )
|
||||
QUEUE_UPCOMING_STATE(state->re_next);
|
||||
}
|
||||
else if ( state->re_type == RE_TYPE_ANY_CHAR )
|
||||
{
|
||||
if ( c != '\0' )
|
||||
QUEUE_UPCOMING_STATE(state->re_next);
|
||||
}
|
||||
else if ( state->re_type == RE_TYPE_SET )
|
||||
{
|
||||
unsigned char uc = c;
|
||||
if ( c != '\0' && (state->re_set.set[uc / 8] & (1 << (uc % 8))) )
|
||||
QUEUE_UPCOMING_STATE(state->re_next);
|
||||
}
|
||||
else if ( state->re_type == RE_TYPE_SUBEXPRESSION )
|
||||
{
|
||||
size_t index = state->re_subexpression.index;
|
||||
state->re_matches[index].rm_so = i;
|
||||
QUEUE_CURRENT_STATE(state->re_next);
|
||||
}
|
||||
else if ( state->re_type == RE_TYPE_SUBEXPRESSION_END )
|
||||
{
|
||||
size_t index = state->re_subexpression.index;
|
||||
state->re_matches[index].rm_eo = i;
|
||||
QUEUE_CURRENT_STATE(state->re_next);
|
||||
}
|
||||
else if ( state->re_type == RE_TYPE_ALTERNATIVE ||
|
||||
state->re_type == RE_TYPE_OPTIONAL ||
|
||||
state->re_type == RE_TYPE_LOOP )
|
||||
{
|
||||
QUEUE_CURRENT_STATE(state->re_split.re);
|
||||
QUEUE_CURRENT_STATE(state->re_next);
|
||||
}
|
||||
state->re_is_currently_done = 1;
|
||||
if ( match )
|
||||
{
|
||||
state->re_matches[0].rm_eo = i + consumed_char;
|
||||
for ( size_t m = 0; m < nmatch; m++ )
|
||||
pmatch[m] = state->re_matches[m];
|
||||
result = 0;
|
||||
if ( nmatch == 0 )
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for ( struct re* re = current_states; re; re = re->re_current_state_next )
|
||||
re->re_is_current = 0;
|
||||
|
||||
if ( nmatch == 0 && result == 0 )
|
||||
{
|
||||
for ( struct re* re = upcoming_states; re; re = re->re_upcoming_state_next )
|
||||
re->re_is_upcoming = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
current_states = upcoming_states;
|
||||
if ( current_states )
|
||||
current_states->re_current_state_prev = NULL;
|
||||
current_states_last = upcoming_states_last;
|
||||
for ( struct re* re = current_states; re; re = re->re_current_state_next )
|
||||
{
|
||||
re->re_is_currently_done = 0;
|
||||
re->re_is_current = 1;
|
||||
re->re_is_upcoming = 0;
|
||||
re->re_current_state_next = re->re_upcoming_state_next;
|
||||
if ( re->re_current_state_next )
|
||||
re->re_current_state_next->re_current_state_prev = re;
|
||||
}
|
||||
upcoming_states = NULL;
|
||||
upcoming_states_last = NULL;
|
||||
|
||||
eflags |= REG_NOTBOL;
|
||||
|
||||
if ( current_states == NULL && result == 0 )
|
||||
break;
|
||||
|
||||
if ( c == '\0' )
|
||||
break;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(®ex->re_lock);
|
||||
|
||||
return result;
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Copyright(C) Jonas 'Sortie' Termansen 2014, 2015.
|
||||
|
||||
This file is part of the Sortix C Library.
|
||||
|
||||
The Sortix C Library is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The Sortix C Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
regex/regfree.cpp
|
||||
Regular expression freeing.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include <regex.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
extern "C" void regfree(regex_t* regex)
|
||||
{
|
||||
struct re* parent = NULL;
|
||||
struct re* re = regex->re;
|
||||
while ( re )
|
||||
{
|
||||
if ( re->re_type == RE_TYPE_SUBEXPRESSION && re->re_subexpression.re_owner )
|
||||
{
|
||||
re->re_next = parent;
|
||||
parent = re;
|
||||
re = parent->re_subexpression.re_owner;
|
||||
parent->re_subexpression.re_owner = NULL;
|
||||
continue;
|
||||
}
|
||||
if ( (re->re_type == RE_TYPE_ALTERNATIVE ||
|
||||
re->re_type == RE_TYPE_OPTIONAL ||
|
||||
re->re_type == RE_TYPE_LOOP) &&
|
||||
re->re_split.re_owner )
|
||||
{
|
||||
re->re_next = parent;
|
||||
parent = re;
|
||||
re = parent->re_split.re_owner;
|
||||
parent->re_split.re_owner = NULL;
|
||||
continue;
|
||||
}
|
||||
if ( re->re_type == RE_TYPE_REPETITION && re->re_repetition.re )
|
||||
{
|
||||
re->re_next = parent;
|
||||
parent = re;
|
||||
re = parent->re_repetition.re;
|
||||
parent->re_repetition.re = NULL;
|
||||
continue;
|
||||
}
|
||||
struct re* todelete = re;
|
||||
re = re->re_next_owner;
|
||||
if ( !re && parent )
|
||||
{
|
||||
re = parent;
|
||||
parent = re->re_next;
|
||||
}
|
||||
free(todelete);
|
||||
}
|
||||
free(regex->re_matches);
|
||||
pthread_mutex_destroy(®ex->re_lock);
|
||||
}
|
|
@ -24,6 +24,7 @@
|
|||
#include <error.h>
|
||||
#include <inttypes.h>
|
||||
#include <locale.h>
|
||||
#include <regex.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -43,6 +44,14 @@ char* strdup_or_die(const char* str)
|
|||
return result;
|
||||
}
|
||||
|
||||
// Returns a newly allocated, nul-terminated copy of at most n bytes of str.
// If the allocation fails, the failure is reported through error() with exit
// status 2 and the errno set by strndup. The caller owns the returned string
// and releases it with free().
char* strndup_or_die(const char* str, size_t n)
{
	char* result = strndup(str, n);
	// Check the allocation result rather than the input pointer; otherwise an
	// out-of-memory condition would go unnoticed and NULL would be returned.
	if ( !result )
		error(2, errno, "strndup");
	return result;
}
|
||||
|
||||
char* print_intmax_or_die(intmax_t value)
|
||||
{
|
||||
char value_string[sizeof(intmax_t) * 3];
|
||||
|
@ -282,16 +291,50 @@ char* evaluate_mod(const char* a, const char* b)
|
|||
return evaluate_integer_function(a, b, integer_mod);
|
||||
}
|
||||
|
||||
// TODO: Implement regular expression pattern matching!
|
||||
char* evaluate_match(const char* a, const char* b)
|
||||
{
|
||||
size_t b_length = strlen(b);
|
||||
for ( size_t i = 0; i < b_length; i++ )
|
||||
regex_t regex;
|
||||
int status = regcomp(®ex, b, 0);
|
||||
if ( status != 0 )
|
||||
{
|
||||
if ( b[i] != a[i] )
|
||||
return strdup_or_die("0");
|
||||
char errbuf[256];
|
||||
const char* errmsg = errbuf;
|
||||
char* erralloc = NULL;
|
||||
size_t errbuf_needed;
|
||||
if ( sizeof(errbuf) < (errbuf_needed = regerror(status, ®ex, errbuf,
|
||||
sizeof(errbuf))) )
|
||||
{
|
||||
if ( (erralloc = (char*) malloc(errbuf_needed)) )
|
||||
{
|
||||
errmsg = erralloc;
|
||||
regerror(status, ®ex, erralloc, errbuf_needed);
|
||||
}
|
||||
return print_intmax_or_die((intmax_t) strlen(a));
|
||||
}
|
||||
error(2, 0, "compiling regular expression: %s", errmsg);
|
||||
free(erralloc);
|
||||
}
|
||||
|
||||
char* result;
|
||||
|
||||
regmatch_t rm[2];
|
||||
if ( regexec(®ex, a, 2, rm, 0) == 0 && rm[0].rm_so == 0 )
|
||||
{
|
||||
if ( 0 <= rm[1].rm_so )
|
||||
result = strndup_or_die(a + rm[1].rm_so, rm[1].rm_eo - rm[1].rm_so);
|
||||
else
|
||||
result = print_intmax_or_die(rm[0].rm_eo);
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( 0 < regex.re_nsub )
|
||||
result = strdup_or_die("");
|
||||
else
|
||||
result = strdup_or_die("0");
|
||||
}
|
||||
|
||||
regfree(®ex);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct binary_operator
|
||||
|
|
Loading…
Reference in New Issue