mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
9a938987cb
* ext/date/date_strptime.c (date__strptime_internal): unset case-insensitive flag for [:alpha:], which already implies both cases, to get rid of backtrack explosion. [ruby-core:58984] [Bug #9221] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@44126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
699 lines
12 KiB
C
699 lines
12 KiB
C
/*
  date_strptime.c: Coded by Tadayoshi Funaba 2011,2012
*/
|
|
|
|
#include "ruby.h"
|
|
#include "ruby/encoding.h"
|
|
#include "ruby/re.h"
|
|
#include <ctype.h>
|
|
|
|
/*
 * Weekday names accepted by %A / %a: the seven full names followed by the
 * seven three-letter abbreviations, both starting at Sunday.  The matcher
 * takes the table index modulo 7 as the weekday number, so both halves
 * must stay in the same order.
 */
static const char *const day_names[] = {
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday",
    "Sun", "Mon", "Tue", "Wed",
    "Thu", "Fri", "Sat"
};
|
|
|
|
/*
 * Month names accepted by %B / %b / %h: the twelve full names followed by
 * the twelve three-letter abbreviations.  The matcher converts a table
 * index to a month number with (index % 12) + 1, so both halves must stay
 * in calendar order.
 */
static const char *const month_names[] = {
    "January", "February", "March", "April",
    "May", "June", "July", "August", "September",
    "October", "November", "December",
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
|
|
|
|
/*
 * Meridian indicators accepted by %P / %p.  Even indexes are the "am"
 * spellings and odd indexes the "pm" spellings; the matcher relies on that
 * parity to choose between an offset of 0 and 12 hours.
 */
static const char *const merid_names[] = {
    "am", "pm",
    "a.m.", "p.m."
};
|
|
|
|
/*
 * Extended time-zone directives as they appear after the '%': entry i
 * carries i + 1 colons before the 'z'.  The format scanner uses the index
 * of the matching entry as the number of extra colons to skip.
 */
static const char *const extz_pats[] = {
    ":z",
    "::z",
    ":::z"
};
|
|
|
|
/* Number of elements in a true array (not a pointer). */
#define sizeof_array(o) (sizeof(o) / sizeof((o)[0]))

/* Arithmetic on Ruby objects, dispatched as ordinary method calls. */
#define f_negate(x) rb_funcall(x, rb_intern("-@"), 0)
#define f_add(x,y) rb_funcall(x, '+', 1, y)
#define f_sub(x,y) rb_funcall(x, '-', 1, y)
#define f_mul(x,y) rb_funcall(x, '*', 1, y)
#define f_div(x,y) rb_funcall(x, '/', 1, y)
#define f_idiv(x,y) rb_funcall(x, rb_intern("div"), 1, y)
#define f_mod(x,y) rb_funcall(x, '%', 1, y)
#define f_expt(x,y) rb_funcall(x, rb_intern("**"), 1, y)

/* Comparisons on Ruby objects; each yields the VALUE the method returns. */
#define f_lt_p(x,y) rb_funcall(x, '<', 1, y)
#define f_gt_p(x,y) rb_funcall(x, '>', 1, y)
#define f_le_p(x,y) rb_funcall(x, rb_intern("<="), 1, y)
#define f_ge_p(x,y) rb_funcall(x, rb_intern(">="), 1, y)

/* Regexp / MatchData helpers. */
#define f_match(r,s) rb_funcall(r, rb_intern("match"), 1, s)
#define f_aref(o,i) rb_funcall(o, rb_intern("[]"), 1, i)
#define f_end(o,i) rb_funcall(o, rb_intern("end"), 1, i)

/* True when c is an explicit sign character. */
#define issign(c) ((c) == '-' || (c) == '+')
|
|
|
|
/*
 * Report whether the format text at s begins with something that consumes
 * digits: either a literal digit, or a '%' directive (optionally carrying
 * an 'E' or 'O' modifier) whose conversion letter is one of the numeric
 * directives listed below or is itself a digit.
 *
 * Callers use the answer to decide whether a numeric field must be read
 * with a fixed width (because more digits follow immediately) or may
 * consume every digit available.
 *
 * Returns 1 when such a pattern follows, 0 otherwise (including for an
 * empty string or a lone '%').
 */
static int
num_pattern_p(const char *s)
{
    if (isdigit((unsigned char)*s))
        return 1;

    if (*s == '%') {
        s++;
        if (*s == 'E' || *s == 'O')
            s++;
        /* The *s test keeps strchr() from matching its own terminator. */
        if (*s &&
            (strchr("CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy", *s) ||
             isdigit((unsigned char)*s)))
            return 1;
    }

    return 0;
}
|
|
|
|
/* Does the format text right after the current directive letter start with
   a digit-consuming pattern?  Expects `fmt` and `fi` in the expanding scope. */
#define NUM_PATTERN_P() num_pattern_p(fmt + fi + 1)
|
|
|
|
static long
|
|
read_digits(const char *s, VALUE *n, size_t width)
|
|
{
|
|
size_t l;
|
|
|
|
l = strspn(s, "0123456789");
|
|
|
|
if (l == 0)
|
|
return 0;
|
|
|
|
if (width < l)
|
|
l = width;
|
|
|
|
if ((4 * l * sizeof(char)) <= (sizeof(long)*CHAR_BIT)) {
|
|
const char *os = s;
|
|
long v;
|
|
|
|
v = 0;
|
|
while ((size_t)(s - os) < l) {
|
|
v *= 10;
|
|
v += *s - '0';
|
|
s++;
|
|
}
|
|
if (os == s)
|
|
return 0;
|
|
*n = LONG2NUM(v);
|
|
return l;
|
|
}
|
|
else {
|
|
char *s2 = ALLOCA_N(char, l + 1);
|
|
memcpy(s2, s, l);
|
|
s2[l] = '\0';
|
|
*n = rb_cstr_to_inum(s2, 10, 0);
|
|
return l;
|
|
}
|
|
}
|
|
|
|
/*
 * Accessors for the result hash.  Keys are Symbols built from a C string,
 * and all three operate on a variable named `hash` in the expanding scope.
 */
#define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k)), v)
#define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k)))
#define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k)))

/*
 * Mark the parse as failed by setting :_fail in the hash and return 0 from
 * the enclosing function.  This is deliberately a bare brace block rather
 * than do { } while (0): READ_DIGITS below is built the same way and is
 * used without a trailing semicolon directly before `else`.
 */
#define fail() \
{ \
    set_hash("_fail", Qtrue); \
    return 0; \
}

/* True once any directive has recorded a failure in the hash. */
#define fail_p() (!NIL_P(ref_hash("_fail")))

/*
 * Read at most w digits at the current input position into n and advance
 * `si` past them; fails the parse when no digit is present.  Expects `str`
 * and `si` in the expanding scope.
 */
#define READ_DIGITS(n,w) \
{ \
    size_t l; \
    l = read_digits(&str[si], &n, w); \
    if (l == 0) \
        fail(); \
    si += l; \
}

/* READ_DIGITS with no practical limit on the number of digits. */
#define READ_DIGITS_MAX(n) READ_DIGITS(n, LONG_MAX)
|
|
|
|
static int
|
|
valid_range_p(VALUE v, int a, int b)
|
|
{
|
|
if (FIXNUM_P(v)) {
|
|
int vi = FIX2INT(v);
|
|
return !(vi < a || vi > b);
|
|
}
|
|
return !(f_lt_p(v, INT2NUM(a)) || f_gt_p(v, INT2NUM(b)));
|
|
}
|
|
|
|
/*
 * Expand a composite directive: parse the remaining input against the
 * string literal fmt by calling date__strptime_internal() recursively,
 * then advance `si` by what it consumed.  Returns 0 from the enclosing
 * function if the nested parse recorded a failure.
 *
 * fmt must be a string literal (its length is taken with sizeof).  Expects
 * `str`, `slen`, `si` and `hash` in the expanding scope.
 */
#define recur(fmt) \
{ \
    size_t l; \
    l = date__strptime_internal(&str[si], slen - si, \
                                fmt, sizeof fmt - 1, hash); \
    if (fail_p()) \
        return 0; \
    si += l; \
}
|
|
|
|
/* Not defined in the visible part of this file.  The %Z / %z directive
   passes it the matched zone text and stores the result under :offset. */
VALUE date_zone_to_diff(VALUE);
|
|
|
|
static size_t
|
|
date__strptime_internal(const char *str, size_t slen,
|
|
const char *fmt, size_t flen, VALUE hash)
|
|
{
|
|
size_t si, fi;
|
|
int c;
|
|
|
|
si = fi = 0;
|
|
|
|
while (fi < flen) {
|
|
|
|
switch (fmt[fi]) {
|
|
case '%':
|
|
|
|
again:
|
|
fi++;
|
|
c = fmt[fi];
|
|
|
|
switch (c) {
|
|
case 'E':
|
|
if (fmt[fi + 1] && strchr("cCxXyY", fmt[fi + 1]))
|
|
goto again;
|
|
fi--;
|
|
goto ordinal;
|
|
case 'O':
|
|
if (fmt[fi + 1] && strchr("deHImMSuUVwWy", fmt[fi + 1]))
|
|
goto again;
|
|
fi--;
|
|
goto ordinal;
|
|
case ':':
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < (int)sizeof_array(extz_pats); i++)
|
|
if (strncmp(extz_pats[i], &fmt[fi],
|
|
strlen(extz_pats[i])) == 0) {
|
|
fi += i;
|
|
goto again;
|
|
}
|
|
fail();
|
|
}
|
|
|
|
case 'A':
|
|
case 'a':
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < (int)sizeof_array(day_names); i++) {
|
|
size_t l = strlen(day_names[i]);
|
|
if (strncasecmp(day_names[i], &str[si], l) == 0) {
|
|
si += l;
|
|
set_hash("wday", INT2FIX(i % 7));
|
|
goto matched;
|
|
}
|
|
}
|
|
fail();
|
|
}
|
|
case 'B':
|
|
case 'b':
|
|
case 'h':
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < (int)sizeof_array(month_names); i++) {
|
|
size_t l = strlen(month_names[i]);
|
|
if (strncasecmp(month_names[i], &str[si], l) == 0) {
|
|
si += l;
|
|
set_hash("mon", INT2FIX((i % 12) + 1));
|
|
goto matched;
|
|
}
|
|
}
|
|
fail();
|
|
}
|
|
|
|
case 'C':
|
|
{
|
|
VALUE n;
|
|
|
|
if (NUM_PATTERN_P())
|
|
READ_DIGITS(n, 2)
|
|
else
|
|
READ_DIGITS_MAX(n)
|
|
set_hash("_cent", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'c':
|
|
recur("%a %b %e %H:%M:%S %Y");
|
|
goto matched;
|
|
|
|
case 'D':
|
|
recur("%m/%d/%y");
|
|
goto matched;
|
|
|
|
case 'd':
|
|
case 'e':
|
|
{
|
|
VALUE n;
|
|
|
|
if (str[si] == ' ') {
|
|
si++;
|
|
READ_DIGITS(n, 1);
|
|
} else {
|
|
READ_DIGITS(n, 2);
|
|
}
|
|
if (!valid_range_p(n, 1, 31))
|
|
fail();
|
|
set_hash("mday", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'F':
|
|
recur("%Y-%m-%d");
|
|
goto matched;
|
|
|
|
case 'G':
|
|
{
|
|
VALUE n;
|
|
|
|
if (NUM_PATTERN_P())
|
|
READ_DIGITS(n, 4)
|
|
else
|
|
READ_DIGITS_MAX(n)
|
|
set_hash("cwyear", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'g':
|
|
{
|
|
VALUE n;
|
|
|
|
READ_DIGITS(n, 2);
|
|
if (!valid_range_p(n, 0, 99))
|
|
fail();
|
|
set_hash("cwyear",n);
|
|
set_hash("_cent",
|
|
INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
|
|
goto matched;
|
|
}
|
|
|
|
case 'H':
|
|
case 'k':
|
|
{
|
|
VALUE n;
|
|
|
|
if (str[si] == ' ') {
|
|
si++;
|
|
READ_DIGITS(n, 1);
|
|
} else {
|
|
READ_DIGITS(n, 2);
|
|
}
|
|
if (!valid_range_p(n, 0, 24))
|
|
fail();
|
|
set_hash("hour", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'I':
|
|
case 'l':
|
|
{
|
|
VALUE n;
|
|
|
|
if (str[si] == ' ') {
|
|
si++;
|
|
READ_DIGITS(n, 1);
|
|
} else {
|
|
READ_DIGITS(n, 2);
|
|
}
|
|
if (!valid_range_p(n, 1, 12))
|
|
fail();
|
|
set_hash("hour", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'j':
|
|
{
|
|
VALUE n;
|
|
|
|
READ_DIGITS(n, 3);
|
|
if (!valid_range_p(n, 1, 366))
|
|
fail();
|
|
set_hash("yday", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'L':
|
|
case 'N':
|
|
{
|
|
VALUE n;
|
|
int sign = 1;
|
|
size_t osi;
|
|
|
|
if (issign(str[si])) {
|
|
if (str[si] == '-')
|
|
sign = -1;
|
|
si++;
|
|
}
|
|
osi = si;
|
|
if (NUM_PATTERN_P())
|
|
READ_DIGITS(n, c == 'L' ? 3 : 9)
|
|
else
|
|
READ_DIGITS_MAX(n)
|
|
if (sign == -1)
|
|
n = f_negate(n);
|
|
set_hash("sec_fraction",
|
|
rb_rational_new2(n,
|
|
f_expt(INT2FIX(10),
|
|
ULONG2NUM(si - osi))));
|
|
goto matched;
|
|
}
|
|
|
|
case 'M':
|
|
{
|
|
VALUE n;
|
|
|
|
READ_DIGITS(n, 2);
|
|
if (!valid_range_p(n, 0, 59))
|
|
fail();
|
|
set_hash("min", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'm':
|
|
{
|
|
VALUE n;
|
|
|
|
READ_DIGITS(n, 2);
|
|
if (!valid_range_p(n, 1, 12))
|
|
fail();
|
|
set_hash("mon", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'n':
|
|
case 't':
|
|
recur(" ");
|
|
goto matched;
|
|
|
|
case 'P':
|
|
case 'p':
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
size_t l = strlen(merid_names[i]);
|
|
if (strncasecmp(merid_names[i], &str[si], l) == 0) {
|
|
si += l;
|
|
set_hash("_merid", INT2FIX((i % 2) == 0 ? 0 : 12));
|
|
goto matched;
|
|
}
|
|
}
|
|
fail();
|
|
}
|
|
|
|
case 'Q':
|
|
{
|
|
VALUE n;
|
|
int sign = 1;
|
|
|
|
if (str[si] == '-') {
|
|
sign = -1;
|
|
si++;
|
|
}
|
|
READ_DIGITS_MAX(n);
|
|
if (sign == -1)
|
|
n = f_negate(n);
|
|
set_hash("seconds",
|
|
rb_rational_new2(n,
|
|
f_expt(INT2FIX(10),
|
|
INT2FIX(3))));
|
|
goto matched;
|
|
}
|
|
|
|
case 'R':
|
|
recur("%H:%M");
|
|
goto matched;
|
|
|
|
case 'r':
|
|
recur("%I:%M:%S %p");
|
|
goto matched;
|
|
|
|
case 'S':
|
|
{
|
|
VALUE n;
|
|
|
|
READ_DIGITS(n, 2);
|
|
if (!valid_range_p(n, 0, 60))
|
|
fail();
|
|
set_hash("sec", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 's':
|
|
{
|
|
VALUE n;
|
|
int sign = 1;
|
|
|
|
if (str[si] == '-') {
|
|
sign = -1;
|
|
si++;
|
|
}
|
|
READ_DIGITS_MAX(n);
|
|
if (sign == -1)
|
|
n = f_negate(n);
|
|
set_hash("seconds", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'T':
|
|
recur("%H:%M:%S");
|
|
goto matched;
|
|
|
|
case 'U':
|
|
case 'W':
|
|
{
|
|
VALUE n;
|
|
|
|
READ_DIGITS(n, 2);
|
|
if (!valid_range_p(n, 0, 53))
|
|
fail();
|
|
set_hash(c == 'U' ? "wnum0" : "wnum1", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'u':
|
|
{
|
|
VALUE n;
|
|
|
|
READ_DIGITS(n, 1);
|
|
if (!valid_range_p(n, 1, 7))
|
|
fail();
|
|
set_hash("cwday", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'V':
|
|
{
|
|
VALUE n;
|
|
|
|
READ_DIGITS(n, 2);
|
|
if (!valid_range_p(n, 1, 53))
|
|
fail();
|
|
set_hash("cweek", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'v':
|
|
recur("%e-%b-%Y");
|
|
goto matched;
|
|
|
|
case 'w':
|
|
{
|
|
VALUE n;
|
|
|
|
READ_DIGITS(n, 1);
|
|
if (!valid_range_p(n, 0, 6))
|
|
fail();
|
|
set_hash("wday", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'X':
|
|
recur("%H:%M:%S");
|
|
goto matched;
|
|
|
|
case 'x':
|
|
recur("%m/%d/%y");
|
|
goto matched;
|
|
|
|
case 'Y':
|
|
{
|
|
VALUE n;
|
|
int sign = 1;
|
|
|
|
if (issign(str[si])) {
|
|
if (str[si] == '-')
|
|
sign = -1;
|
|
si++;
|
|
}
|
|
if (NUM_PATTERN_P())
|
|
READ_DIGITS(n, 4)
|
|
else
|
|
READ_DIGITS_MAX(n)
|
|
if (sign == -1)
|
|
n = f_negate(n);
|
|
set_hash("year", n);
|
|
goto matched;
|
|
}
|
|
|
|
case 'y':
|
|
{
|
|
VALUE n;
|
|
int sign = 1;
|
|
|
|
READ_DIGITS(n, 2);
|
|
if (!valid_range_p(n, 0, 99))
|
|
fail();
|
|
if (sign == -1)
|
|
n = f_negate(n);
|
|
set_hash("year", n);
|
|
set_hash("_cent",
|
|
INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
|
|
goto matched;
|
|
}
|
|
|
|
case 'Z':
|
|
case 'z':
|
|
{
|
|
static const char pat_source[] =
|
|
"\\A("
|
|
"(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?"
|
|
"|(?-i:[[:alpha:].\\s]+)(?:standard|daylight)\\s+time\\b"
|
|
"|(?-i:[[:alpha:]]+)(?:\\s+dst)?\\b"
|
|
")";
|
|
static VALUE pat = Qnil;
|
|
VALUE m, b;
|
|
|
|
if (NIL_P(pat)) {
|
|
pat = rb_reg_new(pat_source, sizeof pat_source - 1,
|
|
ONIG_OPTION_IGNORECASE);
|
|
rb_gc_register_mark_object(pat);
|
|
}
|
|
|
|
b = rb_backref_get();
|
|
rb_match_busy(b);
|
|
m = f_match(pat, rb_usascii_str_new2(&str[si]));
|
|
|
|
if (!NIL_P(m)) {
|
|
VALUE s, l, o;
|
|
|
|
s = rb_reg_nth_match(1, m);
|
|
l = f_end(m, INT2FIX(0));
|
|
o = date_zone_to_diff(s);
|
|
si += NUM2LONG(l);
|
|
set_hash("zone", s);
|
|
set_hash("offset", o);
|
|
rb_backref_set(b);
|
|
goto matched;
|
|
}
|
|
rb_backref_set(b);
|
|
fail();
|
|
}
|
|
|
|
case '%':
|
|
if (str[si] != '%')
|
|
fail();
|
|
si++;
|
|
goto matched;
|
|
|
|
case '+':
|
|
recur("%a %b %e %H:%M:%S %Z %Y");
|
|
goto matched;
|
|
|
|
default:
|
|
if (str[si] != '%')
|
|
fail();
|
|
si++;
|
|
if (fi < flen)
|
|
if (str[si] != fmt[fi])
|
|
fail();
|
|
si++;
|
|
goto matched;
|
|
}
|
|
case ' ':
|
|
case '\t':
|
|
case '\n':
|
|
case '\v':
|
|
case '\f':
|
|
case '\r':
|
|
while (isspace((unsigned char)str[si]))
|
|
si++;
|
|
fi++;
|
|
break;
|
|
default:
|
|
ordinal:
|
|
if (str[si] != fmt[fi])
|
|
fail();
|
|
si++;
|
|
fi++;
|
|
break;
|
|
matched:
|
|
fi++;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return si;
|
|
}
|
|
|
|
VALUE
|
|
date__strptime(const char *str, size_t slen,
|
|
const char *fmt, size_t flen, VALUE hash)
|
|
{
|
|
size_t si;
|
|
VALUE cent, merid;
|
|
|
|
si = date__strptime_internal(str, slen, fmt, flen, hash);
|
|
|
|
if (slen > si) {
|
|
VALUE s;
|
|
|
|
s = rb_usascii_str_new(&str[si], slen - si);
|
|
set_hash("leftover", s);
|
|
}
|
|
|
|
if (fail_p())
|
|
return Qnil;
|
|
|
|
cent = ref_hash("_cent");
|
|
if (!NIL_P(cent)) {
|
|
VALUE year;
|
|
|
|
year = ref_hash("cwyear");
|
|
if (!NIL_P(year))
|
|
set_hash("cwyear", f_add(year, f_mul(cent, INT2FIX(100))));
|
|
year = ref_hash("year");
|
|
if (!NIL_P(year))
|
|
set_hash("year", f_add(year, f_mul(cent, INT2FIX(100))));
|
|
del_hash("_cent");
|
|
}
|
|
|
|
merid = ref_hash("_merid");
|
|
if (!NIL_P(merid)) {
|
|
VALUE hour;
|
|
|
|
hour = ref_hash("hour");
|
|
if (!NIL_P(hour)) {
|
|
hour = f_mod(hour, INT2FIX(12));
|
|
set_hash("hour", f_add(hour, merid));
|
|
}
|
|
del_hash("_merid");
|
|
}
|
|
|
|
return hash;
|
|
}
|
|
|
|
/*
Local variables:
c-file-style: "ruby"
End:
*/
|