2012-03-11 13:36:06 +00:00
#include "../fbuffer/fbuffer.h"
2010-04-26 06:27:27 +00:00
#include "parser.h"
/* unicode */
2011-02-05 01:13:41 +00:00
/*
 * Lookup table indexed by byte value. The entries for '0'-'9' hold 0-9, the
 * entries for 'A'-'F' and 'a'-'f' hold 10-15, and every other entry is -1.
 * unescape_unicode() treats a negative entry as "not a hexadecimal digit".
 * NOTE(review): the element type is plain char, whose signedness is
 * implementation-defined; confirm the -1 sentinel behaves as intended on
 * targets where char is unsigned.
 */
static const char digit_values[256] = {
2010-04-26 06:27:27 +00:00
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
-1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1
static UTF32 unescape_unicode(const unsigned char *p)
char b;
UTF32 result = 0;
b = digit_values[p[0]];
if (b < 0) return UNI_REPLACEMENT_CHAR;
2014-12-26 06:13:15 +00:00
result = (result << 4) | (unsigned char)b;
2010-04-26 06:27:27 +00:00
b = digit_values[p[1]];
if (b < 0) return UNI_REPLACEMENT_CHAR;
2014-12-26 06:13:15 +00:00
result = (result << 4) | (unsigned char)b;
2010-04-26 06:27:27 +00:00
b = digit_values[p[2]];
if (b < 0) return UNI_REPLACEMENT_CHAR;
2014-12-26 06:13:15 +00:00
result = (result << 4) | (unsigned char)b;
2010-04-26 06:27:27 +00:00
b = digit_values[p[3]];
if (b < 0) return UNI_REPLACEMENT_CHAR;
2014-12-26 06:13:15 +00:00
result = (result << 4) | (unsigned char)b;
2010-04-26 06:27:27 +00:00
return result;
2010-04-26 04:34:36 +00:00
2011-02-05 01:13:41 +00:00
/*
 * Encode the code point ch as UTF-8 into buf and return the number of bytes
 * written (1 to 4). buf must have room for four bytes. Values above
 * 0x1fffff are not encodable by this routine and are written as a single
 * '?' byte.
 */
static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
{
    int len = 1;
    if (ch <= 0x7F) {
        buf[0] = (char) ch;
    } else if (ch <= 0x07FF) {
        buf[0] = (char) ((ch >> 6) | 0xC0);
        buf[1] = (char) ((ch & 0x3F) | 0x80);
        /* Two bytes were written; without this the caller drops buf[1]. */
        len++;
    } else if (ch <= 0xFFFF) {
        buf[0] = (char) ((ch >> 12) | 0xE0);
        buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
        buf[2] = (char) ((ch & 0x3F) | 0x80);
        len += 2;
    } else if (ch <= 0x1fffff) {
        buf[0] = (char) ((ch >> 18) | 0xF0);
        buf[1] = (char) (((ch >> 12) & 0x3F) | 0x80);
        buf[2] = (char) (((ch >> 6) & 0x3F) | 0x80);
        buf[3] = (char) ((ch & 0x3F) | 0x80);
        len += 3;
    } else {
        buf[0] = '?';
    }
    return len;
}
2009-05-05 02:32:49 +00:00
2010-04-26 06:27:27 +00:00
/* Encoding objects; each is assigned in Init_parser() through Encoding.find. */
static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE,
CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE;
2011-07-08 07:39:09 +00:00
/* IDs of the "encoding" and "encode" methods that convert_encoding() calls on the source. */
static ID i_encoding, i_encode;
2009-05-05 02:32:49 +00:00
2010-04-26 06:27:27 +00:00
/* ID of the "iconv" method that convert_encoding() calls on the JSON module. */
static ID i_iconv;
2009-05-05 02:32:49 +00:00
2007-06-04 12:31:26 +00:00
/* Modules, the parser class and the two error classes; resolved in Init_parser(). */
static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
2007-07-07 17:15:30 +00:00
/* The JSON::NaN, JSON::Infinity and JSON::MinusInfinity constants. */
static VALUE CNaN, CInfinity, CMinusInfinity;
2007-06-04 12:31:26 +00:00
2007-11-28 09:22:57 +00:00
/* Interned method and option names; all of them are assigned in Init_parser(). */
static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
2011-08-30 02:23:12 +00:00
i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_quirks_mode,
i_object_class, i_array_class, i_key_p, i_deep_const_get, i_match,
2012-03-11 13:36:06 +00:00
i_match_string, i_aset, i_aref, i_leftshift;
2007-06-04 12:31:26 +00:00
/*
 * Ragel definitions shared by the other machines through
 * `include JSON_common`: whitespace and comment skipping (`ignore`), the
 * name and value separators, the literal tokens, and the characters that
 * may begin a value, object, array, string or number.
 */
machine JSON_common;
cr = '\n';
cr_neg = [^\n];
ws = [ \t\r\n];
c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/';
cpp_comment = '//' cr_neg* cr;
comment = c_comment | cpp_comment;
ignore = ws | comment;
name_separator = ':';
value_separator = ',';
Vnull = 'null';
Vfalse = 'false';
Vtrue = 'true';
2007-07-07 17:15:30 +00:00
VNaN = 'NaN';
VInfinity = 'Infinity';
VMinusInfinity = '-Infinity';
2011-02-05 01:13:41 +00:00
begin_value = [nft\"\-\[\{NI] | digit;
2007-06-04 12:31:26 +00:00
begin_object = '{';
end_object = '}';
begin_array = '[';
end_array = ']';
begin_string = '"';
begin_name = begin_string;
begin_number = digit | '-';
/*
 * Ragel machine for the members of a JSON object: zero or more
 * name ':' value pairs separated by ','. Each name is parsed by
 * JSON_parse_string() and each value by JSON_parse_value().
 * NOTE(review): begin_object / end_object do not appear in the visible
 * `main` rule; confirm against the complete grammar.
 */
machine JSON_object;
include JSON_common;
write data;
action parse_value {
VALUE v = Qnil;
2011-02-05 01:13:41 +00:00
char *np = JSON_parse_value(json, fpc, pe, &v);
2007-06-04 12:31:26 +00:00
if (np == NULL) {
2008-09-20 17:41:14 +00:00
fhold; fbreak;
2007-06-04 12:31:26 +00:00
} else {
2011-07-10 08:01:04 +00:00
/* No custom object_class: store straight into the Hash; otherwise call the object's []= method. */
if (NIL_P(json->object_class)) {
rb_hash_aset(*result, last_name, v);
} else {
rb_funcall(*result, i_aset, 2, last_name, v);
2007-06-04 12:31:26 +00:00
fexec np;
action parse_name {
2010-04-26 06:27:27 +00:00
char *np;
/* parsing_name is set only while the key is parsed; JSON_parse_string() reads it for symbolize_names. */
json->parsing_name = 1;
np = JSON_parse_string(json, fpc, pe, &last_name);
json->parsing_name = 0;
2008-09-20 17:41:14 +00:00
if (np == NULL) { fhold; fbreak; } else fexec np;
2007-06-04 12:31:26 +00:00
2008-09-20 17:41:14 +00:00
action exit { fhold; fbreak; }
2007-06-04 12:31:26 +00:00
2011-08-30 02:23:12 +00:00
pair = ignore* begin_name >parse_name ignore* name_separator ignore* begin_value >parse_value;
next_pair = ignore* value_separator pair;
2007-06-04 12:31:26 +00:00
2011-08-30 02:23:12 +00:00
main := (
(pair (next_pair)*)? ignore*
) @exit;
2007-06-04 12:31:26 +00:00
/*
 * Run the JSON_object machine over [p, pe) and store the parsed object in
 * *result.
 *
 * The container is a new Hash, or a new instance of json->object_class when
 * that is not nil. Raises eNestingError when max_nesting is non-zero and
 * current_nesting exceeds it. When create_additions is set and the parsed
 * object has an entry under json->create_id, the named class is looked up
 * with JSON.deep_const_get and, if it answers true to json_creatable?,
 * *result is replaced by klass.json_create(*result).
 *
 * Returns p + 1 when the machine ends in a final state, NULL otherwise.
 */
static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result)
int cs = EVIL;
VALUE last_name = Qnil;
2009-09-01 16:17:56 +00:00
VALUE object_class = json->object_class;
2007-06-04 12:31:26 +00:00
/* A max_nesting of 0 disables the depth check. */
if (json->max_nesting && json->current_nesting > json->max_nesting) {
2009-09-01 16:17:56 +00:00
rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting);
2007-06-04 12:31:26 +00:00
2009-09-01 16:17:56 +00:00
*result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
2007-06-04 12:31:26 +00:00
%% write init;
%% write exec;
if (cs >= JSON_object_first_final) {
2011-07-10 08:01:04 +00:00
if (json->create_additions) {
2012-03-11 13:36:06 +00:00
VALUE klassname;
/* Look up the create_id entry the same way the members were stored. */
if (NIL_P(json->object_class)) {
klassname = rb_hash_aref(*result, json->create_id);
} else {
klassname = rb_funcall(*result, i_aref, 1, json->create_id);
2007-11-28 09:22:57 +00:00
if (!NIL_P(klassname)) {
2010-04-26 06:27:27 +00:00
VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
2011-07-08 09:00:51 +00:00
if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
2007-11-28 09:22:57 +00:00
*result = rb_funcall(klass, i_json_create, 1, *result);
2007-06-04 12:31:26 +00:00
return p + 1;
} else {
return NULL;
2011-08-30 02:23:12 +00:00
2007-06-04 12:31:26 +00:00
/*
 * Ragel machine for a single JSON value. The literals null/false/true set
 * *result directly; NaN and Infinity are accepted only when json->allow_nan
 * is set and raise eParserError otherwise. Numbers, strings, arrays and
 * objects are delegated to their own JSON_parse_* functions.
 */
machine JSON_value;
include JSON_common;
write data;
action parse_null {
*result = Qnil;
action parse_false {
*result = Qfalse;
action parse_true {
*result = Qtrue;
2007-07-07 17:15:30 +00:00
action parse_nan {
if (json->allow_nan) {
*result = CNaN;
} else {
2008-09-20 17:41:14 +00:00
rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2);
2007-07-07 17:15:30 +00:00
action parse_infinity {
if (json->allow_nan) {
*result = CInfinity;
} else {
2008-09-20 17:41:14 +00:00
rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8);
2007-07-07 17:15:30 +00:00
2007-06-04 12:31:26 +00:00
action parse_string {
char *np = JSON_parse_string(json, fpc, pe, result);
2008-09-20 17:41:14 +00:00
if (np == NULL) { fhold; fbreak; } else fexec np;
2007-06-04 12:31:26 +00:00
action parse_number {
char *np;
2011-08-30 02:23:12 +00:00
/*
 * "-Infinity" begins like a number, so it is recognised here first.
 * NOTE(review): MinusInfinity is not defined in the visible lines;
 * presumably a string constant holding "-Infinity".
 */
if(pe > fpc + 9 - json->quirks_mode && !strncmp(MinusInfinity, fpc, 9)) {
2007-07-07 17:15:30 +00:00
if (json->allow_nan) {
*result = CMinusInfinity;
fexec p + 10;
2008-09-20 17:41:14 +00:00
fhold; fbreak;
2007-07-07 17:15:30 +00:00
} else {
2008-09-20 17:41:14 +00:00
rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
2007-07-07 17:15:30 +00:00
2007-06-04 12:31:26 +00:00
/* Try the float grammar first, then fall back to the integer grammar. */
np = JSON_parse_float(json, fpc, pe, result);
if (np != NULL) fexec np;
np = JSON_parse_integer(json, fpc, pe, result);
if (np != NULL) fexec np;
2008-09-20 17:41:14 +00:00
fhold; fbreak;
2007-06-04 12:31:26 +00:00
2011-02-05 01:13:41 +00:00
action parse_array {
2007-06-04 12:31:26 +00:00
char *np;
2008-09-20 17:41:14 +00:00
2007-06-04 12:31:26 +00:00
np = JSON_parse_array(json, fpc, pe, result);
2008-09-20 17:41:14 +00:00
if (np == NULL) { fhold; fbreak; } else fexec np;
2007-06-04 12:31:26 +00:00
2011-02-05 01:13:41 +00:00
action parse_object {
2007-06-04 12:31:26 +00:00
char *np;
2008-09-20 17:41:14 +00:00
2007-06-04 12:31:26 +00:00
np = JSON_parse_object(json, fpc, pe, result);
2008-09-20 17:41:14 +00:00
if (np == NULL) { fhold; fbreak; } else fexec np;
2007-06-04 12:31:26 +00:00
2008-09-20 17:41:14 +00:00
action exit { fhold; fbreak; }
2007-06-04 12:31:26 +00:00
main := (
Vnull @parse_null |
Vfalse @parse_false |
Vtrue @parse_true |
2007-07-07 17:15:30 +00:00
VNaN @parse_nan |
VInfinity @parse_infinity |
2007-06-04 12:31:26 +00:00
begin_number >parse_number |
begin_string >parse_string |
begin_array >parse_array |
begin_object >parse_object
) %*exit;
/*
 * Run the JSON_value machine over [p, pe), storing the parsed value in
 * *result. Returns the final position p when the machine ends in a final
 * state, NULL otherwise.
 */
static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result)
int cs = EVIL;
%% write init;
%% write exec;
if (cs >= JSON_value_first_final) {
return p;
} else {
return NULL;
/*
 * Ragel machine for a JSON integer: an optional '-', then either '0' or a
 * non-zero digit followed by more digits. The trailing optional non-digit
 * character fires the exit action.
 */
machine JSON_integer;
write data;
2008-09-20 17:41:14 +00:00
action exit { fhold; fbreak; }
2007-06-04 12:31:26 +00:00
2011-08-30 02:23:12 +00:00
main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit);
2007-06-04 12:31:26 +00:00
/*
 * Run the JSON_integer machine over [p, pe). On a match, the matched text
 * (from json->memo to the final p) is appended to json->fbuffer with a
 * terminating NUL and converted to a Ruby integer with rb_cstr2inum in
 * base 10.
 *
 * Returns p + 1 on success, NULL when the machine does not end in a final
 * state.
 * NOTE(review): no reset of json->fbuffer is visible before the append;
 * confirm the buffer is empty on entry.
 */
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
int cs = EVIL;
%% write init;
/* Remember where the number starts so its length can be computed afterwards. */
json->memo = p;
%% write exec;
if (cs >= JSON_integer_first_final) {
long len = p - json->memo;
2012-03-11 13:36:06 +00:00
fbuffer_append(json->fbuffer, json->memo, len);
fbuffer_append_char(json->fbuffer, '\0');
*result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10);
2007-06-04 12:31:26 +00:00
return p + 1;
} else {
return NULL;
/*
 * Ragel machine for a JSON float: an optional '-', an integer part, and
 * then either a fraction with an optional exponent or an exponent alone.
 * The trailing optional character outside [0-9Ee.\-] fires the exit action.
 */
machine JSON_float;
include JSON_common;
write data;
2008-09-20 17:41:14 +00:00
action exit { fhold; fbreak; }
2007-06-04 12:31:26 +00:00
main := '-'? (
(('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?)
| (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+))
2011-08-30 02:23:12 +00:00
) (^[0-9Ee.\-]? @exit );
2007-06-04 12:31:26 +00:00
/*
 * Run the JSON_float machine over [p, pe). On a match, the matched text
 * (from json->memo to the final p) is appended to json->fbuffer with a
 * terminating NUL, converted with rb_cstr_to_dbl and wrapped in a Ruby
 * Float.
 *
 * Returns p + 1 on success, NULL when the machine does not end in a final
 * state.
 * NOTE(review): no reset of json->fbuffer is visible before the append;
 * confirm the buffer is empty on entry.
 */
static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
int cs = EVIL;
%% write init;
/* Remember where the number starts so its length can be computed afterwards. */
json->memo = p;
%% write exec;
if (cs >= JSON_float_first_final) {
long len = p - json->memo;
2012-03-11 13:36:06 +00:00
fbuffer_append(json->fbuffer, json->memo, len);
fbuffer_append_char(json->fbuffer, '\0');
*result = rb_float_new(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1));
2007-06-04 12:31:26 +00:00
return p + 1;
} else {
return NULL;
/*
 * Ragel machine for a JSON array: '[', an optional comma-separated list of
 * values with ignorable whitespace and comments in between, then ']'. Each
 * element is parsed by JSON_parse_value() and appended to *result.
 */
machine JSON_array;
include JSON_common;
write data;
action parse_value {
VALUE v = Qnil;
2011-02-05 01:13:41 +00:00
char *np = JSON_parse_value(json, fpc, pe, &v);
2007-06-04 12:31:26 +00:00
if (np == NULL) {
2008-09-20 17:41:14 +00:00
fhold; fbreak;
2007-06-04 12:31:26 +00:00
} else {
2011-07-10 08:01:04 +00:00
/* No custom array_class: push onto the Array; otherwise call the object's << method. */
if (NIL_P(json->array_class)) {
rb_ary_push(*result, v);
} else {
rb_funcall(*result, i_leftshift, 1, v);
2007-06-04 12:31:26 +00:00
fexec np;
2008-09-20 17:41:14 +00:00
action exit { fhold; fbreak; }
2007-06-04 12:31:26 +00:00
next_element = value_separator ignore* begin_value >parse_value;
main := begin_array ignore*
((begin_value >parse_value ignore*)
(ignore* next_element ignore*)*)?
end_array @exit;
/*
 * Run the JSON_array machine over [p, pe) and store the parsed array in
 * *result.
 *
 * The container is a new Array, or a new instance of json->array_class when
 * that is not nil. Raises eNestingError when max_nesting is non-zero and
 * current_nesting exceeds it, and eParserError when the machine does not
 * end in a final state. Returns p + 1 on success.
 */
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result)
int cs = EVIL;
2009-09-01 16:17:56 +00:00
VALUE array_class = json->array_class;
2007-06-04 12:31:26 +00:00
/* A max_nesting of 0 disables the depth check. */
if (json->max_nesting && json->current_nesting > json->max_nesting) {
2009-09-01 16:17:56 +00:00
rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting);
2007-06-04 12:31:26 +00:00
2009-09-01 16:17:56 +00:00
*result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
2007-06-04 12:31:26 +00:00
%% write init;
%% write exec;
if(cs >= JSON_array_first_final) {
return p + 1;
} else {
2008-09-20 17:41:14 +00:00
rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
2010-04-26 06:27:27 +00:00
/* NOTE(review): presumably unreachable, since rb_raise is not expected to return. */
return NULL;
2007-06-04 12:31:26 +00:00
2010-04-26 06:27:27 +00:00
/*
 * Append the bytes in [string, stringEnd) to result, replacing JSON
 * backslash escapes with the characters they stand for. \uXXXX escapes are
 * decoded with unescape_unicode() and re-encoded as UTF-8 with
 * convert_UTF32_to_UTF8(); a high surrogate followed by a second \uXXXX is
 * combined into one code point.
 *
 * Returns result, or Qnil when a \u escape runs past stringEnd.
 *
 * NOTE(review): several statements appear to be missing from the visible
 * text. No `break` separates the case labels, there is no `default` label,
 * and the final `else` branch is empty. Confirm against the complete
 * source before relying on the control flow as shown.
 */
static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd)
2007-06-04 12:31:26 +00:00
2010-04-26 06:27:27 +00:00
/* p marks the start of the pending literal run; pe is the scan position. */
char *p = string, *pe = string, *unescape;
int unescape_len;
2012-01-15 06:52:36 +00:00
/* Scratch space for one UTF-8 sequence of up to four bytes. */
char buf[4];
2010-04-26 06:27:27 +00:00
while (pe < stringEnd) {
if (*pe == '\\') {
unescape = (char *) "?";
unescape_len = 1;
/* Flush the literal bytes that precede the backslash. */
if (pe > p) rb_str_buf_cat(result, p, pe - p);
switch (*++pe) {
case 'n':
unescape = (char *) "\n";
case 'r':
unescape = (char *) "\r";
case 't':
unescape = (char *) "\t";
2007-06-04 12:31:26 +00:00
case '"':
2010-04-26 06:27:27 +00:00
unescape = (char *) "\"";
2007-06-04 12:31:26 +00:00
case '\\':
2010-04-26 06:27:27 +00:00
unescape = (char *) "\\";
2007-06-04 12:31:26 +00:00
case 'b':
2010-04-26 06:27:27 +00:00
unescape = (char *) "\b";
2007-06-04 12:31:26 +00:00
case 'f':
2010-04-26 06:27:27 +00:00
unescape = (char *) "\f";
2007-06-04 12:31:26 +00:00
case 'u':
2011-02-05 01:13:41 +00:00
/* Four hex digits must follow the 'u'. */
if (pe > stringEnd - 4) {
2007-06-04 12:31:26 +00:00
return Qnil;
} else {
2010-04-26 06:27:27 +00:00
UTF32 ch = unescape_unicode((unsigned char *) ++pe);
pe += 3;
/* High surrogate: a second \uXXXX escape is expected next. */
if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
if (pe > stringEnd - 6) return Qnil;
if (pe[0] == '\\' && pe[1] == 'u') {
UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
| (sur & 0x3FF));
pe += 5;
} else {
unescape = (char *) "?";
unescape_len = convert_UTF32_to_UTF8(buf, ch);
unescape = buf;
2007-06-04 12:31:26 +00:00
2010-04-26 06:27:27 +00:00
p = pe;
2007-06-04 12:31:26 +00:00
2010-04-26 06:27:27 +00:00
rb_str_buf_cat(result, unescape, unescape_len);
p = ++pe;
2007-06-04 12:31:26 +00:00
} else {
2010-04-26 06:27:27 +00:00
2007-06-04 12:31:26 +00:00
2010-04-26 06:27:27 +00:00
/* Flush whatever literal text remains after the last escape. */
rb_str_buf_cat(result, p, pe - p);
2007-06-04 12:31:26 +00:00
return result;
/*
 * Ragel machine for a JSON string: a double quote, any run of unescaped
 * characters (excluding '"', '\' and control bytes 0..0x1f), backslash
 * escapes and \uXXXX sequences, then the closing double quote. On leaving
 * the body, parse_string unescapes the text between json->memo + 1 and p.
 * NOTE(review): the branch taken when unescaping yields nil is empty in the
 * visible text; confirm against the complete source.
 */
machine JSON_string;
include JSON_common;
write data;
action parse_string {
2010-04-26 06:27:27 +00:00
*result = json_string_unescape(*result, json->memo + 1, p);
2009-05-05 02:32:49 +00:00
if (NIL_P(*result)) {
2011-07-08 09:00:54 +00:00
} else {
fexec p + 1;
2007-06-04 12:31:26 +00:00
2008-09-20 17:41:14 +00:00
action exit { fhold; fbreak; }
2007-06-04 12:31:26 +00:00
2011-02-05 01:13:41 +00:00
main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
2007-06-04 12:31:26 +00:00
2011-07-10 08:01:04 +00:00
/*
 * rb_hash_foreach callback used by JSON_parse_string() on the match_string
 * hash (regexp => class). memo is an Array whose first entry is the parsed
 * string. When klass answers true to json_creatable? and regexp matches
 * the string, klass is pushed onto memo and iteration stops.
 * NOTE(review): the return value for a non-matching entry is not visible
 * here.
 */
static int
match_i(VALUE regexp, VALUE klass, VALUE memo)
if (regexp == Qundef) return ST_STOP;
if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) &&
RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) {
rb_ary_push(memo, klass);
return ST_STOP;
2007-06-04 12:31:26 +00:00
/*
 * Run the JSON_string machine over [p, pe) and store the unescaped string
 * in *result.
 *
 * When create_additions is set and json->match_string is truthy, the hash
 * is searched with match_i() and, if a class matched, *result is replaced
 * by klass.json_create(*result). When symbolize_names is set and a key is
 * being parsed (json->parsing_name), *result is interned into a Symbol.
 *
 * Returns p + 1 when the machine ends in a final state, NULL otherwise.
 */
static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
int cs = EVIL;
2011-07-10 08:01:04 +00:00
VALUE match_string;
2007-06-04 12:31:26 +00:00
2010-04-26 06:27:27 +00:00
*result = rb_str_buf_new(0);
2007-06-04 12:31:26 +00:00
%% write init;
/* The string machine's parse_string action reads json->memo as the position of the opening quote. */
json->memo = p;
%% write exec;
2011-07-10 08:01:04 +00:00
if (json->create_additions && RTEST(match_string = json->match_string)) {
VALUE klass;
/* memo[0] is the string to match; match_i() pushes the matching class as memo[1]. */
VALUE memo = rb_ary_new2(2);
rb_ary_push(memo, *result);
rb_hash_foreach(match_string, match_i, memo);
klass = rb_ary_entry(memo, 1);
if (RTEST(klass)) {
*result = rb_funcall(klass, i_json_create, 1, *result);
2010-04-26 06:27:27 +00:00
if (json->symbolize_names && json->parsing_name) {
*result = rb_str_intern(*result);
2007-06-04 12:31:26 +00:00
if (cs >= JSON_string_first_final) {
return p + 1;
} else {
return NULL;
2011-02-05 01:13:41 +00:00
2007-06-04 12:31:26 +00:00
* Document-class: JSON::Ext::Parser
* This is the JSON parser implemented as a C extension. It can be configured
* to be used by setting
* JSON.parser = JSON::Ext::Parser
* with the method parser= in JSON.
2010-04-26 06:27:27 +00:00
/*
 * Return a UTF-8 version of source for parsing.
 *
 * Raises eParserError when source is shorter than two bytes. For
 * ASCII-8BIT input the encoding is guessed from where NUL bytes fall in
 * the first four bytes (UTF-32BE, UTF-16BE, UTF-32LE or UTF-16LE) and the
 * string is transcoded to UTF-8; if no pattern matches, a duplicate of the
 * string is used. Input in any other encoding is passed to
 * encode(UTF-8).
 *
 * NOTE(review): two variants are visible, one based on String#encode and
 * one based on JSON.iconv. They are presumably alternatives selected by
 * preprocessor conditionals that are not visible here.
 */
static VALUE convert_encoding(VALUE source)
char *ptr = RSTRING_PTR(source);
long len = RSTRING_LEN(source);
if (len < 2) {
rb_raise(eParserError, "A JSON text must at least contain two octets!");
VALUE encoding = rb_funcall(source, i_encoding, 0);
if (encoding == CEncoding_ASCII_8BIT) {
/* Variant using encode(to, from): target encoding first, guessed source encoding second. */
if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
2011-07-08 07:39:09 +00:00
source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32BE);
2010-04-26 06:27:27 +00:00
} else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
2011-07-08 07:39:09 +00:00
source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16BE);
2010-04-26 06:27:27 +00:00
} else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
2011-07-08 07:39:09 +00:00
source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32LE);
2010-04-26 06:27:27 +00:00
} else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
2011-07-08 07:39:09 +00:00
source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16LE);
2010-04-26 06:27:27 +00:00
} else {
2011-07-08 07:40:41 +00:00
source = rb_str_dup(source);
2010-04-26 06:27:27 +00:00
} else {
source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8);
/* Variant using JSON.iconv(to, from, string) with the same NUL-byte heuristics. */
if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source);
} else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source);
} else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source);
} else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source);
return source;
2007-06-04 12:31:26 +00:00
* call-seq: new(source, opts => {})
* Creates a new JSON::Ext::Parser instance for the string _source_.
* It will be configured by the _opts_ hash. _opts_ can have the following
* keys:
* * *max_nesting*: The maximum depth of nesting allowed in the parsed data
2007-07-07 17:15:30 +00:00
* structures. Disable depth checking with :max_nesting => false|nil|0, it
2013-02-12 03:05:45 +00:00
* defaults to 100.
2007-07-07 17:15:30 +00:00
* * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
* defiance of RFC 4627 to be parsed by the Parser. This option defaults to
* false.
2010-04-26 06:27:27 +00:00
* * *symbolize_names*: If set to true, returns symbols for the names
* (keys) in a JSON object. Otherwise strings are returned, which is also
* the default.
2007-11-28 09:22:57 +00:00
* * *create_additions*: If set to false, the Parser doesn't create
* additions even if a matching class and create_id was found. This option
* defaults to true.
2009-09-01 16:17:56 +00:00
* * *object_class*: Defaults to Hash
* * *array_class*: Defaults to Array
2007-06-04 12:31:26 +00:00
/*
 * Implements Parser#initialize(source, opts = nil).
 *
 * Reads max_nesting, allow_nan, symbolize_names, quirks_mode,
 * create_additions, create_id, object_class, array_class and match_string
 * from opts into the parser state, falling back to the defaults assigned
 * below. source is converted to a String and, unless quirks_mode is set,
 * re-encoded by convert_encoding(). Its pointer and length are then stored
 * for the parse methods.
 *
 * Raises TypeError when the instance already has a source, and
 * ArgumentError when opts cannot be used as a hash. Returns self.
 *
 * NOTE(review): `json` is not declared in the visible lines; presumably it
 * is obtained from self by a statement that is not shown.
 * NOTE(review): create_additions falls back to 0 when an options hash is
 * given without that key, but to 1 when no options hash is given at all.
 * Confirm which default is intended.
 */
static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
VALUE source, opts;
2011-02-05 01:30:01 +00:00
/* A non-zero Vsource means initialize has already run on this object. */
if (json->Vsource) {
2011-02-05 02:29:18 +00:00
rb_raise(rb_eTypeError, "already initialized instance");
2011-02-05 01:30:01 +00:00
2007-06-04 12:31:26 +00:00
/* One required argument (source) and one optional argument (opts). */
rb_scan_args(argc, argv, "11", &source, &opts);
if (!NIL_P(opts)) {
opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
if (NIL_P(opts)) {
rb_raise(rb_eArgError, "opts needs to be like a hash");
} else {
2007-07-07 17:15:30 +00:00
VALUE tmp = ID2SYM(i_max_nesting);
2010-04-26 06:27:27 +00:00
if (option_given_p(opts, tmp)) {
2007-07-07 17:15:30 +00:00
VALUE max_nesting = rb_hash_aref(opts, tmp);
2007-06-04 12:31:26 +00:00
/* A falsy max_nesting disables depth checking (stored as 0). */
if (RTEST(max_nesting)) {
Check_Type(max_nesting, T_FIXNUM);
json->max_nesting = FIX2INT(max_nesting);
} else {
json->max_nesting = 0;
2007-11-28 09:22:57 +00:00
} else {
2013-02-12 03:05:45 +00:00
json->max_nesting = 100;
2007-06-04 12:31:26 +00:00
2007-07-07 17:15:30 +00:00
tmp = ID2SYM(i_allow_nan);
2010-04-26 06:27:27 +00:00
if (option_given_p(opts, tmp)) {
2011-07-10 08:01:04 +00:00
json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
2007-11-28 09:22:57 +00:00
} else {
json->allow_nan = 0;
2010-04-26 06:27:27 +00:00
tmp = ID2SYM(i_symbolize_names);
if (option_given_p(opts, tmp)) {
2011-07-10 08:01:04 +00:00
json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
2010-04-26 06:27:27 +00:00
} else {
json->symbolize_names = 0;
2011-08-30 02:23:12 +00:00
tmp = ID2SYM(i_quirks_mode);
if (option_given_p(opts, tmp)) {
VALUE quirks_mode = rb_hash_aref(opts, tmp);
json->quirks_mode = RTEST(quirks_mode) ? 1 : 0;
} else {
json->quirks_mode = 0;
2007-11-28 09:22:57 +00:00
tmp = ID2SYM(i_create_additions);
2010-04-26 06:27:27 +00:00
if (option_given_p(opts, tmp)) {
2011-07-10 08:01:04 +00:00
json->create_additions = RTEST(rb_hash_aref(opts, tmp));
} else {
2013-02-12 03:05:45 +00:00
json->create_additions = 0;
2011-07-10 08:01:04 +00:00
tmp = ID2SYM(i_create_id);
if (option_given_p(opts, tmp)) {
json->create_id = rb_hash_aref(opts, tmp);
2007-11-28 09:22:57 +00:00
} else {
/* Without an explicit option, ask the JSON module for its create_id. */
json->create_id = rb_funcall(mJSON, i_create_id, 0);
2007-07-07 17:15:30 +00:00
2009-09-01 16:17:56 +00:00
tmp = ID2SYM(i_object_class);
2010-04-26 06:27:27 +00:00
if (option_given_p(opts, tmp)) {
2009-09-01 16:17:56 +00:00
json->object_class = rb_hash_aref(opts, tmp);
} else {
json->object_class = Qnil;
tmp = ID2SYM(i_array_class);
2010-04-26 06:27:27 +00:00
if (option_given_p(opts, tmp)) {
2009-09-01 16:17:56 +00:00
json->array_class = rb_hash_aref(opts, tmp);
} else {
json->array_class = Qnil;
2011-07-10 08:01:04 +00:00
tmp = ID2SYM(i_match_string);
if (option_given_p(opts, tmp)) {
VALUE match_string = rb_hash_aref(opts, tmp);
json->match_string = RTEST(match_string) ? match_string : Qnil;
} else {
json->match_string = Qnil;
2007-06-04 12:31:26 +00:00
2007-11-28 09:22:57 +00:00
} else {
2013-02-12 03:05:45 +00:00
/* Defaults used when no options hash was passed. */
json->max_nesting = 100;
2007-11-28 09:22:57 +00:00
json->allow_nan = 0;
2011-07-10 08:01:04 +00:00
json->create_additions = 1;
2007-11-28 09:22:57 +00:00
json->create_id = rb_funcall(mJSON, i_create_id, 0);
2009-09-01 16:17:56 +00:00
json->object_class = Qnil;
json->array_class = Qnil;
2007-06-04 12:31:26 +00:00
2012-03-11 13:36:06 +00:00
source = rb_convert_type(source, T_STRING, "String", "to_str");
2011-08-30 02:23:12 +00:00
/* Encoding conversion is skipped in quirks mode. */
if (!json->quirks_mode) {
source = convert_encoding(StringValue(source));
2007-06-04 12:31:26 +00:00
json->current_nesting = 0;
2012-05-07 16:00:49 +00:00
2011-08-30 02:23:12 +00:00
json->len = RSTRING_LEN(source);
json->source = RSTRING_PTR(source);;
2007-06-04 12:31:26 +00:00
/* Keep the Ruby string itself next to the raw pointer and length taken from it. */
json->Vsource = source;
return self;
2011-08-30 02:23:12 +00:00
/*
 * Top-level Ragel machine for strict mode: ignorable whitespace and
 * comments, then exactly one object or array, then more ignorable text.
 * Both actions set current_nesting to 1 before delegating.
 */
machine JSON;
write data;
include JSON_common;
action parse_object {
char *np;
json->current_nesting = 1;
np = JSON_parse_object(json, fpc, pe, &result);
if (np == NULL) { fhold; fbreak; } else fexec np;
action parse_array {
char *np;
json->current_nesting = 1;
np = JSON_parse_array(json, fpc, pe, &result);
if (np == NULL) { fhold; fbreak; } else fexec np;
main := ignore* (
begin_object >parse_object |
begin_array >parse_array
) ignore*;
/*
 * Parse json->source with the strict JSON machine and return the result.
 * Raises eParserError unless the machine ends in a final state with the
 * whole input consumed (p == pe).
 * NOTE(review): `json` is not declared in the visible lines; presumably it
 * is obtained from self by a statement that is not shown.
 */
static VALUE cParser_parse_strict(VALUE self)
2007-06-04 12:31:26 +00:00
char *p, *pe;
int cs = EVIL;
VALUE result = Qnil;
2010-04-26 06:27:27 +00:00
2007-06-04 12:31:26 +00:00
%% write init;
p = json->source;
pe = p + json->len;
%% write exec;
if (cs >= JSON_first_final && p == pe) {
return result;
} else {
2008-09-20 17:41:14 +00:00
rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
2010-04-26 06:27:27 +00:00
/* NOTE(review): presumably unreachable, since rb_raise is not expected to return. */
return Qnil;
2007-06-04 12:31:26 +00:00
2011-08-30 02:23:12 +00:00
/*
 * Top-level Ragel machine for quirks mode: ignorable whitespace and
 * comments around a single value of any kind, parsed by
 * JSON_parse_value().
 */
machine JSON_quirks_mode;
write data;
include JSON_common;
action parse_value {
char *np = JSON_parse_value(json, fpc, pe, &result);
if (np == NULL) { fhold; fbreak; } else fexec np;
main := ignore* (
begin_value >parse_value
) ignore*;
/*
 * Parse json->source with the JSON_quirks_mode machine and return the
 * result. Raises eParserError unless the machine ends in a final state
 * with the whole input consumed (p == pe).
 * NOTE(review): `json` is not declared in the visible lines; presumably it
 * is obtained from self by a statement that is not shown.
 */
static VALUE cParser_parse_quirks_mode(VALUE self)
char *p, *pe;
int cs = EVIL;
VALUE result = Qnil;
%% write init;
p = json->source;
pe = p + json->len;
%% write exec;
if (cs >= JSON_quirks_mode_first_final && p == pe) {
return result;
} else {
rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
/* NOTE(review): presumably unreachable, since rb_raise is not expected to return. */
return Qnil;
* call-seq: parse()
* Parses the current JSON text _source_ and returns the complete data
* structure as a result.
/*
 * Implements Parser#parse: dispatches to the quirks-mode parser when
 * json->quirks_mode is set and to the strict parser otherwise.
 * NOTE(review): `json` is not declared in the visible lines; presumably it
 * is obtained from self by a statement that is not shown.
 */
static VALUE cParser_parse(VALUE self)
if (json->quirks_mode) {
return cParser_parse_quirks_mode(self);
} else {
return cParser_parse_strict(self);
2014-10-04 23:33:11 +00:00
/*
 * Allocate a JSON_Parser with ZALLOC and give it a freshly allocated
 * fbuffer. Returns the new parser state.
 */
static JSON_Parser *JSON_allocate(void)
2007-06-04 12:31:26 +00:00
2014-12-27 11:12:58 +00:00
JSON_Parser *json = ZALLOC(JSON_Parser);
2012-03-11 13:36:06 +00:00
json->fbuffer = fbuffer_alloc(0);
2007-06-04 12:31:26 +00:00
return json;
2014-10-04 23:33:11 +00:00
/*
 * Mark function registered in JSON_Parser_type.
 * NOTE(review): only the cast of ptr is visible; the marking calls
 * themselves are not shown here.
 */
static void JSON_mark(void *ptr)
2007-06-04 12:31:26 +00:00
2014-10-04 23:33:11 +00:00
JSON_Parser *json = ptr;
2007-06-04 12:31:26 +00:00
2009-09-01 16:17:56 +00:00
2011-07-10 08:01:04 +00:00
2007-06-04 12:31:26 +00:00
2014-10-04 23:33:11 +00:00
/*
 * Free function registered in JSON_Parser_type.
 * NOTE(review): only the cast of ptr is visible; the release of the
 * fbuffer and of the struct itself is not shown here.
 */
static void JSON_free(void *ptr)
2007-06-04 12:31:26 +00:00
2014-10-04 23:33:11 +00:00
JSON_Parser *json = ptr;
2012-03-11 13:36:06 +00:00
2008-09-20 17:54:46 +00:00
2007-06-04 12:31:26 +00:00
2014-10-04 23:33:11 +00:00
/*
 * Size function registered in JSON_Parser_type: the size of the parser
 * struct plus the capacity of its fbuffer.
 */
static size_t JSON_memsize(const void *ptr)
const JSON_Parser *json = ptr;
return sizeof(*json) + FBUFFER_CAPA(json->fbuffer);
/*
 * Typed-data descriptor passed to TypedData_Wrap_Struct() in
 * cJSON_parser_s_allocate(); it wires up the mark, free and memsize
 * callbacks.
 * NOTE(review): the initializer looks incomplete in the visible text;
 * confirm the remaining fields against the complete source.
 */
static const rb_data_type_t JSON_Parser_type = {
{JSON_mark, JSON_free, JSON_memsize,},
2014-12-27 11:12:58 +00:00
2014-12-01 06:38:04 +00:00
0, 0,
2014-10-04 23:33:11 +00:00
2014-12-27 11:12:58 +00:00
2014-10-04 23:33:11 +00:00
2007-06-04 12:31:26 +00:00
/*
 * Allocation function registered for the Parser class in Init_parser():
 * creates a fresh JSON_Parser and wraps it in a typed-data object of
 * klass.
 */
static VALUE cJSON_parser_s_allocate(VALUE klass)
JSON_Parser *json = JSON_allocate();
2014-10-04 23:33:11 +00:00
return TypedData_Wrap_Struct(klass, &JSON_Parser_type, json);
2007-06-04 12:31:26 +00:00
* call-seq: source()
* Returns a copy of the current _source_ string, that was used to construct
* this Parser.
/*
 * Implements Parser#source: returns a duplicate of the stored source
 * string.
 * NOTE(review): `json` is not declared in the visible lines; presumably it
 * is obtained from self by a statement that is not shown.
 */
static VALUE cParser_source(VALUE self)
2010-04-26 06:27:27 +00:00
2007-06-04 12:31:26 +00:00
return rb_str_dup(json->Vsource);
2011-08-30 02:23:12 +00:00
* call-seq: quirks_mode?()
* Returns true if this parser is in quirks_mode, false otherwise.
/*
 * Implements Parser#quirks_mode?: Qtrue when json->quirks_mode is set,
 * Qfalse otherwise.
 * NOTE(review): `json` is not declared in the visible lines; presumably it
 * is obtained from self by a statement that is not shown.
 */
static VALUE cParser_quirks_mode_p(VALUE self)
return json->quirks_mode ? Qtrue : Qfalse;
2014-12-26 06:13:15 +00:00
/*
 * Extension entry point. Defines JSON::Ext::Parser with its allocator and
 * methods, resolves the error classes and the NaN/Infinity constants, and
 * interns every method and option name used by the parser.
 */
void Init_parser(void)
2007-06-04 12:31:26 +00:00
2008-09-20 17:41:14 +00:00
2007-06-04 12:31:26 +00:00
mJSON = rb_define_module("JSON");
mExt = rb_define_module_under(mJSON, "Ext");
cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
/* The error classes are looked up by path rather than defined here. */
eParserError = rb_path2class("JSON::ParserError");
eNestingError = rb_path2class("JSON::NestingError");
rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
/* An arity of -1 means initialize receives its arguments as argc/argv. */
rb_define_method(cParser, "initialize", cParser_initialize, -1);
rb_define_method(cParser, "parse", cParser_parse, 0);
rb_define_method(cParser, "source", cParser_source, 0);
2011-08-30 02:23:12 +00:00
rb_define_method(cParser, "quirks_mode?", cParser_quirks_mode_p, 0);
2007-06-04 12:31:26 +00:00
2007-07-07 17:15:30 +00:00
/* Constants read from the JSON module. */
CNaN = rb_const_get(mJSON, rb_intern("NaN"));
CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
2007-06-04 12:31:26 +00:00
/* Interned method and option names. */
i_json_creatable_p = rb_intern("json_creatable?");
i_json_create = rb_intern("json_create");
i_create_id = rb_intern("create_id");
2007-11-28 09:22:57 +00:00
i_create_additions = rb_intern("create_additions");
2007-06-04 12:31:26 +00:00
i_chr = rb_intern("chr");
i_max_nesting = rb_intern("max_nesting");
2007-07-07 17:15:30 +00:00
i_allow_nan = rb_intern("allow_nan");
2010-04-26 06:27:27 +00:00
i_symbolize_names = rb_intern("symbolize_names");
2011-08-30 02:23:12 +00:00
i_quirks_mode = rb_intern("quirks_mode");
2009-09-01 16:17:56 +00:00
i_object_class = rb_intern("object_class");
i_array_class = rb_intern("array_class");
2011-07-10 08:01:04 +00:00
i_match = rb_intern("match");
i_match_string = rb_intern("match_string");
2010-04-26 06:27:27 +00:00
i_key_p = rb_intern("key?");
i_deep_const_get = rb_intern("deep_const_get");
2011-07-10 08:01:04 +00:00
i_aset = rb_intern("[]=");
2012-03-11 13:36:06 +00:00
i_aref = rb_intern("[]");
2011-07-10 08:01:04 +00:00
i_leftshift = rb_intern("<<");
2010-04-26 06:27:27 +00:00
/* Encoding objects and method names used by convert_encoding(). */
CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
CEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be"));
CEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le"));
CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be"));
CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le"));
CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit"));
i_encoding = rb_intern("encoding");
i_encode = rb_intern("encode");
i_iconv = rb_intern("iconv");
2007-06-04 12:31:26 +00:00
2011-02-05 01:13:41 +00:00
* Local variables:
* mode: c
* c-file-style: ruby
2011-02-05 01:30:01 +00:00
* indent-tabs-mode: nil
2011-02-05 01:13:41 +00:00
* End: