From 3642494ce5a44938285677669b3e001a1af1ce47 Mon Sep 17 00:00:00 2001 From: naruse Date: Mon, 26 Apr 2010 06:27:27 +0000 Subject: [PATCH] Recommit of JSON; fix mixed declarations. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27501 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 4 - ext/json/ext/generator/extconf.rb | 9 - ext/json/ext/generator/generator.c | 919 ------------------ ext/json/ext/generator/unicode.c | 180 ---- ext/json/ext/generator/unicode.h | 53 -- ext/json/ext/parser/extconf.rb | 9 - ext/json/ext/parser/unicode.c | 154 --- ext/json/ext/parser/unicode.h | 58 -- ext/json/generator/extconf.rb | 4 + ext/json/generator/generator.c | 1341 +++++++++++++++++++++++++++ ext/json/generator/generator.h | 170 ++++ ext/json/lib/json/add/core.rb | 27 +- ext/json/lib/json/add/rails.rb | 4 +- ext/json/lib/json/common.rb | 147 +-- ext/json/lib/json/editor.rb | 32 +- ext/json/lib/json/version.rb | 2 +- ext/json/parser/extconf.rb | 4 + ext/json/{ext => }/parser/parser.c | 482 ++++++---- ext/json/parser/parser.h | 71 ++ ext/json/{ext => }/parser/parser.rl | 334 ++++--- 20 files changed, 2238 insertions(+), 1766 deletions(-) delete mode 100644 ext/json/ext/generator/extconf.rb delete mode 100644 ext/json/ext/generator/generator.c delete mode 100644 ext/json/ext/generator/unicode.c delete mode 100644 ext/json/ext/generator/unicode.h delete mode 100644 ext/json/ext/parser/extconf.rb delete mode 100644 ext/json/ext/parser/unicode.c delete mode 100644 ext/json/ext/parser/unicode.h create mode 100644 ext/json/generator/extconf.rb create mode 100644 ext/json/generator/generator.c create mode 100644 ext/json/generator/generator.h create mode 100644 ext/json/parser/extconf.rb rename ext/json/{ext => }/parser/parser.c (75%) create mode 100644 ext/json/parser/parser.h rename ext/json/{ext => }/parser/parser.rl (63%) diff --git a/ChangeLog b/ChangeLog index 30e5482d35..6b35175e08 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,3 @@ -Mon Apr 26 13:33:39 2010 NAKAMURA 
Usaku - - * ext/jason: came again after canceling gcc-ism. - Mon Apr 26 13:11:57 2010 Nobuyoshi Nakada * parse.y (ripper_get_value): escape Qundef. diff --git a/ext/json/ext/generator/extconf.rb b/ext/json/ext/generator/extconf.rb deleted file mode 100644 index fc267420f1..0000000000 --- a/ext/json/ext/generator/extconf.rb +++ /dev/null @@ -1,9 +0,0 @@ -require 'mkmf' -require 'rbconfig' - -if CONFIG['GCC'] == 'yes' - $CFLAGS += ' -Wall' - #$CFLAGS += ' -O0 -ggdb' -end - -create_makefile 'json/ext/generator' diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c deleted file mode 100644 index 482938ac47..0000000000 --- a/ext/json/ext/generator/generator.c +++ /dev/null @@ -1,919 +0,0 @@ -#include -#include "ruby.h" -#if HAVE_RUBY_ST_H -#include "ruby/st.h" -#endif -#if HAVE_ST_H -#include "st.h" -#endif -#include "unicode.h" -#include - -#ifndef RHASH_TBL -#define RHASH_TBL(hsh) (RHASH(hsh)->tbl) -#endif - -#ifndef RHASH_SIZE -#define RHASH_SIZE(hsh) (RHASH(hsh)->tbl->num_entries) -#endif - -#ifndef RFLOAT_VALUE -#define RFLOAT_VALUE(val) (RFLOAT(val)->value) -#endif - -#ifdef HAVE_RUBY_ENCODING_H -#include "ruby/encoding.h" -#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding()) -#else -#define FORCE_UTF8(obj) -#endif - -#define check_max_nesting(state, depth) do { \ - long current_nesting = 1 + depth; \ - if (state->max_nesting != 0 && current_nesting > state->max_nesting) \ - rb_raise(eNestingError, "nesting of %ld is too deep", current_nesting); \ -} while (0); - -static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject, - mHash, mArray, mInteger, mFloat, mString, mString_Extend, - mTrueClass, mFalseClass, mNilClass, eGeneratorError, - eCircularDatastructure, eNestingError; - -static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before, - i_object_nl, i_array_nl, i_check_circular, i_max_nesting, - i_allow_nan, i_pack, i_unpack, i_create_id, i_extend; - -typedef struct 
JSON_Generator_StateStruct { - VALUE indent; - VALUE space; - VALUE space_before; - VALUE object_nl; - VALUE array_nl; - int check_circular; - VALUE seen; - VALUE memo; - VALUE depth; - long max_nesting; - int flag; - int allow_nan; -} JSON_Generator_State; - -#define GET_STATE(self) \ - JSON_Generator_State *state; \ - Data_Get_Struct(self, JSON_Generator_State, state); - -/* - * Document-module: JSON::Ext::Generator - * - * This is the JSON generator implemented as a C extension. It can be - * configured to be used by setting - * - * JSON.generator = JSON::Ext::Generator - * - * with the method generator= in JSON. - * - */ - -static int hash_to_json_state_i(VALUE key, VALUE value, VALUE Vstate) -{ - VALUE json, buf, Vdepth; - GET_STATE(Vstate); - buf = state->memo; - Vdepth = state->depth; - - if (key == Qundef) return ST_CONTINUE; - if (state->flag) { - state->flag = 0; - rb_str_buf_cat2(buf, ","); - if (RSTRING_LEN(state->object_nl)) rb_str_buf_append(buf, state->object_nl); - } - if (RSTRING_LEN(state->object_nl)) { - rb_str_buf_append(buf, rb_str_times(state->indent, Vdepth)); - } - json = rb_funcall(rb_funcall(key, i_to_s, 0), i_to_json, 2, Vstate, Vdepth); - Check_Type(json, T_STRING); - rb_str_buf_append(buf, json); - OBJ_INFECT(buf, json); - if (RSTRING_LEN(state->space_before)) { - rb_str_buf_append(buf, state->space_before); - } - rb_str_buf_cat2(buf, ":"); - if (RSTRING_LEN(state->space)) rb_str_buf_append(buf, state->space); - json = rb_funcall(value, i_to_json, 2, Vstate, Vdepth); - Check_Type(json, T_STRING); - state->flag = 1; - rb_str_buf_append(buf, json); - OBJ_INFECT(buf, json); - state->depth = Vdepth; - state->memo = buf; - return ST_CONTINUE; -} - -inline static VALUE mHash_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth) { - long depth, len = RHASH_SIZE(self); - VALUE result; - GET_STATE(Vstate); - - depth = 1 + FIX2LONG(Vdepth); - result = rb_str_buf_new(len); - state->memo = result; - state->depth = LONG2FIX(depth); - state->flag = 
0; - rb_str_buf_cat2(result, "{"); - if (RSTRING_LEN(state->object_nl)) rb_str_buf_append(result, state->object_nl); - rb_hash_foreach(self, hash_to_json_state_i, Vstate); - if (RSTRING_LEN(state->object_nl)) rb_str_buf_append(result, state->object_nl); - if (RSTRING_LEN(state->object_nl)) { - rb_str_buf_append(result, rb_str_times(state->indent, Vdepth)); - } - rb_str_buf_cat2(result, "}"); - return result; -} - -static int hash_to_json_i(VALUE key, VALUE value, VALUE buf) -{ - VALUE tmp; - - if (key == Qundef) return ST_CONTINUE; - if (RSTRING_LEN(buf) > 1) rb_str_buf_cat2(buf, ","); - tmp = rb_funcall(rb_funcall(key, i_to_s, 0), i_to_json, 0); - Check_Type(tmp, T_STRING); - rb_str_buf_append(buf, tmp); - OBJ_INFECT(buf, tmp); - rb_str_buf_cat2(buf, ":"); - tmp = rb_funcall(value, i_to_json, 0); - Check_Type(tmp, T_STRING); - rb_str_buf_append(buf, tmp); - OBJ_INFECT(buf, tmp); - - return ST_CONTINUE; -} - -/* - * call-seq: to_json(state = nil, depth = 0) - * - * Returns a JSON string containing a JSON object, that is unparsed from - * this Hash instance. - * _state_ is a JSON::State object, that can also be used to configure the - * produced JSON string output further. - * _depth_ is used to find out nesting depth, to indent accordingly. - */ -static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) -{ - VALUE Vstate, Vdepth, result; - long depth; - - rb_scan_args(argc, argv, "02", &Vstate, &Vdepth); - depth = NIL_P(Vdepth) ? 
0 : FIX2LONG(Vdepth); - if (NIL_P(Vstate)) { - long len = RHASH_SIZE(self); - result = rb_str_buf_new(len); - rb_str_buf_cat2(result, "{"); - rb_hash_foreach(self, hash_to_json_i, result); - rb_str_buf_cat2(result, "}"); - } else { - GET_STATE(Vstate); - check_max_nesting(state, depth); - if (state->check_circular) { - VALUE self_id = rb_obj_id(self); - if (RTEST(rb_hash_aref(state->seen, self_id))) { - rb_raise(eCircularDatastructure, - "circular data structures not supported!"); - } - rb_hash_aset(state->seen, self_id, Qtrue); - result = mHash_json_transfrom(self, Vstate, LONG2FIX(depth)); - rb_hash_delete(state->seen, self_id); - } else { - result = mHash_json_transfrom(self, Vstate, LONG2FIX(depth)); - } - } - OBJ_INFECT(result, self); - FORCE_UTF8(result); - return result; -} - -inline static VALUE mArray_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth) { - long i, len = RARRAY_LEN(self); - VALUE shift, result; - long depth = NIL_P(Vdepth) ? 0 : FIX2LONG(Vdepth); - VALUE delim = rb_str_new2(","); - GET_STATE(Vstate); - - check_max_nesting(state, depth); - if (state->check_circular) { - VALUE self_id = rb_obj_id(self); - rb_hash_aset(state->seen, self_id, Qtrue); - result = rb_str_buf_new(len); - if (RSTRING_LEN(state->array_nl)) rb_str_append(delim, state->array_nl); - shift = rb_str_times(state->indent, LONG2FIX(depth + 1)); - - rb_str_buf_cat2(result, "["); - OBJ_INFECT(result, self); - rb_str_buf_append(result, state->array_nl); - for (i = 0; i < len; i++) { - VALUE element = RARRAY_PTR(self)[i]; - if (RTEST(rb_hash_aref(state->seen, rb_obj_id(element)))) { - rb_raise(eCircularDatastructure, - "circular data structures not supported!"); - } - OBJ_INFECT(result, element); - if (i > 0) rb_str_buf_append(result, delim); - rb_str_buf_append(result, shift); - element = rb_funcall(element, i_to_json, 2, Vstate, LONG2FIX(depth + 1)); - Check_Type(element, T_STRING); - rb_str_buf_append(result, element); - } - if (RSTRING_LEN(state->array_nl)) { - 
rb_str_buf_append(result, state->array_nl); - rb_str_buf_append(result, rb_str_times(state->indent, LONG2FIX(depth))); - } - rb_str_buf_cat2(result, "]"); - rb_hash_delete(state->seen, self_id); - } else { - result = rb_str_buf_new(len); - OBJ_INFECT(result, self); - if (RSTRING_LEN(state->array_nl)) rb_str_append(delim, state->array_nl); - shift = rb_str_times(state->indent, LONG2FIX(depth + 1)); - - rb_str_buf_cat2(result, "["); - rb_str_buf_append(result, state->array_nl); - for (i = 0; i < len; i++) { - VALUE element = RARRAY_PTR(self)[i]; - OBJ_INFECT(result, element); - if (i > 0) rb_str_buf_append(result, delim); - rb_str_buf_append(result, shift); - element = rb_funcall(element, i_to_json, 2, Vstate, LONG2FIX(depth + 1)); - Check_Type(element, T_STRING); - rb_str_buf_append(result, element); - } - rb_str_buf_append(result, state->array_nl); - if (RSTRING_LEN(state->array_nl)) { - rb_str_buf_append(result, rb_str_times(state->indent, LONG2FIX(depth))); - } - rb_str_buf_cat2(result, "]"); - } - return result; -} - -/* - * call-seq: to_json(state = nil, depth = 0) - * - * Returns a JSON string containing a JSON array, that is unparsed from - * this Array instance. - * _state_ is a JSON::State object, that can also be used to configure the - * produced JSON string output further. - * _depth_ is used to find out nesting depth, to indent accordingly. 
- */ -static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { - VALUE Vstate, Vdepth, result; - - rb_scan_args(argc, argv, "02", &Vstate, &Vdepth); - if (NIL_P(Vstate)) { - long i, len = RARRAY_LEN(self); - result = rb_str_buf_new(2 + 2 * len); - rb_str_buf_cat2(result, "["); - OBJ_INFECT(result, self); - for (i = 0; i < len; i++) { - VALUE element = RARRAY_PTR(self)[i]; - OBJ_INFECT(result, element); - if (i > 0) rb_str_buf_cat2(result, ","); - element = rb_funcall(element, i_to_json, 0); - Check_Type(element, T_STRING); - rb_str_buf_append(result, element); - } - rb_str_buf_cat2(result, "]"); - } else { - result = mArray_json_transfrom(self, Vstate, Vdepth); - } - OBJ_INFECT(result, self); - FORCE_UTF8(result); - return result; -} - -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Integer number. - */ -static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) -{ - VALUE result = rb_funcall(self, i_to_s, 0); - FORCE_UTF8(result); - return result; -} - -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Float number. 
- */ -static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self) -{ - JSON_Generator_State *state = NULL; - VALUE Vstate, rest, tmp, result; - double value = RFLOAT_VALUE(self); - rb_scan_args(argc, argv, "01*", &Vstate, &rest); - if (!NIL_P(Vstate)) Data_Get_Struct(Vstate, JSON_Generator_State, state); - if (isinf(value)) { - if (!state || state->allow_nan) { - result = rb_funcall(self, i_to_s, 0); - } else { - tmp = rb_funcall(self, i_to_s, 0); - rb_raise(eGeneratorError, "%u: %s not allowed in JSON", __LINE__, StringValueCStr(tmp)); - } - } else if (isnan(value)) { - if (!state || state->allow_nan) { - result = rb_funcall(self, i_to_s, 0); - } else { - tmp = rb_funcall(self, i_to_s, 0); - rb_raise(eGeneratorError, "%u: %s not allowed in JSON", __LINE__, StringValueCStr(tmp)); - } - } else { - result = rb_funcall(self, i_to_s, 0); - } - FORCE_UTF8(result); - return result; -} - -/* - * call-seq: String.included(modul) - * - * Extends _modul_ with the String::Extend module. - */ -static VALUE mString_included_s(VALUE self, VALUE modul) { - VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend); - FORCE_UTF8(result); - return result; -} - -/* - * call-seq: to_json(*) - * - * This string should be encoded with UTF-8 A call to this method - * returns a JSON string encoded with UTF16 big endian characters as - * \u????. - */ -static VALUE mString_to_json(int argc, VALUE *argv, VALUE self) -{ - VALUE result = rb_str_buf_new(RSTRING_LEN(self)); - rb_str_buf_cat2(result, "\""); - JSON_convert_UTF8_to_JSON(result, self, strictConversion); - rb_str_buf_cat2(result, "\""); - FORCE_UTF8(result); - return result; -} - -/* - * call-seq: to_json_raw_object() - * - * This method creates a raw object hash, that can be nested into - * other data structures and will be unparsed as a raw string. This - * method should be used, if you want to convert raw strings to JSON - * instead of UTF-8 strings, e. g. binary data. 
- */ -static VALUE mString_to_json_raw_object(VALUE self) { - VALUE ary; - VALUE result = rb_hash_new(); - rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self))); - ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*")); - rb_hash_aset(result, rb_str_new2("raw"), ary); - FORCE_UTF8(result); - return result; -} - -/* - * call-seq: to_json_raw(*args) - * - * This method creates a JSON text from the result of a call to - * to_json_raw_object of this String. - */ -static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) { - VALUE result, obj = mString_to_json_raw_object(self); - Check_Type(obj, T_HASH); - result = mHash_to_json(argc, argv, obj); - FORCE_UTF8(result); - return result; -} - -/* - * call-seq: json_create(o) - * - * Raw Strings are JSON Objects (the raw bytes are stored in an array for the - * key "raw"). The Ruby String can be created by this module method. - */ -static VALUE mString_Extend_json_create(VALUE self, VALUE o) { - VALUE ary; - Check_Type(o, T_HASH); - ary = rb_hash_aref(o, rb_str_new2("raw")); - return rb_funcall(ary, i_pack, 1, rb_str_new2("C*")); -} - -/* - * call-seq: to_json(*) - * - * Returns a JSON string for true: 'true'. - */ -static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self) -{ - VALUE result = rb_str_new2("true"); - FORCE_UTF8(result); - return result; -} - -/* - * call-seq: to_json(*) - * - * Returns a JSON string for false: 'false'. - */ -static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self) -{ - VALUE result = rb_str_new2("false"); - FORCE_UTF8(result); - return result; -} - -/* - * call-seq: to_json(*) - * - */ -static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self) -{ - VALUE result = rb_str_new2("null"); - FORCE_UTF8(result); - return result; -} - -/* - * call-seq: to_json(*) - * - * Converts this object to a string (calling #to_s), converts - * it to a JSON string, and returns the result. 
This is a fallback, if no - * special method #to_json was defined for some object. - */ -static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) -{ - VALUE result, string = rb_funcall(self, i_to_s, 0); - Check_Type(string, T_STRING); - result = mString_to_json(argc, argv, string); - FORCE_UTF8(result); - return result; -} - -/* - * Document-class: JSON::Ext::Generator::State - * - * This class is used to create State instances, that are use to hold data - * while generating a JSON text from a a Ruby data structure. - */ - -static void State_mark(JSON_Generator_State *state) -{ - rb_gc_mark_maybe(state->indent); - rb_gc_mark_maybe(state->space); - rb_gc_mark_maybe(state->space_before); - rb_gc_mark_maybe(state->object_nl); - rb_gc_mark_maybe(state->array_nl); - rb_gc_mark_maybe(state->seen); - rb_gc_mark_maybe(state->memo); - rb_gc_mark_maybe(state->depth); -} - -static JSON_Generator_State *State_allocate() -{ - JSON_Generator_State *state = ALLOC(JSON_Generator_State); - return state; -} - -static VALUE cState_s_allocate(VALUE klass) -{ - JSON_Generator_State *state = State_allocate(); - return Data_Wrap_Struct(klass, State_mark, -1, state); -} - -/* - * call-seq: configure(opts) - * - * Configure this State instance with the Hash _opts_, and return - * itself. 
- */ -static VALUE cState_configure(VALUE self, VALUE opts) -{ - VALUE tmp; - GET_STATE(self); - tmp = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); - if (NIL_P(tmp)) tmp = rb_convert_type(opts, T_HASH, "Hash", "to_h"); - if (NIL_P(tmp)) { - rb_raise(rb_eArgError, "opts has to be hash like or convertable into a hash"); - } - opts = tmp; - tmp = rb_hash_aref(opts, ID2SYM(i_indent)); - if (RTEST(tmp)) { - Check_Type(tmp, T_STRING); - state->indent = tmp; - } - tmp = rb_hash_aref(opts, ID2SYM(i_space)); - if (RTEST(tmp)) { - Check_Type(tmp, T_STRING); - state->space = tmp; - } - tmp = rb_hash_aref(opts, ID2SYM(i_space_before)); - if (RTEST(tmp)) { - Check_Type(tmp, T_STRING); - state->space_before = tmp; - } - tmp = rb_hash_aref(opts, ID2SYM(i_array_nl)); - if (RTEST(tmp)) { - Check_Type(tmp, T_STRING); - state->array_nl = tmp; - } - tmp = rb_hash_aref(opts, ID2SYM(i_object_nl)); - if (RTEST(tmp)) { - Check_Type(tmp, T_STRING); - state->object_nl = tmp; - } - tmp = ID2SYM(i_check_circular); - if (st_lookup(RHASH_TBL(opts), tmp, 0)) { - tmp = rb_hash_aref(opts, ID2SYM(i_check_circular)); - state->check_circular = RTEST(tmp); - } else { - state->check_circular = 1; - } - tmp = ID2SYM(i_max_nesting); - state->max_nesting = 19; - if (st_lookup(RHASH_TBL(opts), tmp, 0)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - state->max_nesting = FIX2LONG(max_nesting); - } else { - state->max_nesting = 0; - } - } - tmp = rb_hash_aref(opts, ID2SYM(i_allow_nan)); - state->allow_nan = RTEST(tmp); - return self; -} - -/* - * call-seq: to_h - * - * Returns the configuration instance variables as a hash, that can be - * passed to the configure method. 
- */ -static VALUE cState_to_h(VALUE self) -{ - VALUE result = rb_hash_new(); - GET_STATE(self); - rb_hash_aset(result, ID2SYM(i_indent), state->indent); - rb_hash_aset(result, ID2SYM(i_space), state->space); - rb_hash_aset(result, ID2SYM(i_space_before), state->space_before); - rb_hash_aset(result, ID2SYM(i_object_nl), state->object_nl); - rb_hash_aset(result, ID2SYM(i_array_nl), state->array_nl); - rb_hash_aset(result, ID2SYM(i_check_circular), state->check_circular ? Qtrue : Qfalse); - rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse); - rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting)); - return result; -} - - -/* - * call-seq: new(opts = {}) - * - * Instantiates a new State object, configured by _opts_. - * - * _opts_ can have the following keys: - * - * * *indent*: a string used to indent levels (default: ''), - * * *space*: a string that is put after, a : or , delimiter (default: ''), - * * *space_before*: a string that is put before a : pair delimiter (default: ''), - * * *object_nl*: a string that is put at the end of a JSON object (default: ''), - * * *array_nl*: a string that is put at the end of a JSON array (default: ''), - * * *check_circular*: true if checking for circular data structures - * should be done, false (the default) otherwise. - * * *allow_nan*: true if NaN, Infinity, and -Infinity should be - * generated, otherwise an exception is thrown, if these values are - * encountered. This options defaults to false. 
- */ -static VALUE cState_initialize(int argc, VALUE *argv, VALUE self) -{ - VALUE opts; - GET_STATE(self); - - rb_scan_args(argc, argv, "01", &opts); - state->indent = rb_str_new2(""); - state->space = rb_str_new2(""); - state->space_before = rb_str_new2(""); - state->array_nl = rb_str_new2(""); - state->object_nl = rb_str_new2(""); - if (NIL_P(opts)) { - state->check_circular = 1; - state->allow_nan = 0; - state->max_nesting = 19; - } else { - cState_configure(self, opts); - } - state->seen = rb_hash_new(); - state->memo = Qnil; - state->depth = INT2FIX(0); - return self; -} - -/* - * call-seq: from_state(opts) - * - * Creates a State object from _opts_, which ought to be Hash to create a - * new State instance configured by _opts_, something else to create an - * unconfigured instance. If _opts_ is a State object, it is just returned. - */ -static VALUE cState_from_state_s(VALUE self, VALUE opts) -{ - if (rb_obj_is_kind_of(opts, self)) { - return opts; - } else if (rb_obj_is_kind_of(opts, rb_cHash)) { - return rb_funcall(self, i_new, 1, opts); - } else { - return rb_funcall(self, i_new, 0); - } -} - -/* - * call-seq: indent() - * - * This string is used to indent levels in the JSON text. - */ -static VALUE cState_indent(VALUE self) -{ - GET_STATE(self); - return state->indent; -} - -/* - * call-seq: indent=(indent) - * - * This string is used to indent levels in the JSON text. - */ -static VALUE cState_indent_set(VALUE self, VALUE indent) -{ - GET_STATE(self); - Check_Type(indent, T_STRING); - return state->indent = indent; -} - -/* - * call-seq: space() - * - * This string is used to insert a space between the tokens in a JSON - * string. - */ -static VALUE cState_space(VALUE self) -{ - GET_STATE(self); - return state->space; -} - -/* - * call-seq: space=(space) - * - * This string is used to insert a space between the tokens in a JSON - * string. 
- */ -static VALUE cState_space_set(VALUE self, VALUE space) -{ - GET_STATE(self); - Check_Type(space, T_STRING); - return state->space = space; -} - -/* - * call-seq: space_before() - * - * This string is used to insert a space before the ':' in JSON objects. - */ -static VALUE cState_space_before(VALUE self) -{ - GET_STATE(self); - return state->space_before; -} - -/* - * call-seq: space_before=(space_before) - * - * This string is used to insert a space before the ':' in JSON objects. - */ -static VALUE cState_space_before_set(VALUE self, VALUE space_before) -{ - GET_STATE(self); - Check_Type(space_before, T_STRING); - return state->space_before = space_before; -} - -/* - * call-seq: object_nl() - * - * This string is put at the end of a line that holds a JSON object (or - * Hash). - */ -static VALUE cState_object_nl(VALUE self) -{ - GET_STATE(self); - return state->object_nl; -} - -/* - * call-seq: object_nl=(object_nl) - * - * This string is put at the end of a line that holds a JSON object (or - * Hash). - */ -static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) -{ - GET_STATE(self); - Check_Type(object_nl, T_STRING); - return state->object_nl = object_nl; -} - -/* - * call-seq: array_nl() - * - * This string is put at the end of a line that holds a JSON array. - */ -static VALUE cState_array_nl(VALUE self) -{ - GET_STATE(self); - return state->array_nl; -} - -/* - * call-seq: array_nl=(array_nl) - * - * This string is put at the end of a line that holds a JSON array. - */ -static VALUE cState_array_nl_set(VALUE self, VALUE array_nl) -{ - GET_STATE(self); - Check_Type(array_nl, T_STRING); - return state->array_nl = array_nl; -} - -/* - * call-seq: check_circular? - * - * Returns true, if circular data structures should be checked, - * otherwise returns false. - */ -static VALUE cState_check_circular_p(VALUE self) -{ - GET_STATE(self); - return state->check_circular ? 
Qtrue : Qfalse; -} - -/* - * call-seq: max_nesting - * - * This integer returns the maximum level of data structure nesting in - * the generated JSON, max_nesting = 0 if no maximum is checked. - */ -static VALUE cState_max_nesting(VALUE self) -{ - GET_STATE(self); - return LONG2FIX(state->max_nesting); -} - -/* - * call-seq: max_nesting=(depth) - * - * This sets the maximum level of data structure nesting in the generated JSON - * to the integer depth, max_nesting = 0 if no maximum should be checked. - */ -static VALUE cState_max_nesting_set(VALUE self, VALUE depth) -{ - GET_STATE(self); - Check_Type(depth, T_FIXNUM); - state->max_nesting = FIX2LONG(depth); - return Qnil; -} - -/* - * call-seq: allow_nan? - * - * Returns true, if NaN, Infinity, and -Infinity should be generated, otherwise - * returns false. - */ -static VALUE cState_allow_nan_p(VALUE self) -{ - GET_STATE(self); - return state->allow_nan ? Qtrue : Qfalse; -} - -/* - * call-seq: seen?(object) - * - * Returns _true_, if _object_ was already seen during this generating run. - */ -static VALUE cState_seen_p(VALUE self, VALUE object) -{ - GET_STATE(self); - return rb_hash_aref(state->seen, rb_obj_id(object)); -} - -/* - * call-seq: remember(object) - * - * Remember _object_, to find out if it was already encountered (if a cyclic - * data structure is rendered). - */ -static VALUE cState_remember(VALUE self, VALUE object) -{ - GET_STATE(self); - return rb_hash_aset(state->seen, rb_obj_id(object), Qtrue); -} - -/* - * call-seq: forget(object) - * - * Forget _object_ for this generating run. 
- */ -static VALUE cState_forget(VALUE self, VALUE object) -{ - GET_STATE(self); - return rb_hash_delete(state->seen, rb_obj_id(object)); -} - -/* - * - */ -void Init_generator() -{ - rb_require("json/common"); - mJSON = rb_define_module("JSON"); - mExt = rb_define_module_under(mJSON, "Ext"); - mGenerator = rb_define_module_under(mExt, "Generator"); - eGeneratorError = rb_path2class("JSON::GeneratorError"); - eCircularDatastructure = rb_path2class("JSON::CircularDatastructure"); - eNestingError = rb_path2class("JSON::NestingError"); - cState = rb_define_class_under(mGenerator, "State", rb_cObject); - rb_define_alloc_func(cState, cState_s_allocate); - rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1); - rb_define_method(cState, "initialize", cState_initialize, -1); - - rb_define_method(cState, "indent", cState_indent, 0); - rb_define_method(cState, "indent=", cState_indent_set, 1); - rb_define_method(cState, "space", cState_space, 0); - rb_define_method(cState, "space=", cState_space_set, 1); - rb_define_method(cState, "space_before", cState_space_before, 0); - rb_define_method(cState, "space_before=", cState_space_before_set, 1); - rb_define_method(cState, "object_nl", cState_object_nl, 0); - rb_define_method(cState, "object_nl=", cState_object_nl_set, 1); - rb_define_method(cState, "array_nl", cState_array_nl, 0); - rb_define_method(cState, "array_nl=", cState_array_nl_set, 1); - rb_define_method(cState, "check_circular?", cState_check_circular_p, 0); - rb_define_method(cState, "max_nesting", cState_max_nesting, 0); - rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1); - rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0); - rb_define_method(cState, "seen?", cState_seen_p, 1); - rb_define_method(cState, "remember", cState_remember, 1); - rb_define_method(cState, "forget", cState_forget, 1); - rb_define_method(cState, "configure", cState_configure, 1); - rb_define_method(cState, "to_h", cState_to_h, 0); - - 
mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); - mObject = rb_define_module_under(mGeneratorMethods, "Object"); - rb_define_method(mObject, "to_json", mObject_to_json, -1); - mHash = rb_define_module_under(mGeneratorMethods, "Hash"); - rb_define_method(mHash, "to_json", mHash_to_json, -1); - mArray = rb_define_module_under(mGeneratorMethods, "Array"); - rb_define_method(mArray, "to_json", mArray_to_json, -1); - mInteger = rb_define_module_under(mGeneratorMethods, "Integer"); - rb_define_method(mInteger, "to_json", mInteger_to_json, -1); - mFloat = rb_define_module_under(mGeneratorMethods, "Float"); - rb_define_method(mFloat, "to_json", mFloat_to_json, -1); - mString = rb_define_module_under(mGeneratorMethods, "String"); - rb_define_singleton_method(mString, "included", mString_included_s, 1); - rb_define_method(mString, "to_json", mString_to_json, -1); - rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1); - rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0); - mString_Extend = rb_define_module_under(mString, "Extend"); - rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1); - mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass"); - rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1); - mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass"); - rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1); - mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass"); - rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1); - - i_to_s = rb_intern("to_s"); - i_to_json = rb_intern("to_json"); - i_new = rb_intern("new"); - i_indent = rb_intern("indent"); - i_space = rb_intern("space"); - i_space_before = rb_intern("space_before"); - i_object_nl = rb_intern("object_nl"); - i_array_nl = rb_intern("array_nl"); - i_check_circular = rb_intern("check_circular"); - i_max_nesting = rb_intern("max_nesting"); - i_allow_nan = 
rb_intern("allow_nan"); - i_pack = rb_intern("pack"); - i_unpack = rb_intern("unpack"); - i_create_id = rb_intern("create_id"); - i_extend = rb_intern("extend"); -} diff --git a/ext/json/ext/generator/unicode.c b/ext/json/ext/generator/unicode.c deleted file mode 100644 index 947e2d6bda..0000000000 --- a/ext/json/ext/generator/unicode.c +++ /dev/null @@ -1,180 +0,0 @@ -#include "unicode.h" - -#define unicode_escape(buffer, character) \ - snprintf(buf, 7, "\\u%04x", (unsigned int) (character)); \ - rb_str_buf_cat(buffer, buf, 6); - -/* - * Copyright 2001-2004 Unicode, Inc. - * - * Disclaimer - * - * This source code is provided as is by Unicode, Inc. No claims are - * made as to fitness for any particular purpose. No warranties of any - * kind are expressed or implied. The recipient agrees to determine - * applicability of information provided. If this file has been - * purchased on magnetic or optical media from Unicode, Inc., the - * sole remedy for any claim will be exchange of defective media - * within 90 days of receipt. - * - * Limitations on Rights to Redistribute This Code - * - * Unicode, Inc. hereby grants the right to freely use the information - * supplied in this file in the creation of products supporting the - * Unicode Standard, and to make copies of this file in any form - * for internal or external distribution as long as this notice - * remains attached. - */ - -/* - * Index into the table below with the first byte of a UTF-8 sequence to - * get the number of trailing bytes that are supposed to follow it. - * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is - * left as-is for anyone who may want to do such conversion, which was - * allowed in earlier algorithms. 
- */ -static const char trailingBytesForUTF8[256] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 -}; - -/* - * Magic values subtracted from a buffer value during UTF8 conversion. - * This table contains as many values as there might be trailing bytes - * in a UTF-8 sequence. - */ -static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, - 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; - -/* - * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed - * into the first byte, depending on how many bytes follow. There are - * as many entries in this table as there are UTF-8 sequence types. - * (I.e., one byte sequence, two byte... etc.). Remember that sequencs - * for *legal* UTF-8 will be 4 or fewer bytes total. - */ -static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; - -/* - * Utility routine to tell whether a sequence of bytes is legal UTF-8. - * This must be called with the length pre-determined by the first byte. - * If not calling this from ConvertUTF8to*, then the length can be set by: - * length = trailingBytesForUTF8[*source]+1; - * and the sequence is illegal right away if there aren't that many bytes - * available. - * If presented with a length > 4, this returns 0. The Unicode - * definition of UTF-8 goes up to 4-byte sequences. 
- */ - -inline static unsigned char isLegalUTF8(const UTF8 *source, int length) -{ - UTF8 a; - const UTF8 *srcptr = source+length; - switch (length) { - default: return 0; - /* Everything else falls through when "1"... */ - case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; - case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; - case 2: if ((a = (*--srcptr)) > 0xBF) return 0; - - switch (*source) { - /* no fall-through in this inner switch */ - case 0xE0: if (a < 0xA0) return 0; break; - case 0xED: if (a > 0x9F) return 0; break; - case 0xF0: if (a < 0x90) return 0; break; - case 0xF4: if (a > 0x8F) return 0; break; - default: if (a < 0x80) return 0; - } - - case 1: if (*source >= 0x80 && *source < 0xC2) return 0; - } - if (*source > 0xF4) return 0; - return 1; -} - -void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags) -{ - char buf[7]; - const UTF8* source = (UTF8 *) RSTRING_PTR(string); - const UTF8* sourceEnd = source + RSTRING_LEN(string); - - while (source < sourceEnd) { - UTF32 ch = 0; - unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; - if (source + extraBytesToRead >= sourceEnd) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "partial character in source, but hit end"); - } - if (!isLegalUTF8(source, extraBytesToRead+1)) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed"); - } - /* - * The cases all fall through. See "Note A" below. 
- */ - switch (extraBytesToRead) { - case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 3: ch += *source++; ch <<= 6; - case 2: ch += *source++; ch <<= 6; - case 1: ch += *source++; ch <<= 6; - case 0: ch += *source++; - } - ch -= offsetsFromUTF8[extraBytesToRead]; - - if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { - if (flags == strictConversion) { - source -= (extraBytesToRead+1); /* return to the illegal value itself */ - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed"); - } else { - unicode_escape(buffer, UNI_REPLACEMENT_CHAR); - } - } else { - /* normal case */ - if (ch == '"') { - rb_str_buf_cat2(buffer, "\\\""); - } else if (ch == '\\') { - rb_str_buf_cat2(buffer, "\\\\"); - } else if (ch >= 0x20 && ch <= 0x7f) { - rb_str_buf_cat(buffer, (char *) source - 1, 1); - } else if (ch == '\n') { - rb_str_buf_cat2(buffer, "\\n"); - } else if (ch == '\r') { - rb_str_buf_cat2(buffer, "\\r"); - } else if (ch == '\t') { - rb_str_buf_cat2(buffer, "\\t"); - } else if (ch == '\f') { - rb_str_buf_cat2(buffer, "\\f"); - } else if (ch == '\b') { - rb_str_buf_cat2(buffer, "\\b"); - } else if (ch < 0x20) { - unicode_escape(buffer, (UTF16) ch); - } else { - unicode_escape(buffer, (UTF16) ch); - } - } - } else if (ch > UNI_MAX_UTF16) { - if (flags == strictConversion) { - source -= (extraBytesToRead+1); /* return to the start */ - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed"); - } else { - unicode_escape(buffer, UNI_REPLACEMENT_CHAR); - } - } else { - /* target is a character in range 0xFFFF - 0x10FFFF. 
*/ - ch -= halfBase; - unicode_escape(buffer, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START)); - unicode_escape(buffer, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START)); - } - } -} diff --git a/ext/json/ext/generator/unicode.h b/ext/json/ext/generator/unicode.h deleted file mode 100644 index 841474bcea..0000000000 --- a/ext/json/ext/generator/unicode.h +++ /dev/null @@ -1,53 +0,0 @@ -#include "ruby.h" - -#ifndef _GENERATOR_UNICODE_H_ -#define _GENERATOR_UNICODE_H_ - -typedef enum { - conversionOK = 0, /* conversion successful */ - sourceExhausted, /* partial character in source, but hit end */ - targetExhausted, /* insuff. room in target for conversion */ - sourceIllegal /* source sequence is illegal/malformed */ -} ConversionResult; - -typedef enum { - strictConversion = 0, - lenientConversion -} ConversionFlags; - -typedef unsigned long UTF32; /* at least 32 bits */ -typedef unsigned short UTF16; /* at least 16 bits */ -typedef unsigned char UTF8; /* typically 8 bits */ - -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD -#define UNI_MAX_BMP (UTF32)0x0000FFFF -#define UNI_MAX_UTF16 (UTF32)0x0010FFFF -#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF -#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF - -#define UNI_SUR_HIGH_START (UTF32)0xD800 -#define UNI_SUR_HIGH_END (UTF32)0xDBFF -#define UNI_SUR_LOW_START (UTF32)0xDC00 -#define UNI_SUR_LOW_END (UTF32)0xDFFF - -static const int halfShift = 10; /* used for shifting by 10 bits */ - -static const UTF32 halfBase = 0x0010000UL; -static const UTF32 halfMask = 0x3FFUL; - -void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags); - -#ifndef RARRAY_PTR -#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr -#endif -#ifndef RARRAY_LEN -#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len -#endif -#ifndef RSTRING_PTR -#define RSTRING_PTR(string) RSTRING(string)->ptr -#endif -#ifndef RSTRING_LEN -#define RSTRING_LEN(string) RSTRING(string)->len -#endif - -#endif diff --git a/ext/json/ext/parser/extconf.rb 
b/ext/json/ext/parser/extconf.rb deleted file mode 100644 index e790f6caae..0000000000 --- a/ext/json/ext/parser/extconf.rb +++ /dev/null @@ -1,9 +0,0 @@ -require 'mkmf' -require 'rbconfig' - -if CONFIG['GCC'] == 'yes' - $CFLAGS += ' -Wall' - #$CFLAGS += ' -O0 -ggdb' -end - -create_makefile 'json/ext/parser' diff --git a/ext/json/ext/parser/unicode.c b/ext/json/ext/parser/unicode.c deleted file mode 100644 index 1af2878476..0000000000 --- a/ext/json/ext/parser/unicode.c +++ /dev/null @@ -1,154 +0,0 @@ -#include "unicode.h" - -/* - * Copyright 2001-2004 Unicode, Inc. - * - * Disclaimer - * - * This source code is provided as is by Unicode, Inc. No claims are - * made as to fitness for any particular purpose. No warranties of any - * kind are expressed or implied. The recipient agrees to determine - * applicability of information provided. If this file has been - * purchased on magnetic or optical media from Unicode, Inc., the - * sole remedy for any claim will be exchange of defective media - * within 90 days of receipt. - * - * Limitations on Rights to Redistribute This Code - * - * Unicode, Inc. hereby grants the right to freely use the information - * supplied in this file in the creation of products supporting the - * Unicode Standard, and to make copies of this file in any form - * for internal or external distribution as long as this notice - * remains attached. - */ - -/* - * Index into the table below with the first byte of a UTF-8 sequence to - * get the number of trailing bytes that are supposed to follow it. - * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is - * left as-is for anyone who may want to do such conversion, which was - * allowed in earlier algorithms. 
- */ -static const char trailingBytesForUTF8[256] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 -}; - -/* - * Magic values subtracted from a buffer value during UTF8 conversion. - * This table contains as many values as there might be trailing bytes - * in a UTF-8 sequence. - */ -static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, - 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; - -/* - * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed - * into the first byte, depending on how many bytes follow. There are - * as many entries in this table as there are UTF-8 sequence types. - * (I.e., one byte sequence, two byte... etc.). Remember that sequencs - * for *legal* UTF-8 will be 4 or fewer bytes total. 
- */ -static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; - -char *JSON_convert_UTF16_to_UTF8 ( - VALUE buffer, - char *source, - char *sourceEnd, - ConversionFlags flags) -{ - UTF16 *tmp, *tmpPtr, *tmpEnd; - char buf[5]; - long n = 0, i; - char *p = source - 1; - - while (p < sourceEnd && p[0] == '\\' && p[1] == 'u') { - p += 6; - n++; - } - p = source + 1; - buf[4] = 0; - tmpPtr = tmp = ALLOC_N(UTF16, n); - tmpEnd = tmp + n; - for (i = 0; i < n; i++) { - buf[0] = *p++; - buf[1] = *p++; - buf[2] = *p++; - buf[3] = *p++; - tmpPtr[i] = (UTF16)strtol(buf, NULL, 16); - p += 2; - } - - while (tmpPtr < tmpEnd) { - UTF32 ch; - unsigned short bytesToWrite = 0; - const UTF32 byteMask = 0xBF; - const UTF32 byteMark = 0x80; - ch = *tmpPtr++; - /* If we have a surrogate pair, convert to UTF32 first. */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { - /* If the 16 bits following the high surrogate are in the source - * buffer... */ - if (tmpPtr < tmpEnd) { - UTF32 ch2 = *tmpPtr; - /* If it's a low surrogate, convert to UTF32. */ - if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { - ch = ((ch - UNI_SUR_HIGH_START) << halfShift) - + (ch2 - UNI_SUR_LOW_START) + halfBase; - ++tmpPtr; - } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ - ruby_xfree(tmp); - rb_raise(rb_path2class("JSON::ParserError"), - "source sequence is illegal/malformed near %s", source); - } - } else { /* We don't have the 16 bits following the high surrogate. 
*/ - ruby_xfree(tmp); - rb_raise(rb_path2class("JSON::ParserError"), - "partial character in source, but hit end near %s", source); - break; - } - } else if (flags == strictConversion) { - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { - ruby_xfree(tmp); - rb_raise(rb_path2class("JSON::ParserError"), - "source sequence is illegal/malformed near %s", source); - } - } - /* Figure out how many bytes the result will require */ - if (ch < (UTF32) 0x80) { - bytesToWrite = 1; - } else if (ch < (UTF32) 0x800) { - bytesToWrite = 2; - } else if (ch < (UTF32) 0x10000) { - bytesToWrite = 3; - } else if (ch < (UTF32) 0x110000) { - bytesToWrite = 4; - } else { - bytesToWrite = 3; - ch = UNI_REPLACEMENT_CHAR; - } - - buf[0] = 0; - buf[1] = 0; - buf[2] = 0; - buf[3] = 0; - p = buf + bytesToWrite; - switch (bytesToWrite) { /* note: everything falls through. */ - case 4: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6; - case 3: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6; - case 2: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6; - case 1: *--p = (UTF8) (ch | firstByteMark[bytesToWrite]); - } - rb_str_buf_cat(buffer, p, bytesToWrite); - } - ruby_xfree(tmp); - source += 5 + (n - 1) * 6; - return source; -} diff --git a/ext/json/ext/parser/unicode.h b/ext/json/ext/parser/unicode.h deleted file mode 100644 index 155da0ceee..0000000000 --- a/ext/json/ext/parser/unicode.h +++ /dev/null @@ -1,58 +0,0 @@ - -#ifndef _PARSER_UNICODE_H_ -#define _PARSER_UNICODE_H_ - -#include "ruby.h" - -typedef unsigned long UTF32; /* at least 32 bits */ -typedef unsigned short UTF16; /* at least 16 bits */ -typedef unsigned char UTF8; /* typically 8 bits */ - -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD -#define UNI_MAX_BMP (UTF32)0x0000FFFF -#define UNI_MAX_UTF16 (UTF32)0x0010FFFF -#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF -#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF - -#define UNI_SUR_HIGH_START (UTF32)0xD800 -#define 
UNI_SUR_HIGH_END (UTF32)0xDBFF -#define UNI_SUR_LOW_START (UTF32)0xDC00 -#define UNI_SUR_LOW_END (UTF32)0xDFFF - -static const int halfShift = 10; /* used for shifting by 10 bits */ - -static const UTF32 halfBase = 0x0010000UL; -static const UTF32 halfMask = 0x3FFUL; - -typedef enum { - conversionOK = 0, /* conversion successful */ - sourceExhausted, /* partial character in source, but hit end */ - targetExhausted, /* insuff. room in target for conversion */ - sourceIllegal /* source sequence is illegal/malformed */ -} ConversionResult; - -typedef enum { - strictConversion = 0, - lenientConversion -} ConversionFlags; - -char *JSON_convert_UTF16_to_UTF8 ( - VALUE buffer, - char *source, - char *sourceEnd, - ConversionFlags flags); - -#ifndef RARRAY_PTR -#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr -#endif -#ifndef RARRAY_LEN -#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len -#endif -#ifndef RSTRING_PTR -#define RSTRING_PTR(string) RSTRING(string)->ptr -#endif -#ifndef RSTRING_LEN -#define RSTRING_LEN(string) RSTRING(string)->len -#endif - -#endif diff --git a/ext/json/generator/extconf.rb b/ext/json/generator/extconf.rb new file mode 100644 index 0000000000..195bfde3ff --- /dev/null +++ b/ext/json/generator/extconf.rb @@ -0,0 +1,4 @@ +require 'mkmf' +require 'rbconfig' + +create_makefile 'json/ext/generator' diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c new file mode 100644 index 0000000000..3d3620534b --- /dev/null +++ b/ext/json/generator/generator.c @@ -0,0 +1,1341 @@ +#include "generator.h" + +#ifdef HAVE_RUBY_ENCODING_H +static VALUE CEncoding_UTF_8; +static ID i_encoding, i_encode; +#endif + +static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject, + mHash, mArray, mInteger, mFloat, mString, mString_Extend, + mTrueClass, mFalseClass, mNilClass, eGeneratorError, + eNestingError, CRegexp_MULTILINE, CJSON_SAFE_STATE_PROTOTYPE; + +static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before, + i_object_nl, 
i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only, + i_pack, i_unpack, i_create_id, i_extend, i_key_p, i_aref, i_send, + i_respond_to_p, i_match; + +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. + * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is + * left as-is for anyone who may want to do such conversion, which was + * allowed in earlier algorithms. 
+ */ +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ +static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; + +/* + * Utility routine to tell whether a sequence of bytes is legal UTF-8. + * This must be called with the length pre-determined by the first byte. + * If not calling this from ConvertUTF8to*, then the length can be set by: + * length = trailingBytesForUTF8[*source]+1; + * and the sequence is illegal right away if there aren't that many bytes + * available. + * If presented with a length > 4, this returns 0. The Unicode + * definition of UTF-8 goes up to 4-byte sequences. + */ +static unsigned char isLegalUTF8(const UTF8 *source, int length) +{ + UTF8 a; + const UTF8 *srcptr = source+length; + switch (length) { + default: return 0; + /* Everything else falls through when "1"... 
*/ + case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + case 2: if ((a = (*--srcptr)) > 0xBF) return 0; + + switch (*source) { + /* no fall-through in this inner switch */ + case 0xE0: if (a < 0xA0) return 0; break; + case 0xED: if (a > 0x9F) return 0; break; + case 0xF0: if (a < 0x90) return 0; break; + case 0xF4: if (a > 0x8F) return 0; break; + default: if (a < 0x80) return 0; + } + + case 1: if (*source >= 0x80 && *source < 0xC2) return 0; + } + if (*source > 0xF4) return 0; + return 1; +} + +/* Escapes the UTF16 character and stores the result in the buffer buf. */ +static void unicode_escape(char *buf, UTF16 character) +{ + const char *digits = "0123456789abcdef"; + + buf[2] = digits[character >> 12]; + buf[3] = digits[(character >> 8) & 0xf]; + buf[4] = digits[(character >> 4) & 0xf]; + buf[5] = digits[character & 0xf]; +} + +/* Escapes the UTF16 character and stores the result in the buffer buf, then + * the buffer buf is appended to the FBuffer buffer. */ +static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 + character) +{ + unicode_escape(buf, character); + fbuffer_append(buffer, buf, 6); +} + +/* Converts string to a JSON string in FBuffer buffer, where all but the ASCII + * and control characters are JSON escaped.
*/ +static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string) +{ + const UTF8 *source = (UTF8 *) RSTRING_PTR(string); + const UTF8 *sourceEnd = source + RSTRING_LEN(string); + char buf[6] = { '\\', 'u' }; + + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (source + extraBytesToRead >= sourceEnd) { + rb_raise(rb_path2class("JSON::GeneratorError"), + "partial character in source, but hit end"); + } + if (!isLegalUTF8(source, extraBytesToRead+1)) { + rb_raise(rb_path2class("JSON::GeneratorError"), + "source sequence is illegal/malformed utf-8"); + } + /* + * The cases all fall through. See "Note A" below. + */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { +#if UNI_STRICT_CONVERSION + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + rb_raise(rb_path2class("JSON::GeneratorError"), + "source sequence is illegal/malformed utf-8"); +#else + unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR); +#endif + } else { + /* normal case */ + if (ch >= 0x20 && ch <= 0x7f) { + switch (ch) { + case '\\': + fbuffer_append(buffer, "\\\\", 2); + break; + case '"': + fbuffer_append(buffer, "\\\"", 2); + break; + default: + fbuffer_append_char(buffer, ch); + break; + } + } else { + switch (ch) { + case '\n': + fbuffer_append(buffer, "\\n", 2); + break; + case '\r': + fbuffer_append(buffer, "\\r", 2); + break; + case '\t': + fbuffer_append(buffer, "\\t", 2); + break; + case '\f': + 
fbuffer_append(buffer, "\\f", 2); + break; + case '\b': + fbuffer_append(buffer, "\\b", 2); + break; + default: + unicode_escape_to_buffer(buffer, buf, (UTF16) ch); + break; + } + } + } + } else if (ch > UNI_MAX_UTF16) { +#if UNI_STRICT_CONVERSION + source -= (extraBytesToRead+1); /* return to the start */ + rb_raise(rb_path2class("JSON::GeneratorError"), + "source sequence is illegal/malformed utf8"); +#else + unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR); +#endif + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + ch -= halfBase; + unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START)); + unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START)); + } + } +} + +/* Converts string to a JSON string in FBuffer buffer, where only the + * characters required by the JSON standard are JSON escaped. The remaining + * characters (should be UTF8) are just passed through and appended to the + * result. */ +static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string) +{ + const char *ptr = RSTRING_PTR(string), *p; + int len = RSTRING_LEN(string), start = 0, end = 0; + const char *escape = NULL; + int escape_len; + unsigned char c; + char buf[6] = { '\\', 'u' }; + + for (start = 0, end = 0; end < len;) { + p = ptr + end; + c = (unsigned char) *p; + if (c < 0x20) { + switch (c) { + case '\n': + escape = "\\n"; + escape_len = 2; + break; + case '\r': + escape = "\\r"; + escape_len = 2; + break; + case '\t': + escape = "\\t"; + escape_len = 2; + break; + case '\f': + escape = "\\f"; + escape_len = 2; + break; + case '\b': + escape = "\\b"; + escape_len = 2; + break; + default: + unicode_escape(buf, (UTF16) *p); + escape = buf; + escape_len = 6; + break; + } + } else { + switch (c) { + case '\\': + escape = "\\\\"; + escape_len = 2; + break; + case '"': + escape = "\\\""; + escape_len = 2; + break; + default: + end++; + continue; + break; + } + } + fbuffer_append(buffer, ptr + start, end 
- start); + fbuffer_append(buffer, escape, escape_len); + start = ++end; + escape = NULL; + } + fbuffer_append(buffer, ptr + start, end - start); +} + +static char *fstrndup(const char *ptr, int len) { + char *result; + if (len <= 0) return NULL; + result = ALLOC_N(char, len); + memccpy(result, ptr, 0, len); + return result; +} + +/* fbuffer implementation */ + +static FBuffer *fbuffer_alloc() +{ + FBuffer *fb = ALLOC(FBuffer); + memset((void *) fb, 0, sizeof(FBuffer)); + fb->initial_length = FBUFFER_INITIAL_LENGTH; + return fb; +} + +static FBuffer *fbuffer_alloc_with_length(unsigned int initial_length) +{ + FBuffer *fb; + assert(initial_length > 0); + fb = ALLOC(FBuffer); + memset((void *) fb, 0, sizeof(FBuffer)); + fb->initial_length = initial_length; + return fb; +} + +static void fbuffer_free(FBuffer *fb) +{ + if (fb->ptr) ruby_xfree(fb->ptr); + ruby_xfree(fb); +} + +static void fbuffer_free_only_buffer(FBuffer *fb) +{ + ruby_xfree(fb); +} + +static void fbuffer_clear(FBuffer *fb) +{ + fb->len = 0; +} + +static void fbuffer_inc_capa(FBuffer *fb, unsigned int requested) +{ + unsigned int required; + + if (!fb->ptr) { + fb->ptr = ALLOC_N(char, fb->initial_length); + fb->capa = fb->initial_length; + } + + for (required = fb->capa; requested > required - fb->len; required <<= 1); + + if (required > fb->capa) { + fb->ptr = (char *) REALLOC_N((long*) fb->ptr, char, required); + fb->capa = required; + } +} + +static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len) +{ + if (len > 0) { + fbuffer_inc_capa(fb, len); + MEMCPY(fb->ptr + fb->len, newstr, char, len); + fb->len += len; + } +} + +static void fbuffer_append_char(FBuffer *fb, char newchr) +{ + fbuffer_inc_capa(fb, 1); + *(fb->ptr + fb->len) = newchr; + fb->len++; +} + +static void freverse(char *start, char *end) +{ + char c; + + while (end > start) { + c = *end, *end-- = *start, *start++ = c; + } +} + +static int fltoa(long number, char *buf) +{ + static char digits[] = "0123456789"; + 
long sign = number; + char* tmp = buf; + + if (sign < 0) number = -number; + do *tmp++ = digits[number % 10]; while (number /= 10); + if (sign < 0) *tmp++ = '-'; + freverse(buf, tmp - 1); + return tmp - buf; +} + +static void fbuffer_append_long(FBuffer *fb, long number) +{ + char buf[20]; + int len = fltoa(number, buf); + fbuffer_append(fb, buf, len); +} + +static FBuffer *fbuffer_dup(FBuffer *fb) +{ + int len = fb->len; + FBuffer *result; + + if (len > 0) { + result = fbuffer_alloc_with_length(len); + fbuffer_append(result, FBUFFER_PAIR(fb)); + } else { + result = fbuffer_alloc(); + } + return result; +} + +/* + * Document-module: JSON::Ext::Generator + * + * This is the JSON generator implemented as a C extension. It can be + * configured to be used by setting + * + * JSON.generator = JSON::Ext::Generator + * + * with the method generator= in JSON. + * + */ + +/* + * call-seq: to_json(state = nil, depth = 0) + * + * Returns a JSON string containing a JSON object, that is generated from + * this Hash instance. + * _state_ is a JSON::State object, that can also be used to configure the + * produced JSON string output further. + * _depth_ is used to find out nesting depth, to indent accordingly. + */ +static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) +{ + VALUE state, depth; + + rb_scan_args(argc, argv, "02", &state, &depth); + state = cState_from_state_s(cState, state); + return cState_partial_generate(state, self, depth); +} + +/* + * call-seq: to_json(state = nil, depth = 0) + * + * Returns a JSON string containing a JSON array, that is generated from + * this Array instance. + * _state_ is a JSON::State object, that can also be used to configure the + * produced JSON string output further. + * _depth_ is used to find out nesting depth, to indent accordingly. 
+ */ +static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { + VALUE state, depth; + rb_scan_args(argc, argv, "02", &state, &depth); + state = cState_from_state_s(cState, state); + return cState_partial_generate(state, self, depth); +} + +/* + * call-seq: to_json(*) + * + * Returns a JSON string representation for this Integer number. + */ +static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) +{ + VALUE state, depth; + rb_scan_args(argc, argv, "02", &state, &depth); + state = cState_from_state_s(cState, state); + return cState_partial_generate(state, self, depth); +} + +/* + * call-seq: to_json(*) + * + * Returns a JSON string representation for this Float number. + */ +static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self) +{ + VALUE state, depth; + rb_scan_args(argc, argv, "02", &state, &depth); + state = cState_from_state_s(cState, state); + return cState_partial_generate(state, self, depth); +} + +/* + * call-seq: String.included(modul) + * + * Extends _modul_ with the String::Extend module. + */ +static VALUE mString_included_s(VALUE self, VALUE modul) { + VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend); + return result; +} + +/* + * call-seq: to_json(*) + * + * This string should be encoded with UTF-8. A call to this method + * returns a JSON string encoded with UTF16 big endian characters as + * \u????. + */ +static VALUE mString_to_json(int argc, VALUE *argv, VALUE self) +{ + VALUE state, depth; + rb_scan_args(argc, argv, "02", &state, &depth); + state = cState_from_state_s(cState, state); + return cState_partial_generate(state, self, depth); +} + +/* + * call-seq: to_json_raw_object() + * + * This method creates a raw object hash, that can be nested into + * other data structures and will be generated as a raw string. This + * method should be used, if you want to convert raw strings to JSON + * instead of UTF-8 strings, e. g. binary data.
+ */ +static VALUE mString_to_json_raw_object(VALUE self) +{ + VALUE ary; + VALUE result = rb_hash_new(); + rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self))); + ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*")); + rb_hash_aset(result, rb_str_new2("raw"), ary); + return result; +} + +/* + * call-seq: to_json_raw(*args) + * + * This method creates a JSON text from the result of a call to + * to_json_raw_object of this String. + */ +static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) +{ + VALUE obj = mString_to_json_raw_object(self); + Check_Type(obj, T_HASH); + return mHash_to_json(argc, argv, obj); +} + +/* + * call-seq: json_create(o) + * + * Raw Strings are JSON Objects (the raw bytes are stored in an array for the + * key "raw"). The Ruby String can be created by this module method. + */ +static VALUE mString_Extend_json_create(VALUE self, VALUE o) +{ + VALUE ary; + Check_Type(o, T_HASH); + ary = rb_hash_aref(o, rb_str_new2("raw")); + return rb_funcall(ary, i_pack, 1, rb_str_new2("C*")); +} + +/* + * call-seq: to_json(*) + * + * Returns a JSON string for true: 'true'. + */ +static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self) +{ + VALUE state, depth; + rb_scan_args(argc, argv, "02", &state, &depth); + state = cState_from_state_s(cState, state); + return cState_partial_generate(state, self, depth); +} + +/* + * call-seq: to_json(*) + * + * Returns a JSON string for false: 'false'. 
+ */ +static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self) +{ + VALUE state, depth; + rb_scan_args(argc, argv, "02", &state, &depth); + state = cState_from_state_s(cState, state); + return cState_partial_generate(state, self, depth); +} + +/* + * call-seq: to_json(*) + * + */ +static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self) +{ + VALUE state, depth; + rb_scan_args(argc, argv, "02", &state, &depth); + state = cState_from_state_s(cState, state); + return cState_partial_generate(state, self, depth); +} + +/* + * call-seq: to_json(*) + * + * Converts this object to a string (calling #to_s), converts + * it to a JSON string, and returns the result. This is a fallback, if no + * special method #to_json was defined for some object. + */ +static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) +{ + VALUE state, depth; + VALUE string = rb_funcall(self, i_to_s, 0); + rb_scan_args(argc, argv, "02", &state, &depth); + Check_Type(string, T_STRING); + state = cState_from_state_s(cState, state); + return cState_partial_generate(state, string, depth); +} + +static void State_free(JSON_Generator_State *state) +{ + if (state->indent) ruby_xfree(state->indent); + if (state->space) ruby_xfree(state->space); + if (state->space_before) ruby_xfree(state->space_before); + if (state->object_nl) ruby_xfree(state->object_nl); + if (state->array_nl) ruby_xfree(state->array_nl); + if (state->array_delim) fbuffer_free(state->array_delim); + if (state->object_delim) fbuffer_free(state->object_delim); + if (state->object_delim2) fbuffer_free(state->object_delim2); + ruby_xfree(state); +} + +static JSON_Generator_State *State_allocate() +{ + JSON_Generator_State *state = ALLOC(JSON_Generator_State); + return state; +} + +static VALUE cState_s_allocate(VALUE klass) +{ + JSON_Generator_State *state = State_allocate(); + return Data_Wrap_Struct(klass, NULL, State_free, state); +} + +/* + * call-seq: configure(opts) + * + * Configure this State instance with the 
Hash _opts_, and return + * itself. + */ +static VALUE cState_configure(VALUE self, VALUE opts) +{ + VALUE tmp; + GET_STATE(self); + tmp = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); + if (NIL_P(tmp)) tmp = rb_convert_type(opts, T_HASH, "Hash", "to_h"); + if (NIL_P(tmp)) { + rb_raise(rb_eArgError, "opts has to be hash like or convertable into a hash"); + } + opts = tmp; + tmp = rb_hash_aref(opts, ID2SYM(i_indent)); + if (RTEST(tmp)) { + int len; + Check_Type(tmp, T_STRING); + len = RSTRING_LEN(tmp); + state->indent = fstrndup(RSTRING_PTR(tmp), len); + state->indent_len = len; + } + tmp = rb_hash_aref(opts, ID2SYM(i_space)); + if (RTEST(tmp)) { + int len; + Check_Type(tmp, T_STRING); + len = RSTRING_LEN(tmp); + state->space = fstrndup(RSTRING_PTR(tmp), len); + state->space_len = len; + } + tmp = rb_hash_aref(opts, ID2SYM(i_space_before)); + if (RTEST(tmp)) { + int len; + Check_Type(tmp, T_STRING); + len = RSTRING_LEN(tmp); + state->space_before = fstrndup(RSTRING_PTR(tmp), len); + state->space_before_len = len; + } + tmp = rb_hash_aref(opts, ID2SYM(i_array_nl)); + if (RTEST(tmp)) { + int len; + Check_Type(tmp, T_STRING); + len = RSTRING_LEN(tmp); + state->array_nl = fstrndup(RSTRING_PTR(tmp), len); + state->array_nl_len = len; + } + tmp = rb_hash_aref(opts, ID2SYM(i_object_nl)); + if (RTEST(tmp)) { + int len; + Check_Type(tmp, T_STRING); + len = RSTRING_LEN(tmp); + state->object_nl = fstrndup(RSTRING_PTR(tmp), len); + state->object_nl_len = len; + } + tmp = ID2SYM(i_max_nesting); + state->max_nesting = 19; + if (option_given_p(opts, tmp)) { + VALUE max_nesting = rb_hash_aref(opts, tmp); + if (RTEST(max_nesting)) { + Check_Type(max_nesting, T_FIXNUM); + state->max_nesting = FIX2LONG(max_nesting); + } else { + state->max_nesting = 0; + } + } + tmp = rb_hash_aref(opts, ID2SYM(i_allow_nan)); + state->allow_nan = RTEST(tmp); + tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only)); + state->ascii_only = RTEST(tmp); + return self; +} + +/* + * call-seq: to_h + * + * Returns 
the configuration instance variables as a hash, that can be + * passed to the configure method. + */ +static VALUE cState_to_h(VALUE self) +{ + VALUE result = rb_hash_new(); + GET_STATE(self); + rb_hash_aset(result, ID2SYM(i_indent), rb_str_new(state->indent, state->indent_len)); + rb_hash_aset(result, ID2SYM(i_space), rb_str_new(state->space, state->space_len)); + rb_hash_aset(result, ID2SYM(i_space_before), rb_str_new(state->space_before, state->space_before_len)); + rb_hash_aset(result, ID2SYM(i_object_nl), rb_str_new(state->object_nl, state->object_nl_len)); + rb_hash_aset(result, ID2SYM(i_array_nl), rb_str_new(state->array_nl, state->array_nl_len)); + rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse); + rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse); + rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting)); + return result; +} + +/* +* call-seq: [](name) +* +* Return the value returned by method +name+. +*/ +static VALUE cState_aref(VALUE self, VALUE name) +{ + GET_STATE(self); + if (RTEST(rb_funcall(self, i_respond_to_p, 1, name))) { + return rb_funcall(self, i_send, 1, name); + } else { + return Qnil; + } +} + +static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj, long depth) +{ + VALUE tmp; + switch (TYPE(obj)) { + case T_HASH: + { + char *object_nl = state->object_nl; + long object_nl_len = state->object_nl_len; + char *indent = state->indent; + long indent_len = state->indent_len; + long max_nesting = state->max_nesting; + char *delim = FBUFFER_PTR(state->object_delim); + long delim_len = FBUFFER_LEN(state->object_delim); + char *delim2 = FBUFFER_PTR(state->object_delim2); + long delim2_len = FBUFFER_LEN(state->object_delim2); + int i, j; + VALUE key, key_to_s, keys; + depth++; + if (max_nesting != 0 && depth > max_nesting) { + fbuffer_free(buffer); + rb_raise(eNestingError, "nesting of %ld is too deep", depth); + } + 
fbuffer_append_char(buffer, '{'); + keys = rb_funcall(obj, rb_intern("keys"), 0); + for(i = 0; i < RARRAY_LEN(keys); i++) { + if (i > 0) fbuffer_append(buffer, delim, delim_len); + if (object_nl) { + fbuffer_append(buffer, object_nl, object_nl_len); + } + if (indent) { + for (j = 0; j < depth; j++) { + fbuffer_append(buffer, indent, indent_len); + } + } + key = rb_ary_entry(keys, i); + key_to_s = rb_funcall(key, i_to_s, 0); + Check_Type(key_to_s, T_STRING); + generate_json(buffer, Vstate, state, key_to_s, depth); + fbuffer_append(buffer, delim2, delim2_len); + generate_json(buffer, Vstate, state, rb_hash_aref(obj, key), depth); + } + depth--; + if (object_nl) { + fbuffer_append(buffer, object_nl, object_nl_len); + if (indent) { + for (j = 0; j < depth; j++) { + fbuffer_append(buffer, indent, indent_len); + } + } + } + fbuffer_append_char(buffer, '}'); + } + break; + case T_ARRAY: + { + char *array_nl = state->array_nl; + long array_nl_len = state->array_nl_len; + char *indent = state->indent; + long indent_len = state->indent_len; + long max_nesting = state->max_nesting; + char *delim = FBUFFER_PTR(state->array_delim); + long delim_len = FBUFFER_LEN(state->array_delim); + int i, j; + depth++; + if (max_nesting != 0 && depth > max_nesting) { + fbuffer_free(buffer); + rb_raise(eNestingError, "nesting of %ld is too deep", depth); + } + fbuffer_append_char(buffer, '['); + if (array_nl) fbuffer_append(buffer, array_nl, array_nl_len); + for(i = 0; i < RARRAY_LEN(obj); i++) { + if (i > 0) fbuffer_append(buffer, delim, delim_len); + if (indent) { + for (j = 0; j < depth; j++) { + fbuffer_append(buffer, indent, indent_len); + } + } + generate_json(buffer, Vstate, state, rb_ary_entry(obj, i), depth); + } + depth--; + if (array_nl) { + fbuffer_append(buffer, array_nl, array_nl_len); + if (indent) { + for (j = 0; j < depth; j++) { + fbuffer_append(buffer, indent, indent_len); + } + } + } + fbuffer_append_char(buffer, ']'); + } + break; + case T_STRING: + 
fbuffer_append_char(buffer, '"'); +#ifdef HAVE_RUBY_ENCODING_H + obj = rb_funcall(obj, i_encode, 1, CEncoding_UTF_8); +#endif + if (state->ascii_only) { + convert_UTF8_to_JSON_ASCII(buffer, obj); + } else { + convert_UTF8_to_JSON(buffer, obj); + } + fbuffer_append_char(buffer, '"'); + break; + case T_NIL: + fbuffer_append(buffer, "null", 4); + break; + case T_FALSE: + fbuffer_append(buffer, "false", 5); + break; + case T_TRUE: + fbuffer_append(buffer, "true", 4); + break; + case T_FIXNUM: + fbuffer_append_long(buffer, FIX2LONG(obj)); + break; + case T_BIGNUM: + tmp = rb_funcall(obj, i_to_s, 0); + fbuffer_append(buffer, RSTRING_PAIR(tmp)); + break; + case T_FLOAT: + { + double value = RFLOAT_VALUE(obj); + char allow_nan = state->allow_nan; + tmp = rb_funcall(obj, i_to_s, 0); + if (!allow_nan) { + if (isinf(value)) { + fbuffer_free(buffer); + rb_raise(eGeneratorError, "%u: %s not allowed in JSON", __LINE__, StringValueCStr(tmp)); + } else if (isnan(value)) { + fbuffer_free(buffer); + rb_raise(eGeneratorError, "%u: %s not allowed in JSON", __LINE__, StringValueCStr(tmp)); + } + } + fbuffer_append(buffer, RSTRING_PAIR(tmp)); + } + break; + default: + if (rb_respond_to(obj, i_to_json)) { + tmp = rb_funcall(obj, i_to_json, 2, Vstate, INT2FIX(depth + 1)); + Check_Type(tmp, T_STRING); + fbuffer_append(buffer, RSTRING_PAIR(tmp)); + } else { + tmp = rb_funcall(obj, i_to_s, 0); + Check_Type(tmp, T_STRING); + generate_json(buffer, Vstate, state, tmp, depth + 1); + } + break; + } +} + +/* + * call-seq: partial_generate(obj) + * + * Generates a part of a JSON document from object +obj+ and returns the + * result. 
+ */ +static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth) +{ + VALUE result; + FBuffer *buffer = fbuffer_alloc(); + GET_STATE(self); + + if (state->object_delim) { + fbuffer_clear(state->object_delim); + } else { + state->object_delim = fbuffer_alloc_with_length(16); + } + fbuffer_append_char(state->object_delim, ','); + if (state->object_delim2) { + fbuffer_clear(state->object_delim2); + } else { + state->object_delim2 = fbuffer_alloc_with_length(16); + } + fbuffer_append_char(state->object_delim2, ':'); + if (state->space) fbuffer_append(state->object_delim2, state->space, state->space_len); + + if (state->array_delim) { + fbuffer_clear(state->array_delim); + } else { + state->array_delim = fbuffer_alloc_with_length(16); + } + fbuffer_append_char(state->array_delim, ','); + if (state->array_nl) fbuffer_append(state->array_delim, state->array_nl, state->array_nl_len); + + generate_json(buffer, self, state, obj, NIL_P(depth) ? 0 : FIX2INT(depth)); + result = rb_str_new(FBUFFER_PAIR(buffer)); + fbuffer_free_only_buffer(buffer); + FORCE_UTF8(result); + return result; +} + +/* + * call-seq: generate(obj) + * + * Generates a valid JSON document from object +obj+ and returns the + * result. If no valid JSON document can be created this method raises a + * GeneratorError exception. + */ +static VALUE cState_generate(VALUE self, VALUE obj) +{ + VALUE result = cState_partial_generate(self, obj, Qnil); + VALUE re, args[2]; + args[0] = rb_str_new2("\\A\\s*(?:\\[.*\\]|\\{.*\\})\\s*\\Z"); + args[1] = CRegexp_MULTILINE; + re = rb_class_new_instance(2, args, rb_cRegexp); + if (NIL_P(rb_funcall(re, i_match, 1, result))) { + rb_raise(eGeneratorError, "only generation of JSON objects or arrays allowed"); + } + return result; +} + +/* + * call-seq: new(opts = {}) + * + * Instantiates a new State object, configured by _opts_. 
+ * + * _opts_ can have the following keys: + * + * * *indent*: a string used to indent levels (default: ''), + * * *space*: a string that is put after, a : or , delimiter (default: ''), + * * *space_before*: a string that is put before a : pair delimiter (default: ''), + * * *object_nl*: a string that is put at the end of a JSON object (default: ''), + * * *array_nl*: a string that is put at the end of a JSON array (default: ''), + * * *allow_nan*: true if NaN, Infinity, and -Infinity should be + * generated, otherwise an exception is thrown, if these values are + * encountered. This options defaults to false. + */ +static VALUE cState_initialize(int argc, VALUE *argv, VALUE self) +{ + VALUE opts; + GET_STATE(self); + MEMZERO(state, JSON_Generator_State, 1); + state->max_nesting = 19; + rb_scan_args(argc, argv, "01", &opts); + if (!NIL_P(opts)) cState_configure(self, opts); + return self; +} + +/* + * call-seq: initialize_copy(orig) + * + * Initializes this object from orig if it to be duplicated/cloned and returns + * it. 
+*/ +static VALUE cState_init_copy(VALUE obj, VALUE orig) +{ + JSON_Generator_State *objState, *origState; + + Data_Get_Struct(obj, JSON_Generator_State, objState); + Data_Get_Struct(orig, JSON_Generator_State, origState); + if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State"); + + MEMCPY(objState, origState, JSON_Generator_State, 1); + objState->indent = fstrndup(origState->indent, origState->indent_len); + objState->space = fstrndup(origState->space, origState->space_len); + objState->space_before = fstrndup(origState->space_before, origState->space_before_len); + objState->object_nl = fstrndup(origState->object_nl, origState->object_nl_len); + objState->array_nl = fstrndup(origState->array_nl, origState->array_nl_len); + if (origState->array_delim) objState->array_delim = fbuffer_dup(origState->array_delim); + if (origState->object_delim) objState->object_delim = fbuffer_dup(origState->object_delim); + if (origState->object_delim2) objState->object_delim2 = fbuffer_dup(origState->object_delim2); + return obj; +} + +/* + * call-seq: from_state(opts) + * + * Creates a State object from _opts_, which ought to be Hash to create a + * new State instance configured by _opts_, something else to create an + * unconfigured instance. If _opts_ is a State object, it is just returned. + */ +static VALUE cState_from_state_s(VALUE self, VALUE opts) +{ + if (rb_obj_is_kind_of(opts, self)) { + return opts; + } else if (rb_obj_is_kind_of(opts, rb_cHash)) { + return rb_funcall(self, i_new, 1, opts); + } else { + if (NIL_P(CJSON_SAFE_STATE_PROTOTYPE)) { + CJSON_SAFE_STATE_PROTOTYPE = rb_const_get(mJSON, rb_intern("SAFE_STATE_PROTOTYPE")); + } + return CJSON_SAFE_STATE_PROTOTYPE; + } +} + +/* + * call-seq: indent() + * + * This string is used to indent levels in the JSON text. + */ +static VALUE cState_indent(VALUE self) +{ + GET_STATE(self); + return state->indent ? 
rb_str_new2(state->indent) : rb_str_new2(""); +} + +/* + * call-seq: indent=(indent) + * + * This string is used to indent levels in the JSON text. + */ +static VALUE cState_indent_set(VALUE self, VALUE indent) +{ + GET_STATE(self); + Check_Type(indent, T_STRING); + if (RSTRING_LEN(indent) == 0) { + if (state->indent) { + ruby_xfree(state->indent); + state->indent = NULL; + } + } else { + if (state->indent) ruby_xfree(state->indent); + state->indent = strdup(RSTRING_PTR(indent)); + } + return Qnil; +} + +/* + * call-seq: space() + * + * This string is used to insert a space between the tokens in a JSON + * string. + */ +static VALUE cState_space(VALUE self) +{ + GET_STATE(self); + return state->space ? rb_str_new2(state->space) : rb_str_new2(""); +} + +/* + * call-seq: space=(space) + * + * This string is used to insert a space between the tokens in a JSON + * string. + */ +static VALUE cState_space_set(VALUE self, VALUE space) +{ + GET_STATE(self); + Check_Type(space, T_STRING); + if (RSTRING_LEN(space) == 0) { + if (state->space) { + ruby_xfree(state->space); + state->space = NULL; + } + } else { + if (state->space) ruby_xfree(state->space); + state->space = strdup(RSTRING_PTR(space)); + } + return Qnil; +} + +/* + * call-seq: space_before() + * + * This string is used to insert a space before the ':' in JSON objects. + */ +static VALUE cState_space_before(VALUE self) +{ + GET_STATE(self); + return state->space_before ? rb_str_new2(state->space_before) : rb_str_new2(""); +} + +/* + * call-seq: space_before=(space_before) + * + * This string is used to insert a space before the ':' in JSON objects. 
+ */ +static VALUE cState_space_before_set(VALUE self, VALUE space_before) +{ + GET_STATE(self); + Check_Type(space_before, T_STRING); + if (RSTRING_LEN(space_before) == 0) { + if (state->space_before) { + ruby_xfree(state->space_before); + state->space_before = NULL; + } + } else { + if (state->space_before) ruby_xfree(state->space_before); + state->space_before = strdup(RSTRING_PTR(space_before)); + } + return Qnil; +} + +/* + * call-seq: object_nl() + * + * This string is put at the end of a line that holds a JSON object (or + * Hash). + */ +static VALUE cState_object_nl(VALUE self) +{ + GET_STATE(self); + return state->object_nl ? rb_str_new2(state->object_nl) : rb_str_new2(""); +} + +/* + * call-seq: object_nl=(object_nl) + * + * This string is put at the end of a line that holds a JSON object (or + * Hash). + */ +static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) +{ + GET_STATE(self); + Check_Type(object_nl, T_STRING); + if (RSTRING_LEN(object_nl) == 0) { + if (state->object_nl) { + ruby_xfree(state->object_nl); + state->object_nl = NULL; + } + } else { + if (state->object_nl) ruby_xfree(state->object_nl); + state->object_nl = strdup(RSTRING_PTR(object_nl)); + } + return Qnil; +} + +/* + * call-seq: array_nl() + * + * This string is put at the end of a line that holds a JSON array. + */ +static VALUE cState_array_nl(VALUE self) +{ + GET_STATE(self); + return state->array_nl ? rb_str_new2(state->array_nl) : rb_str_new2(""); +} + +/* + * call-seq: array_nl=(array_nl) + * + * This string is put at the end of a line that holds a JSON array. + */ +static VALUE cState_array_nl_set(VALUE self, VALUE array_nl) +{ + GET_STATE(self); + Check_Type(array_nl, T_STRING); + if (RSTRING_LEN(array_nl) == 0) { + if (state->array_nl) { + ruby_xfree(state->array_nl); + state->array_nl = NULL; + } + } else { + if (state->array_nl) ruby_xfree(state->array_nl); + state->array_nl = strdup(RSTRING_PTR(array_nl)); + } + return Qnil; +} + + +/* +* call-seq: check_circular? 
+* +* Returns true, if circular data structures should be checked, +* otherwise returns false. +*/ +static VALUE cState_check_circular_p(VALUE self) +{ + GET_STATE(self); + return state->max_nesting ? Qtrue : Qfalse; +} + +/* + * call-seq: max_nesting + * + * This integer returns the maximum level of data structure nesting in + * the generated JSON, max_nesting = 0 if no maximum is checked. + */ +static VALUE cState_max_nesting(VALUE self) +{ + GET_STATE(self); + return LONG2FIX(state->max_nesting); +} + +/* + * call-seq: max_nesting=(depth) + * + * This sets the maximum level of data structure nesting in the generated JSON + * to the integer depth, max_nesting = 0 if no maximum should be checked. + */ +static VALUE cState_max_nesting_set(VALUE self, VALUE depth) +{ + GET_STATE(self); + Check_Type(depth, T_FIXNUM); + return state->max_nesting = FIX2LONG(depth); +} + +/* + * call-seq: allow_nan? + * + * Returns true, if NaN, Infinity, and -Infinity should be generated, otherwise + * returns false. + */ +static VALUE cState_allow_nan_p(VALUE self) +{ + GET_STATE(self); + return state->allow_nan ? Qtrue : Qfalse; +} + +/* + * call-seq: ascii_only? + * + * Returns true, if NaN, Infinity, and -Infinity should be generated, otherwise + * returns false. + */ +static VALUE cState_ascii_only_p(VALUE self) +{ + GET_STATE(self); + return state->ascii_only ? 
Qtrue : Qfalse; +} + +/* + * + */ +void Init_generator() +{ + rb_require("json/common"); + + mJSON = rb_define_module("JSON"); + mExt = rb_define_module_under(mJSON, "Ext"); + mGenerator = rb_define_module_under(mExt, "Generator"); + + eGeneratorError = rb_path2class("JSON::GeneratorError"); + eNestingError = rb_path2class("JSON::NestingError"); + + cState = rb_define_class_under(mGenerator, "State", rb_cObject); + rb_define_alloc_func(cState, cState_s_allocate); + rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1); + rb_define_method(cState, "initialize", cState_initialize, -1); + rb_define_method(cState, "initialize_copy", cState_init_copy, 1); + rb_define_method(cState, "indent", cState_indent, 0); + rb_define_method(cState, "indent=", cState_indent_set, 1); + rb_define_method(cState, "space", cState_space, 0); + rb_define_method(cState, "space=", cState_space_set, 1); + rb_define_method(cState, "space_before", cState_space_before, 0); + rb_define_method(cState, "space_before=", cState_space_before_set, 1); + rb_define_method(cState, "object_nl", cState_object_nl, 0); + rb_define_method(cState, "object_nl=", cState_object_nl_set, 1); + rb_define_method(cState, "array_nl", cState_array_nl, 0); + rb_define_method(cState, "array_nl=", cState_array_nl_set, 1); + rb_define_method(cState, "max_nesting", cState_max_nesting, 0); + rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1); + rb_define_method(cState, "check_circular?", cState_check_circular_p, 0); + rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0); + rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0); + rb_define_method(cState, "configure", cState_configure, 1); + rb_define_method(cState, "to_h", cState_to_h, 0); + rb_define_method(cState, "[]", cState_aref, 1); + rb_define_method(cState, "generate", cState_generate, 1); + rb_define_method(cState, "partial_generate", cState_partial_generate, 1); + + mGeneratorMethods = 
rb_define_module_under(mGenerator, "GeneratorMethods"); + mObject = rb_define_module_under(mGeneratorMethods, "Object"); + rb_define_method(mObject, "to_json", mObject_to_json, -1); + mHash = rb_define_module_under(mGeneratorMethods, "Hash"); + rb_define_method(mHash, "to_json", mHash_to_json, -1); + mArray = rb_define_module_under(mGeneratorMethods, "Array"); + rb_define_method(mArray, "to_json", mArray_to_json, -1); + mInteger = rb_define_module_under(mGeneratorMethods, "Integer"); + rb_define_method(mInteger, "to_json", mInteger_to_json, -1); + mFloat = rb_define_module_under(mGeneratorMethods, "Float"); + rb_define_method(mFloat, "to_json", mFloat_to_json, -1); + mString = rb_define_module_under(mGeneratorMethods, "String"); + rb_define_singleton_method(mString, "included", mString_included_s, 1); + rb_define_method(mString, "to_json", mString_to_json, -1); + rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1); + rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0); + mString_Extend = rb_define_module_under(mString, "Extend"); + rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1); + mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass"); + rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1); + mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass"); + rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1); + mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass"); + rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1); + + CRegexp_MULTILINE = rb_const_get(rb_cRegexp, rb_intern("MULTILINE")); + i_to_s = rb_intern("to_s"); + i_to_json = rb_intern("to_json"); + i_new = rb_intern("new"); + i_indent = rb_intern("indent"); + i_space = rb_intern("space"); + i_space_before = rb_intern("space_before"); + i_object_nl = rb_intern("object_nl"); + i_array_nl = rb_intern("array_nl"); + i_max_nesting = rb_intern("max_nesting"); + i_allow_nan 
= rb_intern("allow_nan"); + i_ascii_only = rb_intern("ascii_only"); + i_pack = rb_intern("pack"); + i_unpack = rb_intern("unpack"); + i_create_id = rb_intern("create_id"); + i_extend = rb_intern("extend"); + i_key_p = rb_intern("key?"); + i_aref = rb_intern("[]"); + i_send = rb_intern("__send__"); + i_respond_to_p = rb_intern("respond_to?"); + i_match = rb_intern("match"); +#ifdef HAVE_RUBY_ENCODING_H + CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8")); + i_encoding = rb_intern("encoding"); + i_encode = rb_intern("encode"); +#endif + CJSON_SAFE_STATE_PROTOTYPE = Qnil; +} diff --git a/ext/json/generator/generator.h b/ext/json/generator/generator.h new file mode 100644 index 0000000000..37240a9c6b --- /dev/null +++ b/ext/json/generator/generator.h @@ -0,0 +1,170 @@ +#ifndef _GENERATOR_H_ +#define _GENERATOR_H_ + +#include +#include +#include + +#include "ruby.h" + +#if HAVE_RUBY_RE_H +#include "ruby/re.h" +#endif + +#if HAVE_RE_H +#include "re.h" +#endif + +#ifdef HAVE_RUBY_ENCODING_H +#include "ruby/encoding.h" +#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding()) +#else +#define FORCE_UTF8(obj) +#endif + +#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key)) + +#ifndef RHASH_SIZE +#define RHASH_SIZE(hsh) (RHASH(hsh)->tbl->num_entries) +#endif + +#ifndef RFLOAT_VALUE +#define RFLOAT_VALUE(val) (RFLOAT(val)->value) +#endif + +#ifndef RARRAY_PTR +#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr +#endif +#ifndef RARRAY_LEN +#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len +#endif +#ifndef RSTRING_PTR +#define RSTRING_PTR(string) RSTRING(string)->ptr +#endif +#ifndef RSTRING_LEN +#define RSTRING_LEN(string) RSTRING(string)->len +#endif + +#define RSTRING_PAIR(string) RSTRING_PTR(string), RSTRING_LEN(string) + +/* fbuffer implementation */ + +typedef struct FBufferStruct { + unsigned int initial_length; + char *ptr; + unsigned int len; + unsigned int capa; +} FBuffer; + +#define 
FBUFFER_INITIAL_LENGTH 4096 + +#define FBUFFER_PTR(fb) (fb->ptr) +#define FBUFFER_LEN(fb) (fb->len) +#define FBUFFER_CAPA(fb) (fb->capa) +#define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb) + +static char *fstrndup(const char *ptr, int len); +static FBuffer *fbuffer_alloc(); +static FBuffer *fbuffer_alloc_with_length(unsigned initial_length); +static void fbuffer_free(FBuffer *fb); +static void fbuffer_free_only_buffer(FBuffer *fb); +static void fbuffer_clear(FBuffer *fb); +static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len); +static void fbuffer_append_long(FBuffer *fb, long number); +static void fbuffer_append_char(FBuffer *fb, char newchr); +static FBuffer *fbuffer_dup(FBuffer *fb); + +/* unicode defintions */ + +#define UNI_STRICT_CONVERSION 1 + +typedef unsigned long UTF32; /* at least 32 bits */ +typedef unsigned short UTF16; /* at least 16 bits */ +typedef unsigned char UTF8; /* typically 8 bits */ + +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +#define UNI_MAX_BMP (UTF32)0x0000FFFF +#define UNI_MAX_UTF16 (UTF32)0x0010FFFF +#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF +#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF + +static const int halfShift = 10; /* used for shifting by 10 bits */ + +static const UTF32 halfBase = 0x0010000UL; +static const UTF32 halfMask = 0x3FFUL; + +static unsigned char isLegalUTF8(const UTF8 *source, int length); +static void unicode_escape(char *buf, UTF16 character); +static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character); +static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string); +static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string); + +/* ruby api and some helpers */ + +typedef struct JSON_Generator_StateStruct { + char *indent; + long indent_len; + char *space; + long space_len; + char 
*space_before; + long space_before_len; + char *object_nl; + long object_nl_len; + char *array_nl; + long array_nl_len; + FBuffer *array_delim; + FBuffer *object_delim; + FBuffer *object_delim2; + long max_nesting; + char allow_nan; + char ascii_only; +} JSON_Generator_State; + +#define GET_STATE(self) \ + JSON_Generator_State *state; \ + Data_Get_Struct(self, JSON_Generator_State, state) + +static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mString_included_s(VALUE self, VALUE modul); +static VALUE mString_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mString_to_json_raw_object(VALUE self); +static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self); +static VALUE mString_Extend_json_create(VALUE self, VALUE o); +static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self); +static void State_free(JSON_Generator_State *state); +static JSON_Generator_State *State_allocate(); +static VALUE cState_s_allocate(VALUE klass); +static VALUE cState_configure(VALUE self, VALUE opts); +static VALUE cState_to_h(VALUE self); +static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj, long depth); +static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth); +static VALUE cState_generate(VALUE self, VALUE obj); +static VALUE cState_initialize(int argc, VALUE *argv, VALUE self); +static VALUE cState_from_state_s(VALUE self, VALUE opts); +static VALUE cState_indent(VALUE self); +static VALUE cState_indent_set(VALUE self, VALUE indent); +static VALUE cState_space(VALUE self); +static 
VALUE cState_space_set(VALUE self, VALUE space); +static VALUE cState_space_before(VALUE self); +static VALUE cState_space_before_set(VALUE self, VALUE space_before); +static VALUE cState_object_nl(VALUE self); +static VALUE cState_object_nl_set(VALUE self, VALUE object_nl); +static VALUE cState_array_nl(VALUE self); +static VALUE cState_array_nl_set(VALUE self, VALUE array_nl); +static VALUE cState_max_nesting(VALUE self); +static VALUE cState_max_nesting_set(VALUE self, VALUE depth); +static VALUE cState_allow_nan_p(VALUE self); +static VALUE cState_ascii_only_p(VALUE self); + +#endif diff --git a/ext/json/lib/json/add/core.rb b/ext/json/lib/json/add/core.rb index 4423e7ad75..03a00dded4 100644 --- a/ext/json/lib/json/add/core.rb +++ b/ext/json/lib/json/add/core.rb @@ -7,6 +7,19 @@ unless Object.const_defined?(:JSON) and ::JSON.const_defined?(:JSON_LOADED) and end require 'date' +class Symbol + def to_json(*a) + { + JSON.create_id => self.class.name, + 's' => to_s, + }.to_json(*a) + end + + def self.json_create(o) + o['s'].to_sym + end +end + class Time def self.json_create(object) if usec = object.delete('u') # used to be tv_usec -> tv_nsec @@ -21,7 +34,7 @@ class Time def to_json(*args) { - 'json_class' => self.class.name, + JSON.create_id => self.class.name, 's' => tv_sec, 'n' => respond_to?(:tv_nsec) ? tv_nsec : tv_usec * 1000 }.to_json(*args) @@ -37,7 +50,7 @@ class Date def to_json(*args) { - 'json_class' => self.class.name, + JSON.create_id => self.class.name, 'y' => year, 'm' => month, 'd' => day, @@ -63,7 +76,7 @@ class DateTime def to_json(*args) { - 'json_class' => self.class.name, + JSON.create_id => self.class.name, 'y' => year, 'm' => month, 'd' => day, @@ -83,7 +96,7 @@ class Range def to_json(*args) { - 'json_class' => self.class.name, + JSON.create_id => self.class.name, 'a' => [ first, last, exclude_end? ] }.to_json(*args) end @@ -98,7 +111,7 @@ class Struct klass = self.class.name klass.to_s.empty? 
and raise JSON::JSONError, "Only named structs are supported!" { - 'json_class' => klass, + JSON.create_id => klass, 'v' => values, }.to_json(*args) end @@ -113,7 +126,7 @@ class Exception def to_json(*args) { - 'json_class' => self.class.name, + JSON.create_id => self.class.name, 'm' => message, 'b' => backtrace, }.to_json(*args) @@ -127,7 +140,7 @@ class Regexp def to_json(*) { - 'json_class' => self.class.name, + JSON.create_id => self.class.name, 'o' => options, 's' => source, }.to_json diff --git a/ext/json/lib/json/add/rails.rb b/ext/json/lib/json/add/rails.rb index e86ed1aab9..8ce85efe57 100644 --- a/ext/json/lib/json/add/rails.rb +++ b/ext/json/lib/json/add/rails.rb @@ -10,7 +10,7 @@ class Object def self.json_create(object) obj = new for key, value in object - next if key == 'json_class' + next if key == JSON.create_id instance_variable_set "@#{key}", value end obj @@ -18,7 +18,7 @@ class Object def to_json(*a) result = { - 'json_class' => self.class.name + JSON.create_id => self.class.name } instance_variables.inject(result) do |r, name| r[name[1..-1]] = instance_variable_get name diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb index b816a66cf1..244634b7a1 100644 --- a/ext/json/lib/json/common.rb +++ b/ext/json/lib/json/common.rb @@ -1,4 +1,5 @@ require 'json/version' +require 'iconv' module JSON class << self @@ -32,12 +33,16 @@ module JSON # level (absolute namespace path?). If there doesn't exist a constant at # the given path, an ArgumentError is raised. def deep_const_get(path) # :nodoc: - path = path.to_s - path.split(/::/).inject(Object) do |p, c| + path.to_s.split(/::/).inject(Object) do |p, c| case when c.empty? 
then p when p.const_defined?(c) then p.const_get(c) - else raise ArgumentError, "can't find const #{path}" + else + begin + p.const_missing(c) + rescue NameError + raise ArgumentError, "can't find const #{path}" + end end end end @@ -58,6 +63,20 @@ module JSON end self.state = generator::State const_set :State, self.state + const_set :SAFE_STATE_PROTOTYPE, State.new.freeze + const_set :FAST_STATE_PROTOTYPE, State.new( + :indent => '', + :space => '', + :object_nl => "", + :array_nl => "", + :max_nesting => false + ).freeze + const_set :PRETTY_STATE_PROTOTYPE, State.new( + :indent => ' ', + :space => ' ', + :object_nl => "\n", + :array_nl => "\n" + ).freeze end # Returns the JSON generator modul, that is used by JSON. This might be @@ -90,22 +109,22 @@ module JSON # deep. class NestingError < ParserError; end + # :stopdoc: + class CircularDatastructure < NestingError; end + # :startdoc: + # This exception is raised, if a generator or unparser error occurs. class GeneratorError < JSONError; end # For backwards compatibility UnparserError = GeneratorError - # If a circular data structure is encountered while unparsing - # this exception is raised. - class CircularDatastructure < GeneratorError; end - # This exception is raised, if the required unicode support is missing on the # system. Usually this means, that the iconv library is not installed. class MissingUnicodeSupport < JSONError; end module_function - # Parse the JSON string _source_ into a Ruby data structure and return it. + # Parse the JSON document _source_ into a Ruby data structure and return it. # # _opts_ can have the following # keys: @@ -115,16 +134,21 @@ module JSON # * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in # defiance of RFC 4627 to be parsed by the Parser. This option defaults # to false. + # * *symbolize_names*: If set to true, returns symbols for the names + # (keys) in a JSON object. Otherwise strings are returned, which is also + # the default. 
# * *create_additions*: If set to false, the Parser doesn't create # additions even if a matchin class and create_id was found. This option # defaults to true. + # * *object_class*: Defaults to Hash + # * *array_class*: Defaults to Array def parse(source, opts = {}) - JSON.parser.new(source, opts).parse + Parser.new(source, opts).parse end - # Parse the JSON string _source_ into a Ruby data structure and return it. + # Parse the JSON document _source_ into a Ruby data structure and return it. # The bang version of the parse method, defaults to the more dangerous values - # for the _opts_ hash, so be sure only to parse trusted _source_ strings. + # for the _opts_ hash, so be sure only to parse trusted _source_ documents. # # _opts_ can have the following keys: # * *max_nesting*: The maximum depth of nesting allowed in the parsed data @@ -139,15 +163,14 @@ module JSON # defaults to true. def parse!(source, opts = {}) opts = { - :max_nesting => false, - :allow_nan => true + :max_nesting => false, + :allow_nan => true }.update(opts) - JSON.parser.new(source, opts).parse + Parser.new(source, opts).parse end - # Unparse the Ruby data structure _obj_ into a single line JSON string and - # return it. _state_ is - # * a JSON::State object, + # Generate a JSON document from the Ruby data structure _obj_ and return + # it. _state_ is * a JSON::State object, # * or a Hash like object (responding to to_hash), # * an object convertible into a hash by a to_h method, # that is used as or to configure a State object. 
@@ -160,10 +183,8 @@ module JSON # * *indent*: a string used to indent levels (default: ''), # * *space*: a string that is put after, a : or , delimiter (default: ''), # * *space_before*: a string that is put before a : pair delimiter (default: ''), - # * *object_nl*: a string that is put at the end of a JSON object (default: ''), + # * *object_nl*: a string that is put at the end of a JSON object (default: ''), # * *array_nl*: a string that is put at the end of a JSON array (default: ''), - # * *check_circular*: true if checking for circular data structures - # should be done (the default), false otherwise. # * *allow_nan*: true if NaN, Infinity, and -Infinity should be # generated, otherwise an exception is thrown, if these values are # encountered. This options defaults to false. @@ -174,13 +195,21 @@ module JSON # See also the fast_generate for the fastest creation method with the least # amount of sanity checks, and the pretty_generate method for some # defaults for a pretty output. - def generate(obj, state = nil) - if state - state = State.from_state(state) + def generate(obj, opts = nil) + if opts + if opts.respond_to? :to_hash + opts = opts.to_hash + elsif opts.respond_to? :to_h + opts = opts.to_h + else + raise TypeError, "can't convert #{opts.class} into Hash" + end + state = SAFE_STATE_PROTOTYPE.dup + state = state.configure(opts) else - state = State.new + state = SAFE_STATE_PROTOTYPE end - obj.to_json(state) + state.generate(obj) end # :stopdoc: @@ -190,35 +219,12 @@ module JSON module_function :unparse # :startdoc: - # Unparse the Ruby data structure _obj_ into a single line JSON string and - # return it. This method disables the checks for circles in Ruby objects, and - # also generates NaN, Infinity, and, -Infinity float values. + # Generate a JSON document from the Ruby data structure _obj_ and return it. + # This method disables the checks for circles in Ruby objects. 
# # *WARNING*: Be careful not to pass any Ruby data structures with circles as # _obj_ argument, because this will cause JSON to go into an infinite loop. - def fast_generate(obj) - obj.to_json(nil) - end - - # :stopdoc: - # I want to deprecate these later, so I'll first be silent about them, and later delete them. - alias fast_unparse fast_generate - module_function :fast_unparse - # :startdoc: - - # Unparse the Ruby data structure _obj_ into a JSON string and return it. The - # returned string is a prettier form of the string returned by #unparse. - # - # The _opts_ argument can be used to configure the generator, see the - # generate method for a more detailed explanation. - def pretty_generate(obj, opts = nil) - state = JSON.state.new( - :indent => ' ', - :space => ' ', - :object_nl => "\n", - :array_nl => "\n", - :check_circular => true - ) + def fast_generate(obj, opts = nil) if opts if opts.respond_to? :to_hash opts = opts.to_hash @@ -227,9 +233,41 @@ module JSON else raise TypeError, "can't convert #{opts.class} into Hash" end + state = FAST_STATE_PROTOTYPE.dup state.configure(opts) + else + state = FAST_STATE_PROTOTYPE end - obj.to_json(state) + state.generate(obj) + end + + # :stopdoc: + # I want to deprecate these later, so I'll first be silent about them, and later delete them. + alias fast_unparse fast_generate + module_function :fast_unparse + # :startdoc: + + # Generate a JSON document from the Ruby data structure _obj_ and return it. + # The returned document is a prettier form of the document returned by + # #unparse. + # + # The _opts_ argument can be used to configure the generator, see the + # generate method for a more detailed explanation. + def pretty_generate(obj, opts = nil) + if opts + if opts.respond_to? :to_hash + opts = opts.to_hash + elsif opts.respond_to? 
:to_h + opts = opts.to_h + else + raise TypeError, "can't convert #{opts.class} into Hash" + end + state = PRETTY_STATE_PROTOTYPE.dup + state.configure(opts) + else + state = PRETTY_STATE_PROTOTYPE + end + state.generate(obj) end # :stopdoc: @@ -305,6 +343,11 @@ module JSON rescue JSON::NestingError raise ArgumentError, "exceed depth limit" end + + # Shortuct for iconv. + def self.iconv(to, from, string) + Iconv.iconv(to, from, string).first + end end module ::Kernel diff --git a/ext/json/lib/json/editor.rb b/ext/json/lib/json/editor.rb index 9e05f44b5b..1e13f33c8c 100644 --- a/ext/json/lib/json/editor.rb +++ b/ext/json/lib/json/editor.rb @@ -48,14 +48,14 @@ module JSON # Opens an error dialog on top of _window_ showing the error message # _text_. def Editor.error_dialog(window, text) - dialog = MessageDialog.new(window, Dialog::MODAL, - MessageDialog::ERROR, + dialog = MessageDialog.new(window, Dialog::MODAL, + MessageDialog::ERROR, MessageDialog::BUTTONS_CLOSE, text) dialog.show_all dialog.run rescue TypeError - dialog = MessageDialog.new(Editor.window, Dialog::MODAL, - MessageDialog::ERROR, + dialog = MessageDialog.new(Editor.window, Dialog::MODAL, + MessageDialog::ERROR, MessageDialog::BUTTONS_CLOSE, text) dialog.show_all dialog.run @@ -67,8 +67,8 @@ module JSON # message _text_. If yes was answered _true_ is returned, otherwise # _false_. 
def Editor.question_dialog(window, text) - dialog = MessageDialog.new(window, Dialog::MODAL, - MessageDialog::QUESTION, + dialog = MessageDialog.new(window, Dialog::MODAL, + MessageDialog::QUESTION, MessageDialog::BUTTONS_YES_NO, text) dialog.show_all dialog.run do |response| @@ -465,7 +465,7 @@ module JSON add_separator add_item("Append new node", ?a, &method(:append_new_node)) add_item("Insert new node before", ?i, &method(:insert_new_node)) - add_separator + add_separator add_item("Collapse/Expand node (recursively)", ?e, &method(:collapse_expand)) @@ -504,7 +504,7 @@ module JSON # Revert the current JSON document in the editor to the saved version. def revert(item) window.instance_eval do - @filename and file_open(@filename) + @filename and file_open(@filename) end end @@ -666,7 +666,7 @@ module JSON collapse_all else self.expanded = true - expand_all + expand_all end end @@ -885,7 +885,7 @@ module JSON dialog.signal_connect(:'key-press-event', &DEFAULT_DIALOG_KEY_PRESS_HANDLER) dialog.show_all self.focus = dialog - dialog.run do |response| + dialog.run do |response| if response == Dialog::RESPONSE_ACCEPT @key = key_input.text type = ALL_TYPES[@type = type_input.active] @@ -937,7 +937,7 @@ module JSON dialog.signal_connect(:'key-press-event', &DEFAULT_DIALOG_KEY_PRESS_HANDLER) dialog.show_all self.focus = dialog - dialog.run do |response| + dialog.run do |response| if response == Dialog::RESPONSE_ACCEPT type = types[type_input.active] @content = case type @@ -982,7 +982,7 @@ module JSON dialog.signal_connect(:'key-press-event', &DEFAULT_DIALOG_KEY_PRESS_HANDLER) dialog.show_all self.focus = dialog - dialog.run do |response| + dialog.run do |response| if response == Dialog::RESPONSE_ACCEPT return @order = order_input.text, reverse_checkbox.active? 
end @@ -1017,7 +1017,7 @@ module JSON dialog.signal_connect(:'key-press-event', &DEFAULT_DIALOG_KEY_PRESS_HANDLER) dialog.show_all self.focus = dialog - dialog.run do |response| + dialog.run do |response| if response == Dialog::RESPONSE_ACCEPT begin return Regexp.new(regex_input.text, icase_checkbox.active? ? Regexp::IGNORECASE : 0) @@ -1216,7 +1216,7 @@ module JSON end end - # Save the current file as the filename + # Save the current file as the filename def file_save_as filename = select_file('Save as a JSON file') store_file(filename) @@ -1242,7 +1242,7 @@ module JSON rescue SystemCallError => e Editor.error_dialog(self, "Failed to store JSON file: #{e}!") end - + # Load the file named _filename_ into the editor as a JSON document. def load_file(filename) if filename @@ -1335,7 +1335,7 @@ module JSON dialog.signal_connect(:'key-press-event', &DEFAULT_DIALOG_KEY_PRESS_HANDLER) dialog.show_all - dialog.run do |response| + dialog.run do |response| if response == Dialog::RESPONSE_ACCEPT return @location = location_input.text end diff --git a/ext/json/lib/json/version.rb b/ext/json/lib/json/version.rb index 28bea76b19..38df3b5951 100644 --- a/ext/json/lib/json/version.rb +++ b/ext/json/lib/json/version.rb @@ -1,6 +1,6 @@ module JSON # JSON version - VERSION = '1.1.8' + VERSION = '1.4.1' VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc: VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc: VERSION_MINOR = VERSION_ARRAY[1] # :nodoc: diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb new file mode 100644 index 0000000000..f378479875 --- /dev/null +++ b/ext/json/parser/extconf.rb @@ -0,0 +1,4 @@ +require 'mkmf' +require 'rbconfig' + +create_makefile 'json/ext/parser' diff --git a/ext/json/ext/parser/parser.c b/ext/json/parser/parser.c similarity index 75% rename from ext/json/ext/parser/parser.c rename to ext/json/parser/parser.c index a71c3b8e98..1d639d29b8 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1,68 +1,92 @@ 
#line 1 "parser.rl" -#include "ruby.h" -#include "unicode.h" -#if HAVE_RE_H -#include "re.h" -#endif -#if HAVE_RUBY_ST_H -#include "ruby/st.h" -#endif -#if HAVE_ST_H -#include "st.h" -#endif +#include "parser.h" -#define EVIL 0x666 +/* unicode */ -#ifndef RHASH_TBL -#define RHASH_TBL(hsh) (RHASH(hsh)->tbl) -#endif +static const char digit_values[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, + -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1 +}; + +static UTF32 unescape_unicode(const unsigned char *p) +{ + char b; + UTF32 result = 0; + b = digit_values[p[0]]; + if (b < 0) return UNI_REPLACEMENT_CHAR; + result = (result << 4) | b; + b = digit_values[p[1]]; + result = (result << 4) | b; + if (b < 0) return UNI_REPLACEMENT_CHAR; + b = digit_values[p[2]]; + result = (result << 4) | b; + if (b < 0) return UNI_REPLACEMENT_CHAR; + b = digit_values[p[3]]; + result = (result << 4) | b; + if (b < 0) return UNI_REPLACEMENT_CHAR; + return result; +} + +static int convert_UTF32_to_UTF8(char *buf, UTF32 ch) +{ + int len = 1; + if (ch <= 0x7F) { 
+ buf[0] = (char) ch; + } else if (ch <= 0x07FF) { + buf[0] = (char) ((ch >> 6) | 0xC0); + buf[1] = (char) ((ch & 0x3F) | 0x80); + len++; + } else if (ch <= 0xFFFF) { + buf[0] = (char) ((ch >> 12) | 0xE0); + buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80); + buf[2] = (char) ((ch & 0x3F) | 0x80); + len += 2; + } else if (ch <= 0x1fffff) { + buf[0] =(char) ((ch >> 18) | 0xF0); + buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80); + buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80); + buf[3] =(char) ((ch & 0x3F) | 0x80); + len += 3; + } else { + buf[0] = '?'; + } + return len; +} #ifdef HAVE_RUBY_ENCODING_H -#include "ruby/encoding.h" -#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding()) +static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE, + CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE; +static ID i_encoding, i_encode, i_encode_bang, i_force_encoding; #else -#define FORCE_UTF8(obj) +static ID i_iconv; #endif static VALUE mJSON, mExt, cParser, eParserError, eNestingError; static VALUE CNaN, CInfinity, CMinusInfinity; static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_object_class, i_array_class; - -#define MinusInfinity "-Infinity" - -typedef struct JSON_ParserStruct { - VALUE Vsource; - char *source; - long len; - char *memo; - VALUE create_id; - int max_nesting; - int current_nesting; - int allow_nan; - VALUE object_class; - VALUE array_class; -} JSON_Parser; - -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); - 
-#define GET_STRUCT \ - JSON_Parser *json; \ - Data_Get_Struct(self, JSON_Parser, json); + i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_object_class, + i_array_class, i_key_p, i_deep_const_get; -#line 84 "parser.rl" +#line 108 "parser.rl" -#line 66 "parser.c" +#line 90 "parser.c" static const int JSON_object_start = 1; static const int JSON_object_first_final = 27; static const int JSON_object_error = 0; @@ -70,7 +94,7 @@ static const int JSON_object_error = 0; static const int JSON_object_en_main = 1; -#line 117 "parser.rl" +#line 143 "parser.rl" static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -86,14 +110,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); -#line 90 "parser.c" +#line 114 "parser.c" { cs = JSON_object_start; } -#line 132 "parser.rl" +#line 158 "parser.rl" -#line 97 "parser.c" +#line 121 "parser.c" { if ( p == pe ) goto _test_eof; @@ -121,9 +145,12 @@ case 2: goto st2; goto st0; tr2: -#line 103 "parser.rl" +#line 127 "parser.rl" { - char *np = JSON_parse_string(json, p, pe, &last_name); + char *np; + json->parsing_name = 1; + np = JSON_parse_string(json, p, pe, &last_name); + json->parsing_name = 0; if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;} } goto st3; @@ -131,7 +158,7 @@ st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 135 "parser.c" +#line 161 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -198,10 +225,10 @@ case 8: goto st8; goto st0; tr11: -#line 92 "parser.rl" +#line 116 "parser.rl" { VALUE v = Qnil; - char *np = JSON_parse_value(json, p, pe, &v); + char *np = JSON_parse_value(json, p, pe, &v); if (np == NULL) { p--; {p++; cs = 9; goto _out;} } else { @@ -214,7 +241,7 @@ st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 218 "parser.c" +#line 244 "parser.c" switch( (*p) ) { case 13: goto st9; case 32: goto st9; 
@@ -303,14 +330,14 @@ case 18: goto st9; goto st18; tr4: -#line 108 "parser.rl" +#line 134 "parser.rl" { p--; {p++; cs = 27; goto _out;} } goto st27; st27: if ( ++p == pe ) goto _test_eof27; case 27: -#line 314 "parser.c" +#line 340 "parser.c" goto st0; st19: if ( ++p == pe ) @@ -408,13 +435,13 @@ case 26: _out: {} } -#line 133 "parser.rl" +#line 159 "parser.rl" if (cs >= JSON_object_first_final) { if (RTEST(json->create_id)) { VALUE klassname = rb_hash_aref(*result, json->create_id); if (!NIL_P(klassname)) { - VALUE klass = rb_path2class(StringValueCStr(klassname)); + VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) { *result = rb_funcall(klass, i_json_create, 1, *result); } @@ -427,7 +454,7 @@ case 26: } -#line 431 "parser.c" +#line 457 "parser.c" static const int JSON_value_start = 1; static const int JSON_value_first_final = 21; static const int JSON_value_error = 0; @@ -435,7 +462,7 @@ static const int JSON_value_error = 0; static const int JSON_value_en_main = 1; -#line 231 "parser.rl" +#line 257 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -443,14 +470,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 447 "parser.c" +#line 473 "parser.c" { cs = JSON_value_start; } -#line 238 "parser.rl" +#line 264 "parser.rl" -#line 454 "parser.c" +#line 480 "parser.c" { if ( p == pe ) goto _test_eof; @@ -475,14 +502,14 @@ st0: cs = 0; goto _out; tr0: -#line 179 "parser.rl" +#line 205 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); if (np == NULL) { p--; {p++; cs = 21; goto _out;} } else {p = (( np))-1;} } goto st21; tr2: -#line 184 "parser.rl" +#line 210 "parser.rl" { char *np; if(pe > p + 9 && !strncmp(MinusInfinity, p, 9)) { @@ -502,8 +529,8 @@ tr2: } goto st21; tr5: -#line 202 "parser.rl" - { +#line 228 "parser.rl" + { char *np; json->current_nesting++; np = 
JSON_parse_array(json, p, pe, result); @@ -512,8 +539,8 @@ tr5: } goto st21; tr9: -#line 210 "parser.rl" - { +#line 236 "parser.rl" + { char *np; json->current_nesting++; np = JSON_parse_object(json, p, pe, result); @@ -522,7 +549,7 @@ tr9: } goto st21; tr16: -#line 172 "parser.rl" +#line 198 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -532,7 +559,7 @@ tr16: } goto st21; tr18: -#line 165 "parser.rl" +#line 191 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -542,19 +569,19 @@ tr18: } goto st21; tr22: -#line 159 "parser.rl" +#line 185 "parser.rl" { *result = Qfalse; } goto st21; tr25: -#line 156 "parser.rl" +#line 182 "parser.rl" { *result = Qnil; } goto st21; tr28: -#line 162 "parser.rl" +#line 188 "parser.rl" { *result = Qtrue; } @@ -563,9 +590,9 @@ st21: if ( ++p == pe ) goto _test_eof21; case 21: -#line 218 "parser.rl" +#line 244 "parser.rl" { p--; {p++; cs = 21; goto _out;} } -#line 569 "parser.c" +#line 595 "parser.c" goto st0; st2: if ( ++p == pe ) @@ -726,7 +753,7 @@ case 20: _out: {} } -#line 239 "parser.rl" +#line 265 "parser.rl" if (cs >= JSON_value_first_final) { return p; @@ -736,7 +763,7 @@ case 20: } -#line 740 "parser.c" +#line 766 "parser.c" static const int JSON_integer_start = 1; static const int JSON_integer_first_final = 5; static const int JSON_integer_error = 0; @@ -744,7 +771,7 @@ static const int JSON_integer_error = 0; static const int JSON_integer_en_main = 1; -#line 255 "parser.rl" +#line 281 "parser.rl" static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -752,15 +779,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res int cs = EVIL; -#line 756 "parser.c" +#line 782 "parser.c" { cs = JSON_integer_start; } -#line 262 "parser.rl" +#line 288 "parser.rl" json->memo = p; -#line 764 "parser.c" +#line 790 "parser.c" { if ( p == pe ) goto _test_eof; @@ -794,14 +821,14 @@ case 3: goto st0; goto tr4; tr4: -#line 252 "parser.rl" +#line 278 "parser.rl" { 
p--; {p++; cs = 5; goto _out;} } goto st5; st5: if ( ++p == pe ) goto _test_eof5; case 5: -#line 805 "parser.c" +#line 831 "parser.c" goto st0; st4: if ( ++p == pe ) @@ -820,7 +847,7 @@ case 4: _out: {} } -#line 264 "parser.rl" +#line 290 "parser.rl" if (cs >= JSON_integer_first_final) { long len = p - json->memo; @@ -832,7 +859,7 @@ case 4: } -#line 836 "parser.c" +#line 862 "parser.c" static const int JSON_float_start = 1; static const int JSON_float_first_final = 10; static const int JSON_float_error = 0; @@ -840,7 +867,7 @@ static const int JSON_float_error = 0; static const int JSON_float_en_main = 1; -#line 286 "parser.rl" +#line 312 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -848,15 +875,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 852 "parser.c" +#line 878 "parser.c" { cs = JSON_float_start; } -#line 293 "parser.rl" +#line 319 "parser.rl" json->memo = p; -#line 860 "parser.c" +#line 886 "parser.c" { if ( p == pe ) goto _test_eof; @@ -914,14 +941,14 @@ case 5: goto st0; goto tr7; tr7: -#line 280 "parser.rl" +#line 306 "parser.rl" { p--; {p++; cs = 10; goto _out;} } goto st10; st10: if ( ++p == pe ) goto _test_eof10; case 10: -#line 925 "parser.c" +#line 951 "parser.c" goto st0; st6: if ( ++p == pe ) @@ -982,7 +1009,7 @@ case 9: _out: {} } -#line 295 "parser.rl" +#line 321 "parser.rl" if (cs >= JSON_float_first_final) { long len = p - json->memo; @@ -995,7 +1022,7 @@ case 9: -#line 999 "parser.c" +#line 1025 "parser.c" static const int JSON_array_start = 1; static const int JSON_array_first_final = 17; static const int JSON_array_error = 0; @@ -1003,7 +1030,7 @@ static const int JSON_array_error = 0; static const int JSON_array_en_main = 1; -#line 331 "parser.rl" +#line 357 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1017,14 +1044,14 @@ static char *JSON_parse_array(JSON_Parser *json, 
char *p, char *pe, VALUE *resul *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); -#line 1021 "parser.c" +#line 1047 "parser.c" { cs = JSON_array_start; } -#line 344 "parser.rl" +#line 370 "parser.rl" -#line 1028 "parser.c" +#line 1054 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1063,10 +1090,10 @@ case 2: goto st2; goto st0; tr2: -#line 312 "parser.rl" +#line 338 "parser.rl" { VALUE v = Qnil; - char *np = JSON_parse_value(json, p, pe, &v); + char *np = JSON_parse_value(json, p, pe, &v); if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { @@ -1079,7 +1106,7 @@ st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1083 "parser.c" +#line 1109 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -1179,14 +1206,14 @@ case 12: goto st3; goto st12; tr4: -#line 323 "parser.rl" +#line 349 "parser.rl" { p--; {p++; cs = 17; goto _out;} } goto st17; st17: if ( ++p == pe ) goto _test_eof17; case 17: -#line 1190 "parser.c" +#line 1216 "parser.c" goto st0; st13: if ( ++p == pe ) @@ -1242,73 +1269,88 @@ case 16: _out: {} } -#line 345 "parser.rl" +#line 371 "parser.rl" if(cs >= JSON_array_first_final) { return p + 1; } else { rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); + return NULL; } } -static VALUE json_string_unescape(char *p, char *pe) +static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd) { - VALUE result = rb_str_buf_new(pe - p + 1); + char *p = string, *pe = string, *unescape; + int unescape_len; - while (p < pe) { - if (*p == '\\') { - p++; - if (p >= pe) return Qnil; /* raise an exception later, \ at end */ - switch (*p) { - case '"': - case '\\': - rb_str_buf_cat(result, p, 1); - p++; - break; - case 'b': - rb_str_buf_cat2(result, "\b"); - p++; - break; - case 'f': - rb_str_buf_cat2(result, "\f"); - p++; - break; + while (pe < stringEnd) { + if (*pe == '\\') { + unescape = (char *) "?"; + unescape_len = 1; + if (pe > p) rb_str_buf_cat(result, p, pe - p); + 
switch (*++pe) { case 'n': - rb_str_buf_cat2(result, "\n"); - p++; + unescape = (char *) "\n"; break; case 'r': - rb_str_buf_cat2(result, "\r"); - p++; + unescape = (char *) "\r"; break; case 't': - rb_str_buf_cat2(result, "\t"); - p++; + unescape = (char *) "\t"; + break; + case '"': + unescape = (char *) "\""; + break; + case '\\': + unescape = (char *) "\\"; + break; + case 'b': + unescape = (char *) "\b"; + break; + case 'f': + unescape = (char *) "\f"; break; case 'u': - if (p > pe - 4) { + if (pe > stringEnd - 4) { return Qnil; } else { - p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion); + char buf[4]; + UTF32 ch = unescape_unicode((unsigned char *) ++pe); + pe += 3; + if (UNI_SUR_HIGH_START == (ch & 0xFC00)) { + pe++; + if (pe > stringEnd - 6) return Qnil; + if (pe[0] == '\\' && pe[1] == 'u') { + UTF32 sur = unescape_unicode((unsigned char *) pe + 2); + ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) + | (sur & 0x3FF)); + pe += 5; + } else { + unescape = (char *) "?"; + break; + } + } + unescape_len = convert_UTF32_to_UTF8(buf, ch); + unescape = buf; } break; default: - rb_str_buf_cat(result, p, 1); - p++; - break; + p = pe; + continue; } + rb_str_buf_cat(result, unescape, unescape_len); + p = ++pe; } else { - char *q = p; - while (*q != '\\' && q < pe) q++; - rb_str_buf_cat(result, p, q - p); - p = q; + pe++; } } + rb_str_buf_cat(result, p, pe - p); return result; } -#line 1312 "parser.c" +#line 1353 "parser.c" static const int JSON_string_start = 1; static const int JSON_string_first_final = 8; static const int JSON_string_error = 0; @@ -1316,24 +1358,24 @@ static const int JSON_string_error = 0; static const int JSON_string_en_main = 1; -#line 429 "parser.rl" +#line 470 "parser.rl" static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; - *result = rb_str_new("", 0); + *result = rb_str_buf_new(0); -#line 1329 "parser.c" +#line 1370 "parser.c" { cs = JSON_string_start; } -#line 437 
"parser.rl" +#line 478 "parser.rl" json->memo = p; -#line 1337 "parser.c" +#line 1378 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1358,9 +1400,9 @@ case 2: goto st0; goto st2; tr2: -#line 415 "parser.rl" +#line 456 "parser.rl" { - *result = json_string_unescape(json->memo + 1, p); + *result = json_string_unescape(*result, json->memo + 1, p); if (NIL_P(*result)) { p--; {p++; cs = 8; goto _out;} @@ -1369,14 +1411,14 @@ tr2: {p = (( p + 1))-1;} } } -#line 426 "parser.rl" +#line 467 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1380 "parser.c" +#line 1421 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1452,8 +1494,11 @@ case 7: _out: {} } -#line 439 "parser.rl" +#line 480 "parser.rl" + if (json->symbolize_names && json->parsing_name) { + *result = rb_str_intern(*result); + } if (cs >= JSON_string_first_final) { return p + 1; } else { @@ -1463,7 +1508,7 @@ case 7: -#line 1467 "parser.c" +#line 1511 "parser.c" static const int JSON_start = 1; static const int JSON_first_final = 10; static const int JSON_error = 0; @@ -1471,10 +1516,10 @@ static const int JSON_error = 0; static const int JSON_en_main = 1; -#line 473 "parser.rl" +#line 517 "parser.rl" -/* +/* * Document-class: JSON::Ext::Parser * * This is the JSON parser implemented as a C extension. 
It can be configured @@ -1486,6 +1531,54 @@ static const int JSON_en_main = 1; * */ +static VALUE convert_encoding(VALUE source) +{ + char *ptr = RSTRING_PTR(source); + long len = RSTRING_LEN(source); + if (len < 2) { + rb_raise(eParserError, "A JSON text must at least contain two octets!"); + } +#ifdef HAVE_RUBY_ENCODING_H + { + VALUE encoding = rb_funcall(source, i_encoding, 0); + if (encoding == CEncoding_ASCII_8BIT) { + if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32BE); + source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8); + } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16BE); + source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8); + } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32LE); + source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8); + } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16LE); + source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8); + } else { + FORCE_UTF8(source); + } + } else { + source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8); + } + } +#else + if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source); + } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source); + } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source); + } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { + source = rb_funcall(mJSON, 
i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source); + } +#endif + return source; +} + /* * call-seq: new(source, opts => {}) * @@ -1503,6 +1596,9 @@ static const int JSON_en_main = 1; * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in * defiance of RFC 4627 to be parsed by the Parser. This option defaults to * false. + * * *symbolize_names*: If set to true, returns symbols for the names + * (keys) in a JSON object. Otherwise strings are returned, which is also + * the default. * * *create_additions*: If set to false, the Parser doesn't create * additions even if a matchin class and create_id was found. This option * defaults to true. @@ -1514,21 +1610,18 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) char *ptr; long len; VALUE source, opts; - GET_STRUCT; + GET_PARSER; rb_scan_args(argc, argv, "11", &source, &opts); - source = StringValue(source); + source = convert_encoding(StringValue(source)); ptr = RSTRING_PTR(source); len = RSTRING_LEN(source); - if (len < 2) { - rb_raise(eParserError, "A JSON text must at least contain two octets!"); - } if (!NIL_P(opts)) { opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); if (NIL_P(opts)) { rb_raise(rb_eArgError, "opts needs to be like a hash"); } else { VALUE tmp = ID2SYM(i_max_nesting); - if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + if (option_given_p(opts, tmp)) { VALUE max_nesting = rb_hash_aref(opts, tmp); if (RTEST(max_nesting)) { Check_Type(max_nesting, T_FIXNUM); @@ -1540,14 +1633,21 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) json->max_nesting = 19; } tmp = ID2SYM(i_allow_nan); - if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + if (option_given_p(opts, tmp)) { VALUE allow_nan = rb_hash_aref(opts, tmp); json->allow_nan = RTEST(allow_nan) ? 
1 : 0; } else { json->allow_nan = 0; } + tmp = ID2SYM(i_symbolize_names); + if (option_given_p(opts, tmp)) { + VALUE symbolize_names = rb_hash_aref(opts, tmp); + json->symbolize_names = RTEST(symbolize_names) ? 1 : 0; + } else { + json->symbolize_names = 0; + } tmp = ID2SYM(i_create_additions); - if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + if (option_given_p(opts, tmp)) { VALUE create_additions = rb_hash_aref(opts, tmp); if (RTEST(create_additions)) { json->create_id = rb_funcall(mJSON, i_create_id, 0); @@ -1558,13 +1658,13 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) json->create_id = rb_funcall(mJSON, i_create_id, 0); } tmp = ID2SYM(i_object_class); - if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + if (option_given_p(opts, tmp)) { json->object_class = rb_hash_aref(opts, tmp); } else { json->object_class = Qnil; } tmp = ID2SYM(i_array_class); - if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + if (option_given_p(opts, tmp)) { json->array_class = rb_hash_aref(opts, tmp); } else { json->array_class = Qnil; @@ -1578,18 +1678,6 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) json->array_class = Qnil; } json->current_nesting = 0; - /* - Convert these? 
- if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } - */ json->len = len; json->source = ptr; json->Vsource = source; @@ -1607,19 +1695,19 @@ static VALUE cParser_parse(VALUE self) char *p, *pe; int cs = EVIL; VALUE result = Qnil; - GET_STRUCT; + GET_PARSER; -#line 1614 "parser.c" +#line 1701 "parser.c" { cs = JSON_start; } -#line 611 "parser.rl" +#line 698 "parser.rl" p = json->source; pe = p + json->len; -#line 1623 "parser.c" +#line 1710 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1675,7 +1763,7 @@ case 5: goto st1; goto st5; tr3: -#line 462 "parser.rl" +#line 506 "parser.rl" { char *np; json->current_nesting = 1; @@ -1684,7 +1772,7 @@ tr3: } goto st10; tr4: -#line 455 "parser.rl" +#line 499 "parser.rl" { char *np; json->current_nesting = 1; @@ -1696,7 +1784,7 @@ st10: if ( ++p == pe ) goto _test_eof10; case 10: -#line 1700 "parser.c" +#line 1787 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -1753,16 +1841,17 @@ case 9: _out: {} } -#line 614 "parser.rl" +#line 701 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; } else { rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); + return Qnil; } } -inline static JSON_Parser *JSON_allocate() +static JSON_Parser *JSON_allocate() { JSON_Parser *json = ALLOC(JSON_Parser); MEMZERO(json, JSON_Parser, 1); @@ -1796,7 +1885,7 @@ static VALUE cJSON_parser_s_allocate(VALUE klass) */ static VALUE cParser_source(VALUE self) { - GET_STRUCT; + GET_PARSER; return rb_str_dup(json->Vsource); } @@ 
-1824,6 +1913,23 @@ void Init_parser() i_chr = rb_intern("chr"); i_max_nesting = rb_intern("max_nesting"); i_allow_nan = rb_intern("allow_nan"); + i_symbolize_names = rb_intern("symbolize_names"); i_object_class = rb_intern("object_class"); i_array_class = rb_intern("array_class"); + i_key_p = rb_intern("key?"); + i_deep_const_get = rb_intern("deep_const_get"); +#ifdef HAVE_RUBY_ENCODING_H + CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8")); + CEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be")); + CEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le")); + CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be")); + CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le")); + CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit")); + i_encoding = rb_intern("encoding"); + i_encode = rb_intern("encode"); + i_encode_bang = rb_intern("encode!"); + i_force_encoding = rb_intern("force_encoding"); +#else + i_iconv = rb_intern("iconv"); +#endif } diff --git a/ext/json/parser/parser.h b/ext/json/parser/parser.h new file mode 100644 index 0000000000..688ffdaeba --- /dev/null +++ b/ext/json/parser/parser.h @@ -0,0 +1,71 @@ +#ifndef _PARSER_H_ +#define _PARSER_H_ + +#include "ruby.h" + +#if HAVE_RE_H +#include "re.h" +#endif + +#ifdef HAVE_RUBY_ENCODING_H +#include "ruby/encoding.h" +#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding()) +#else +#define FORCE_UTF8(obj) +#endif + +#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key)) + +/* unicode */ + +typedef unsigned long UTF32; /* at least 32 bits */ +typedef unsigned short UTF16; /* at least 16 bits */ +typedef unsigned char UTF8; /* typically 8 bits */ + +#define UNI_REPLACEMENT_CHAR 
(UTF32)0x0000FFFD +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF + +typedef struct JSON_ParserStruct { + VALUE Vsource; + char *source; + long len; + char *memo; + VALUE create_id; + int max_nesting; + int current_nesting; + int allow_nan; + int parsing_name; + int symbolize_names; + VALUE object_class; + VALUE array_class; +} JSON_Parser; + +#define GET_PARSER \ + JSON_Parser *json; \ + Data_Get_Struct(self, JSON_Parser, json) + +#define MinusInfinity "-Infinity" +#define EVIL 0x666 + +static UTF32 unescape_unicode(const unsigned char *p); +static int convert_UTF32_to_UTF8(char *buf, UTF32 ch); +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result); +static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd); +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); +static VALUE convert_encoding(VALUE source); +static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self); +static VALUE cParser_parse(VALUE self); +static JSON_Parser *JSON_allocate(); +static void JSON_mark(JSON_Parser *json); +static void JSON_free(JSON_Parser *json); +static VALUE cJSON_parser_s_allocate(VALUE klass); +static VALUE cParser_source(VALUE self); + +#endif diff --git a/ext/json/ext/parser/parser.rl b/ext/json/parser/parser.rl similarity index 63% rename from ext/json/ext/parser/parser.rl rename to ext/json/parser/parser.rl index 3f180c4ec9..dd07485f3a 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/parser/parser.rl @@ -1,59 +1,83 @@ -#include 
"ruby.h" -#include "unicode.h" -#if HAVE_RE_H -#include "re.h" -#endif -#if HAVE_RUBY_ST_H -#include "ruby/st.h" -#endif -#if HAVE_ST_H -#include "st.h" -#endif +#include "parser.h" -#define EVIL 0x666 +/* unicode */ -#ifndef RHASH_TBL -#define RHASH_TBL(hsh) (RHASH(hsh)->tbl) -#endif +/* + * Hex digit value of each byte, or -1 when the byte is not a hex digit. + * The element type is signed char because plain char is unsigned on some + * platforms, where the -1 entries would be stored as 255. + */ +static const signed char digit_values[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, + -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1 +}; + +/* + * Decodes the four hex digits at p[0..3] into one value. Returns + * UNI_REPLACEMENT_CHAR as soon as a byte that is not a hex digit is seen. + */ +static UTF32 unescape_unicode(const unsigned char *p) +{ + signed char b; /* signed so that the b < 0 checks can fire everywhere */ + UTF32 result = 0; + b = digit_values[p[0]]; + if (b < 0) return UNI_REPLACEMENT_CHAR; + result = (result << 4) | b; + b = digit_values[p[1]]; + if (b < 0) return UNI_REPLACEMENT_CHAR; + result = (result << 4) | b; + b = digit_values[p[2]]; + if (b < 0) return UNI_REPLACEMENT_CHAR; + result = (result << 4) | b; + b = digit_values[p[3]]; + if (b < 0) return UNI_REPLACEMENT_CHAR; + result = (result << 4) | b; + return result; +} + +static int convert_UTF32_to_UTF8(char *buf, UTF32 ch) +{ + int len = 1; + if (ch <= 0x7F) { + buf[0] = (char) ch; + } else
if (ch <= 0x07FF) { + buf[0] = (char) ((ch >> 6) | 0xC0); + buf[1] = (char) ((ch & 0x3F) | 0x80); + len++; + } else if (ch <= 0xFFFF) { + buf[0] = (char) ((ch >> 12) | 0xE0); + buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80); + buf[2] = (char) ((ch & 0x3F) | 0x80); + len += 2; + } else if (ch <= 0x1fffff) { + buf[0] =(char) ((ch >> 18) | 0xF0); + buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80); + buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80); + buf[3] =(char) ((ch & 0x3F) | 0x80); + len += 3; + } else { + buf[0] = '?'; + } + return len; +} #ifdef HAVE_RUBY_ENCODING_H -#include "ruby/encoding.h" -#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding()) +static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE, + CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE; +static ID i_encoding, i_encode, i_encode_bang, i_force_encoding; #else -#define FORCE_UTF8(obj) +static ID i_iconv; #endif static VALUE mJSON, mExt, cParser, eParserError, eNestingError; static VALUE CNaN, CInfinity, CMinusInfinity; static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_object_class, i_array_class; - -#define MinusInfinity "-Infinity" - -typedef struct JSON_ParserStruct { - VALUE Vsource; - char *source; - long len; - char *memo; - VALUE create_id; - int max_nesting; - int current_nesting; - int allow_nan; - VALUE object_class; - VALUE array_class; -} JSON_Parser; - -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); - -#define GET_STRUCT \ - JSON_Parser 
*json; \ - Data_Get_Struct(self, JSON_Parser, json); + i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_object_class, + i_array_class, i_key_p, i_deep_const_get; %%{ machine JSON_common; @@ -91,7 +115,7 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul action parse_value { VALUE v = Qnil; - char *np = JSON_parse_value(json, fpc, pe, &v); + char *np = JSON_parse_value(json, fpc, pe, &v); if (np == NULL) { fhold; fbreak; } else { @@ -101,7 +125,10 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul } action parse_name { - char *np = JSON_parse_string(json, fpc, pe, &last_name); + char *np; + json->parsing_name = 1; + np = JSON_parse_string(json, fpc, pe, &last_name); + json->parsing_name = 0; if (np == NULL) { fhold; fbreak; } else fexec np; } @@ -135,7 +162,7 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu if (RTEST(json->create_id)) { VALUE klassname = rb_hash_aref(*result, json->create_id); if (!NIL_P(klassname)) { - VALUE klass = rb_path2class(StringValueCStr(klassname)); + VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) { *result = rb_funcall(klass, i_json_create, 1, *result); } @@ -199,7 +226,7 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu fhold; fbreak; } - action parse_array { + action parse_array { char *np; json->current_nesting++; np = JSON_parse_array(json, fpc, pe, result); @@ -207,7 +234,7 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu if (np == NULL) { fhold; fbreak; } else fexec np; } - action parse_object { + action parse_object { char *np; json->current_nesting++; np = JSON_parse_object(json, fpc, pe, result); @@ -311,7 +338,7 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul action parse_value { VALUE v = Qnil; - char *np = JSON_parse_value(json, fpc, pe, &v); + char 
*np = JSON_parse_value(json, fpc, pe, &v); if (np == NULL) { fhold; fbreak; } else { @@ -347,62 +374,77 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul return p + 1; } else { rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); + return NULL; } } -static VALUE json_string_unescape(char *p, char *pe) +/* + * Appends the unescaped form of the bytes in [string, stringEnd) to result + * and returns result. Returns Qnil when a unicode escape, or the escape + * expected after a high surrogate, does not fit before stringEnd. + */ +static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd) { - VALUE result = rb_str_buf_new(pe - p + 1); + char *p = string, *pe = string, *unescape; + int unescape_len; + char buf[4]; /* function scope: unescape still points here after the switch */ - while (p < pe) { - if (*p == '\\') { - p++; - if (p >= pe) return Qnil; /* raise an exception later, \ at end */ - switch (*p) { - case '"': - case '\\': - rb_str_buf_cat(result, p, 1); - p++; - break; - case 'b': - rb_str_buf_cat2(result, "\b"); - p++; - break; - case 'f': - rb_str_buf_cat2(result, "\f"); - p++; - break; + while (pe < stringEnd) { + if (*pe == '\\') { + unescape = (char *) "?"; + unescape_len = 1; + if (pe > p) rb_str_buf_cat(result, p, pe - p); + switch (*++pe) { case 'n': - rb_str_buf_cat2(result, "\n"); - p++; + unescape = (char *) "\n"; break; case 'r': - rb_str_buf_cat2(result, "\r"); - p++; + unescape = (char *) "\r"; break; case 't': - rb_str_buf_cat2(result, "\t"); - p++; + unescape = (char *) "\t"; + break; + case '"': + unescape = (char *) "\""; + break; + case '\\': + unescape = (char *) "\\"; + break; + case 'b': + unescape = (char *) "\b"; + break; + case 'f': + unescape = (char *) "\f"; break; case 'u': - if (p > pe - 4) { + if (pe > stringEnd - 4) { return Qnil; } else { - p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion); + UTF32 ch = unescape_unicode((unsigned char *) ++pe); + pe += 3; + if (UNI_SUR_HIGH_START == (ch & 0xFC00)) { + pe++; + if (pe > stringEnd - 6) return Qnil; + if (pe[0] == '\\' && pe[1] == 'u') { + UTF32 sur = unescape_unicode((unsigned char *) pe + 2); + ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) + | (sur & 0x3FF)); + pe += 5; + }
else { + unescape = (char *) "?"; + break; + } + } + unescape_len = convert_UTF32_to_UTF8(buf, ch); + unescape = buf; } break; default: - rb_str_buf_cat(result, p, 1); - p++; - break; + p = pe; + continue; } + rb_str_buf_cat(result, unescape, unescape_len); + p = ++pe; } else { - char *q = p; - while (*q != '\\' && q < pe) q++; - rb_str_buf_cat(result, p, q - p); - p = q; + pe++; } } + rb_str_buf_cat(result, p, pe - p); return result; } @@ -413,7 +455,7 @@ static VALUE json_string_unescape(char *p, char *pe) write data; action parse_string { - *result = json_string_unescape(json->memo + 1, p); + *result = json_string_unescape(*result, json->memo + 1, p); if (NIL_P(*result)) { fhold; fbreak; @@ -432,11 +474,14 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu { int cs = EVIL; - *result = rb_str_new("", 0); + *result = rb_str_buf_new(0); %% write init; json->memo = p; %% write exec; + /* *result is Qnil when unescaping failed; only intern a real string */ + if (json->symbolize_names && json->parsing_name && !NIL_P(*result)) { + *result = rb_str_intern(*result); + } if (cs >= JSON_string_first_final) { return p + 1; } else { @@ -472,7 +517,7 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu ) ignore*; }%% -/* +/* * Document-class: JSON::Ext::Parser * * This is the JSON parser implemented as a C extension.
It can be configured @@ -484,6 +529,54 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu * */ +/* + * Returns source as UTF-8 text for the parser and raises eParserError when + * it is shorter than two bytes. Binary (ASCII-8BIT) input is sniffed by the + * position of NUL bytes in its first four bytes to pick UTF-32/UTF-16 BE/LE; + * if none matches it is tagged as UTF-8. The caller's string is never + * modified: every branch that re-tags the string works on a duplicate. + */ +static VALUE convert_encoding(VALUE source) +{ + char *ptr = RSTRING_PTR(source); + long len = RSTRING_LEN(source); + if (len < 2) { + rb_raise(eParserError, "A JSON text must at least contain two octets!"); + } +#ifdef HAVE_RUBY_ENCODING_H + { + VALUE encoding = rb_funcall(source, i_encoding, 0); + if (encoding == CEncoding_ASCII_8BIT) { + if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32BE); + source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8); + } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16BE); + source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8); + } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32LE); + source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8); + } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16LE); + source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8); + } else { + /* duplicate first so the caller's string keeps its encoding */ + source = rb_str_dup(source); + FORCE_UTF8(source); + } + } else { + source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8); + } + } +#else + if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source); + } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source); + } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source); + } else if (len >= 4 && ptr[1] == 0 &&
ptr[3] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source); + } +#endif + return source; +} + /* * call-seq: new(source, opts => {}) * @@ -501,6 +594,9 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in * defiance of RFC 4627 to be parsed by the Parser. This option defaults to * false. + * * *symbolize_names*: If set to true, returns symbols for the names + * (keys) in a JSON object. Otherwise strings are returned, which is also + * the default. * * *create_additions*: If set to false, the Parser doesn't create * additions even if a matchin class and create_id was found. This option * defaults to true. @@ -512,21 +608,18 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) char *ptr; long len; VALUE source, opts; - GET_STRUCT; + GET_PARSER; rb_scan_args(argc, argv, "11", &source, &opts); - source = StringValue(source); + source = convert_encoding(StringValue(source)); ptr = RSTRING_PTR(source); len = RSTRING_LEN(source); - if (len < 2) { - rb_raise(eParserError, "A JSON text must at least contain two octets!"); - } if (!NIL_P(opts)) { opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); if (NIL_P(opts)) { rb_raise(rb_eArgError, "opts needs to be like a hash"); } else { VALUE tmp = ID2SYM(i_max_nesting); - if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + if (option_given_p(opts, tmp)) { VALUE max_nesting = rb_hash_aref(opts, tmp); if (RTEST(max_nesting)) { Check_Type(max_nesting, T_FIXNUM); @@ -538,14 +631,21 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) json->max_nesting = 19; } tmp = ID2SYM(i_allow_nan); - if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + if (option_given_p(opts, tmp)) { VALUE allow_nan = rb_hash_aref(opts, tmp); json->allow_nan = RTEST(allow_nan) ?
1 : 0; } else { json->allow_nan = 0; } + tmp = ID2SYM(i_symbolize_names); + if (option_given_p(opts, tmp)) { + VALUE symbolize_names = rb_hash_aref(opts, tmp); + json->symbolize_names = RTEST(symbolize_names) ? 1 : 0; + } else { + json->symbolize_names = 0; + } tmp = ID2SYM(i_create_additions); - if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + if (option_given_p(opts, tmp)) { VALUE create_additions = rb_hash_aref(opts, tmp); if (RTEST(create_additions)) { json->create_id = rb_funcall(mJSON, i_create_id, 0); @@ -556,13 +656,13 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) json->create_id = rb_funcall(mJSON, i_create_id, 0); } tmp = ID2SYM(i_object_class); - if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + if (option_given_p(opts, tmp)) { json->object_class = rb_hash_aref(opts, tmp); } else { json->object_class = Qnil; } tmp = ID2SYM(i_array_class); - if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + if (option_given_p(opts, tmp)) { json->array_class = rb_hash_aref(opts, tmp); } else { json->array_class = Qnil; @@ -576,18 +676,6 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) json->array_class = Qnil; } json->current_nesting = 0; - /* - Convert these? 
- if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } - */ json->len = len; json->source = ptr; json->Vsource = source; @@ -605,7 +693,7 @@ static VALUE cParser_parse(VALUE self) char *p, *pe; int cs = EVIL; VALUE result = Qnil; - GET_STRUCT; + GET_PARSER; %% write init; p = json->source; @@ -616,10 +704,11 @@ static VALUE cParser_parse(VALUE self) return result; } else { rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); + return Qnil; } } -inline static JSON_Parser *JSON_allocate() +static JSON_Parser *JSON_allocate() { JSON_Parser *json = ALLOC(JSON_Parser); MEMZERO(json, JSON_Parser, 1); @@ -653,7 +742,7 @@ static VALUE cJSON_parser_s_allocate(VALUE klass) */ static VALUE cParser_source(VALUE self) { - GET_STRUCT; + GET_PARSER; return rb_str_dup(json->Vsource); } @@ -681,6 +770,23 @@ void Init_parser() i_chr = rb_intern("chr"); i_max_nesting = rb_intern("max_nesting"); i_allow_nan = rb_intern("allow_nan"); + i_symbolize_names = rb_intern("symbolize_names"); i_object_class = rb_intern("object_class"); i_array_class = rb_intern("array_class"); + i_key_p = rb_intern("key?"); + i_deep_const_get = rb_intern("deep_const_get"); +#ifdef HAVE_RUBY_ENCODING_H + CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8")); + CEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be")); + CEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le")); + 
CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be")); + CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le")); + CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit")); + i_encoding = rb_intern("encoding"); + i_encode = rb_intern("encode"); + i_encode_bang = rb_intern("encode!"); + i_force_encoding = rb_intern("force_encoding"); +#else + i_iconv = rb_intern("iconv"); +#endif }