1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* lib/json.rb, lib/json, ext/json, test/json:

import JSON library.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12428 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2007-06-04 12:31:26 +00:00
parent 6b3ef2249c
commit af1c416728
69 changed files with 8551 additions and 70 deletions

View file

@ -1,3 +1,8 @@
Mon Jun 04 21:15:45 2007 NARUSE, Yui <naruse@ruby-lang.org>
* lib/json.rb, lib/json, ext/json, test/json:
import JSON library.
Sat Jun 2 16:48:55 2007 Koichi Sasada <ko1@atdot.net>
* cont.c (Fiber#pass): rename to Fiber#yield. Block parameter

View file

@ -0,0 +1,3 @@
#ifndef EXTCONF_H
#define EXTCONF_H
#endif

View file

@ -0,0 +1,9 @@
require 'mkmf'
require 'rbconfig'
# With gcc, compile with all warnings and gdb debugging information.
# (Use ' -Wall' on its own for a build without debug symbols.)
CONFIG['CC'] += ' -Wall -ggdb' if CONFIG['CC'] =~ /gcc/

# Write the Makefile for the json/ext/generator extension.
create_makefile 'json/ext/generator'

View file

@ -0,0 +1,728 @@
/* vim: set cin et sw=4 ts=4: */
#include <string.h>
#include "ruby.h"
#include "st.h"
#include "unicode.h"
/* Handles for the modules, classes and exception classes used by this
 * extension; every one of them is assigned in Init_generator. */
static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
mHash, mArray, mInteger, mFloat, mString, mString_Extend,
mTrueClass, mFalseClass, mNilClass, eGeneratorError,
eCircularDatastructure;
/* Interned method and option names, likewise filled in by Init_generator. */
static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
i_object_nl, i_array_nl, i_check_circular, i_pack, i_unpack,
i_create_id, i_extend;
/* Configuration and scratch data of one JSON::Ext::Generator::State object. */
typedef struct JSON_Generator_StateStruct {
/* String repeated once per nesting level in front of nested items. */
VALUE indent;
/* String appended after the ':' of an object pair. */
VALUE space;
/* String inserted in front of the ':' of an object pair. */
VALUE space_before;
/* String emitted after '{', between pairs and before '}' (when non-empty). */
VALUE object_nl;
/* String emitted after '[', between elements and before ']'. */
VALUE array_nl;
/* Non-zero when generation should look for circular structures. */
int check_circular;
/* Hash keyed by object id holding the containers currently being generated. */
VALUE seen;
/* Output buffer of the hash that is currently being iterated. */
VALUE memo;
/* Fixnum nesting depth handed to the hash iterator. */
VALUE depth;
/* Set once a pair was written, so the next pair is preceded by ','. */
int flag;
} JSON_Generator_State;
/* Declares a local `state` and points it at the JSON_Generator_State wrapped
 * by `self`. It expands to a declaration followed by a statement, so it has
 * to be used where a declaration is still allowed. */
#define GET_STATE(self) \
JSON_Generator_State *state; \
Data_Get_Struct(self, JSON_Generator_State, state);
/*
* Document-module: JSON::Ext::Generator
*
* This is the JSON generator implemented as a C extension. It can be
* configured to be used by setting
*
* JSON.generator = JSON::Ext::Generator
*
* with the method generator= in JSON.
*
*/
static int hash_to_json_state_i(VALUE key, VALUE value, VALUE Vstate)
{
    VALUE out, level, piece;
    int multiline;
    GET_STATE(Vstate);

    /* Nothing to emit for an entry without a key. */
    if (key == Qundef) return ST_CONTINUE;

    /* Nested to_json calls overwrite memo/depth, so work on local copies
     * and write them back before returning. */
    out = state->memo;
    level = state->depth;
    multiline = RSTRING_LEN(state->object_nl) != 0;

    /* Separate this pair from the previous one. */
    if (state->flag) {
        state->flag = 0;
        rb_str_buf_cat2(out, ",");
        if (multiline) rb_str_buf_append(out, state->object_nl);
    }
    if (multiline) {
        rb_str_buf_append(out, rb_str_times(state->indent, level));
    }

    /* Key: always generated from its to_s representation. */
    piece = rb_funcall(rb_funcall(key, i_to_s, 0), i_to_json, 2, Vstate, level);
    rb_str_buf_append(out, piece);
    OBJ_INFECT(out, piece);

    if (RSTRING_LEN(state->space_before)) {
        rb_str_buf_append(out, state->space_before);
    }
    rb_str_buf_cat2(out, ":");
    if (RSTRING_LEN(state->space)) rb_str_buf_append(out, state->space);

    /* Value. */
    piece = rb_funcall(value, i_to_json, 2, Vstate, level);
    state->flag = 1;
    rb_str_buf_append(out, piece);
    OBJ_INFECT(out, piece);

    state->depth = level;
    state->memo = out;
    return ST_CONTINUE;
}
inline static VALUE mHash_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth) {
    long entries = RHASH(self)->tbl->num_entries;
    VALUE out;
    GET_STATE(Vstate);

    out = rb_str_buf_new(entries);

    /* Hand the buffer and the nesting level of the pairs (one deeper than
     * this hash) to the iterator through the state object. */
    state->memo = out;
    state->depth = LONG2FIX(1 + FIX2LONG(Vdepth));
    state->flag = 0;

    rb_str_buf_cat2(out, "{");
    if (RSTRING_LEN(state->object_nl)) rb_str_buf_append(out, state->object_nl);

    rb_hash_foreach(self, hash_to_json_state_i, Vstate);

    /* The closing brace goes on its own line, indented like the hash itself. */
    if (RSTRING_LEN(state->object_nl)) {
        rb_str_buf_append(out, state->object_nl);
        rb_str_buf_append(out, rb_str_times(state->indent, Vdepth));
    }
    rb_str_buf_cat2(out, "}");
    return out;
}
static int hash_to_json_i(VALUE key, VALUE value, VALUE buf)
{
    VALUE key_json, value_json;

    if (key == Qundef) return ST_CONTINUE;

    /* The caller seeds the buffer with "{", so anything longer than one
     * character already contains a pair and needs a separator. */
    if (RSTRING_LEN(buf) > 1) rb_str_buf_cat2(buf, ",");

    key_json = rb_funcall(rb_funcall(key, i_to_s, 0), i_to_json, 0);
    rb_str_buf_append(buf, key_json);
    OBJ_INFECT(buf, key_json);

    rb_str_buf_cat2(buf, ":");

    value_json = rb_funcall(value, i_to_json, 0);
    rb_str_buf_append(buf, value_json);
    OBJ_INFECT(buf, value_json);

    return ST_CONTINUE;
}
/*
* call-seq: to_json(state = nil, depth = 0)
*
* Returns a JSON string containing a JSON object, that is unparsed from
* this Hash instance.
* _state_ is a JSON::State object, that can also be used to configure the
* produced JSON string output further.
* _depth_ is used to find out nesting depth, to indent accordingly.
*/
static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
{
    VALUE Vstate, Vdepth, level, result;

    rb_scan_args(argc, argv, "02", &Vstate, &Vdepth);
    /* A missing depth counts as nesting level 0. */
    level = LONG2FIX(NIL_P(Vdepth) ? 0 : FIX2LONG(Vdepth));

    if (!NIL_P(Vstate)) {
        GET_STATE(Vstate);
        if (!state->check_circular) {
            result = mHash_json_transfrom(self, Vstate, level);
        } else {
            /* Remember this hash while it is generated, so that meeting it
             * again further down is reported as a cycle. */
            VALUE self_id = rb_obj_id(self);
            if (RTEST(rb_hash_aref(state->seen, self_id))) {
                rb_raise(eCircularDatastructure,
                        "circular data structures not supported!");
            }
            rb_hash_aset(state->seen, self_id, Qtrue);
            result = mHash_json_transfrom(self, Vstate, level);
            rb_hash_delete(state->seen, self_id);
        }
    } else {
        /* No state given: compact output without any whitespace. */
        long entries = RHASH(self)->tbl->num_entries;
        result = rb_str_buf_new(entries);
        rb_str_buf_cat2(result, "{");
        rb_hash_foreach(self, hash_to_json_i, result);
        rb_str_buf_cat2(result, "}");
    }
    OBJ_INFECT(result, self);
    return result;
}
inline static VALUE mArray_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth) {
    long i, len = RARRAY_LEN(self);
    long depth = NIL_P(Vdepth) ? 0 : FIX2LONG(Vdepth);
    VALUE separator = rb_str_new2(",");
    VALUE self_id = Qnil, item_indent, result;
    int guard;
    GET_STATE(Vstate);

    /* With circular checking enabled, this array is registered as "seen"
     * for as long as its elements are being generated. */
    guard = state->check_circular;
    if (guard) {
        self_id = rb_obj_id(self);
        rb_hash_aset(state->seen, self_id, Qtrue);
    }

    result = rb_str_buf_new(len);
    if (RSTRING_LEN(state->array_nl)) rb_str_append(separator, state->array_nl);
    item_indent = rb_str_times(state->indent, LONG2FIX(depth + 1));

    rb_str_buf_cat2(result, "[");
    rb_str_buf_append(result, state->array_nl);
    for (i = 0; i < len; i++) {
        VALUE element = RARRAY_PTR(self)[i];
        if (guard && RTEST(rb_hash_aref(state->seen, rb_obj_id(element)))) {
            rb_raise(eCircularDatastructure,
                    "circular data structures not supported!");
        }
        OBJ_INFECT(result, element);
        if (i > 0) rb_str_buf_append(result, separator);
        rb_str_buf_append(result, item_indent);
        rb_str_buf_append(result, rb_funcall(element, i_to_json, 2, Vstate, LONG2FIX(depth + 1)));
    }

    /* Closing bracket: the unguarded path appends array_nl unconditionally
     * (a no-op for an empty string), the guarded path only when non-empty. */
    if (!guard || RSTRING_LEN(state->array_nl)) {
        rb_str_buf_append(result, state->array_nl);
    }
    if (RSTRING_LEN(state->array_nl)) {
        rb_str_buf_append(result, rb_str_times(state->indent, LONG2FIX(depth)));
    }
    rb_str_buf_cat2(result, "]");

    if (guard) rb_hash_delete(state->seen, self_id);
    return result;
}
/*
* call-seq: to_json(state = nil, depth = 0)
*
* Returns a JSON string containing a JSON array, that is unparsed from
* this Array instance.
* _state_ is a JSON::State object, that can also be used to configure the
* produced JSON string output further.
* _depth_ is used to find out nesting depth, to indent accordingly.
*/
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
    VALUE Vstate, Vdepth, result;

    rb_scan_args(argc, argv, "02", &Vstate, &Vdepth);
    if (!NIL_P(Vstate)) {
        /* A state was supplied: let it drive indentation and newlines. */
        result = mArray_json_transfrom(self, Vstate, Vdepth);
    } else {
        /* No state: compact output, elements separated by a bare comma. */
        long idx, count = RARRAY_LEN(self);
        result = rb_str_buf_new(2 + 2 * count);
        rb_str_buf_cat2(result, "[");
        for (idx = 0; idx < count; idx++) {
            VALUE item = RARRAY_PTR(self)[idx];
            OBJ_INFECT(result, item);
            if (idx != 0) rb_str_buf_cat2(result, ",");
            rb_str_buf_append(result, rb_funcall(item, i_to_json, 0));
        }
        rb_str_buf_cat2(result, "]");
    }
    OBJ_INFECT(result, self);
    return result;
}
/*
* call-seq: to_json(*)
*
* Returns a JSON string representation for this Integer number.
*/
static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
{
    /* The arguments are ignored; the JSON text is the result of to_s. */
    VALUE text = rb_funcall(self, i_to_s, 0);
    return text;
}
/*
* call-seq: to_json(*)
*
* Returns a JSON string representation for this Float number.
*/
static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
{
    /* The arguments are ignored; the JSON text is the result of to_s. */
    VALUE text = rb_funcall(self, i_to_s, 0);
    return text;
}
/*
* call-seq: String.included(modul)
*
* Extends _modul_ with the String::Extend module.
*/
static VALUE mString_included_s(VALUE self, VALUE modul) {
    /* Equivalent to modul.extend(String::Extend); its result is returned. */
    VALUE extended = rb_funcall(modul, i_extend, 1, mString_Extend);
    return extended;
}
/*
* call-seq: to_json(*)
*
* This string should be encoded with UTF-8. A call to this method
* returns a JSON string in which non-ASCII characters are escaped as
* \u???? sequences of UTF-16 code units.
*/
static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
{
    VALUE quoted = rb_str_buf_new(RSTRING_LEN(self));

    /* opening quote, escaped content (strict UTF-8 checking), closing quote */
    rb_str_buf_cat2(quoted, "\"");
    JSON_convert_UTF8_to_JSON(quoted, self, strictConversion);
    rb_str_buf_cat2(quoted, "\"");

    return quoted;
}
/*
* call-seq: to_json_raw_object()
*
* This method creates a raw object hash, that can be nested into
* other data structures and will be unparsed as a raw string. This
* method should be used, if you want to convert raw strings to JSON
* instead of UTF-8 strings, e. g. binary data.
*/
static VALUE mString_to_json_raw_object(VALUE self) {
    VALUE raw_object = rb_hash_new();
    VALUE create_id, class_name, bytes;

    /* JSON.create_id => name of the receiver's class */
    create_id = rb_funcall(mJSON, i_create_id, 0);
    class_name = rb_class_name(rb_obj_class(self));
    rb_hash_aset(raw_object, create_id, class_name);

    /* "raw" => the bytes of the string as an array of integers */
    bytes = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
    rb_hash_aset(raw_object, rb_str_new2("raw"), bytes);

    return raw_object;
}
/*
* call-seq: to_json_raw(*args)
*
* This method creates a JSON text from the result of a call to
* to_json_raw_object of this String.
*/
static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) {
    /* Build the raw object hash, then generate it like any other Hash,
     * forwarding the caller's arguments unchanged. */
    VALUE raw_object = mString_to_json_raw_object(self);

    Check_Type(raw_object, T_HASH);
    return mHash_to_json(argc, argv, raw_object);
}
/*
* call-seq: json_create(o)
*
* Raw Strings are JSON Objects (the raw bytes are stored in an array for the
* key "raw"). The Ruby String can be created by this module method.
*/
static VALUE mString_Extend_json_create(VALUE self, VALUE o) {
    VALUE bytes, format;

    Check_Type(o, T_HASH);
    /* o["raw"] holds the byte values; pack("C*") turns them into a String. */
    bytes = rb_hash_aref(o, rb_str_new2("raw"));
    format = rb_str_new2("C*");
    return rb_funcall(bytes, i_pack, 1, format);
}
/*
* call-seq: to_json(state = nil, depth = 0)
*
* Returns a JSON string for true: 'true'.
*/
static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
{
    /* Arguments are accepted for interface compatibility and ignored. */
    VALUE literal = rb_str_new2("true");
    return literal;
}
/*
* call-seq: to_json(state = nil, depth = 0)
*
* Returns a JSON string for false: 'false'.
*/
static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
{
    /* Arguments are accepted for interface compatibility and ignored. */
    VALUE literal = rb_str_new2("false");
    return literal;
}
/*
* call-seq: to_json(state = nil, depth = 0)
*
* Returns a JSON string for nil: 'null'.
*/
static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self)
{
    /* Arguments are accepted for interface compatibility and ignored. */
    VALUE literal = rb_str_new2("null");
    return literal;
}
/*
* call-seq: to_json(*)
*
* Converts this object to a string (calling #to_s), converts
* it to a JSON string, and returns the result. This is a fallback, if no
* special method #to_json was defined for some object.
*/
static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
{
    /* Fallback: generate the JSON string of the object's to_s result. */
    VALUE text = rb_funcall(self, i_to_s, 0);

    /* to_s may be user-defined; insist on a real String before escaping. */
    Check_Type(text, T_STRING);
    return mString_to_json(argc, argv, text);
}
/*
* Document-class: JSON::Ext::Generator::State
*
* This class is used to create State instances, that are used to hold data
* while generating a JSON text from a Ruby data structure.
*/
static void State_mark(JSON_Generator_State *state)
{
    /* formatting strings */
    rb_gc_mark_maybe(state->indent);
    rb_gc_mark_maybe(state->space);
    rb_gc_mark_maybe(state->space_before);
    rb_gc_mark_maybe(state->object_nl);
    rb_gc_mark_maybe(state->array_nl);

    /* bookkeeping used while a document is being generated */
    rb_gc_mark_maybe(state->seen);
    rb_gc_mark_maybe(state->memo);
    rb_gc_mark_maybe(state->depth);
}
/*
 * Allocates a zero-filled JSON_Generator_State.
 *
 * ALLOC returns uninitialised memory. The struct becomes reachable from Ruby
 * (accessors, GC mark function) as soon as it is wrapped, which is before
 * cState_initialize has run -- and initialize may never run, or may raise
 * half-way through. Clearing the struct makes sure no field ever holds a
 * garbage VALUE.
 */
static JSON_Generator_State *State_allocate()
{
    JSON_Generator_State *state = ALLOC(JSON_Generator_State);
    MEMZERO(state, JSON_Generator_State, 1);
    return state;
}
static VALUE cState_s_allocate(VALUE klass)
{
    /* Wrap a fresh state struct in an instance of klass. State_mark is
     * registered as the GC mark function; -1 is passed as the free function. */
    JSON_Generator_State *fresh = State_allocate();
    VALUE wrapper = Data_Wrap_Struct(klass, State_mark, -1, fresh);
    return wrapper;
}
/*
* call-seq: new(opts = {})
*
* Instantiates a new State object, configured by _opts_.
*
* _opts_ can have the following keys:
*
* * *indent*: a string used to indent levels (default: ''),
* * *space*: a string that is put after, a : or , delimiter (default: ''),
* * *space_before*: a string that is put before a : pair delimiter (default: ''),
* * *object_nl*: a string that is put at the end of a JSON object (default: ''),
* * *array_nl*: a string that is put at the end of a JSON array (default: ''),
* * *check_circular*: true if checking for circular data structures
* should be done, false (the default) otherwise.
*/
static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
{
VALUE opts;
GET_STATE(self);
rb_scan_args(argc, argv, "01", &opts);
if (NIL_P(opts)) {
state->indent = rb_str_new2("");
state->space = rb_str_new2("");
state->space_before = rb_str_new2("");
state->array_nl = rb_str_new2("");
state->object_nl = rb_str_new2("");
state->check_circular = 0;
} else {
VALUE tmp;
opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
tmp = rb_hash_aref(opts, ID2SYM(i_indent));
if (RTEST(tmp)) {
Check_Type(tmp, T_STRING);
state->indent = tmp;
} else {
state->indent = rb_str_new2("");
}
tmp = rb_hash_aref(opts, ID2SYM(i_space));
if (RTEST(tmp)) {
Check_Type(tmp, T_STRING);
state->space = tmp;
} else {
state->space = rb_str_new2("");
}
tmp = rb_hash_aref(opts, ID2SYM(i_space_before));
if (RTEST(tmp)) {
Check_Type(tmp, T_STRING);
state->space_before = tmp;
} else {
state->space_before = rb_str_new2("");
}
tmp = rb_hash_aref(opts, ID2SYM(i_array_nl));
if (RTEST(tmp)) {
Check_Type(tmp, T_STRING);
state->array_nl = tmp;
} else {
state->array_nl = rb_str_new2("");
}
tmp = rb_hash_aref(opts, ID2SYM(i_object_nl));
if (RTEST(tmp)) {
Check_Type(tmp, T_STRING);
state->object_nl = tmp;
} else {
state->object_nl = rb_str_new2("");
}
tmp = rb_hash_aref(opts, ID2SYM(i_check_circular));
state->check_circular = RTEST(tmp);
}
state->seen = rb_hash_new();
state->memo = Qnil;
state->depth = INT2FIX(0);
return self;
}
/*
* call-seq: from_state(opts)
*
* Creates a State object from _opts_, which ought to be Hash to create a
* new State instance configured by _opts_, something else to create an
* unconfigured instance. If _opts_ is a State object, it is just returned.
*/
static VALUE cState_from_state_s(VALUE self, VALUE opts)
{
    /* Already an instance of this class: hand it back untouched. */
    if (rb_obj_is_kind_of(opts, self)) return opts;

    /* A Hash configures a new instance ... */
    if (rb_obj_is_kind_of(opts, rb_cHash)) return rb_funcall(self, i_new, 1, opts);

    /* ... anything else yields an unconfigured one. */
    return rb_funcall(self, i_new, 0);
}
/*
* call-seq: indent()
*
* This string is used to indent levels in the JSON text.
*/
static VALUE cState_indent(VALUE self)
{
    JSON_Generator_State *state;

    /* Unwrap the C struct behind self and return its indent string. */
    Data_Get_Struct(self, JSON_Generator_State, state);
    return state->indent;
}
/*
* call-seq: indent=(indent)
*
* This string is used to indent levels in the JSON text.
*/
static VALUE cState_indent_set(VALUE self, VALUE indent)
{
    JSON_Generator_State *state;

    Data_Get_Struct(self, JSON_Generator_State, state);
    /* Only Strings are accepted; the assigned value is returned. */
    Check_Type(indent, T_STRING);
    state->indent = indent;
    return indent;
}
/*
* call-seq: space()
*
* This string is used to insert a space between the tokens in a JSON
* string.
*/
static VALUE cState_space(VALUE self)
{
    JSON_Generator_State *state;

    /* Unwrap the C struct behind self and return its space string. */
    Data_Get_Struct(self, JSON_Generator_State, state);
    return state->space;
}
/*
* call-seq: space=(space)
*
* This string is used to insert a space between the tokens in a JSON
* string.
*/
static VALUE cState_space_set(VALUE self, VALUE space)
{
    JSON_Generator_State *state;

    Data_Get_Struct(self, JSON_Generator_State, state);
    /* Only Strings are accepted; the assigned value is returned. */
    Check_Type(space, T_STRING);
    state->space = space;
    return space;
}
/*
* call-seq: space_before()
*
* This string is used to insert a space before the ':' in JSON objects.
*/
static VALUE cState_space_before(VALUE self)
{
    JSON_Generator_State *state;

    /* Unwrap the C struct behind self and return its space_before string. */
    Data_Get_Struct(self, JSON_Generator_State, state);
    return state->space_before;
}
/*
* call-seq: space_before=(space_before)
*
* This string is used to insert a space before the ':' in JSON objects.
*/
static VALUE cState_space_before_set(VALUE self, VALUE space_before)
{
    JSON_Generator_State *state;

    Data_Get_Struct(self, JSON_Generator_State, state);
    /* Only Strings are accepted; the assigned value is returned. */
    Check_Type(space_before, T_STRING);
    state->space_before = space_before;
    return space_before;
}
/*
* call-seq: object_nl()
*
* This string is put at the end of a line that holds a JSON object (or
* Hash).
*/
static VALUE cState_object_nl(VALUE self)
{
    JSON_Generator_State *state;

    /* Unwrap the C struct behind self and return its object_nl string. */
    Data_Get_Struct(self, JSON_Generator_State, state);
    return state->object_nl;
}
/*
* call-seq: object_nl=(object_nl)
*
* This string is put at the end of a line that holds a JSON object (or
* Hash).
*/
static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
{
    JSON_Generator_State *state;

    Data_Get_Struct(self, JSON_Generator_State, state);
    /* Only Strings are accepted; the assigned value is returned. */
    Check_Type(object_nl, T_STRING);
    state->object_nl = object_nl;
    return object_nl;
}
/*
* call-seq: array_nl()
*
* This string is put at the end of a line that holds a JSON array.
*/
static VALUE cState_array_nl(VALUE self)
{
    JSON_Generator_State *state;

    /* Unwrap the C struct behind self and return its array_nl string. */
    Data_Get_Struct(self, JSON_Generator_State, state);
    return state->array_nl;
}
/*
* call-seq: array_nl=(array_nl)
*
* This string is put at the end of a line that holds a JSON array.
*/
static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
{
    JSON_Generator_State *state;

    Data_Get_Struct(self, JSON_Generator_State, state);
    /* Only Strings are accepted; the assigned value is returned. */
    Check_Type(array_nl, T_STRING);
    state->array_nl = array_nl;
    return array_nl;
}
/*
* call-seq: check_circular?
*
* Returns true, if circular data structures should be checked,
* otherwise returns false.
*/
static VALUE cState_check_circular_p(VALUE self)
{
    JSON_Generator_State *state;

    Data_Get_Struct(self, JSON_Generator_State, state);
    /* Translate the C flag into a Ruby boolean. */
    if (state->check_circular) return Qtrue;
    return Qfalse;
}
/*
* call-seq: seen?(object)
*
* Returns _true_, if _object_ was already seen during this generating run.
*/
static VALUE cState_seen_p(VALUE self, VALUE object)
{
    VALUE object_id;
    GET_STATE(self);

    /* The seen table is keyed by object id; the stored entry is returned. */
    object_id = rb_obj_id(object);
    return rb_hash_aref(state->seen, object_id);
}
/*
* call-seq: remember(object)
*
* Remember _object_, to find out if it was already encountered (if a cyclic
* data structure is rendered).
*/
static VALUE cState_remember(VALUE self, VALUE object)
{
    VALUE object_id;
    GET_STATE(self);

    /* Record the object's id in the seen table. */
    object_id = rb_obj_id(object);
    return rb_hash_aset(state->seen, object_id, Qtrue);
}
/*
* call-seq: forget(object)
*
* Forget _object_ for this generating run.
*/
static VALUE cState_forget(VALUE self, VALUE object)
{
    VALUE object_id;
    GET_STATE(self);

    /* Drop the object's id from the seen table. */
    object_id = rb_obj_id(object);
    return rb_hash_delete(state->seen, object_id);
}
/*
 * Extension entry point: defines JSON::Ext::Generator with its State class
 * and the GeneratorMethods::* mixin modules, and interns the method and
 * option names used by the functions in this file.
 */
void Init_generator()
{
mJSON = rb_define_module("JSON");
mExt = rb_define_module_under(mJSON, "Ext");
mGenerator = rb_define_module_under(mExt, "Generator");
/* Both exception classes are looked up by path, so they have to be
 * defined before this extension is loaded. */
eGeneratorError = rb_path2class("JSON::GeneratorError");
eCircularDatastructure = rb_path2class("JSON::CircularDatastructure");
/* JSON::Ext::Generator::State and its accessors. */
cState = rb_define_class_under(mGenerator, "State", rb_cObject);
rb_define_alloc_func(cState, cState_s_allocate);
rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
rb_define_method(cState, "initialize", cState_initialize, -1);
rb_define_method(cState, "indent", cState_indent, 0);
rb_define_method(cState, "indent=", cState_indent_set, 1);
rb_define_method(cState, "space", cState_space, 0);
rb_define_method(cState, "space=", cState_space_set, 1);
rb_define_method(cState, "space_before", cState_space_before, 0);
rb_define_method(cState, "space_before=", cState_space_before_set, 1);
rb_define_method(cState, "object_nl", cState_object_nl, 0);
rb_define_method(cState, "object_nl=", cState_object_nl_set, 1);
rb_define_method(cState, "array_nl", cState_array_nl, 0);
rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
rb_define_method(cState, "seen?", cState_seen_p, 1);
rb_define_method(cState, "remember", cState_remember, 1);
rb_define_method(cState, "forget", cState_forget, 1);
/* One mixin module per core class, each providing to_json. */
mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
mObject = rb_define_module_under(mGeneratorMethods, "Object");
rb_define_method(mObject, "to_json", mObject_to_json, -1);
mHash = rb_define_module_under(mGeneratorMethods, "Hash");
rb_define_method(mHash, "to_json", mHash_to_json, -1);
mArray = rb_define_module_under(mGeneratorMethods, "Array");
rb_define_method(mArray, "to_json", mArray_to_json, -1);
mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
rb_define_method(mInteger, "to_json", mInteger_to_json, -1);
mFloat = rb_define_module_under(mGeneratorMethods, "Float");
rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
mString = rb_define_module_under(mGeneratorMethods, "String");
rb_define_singleton_method(mString, "included", mString_included_s, 1);
rb_define_method(mString, "to_json", mString_to_json, -1);
rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
mString_Extend = rb_define_module_under(mString, "Extend");
rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1);
mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);
/* Method and option names used through rb_funcall / ID2SYM above. */
i_to_s = rb_intern("to_s");
i_to_json = rb_intern("to_json");
i_new = rb_intern("new");
i_indent = rb_intern("indent");
i_space = rb_intern("space");
i_space_before = rb_intern("space_before");
i_object_nl = rb_intern("object_nl");
i_array_nl = rb_intern("array_nl");
i_check_circular = rb_intern("check_circular");
i_pack = rb_intern("pack");
i_unpack = rb_intern("unpack");
i_create_id = rb_intern("create_id");
i_extend = rb_intern("extend");
}

View file

@ -0,0 +1,184 @@
/* vim: set cin et sw=4 ts=4: */
#include "unicode.h"
/*
 * Appends the six characters \uXXXX (four lower-case hex digits) for
 * `character` to the Ruby string `buffer`.
 *
 * Relies on a `char buf[7]` being in scope at the point of use. The body is
 * wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and stays correct inside an if/else without braces; callers
 * terminate it with a semicolon like a function call.
 */
#define unicode_escape(buffer, character) \
    do { \
        snprintf(buf, 7, "\\u%04x", (unsigned int) (character)); \
        rb_str_buf_cat((buffer), buf, 6); \
    } while (0)
/*
* Copyright 2001-2004 Unicode, Inc.
*
* Disclaimer
*
* This source code is provided as is by Unicode, Inc. No claims are
* made as to fitness for any particular purpose. No warranties of any
* kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been
* purchased on magnetic or optical media from Unicode, Inc., the
* sole remedy for any claim will be exchange of defective media
* within 90 days of receipt.
*
* Limitations on Rights to Redistribute This Code
*
* Unicode, Inc. hereby grants the right to freely use the information
* supplied in this file in the creation of products supporting the
* Unicode Standard, and to make copies of this file in any form
* for internal or external distribution as long as this notice
* remains attached.
*/
/*
* Index into the table below with the first byte of a UTF-8 sequence to
* get the number of trailing bytes that are supposed to follow it.
* Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
* left as-is for anyone who may want to do such conversion, which was
* allowed in earlier algorithms.
*/
/* JSON_convert_UTF8_to_JSON indexes this table with the lead byte to learn
 * how many more bytes belong to the current sequence. */
static const char trailingBytesForUTF8[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
/*
* Magic values subtracted from a buffer value during UTF8 conversion.
* This table contains as many values as there might be trailing bytes
* in a UTF-8 sequence.
*/
/* Indexed by the number of trailing bytes; JSON_convert_UTF8_to_JSON
 * subtracts the entry from the accumulated byte sum to get the code point. */
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
/*
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
* into the first byte, depending on how many bytes follow. There are
* as many entries in this table as there are UTF-8 sequence types.
* (I.e., one byte sequence, two byte... etc.). Remember that sequences
* for *legal* UTF-8 will be 4 or fewer bytes total.
*/
/* NOTE(review): not referenced by the code visible in this file, which only
 * decodes UTF-8 -- confirm whether the table is still needed. */
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
/*
* Utility routine to tell whether a sequence of bytes is legal UTF-8.
* This must be called with the length pre-determined by the first byte.
* If not calling this from ConvertUTF8to*, then the length can be set by:
* length = trailingBytesForUTF8[*source]+1;
* and the sequence is illegal right away if there aren't that many bytes
* available.
* If presented with a length > 4, this returns 0. The Unicode
* definition of UTF-8 goes up to 4-byte sequences.
*/
/*
 * Returns 1 when the `length` bytes starting at `source` form a legal UTF-8
 * sequence, 0 otherwise. The caller must guarantee that `length` bytes are
 * readable. The bytes are examined from the last one back to the first.
 */
inline static unsigned char isLegalUTF8(const UTF8 *source, int length)
{
UTF8 a;
const UTF8 *srcptr = source+length;
switch (length) {
default: return 0;
/* Everything else falls through when "1"... */
/* Bytes 4 and 3 must be continuation bytes (0x80..0xBF). */
case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
/* The second byte has a lower bound that depends on the lead byte. */
case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
switch (*source) {
/* no fall-through in this inner switch */
case 0xE0: if (a < 0xA0) return 0; break;
case 0xED: if (a > 0x9F) return 0; break;
case 0xF0: if (a < 0x90) return 0; break;
case 0xF4: if (a > 0x8F) return 0; break;
default: if (a < 0x80) return 0;
}
/* Lead bytes 0x80..0xC1 are never legal. */
case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
}
/* Neither are lead bytes above 0xF4. */
if (*source > 0xF4) return 0;
return 1;
}
/*
 * Appends the JSON-escaped form of the UTF-8 encoded Ruby string `string`
 * to `buffer` (without surrounding quotes).
 *
 * Printable ASCII is copied, except that '"', '\\' and '/' are
 * backslash-escaped; newline, carriage return, tab, form feed and backspace
 * get their short escapes; everything else is written as \uXXXX, using a
 * surrogate pair for code points above 0xFFFF.
 *
 * Raises JSON::GeneratorError for truncated or malformed input. Surrogate
 * code points and values above 0x10FFFF also raise when `flags` is
 * strictConversion; otherwise U+FFFD is emitted in their place.
 */
void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags)
{
    char buf[7]; /* scratch space written by unicode_escape */
    const UTF8 *cursor = (UTF8 *) RSTRING_PTR(string);
    const UTF8 *limit = cursor + RSTRING_LEN(string);

    while (cursor < limit) {
        UTF32 ch = 0;
        unsigned short trailing = trailingBytesForUTF8[*cursor];
        unsigned short remaining;

        if (cursor + trailing >= limit) {
            rb_raise(rb_path2class("JSON::GeneratorError"),
                    "partial character in source, but hit end");
        }
        if (!isLegalUTF8(cursor, trailing + 1)) {
            rb_raise(rb_path2class("JSON::GeneratorError"),
                    "source sequence is illegal/malformed");
        }

        /* Sum up the bytes, shifting by six bits between them, then remove
         * the accumulated marker bits in one subtraction. */
        for (remaining = trailing; remaining > 0; remaining--) {
            ch += *cursor++;
            ch <<= 6;
        }
        ch += *cursor++;
        ch -= offsetsFromUTF8[trailing];

        if (ch > UNI_MAX_UTF16) {
            /* Beyond the range UTF-16 can represent. */
            if (flags == strictConversion) {
                rb_raise(rb_path2class("JSON::GeneratorError"),
                        "source sequence is illegal/malformed");
            }
            unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
        } else if (ch > UNI_MAX_BMP) {
            /* target is a character in range 0xFFFF - 0x10FFFF. */
            ch -= halfBase;
            unicode_escape(buffer, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
            unicode_escape(buffer, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
        } else if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
            /* UTF-16 surrogate values are illegal as code points. */
            if (flags == strictConversion) {
                rb_raise(rb_path2class("JSON::GeneratorError"),
                        "source sequence is illegal/malformed");
            }
            unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
        } else {
            switch (ch) {
                case '"':
                    rb_str_buf_cat2(buffer, "\\\"");
                    break;
                case '\\':
                    rb_str_buf_cat2(buffer, "\\\\");
                    break;
                case '/':
                    rb_str_buf_cat2(buffer, "\\/");
                    break;
                case '\n':
                    rb_str_buf_cat2(buffer, "\\n");
                    break;
                case '\r':
                    rb_str_buf_cat2(buffer, "\\r");
                    break;
                case '\t':
                    rb_str_buf_cat2(buffer, "\\t");
                    break;
                case '\f':
                    rb_str_buf_cat2(buffer, "\\f");
                    break;
                case '\b':
                    rb_str_buf_cat2(buffer, "\\b");
                    break;
                default:
                    if (ch >= 0x20 && ch <= 0x7f) {
                        /* plain ASCII is copied through unchanged */
                        char plain = (char) ch;
                        rb_str_buf_cat(buffer, &plain, 1);
                    } else {
                        unicode_escape(buffer, (UTF16) ch);
                    }
                    break;
            }
        }
    }
}

View file

@ -0,0 +1,53 @@
#include "ruby.h"
#ifndef _GENERATOR_UNICODE_H_
#define _GENERATOR_UNICODE_H_
/* Result codes of a conversion.
 * NOTE(review): JSON_convert_UTF8_to_JSON as declared below returns void
 * and raises instead -- confirm whether this enum is still used. */
typedef enum {
conversionOK = 0, /* conversion successful */
sourceExhausted, /* partial character in source, but hit end */
targetExhausted, /* insuff. room in target for conversion */
sourceIllegal /* source sequence is illegal/malformed */
} ConversionResult;
/* Selects how JSON_convert_UTF8_to_JSON treats surrogate or out-of-range
 * code points: strictConversion raises, lenientConversion substitutes
 * UNI_REPLACEMENT_CHAR. */
typedef enum {
strictConversion = 0,
lenientConversion
} ConversionFlags;
typedef unsigned long UTF32; /* at least 32 bits */
typedef unsigned short UTF16; /* at least 16 bits */
typedef unsigned char UTF8; /* typically 8 bits */
#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
#define UNI_MAX_BMP (UTF32)0x0000FFFF
#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
#define UNI_SUR_HIGH_START (UTF32)0xD800
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
#define UNI_SUR_LOW_START (UTF32)0xDC00
#define UNI_SUR_LOW_END (UTF32)0xDFFF
static const int halfShift = 10; /* used for shifting by 10 bits */
static const UTF32 halfBase = 0x0010000UL;
static const UTF32 halfMask = 0x3FFUL;
void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags);
#ifndef RARRAY_PTR
#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
#endif
#ifndef RARRAY_LEN
#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len
#endif
#ifndef RSTRING_PTR
#define RSTRING_PTR(string) RSTRING(string)->ptr
#endif
#ifndef RSTRING_LEN
#define RSTRING_LEN(string) RSTRING(string)->len
#endif
#endif

View file

@ -0,0 +1,3 @@
#ifndef EXTCONF_H
#define EXTCONF_H
#endif

View file

@ -0,0 +1,9 @@
require 'mkmf'
require 'rbconfig'
# With gcc, compile with all warnings enabled.
# (Use ' -Wall -ggdb' instead to include gdb debugging information.)
CONFIG['CC'] += ' -Wall' if CONFIG['CC'] =~ /gcc/

# Write the Makefile for the json/ext/parser extension.
create_makefile 'json/ext/parser'

1601
ext/json/ext/parser/parser.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,569 @@
/* vim: set cin et sw=4 ts=4: */
#include "ruby.h"
#include "re.h"
#include "st.h"
#include "unicode.h"
#define EVIL 0x666
static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
static ID i_json_creatable_p, i_json_create, i_create_id, i_chr, i_max_nesting;
/* Per-parser data handed to every JSON_parse_* function. */
typedef struct JSON_ParserStruct {
/* presumably the Ruby String being parsed, with source/len as its raw
 * bytes and length -- TODO confirm against the code that fills them in */
VALUE Vsource;
char *source;
long len;
/* NOTE(review): scratch pointer; its use is not visible in this excerpt */
char *memo;
/* Hash key whose value names the class used to re-create an object. */
VALUE create_id;
/* Nesting limit; 0 disables the check in JSON_parse_object. */
int max_nesting;
/* Nesting level of the array/object currently being parsed. */
int current_nesting;
} JSON_Parser;
static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result);
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result);
static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result);
static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
/* Declares a local `json` and points it at the JSON_Parser wrapped by the
 * `self` in scope. Expands to a declaration followed by a statement. */
#define GET_STRUCT \
JSON_Parser *json; \
Data_Get_Struct(self, JSON_Parser, json);
/* Ragel definitions shared by the machines below: whitespace and comments
 * that may be skipped (`ignore`), the separators, the literal names, and the
 * characters that can start each kind of value. */
%%{
machine JSON_common;
cr = '\n';
cr_neg = [^\n];
ws = [ \t\r\n];
c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/';
cpp_comment = '//' cr_neg* cr;
comment = c_comment | cpp_comment;
ignore = ws | comment;
name_separator = ':';
value_separator = ',';
Vnull = 'null';
Vfalse = 'false';
Vtrue = 'true';
begin_value = [nft"\-[{] | digit;
begin_object = '{';
end_object = '}';
begin_array = '[';
end_array = ']';
begin_string = '"';
begin_name = begin_string;
begin_number = digit | '-';
}%%
/* Ragel machine for a JSON object: '{', an optional comma-separated list of
 * name/value pairs, '}'. Names are parsed by JSON_parse_string into
 * last_name, values by JSON_parse_value; each pair is stored in the Hash
 * behind *result. A NULL from either sub-parser stops the machine. */
%%{
machine JSON_object;
include JSON_common;
write data;
action parse_value {
VALUE v = Qnil;
char *np = JSON_parse_value(json, fpc, pe, &v);
if (np == NULL) {
fbreak;
} else {
rb_hash_aset(*result, last_name, v);
fexec np;
}
}
action parse_name {
char *np = JSON_parse_string(json, fpc, pe, &last_name);
if (np == NULL) fbreak; else fexec np;
}
action exit { fbreak; }
a_pair = ignore* begin_name >parse_name
ignore* name_separator ignore*
begin_value >parse_value;
main := begin_object
(a_pair (ignore* value_separator a_pair)*)?
ignore* end_object @exit;
}%%
/*
 * Runs the JSON_object machine on the text between p and pe and stores the
 * resulting Hash (or the object built from it) in *result.
 *
 * Raises eNestingError when json->max_nesting is non-zero and
 * json->current_nesting exceeds it. Returns p + 1 when the machine ended in
 * a final state, NULL otherwise.
 */
static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result)
{
int cs = EVIL;
VALUE last_name = Qnil;
if (json->max_nesting && json->current_nesting > json->max_nesting) {
/* NOTE(review): the message reads "to deep" (sic, should be "too deep");
 * fix it together with the generated parser.c. */
rb_raise(eNestingError, "nesting of %d is to deep", json->current_nesting);
}
*result = rb_hash_new();
%% write init;
%% write exec;
if (cs >= JSON_object_first_final) {
/* When the parsed object has an entry under create_id, its value names
 * a class; if that class answers json_creatable? with a true value, the
 * Hash is replaced by the result of klass.json_create(hash).
 * NOTE(review): the class name comes from the parsed document, so
 * untrusted input can pick the class -- confirm this is intended. */
VALUE klassname = rb_hash_aref(*result, json->create_id);
if (!NIL_P(klassname)) {
VALUE klass = rb_path2class(StringValueCStr(klassname));
if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) {
*result = rb_funcall(klass, i_json_create, 1, *result);
}
}
return p + 1;
} else {
return NULL;
}
}
/*
 * Machine recognising a single JSON value. The literals null/false/true are
 * handled inline; numbers, strings, arrays and objects are dispatched to the
 * matching JSON_parse_* function on their first character. The actions use
 * the locals of JSON_parse_value (json, pe, result).
 */
%%{
machine JSON_value;
include JSON_common;
write data;
action parse_null {
*result = Qnil;
}
action parse_false {
*result = Qfalse;
}
action parse_true {
*result = Qtrue;
}
action parse_string {
char *np = JSON_parse_string(json, fpc, pe, result);
if (np == NULL) fbreak; else fexec np;
}
# A number is tried as a float first and then as an integer.
action parse_number {
char *np;
np = JSON_parse_float(json, fpc, pe, result);
if (np != NULL) fexec np;
np = JSON_parse_integer(json, fpc, pe, result);
if (np != NULL) fexec np;
fbreak;
}
# Arrays and objects bump current_nesting around the recursive call so the
# depth check in JSON_parse_array / JSON_parse_object sees the new depth.
action parse_array {
char *np;
json->current_nesting += 1;
np = JSON_parse_array(json, fpc, pe, result);
json->current_nesting -= 1;
if (np == NULL) fbreak; else fexec np;
}
action parse_object {
char *np;
json->current_nesting += 1;
np = JSON_parse_object(json, fpc, pe, result);
json->current_nesting -= 1;
if (np == NULL) fbreak; else fexec np;
}
action exit { fbreak; }
main := (
Vnull @parse_null |
Vfalse @parse_false |
Vtrue @parse_true |
begin_number >parse_number |
begin_string >parse_string |
begin_array >parse_array |
begin_object >parse_object
) %*exit;
}%%
/*
 * Runs the JSON_value machine from p and stores the parsed value in *result.
 * Returns the position where the machine stopped, or NULL when it did not
 * end in a final state.
 */
static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result)
{
    int cs = EVIL;

    %% write init;
    %% write exec;

    return (cs >= JSON_value_first_final) ? p : NULL;
}
/*
 * Machine recognising an integer: optional '-', then either a single '0' or
 * a digit sequence without leading zero, followed by one non-digit character
 * on which the exit action stops the machine.
 */
%%{
machine JSON_integer;
write data;
action exit { fbreak; }
main := '-'? ('0' | [1-9][0-9]*) (^[0-9] @exit);
}%%
/*
 * Runs the JSON_integer machine from p. On a match the text between the
 * token start (remembered in json->memo) and the stop position is converted
 * with rb_Integer and stored in *result; the return value is p + 1.
 * Returns NULL when the text at p is not an integer.
 */
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
{
    int cs = EVIL;

    %% write init;
    json->memo = p;
    %% write exec;

    if (cs < JSON_integer_first_final) {
        return NULL;
    }
    *result = rb_Integer(rb_str_new(json->memo, p - json->memo));
    return p + 1;
}
/*
 * Machine recognising a float: optional '-', an integer part, and then
 * either a fraction with optional exponent or an exponent alone. It must be
 * followed by one character that is not a digit, 'E', 'e', '.' or '-', on
 * which the exit action stops the machine.
 */
%%{
machine JSON_float;
include JSON_common;
write data;
action exit { fbreak; }
main := '-'? (
(('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?)
| (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+))
) (^[0-9Ee.\-] @exit );
}%%
/*
 * Runs the JSON_float machine from p. On a match the text between the token
 * start (remembered in json->memo) and the stop position is converted with
 * rb_Float and stored in *result; the return value is p + 1.
 * Returns NULL when the text at p is not a float.
 */
static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
{
    int cs = EVIL;

    %% write init;
    json->memo = p;
    %% write exec;

    if (cs < JSON_float_first_final) {
        return NULL;
    }
    *result = rb_Float(rb_str_new(json->memo, p - json->memo));
    return p + 1;
}
/*
 * Machine recognising a JSON array: '[', an optional comma-separated list of
 * values, ']'. Its actions use the locals of JSON_parse_array
 * (json, pe, result).
 */
%%{
machine JSON_array;
include JSON_common;
write data;
# Parse the element at the current position and append it to the result
# array; stop the machine on failure.
action parse_value {
VALUE v = Qnil;
char *np = JSON_parse_value(json, fpc, pe, &v);
if (np == NULL) {
fbreak;
} else {
rb_ary_push(*result, v);
fexec np;
}
}
action exit { fbreak; }
next_element = value_separator ignore* begin_value >parse_value;
main := begin_array ignore*
((begin_value >parse_value ignore*)
(ignore* next_element ignore*)*)?
end_array @exit;
}%%
/*
 * Parses the JSON array that starts at p into a Ruby Array stored in *result.
 *
 * json   - parser state (nesting counters).
 * p, pe  - current position and end of the source buffer.
 * result - receives the new Array.
 *
 * Returns p + 1 (p being where the JSON_array machine stopped) on success.
 * Raises JSON::NestingError when max_nesting is enabled and exceeded, and
 * JSON::ParserError when the machine does not reach a final state (unlike
 * the other sub-parsers, this one never returns NULL).
 */
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result)
{
    int cs = EVIL;

    if (json->max_nesting && json->current_nesting > json->max_nesting) {
        rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting);
    }

    *result = rb_ary_new();

    %% write init;
    %% write exec;

    if (cs >= JSON_array_first_final) {
        return p + 1;
    } else {
        rb_raise(eParserError, "unexpected token at '%s'", p);
    }
}
/*
 * Builds a Ruby String from the raw contents of a JSON string literal,
 * resolving backslash escapes.
 *
 * p  - first byte after the opening quote.
 * pe - one past the last content byte (i.e. the closing quote).
 *
 * Returns the unescaped String, or Qnil when the input ends in the middle of
 * an escape (a lone trailing backslash or a truncated \uXXXX); the caller
 * turns Qnil into a parse failure.
 */
static VALUE json_string_unescape(char *p, char *pe)
{
    VALUE result = rb_str_buf_new(pe - p + 1);

    while (p < pe) {
        if (*p == '\\') {
            p++;
            if (p >= pe) return Qnil; /* raise an exception later, \ at end */
            switch (*p) {
                case 'b':
                    rb_str_buf_cat2(result, "\b");
                    p++;
                    break;
                case 'f':
                    rb_str_buf_cat2(result, "\f");
                    p++;
                    break;
                case 'n':
                    rb_str_buf_cat2(result, "\n");
                    p++;
                    break;
                case 'r':
                    rb_str_buf_cat2(result, "\r");
                    p++;
                    break;
                case 't':
                    rb_str_buf_cat2(result, "\t");
                    p++;
                    break;
                case 'u':
                    /*
                     * p is on the 'u'; the four hex digits at p[1..4] must all
                     * lie before pe, so at least five bytes have to remain.
                     */
                    if (pe - p < 5) {
                        return Qnil;
                    } else {
                        p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion);
                    }
                    break;
                default:
                    /* '"', '\\' and every other escaped character stand for themselves. */
                    rb_str_buf_cat(result, p, 1);
                    p++;
                    break;
            }
        } else {
            /* Copy the run of ordinary bytes up to the next backslash in one go. */
            char *q = p;
            /* Bounds check first so *q is never read at or beyond pe. */
            while (q < pe && *q != '\\') q++;
            rb_str_buf_cat(result, p, q - p);
            p = q;
        }
    }
    return result;
}
/*
 * Machine recognising a JSON string literal between double quotes. Inside
 * the quotes it accepts: any character other than '"', '\\' and the control
 * characters 0..0x1f; the escapes \" \\ \/ \b \f \n \r \t; \u followed by
 * four hex digits; and a backslash followed by any other non-control
 * character. The actions use the locals of JSON_parse_string.
 */
%%{
machine JSON_string;
include JSON_common;
write data;
# Unescape the text between the opening quote (json->memo) and the current
# position; a nil result stops the machine.
action parse_string {
*result = json_string_unescape(json->memo + 1, p);
if (NIL_P(*result)) fbreak; else fexec p + 1;
}
action exit { fbreak; }
main := '"' ((^(["\\] | 0..0x1f) | '\\'["\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^(["\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
}%%
/*
 * Runs the JSON_string machine from p (the opening quote, remembered in
 * json->memo). *result starts out as an empty String and is replaced by the
 * unescaped contents by the machine's parse_string action.
 * Returns p + 1 on success, NULL when no string literal could be parsed.
 */
static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
{
    int cs = EVIL;

    *result = rb_str_new("", 0);
    %% write init;
    json->memo = p;
    %% write exec;

    return (cs >= JSON_string_first_final) ? p + 1 : NULL;
}
/*
 * Top-level machine used by cParser_parse: optional whitespace/comments,
 * exactly one object or array, optional whitespace/comments. Both actions
 * set the nesting depth to 1 before handing over to the sub-parser and use
 * the locals of cParser_parse (json, pe, result).
 */
%%{
machine JSON;
write data;
include JSON_common;
action parse_object {
char *np;
json->current_nesting = 1;
np = JSON_parse_object(json, fpc, pe, &result);
if (np == NULL) fbreak; else fexec np;
}
action parse_array {
char *np;
json->current_nesting = 1;
np = JSON_parse_array(json, fpc, pe, &result);
if (np == NULL) fbreak; else fexec np;
}
main := ignore* (
begin_object >parse_object |
begin_array >parse_array
) ignore*;
}%%
/*
* Document-class: JSON::Ext::Parser
*
* This is the JSON parser implemented as a C extension. It can be configured
* to be used by setting
*
* JSON.parser = JSON::Ext::Parser
*
* with the method parser= in JSON.
*
*/
/*
 * call-seq: new(source, opts => {})
 *
 * Creates a new JSON::Ext::Parser instance for the string _source_.
 *
 * It will be configured by the _opts_ hash. _opts_ can have the following
 * keys:
 *
 * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
 *   structures. Disable depth checking with :max_nesting => false. It
 *   defaults to 19.
 *
 * Raises JSON::ParserError if _source_ is shorter than two octets, and
 * TypeError if _opts_ cannot be converted to a Hash or max_nesting is
 * neither false/nil nor a Fixnum.
 */
static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
{
    char *ptr;
    long len;
    VALUE source, opts;
    GET_STRUCT;

    rb_scan_args(argc, argv, "11", &source, &opts);
    source = StringValue(source);
    ptr = RSTRING_PTR(source);
    len = RSTRING_LEN(source);
    if (len < 2) {
        rb_raise(eParserError, "A JSON text must at least contain two octets!");
    }

    json->max_nesting = 19;
    if (!NIL_P(opts)) {
        VALUE s_max_nesting = ID2SYM(i_max_nesting);

        /*
         * rb_convert_type raises TypeError itself when the conversion fails
         * and never returns nil, so no nil check is needed afterwards.
         */
        opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");

        /* Only an explicitly given :max_nesting key overrides the default. */
        if (st_lookup(RHASH(opts)->tbl, s_max_nesting, 0)) {
            VALUE max_nesting = rb_hash_aref(opts, s_max_nesting);
            if (RTEST(max_nesting)) {
                Check_Type(max_nesting, T_FIXNUM);
                json->max_nesting = FIX2INT(max_nesting);
            } else {
                /* false or nil: depth checking is switched off */
                json->max_nesting = 0;
            }
        }
    }
    json->current_nesting = 0;
    /*
       Convert these?
       if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
       rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
       } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
       rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
       } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
       rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
       } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
       rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
       }
     */
    json->len = len;
    json->source = ptr;
    json->Vsource = source;
    json->create_id = rb_funcall(mJSON, i_create_id, 0);
    return self;
}
/*
 * call-seq: parse()
 *
 * Parses the current JSON text _source_ and returns the complete data
 * structure as a result. Raises JSON::ParserError unless the whole source
 * was consumed by the top-level JSON machine.
 */
static VALUE cParser_parse(VALUE self)
{
    char *p, *pe;
    int cs = EVIL;
    VALUE result = Qnil;
    GET_STRUCT;

    %% write init;
    p = json->source;
    pe = p + json->len;
    %% write exec;

    if (cs < JSON_first_final || p != pe) {
        rb_raise(eParserError, "unexpected token at '%s'", p);
    }
    return result;
}
/* Allocates a JSON_Parser struct and clears every field to zero. */
static JSON_Parser *JSON_allocate()
{
    JSON_Parser *parser = ALLOC(JSON_Parser);

    MEMZERO(parser, JSON_Parser, 1);
    return parser;
}
/* GC mark callback: keeps the two Ruby objects referenced by the struct alive. */
static void JSON_mark(JSON_Parser *json)
{
    rb_gc_mark_maybe(json->create_id);
    rb_gc_mark_maybe(json->Vsource);
}
/*
 * GC free callback for the parser struct. The struct is obtained with
 * ALLOC (Ruby's xmalloc) in JSON_allocate, so it is released with the
 * matching xfree rather than the C library's free.
 */
static void JSON_free(JSON_Parser *json)
{
    xfree(json);
}
/*
 * Allocator registered for JSON::Ext::Parser: wraps a freshly zeroed
 * JSON_Parser in a Data object of class klass, with JSON_mark and JSON_free
 * as its GC callbacks.
 */
static VALUE cJSON_parser_s_allocate(VALUE klass)
{
    return Data_Wrap_Struct(klass, JSON_mark, JSON_free, JSON_allocate());
}
/*
 * call-seq: source()
 *
 * Returns a copy of the current _source_ string, that was used to construct
 * this Parser.
 */
static VALUE cParser_source(VALUE self)
{
    VALUE original;
    GET_STRUCT;

    original = json->Vsource;
    return rb_str_dup(original);
}
/*
 * Extension entry point: defines JSON::Ext::Parser with its allocator and
 * methods, looks up the error classes (JSON::ParserError and
 * JSON::NestingError must already be defined), and interns the IDs used by
 * the parser.
 */
void Init_parser()
{
    /* method names / option keys */
    i_max_nesting = rb_intern("max_nesting");
    i_chr = rb_intern("chr");
    i_create_id = rb_intern("create_id");
    i_json_create = rb_intern("json_create");
    i_json_creatable_p = rb_intern("json_creatable?");

    /* namespace and class */
    mJSON = rb_define_module("JSON");
    mExt = rb_define_module_under(mJSON, "Ext");
    cParser = rb_define_class_under(mExt, "Parser", rb_cObject);

    /* exception classes raised by the parser */
    eParserError = rb_path2class("JSON::ParserError");
    eNestingError = rb_path2class("JSON::NestingError");

    /* public API */
    rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
    rb_define_method(cParser, "initialize", cParser_initialize, -1);
    rb_define_method(cParser, "parse", cParser_parse, 0);
    rb_define_method(cParser, "source", cParser_source, 0);
}

View file

@ -0,0 +1,156 @@
/* vim: set cin et sw=4 ts=4: */
#include "unicode.h"
/*
* Copyright 2001-2004 Unicode, Inc.
*
* Disclaimer
*
* This source code is provided as is by Unicode, Inc. No claims are
* made as to fitness for any particular purpose. No warranties of any
* kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been
* purchased on magnetic or optical media from Unicode, Inc., the
* sole remedy for any claim will be exchange of defective media
* within 90 days of receipt.
*
* Limitations on Rights to Redistribute This Code
*
* Unicode, Inc. hereby grants the right to freely use the information
* supplied in this file in the creation of products supporting the
* Unicode Standard, and to make copies of this file in any form
* for internal or external distribution as long as this notice
* remains attached.
*/
/*
* Index into the table below with the first byte of a UTF-8 sequence to
* get the number of trailing bytes that are supposed to follow it.
* Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
* left as-is for anyone who may want to do such conversion, which was
* allowed in earlier algorithms.
*
* NOTE(review): JSON_convert_UTF16_to_UTF8, the only function in this file,
* does not reference this table.
*/
static const char trailingBytesForUTF8[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
/*
* Magic values subtracted from a buffer value during UTF8 conversion.
* This table contains as many values as there might be trailing bytes
* in a UTF-8 sequence.
*
* NOTE(review): JSON_convert_UTF16_to_UTF8, the only function in this file,
* does not reference this table.
*/
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
/*
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
* into the first byte, depending on how many bytes follow. There are
* as many entries in this table as there are UTF-8 sequence types.
* (I.e., one byte sequence, two byte... etc.). Remember that sequences
* for *legal* UTF-8 will be 4 or fewer bytes total.
*
* Indexed by the total number of bytes in the sequence (entry 0 is unused
* padding), as done by JSON_convert_UTF16_to_UTF8 below.
*/
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
/*
 * Decodes a run of consecutive JSON "\uXXXX" escapes into UTF-8 and appends
 * the resulting bytes to the Ruby string buffer.
 *
 * buffer    - Ruby String the UTF-8 bytes are appended to.
 * source    - points at the 'u' of the first escape (source[-1] is the
 *             backslash). Every escape in the run is assumed to be complete,
 *             i.e. followed by four hex digits.
 * sourceEnd - end of the text being unescaped; the run is not followed
 *             beyond it.
 * flags     - with strictConversion an unpaired surrogate raises; otherwise
 *             it is encoded as-is.
 *
 * Returns the position just past the last hex digit of the last escape in
 * the run. Raises JSON::ParserError for an unpaired surrogate (strict mode)
 * or a high surrogate at the very end of the run.
 */
char *JSON_convert_UTF16_to_UTF8 (
        VALUE buffer,
        char *source,
        char *sourceEnd,
        ConversionFlags flags)
{
    UTF16 *tmp, *tmpPtr, *tmpEnd;
    char buf[5];
    long n = 0, i;
    char *p = source - 1;

    /* Count how many "\uXXXX" escapes follow each other directly. */
    while (p < sourceEnd && p[0] == '\\' && p[1] == 'u') {
        p += 6;
        n++;
    }

    /* Convert the hex digits of every escape into a UTF-16 code unit. */
    p = source + 1;
    buf[4] = 0;
    tmpPtr = tmp = ALLOC_N(UTF16, n);
    tmpEnd = tmp + n;
    for (i = 0; i < n; i++) {
        buf[0] = *p++;
        buf[1] = *p++;
        buf[2] = *p++;
        buf[3] = *p++;
        tmpPtr[i] = strtol(buf, NULL, 16);
        p += 2; /* skip the "\u" that introduces the next escape */
    }

    /*
     * tmp comes from ALLOC_N (Ruby's xmalloc), so it is released with xfree
     * on every path, including right before each rb_raise (which does not
     * return).
     */
    while (tmpPtr < tmpEnd) {
        UTF32 ch;
        unsigned short bytesToWrite = 0;
        const UTF32 byteMask = 0xBF;
        const UTF32 byteMark = 0x80;

        ch = *tmpPtr++;
        /* If we have a surrogate pair, convert to UTF32 first. */
        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
            /* If the 16 bits following the high surrogate are in the source
             * buffer... */
            if (tmpPtr < tmpEnd) {
                UTF32 ch2 = *tmpPtr;
                /* If it's a low surrogate, convert to UTF32. */
                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                        + (ch2 - UNI_SUR_LOW_START) + halfBase;
                    ++tmpPtr;
                } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
                    xfree(tmp);
                    rb_raise(rb_path2class("JSON::ParserError"),
                            "source sequence is illegal/malformed near %s", source);
                }
            } else { /* We don't have the 16 bits following the high surrogate. */
                xfree(tmp);
                rb_raise(rb_path2class("JSON::ParserError"),
                        "partial character in source, but hit end near %s", source);
            }
        } else if (flags == strictConversion) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
                xfree(tmp);
                rb_raise(rb_path2class("JSON::ParserError"),
                        "source sequence is illegal/malformed near %s", source);
            }
        }

        /* Figure out how many bytes the result will require */
        if (ch < (UTF32) 0x80) {
            bytesToWrite = 1;
        } else if (ch < (UTF32) 0x800) {
            bytesToWrite = 2;
        } else if (ch < (UTF32) 0x10000) {
            bytesToWrite = 3;
        } else if (ch < (UTF32) 0x110000) {
            bytesToWrite = 4;
        } else {
            bytesToWrite = 3;
            ch = UNI_REPLACEMENT_CHAR;
        }

        /* Fill buf back to front: continuation bytes first, lead byte last. */
        buf[0] = 0;
        buf[1] = 0;
        buf[2] = 0;
        buf[3] = 0;
        p = buf + bytesToWrite;
        switch (bytesToWrite) { /* note: everything falls through. */
            case 4: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
            case 3: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
            case 2: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
            case 1: *--p = (UTF8) (ch | firstByteMark[bytesToWrite]);
        }
        rb_str_buf_cat(buffer, p, bytesToWrite);
    }
    xfree(tmp);

    /* 'u' + 4 digits for the first escape, 6 bytes for each further one. */
    source += 5 + (n - 1) * 6;
    return source;
}

58
ext/json/ext/parser/unicode.h Executable file
View file

@ -0,0 +1,58 @@
/*
 * Declarations for the \uXXXX to UTF-8 converter used by the JSON parser:
 * code-unit typedefs, Unicode range constants, the converter prototype, and
 * fallback definitions of the RARRAY_ and RSTRING_ accessor macros.
 */
#ifndef _PARSER_UNICODE_H_
#define _PARSER_UNICODE_H_
#include "ruby.h"
typedef unsigned long UTF32; /* at least 32 bits */
typedef unsigned short UTF16; /* at least 16 bits */
typedef unsigned char UTF8; /* typically 8 bits */
/* Unicode range limits and the replacement character U+FFFD. */
#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
#define UNI_MAX_BMP (UTF32)0x0000FFFF
#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
/* UTF-16 surrogate ranges: high (leading) and low (trailing) code units. */
#define UNI_SUR_HIGH_START (UTF32)0xD800
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
#define UNI_SUR_LOW_START (UTF32)0xDC00
#define UNI_SUR_LOW_END (UTF32)0xDFFF
/* Constants used to combine a surrogate pair into one code point. */
static const int halfShift = 10; /* used for shifting by 10 bits */
static const UTF32 halfBase = 0x0010000UL;
static const UTF32 halfMask = 0x3FFUL;
/* Result codes of the original converter API; not used by the prototype below. */
typedef enum {
conversionOK = 0, /* conversion successful */
sourceExhausted, /* partial character in source, but hit end */
targetExhausted, /* insuff. room in target for conversion */
sourceIllegal /* source sequence is illegal/malformed */
} ConversionResult;
/* Selects whether unpaired surrogates are rejected (strict) or tolerated. */
typedef enum {
strictConversion = 0,
lenientConversion
} ConversionFlags;
/*
 * Appends the UTF-8 encoding of the \uXXXX escapes starting at source to
 * buffer and returns the position after them.
 */
char *JSON_convert_UTF16_to_UTF8 (
VALUE buffer,
char *source,
char *sourceEnd,
ConversionFlags flags);
/* Accessor macros are only defined here when ruby.h does not provide them. */
#ifndef RARRAY_PTR
#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
#endif
#ifndef RARRAY_LEN
#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len
#endif
#ifndef RSTRING_PTR
#define RSTRING_PTR(string) RSTRING(string)->ptr
#endif
#ifndef RSTRING_LEN
#define RSTRING_LEN(string) RSTRING(string)->len
#endif
#endif

View file

@ -41,7 +41,7 @@
***********************************************************************/
/* $Id$ */
#define NKF_VERSION "2.0.8"
#define NKF_RELEASE_DATE "2007-01-28"
#define NKF_RELEASE_DATE "2007-05-28"
#include "config.h"
#include "utf8tbl.h"
@ -351,10 +351,12 @@ static nkf_char e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
* 0: Shift_JIS, eucJP-ascii
* 1: eucJP-ms
* 2: CP932, CP51932
* 3: CP10001
*/
#define UCS_MAP_ASCII 0
#define UCS_MAP_MS 1
#define UCS_MAP_CP932 2
#define UCS_MAP_ASCII 0
#define UCS_MAP_MS 1
#define UCS_MAP_CP932 2
#define UCS_MAP_CP10001 3
static int ms_ucs_map_f = UCS_MAP_ASCII;
#endif
#ifdef UTF8_INPUT_ENABLE
@ -1232,6 +1234,14 @@ void options(unsigned char *cp)
#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
#endif
}else if(strcmp(codeset, "CP10001") == 0){
input_f = SJIS_INPUT;
#ifdef SHIFTJIS_CP932
cp51932_f = TRUE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP10001;
#endif
}else if(strcmp(codeset, "EUCJP") == 0 ||
strcmp(codeset, "EUC-JP") == 0){
@ -1370,6 +1380,11 @@ void options(unsigned char *cp)
output_conv = s_oconv;
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
#endif
}else if(strcmp(codeset, "CP10001") == 0){
output_conv = s_oconv;
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP10001;
#endif
}else if(strcmp(codeset, "EUCJP") == 0 ||
strcmp(codeset, "EUC-JP") == 0){
@ -2676,6 +2691,12 @@ nkf_char kanji_convert(FILE *f)
} else { /* bogus code, skip SSO and one byte */
NEXT;
}
} else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
(c1 == 0xFD || c1 == 0xFE)) {
/* CP10001 */
c2 = X0201;
c1 &= 0x7f;
SEND;
} else {
/* already established */
c2 = c1;
@ -2885,35 +2906,41 @@ nkf_char kanji_convert(FILE *f)
(*oconv)(0, ESC);
SEND;
}
} else if ((c1 == NL || c1 == CR) && broken_f&4) {
input_mode = ASCII; set_iconv(FALSE, 0);
SEND;
} else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
i_ungetc(SPACE,f);
continue;
} else {
i_ungetc(c1,f);
}
c1 = NL;
SEND;
} else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
if ((c1=(*i_getc)(f))!=EOF) {
if (c1==SPACE) {
i_ungetc(SPACE,f);
continue;
} else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
i_ungetc(SPACE,f);
continue;
} else {
i_ungetc(c1,f);
} else if (c1 == NL || c1 == CR) {
if (broken_f&4) {
input_mode = ASCII; set_iconv(FALSE, 0);
SEND;
} else if (mime_decode_f && !mime_decode_mode){
if (c1 == NL) {
if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
i_ungetc(SPACE,f);
continue;
} else {
i_ungetc(c1,f);
}
c1 = NL;
SEND;
} else { /* if (c1 == CR)*/
if ((c1=(*i_getc)(f))!=EOF) {
if (c1==SPACE) {
i_ungetc(SPACE,f);
continue;
} else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
i_ungetc(SPACE,f);
continue;
} else {
i_ungetc(c1,f);
}
i_ungetc(NL,f);
} else {
i_ungetc(c1,f);
}
c1 = CR;
SEND;
}
i_ungetc(NL,f);
} else {
i_ungetc(c1,f);
}
c1 = CR;
SEND;
if (crmode_f == CR && c1 == NL) crmode_f = CRLF;
else crmode_f = c1;
} else if (c1 == DEL && input_mode == X0208 ) {
/* CP5022x */
c2 = c1;
@ -3125,9 +3152,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
#ifdef SHIFTJIS_CP932
if (!cp932inv_f && is_ibmext_in_sjis(c2)){
#if 0
extern const unsigned short shiftjis_cp932[3][189];
#endif
val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
if (val){
c2 = val >> 8;
@ -3136,9 +3160,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
}
if (cp932inv_f
&& CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
#if 0
extern const unsigned short cp932inv[2][189];
#endif
nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
if (c){
c2 = c >> 8;
@ -3148,9 +3169,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
#endif /* SHIFTJIS_CP932 */
#ifdef X0212_ENABLE
if (!x0213_f && is_ibmext_in_sjis(c2)){
#if 0
extern const unsigned short shiftjis_x0212[3][189];
#endif
val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
if (val){
if (val > 0x7FFF){
@ -3481,14 +3499,6 @@ nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
{
#if 0
extern const unsigned short *const utf8_to_euc_2bytes[];
extern const unsigned short *const utf8_to_euc_2bytes_ms[];
extern const unsigned short *const utf8_to_euc_2bytes_932[];
extern const unsigned short *const *const utf8_to_euc_3bytes[];
extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
#endif
const unsigned short *const *pp;
const unsigned short *const *const *ppp;
static const int no_best_fit_chars_table_C2[] =
@ -3538,11 +3548,27 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *
}
}else if(ms_ucs_map_f == UCS_MAP_MS){
if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
}else if(ms_ucs_map_f == UCS_MAP_CP10001){
switch(c2){
case 0xC2:
switch(c1){
case 0xA2:
case 0xA3:
case 0xA5:
case 0xA6:
case 0xAC:
case 0xAF:
case 0xB8:
return 1;
}
break;
}
}
}
pp =
ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
utf8_to_euc_2bytes;
ret = w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
}else if(c0 < 0xF0){
@ -3565,6 +3591,19 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *
if(c1 == 0x80 || c0 == 0x9C) return 1;
break;
}
}else if(ms_ucs_map_f == UCS_MAP_CP10001){
switch(c2){
case 0xE3:
switch(c1){
case 0x82:
if(c0 == 0x94) return 1;
break;
case 0x83:
if(c0 == 0xBB) return 1;
break;
}
break;
}
}else{
switch(c2){
case 0xE2:
@ -3596,8 +3635,10 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *
ppp =
ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
utf8_to_euc_3bytes;
ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
// fprintf(stderr, "wret: %X %X %X -> %X %X\n",c2,c1,c0,*p2,*p1,ret);
}else return -1;
#ifdef SHIFTJIS_CP932
if (!ret && !cp932inv_f && is_eucg3(*p2)) {
@ -3739,15 +3780,17 @@ void encode_fallback_subchar(nkf_char c)
#ifdef UTF8_OUTPUT_ENABLE
nkf_char e2w_conv(nkf_char c2, nkf_char c1)
{
#if 0
extern const unsigned short euc_to_utf8_1byte[];
extern const unsigned short *const euc_to_utf8_2bytes[];
extern const unsigned short *const euc_to_utf8_2bytes_ms[];
extern const unsigned short *const x0212_to_utf8_2bytes[];
#endif
const unsigned short *p;
if (c2 == X0201) {
if (ms_ucs_map_f == UCS_MAP_CP10001) {
switch (c1) {
case 0x20:
return 0xA0;
case 0x7D:
return 0xA9;
}
}
p = euc_to_utf8_1byte;
#ifdef X0212_ENABLE
} else if (is_eucg3(c2)){
@ -3764,7 +3807,10 @@ nkf_char e2w_conv(nkf_char c2, nkf_char c1)
c2 &= 0x7f;
c2 = (c2&0x7f) - 0x21;
if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
p = ms_ucs_map_f != UCS_MAP_ASCII ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
p =
ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
euc_to_utf8_2bytes_ms[c2];
else
return 0;
}
@ -4069,9 +4115,6 @@ nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
else if(nkf_isgraph(ndx)){
nkf_char val = 0;
const unsigned short *ptr;
#if 0
extern const unsigned short *const x0212_shiftjis[];
#endif
ptr = x0212_shiftjis[ndx - 0x21];
if (ptr){
val = ptr[(c1 & 0x7f) - 0x21];
@ -4147,9 +4190,6 @@ void s_oconv(nkf_char c2, nkf_char c1)
#ifdef SHIFTJIS_CP932
if (cp932inv_f
&& CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
#if 0
extern const unsigned short cp932inv[2][189];
#endif
nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
if (c){
c2 = c >> 8;
@ -4539,6 +4579,10 @@ void z_conv(nkf_char c2, nkf_char c1)
/* if (c2) c1 &= 0x7f; assertion */
if (c2 == X0201 && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
(*o_zconv)(c2,c1);
return;
}
if (x0201_f && z_prev2==X0201) { /* X0201 */
if (c1==(0xde&0x7f)) { /* $BByE@(B */
z_prev2=0;
@ -4942,15 +4986,20 @@ void set_input_codename(char *codename)
void print_guessed_code(char *filename)
{
char *codename = "BINARY";
char *str_crmode = NULL;
if (!is_inputcode_mixed) {
if (strcmp(input_codename, "") == 0) {
codename = "ASCII";
} else {
codename = input_codename;
}
if (crmode_f == CR) str_crmode = "CR";
else if (crmode_f == NL) str_crmode = "LF";
else if (crmode_f == CRLF) str_crmode = "CRLF";
}
if (filename != NULL) printf("%s:", filename);
printf("%s\n", codename);
if (str_crmode != NULL) printf("%s (%s)\n", codename, str_crmode);
else printf("%s\n", codename);
}
#endif /*WIN32DLL*/
@ -5068,9 +5117,6 @@ nkf_char nfc_getc(FILE *f)
int i=0, j, k=1, lower, upper;
nkf_char buf[9];
const nkf_nfchar *array;
#if 0
extern const struct normalization_pair normalization_table[];
#endif
buf[i] = (*g)(f);
while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
@ -5437,7 +5483,7 @@ void open_mime(nkf_char mode)
int i;
int j;
p = mime_pattern[0];
for(i=0;mime_encode[i];i++) {
for(i=0;mime_pattern[i];i++) {
if (mode == mime_encode[i]) {
p = mime_pattern[i];
break;
@ -5643,10 +5689,21 @@ void mime_putc(nkf_char c)
if (mimeout_mode=='Q') {
if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
if (c <= SPACE) {
if (c == CR || c == NL) {
close_mime();
(*o_mputc)(c);
base64_count = 0;
return;
} else if (c <= SPACE) {
close_mime();
(*o_mputc)(SPACE);
base64_count++;
if (base64_count > 70) {
(*o_mputc)(NL);
base64_count = 0;
}
if (!nkf_isblank(c)) {
(*o_mputc)(SPACE);
base64_count++;
}
}
(*o_mputc)(c);
base64_count++;
@ -5678,7 +5735,8 @@ void mime_putc(nkf_char c)
mimeout_buf_count = 1;
}else{
if (base64_count > 1
&& base64_count + mimeout_buf_count > 76){
&& base64_count + mimeout_buf_count > 76
&& mimeout_buf[0] != CR && mimeout_buf[0] != NL){
(*o_mputc)(NL);
base64_count = 0;
if (!nkf_isspace(mimeout_buf[0])){

View file

@ -201,6 +201,20 @@ const unsigned short euc_to_utf8_AC[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
};
const unsigned short euc_to_utf8_AC_mac[] = {
0x2664, 0x2667, 0x2661, 0x2662, 0x2660, 0x2663, 0x2665,
0x2666, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0x3020, 0x260E, 0x3004,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0x261E, 0x261C, 0x261D, 0x261F, 0x21C6, 0x21C4, 0x21C5,
0, 0x21E8, 0x21E6, 0x21E7, 0x21E9, 0x2192, 0x2190, 0x2191,
0x2193, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
};
const unsigned short euc_to_utf8_AD[] = {
0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466,
0x2467, 0x2468, 0x2469, 0x246A, 0x246B, 0x246C, 0x246D, 0x246E,
@ -215,6 +229,20 @@ const unsigned short euc_to_utf8_AD[] = {
0x2252, 0x2261, 0x222B, 0x222E, 0x2211, 0x221A, 0x22A5, 0x2220,
0x221F, 0x22BF, 0x2235, 0x2229, 0x222A, 0, 0x3299,
};
const unsigned short euc_to_utf8_AD_mac[] = {
0x65E5, 0x6708, 0x706B, 0x6C34, 0x6728, 0x91D1, 0x571F,
0x796D, 0x795D, 0x81EA, 0x81F3, 0x3239, 0x547C, 0x3231, 0x8CC7,
0x540D, 0x3232, 0x5B66, 0x8CA1, 0x793E, 0x7279, 0x76E3, 0x4F01,
0x5354, 0x52B4, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0,
0x3349, 0x3314, 0x3322, 0x334D, 0x3318, 0x3327, 0x3303, 0x3336,
0x3351, 0x3357, 0x330D, 0x3326, 0x3323, 0x332B, 0x334A, 0x333B,
0x339C, 0x339D, 0x339E, 0x338E, 0x338F, 0x33C4, 0x33A1, 0,
0, 0, 0, 0, 0, 0, 0, 0x337B,
0x301D, 0x301F, 0x2116, 0x33CD, 0x2121, 0x32A4, 0x32A5, 0x32A6,
0x32A7, 0x32A8, 0x3231, 0x3232, 0x3239, 0x337E, 0x337D, 0x337C,
0x2252, 0x5927, 0x5C0F, 0x32A4, 0x32A5, 0x32A6, 0x32A7, 0x32A8,
0x533B, 0x8CA1, 0x512A, 0x52B4, 0x5370, 0x63A7, 0x79D8,
};
const unsigned short euc_to_utf8_AE[] = {
0x3349, 0x3322, 0x334D, 0x3314, 0x3316, 0x3305, 0x3333,
0x334E, 0x3303, 0x3336, 0x3318, 0x3315, 0x3327, 0x3351, 0x334A,
@ -2346,6 +2374,33 @@ const unsigned short *const euc_to_utf8_2bytes_ms[] = {
0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB,
euc_to_utf8_FC_ms, 0, 0,
};
/* CP10001 */
const unsigned short *const euc_to_utf8_2bytes_mac[] = {
euc_to_utf8_A1_ms, euc_to_utf8_A2_ms, euc_to_utf8_A3,
euc_to_utf8_A4, euc_to_utf8_A5, euc_to_utf8_A6, euc_to_utf8_A7,
euc_to_utf8_A8, euc_to_utf8_A9, euc_to_utf8_AA, euc_to_utf8_AB,
euc_to_utf8_AC_mac, euc_to_utf8_AD_mac, euc_to_utf8_AE, euc_to_utf8_AF,
euc_to_utf8_B0, euc_to_utf8_B1, euc_to_utf8_B2, euc_to_utf8_B3,
euc_to_utf8_B4, euc_to_utf8_B5, euc_to_utf8_B6, euc_to_utf8_B7,
euc_to_utf8_B8, euc_to_utf8_B9, euc_to_utf8_BA, euc_to_utf8_BB,
euc_to_utf8_BC, euc_to_utf8_BD, euc_to_utf8_BE, euc_to_utf8_BF,
euc_to_utf8_C0, euc_to_utf8_C1, euc_to_utf8_C2, euc_to_utf8_C3,
euc_to_utf8_C4, euc_to_utf8_C5, euc_to_utf8_C6, euc_to_utf8_C7,
euc_to_utf8_C8, euc_to_utf8_C9, euc_to_utf8_CA, euc_to_utf8_CB,
euc_to_utf8_CC, euc_to_utf8_CD, euc_to_utf8_CE, euc_to_utf8_CF,
euc_to_utf8_D0, euc_to_utf8_D1, euc_to_utf8_D2, euc_to_utf8_D3,
euc_to_utf8_D4, euc_to_utf8_D5, euc_to_utf8_D6, euc_to_utf8_D7,
euc_to_utf8_D8, euc_to_utf8_D9, euc_to_utf8_DA, euc_to_utf8_DB,
euc_to_utf8_DC, euc_to_utf8_DD, euc_to_utf8_DE, euc_to_utf8_DF,
euc_to_utf8_E0, euc_to_utf8_E1, euc_to_utf8_E2, euc_to_utf8_E3,
euc_to_utf8_E4, euc_to_utf8_E5, euc_to_utf8_E6, euc_to_utf8_E7,
euc_to_utf8_E8, euc_to_utf8_E9, euc_to_utf8_EA, euc_to_utf8_EB,
euc_to_utf8_EC, euc_to_utf8_ED, euc_to_utf8_EE, euc_to_utf8_EF,
euc_to_utf8_F0, euc_to_utf8_F1, euc_to_utf8_F2, euc_to_utf8_F3,
euc_to_utf8_F4, euc_to_utf8_F5, 0, 0,
0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB,
euc_to_utf8_FC_ms, 0, 0,
};
#ifdef X0212_ENABLE
const unsigned short *const x0212_to_utf8_2bytes[] = {
@ -2397,6 +2452,16 @@ const unsigned short utf8_to_euc_C2_ms[] = {
0x216B, 0x215E, 0, 0, 0x212D, 0, 0x2279, 0,
0xA231, 0, 0xA26B, 0, 0, 0, 0, 0xA244,
};
const unsigned short utf8_to_euc_C2_mac[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0x0220, 0xA242, 0x2171, 0x2172, 0xA270, 0x5C, 0xA243, 0x2178,
0x212F, 0x027D, 0xA26C, 0, 0x224C, 0, 0xA26E, 0xA234,
0x216B, 0x215E, 0, 0, 0x212D, 0, 0x2279, 0,
0xA231, 0, 0xA26B, 0, 0, 0, 0, 0xA244,
};
const unsigned short utf8_to_euc_C2_932[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@ -2547,6 +2612,16 @@ const unsigned short utf8_to_euc_E284[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E284_mac[] = {
0, 0, 0, 0x216E, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0x2B7B, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0x2B7D, 0x027E, 0, 0, 0, 0, 0,
0, 0, 0, 0x2272, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E285[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@ -2557,6 +2632,16 @@ const unsigned short utf8_to_euc_E285[] = {
0xF373, 0xF374, 0xF375, 0xF376, 0xF377, 0xF378, 0xF379, 0xF37A,
0xF37B, 0xF37C, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E285_mac[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0x2A21, 0x2A22, 0x2A23, 0x2A24, 0x2A25, 0x2A26, 0x2A27, 0x2A28,
0x2A29, 0x2A2A, 0, 0, 0, 0, 0, 0,
0x2A35, 0x2A36, 0x2A37, 0x2A38, 0x2A39, 0x2A3A, 0x2A3B, 0x2A3C,
0x2A3D, 0x2A3E, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E286[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@ -2597,6 +2682,16 @@ const unsigned short utf8_to_euc_E288_932[] = {
0, 0, 0, 0, 0x2168, 0x2268, 0, 0,
0, 0, 0, 0, 0, 0x2266, 0, 0,
};
const unsigned short utf8_to_euc_E288_mac[] = {
0x224F, 0, 0x225F, 0x2250, 0, 0, 0, 0x2260,
0x223A, 0, 0, 0x223B, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0x2265, 0, 0, 0x2267, 0x2167, 0x2F22,
0x225C, 0, 0, 0, 0, 0x2142, 0, 0x224A,
0x224B, 0x2241, 0x2240, 0x2269, 0x226A, 0, 0x2F21, 0,
0, 0, 0, 0, 0x2168, 0x2268, 0, 0,
0, 0, 0, 0, 0, 0x2266, 0, 0,
};
const unsigned short utf8_to_euc_E289[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@ -2617,6 +2712,16 @@ const unsigned short utf8_to_euc_E28A[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0x2D79,
};
const unsigned short utf8_to_euc_E28A_mac[] = {
0, 0, 0x223E, 0x223F, 0, 0, 0x223C, 0x223D,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0x225D, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0x2F23,
};
const unsigned short utf8_to_euc_E28C[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@ -2637,6 +2742,16 @@ const unsigned short utf8_to_euc_E291[] = {
0x2D31, 0x2D32, 0x2D33, 0x2D34, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E291_mac[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0x2921, 0x2922, 0x2923, 0x2924, 0x2925, 0x2926, 0x2927, 0x2928,
0x2929, 0x292A, 0x292B, 0x292C, 0x292D, 0x292E, 0x292F, 0x2930,
0x2931, 0x2932, 0x2933, 0x2934, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E294[] = {
0x2821, 0x282C, 0x2822, 0x282D, 0, 0, 0, 0,
0, 0, 0, 0, 0x2823, 0, 0, 0x282E,
@ -2767,6 +2882,16 @@ const unsigned short utf8_to_euc_E388[] = {
0, 0x2D6A, 0x2D6B, 0, 0, 0, 0, 0,
0, 0x2D6C, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E388_mac[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0x2D2E, 0x2D31, 0, 0, 0, 0, 0,
0, 0x2D2C, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E38A[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@ -2777,6 +2902,16 @@ const unsigned short utf8_to_euc_E38A[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E38A_mac[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0x2D73, 0x2D74, 0x2D75, 0x2D76,
0x2D77, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E38C[] = {
0, 0, 0, 0x2D46, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0x2D4A, 0, 0,
@ -2787,6 +2922,16 @@ const unsigned short utf8_to_euc_E38C[] = {
0, 0, 0, 0, 0, 0, 0x2D47, 0,
0, 0, 0, 0x2D4F, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E38C_mac[] = {
0, 0, 0, 0x2E29, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0x2E32, 0, 0,
0, 0, 0, 0, 0x2E24, 0, 0, 0,
0x2E2B, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0x2E22, 0x2E34, 0, 0, 0x2E35, 0x2E2D,
0, 0, 0, 0x2E37, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0x2E2A, 0,
0, 0, 0, 0x2E36, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E38D[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0x2D40, 0x2D4E, 0, 0, 0x2D43, 0, 0,
@ -2797,6 +2942,16 @@ const unsigned short utf8_to_euc_E38D[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0x2D5F, 0x2D6F, 0x2D6E, 0x2D6D, 0,
};
const unsigned short utf8_to_euc_E38D_mac[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0x2E21, 0x2E2F, 0, 0, 0x2E23, 0, 0,
0, 0x2E2E, 0, 0, 0, 0, 0, 0x2E31,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0x2E6A, 0x2E69, 0x2E68, 0x2E67, 0,
};
const unsigned short utf8_to_euc_E38E[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0x2D53, 0x2D54,
@ -2807,6 +2962,16 @@ const unsigned short utf8_to_euc_E38E[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E38E_mac[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0x2B2B, 0x2B2D,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0x2B21, 0x2B23, 0x2B29, 0,
0, 0x2B27, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E38F[] = {
0, 0, 0, 0, 0x2D55, 0, 0, 0,
0, 0, 0, 0, 0, 0x2D63, 0, 0,
@ -2817,6 +2982,16 @@ const unsigned short utf8_to_euc_E38F[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E38F_mac[] = {
0, 0, 0, 0, 0x2B2E, 0, 0, 0,
0, 0, 0, 0, 0, 0x2B7C, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E4B8[] = {
0x306C, 0x437A, 0xB021, 0x3C37, 0xB022, 0xB023, 0, 0x4B7C,
0x3E66, 0x3B30, 0x3E65, 0x323C, 0xB024, 0x4954, 0x4D3F, 0,
@ -6171,6 +6346,24 @@ const unsigned short *const utf8_to_euc_E2_932[] = {
0, 0, 0, 0,
0, 0, 0, 0,
};
const unsigned short *const utf8_to_euc_E2_mac[] = {
utf8_to_euc_E280_932, 0, 0, 0,
utf8_to_euc_E284_mac, utf8_to_euc_E285_mac, utf8_to_euc_E286, utf8_to_euc_E287,
utf8_to_euc_E288_mac, utf8_to_euc_E289, utf8_to_euc_E28A_mac, 0,
utf8_to_euc_E28C, 0, 0, 0,
0, utf8_to_euc_E291_mac, 0, 0,
utf8_to_euc_E294, utf8_to_euc_E295, utf8_to_euc_E296, utf8_to_euc_E297,
utf8_to_euc_E298, utf8_to_euc_E299, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
};
const unsigned short *const utf8_to_euc_E3[] = {
utf8_to_euc_E380, utf8_to_euc_E381, utf8_to_euc_E382, utf8_to_euc_E383,
0, 0, 0, 0,
@ -6207,6 +6400,24 @@ const unsigned short *const utf8_to_euc_E3_932[] = {
0, 0, 0, 0,
0, 0, 0, 0,
};
const unsigned short *const utf8_to_euc_E3_mac[] = {
utf8_to_euc_E380_932, utf8_to_euc_E381, utf8_to_euc_E382_932, utf8_to_euc_E383,
0, 0, 0, 0,
utf8_to_euc_E388_mac, 0, utf8_to_euc_E38A_mac, 0,
utf8_to_euc_E38C_mac, utf8_to_euc_E38D_mac, utf8_to_euc_E38E_mac, utf8_to_euc_E38F_mac,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
};
const unsigned short *const utf8_to_euc_E4[] = {
0, 0, 0, 0,
0, 0, 0, 0,
@ -6441,6 +6652,36 @@ const unsigned short *const utf8_to_euc_2bytes_932[] = {
0, 0, 0, 0,
0, 0, 0, 0,
};
const unsigned short *const utf8_to_euc_2bytes_mac[] = {
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, utf8_to_euc_C2_mac, utf8_to_euc_C3,
utf8_to_euc_C4, utf8_to_euc_C5, 0, utf8_to_euc_C7,
0, 0, 0, utf8_to_euc_CB,
0, 0, utf8_to_euc_CE, utf8_to_euc_CF,
utf8_to_euc_D0, utf8_to_euc_D1, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
};
const unsigned short *const *const utf8_to_euc_3bytes[] = {
0, 0, utf8_to_euc_E2, utf8_to_euc_E3,
utf8_to_euc_E4, utf8_to_euc_E5, utf8_to_euc_E6, utf8_to_euc_E7,
@ -6459,6 +6700,12 @@ const unsigned short *const *const utf8_to_euc_3bytes_932[] = {
utf8_to_euc_E8, utf8_to_euc_E9, 0, 0,
0, 0, 0, utf8_to_euc_EF_ms,
};
const unsigned short *const *const utf8_to_euc_3bytes_mac[] = {
0, 0, utf8_to_euc_E2_mac, utf8_to_euc_E3_mac,
utf8_to_euc_E4, utf8_to_euc_E5, utf8_to_euc_E6, utf8_to_euc_E7,
utf8_to_euc_E8, utf8_to_euc_E9, 0, 0,
0, 0, 0, utf8_to_euc_EF_ms,
};
#ifdef UNICODE_NORMALIZATION

View file

@ -5,6 +5,7 @@
extern const unsigned short euc_to_utf8_1byte[];
extern const unsigned short *const euc_to_utf8_2bytes[];
extern const unsigned short *const euc_to_utf8_2bytes_ms[];
extern const unsigned short *const euc_to_utf8_2bytes_mac[];
extern const unsigned short *const x0212_to_utf8_2bytes[];
#endif /* UTF8_OUTPUT_ENABLE */
@ -12,9 +13,11 @@ extern const unsigned short *const x0212_to_utf8_2bytes[];
extern const unsigned short *const utf8_to_euc_2bytes[];
extern const unsigned short *const utf8_to_euc_2bytes_ms[];
extern const unsigned short *const utf8_to_euc_2bytes_932[];
extern const unsigned short *const utf8_to_euc_2bytes_mac[];
extern const unsigned short *const *const utf8_to_euc_3bytes[];
extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
extern const unsigned short *const *const utf8_to_euc_3bytes_mac[];
#endif /* UTF8_INPUT_ENABLE */
#ifdef UNICODE_NORMALIZATION

210
lib/json.rb Normal file
View file

@ -0,0 +1,210 @@
require 'json/common'
# = json - JSON for Ruby
#
# == Description
#
# This is an implementation of the JSON specification according to RFC 4627
# (http://www.ietf.org/rfc/rfc4627.txt). Starting from version 1.0.0 on there
# will be two variants available:
#
# * A pure ruby variant, that relies on the iconv and the stringscan
# extensions, which are both part of the ruby standard library.
# * The quite a bit faster C extension variant, which is in parts implemented
# in C and comes with its own unicode conversion functions and a parser
# generated by the ragel state machine compiler
# (http://www.cs.queensu.ca/~thurston/ragel).
#
# Both variants of the JSON generator escape all non-ASCII and control
# characters with \uXXXX escape sequences, and support UTF-16 surrogate pairs
# in order to be able to generate the whole range of unicode code points. This
# means that generated JSON text is encoded as UTF-8 (because ASCII is a subset
# of UTF-8) and at the same time avoids decoding problems for receiving
# endpoints, that don't expect UTF-8 encoded texts. On the negative side this
# may lead to a bit longer strings than necessary.
#
# All strings, that are to be encoded as JSON strings, should be UTF-8 byte
# sequences on the Ruby side. To encode raw binary strings, that aren't UTF-8
# encoded, please use the to_json_raw_object method of String (which produces
# an object, that contains a byte array) and decode the result on the receiving
# endpoint.
#
# == Author
#
# Florian Frank <mailto:flori@ping.de>
#
# == License
#
# This software is distributed under the same license as Ruby itself, see
# http://www.ruby-lang.org/en/LICENSE.txt.
#
# == Download
#
# The latest version of this library can be downloaded at
#
# * http://rubyforge.org/frs?group_id=953
#
# Online Documentation should be located at
#
# * http://json.rubyforge.org
#
# == Speed Comparisons
#
# I have created some benchmark results (see the benchmarks subdir of the
# package) for the JSON-Parser to estimate the speed up in the C extension:
#
# JSON::Pure::Parser:: 28.90 calls/second
# JSON::Ext::Parser:: 505.50 calls/second
#
# This is ca. <b>17.5</b> times the speed of the pure Ruby implementation.
#
# I have benchmarked the JSON-Generator as well. This generates a few more
# values, because there are different modes, that also influence the achieved
# speed:
#
# * JSON::Pure::Generator:
# generate:: 35.06 calls/second
# pretty_generate:: 34.00 calls/second
# fast_generate:: 41.06 calls/second
#
# * JSON::Ext::Generator:
# generate:: 492.11 calls/second
# pretty_generate:: 348.85 calls/second
# fast_generate:: 541.60 calls/second
#
# * Speedup Ext/Pure:
# generate safe:: 14.0 times
# generate pretty:: 10.3 times
# generate fast:: 13.2 times
#
# The rails framework includes a generator as well, also it seems to be rather
# slow: I measured only 23.87 calls/second which is slower than any of my pure
# generator results. Here a comparison of the different speedups with the Rails
# measurement as the divisor:
#
# * Speedup Pure/Rails:
# generate safe:: 1.5 times
# generate pretty:: 1.4 times
# generate fast:: 1.7 times
#
# * Speedup Ext/Rails:
# generate safe:: 20.6 times
# generate pretty:: 14.6 times
# generate fast:: 22.7 times
#
# To achieve the fastest JSON text output, you can use the
# fast_generate/fast_unparse methods. Beware, that this will disable the
# checking for circular Ruby data structures, which may cause JSON to go into
# an infinite loop.
#
# == Examples
#
# To create a JSON text from a ruby data structure, you
# can call JSON.generate (or JSON.unparse) like that:
#
# json = JSON.generate [1, 2, {"a"=>3.141}, false, true, nil, 4..10]
# # => "[1,2,{\"a\":3.141},false,true,null,\"4..10\"]"
#
# It's also possible to call the #to_json method directly.
#
# json = [1, 2, {"a"=>3.141}, false, true, nil, 4..10].to_json
# # => "[1,2,{\"a\":3.141},false,true,null,\"4..10\"]"
#
# To create a valid JSON text you have to make sure, that the output is
# embedded in either a JSON array [] or a JSON object {}. The easiest way to do
# this, is by putting your values in a Ruby Array or Hash instance.
#
# To get back a ruby data structure from a JSON text, you have to call
# JSON.parse on it:
#
# JSON.parse json
# # => [1, 2, {"a"=>3.141}, false, true, nil, "4..10"]
#
# Note, that the range from the original data structure is a simple
# string now. The reason for this is, that JSON doesn't support ranges
# or arbitrary classes. In this case the json library falls back to call
# Object#to_json, which is the same as #to_s.to_json.
#
# It's possible to extend JSON to support serialization of arbitrary classes by
# simply implementing a more specialized version of the #to_json method, that
# should return a JSON object (a hash converted to JSON with #to_json)
# like this (don't forget the *a for all the arguments):
#
# class Range
# def to_json(*a)
# {
# 'json_class' => self.class.name, # = 'Range'
# 'data' => [ first, last, exclude_end? ]
# }.to_json(*a)
# end
# end
#
# The hash key 'json_class' is the class, that will be asked to deserialize the
# JSON representation later. In this case it's 'Range', but any namespace of
# the form 'A::B' or '::A::B' will do. All other keys are arbitrary and can be
# used to store the necessary data to configure the object to be deserialized.
#
# If the key 'json_class' is found in a JSON object, the JSON parser checks
# if the given class responds to the json_create class method. If so, it is
# called with the JSON object converted to a Ruby hash. So a range can
# be deserialized by implementing Range.json_create like this:
#
# class Range
# def self.json_create(o)
# new(*o['data'])
# end
# end
#
# Now it is possible to serialize/deserialize ranges as well:
#
# json = JSON.generate [1, 2, {"a"=>3.141}, false, true, nil, 4..10]
# # => "[1,2,{\"a\":3.141},false,true,null,{\"json_class\":\"Range\",\"data\":[4,10,false]}]"
# JSON.parse json
# # => [1, 2, {"a"=>3.141}, false, true, nil, 4..10]
#
# JSON.generate always creates the shortest possible string representation of a
# ruby data structure in one line. This is good for data storage or network
# protocols, but not so good for humans to read. Fortunately there's also
# JSON.pretty_generate (or JSON.pretty_unparse) that creates a more
# readable output:
#
# puts JSON.pretty_generate([1, 2, {"a"=>3.141}, false, true, nil, 4..10])
# [
# 1,
# 2,
# {
# "a": 3.141
# },
# false,
# true,
# null,
# {
# "json_class": "Range",
# "data": [
# 4,
# 10,
# false
# ]
# }
# ]
#
# There are also the methods Kernel#j for unparse, and Kernel#jj for
# pretty_unparse output to the console, that work analogous to Core Ruby's p
# and the pp library's pp methods.
#
# The script tools/server.rb contains a small example if you want to test, how
# receiving a JSON object from a webrick server in your browser with the
# javascript prototype library (http://www.prototypejs.org) works.
#
module JSON
  require 'json/version'

  # Read the flag before anything else is loaded, then always try the C
  # extension first. When the flag is set a load failure of the extension is
  # passed on to the caller; otherwise the pure Ruby variant is loaded
  # instead.
  binary_only = VARIANT_BINARY
  begin
    require 'json/ext'
  rescue LoadError
    raise if binary_only
    require 'json/pure'
  end
end

21
lib/json/Array.xpm Normal file
View file

@ -0,0 +1,21 @@
/* XPM */
static char * Array_xpm[] = {
"16 16 2 1",
" c None",
". c #000000",
" ",
" ",
" ",
" .......... ",
" . . ",
" . . ",
" . . ",
" . . ",
" . . ",
" . . ",
" . . ",
" . . ",
" .......... ",
" ",
" ",
" "};

21
lib/json/FalseClass.xpm Normal file
View file

@ -0,0 +1,21 @@
/* XPM */
static char * False_xpm[] = {
"16 16 2 1",
" c None",
". c #FF0000",
" ",
" ",
" ",
" ...... ",
" . ",
" . ",
" . ",
" ...... ",
" . ",
" . ",
" . ",
" . ",
" . ",
" ",
" ",
" "};

21
lib/json/Hash.xpm Normal file
View file

@ -0,0 +1,21 @@
/* XPM */
static char * Hash_xpm[] = {
"16 16 2 1",
" c None",
". c #000000",
" ",
" ",
" ",
" . . ",
" . . ",
" . . ",
" ......... ",
" . . ",
" . . ",
" ......... ",
" . . ",
" . . ",
" . . ",
" ",
" ",
" "};

73
lib/json/Key.xpm Normal file
View file

@ -0,0 +1,73 @@
/* XPM */
static char * Key_xpm[] = {
"16 16 54 1",
" c None",
". c #110007",
"+ c #0E0900",
"@ c #000013",
"# c #070600",
"$ c #F6F006",
"% c #ECE711",
"& c #E5EE00",
"* c #16021E",
"= c #120900",
"- c #EDF12B",
"; c #000033",
"> c #0F0000",
", c #FFFE03",
"' c #E6E500",
") c #16021B",
"! c #F7F502",
"~ c #000E00",
"{ c #130000",
"] c #FFF000",
"^ c #FFE711",
"/ c #140005",
"( c #190025",
"_ c #E9DD27",
": c #E7DC04",
"< c #FFEC09",
"[ c #FFE707",
"} c #FFDE10",
"| c #150021",
"1 c #160700",
"2 c #FAF60E",
"3 c #EFE301",
"4 c #FEF300",
"5 c #E7E000",
"6 c #FFFF08",
"7 c #0E0206",
"8 c #040000",
"9 c #03052E",
"0 c #041212",
"a c #070300",
"b c #F2E713",
"c c #F9DE13",
"d c #36091E",
"e c #00001C",
"f c #1F0010",
"g c #FFF500",
"h c #DEDE00",
"i c #050A00",
"j c #FAF14A",
"k c #F5F200",
"l c #040404",
"m c #1A0D00",
"n c #EDE43D",
"o c #ECE007",
" ",
" ",
" .+@ ",
" #$%&* ",
" =-;>,') ",
" >!~{]^/ ",
" (_:<[}| ",
" 1234567 ",
" 890abcd ",
" efghi ",
" >jkl ",
" mnol ",
" >kl ",
" ll ",
" ",
" "};

21
lib/json/NilClass.xpm Normal file
View file

@ -0,0 +1,21 @@
/* XPM */
static char * False_xpm[] = {
"16 16 2 1",
" c None",
". c #000000",
" ",
" ",
" ",
" ... ",
" . . ",
" . . ",
" . . ",
" . . ",
" . . ",
" . . ",
" . . ",
" . . ",
" ... ",
" ",
" ",
" "};

28
lib/json/Numeric.xpm Normal file
View file

@ -0,0 +1,28 @@
/* XPM */
static char * Numeric_xpm[] = {
"16 16 9 1",
" c None",
". c #FF0000",
"+ c #0000FF",
"@ c #0023DB",
"# c #00EA14",
"$ c #00FF00",
"% c #004FAF",
"& c #0028D6",
"* c #00F20C",
" ",
" ",
" ",
" ... +++@#$$$$ ",
" .+ %& $$ ",
" . + $ ",
" . + $$ ",
" . ++$$$$ ",
" . + $$ ",
" . + $ ",
" . + $ ",
" . + $ $$ ",
" .....++++*$$ ",
" ",
" ",
" "};

96
lib/json/String.xpm Normal file
View file

@ -0,0 +1,96 @@
/* XPM */
static char * String_xpm[] = {
"16 16 77 1",
" c None",
". c #000000",
"+ c #040404",
"@ c #080806",
"# c #090606",
"$ c #EEEAE1",
"% c #E7E3DA",
"& c #E0DBD1",
"* c #D4B46F",
"= c #0C0906",
"- c #E3C072",
"; c #E4C072",
"> c #060505",
", c #0B0A08",
"' c #D5B264",
") c #D3AF5A",
"! c #080602",
"~ c #E1B863",
"{ c #DDB151",
"] c #DBAE4A",
"^ c #DDB152",
"/ c #DDB252",
"( c #070705",
"_ c #0C0A07",
": c #D3A33B",
"< c #020201",
"[ c #DAAA41",
"} c #040302",
"| c #E4D9BF",
"1 c #0B0907",
"2 c #030201",
"3 c #020200",
"4 c #C99115",
"5 c #080704",
"6 c #DBC8A2",
"7 c #E7D7B4",
"8 c #E0CD9E",
"9 c #080601",
"0 c #040400",
"a c #010100",
"b c #0B0B08",
"c c #DCBF83",
"d c #DCBC75",
"e c #DEB559",
"f c #040301",
"g c #BC8815",
"h c #120E07",
"i c #060402",
"j c #0A0804",
"k c #D4A747",
"l c #D6A12F",
"m c #0E0C05",
"n c #C8C1B0",
"o c #1D1B15",
"p c #D7AD51",
"q c #070502",
"r c #080804",
"s c #BC953B",
"t c #C4BDAD",
"u c #0B0807",
"v c #DBAC47",
"w c #1B150A",
"x c #B78A2C",
"y c #D8A83C",
"z c #D4A338",
"A c #0F0B03",
"B c #181105",
"C c #C59325",
"D c #C18E1F",
"E c #060600",
"F c #CC992D",
"G c #B98B25",
"H c #B3831F",
"I c #C08C1C",
"J c #060500",
"K c #0E0C03",
"L c #0D0A00",
" ",
" .+@# ",
" .$%&*= ",
" .-;>,')! ",
" .~. .{]. ",
" .^/. (_:< ",
" .[.}|$12 ",
" 345678}90 ",
" a2bcdefgh ",
" ijkl.mno ",
" <pq. rstu ",
" .]v. wx= ",
" .yzABCDE ",
" .FGHIJ ",
" 0KL0 ",
" "};

21
lib/json/TrueClass.xpm Normal file
View file

@ -0,0 +1,21 @@
/* XPM */
static char * TrueClass_xpm[] = {
"16 16 2 1",
" c None",
". c #0BF311",
" ",
" ",
" ",
" ......... ",
" . ",
" . ",
" . ",
" . ",
" . ",
" . ",
" . ",
" . ",
" . ",
" ",
" ",
" "};

194
lib/json/common.rb Normal file
View file

@ -0,0 +1,194 @@
require 'json/version'
module JSON
class << self
# Convenience converter. A string like _object_ (one that responds to to_str)
# is parsed as JSON text and the resulting Ruby data structure is returned.
# For any other _object_ a JSON text is generated from it and returned.
def [](object)
  return JSON.generate(object) unless object.respond_to? :to_str
  JSON.parse(object.to_str)
end
# Returns the JSON parser class, that is used by JSON. This might be either
# JSON::Ext::Parser or JSON::Pure::Parser.
attr_reader :parser
# Set the JSON parser class _parser_ to be used by JSON. The class is stored
# for the +parser+ reader and published as the Parser constant of this
# module, replacing a Parser constant that an earlier call published.
def parser=(parser) # :nodoc:
  @parser = parser
  begin
    # remove_const only works for a constant defined directly on this module.
    # const_defined? can also report a Parser constant that is merely visible
    # from here (for example a top level ::Parser), and removing that one
    # raises NameError. So attempt the removal and ignore exactly that error.
    remove_const :Parser
  rescue NameError
    # Nothing to remove: this module does not define Parser itself (yet).
  end
  const_set :Parser, parser
end
# Return the constant located at _path_, which is converted with to_s first
# and has to look like ::A::B::C or A::B::C. The lookup always starts at
# Object, so A has to be a top level constant; empty path segments (such as
# the one produced by a leading ::) are skipped. If a segment names no
# constant, an ArgumentError is raised.
def deep_const_get(path) # :nodoc:
  path = path.to_s
  scope = Object
  path.split(/::/).each do |name|
    next if name.empty?
    unless scope.const_defined?(name)
      raise ArgumentError, "can't find const #{path}"
    end
    scope = scope.const_get(name)
  end
  scope
end
# Set the module _generator_ to be used by JSON. For every constant of
# generator::GeneratorMethods the constant of the same name is looked up with
# deep_const_get; that class loses its own to_json method (if it defines one
# itself) and includes the generator's module instead. Finally
# generator::State becomes the state class and the State constant.
def generator=(generator) # :nodoc:
  @generator = generator
  methods_namespace = generator::GeneratorMethods
  methods_namespace.constants.each do |name|
    target = deep_const_get(name)
    mixin = methods_namespace.const_get(name)
    target.class_eval do
      own_to_json = instance_methods(false).select { |m| m.to_s == 'to_json' }
      own_to_json.each { |m| remove_method m }
      include mixin
    end
  end
  self.state = generator::State
  const_set :State, self.state
end
# Returns the JSON generator module, that is used by JSON. This might be
# either JSON::Ext::Generator or JSON::Pure::Generator.
attr_reader :generator
# Returns the JSON generator state class, that is used by JSON. This might
# be either JSON::Ext::Generator::State or JSON::Pure::Generator::State.
attr_accessor :state
# This is the create identifier, that is used to decide, if the _json_create_
# hook of a class should be called. It defaults to 'json_class'.
attr_accessor :create_id
end
self.create_id = 'json_class'
# The base exception for JSON errors.
class JSONError < StandardError; end
# This exception is raised, if a parser error occurs.
class ParserError < JSONError; end
# This exception is raised, if the nesting of parsed datastructures is too
# deep.
class NestingError < ParserError; end
# This exception is raised, if a generator or unparser error occurs.
class GeneratorError < JSONError; end
# For backwards compatibility
UnparserError = GeneratorError
# If a circular data structure is encountered while unparsing
# this exception is raised.
class CircularDatastructure < GeneratorError; end
# This exception is raised, if the required unicode support is missing on the
# system. Usually this means, that the iconv library is not installed.
class MissingUnicodeSupport < JSONError; end
module_function
# Parse the JSON string _source_ into a Ruby data structure and return it.
# A new instance of the configured parser class (JSON.parser) does the work.
#
# _opts_ is handed to the parser unchanged and can have the following keys:
# * *max_nesting*: The maximum depth of nesting allowed in the parsed data
#   structures. Disable depth checking with :max_nesting => false.
def parse(source, opts = {})
  parser = JSON.parser.new(source, opts)
  parser.parse
end
# Unparse the Ruby data structure _obj_ into a single line JSON string and
# return it. _state_ is a JSON::State object, that can be used to configure
# the output further.
#
# It defaults to a state object, that creates the shortest possible JSON text
# in one line and only checks for circular data structures. If you are sure,
# that the objects don't contain any circles, you can set _state_ to nil, to
# disable these checks in order to create the JSON text faster. See also
# fast_generate.
def generate(obj, state = JSON.state.new)
obj.to_json(state)
end
alias unparse generate
module_function :unparse
# Unparse the Ruby data structure _obj_ into a single line JSON string and
# return it. This method disables the checks for circles in Ruby objects.
#
# *WARNING*: Be careful not to pass any Ruby data structures with circles as
# _obj_ argument, because this will cause JSON to go into an infinite loop.
def fast_generate(obj)
obj.to_json(nil)
end
alias fast_unparse fast_generate
module_function :fast_unparse
# Unparse the Ruby data structure _obj_ into a JSON string and return it. The
# returned string is a prettier form of the string returned by #unparse: the
# state object handed to to_json is configured with an indent string, a space
# string, newlines after objects and arrays, and with checking for circular
# data structures switched on.
def pretty_generate(obj)
  pretty_options = {
    :indent => ' ',
    :space => ' ',
    :object_nl => "\n",
    :array_nl => "\n",
    :check_circular => true
  }
  obj.to_json(JSON.state.new(pretty_options))
end
alias pretty_unparse pretty_generate
module_function :pretty_unparse
end
module ::Kernel
  # Outputs each of _objs_ to STDOUT as a JSON string in the shortest form,
  # that is in one line. Returns nil.
  def j(*objs)
    objs.each { |obj| puts JSON::generate(obj) }
    nil
  end

  # Outputs each of _objs_ to STDOUT as a JSON string in a pretty format,
  # with indentation and over many lines. Returns nil.
  def jj(*objs)
    objs.each { |obj| puts JSON::pretty_generate(obj) }
    nil
  end

  # Convenience converter. A string like _object_ (one that responds to
  # to_str) is parsed as JSON text and the resulting Ruby data structure is
  # returned. For any other _object_ a JSON text is generated from it and
  # returned.
  def JSON(object)
    return JSON.generate(object) unless object.respond_to? :to_str
    JSON.parse(object.to_str)
  end
end
class ::Class
  # Tells whether this class can be used to create an instance from a
  # serialised JSON string: true if the class responds to the class method
  # _json_create_ (which is expected to take a hash with the required data
  # as first parameter), false otherwise.
  def json_creatable?
    respond_to? :json_create
  end
end
# vim: set et sw=2 ts=2:

1295
lib/json/editor.rb Normal file

File diff suppressed because it is too large Load diff

13
lib/json/ext.rb Normal file
View file

@ -0,0 +1,13 @@
require 'json/common'
module JSON
  # This module holds all the modules/classes that implement JSON's
  # functionality as C extensions. Loading it installs the extension's Parser
  # and Generator as the parser and generator used by JSON.
  module Ext
    require 'json/ext/parser'
    require 'json/ext/generator'
    warn "Using c extension for JSON." if $DEBUG
    JSON.parser = Parser
    JSON.generator = Generator
  end
end

1499
lib/json/json.xpm Normal file

File diff suppressed because it is too large Load diff

75
lib/json/pure.rb Normal file
View file

@ -0,0 +1,75 @@
require 'json/common'
require 'json/pure/parser'
require 'json/pure/generator'
module JSON
# Set up the two iconv based converters used by the pure Ruby variant:
# UTF16toUTF8 and UTF8toUTF16. Raises MissingUnicodeSupport if iconv cannot
# be loaded or cannot do the required conversions.
begin
  require 'iconv'
  # An iconv instance to convert from UTF16 Big Endian to UTF8.
  UTF16toUTF8 = Iconv.new('utf-8', 'utf-16be') # :nodoc:
  # An iconv instance to convert from UTF8 to UTF16 Big Endian.
  UTF8toUTF16 = Iconv.new('utf-16be', 'utf-8') # :nodoc:
  # One conversion is run up front.
  # NOTE(review): presumably so that later conversions start without a byte
  # order mark - confirm against the iconv implementation in use.
  UTF8toUTF16.iconv('no bom')
rescue Errno::EINVAL, Iconv::InvalidEncoding
  # Iconv doesn't support big endian utf-16. Let's try to hack this manually
  # into the converters.
  begin
    # Silence warnings while the converter constants are (re)assigned below;
    # the previous setting is restored in the ensure clause.
    old_verbose, $VERBOSE = $VERBOSE, nil
    # An iconv instance to convert from UTF16 to UTF8.
    UTF16toUTF8 = Iconv.new('utf-8', 'utf-16') # :nodoc:
    # An iconv instance to convert from UTF8 to UTF16.
    UTF8toUTF16 = Iconv.new('utf-16', 'utf-8') # :nodoc:
    UTF8toUTF16.iconv('no bom')
    # "\xe2\x82\xac" is U+20AC in UTF-8, which is "\x20\xac" in big endian
    # UTF-16. Getting the bytes the other way round means this converter
    # emits little endian output, so wrap it to swap each byte pair of its
    # result.
    if UTF8toUTF16.iconv("\xe2\x82\xac") == "\xac\x20"
      swapper = Class.new do
        def initialize(iconv) # :nodoc:
          @iconv = iconv
        end

        def iconv(string) # :nodoc:
          result = @iconv.iconv(string)
          JSON.swap!(result)
        end
      end
      UTF8toUTF16 = swapper.new(UTF8toUTF16) # :nodoc:
    end
    # Same probe for the opposite direction: if the converter reads its input
    # as little endian, wrap it to swap each byte pair of a copy of the input
    # before converting.
    if UTF16toUTF8.iconv("\xac\x20") == "\xe2\x82\xac"
      swapper = Class.new do
        def initialize(iconv) # :nodoc:
          @iconv = iconv
        end

        def iconv(string) # :nodoc:
          string = JSON.swap!(string.dup)
          @iconv.iconv(string)
        end
      end
      UTF16toUTF8 = swapper.new(UTF16toUTF8) # :nodoc:
    end
  rescue Errno::EINVAL, Iconv::InvalidEncoding
    raise MissingUnicodeSupport, "iconv doesn't seem to support UTF-8/UTF-16 conversions"
  ensure
    $VERBOSE = old_verbose
  end
rescue LoadError
  raise MissingUnicodeSupport,
    "iconv couldn't be loaded, which is required for UTF-8/UTF-16 conversions"
end
# Swap consecutive bytes of _string_ in place: the elements at index 0 and 1
# change places, then those at 2 and 3, and so on. A trailing element without
# a partner stays where it is. Returns _string_ itself.
def self.swap!(string) # :nodoc:
  (string.size / 2 + 1).times do |pair|
    low = 2 * pair
    high = low + 1
    # Stop as soon as there is no second element to swap with.
    break unless string[high]
    string[low], string[high] = string[high], string[low]
  end
  string
end
# This module holds all the modules/classes that implement JSON's
# functionality in pure ruby. Loading it installs its Parser and Generator
# as the parser and generator used by JSON.
module Pure
  warn "Using pure library for JSON." if $DEBUG
  JSON.parser = Parser
  JSON.generator = Generator
end
end

321
lib/json/pure/generator.rb Normal file
View file

@ -0,0 +1,321 @@
module JSON
# Replacement table used by utf8_to_json: maps every character matched by its
# first substitution (the control characters \x00 to \x1f, the double quote,
# the backslash and the slash) to the escape sequence that is written into
# the JSON text. \b, \t, \n, \f and \r get their short escapes, the other
# control characters a \u00XX escape.
MAP = {
"\x0" => '\u0000',
"\x1" => '\u0001',
"\x2" => '\u0002',
"\x3" => '\u0003',
"\x4" => '\u0004',
"\x5" => '\u0005',
"\x6" => '\u0006',
"\x7" => '\u0007',
"\b" => '\b',
"\t" => '\t',
"\n" => '\n',
"\xb" => '\u000b',
"\f" => '\f',
"\r" => '\r',
"\xe" => '\u000e',
"\xf" => '\u000f',
"\x10" => '\u0010',
"\x11" => '\u0011',
"\x12" => '\u0012',
"\x13" => '\u0013',
"\x14" => '\u0014',
"\x15" => '\u0015',
"\x16" => '\u0016',
"\x17" => '\u0017',
"\x18" => '\u0018',
"\x19" => '\u0019',
"\x1a" => '\u001a',
"\x1b" => '\u001b',
"\x1c" => '\u001c',
"\x1d" => '\u001d',
"\x1e" => '\u001e',
"\x1f" => '\u001f',
'"' => '\"',
'\\' => '\\\\',
'/' => '\/',
} # :nodoc:
# Convert a UTF8 encoded Ruby string _string_ to a JSON string, encoded with
# UTF16 big endian characters as \u????, and return it.
#
# The argument itself is not modified. Raises GeneratorError if a byte is
# found that cannot be part of a UTF8 sequence, or if the iconv conversion
# fails.
def utf8_to_json(string) # :nodoc:
# Escape quotes, backslashes, slashes and control characters via MAP. The
# non-bang gsub returns a new string, which all later steps work on.
string = string.gsub(/["\\\/\x0-\x1f]/) { |c| MAP[c] }
# Match either a run of multi-byte UTF8 sequences or one single byte that is
# not valid at this point, and replace each match with the block's value.
string.gsub!(/(
(?:
[\xc2-\xdf][\x80-\xbf] |
[\xe0-\xef][\x80-\xbf]{2} |
[\xf0-\xf4][\x80-\xbf]{3}
)+ |
[\x80-\xc1\xf5-\xff] # invalid
)/nx) { |c|
# The sequence alternatives match at least two bytes, so a match of size 1
# comes from the "invalid" alternative (assumes size counts bytes here).
c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'"
# Convert the run to UTF16 and turn the result into a string of hex digits.
s = JSON::UTF8toUTF16.iconv(c).unpack('H*')[0]
# Prefix every group of four hex digits with \u; the value of this gsub! is
# the block's value and therefore the replacement text.
s.gsub!(/.{4}/n, '\\\\u\&')
}
string
rescue Iconv::Failure => e
# Report conversion problems as a JSON error rather than an iconv one.
raise GeneratorError, "Caught #{e.class}: #{e}"
end
module_function :utf8_to_json
module Pure
module Generator
# Instances of this class hold the configuration and the bookkeeping data
# that are used while generating a JSON text from a Ruby data structure.
class State
  # Returns a State for _opts_: _opts_ itself if it already is a State
  # object, a new State configured by _opts_ if it is a Hash, and a new
  # unconfigured State for anything else.
  def self.from_state(opts)
    return opts if self === opts
    Hash === opts ? new(opts) : new
  end

  # Instantiates a new State object, configured by _opts_.
  #
  # _opts_ can have the following keys:
  #
  # * *indent*: a string used to indent levels (default: ''),
  # * *space*: a string that is put after, a : or , delimiter (default: ''),
  # * *space_before*: a string that is put before a : pair delimiter (default: ''),
  # * *object_nl*: a string that is put at the end of a JSON object (default: ''),
  # * *array_nl*: a string that is put at the end of a JSON array (default: ''),
  # * *check_circular*: true if checking for circular data structures
  #   should be done, false (the default) otherwise.
  def initialize(opts = {})
    @indent, @space, @space_before, @object_nl, @array_nl =
      [:indent, :space, :space_before, :object_nl, :array_nl].map { |key| opts[key] || '' }
    @check_circular = opts[:check_circular] ? true : false
    # Maps the __id__ of every object currently being generated to true.
    @seen = {}
  end

  # The string used to indent levels in the JSON text.
  attr_accessor :indent

  # The string inserted as a space between the tokens in a JSON string.
  attr_accessor :space

  # The string inserted as a space before the ':' in JSON objects.
  attr_accessor :space_before

  # The string put at the end of a line that holds a JSON object (or Hash).
  attr_accessor :object_nl

  # The string put at the end of a line that holds a JSON array.
  attr_accessor :array_nl

  # Returns true, if circular data structures should be checked, otherwise
  # returns false.
  def check_circular?
    @check_circular
  end

  # Returns _true_, if _object_ was already seen during this generating run.
  def seen?(object)
    @seen.include?(object.__id__)
  end

  # Remember _object_, to find out if it is encountered again (which happens
  # if a cyclic data structure is rendered).
  def remember(object)
    @seen[object.__id__] = true
  end

  # Forget _object_ for this generating run.
  def forget(object)
    @seen.delete(object.__id__)
  end
end
module GeneratorMethods
module Object
# Fallback, if no special method #to_json was defined for some object:
# converts this object to a string (calling #to_s), converts that string to
# a JSON string, and returns the result. All arguments are ignored.
def to_json(*)
  to_s.to_json
end
end
module Hash
# Returns a JSON string containing a JSON object, that is unparsed from
# this Hash instance.
# _state_ is a JSON::State object, that can also be used to configure the
# produced JSON string output further.
# _depth_ is used to find out nesting depth, to indent accordingly.
def to_json(state = nil, depth = 0, *)
if state
state = JSON.state.from_state(state)
json_check_circular(state) { json_transform(state, depth) }
else
json_transform(state, depth)
end
end
private
# Runs the given block while guarding this Hash against circular
# references, and returns the value of the block.
#
# When _state_ is given and its check_circular? flag is set, this Hash is
# registered in _state_ for the duration of the block; meeting it again
# while it is still registered raises JSON::CircularDatastructure. Without
# a state, or with checking switched off, the block is simply yielded to.
def json_check_circular(state)
  remembered = false
  if state and state.check_circular?
    state.seen?(self) and raise JSON::CircularDatastructure,
      "circular data structures not supported!"
    state.remember self
    remembered = true
  end
  yield
ensure
  # Only undo our own registration: the call that detects the cycle must
  # not clear the mark that the enclosing call set for the same object.
  state.forget self if remembered
end
# Returns the indentation string for nesting level _depth_: the state's
# indent repeated _depth_ times, or an empty string when there is no
# state or the state's object_nl is empty.
def json_shift(state, depth)
  return '' unless state
  return '' if state.object_nl.empty?
  state.indent * depth
end
# Builds the JSON object text for this Hash. Every key is converted with
# #to_s before being rendered; keys and values are rendered one nesting
# level deeper than _depth_. When _state_ is given, its object_nl,
# space_before, space and (via json_shift) indent strings are woven in.
def json_transform(state, depth)
  newline = state ? state.object_nl : ''
  inner = depth + 1
  members = map do |key, value|
    member = json_shift(state, inner)
    member << key.to_s.to_json(state, inner)
    member << state.space_before if state
    member << ':'
    member << state.space if state
    member << value.to_json(state, inner)
  end
  json = '{' + newline
  json << members.join(',' + newline)
  json << newline << json_shift(state, depth) << '}'
  json
end
end
module Array
# Produces the JSON array text for this Array instance.
# _state_ is a JSON::State object (or something JSON.state.from_state can
# turn into one) that configures the produced output; without it the
# compact form is generated and no circularity bookkeeping takes place.
# _depth_ is the current nesting depth, used to indent accordingly.
def to_json(state = nil, depth = 0, *)
  return json_transform(state, depth) unless state
  state = JSON.state.from_state(state)
  json_check_circular(state) { json_transform(state, depth) }
end
private
# Runs the given block while guarding this Array against circular
# references, and returns the value of the block.
#
# When _state_ is given and its check_circular? flag is set, this Array is
# registered in _state_ for the duration of the block; meeting it again
# while it is still registered raises JSON::CircularDatastructure. Without
# a state, or with checking switched off, the block is simply yielded to.
def json_check_circular(state)
  remembered = false
  if state and state.check_circular?
    state.seen?(self) and raise JSON::CircularDatastructure,
      "circular data structures not supported!"
    state.remember self
    remembered = true
  end
  yield
ensure
  # Only undo our own registration: the call that detects the cycle must
  # not clear the mark that the enclosing call set for the same object.
  state.forget self if remembered
end
# Returns the indentation string for nesting level _depth_: the state's
# indent repeated _depth_ times, or an empty string when there is no
# state or the state's array_nl is empty.
def json_shift(state, depth)
  return '' unless state
  return '' if state.array_nl.empty?
  state.indent * depth
end
# Builds the JSON array text for this Array. Elements are rendered one
# nesting level deeper than _depth_. When _state_ is given, its array_nl
# and (via json_shift) indent strings are woven in.
def json_transform(state, depth)
  newline = state ? state.array_nl : ''
  inner = depth + 1
  elements = map do |value|
    json_shift(state, inner) << value.to_json(state, inner)
  end
  json = '[' + newline
  json << elements.join(',' + newline)
  json << newline << json_shift(state, depth) << ']'
  json
end
end
module Integer
# The JSON representation of this Integer number is simply its #to_s
# form. Any arguments are accepted and ignored.
def to_json(*)
  to_s
end
end
module Float
# The JSON representation of this Float number is simply its #to_s form.
# Any arguments are accepted and ignored.
def to_json(*)
  to_s
end
end
module String
# This string should be encoded with UTF-8. A call to this method
# returns a JSON string encoded with UTF16 big endian characters as
# \u????: the result of JSON.utf8_to_json for this string, wrapped in
# double quotes. Any arguments are accepted and ignored.
def to_json(*)
  quoted = '"'
  quoted << JSON.utf8_to_json(self)
  quoted << '"'
  quoted
end
# Module that holds the extending methods, if the String module is
# included.
module Extend
# Raw Strings are represented as JSON objects whose "raw" member holds the
# raw bytes as an array of numbers. This module method rebuilds the Ruby
# String from such an object _o_.
def json_create(o)
  bytes = o['raw']
  bytes.pack('C*')
end
end
# Hook called when this module gets included: extends the including
# _modul_ with the String::Extend module.
def self.included(modul)
  modul.extend(Extend)
end
# Builds a raw object hash for this string: the JSON.create_id key maps to
# the name of this object's class and 'raw' maps to the bytes of the
# string as an array of numbers. The hash can be nested into other data
# structures and will be unparsed as a raw string. Use this to convert
# raw strings (e. g. binary data) to JSON instead of UTF-8 strings.
def to_json_raw_object
  raw_object = {}
  raw_object[JSON.create_id] = self.class.name
  raw_object['raw'] = self.unpack('C*')
  raw_object
end
# Creates a JSON text from the raw object hash of this String (see
# to_json_raw_object); all arguments are handed on to its #to_json.
def to_json_raw(*args)
  raw_object = to_json_raw_object
  raw_object.to_json(*args)
end
end
module TrueClass
# The JSON text for true is always 'true'; arguments are ignored.
def to_json(*)
  'true'
end
end
module FalseClass
# The JSON text for false is always 'false'; arguments are ignored.
def to_json(*)
  'false'
end
end
module NilClass
# The JSON text for nil is always 'null'; arguments are ignored.
def to_json(*)
  'null'
end
end
end
end
end
end

241
lib/json/pure/parser.rb Normal file
View file

@ -0,0 +1,241 @@
require 'strscan'
module JSON
module Pure
# This class implements the JSON parser that is used to parse a JSON string
# into a Ruby data structure.
class Parser < StringScanner
STRING = /" ((?:[^\x0-\x1f"\\] |
\\["\\\/bfnrt] |
\\u[0-9a-fA-F]{4} |
\\[\x20-\xff])*)
"/nx
INTEGER = /(-?0|-?[1-9]\d*)/
FLOAT = /(-?
(?:0|[1-9]\d*)
(?:
\.\d+(?i:e[+-]?\d+) |
\.\d+ |
(?i:e[+-]?\d+)
)
)/x
OBJECT_OPEN = /\{/
OBJECT_CLOSE = /\}/
ARRAY_OPEN = /\[/
ARRAY_CLOSE = /\]/
PAIR_DELIMITER = /:/
COLLECTION_DELIMITER = /,/
TRUE = /true/
FALSE = /false/
NULL = /null/
IGNORE = %r(
(?:
//[^\n\r]*[\n\r]| # line comments
/\* # c-style comments
(?:
[^*/]| # normal chars
/[^*]| # slashes that do not start a nested comment
\*[^/]| # asterisks that do not end this comment
/(?=\*/) # single slash before this comment's end
)*
\*/ # the End of this comment
|[ \t\r\n]+ # whitespaces: space, horicontal tab, lf, cr
)+
)mx
UNPARSED = Object.new
# Sets up a new JSON::Pure::Parser instance for the string _source_.
#
# The _opts_ hash configures the parser and can have the following keys:
# * *max_nesting*: The maximum depth of nesting allowed in the parsed data
#   structures. It is 19 when the key is absent; a false or nil value is
#   stored as 0, which disables depth checking.
def initialize(source, opts = {})
  super
  @max_nesting = opts.key?(:max_nesting) ? (opts[:max_nesting] || 0) : 19
  @create_id = JSON.create_id
end
alias source string
# Parses the current JSON string _source_ from its beginning and returns
# the complete data structure. Exactly one top level object or array is
# accepted, optionally surrounded by ignorable text; anything else raises
# a ParserError.
def parse
  reset
  obj = nil
  until eos?
    if scan(OBJECT_OPEN)
      raise ParserError, "source '#{peek(20)}' not in JSON!" if obj
      @current_nesting = 1
      obj = parse_object
    elsif scan(ARRAY_OPEN)
      raise ParserError, "source '#{peek(20)}' not in JSON!" if obj
      @current_nesting = 1
      obj = parse_array
    elsif !skip(IGNORE)
      raise ParserError, "source '#{peek(20)}' not in JSON!"
    end
  end
  raise ParserError, "source did not contain any JSON!" unless obj
  obj
end
private
# Unescape characters in strings.
UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
UNESCAPE_MAP.update({
?" => '"',
?\\ => '\\',
?/ => '/',
?b => "\b",
?f => "\f",
?n => "\n",
?r => "\r",
?t => "\t",
?u => nil,
})
# Scans a JSON string literal at the current position and returns its
# contents with the escape sequences replaced; returns UNPARSED if no
# string literal starts here. An Iconv::Failure raised during the
# conversion is re-raised as GeneratorError.
def parse_string
if scan(STRING)
# Shortcut: an empty literal contains nothing to unescape.
return '' if self[1].empty?
# Replace every escape sequence. A run of consecutive \uXXXX escapes is
# matched as one unit, so the whole run is converted in a single call.
self[1].gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) do |c|
# NOTE(review): c[1] is presumably the code of the character following
# the backslash (Integer-indexed String#[]) — confirm for the Ruby
# version this file targets.
if u = UNESCAPE_MAP[c[1]]
u
else # \uXXXX
# Collect two bytes per \uXXXX escape, first hex pair first, and let
# JSON::UTF16toUTF8 convert the collected bytes.
bytes = ''
i = 0
while c[6 * i] == ?\\ && c[6 * i + 1] == ?u
bytes << c[6 * i + 2, 2].to_i(16) << c[6 * i + 4, 2].to_i(16)
i += 1
end
JSON::UTF16toUTF8.iconv(bytes)
end
end
else
UNPARSED
end
rescue Iconv::Failure => e
raise GeneratorError, "Caught #{e.class}: #{e}"
end
# Parses one JSON value at the current position and returns it as a Ruby
# object; returns UNPARSED if no value starts here. Floats are tried
# before integers. While a nested array or object is parsed, the current
# nesting level is raised by one.
def parse_value
  return Float(self[1]) if scan(FLOAT)
  return Integer(self[1]) if scan(INTEGER)
  return true if scan(TRUE)
  return false if scan(FALSE)
  return nil if scan(NULL)
  string = parse_string
  return string if string != UNPARSED
  if scan(ARRAY_OPEN)
    @current_nesting += 1
    ary = parse_array
    @current_nesting -= 1
    ary
  elsif scan(OBJECT_OPEN)
    @current_nesting += 1
    obj = parse_object
    @current_nesting -= 1
    obj
  else
    UNPARSED
  end
end
# Parses the elements of a JSON array whose opening bracket has already
# been consumed, up to and including the closing bracket, and returns
# them as an Array.
#
# Raises NestingError if the current nesting level exceeds the configured
# maximum (a maximum of 0 disables this check). Raises ParserError on a
# missing or dangling ',', on an unexpected token, and when the input
# ends before the closing ']' was seen.
def parse_array
  raise NestingError, "nesting of #@current_nesting is too deep" if
    @max_nesting.nonzero? && @current_nesting > @max_nesting
  result = []
  delim = false   # true while a ',' is waiting for its next element
  closed = false  # true once the closing ']' has been consumed
  until eos?
    case
    when (value = parse_value) != UNPARSED
      delim = false
      result << value
      skip(IGNORE)
      if scan(COLLECTION_DELIMITER)
        delim = true
      elsif match?(ARRAY_CLOSE)
        ; # the closing bracket is consumed by the next loop iteration
      else
        raise ParserError, "expected ',' or ']' in array at '#{peek(20)}'!"
      end
    when scan(ARRAY_CLOSE)
      if delim
        raise ParserError, "expected next element in array at '#{peek(20)}'!"
      end
      closed = true
      break
    when skip(IGNORE)
      ;
    else
      raise ParserError, "unexpected token in array at '#{peek(20)}'!"
    end
  end
  # Running out of input is only fine after the closing bracket.
  closed or raise ParserError, "unexpected end of input in array!"
  result
end
# Parses the members of a JSON object whose opening brace has already
# been consumed, up to and including the closing brace, and returns them
# as a Hash. If the Hash has an entry under the create id and the class
# named there answers true to json_creatable?, the result of that class's
# json_create for the Hash is returned instead.
#
# Raises NestingError if the current nesting level exceeds the configured
# maximum (a maximum of 0 disables this check). Raises ParserError on a
# missing ':', a missing value, a missing or dangling ',', an unexpected
# token, and when the input ends before the closing '}' was seen.
def parse_object
  raise NestingError, "nesting of #@current_nesting is too deep" if
    @max_nesting.nonzero? && @current_nesting > @max_nesting
  result = {}
  delim = false   # true while a ',' is waiting for its next pair
  closed = false  # true once the closing '}' has been consumed
  until eos?
    case
    when (string = parse_string) != UNPARSED
      skip(IGNORE)
      unless scan(PAIR_DELIMITER)
        raise ParserError, "expected ':' in object at '#{peek(20)}'!"
      end
      skip(IGNORE)
      unless (value = parse_value).equal? UNPARSED
        result[string] = value
        delim = false
        skip(IGNORE)
        if scan(COLLECTION_DELIMITER)
          delim = true
        elsif match?(OBJECT_CLOSE)
          ; # the closing brace is consumed by the next loop iteration
        else
          raise ParserError, "expected ',' or '}' in object at '#{peek(20)}'!"
        end
      else
        raise ParserError, "expected value in object at '#{peek(20)}'!"
      end
    when scan(OBJECT_CLOSE)
      if delim
        raise ParserError, "expected next name, value pair in object at '#{peek(20)}'!"
      end
      closed = true
      if klassname = result[@create_id]
        klass = JSON.deep_const_get klassname
        result = klass.json_create(result) if klass and klass.json_creatable?
      end
      break
    when skip(IGNORE)
      ;
    else
      raise ParserError, "unexpected token in object at '#{peek(20)}'!"
    end
  end
  # Running out of input is only fine after the closing brace.
  closed or raise ParserError, "unexpected end of input in object!"
  result
end
end
end
end

9
lib/json/version.rb Executable file
View file

@ -0,0 +1,9 @@
module JSON
  # JSON version
  VERSION         = '1.1.0'
  # The version string split at the dots, each part converted to an Integer.
  VERSION_ARRAY   = VERSION.split('.').map { |part| part.to_i } # :nodoc:
  VERSION_MAJOR   = VERSION_ARRAY.at(0) # :nodoc:
  VERSION_MINOR   = VERSION_ARRAY.at(1) # :nodoc:
  VERSION_BUILD   = VERSION_ARRAY.at(2) # :nodoc:
  VARIANT_BINARY  = false
end

View file

@ -0,0 +1 @@
"A JSON payload should be an object or array, not a string."

View file

@ -0,0 +1 @@
{"Extra value after close": true} "misplaced quoted value"

View file

@ -0,0 +1 @@
{"Illegal expression": 1 + 2}

View file

@ -0,0 +1 @@
{"Illegal invocation": alert()}

View file

@ -0,0 +1 @@
{"Numbers cannot have leading zeroes": 013}

View file

@ -0,0 +1 @@
{"Numbers cannot be hex": 0x14}

View file

@ -0,0 +1 @@
[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]] // No, we don't limit our depth: Moved to pass...

View file

@ -0,0 +1 @@
{"Missing colon" null}

View file

@ -0,0 +1 @@
["Unclosed array"

View file

@ -0,0 +1 @@
{"Double colon":: null}

View file

@ -0,0 +1 @@
{"Comma instead of colon", null}

View file

@ -0,0 +1 @@
["Colon instead of comma": false]

View file

@ -0,0 +1 @@
["Bad value", truth]

View file

@ -0,0 +1 @@
['single quote']

View file

@ -0,0 +1 @@
["tab character in string "]

View file

@ -0,0 +1,2 @@
["line
break"]

View file

@ -0,0 +1,2 @@
["line\
break"]

View file

@ -0,0 +1 @@
{unquoted_key: "keys must be quoted"}

View file

@ -0,0 +1 @@
["extra comma",]

View file

@ -0,0 +1 @@
["double extra comma",,]

View file

@ -0,0 +1 @@
[ , "<-- missing value"]

View file

@ -0,0 +1 @@
["Comma after the close"],

View file

@ -0,0 +1 @@
["Extra close"]]

View file

@ -0,0 +1 @@
{"Extra comma": true,}

View file

@ -0,0 +1,56 @@
[
"JSON Test Pattern pass1",
{"object with 1 member":["array with 1 element"]},
{},
[],
-42,
true,
false,
null,
{
"integer": 1234567890,
"real": -9876.543210,
"e": 0.123456789e-12,
"E": 1.234567890E+34,
"": 23456789012E666,
"zero": 0,
"one": 1,
"space": " ",
"quote": "\"",
"backslash": "\\",
"controls": "\b\f\n\r\t",
"slash": "/ & \/",
"alpha": "abcdefghijklmnopqrstuvwyz",
"ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
"digit": "0123456789",
"special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
"hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
"true": true,
"false": false,
"null": null,
"array":[ ],
"object":{ },
"address": "50 St. James Street",
"url": "http://www.JSON.org/",
"comment": "// /* <!-- --",
"# -- --> */": " ",
" s p a c e d " :[1,2 , 3
,
4 , 5 , 6 ,7 ],
"compact": [1,2,3,4,5,6,7],
"jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
"quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
"\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
: "A key can be any string"
},
0.5 ,98.6
,
99.44
,
1066
,"rosebud"]

View file

@ -0,0 +1 @@
["Illegal backslash escape: \x15"]

View file

@ -0,0 +1 @@
["Illegal backslash escape: \'"]

View file

@ -0,0 +1 @@
["Illegal backslash escape: \017"]

View file

@ -0,0 +1 @@
[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]

View file

@ -0,0 +1 @@
["tab\ character\ in\ string\ "]

View file

@ -0,0 +1,6 @@
{
"JSON Test Pattern pass3": {
"The outermost value": "must be an object or array.",
"In this test": "It is an object."
}
}

24
test/json/runner.rb Normal file
View file

@ -0,0 +1,24 @@
#!/usr/bin/env ruby
require 'test/unit/ui/console/testrunner'
require 'test/unit/testsuite'
$:.unshift File.expand_path(File.dirname($0))
$:.unshift 'tests'
require 'test_json'
require 'test_json_generate'
require 'test_json_unicode'
require 'test_json_addition'
require 'test_json_fixtures'
class TS_AllTests
  # Builds one suite, named after this class, that holds the suites of all
  # JSON test cases in the order listed here, and returns it.
  def self.suite
    all = Test::Unit::TestSuite.new(name)
    [
      TC_JSONGenerate,
      TC_JSON,
      TC_JSONUnicode,
      TC_JSONAddition,
      TC_JSONFixtures,
    ].each { |test_case| all << test_case.suite }
    all
  end
end
Test::Unit::UI::Console::TestRunner.run(TS_AllTests)
# vim: set et sw=2 ts=2:

255
test/json/test_json.rb Executable file
View file

@ -0,0 +1,255 @@
#!/usr/bin/env ruby
require 'test/unit'
require 'json'
class TC_JSON < Test::Unit::TestCase
include JSON
def setup
$KCODE = 'UTF8'
@ary = [1, "foo", 3.14, 4711.0, 2.718, nil, [1,-2,3], false, true].map do
|x| [x]
end
@ary_to_parse = ["1", '"foo"', "3.14", "4711.0", "2.718", "null",
"[1,-2,3]", "false", "true"].map do
|x| "[#{x}]"
end
@hash = {
'a' => 2,
'b' => 3.141,
'c' => 'c',
'd' => [ 1, "b", 3.14 ],
'e' => { 'foo' => 'bar' },
'g' => "\"\0\037",
'h' => 1000.0,
'i' => 0.001
}
@json = '{"a":2,"b":3.141,"c":"c","d":[1,"b",3.14],"e":{"foo":"bar"},' +
'"g":"\\"\\u0000\\u001f","h":1.0E3,"i":1.0E-3}'
end
suite << TC_JSON.suite
def test_construction
parser = JSON::Parser.new('test')
assert_equal 'test', parser.source
end
# Asserts that the first element of _expected_ and the first element of
# _is_ differ by at most 1e-2. Both arguments are sent #first (the tests
# below pass one-element arrays); only those first elements are compared.
def assert_equal_float(expected, is)
assert_in_delta(expected.first, is.first, 1e-2)
end
def test_parse_simple_arrays
assert_equal([], parse('[]'))
assert_equal([], parse(' [ ] '))
assert_equal([nil], parse('[null]'))
assert_equal([false], parse('[false]'))
assert_equal([true], parse('[true]'))
assert_equal([-23], parse('[-23]'))
assert_equal([23], parse('[23]'))
assert_equal([0.23], parse('[0.23]'))
assert_equal([0.0], parse('[0e0]'))
assert_raises(JSON::ParserError) { parse('[+23.2]') }
assert_raises(JSON::ParserError) { parse('[+23]') }
assert_raises(JSON::ParserError) { parse('[.23]') }
assert_raises(JSON::ParserError) { parse('[023]') }
assert_equal_float [3.141], parse('[3.141]')
assert_equal_float [-3.141], parse('[-3.141]')
assert_equal_float [3.141], parse('[3141e-3]')
assert_equal_float [3.141], parse('[3141.1e-3]')
assert_equal_float [3.141], parse('[3141E-3]')
assert_equal_float [3.141], parse('[3141.0E-3]')
assert_equal_float [-3.141], parse('[-3141.0e-3]')
assert_equal_float [-3.141], parse('[-3141e-3]')
assert_equal([""], parse('[""]'))
assert_equal(["foobar"], parse('["foobar"]'))
assert_equal([{}], parse('[{}]'))
end
def test_parse_simple_objects
assert_equal({}, parse('{}'))
assert_equal({}, parse(' { } '))
assert_equal({ "a" => nil }, parse('{ "a" : null}'))
assert_equal({ "a" => nil }, parse('{"a":null}'))
assert_equal({ "a" => false }, parse('{ "a" : false } '))
assert_equal({ "a" => false }, parse('{"a":false}'))
assert_raises(JSON::ParserError) { parse('{false}') }
assert_equal({ "a" => true }, parse('{"a":true}'))
assert_equal({ "a" => true }, parse(' { "a" : true } '))
assert_equal({ "a" => -23 }, parse(' { "a" : -23 } '))
assert_equal({ "a" => -23 }, parse(' { "a" : -23 } '))
assert_equal({ "a" => 23 }, parse('{"a":23 } '))
assert_equal({ "a" => 23 }, parse(' { "a" : 23 } '))
assert_equal({ "a" => 0.23 }, parse(' { "a" : 0.23 } '))
assert_equal({ "a" => 0.23 }, parse(' { "a" : 0.23 } '))
end
begin
require 'permutation'
def test_parse_more_complex_arrays
a = [ nil, false, true, "foßbar", [ "n€st€d", true ], { "nested" => true, "n€ßt€ð2" => {} }]
perms = Permutation.for a
perms.each do |perm|
orig_ary = perm.project
json = pretty_generate(orig_ary)
assert_equal orig_ary, parse(json)
end
end
def test_parse_complex_objects
a = [ nil, false, true, "foßbar", [ "n€st€d", true ], { "nested" => true, "n€ßt€ð2" => {} }]
perms = Permutation.for a
perms.each do |perm|
s = "a"
orig_obj = perm.project.inject({}) { |h, x| h[s.dup] = x; s = s.succ; h }
json = pretty_generate(orig_obj)
assert_equal orig_obj, parse(json)
end
end
rescue LoadError
warn "Skipping permutation tests."
end
def test_parse_arrays
assert_equal([1,2,3], parse('[1,2,3]'))
assert_equal([1.2,2,3], parse('[1.2,2,3]'))
assert_equal([[],[[],[]]], parse('[[],[[],[]]]'))
end
def test_parse_values
assert_equal([""], parse('[""]'))
assert_equal(["\\"], parse('["\\\\"]'))
assert_equal(['"'], parse('["\""]'))
assert_equal(['\\"\\'], parse('["\\\\\\"\\\\"]'))
assert_equal(["\"\b\n\r\t\0\037"],
parse('["\"\b\n\r\t\u0000\u001f"]'))
for i in 0 ... @ary.size
assert_equal(@ary[i], parse(@ary_to_parse[i]))
end
end
def test_parse_array
assert_equal([], parse('[]'))
assert_equal([], parse(' [ ] '))
assert_equal([1], parse('[1]'))
assert_equal([1], parse(' [ 1 ] '))
assert_equal(@ary,
parse('[[1],["foo"],[3.14],[47.11e+2],[2718.0E-3],[null],[[1,-2,3]]'\
',[false],[true]]'))
assert_equal(@ary, parse(%Q{ [ [1] , ["foo"] , [3.14] \t , [47.11e+2]
, [2718.0E-3 ],\r[ null] , [[1, -2, 3 ]], [false ],[ true]\n ] }))
end
def test_parse_object
assert_equal({}, parse('{}'))
assert_equal({}, parse(' { } '))
assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}'))
assert_equal({'foo'=>'bar'}, parse(' { "foo" : "bar" } '))
end
def test_parser_reset
parser = Parser.new(@json)
assert_equal(@hash, parser.parse)
assert_equal(@hash, parser.parse)
end
def test_comments
json = <<EOT
{
"key1":"value1", // eol comment
"key2":"value2" /* multi line
* comment */,
"key3":"value3" /* multi line
// nested eol comment
* comment */
}
EOT
assert_equal(
{ "key1" => "value1", "key2" => "value2", "key3" => "value3" },
parse(json))
json = <<EOT
{
"key1":"value1" /* multi line
// nested eol comment
/* illegal nested multi line comment */
* comment */
}
EOT
assert_raises(ParserError) { parse(json) }
json = <<EOT
{
"key1":"value1" /* multi line
// nested eol comment
closed multi comment */
and again, throw an Error */
}
EOT
assert_raises(ParserError) { parse(json) }
json = <<EOT
{
"key1":"value1" /*/*/
}
EOT
assert_equal({ "key1" => "value1" }, parse(json))
end
def test_backslash
data = [ '\\.(?i:gif|jpe?g|png)$' ]
json = '["\\\\.(?i:gif|jpe?g|png)$"]'
assert_equal json, JSON.unparse(data)
assert_equal data, JSON.parse(json)
#
data = [ '\\"' ]
json = '["\\\\\""]'
assert_equal json, JSON.unparse(data)
assert_equal data, JSON.parse(json)
#
json = '["\/"]'
data = JSON.parse(json)
assert_equal ['/'], data
assert_equal json, JSON.unparse(data)
#
json = '["\""]'
data = JSON.parse(json)
assert_equal ['"'], data
assert_equal json, JSON.unparse(data)
json = '["\\\'"]'
data = JSON.parse(json)
assert_equal ["'"], data
assert_equal '["\'"]', JSON.unparse(data)
end
def test_wrong_inputs
assert_raises(ParserError) { JSON.parse('"foo"') }
assert_raises(ParserError) { JSON.parse('123') }
assert_raises(ParserError) { JSON.parse('[] bla') }
assert_raises(ParserError) { JSON.parse('[] 1') }
assert_raises(ParserError) { JSON.parse('[] []') }
assert_raises(ParserError) { JSON.parse('[] {}') }
assert_raises(ParserError) { JSON.parse('{} []') }
assert_raises(ParserError) { JSON.parse('{} {}') }
assert_raises(ParserError) { JSON.parse('[NULL]') }
assert_raises(ParserError) { JSON.parse('[FALSE]') }
assert_raises(ParserError) { JSON.parse('[TRUE]') }
assert_raises(ParserError) { JSON.parse('[07] ') }
assert_raises(ParserError) { JSON.parse('[0a]') }
assert_raises(ParserError) { JSON.parse('[1.]') }
assert_raises(ParserError) { JSON.parse(' ') }
end
def test_nesting
to_deep = '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]'
assert_raises(JSON::NestingError) { JSON.parse to_deep }
assert_raises(JSON::NestingError) { JSON.parser.new(to_deep).parse }
assert_raises(JSON::NestingError) { JSON.parse to_deep, :max_nesting => 19 }
ok = JSON.parse to_deep, :max_nesting => 20
assert_kind_of Array, ok
ok = JSON.parse to_deep, :max_nesting => nil
assert_kind_of Array, ok
ok = JSON.parse to_deep, :max_nesting => false
assert_kind_of Array, ok
ok = JSON.parse to_deep, :max_nesting => 0
assert_kind_of Array, ok
end
end
# vim: set et sw=2 ts=2:

94
test/json/test_json_addition.rb Executable file
View file

@ -0,0 +1,94 @@
#!/usr/bin/env ruby
require 'test/unit'
require 'json'
class TC_JSONAddition < Test::Unit::TestCase
include JSON
class A
def initialize(a)
@a = a
end
attr_reader :a
def ==(other)
a == other.a
end
def self.json_create(object)
new(*object['args'])
end
def to_json(*args)
{
'json_class' => self.class,
'args' => [ @a ],
}.to_json(*args)
end
end
class B
def to_json(*args)
{
'json_class' => self.class,
}.to_json(*args)
end
end
class C
def to_json(*args)
{
'json_class' => 'TC_JSONAddition::Nix',
}.to_json(*args)
end
end
def setup
$KCODE = 'UTF8'
end
def test_extended_json
a = A.new(666)
assert A.json_creatable?
json = generate(a)
a_again = JSON.parse(json)
assert_kind_of a.class, a_again
assert_equal a, a_again
end
# B emits a 'json_class' entry but is not json_creatable?, so parsing its
# JSON text must hand back the plain Hash instead of an instance of B.
# The method carries a name of its own: under the name
# test_extended_json_fail it was replaced by the later definition of the
# same name in this class and was never run.
def test_extended_json_fail1
  b = B.new
  assert !B.json_creatable?
  json = generate(b)
  assert_equal({ 'json_class' => B.name }, JSON.parse(json))
end
def test_extended_json_fail
c = C.new
assert !C.json_creatable?
json = generate(c)
assert_raises(ArgumentError) { JSON.parse(json) }
end
def test_raw_strings
raw = ''
raw_array = []
for i in 0..255
raw << i
raw_array << i
end
json = raw.to_json_raw
json_raw_object = raw.to_json_raw_object
hash = { 'json_class' => 'String', 'raw'=> raw_array }
assert_equal hash, json_raw_object
json_raw = <<EOT.chomp
{\"json_class\":\"String\",\"raw\":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255]}
EOT
# "
assert_equal json_raw, json
raw_again = JSON.parse(json)
assert_equal raw, raw_again
end
end

30
test/json/test_json_fixtures.rb Executable file
View file

@ -0,0 +1,30 @@
#!/usr/bin/env ruby
require 'test/unit'
require 'json'
class TC_JSONFixtures < Test::Unit::TestCase
# Loads all fixtures/*.json files located next to this file and splits
# them into @passed and @failed, each a sorted list of [ path, content ]
# pairs. A fixture counts as passing if its file name contains 'pass';
# only the base name is inspected, so the name of the directory the
# sources were checked out into cannot influence the classification.
def setup
  $KCODE = 'UTF8'
  fixtures = File.join(File.dirname(__FILE__), 'fixtures/*.json')
  passed, failed = Dir[fixtures].partition { |f| File.basename(f)['pass'] }
  @passed = passed.map { |f| [ f, File.read(f) ] }.sort
  @failed = failed.map { |f| [ f, File.read(f) ] }.sort
end
def test_passing
for name, source in @passed
assert JSON.parse(source),
"Did not pass for fixture '#{name}'"
end
end
def test_failing
for name, source in @failed
assert_raises(JSON::ParserError, JSON::NestingError,
"Did not fail for fixture '#{name}'") do
JSON.parse(source)
end
end
end
end

81
test/json/test_json_generate.rb Executable file
View file

@ -0,0 +1,81 @@
require 'test/unit'
require 'json'
class TC_JSONGenerate < Test::Unit::TestCase
include JSON
def setup
$KCODE = 'UTF8'
@hash = {
'a' => 2,
'b' => 3.141,
'c' => 'c',
'd' => [ 1, "b", 3.14 ],
'e' => { 'foo' => 'bar' },
'g' => "\"\0\037",
'h' => 1000.0,
'i' => 0.001
}
@json2 = '{"a":2,"b":3.141,"c":"c","d":[1,"b",3.14],"e":{"foo":"bar"},' +
'"g":"\\"\\u0000\\u001f","h":1000.0,"i":0.001}'
@json3 = <<'EOT'.chomp
{
"a": 2,
"b": 3.141,
"c": "c",
"d": [
1,
"b",
3.14
],
"e": {
"foo": "bar"
},
"g": "\"\u0000\u001f",
"h": 1000.0,
"i": 0.001
}
EOT
end
def test_unparse
json = unparse(@hash)
assert_equal(@json2, json)
parsed_json = parse(json)
assert_equal(@hash, parsed_json)
json = generate({1=>2})
assert_equal('{"1":2}', json)
parsed_json = parse(json)
assert_equal({"1"=>2}, parsed_json)
end
def test_unparse_pretty
json = pretty_unparse(@hash)
assert_equal(@json3, json)
parsed_json = parse(json)
assert_equal(@hash, parsed_json)
json = pretty_generate({1=>2})
assert_equal(<<'EOT'.chomp, json)
{
"1": 2
}
EOT
parsed_json = parse(json)
assert_equal({"1"=>2}, parsed_json)
end
def test_states
json = generate({1=>2}, nil)
assert_equal('{"1":2}', json)
s = JSON.state.new(:check_circular => true)
#assert s.check_circular
h = { 1=>2 }
h[3] = h
assert_raises(JSON::CircularDatastructure) { generate(h, s) }
s = JSON.state.new(:check_circular => true)
#assert s.check_circular
a = [ 1, 2 ]
a << a
assert_raises(JSON::CircularDatastructure) { generate(a, s) }
end
end

59
test/json/test_json_unicode.rb Executable file
View file

@ -0,0 +1,59 @@
#!/usr/bin/env ruby
require 'test/unit'
require 'json'
class TC_JSONUnicode < Test::Unit::TestCase
include JSON
def setup
$KCODE = 'UTF8'
end
# Checks generating and parsing of strings with control characters and
# non-ASCII characters: characters outside of ASCII are expected to be
# written as \uXXXX escapes and to be read back as UTF-8.
def test_unicode
  assert_equal '""', ''.to_json
  assert_equal '"\\b"', "\b".to_json
  assert_equal '"\u0001"', 0x1.chr.to_json
  assert_equal '"\u001f"', 0x1f.chr.to_json
  assert_equal '" "', ' '.to_json
  assert_equal "\"#{0x7f.chr}\"", 0x7f.chr.to_json
  utf8 = [ "© ≠ €! \01" ]
  json = '["\u00a9 \u2260 \u20ac! \u0001"]'
  assert_equal json, utf8.to_json
  assert_equal utf8, parse(json)
  utf8 = ["\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"]
  json = "[\"\\u3042\\u3044\\u3046\\u3048\\u304a\"]"
  assert_equal json, utf8.to_json
  assert_equal utf8, parse(json)
  utf8 = ['საქართველო']
  json = "[\"\\u10e1\\u10d0\\u10e5\\u10d0\\u10e0\\u10d7\\u10d5\\u10d4\\u10da\\u10dd\"]"
  assert_equal json, utf8.to_json
  assert_equal utf8, parse(json)
  assert_equal '["\\u00c3"]', JSON.generate(["Ã"])
  # \u20ac is the euro sign, so parsing must yield that character.
  assert_equal ["€"], JSON.parse('["\u20ac"]')
  utf8 = ["\xf0\xa0\x80\x81"]
  json = '["\ud840\udc01"]'
  assert_equal json, JSON.generate(utf8)
  assert_equal utf8, JSON.parse(json)
end
def test_chars
(0..0x7f).each do |i|
c = ('%c' % i)[0] # c is a character object
json = '["\u%04x"]' % i
assert_equal c, JSON.parse(json).first
if c == ?\b
generated = JSON.generate(["" << i])
assert '["\b"]' == generated || '["\10"]' == generated
elsif [?\n, ?\r, ?\t, ?\f].include?(c)
assert_equal '[' << ('' << i).dump << ']', JSON.generate(["" << i])
elsif i < 0x20
assert_equal json, JSON.generate(["" << i])
end
end
assert_raises(JSON::GeneratorError) do
JSON.generate(["" << 0x80])
end
assert_equal "\302\200", JSON.parse('["\u0080"]').first
end
end