ruby--ruby/marshal.c

869 lines
16 KiB
C
Raw Normal View History

/************************************************
marshal.c -
$Author$
$Revision$
$Date$
created at: Thu Apr 27 16:30:01 JST 1995
************************************************/
#include "ruby.h"
#include "io.h"
#include "st.h"
#define MARSHAL_MAJOR 4
#define MARSHAL_MINOR 0
#define TYPE_NIL '0'
#define TYPE_TRUE 'T'
#define TYPE_FALSE 'F'
#define TYPE_FIXNUM 'i'
#define TYPE_UCLASS 'C'
#define TYPE_OBJECT 'o'
#define TYPE_USERDEF 'u'
#define TYPE_FLOAT 'f'
#define TYPE_BIGNUM 'l'
#define TYPE_STRING '"'
#define TYPE_REGEXP '/'
#define TYPE_ARRAY '['
#define TYPE_HASH '{'
#define TYPE_STRUCT 'S'
#define TYPE_MODULE 'M'
#define TYPE_SYMBOL ':'
#define TYPE_SYMLINK ';'
#define TYPE_LINK '@'
extern VALUE cString;
extern VALUE cRegexp;
extern VALUE cArray;
extern VALUE cHash;
VALUE rb_path2class();
static ID s_dump, s_load;
struct dump_arg {
VALUE obj;
FILE *fp;
VALUE str;
st_table *symbol;
st_table *data;
};
struct dump_call_arg {
VALUE obj;
struct dump_arg *arg;
int limit;
};
static void w_long _((long, struct dump_arg*));
static void
w_byte(c, arg)
char c;
struct dump_arg *arg;
{
if (arg->fp) putc(c, arg->fp);
else str_cat(arg->str, (UCHAR*)&c, 1);
}
static void
w_bytes(s, n, arg)
char *s;
int n;
struct dump_arg *arg;
{
w_long(n, arg);
if (arg->fp) {
fwrite(s, 1, n, arg->fp);
}
else {
str_cat(arg->str, s, n);
}
}
static void
w_short(x, arg)
int x;
struct dump_arg *arg;
{
int i;
for (i=0; i<sizeof(USHORT); i++) {
w_byte((x >> (i*8)) & 0xff, arg);
}
}
static void
w_long(x, arg)
long x;
struct dump_arg *arg;
{
char buf[sizeof(long)+1];
int i, len = 0;
if (x == 0) {
w_byte(0, arg);
return;
}
for (i=1;i<sizeof(long)+1;i++) {
buf[i] = x & 0xff;
x = RSHIFT(x,8);
if (x == 0) {
buf[0] = i;
break;
}
if (x == -1) {
buf[0] = -i;
break;
}
}
len = i;
for (i=0;i<=len;i++) {
w_byte(buf[i], arg);
}
}
static void
w_float(d, arg)
double d;
struct dump_arg *arg;
{
char buf[100];
sprintf(buf, "%.12g", d);
w_bytes(buf, strlen(buf), arg);
}
static void
w_symbol(id, arg)
ID id;
struct dump_arg *arg;
{
char *sym = rb_id2name(id);
int num;
if (st_lookup(arg->symbol, id, &num)) {
w_byte(TYPE_SYMLINK, arg);
w_long(num, arg);
}
else {
w_byte(TYPE_SYMBOL, arg);
w_bytes(sym, strlen(sym), arg);
st_insert(arg->symbol, id, arg->symbol->num_entries);
}
}
static void
w_unique(s, arg)
char *s;
struct dump_arg *arg;
{
w_symbol(rb_intern(s), arg);
}
static void w_object _((VALUE,struct dump_arg*,int));
extern VALUE cIO, cBignum, cStruct;
static int
hash_each(key, value, arg)
VALUE key, value;
struct dump_call_arg *arg;
{
w_object(key, arg->arg, arg->limit);
w_object(value, arg->arg, arg->limit);
return ST_CONTINUE;
}
static int
obj_each(id, value, arg)
ID id;
VALUE value;
struct dump_call_arg *arg;
{
w_symbol(id, arg->arg);
w_object(value, arg->arg, arg->limit);
return ST_CONTINUE;
}
static void
w_uclass(obj, class, arg)
VALUE obj, class;
struct dump_arg *arg;
{
if (CLASS_OF(obj) != class) {
w_byte(TYPE_UCLASS, arg);
w_unique(rb_class2name(CLASS_OF(obj)), arg);
}
}
static void
w_object(obj, arg, limit)
VALUE obj;
struct dump_arg *arg;
int limit;
{
int n;
struct dump_call_arg c_arg;
if (limit == 0) {
Fail("exceed depth limit");
}
limit--;
c_arg.limit = limit;
c_arg.arg = arg;
if (obj == Qnil) {
w_byte(TYPE_NIL, arg);
}
else if (obj == TRUE) {
w_byte(TYPE_TRUE, arg);
}
else if (obj == FALSE) {
w_byte(TYPE_FALSE, arg);
}
else if (FIXNUM_P(obj)) {
#if SIZEOF_LONG <= 4
w_byte(TYPE_FIXNUM, arg);
w_long(FIX2INT(obj), arg);
#else
if (RSHIFT(obj, 32) == 0 || RSHIFT(obj, 32) == -1) {
w_byte(TYPE_FIXNUM, arg);
w_long(FIX2INT(obj), arg);
}
else {
obj = int2big(FIX2INT(obj));
goto write_bignum;
}
#endif
}
else {
int num;
if (st_lookup(arg->data, obj, &num)) {
w_byte(TYPE_LINK, arg);
w_long(num, arg);
return;
}
st_insert(arg->data, obj, arg->data->num_entries);
if (rb_respond_to(obj, s_dump)) {
VALUE v;
w_byte(TYPE_USERDEF, arg);
w_unique(rb_class2name(CLASS_OF(obj)), arg);
v = rb_funcall(obj, s_dump, 1, limit);
if (TYPE(v) != T_STRING) {
TypeError("_dump_to must return String");
}
w_bytes(RSTRING(v)->ptr, RSTRING(v)->len, arg);
return;
}
switch (BUILTIN_TYPE(obj)) {
case T_MODULE:
case T_CLASS:
w_byte(TYPE_MODULE, arg);
{
VALUE path = rb_class_path(obj);
w_bytes(RSTRING(path)->ptr, RSTRING(path)->len, arg);
}
return;
case T_FLOAT:
w_byte(TYPE_FLOAT, arg);
w_float(RFLOAT(obj)->value, arg);
return;
case T_BIGNUM:
write_bignum:
w_byte(TYPE_BIGNUM, arg);
{
char sign = RBIGNUM(obj)->sign?'+':'-';
int len = RBIGNUM(obj)->len;
USHORT *d = RBIGNUM(obj)->digits;
w_byte(sign, arg);
w_long(len, arg);
while (len--) {
w_short(*d, arg);
d++;
}
}
return;
case T_STRING:
w_uclass(obj, cString, arg);
w_byte(TYPE_STRING, arg);
w_bytes(RSTRING(obj)->ptr, RSTRING(obj)->len, arg);
return;
case T_REGEXP:
w_uclass(obj, cRegexp, arg);
w_byte(TYPE_REGEXP, arg);
w_bytes(RREGEXP(obj)->str, RREGEXP(obj)->len, arg);
w_byte(FL_TEST(obj, FL_USER1), arg);
return;
case T_ARRAY:
w_uclass(obj, cArray, arg);
w_byte(TYPE_ARRAY, arg);
{
int len = RARRAY(obj)->len;
VALUE *ptr = RARRAY(obj)->ptr;
w_long(len, arg);
while (len--) {
w_object(*ptr, arg, limit);
ptr++;
}
}
break;
case T_HASH:
w_uclass(obj, cHash, arg);
w_byte(TYPE_HASH, arg);
w_long(RHASH(obj)->tbl->num_entries, arg);
st_foreach(RHASH(obj)->tbl, hash_each, &c_arg);
break;
case T_STRUCT:
w_byte(TYPE_STRUCT, arg);
{
int len = RSTRUCT(obj)->len;
char *path = rb_class2name(CLASS_OF(obj));
VALUE mem;
int i;
w_unique(path, arg);
w_long(len, arg);
mem = rb_ivar_get(CLASS_OF(obj), rb_intern("__member__"));
if (mem == Qnil) {
Fatal("non-initialized struct");
}
for (i=0; i<len; i++) {
w_symbol(FIX2INT(RARRAY(mem)->ptr[i]), arg);
w_object(RSTRUCT(obj)->ptr[i], arg, limit);
}
}
break;
case T_OBJECT:
w_byte(TYPE_OBJECT, arg);
{
VALUE class = CLASS_OF(obj);
char *path;
if (FL_TEST(class, FL_SINGLETON)) {
TypeError("singleton can't be dumped");
}
path = rb_class2name(class);
w_unique(path, arg);
if (ROBJECT(obj)->iv_tbl) {
w_long(ROBJECT(obj)->iv_tbl->num_entries, arg);
st_foreach(ROBJECT(obj)->iv_tbl, obj_each, &c_arg);
}
else {
w_long(0, arg);
}
}
break;
default:
TypeError("can't dump %s", rb_class2name(CLASS_OF(obj)));
break;
}
}
}
static VALUE
dump(arg)
struct dump_call_arg *arg;
{
w_object(arg->obj, arg->arg, arg->limit);
}
static VALUE
dump_ensure(arg)
struct dump_arg *arg;
{
st_free_table(arg->symbol);
st_free_table(arg->data);
}
static VALUE
marshal_dump(argc, argv)
int argc;
VALUE* argv;
{
VALUE obj, port, a1, a2;
int limit = -1;
extern VALUE cIO;
struct dump_arg arg;
struct dump_call_arg c_arg;
port = 0;
rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
if (argc == 3) {
limit = NUM2INT(a2);
port = a1;
}
else if (argc == 2) {
if (FIXNUM_P(a1)) limit = FIX2INT(a1);
else port = a1;
}
if (port) {
if (obj_is_kind_of(port, cIO)) {
OpenFile *fptr;
io_binmode(port);
GetOpenFile(port, fptr);
io_writable(fptr);
arg.fp = (fptr->f2) ? fptr->f2 : fptr->f;
}
else {
TypeError("instance of IO needed");
}
}
else {
arg.fp = 0;
port = str_new(0, 0);
arg.str = port;
}
arg.symbol = st_init_numtable();
arg.data = st_init_numtable();
c_arg.obj = obj;
c_arg.arg = &arg;
c_arg.limit = limit;
w_byte(MARSHAL_MAJOR, &arg);
w_byte(MARSHAL_MINOR, &arg);
rb_ensure(dump, &c_arg, dump_ensure, &arg);
return port;
}
struct load_arg {
FILE *fp;
UCHAR *ptr, *end;
st_table *symbol;
st_table *data;
VALUE proc;
};
static int
r_byte(arg)
struct load_arg *arg;
{
if (arg->fp) return getc(arg->fp);
if (arg->ptr < arg->end) return *arg->ptr++;
return EOF;
}
static USHORT
r_short(arg)
struct load_arg *arg;
{
USHORT x;
int i;
x = 0;
for (i=0; i<sizeof(USHORT); i++) {
x |= r_byte(arg)<<(i*8);
}
return x;
}
static void
long_toobig(size)
int size;
{
TypeError("long too big for this architecture (size %d, given %d)",
sizeof(long), size);
}
static long
r_long(arg)
struct load_arg *arg;
{
int c = r_byte(arg), i;
register long x;
if (c == 0) return 0;
if (c > 0) {
if (c > sizeof(long)) long_toobig((int)c);
x = 0;
for (i=0;i<c;i++) {
x |= (long)r_byte(arg) << (8*i);
}
}
else if (c < 0) {
c = -c;
if (c > sizeof(long)) long_toobig((int)c);
x = -1;
for (i=0;i<c;i++) {
x &= ~(0xff << (8*i));
x |= (long)r_byte(arg) << (8*i);
}
}
return x;
}
#define r_bytes(s, arg) \
(s = (char*)r_long(arg), r_bytes0(&s,ALLOCA_N(char,(long)s),(long)s,arg))
static int
r_bytes0(sp, s, len, arg)
char **sp, *s;
int len;
struct load_arg *arg;
{
if (arg->fp) {
len = fread(s, 1, len, arg->fp);
}
else {
if (arg->ptr + len > arg->end) {
len = arg->end - arg->ptr;
}
memcpy(s, arg->ptr, len);
arg->ptr += len;
}
(s)[len] = '\0';
*sp = s;
return len;
}
static ID
r_symbol(arg)
struct load_arg *arg;
{
char *buf;
ID id;
char type;
if (r_byte(arg) == TYPE_SYMLINK) {
int num = r_long(arg);
if (st_lookup(arg->symbol, num, &id)) {
return id;
}
TypeError("bad symbol");
}
r_bytes(buf, arg);
id = rb_intern(buf);
st_insert(arg->symbol, arg->symbol->num_entries, id);
return id;
}
static char*
r_unique(arg)
struct load_arg *arg;
{
return rb_id2name(r_symbol(arg));
}
static VALUE
r_string(arg)
struct load_arg *arg;
{
char *buf;
int len = r_bytes(buf, arg);
return str_taint(str_new(buf, len));
}
static VALUE
r_regist(v, arg)
VALUE v;
struct load_arg *arg;
{
if (arg->proc) {
rb_funcall(arg->proc, rb_intern("call"), 1, v);
}
st_insert(arg->data, arg->data->num_entries, v);
return v;
}
static VALUE
r_object(arg)
struct load_arg *arg;
{
VALUE v;
int type = r_byte(arg);
switch (type) {
case EOF:
eof_error();
return Qnil;
case TYPE_LINK:
if (st_lookup(arg->data, r_long(arg), &v)) {
return v;
}
ArgError("dump format error (unlinked)");
break;
case TYPE_UCLASS:
{
VALUE c = rb_path2class(r_unique(arg));
v = r_object(arg);
if (rb_special_const_p(v)) {
ArgError("dump format error (user class)");
}
RBASIC(v)->class = c;
return v;
}
case TYPE_NIL:
return Qnil;
case TYPE_TRUE:
return TRUE;
case TYPE_FALSE:
return FALSE;
case TYPE_FIXNUM:
{
int i = r_long(arg);
return INT2FIX(i);
}
case TYPE_FLOAT:
{
#ifndef atof
double atof();
#endif
char *buf;
r_bytes(buf, arg);
v = float_new(atof(buf));
return r_regist(v, arg);
}
case TYPE_BIGNUM:
{
int len;
USHORT *digits;
NEWOBJ(big, struct RBignum);
OBJSETUP(big, cBignum, T_BIGNUM);
big->sign = (r_byte(arg) == '+');
big->len = len = r_long(arg);
big->digits = digits = ALLOC_N(USHORT, len);
while (len--) {
*digits++ = r_short(arg);
}
big = RBIGNUM(big_norm((VALUE)big));
if (TYPE(big) == T_BIGNUM) {
r_regist(big, arg);
}
return (VALUE)big;
}
case TYPE_STRING:
return r_regist(r_string(arg), arg);
case TYPE_REGEXP:
{
char *buf;
int len = r_bytes(buf, arg);
int ci = r_byte(arg);
return r_regist(reg_new(buf, len, ci), arg);
}
case TYPE_ARRAY:
{
volatile int len = r_long(arg);
v = ary_new2(len);
r_regist(v, arg);
while (len--) {
ary_push(v, r_object(arg));
}
return v;
}
case TYPE_HASH:
{
int len = r_long(arg);
v = hash_new();
r_regist(v, arg);
while (len--) {
VALUE key = r_object(arg);
VALUE value = r_object(arg);
hash_aset(v, key, value);
}
return v;
}
case TYPE_STRUCT:
{
VALUE class, mem, values;
volatile int i; /* gcc 2.7.2.3 -O2 bug?? */
int len;
ID slot;
class = rb_path2class(r_unique(arg));
mem = rb_ivar_get(class, rb_intern("__member__"));
if (mem == Qnil) {
Fatal("non-initialized struct");
}
len = r_long(arg);
values = ary_new2(len);
for (i=0; i<len; i++) {
ary_push(values, Qnil);
}
v = struct_alloc(class, values);
r_regist(v, arg);
for (i=0; i<len; i++) {
slot = r_symbol(arg);
if (RARRAY(mem)->ptr[i] != INT2FIX(slot)) {
TypeError("struct %s not compatible (:%s for :%s)",
rb_class2name(class),
rb_id2name(slot),
rb_id2name(FIX2INT(RARRAY(mem)->ptr[i])));
}
struct_aset(v, INT2FIX(i), r_object(arg));
}
return v;
}
break;
case TYPE_USERDEF:
{
VALUE class;
int len;
class = rb_path2class(r_unique(arg));
if (rb_respond_to(class, s_load)) {
v = rb_funcall(class, s_load, 1, r_string(arg));
return r_regist(v, arg);
}
TypeError("class %s needs to have method `_load_from'",
rb_class2name(class));
}
break;
case TYPE_OBJECT:
{
VALUE class;
int len;
class = rb_path2class(r_unique(arg));
len = r_long(arg);
v = obj_alloc(class);
r_regist(v, arg);
if (len > 0) {
while (len--) {
ID id = r_symbol(arg);
VALUE val = r_object(arg);
rb_ivar_set(v, id, val);
}
}
return v;
}
break;
case TYPE_MODULE:
{
char *buf;
r_bytes(buf, arg);
return rb_path2class(buf);
}
default:
ArgError("dump format error(0x%x)", type);
break;
}
}
static VALUE
load(arg)
struct load_arg *arg;
{
return r_object(arg);
}
static VALUE
load_ensure(arg)
struct load_arg *arg;
{
st_free_table(arg->symbol);
st_free_table(arg->data);
}
static VALUE
marshal_load(argc, argv)
int argc;
VALUE *argv;
{
VALUE port, proc;
FILE *fp;
int major;
VALUE v;
OpenFile *fptr;
struct load_arg arg;
rb_scan_args(argc, argv, "11", &port, &proc);
if (TYPE(port) == T_STRING) {
arg.fp = 0;
arg.ptr = RSTRING(port)->ptr;
arg.end = arg.ptr + RSTRING(port)->len;
}
else {
if (obj_is_kind_of(port, cIO)) {
io_binmode(port);
GetOpenFile(port, fptr);
io_readable(fptr);
arg.fp = fptr->f;
}
else {
TypeError("instance of IO needed");
}
}
major = r_byte(&arg);
if (major == MARSHAL_MAJOR) {
if (r_byte(&arg) != MARSHAL_MINOR) {
Warning("Old marshal file format (can be read)");
}
arg.symbol = st_init_numtable();
arg.data = st_init_numtable();
if (NIL_P(proc)) arg.proc = 0;
else arg.proc = proc;
v = rb_ensure(load, &arg, load_ensure, &arg);
}
else {
TypeError("Old marshal file format (can't read)");
}
return v;
}
Init_marshal()
{
VALUE mMarshal = rb_define_module("Marshal");
s_dump = rb_intern("_dump_to");
s_load = rb_intern("_load_from");
rb_define_module_function(mMarshal, "dump", marshal_dump, -1);
rb_define_module_function(mMarshal, "load", marshal_load, -1);
rb_define_module_function(mMarshal, "restore", marshal_load, 1);
rb_provide("marshal.o"); /* for backward compatibility */
}