mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
91c798b670
former. [ruby-dev:22609] * ext/iconv/iconv.c (iconv_create): raise InvalidEncoding exception when EINVAL. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5493 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
892 lines
22 KiB
C
892 lines
22 KiB
C
/* -*- mode:c; c-file-style:"ruby" -*- */
|
||
/**********************************************************************
|
||
|
||
iconv.c -
|
||
|
||
$Author$
|
||
$Date$
|
||
created at: Wed Dec 1 20:28:09 JST 1999
|
||
|
||
All the files in this distribution are covered under the Ruby's
|
||
license (see the file COPYING).
|
||
|
||
**********************************************************************/
|
||
|
||
/*
|
||
=begin
|
||
= Summary
|
||
Ruby extension for codeset conversion.
|
||
|
||
= Abstract
|
||
Iconv is a wrapper class for UNIX 95 (({iconv()})) function family, which
|
||
translates string between various coding systems.
|
||
|
||
See ((<Open Group|URL:http://www.opengroup.org/>))'s on-line documents for more details.
|
||
* ((<iconv.h|URL:http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html>))
|
||
* ((<iconv_open()|URL:http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html>))
|
||
* ((<iconv()|URL:http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.html>))
|
||
* ((<iconv_close()|URL:http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html>))
|
||
|
||
Which coding systems are available depends on the platform.
|
||
|
||
=end
|
||
*/
|
||
|
||
#include "ruby.h"
|
||
#include <errno.h>
|
||
#include <iconv.h>
|
||
#include <assert.h>
|
||
#include "st.h"
|
||
#include "intern.h"
|
||
|
||
/*
 * An invalid iconv_t is (iconv_t)-1 while the "nothing here" VALUE is 0.
 * Complementing every bit maps one onto the other, so a missing or closed
 * descriptor is stored as 0.  This relies on VALUE being wide enough to
 * hold an iconv_t.
 */
#define VALUE2ICONV(v) ((iconv_t)(~(VALUE)(v)))
#define ICONV2VALUE(c) (~(VALUE)(c))
|
||
|
||
struct iconv_env_t
|
||
{
|
||
iconv_t cd;
|
||
int argc;
|
||
VALUE *argv;
|
||
VALUE ret;
|
||
VALUE (*append)_((VALUE, VALUE));
|
||
};
|
||
|
||
static VALUE rb_eIconvInvalidEncoding;
|
||
static VALUE rb_eIconvFailure;
|
||
static VALUE rb_eIconvIllegalSeq;
|
||
static VALUE rb_eIconvInvalidChar;
|
||
static VALUE rb_eIconvOutOfRange;
|
||
|
||
static ID rb_success, rb_failed;
|
||
static VALUE iconv_fail _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
|
||
static VALUE iconv_failure_initialize _((VALUE error, VALUE mesg, VALUE success, VALUE failed));
|
||
static VALUE iconv_failure_success _((VALUE self));
|
||
static VALUE iconv_failure_failed _((VALUE self));
|
||
|
||
static iconv_t iconv_create _((VALUE to, VALUE from));
|
||
static void iconv_dfree _((void *cd));
|
||
static VALUE iconv_free _((VALUE cd));
|
||
static VALUE iconv_try _((iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen));
|
||
static VALUE rb_str_derive _((VALUE str, const char* ptr, int len));
|
||
static VALUE iconv_convert _((iconv_t cd, VALUE str, int start, int length, struct iconv_env_t* env));
|
||
static VALUE iconv_s_allocate _((VALUE klass));
|
||
static VALUE iconv_initialize _((VALUE self, VALUE to, VALUE from));
|
||
static VALUE iconv_s_open _((VALUE self, VALUE to, VALUE from));
|
||
static VALUE iconv_s_convert _((struct iconv_env_t* env));
|
||
static VALUE iconv_s_iconv _((int argc, VALUE *argv, VALUE self));
|
||
static VALUE iconv_init_state _((VALUE cd));
|
||
static VALUE iconv_finish _((VALUE self));
|
||
static VALUE iconv_iconv _((int argc, VALUE *argv, VALUE self));
|
||
|
||
|
||
/*
|
||
=begin
|
||
= Classes & Modules
|
||
=end
|
||
*/
|
||
|
||
/*
|
||
=begin
|
||
== Iconv
|
||
=end
|
||
*/
|
||
static VALUE charset_map;
|
||
|
||
static VALUE charset_map_get _((void))
|
||
{
|
||
return charset_map;
|
||
}
|
||
|
||
static char *
|
||
map_charset
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE *code)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(code)
|
||
VALUE *code;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
VALUE val = *code;
|
||
|
||
if (RHASH(charset_map)->tbl && RHASH(charset_map)->tbl->num_entries) {
|
||
VALUE key = rb_funcall2(val, rb_intern("downcase"), 0, 0);
|
||
StringValuePtr(key);
|
||
if (st_lookup(RHASH(charset_map)->tbl, key, &val)) {
|
||
*code = val;
|
||
}
|
||
}
|
||
return StringValuePtr(*code);
|
||
}
|
||
|
||
static iconv_t
|
||
iconv_create
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE to, VALUE from)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(to, from)
|
||
VALUE to;
|
||
VALUE from;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
const char* tocode = map_charset(&to);
|
||
const char* fromcode = map_charset(&from);
|
||
|
||
iconv_t cd = iconv_open(tocode, fromcode);
|
||
|
||
if (cd == (iconv_t)-1) {
|
||
switch (errno) {
|
||
case EMFILE:
|
||
case ENFILE:
|
||
case ENOMEM:
|
||
rb_gc();
|
||
cd = iconv_open(tocode, fromcode);
|
||
}
|
||
if (cd == (iconv_t)-1) {
|
||
int inval = errno == EINVAL;
|
||
volatile VALUE msg = rb_str_new2("iconv(\"" + (inval ? 5 : 0));
|
||
char *s;
|
||
|
||
rb_str_buf_cat2(rb_str_buf_append(msg, to), "\", \"");
|
||
rb_str_buf_cat2(rb_str_buf_append(msg, from), "\")");
|
||
s = StringValuePtr(msg);
|
||
if (!inval) rb_sys_fail(s);
|
||
rb_raise(rb_eIconvInvalidEncoding, "invalid encoding %s", s);
|
||
}
|
||
}
|
||
|
||
return cd;
|
||
}
|
||
|
||
static void
|
||
iconv_dfree
|
||
#ifdef HAVE_PROTOTYPES
|
||
(void *cd)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(cd)
|
||
void *cd;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
iconv_close(VALUE2ICONV(cd));
|
||
}
|
||
|
||
#define ICONV_FREE iconv_dfree
|
||
|
||
static VALUE
|
||
iconv_free
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE cd)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(cd)
|
||
VALUE cd;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
if (cd && iconv_close(VALUE2ICONV(cd)) == -1)
|
||
rb_sys_fail("iconv_close");
|
||
return Qnil;
|
||
}
|
||
|
||
static VALUE
|
||
check_iconv
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE obj)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(obj)
|
||
VALUE obj;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
Check_Type(obj, T_DATA);
|
||
if (RDATA(obj)->dfree != ICONV_FREE) {
|
||
rb_raise(rb_eArgError, "Iconv expected (%s)", rb_class2name(CLASS_OF(obj)));
|
||
}
|
||
return (VALUE)DATA_PTR(obj);
|
||
}
|
||
|
||
static VALUE
|
||
iconv_try
|
||
#ifdef HAVE_PROTOTYPES
|
||
(iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(cd, inptr, inlen, outptr, outlen)
|
||
iconv_t cd;
|
||
const char **inptr;
|
||
size_t *inlen;
|
||
char **outptr;
|
||
size_t *outlen;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
size_t ret = iconv(cd, ICONV_INPTR_CAST inptr, inlen, outptr, outlen);
|
||
if (ret == (size_t)-1) {
|
||
if (!*inlen)
|
||
return Qfalse;
|
||
switch (errno) {
|
||
case E2BIG:
|
||
/* try the left in next loop */
|
||
break;
|
||
case EILSEQ:
|
||
return rb_eIconvIllegalSeq;
|
||
case EINVAL:
|
||
return rb_eIconvInvalidChar;
|
||
default:
|
||
rb_sys_fail("iconv");
|
||
}
|
||
}
|
||
else if (*inlen > 0) {
|
||
/* something goes wrong */
|
||
return rb_eIconvIllegalSeq;
|
||
}
|
||
else if (ret) {
|
||
return Qnil; /* conversion */
|
||
}
|
||
return Qfalse;
|
||
}
|
||
|
||
#define FAILED_MAXLEN 16
|
||
|
||
static VALUE iconv_failure_initialize
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE error, VALUE mesg, VALUE success, VALUE failed)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(error, mesg, success, failed)
|
||
VALUE error, mesg, success, failed;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
rb_call_super(1, &mesg);
|
||
rb_ivar_set(error, rb_success, success);
|
||
rb_ivar_set(error, rb_failed, failed);
|
||
return error;
|
||
}
|
||
|
||
static VALUE
|
||
iconv_fail
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(error, success, failed, env, mesg)
|
||
VALUE error, success, failed;
|
||
struct iconv_env_t *env;
|
||
const char *mesg;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
VALUE args[3];
|
||
|
||
if (mesg && *mesg) {
|
||
args[0] = rb_str_new2(mesg);
|
||
}
|
||
else if (TYPE(failed) != T_STRING || RSTRING(failed)->len < FAILED_MAXLEN) {
|
||
args[0] = rb_inspect(failed);
|
||
}
|
||
else {
|
||
args[0] = rb_inspect(rb_str_substr(failed, 0, FAILED_MAXLEN));
|
||
rb_str_cat2(args[0], "...");
|
||
}
|
||
args[1] = success;
|
||
args[2] = failed;
|
||
if (env) {
|
||
args[1] = env->append(rb_obj_dup(env->ret), success);
|
||
if (env->argc > 0) {
|
||
*(env->argv) = failed;
|
||
args[2] = rb_ary_new4(env->argc, env->argv);
|
||
}
|
||
}
|
||
error = rb_class_new_instance(3, args, error);
|
||
if (!rb_block_given_p()) rb_exc_raise(error);
|
||
ruby_errinfo = error;
|
||
return rb_yield(failed);
|
||
}
|
||
|
||
static VALUE
|
||
rb_str_derive
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE str, const char* ptr, int len)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(str, ptr, len)
|
||
VALUE str;
|
||
const char *ptr;
|
||
int len;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
VALUE ret;
|
||
|
||
if (NIL_P(str))
|
||
return rb_str_new(ptr, len);
|
||
if (RSTRING(str)->ptr == ptr && RSTRING(str)->len == len)
|
||
return str;
|
||
if (RSTRING(str)->ptr + RSTRING(str)->len == ptr + len)
|
||
ret = rb_str_substr(str, ptr - RSTRING(str)->ptr, len);
|
||
else
|
||
ret = rb_str_new(ptr, len);
|
||
OBJ_INFECT(ret, str);
|
||
return ret;
|
||
}
|
||
|
||
static VALUE
|
||
iconv_convert
|
||
#ifdef HAVE_PROTOTYPES
|
||
(iconv_t cd, VALUE str, int start, int length, struct iconv_env_t* env)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(cd, str, start, length, env)
|
||
iconv_t cd;
|
||
VALUE str;
|
||
int start;
|
||
int length;
|
||
struct iconv_env_t *env;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
VALUE ret = Qfalse;
|
||
VALUE error = Qfalse;
|
||
VALUE rescue;
|
||
const char *inptr, *instart;
|
||
size_t inlen;
|
||
/* I believe ONE CHARACTER never exceed this. */
|
||
char buffer[BUFSIZ];
|
||
char *outptr;
|
||
size_t outlen;
|
||
|
||
if (cd == (iconv_t)-1)
|
||
rb_raise(rb_eArgError, "closed iconv");
|
||
|
||
if (NIL_P(str)) {
|
||
/* Reset output pointer or something. */
|
||
inptr = "";
|
||
inlen = 0;
|
||
outptr = buffer;
|
||
outlen = sizeof(buffer);
|
||
error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
|
||
if (RTEST(error)) {
|
||
unsigned int i;
|
||
rescue = iconv_fail(error, Qnil, Qnil, env, 0);
|
||
if (TYPE(rescue) == T_ARRAY) {
|
||
str = RARRAY(rescue)->len > 0 ? RARRAY(rescue)->ptr[0] : Qnil;
|
||
}
|
||
if (FIXNUM_P(str) && (i = FIX2INT(str)) <= 0xff) {
|
||
char c = i;
|
||
str = rb_str_new(&c, 1);
|
||
}
|
||
else if (!NIL_P(str)) {
|
||
StringValue(str);
|
||
}
|
||
}
|
||
|
||
inptr = NULL;
|
||
length = 0;
|
||
}
|
||
else {
|
||
int slen;
|
||
|
||
StringValue(str);
|
||
slen = RSTRING(str)->len;
|
||
inptr = RSTRING(str)->ptr;
|
||
|
||
if (start < 0 ? (start += slen) < 0 : start >= slen)
|
||
length = 0;
|
||
else if (length < 0 && (length += slen + 1) < 0)
|
||
length = 0;
|
||
else if ((length -= start) < 0)
|
||
length = 0;
|
||
else
|
||
inptr += start;
|
||
}
|
||
instart = inptr;
|
||
inlen = length;
|
||
|
||
do {
|
||
char errmsg[50];
|
||
const char *tmpstart = inptr;
|
||
outptr = buffer;
|
||
outlen = sizeof(buffer);
|
||
|
||
errmsg[0] = 0;
|
||
error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
|
||
|
||
if (0 <= outlen && outlen <= sizeof(buffer)) {
|
||
outlen = sizeof(buffer) - outlen;
|
||
if (NIL_P(error) || /* something converted */
|
||
outlen > inptr - tmpstart || /* input can't contain output */
|
||
(outlen < inptr - tmpstart && inlen > 0) || /* something skipped */
|
||
memcmp(buffer, tmpstart, outlen)) /* something differs */
|
||
{
|
||
if (NIL_P(str)) {
|
||
ret = rb_str_new(buffer, outlen);
|
||
}
|
||
else {
|
||
if (ret) {
|
||
ret = rb_str_buf_cat(ret, instart, tmpstart - instart);
|
||
}
|
||
else {
|
||
ret = rb_str_new(instart, tmpstart - instart);
|
||
OBJ_INFECT(ret, str);
|
||
}
|
||
ret = rb_str_buf_cat(ret, buffer, outlen);
|
||
instart = inptr;
|
||
}
|
||
}
|
||
else if (!inlen) {
|
||
inptr = tmpstart + outlen;
|
||
}
|
||
}
|
||
else {
|
||
/* Some iconv() have a bug, return *outlen out of range */
|
||
sprintf(errmsg, "bug?(output length = %ld)", (long)(sizeof(buffer) - outlen));
|
||
error = rb_eIconvOutOfRange;
|
||
}
|
||
|
||
if (RTEST(error)) {
|
||
long len = 0;
|
||
|
||
if (!ret)
|
||
ret = rb_str_derive(str, instart, inptr - instart);
|
||
else if (inptr > instart)
|
||
rb_str_cat(ret, instart, inptr - instart);
|
||
str = rb_str_derive(str, inptr, inlen);
|
||
rescue = iconv_fail(error, ret, str, env, errmsg);
|
||
if (TYPE(rescue) == T_ARRAY) {
|
||
if ((len = RARRAY(rescue)->len) > 0)
|
||
rb_str_concat(ret, RARRAY(rescue)->ptr[0]);
|
||
if (len > 1 && !NIL_P(str = RARRAY(rescue)->ptr[1])) {
|
||
StringValue(str);
|
||
inlen = length = RSTRING(str)->len;
|
||
instart = inptr = RSTRING(str)->ptr;
|
||
continue;
|
||
}
|
||
}
|
||
else if (!NIL_P(rescue)) {
|
||
rb_str_concat(ret, rescue);
|
||
}
|
||
break;
|
||
}
|
||
} while (inlen > 0);
|
||
|
||
if (!ret)
|
||
ret = rb_str_derive(str, instart, inptr - instart);
|
||
else if (inptr > instart)
|
||
rb_str_cat(ret, instart, inptr - instart);
|
||
return ret;
|
||
}
|
||
|
||
|
||
/*
|
||
=begin
|
||
=== Class methods
|
||
=end
|
||
*/
|
||
/*
|
||
=begin
|
||
--- Iconv.new(to, from) {|cd| ...}
|
||
Creates new code converter from a coding-system designated with ((|from|))
|
||
to another one designated with ((|to|)).
|
||
:Parameters
|
||
:((|to|))
|
||
coding-system name for destination.
|
||
:((|from|))
|
||
coding-system name for source.
|
||
:Exceptions
|
||
:(({TypeError}))
|
||
if ((|to|)) or ((|from|)) aren't String
|
||
:(({ArgumentError}))
|
||
if the designated converter couldn't be found.
|
||
:(({SystemCallError}))
|
||
when (({iconv_open(3)})) failed.
|
||
|
||
--- Iconv.open(to, from)
|
||
Equivalent to ((<Iconv.new>)), except that when called with a block
it yields the new instance, closes it afterwards, and returns the
value returned by the block.
|
||
=end
|
||
*/
|
||
static VALUE
|
||
iconv_s_allocate
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE klass)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(klass)
|
||
VALUE klass;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
return Data_Wrap_Struct(klass, 0, ICONV_FREE, 0);
|
||
}
|
||
|
||
static VALUE
|
||
iconv_initialize
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE self, VALUE to, VALUE from)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(self, to, from)
|
||
VALUE self;
|
||
VALUE to;
|
||
VALUE from;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
iconv_free(check_iconv(self));
|
||
DATA_PTR(self) = NULL;
|
||
DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from));
|
||
return self;
|
||
}
|
||
|
||
static VALUE
|
||
iconv_s_open
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE self, VALUE to, VALUE from)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(self, to, from)
|
||
VALUE self;
|
||
VALUE to;
|
||
VALUE from;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
VALUE cd = ICONV2VALUE(iconv_create(to, from));
|
||
|
||
self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)cd);
|
||
if (rb_block_given_p()) {
|
||
return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
|
||
}
|
||
else {
|
||
return self;
|
||
}
|
||
}
|
||
|
||
/*
|
||
=begin
|
||
--- Iconv.iconv(to, from, *strs)
|
||
Shorthand for
|
||
Iconv.open(to, from) {|cd| (strs + [nil]).collect {|s| cd.iconv(s)}}
|
||
:Parameters
|
||
:((|to|)), ((|from|))
|
||
see ((<Iconv.new>)).
|
||
:((|strs|))
|
||
strings to be converted.
|
||
:Exceptions
|
||
exceptions thrown by ((<Iconv.new>)), ((<Iconv.open>)) and
|
||
((<Iconv#iconv>)).
|
||
=end
|
||
*/
|
||
|
||
static VALUE
|
||
iconv_s_convert
|
||
#ifdef HAVE_PROTOTYPES
|
||
(struct iconv_env_t* env)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(env)
|
||
struct iconv_env_t *env;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
VALUE last = 0;
|
||
|
||
for (; env->argc > 0; --env->argc, ++env->argv) {
|
||
VALUE s = iconv_convert(env->cd, last = *(env->argv), 0, -1, env);
|
||
env->append(env->ret, s);
|
||
}
|
||
|
||
if (!NIL_P(last)) {
|
||
VALUE s = iconv_convert(env->cd, Qnil, 0, 0, env);
|
||
if (RSTRING(s)->len)
|
||
env->append(env->ret, s);
|
||
}
|
||
|
||
return env->ret;
|
||
}
|
||
|
||
static VALUE
|
||
iconv_s_iconv
|
||
#ifdef HAVE_PROTOTYPES
|
||
(int argc, VALUE *argv, VALUE self)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(argc, argv, self)
|
||
int argc;
|
||
VALUE *argv;
|
||
VALUE self;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
struct iconv_env_t arg;
|
||
|
||
if (argc < 2) /* needs `to' and `from' arguments at least */
|
||
rb_raise(rb_eArgError, "wrong # of arguments (%d for %d)", argc, 2);
|
||
|
||
arg.argc = argc -= 2;
|
||
arg.argv = argv + 2;
|
||
arg.append = rb_ary_push;
|
||
arg.ret = rb_ary_new2(argc);
|
||
arg.cd = iconv_create(argv[0], argv[1]);
|
||
return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
|
||
}
|
||
|
||
static VALUE
|
||
iconv_s_conv
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE self, VALUE to, VALUE from, VALUE str)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(self, to, from, str)
|
||
VALUE self, to, from, str;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
struct iconv_env_t arg;
|
||
|
||
arg.argc = 1;
|
||
arg.argv = &str;
|
||
arg.append = rb_str_append;
|
||
arg.ret = rb_str_new(0, 0);
|
||
arg.cd = iconv_create(to, from);
|
||
return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
|
||
}
|
||
|
||
|
||
/*
|
||
=begin
|
||
=== Instance methods
|
||
=end
|
||
*/
|
||
/*
|
||
=begin
|
||
--- Iconv#close
|
||
Finishes conversion.
|
||
* After calling this, invoking method ((<Iconv#iconv>)) will cause
|
||
exception, but multiple calls of (({close})) are guaranteed to
|
||
end successfully.
|
||
* Returns a string containing the byte sequence that changes the
output buffer to its initial shift state.
|
||
=end
|
||
*/
|
||
static VALUE
|
||
iconv_init_state
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE cd)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(cd)
|
||
VALUE cd;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
return iconv_convert(VALUE2ICONV(cd), Qnil, 0, 0, NULL);
|
||
}
|
||
|
||
static VALUE
|
||
iconv_finish
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE self)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(self)
|
||
VALUE self;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
VALUE cd = check_iconv(self);
|
||
|
||
if (!cd) return Qnil;
|
||
DATA_PTR(self) = NULL;
|
||
|
||
return rb_ensure(iconv_init_state, cd, iconv_free, cd);
|
||
}
|
||
|
||
/*
|
||
=begin
|
||
--- Iconv#iconv(str, [ start = 0, [ length = -1 ] ])
|
||
Converts string and returns converted one.
|
||
* In the case of ((|str|)) is (({String})), converts (({str[start, length]})).
|
||
Returns converted string.
|
||
* If ((|str|)) is (({nil})), places the converter itself into its
initial shift state and returns a string containing the byte
sequence that changes the output buffer to its initial shift
state.
|
||
* Otherwise, causes exception.
|
||
:Parameters
|
||
:((|str|))
|
||
string to be converted or (({nil})).
|
||
:((|start|))
|
||
starting offset.
|
||
:((|length|))
|
||
conversion length,
|
||
(({nil})) or (({-1})) means whole string from (({start})).
|
||
:Exceptions
|
||
* ((<Iconv::IllegalSequence>))
|
||
* ((<Iconv::InvalidCharacter>))
|
||
* ((<Iconv::OutOfRange>))
|
||
=end
|
||
*/
|
||
static VALUE
|
||
iconv_iconv
|
||
#ifdef HAVE_PROTOTYPES
|
||
(int argc, VALUE *argv, VALUE self)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(argc, argv, self)
|
||
int argc;
|
||
VALUE *argv;
|
||
VALUE self;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
VALUE str, n1, n2;
|
||
VALUE cd = check_iconv(self);
|
||
|
||
n1 = n2 = Qnil;
|
||
rb_scan_args(argc, argv, "12", &str, &n1, &n2);
|
||
|
||
return iconv_convert(VALUE2ICONV(cd), str,
|
||
NIL_P(n1) ? 0 : NUM2INT(n1),
|
||
NIL_P(n2) ? -1 : NUM2INT(n1),
|
||
NULL);
|
||
}
|
||
|
||
|
||
/*
|
||
=begin
|
||
= Exceptions
|
||
=end
|
||
*/
|
||
/*
|
||
=begin
|
||
== Iconv::Failure
|
||
Base exceptional attributes from ((<Iconv>)).
|
||
|
||
=== Instance methods
|
||
=end
|
||
*/
|
||
/*
|
||
=begin
|
||
--- Iconv::Failure#success
|
||
Returns string(s) translated successfully until the exception occurred.
|
||
* If the failure occurred within ((<Iconv.iconv>)), the returned
value is an array of the strings translated successfully before
the failure; its last element is the string that was being converted.
|
||
=end
|
||
*/
|
||
static VALUE
|
||
iconv_failure_success
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE self)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(self)
|
||
VALUE self;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
return rb_attr_get(self, rb_success);
|
||
}
|
||
|
||
/*
|
||
=begin
|
||
--- Iconv::Failure#failed
|
||
Returns the substring of the original string passed to ((<Iconv>)) that
starts at the character that caused the exception.
|
||
=end
|
||
*/
|
||
static VALUE
|
||
iconv_failure_failed
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE self)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(self)
|
||
VALUE self;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
return rb_attr_get(self, rb_failed);
|
||
}
|
||
|
||
/*
|
||
=begin
|
||
--- Iconv::Failure#inspect
|
||
Returns an inspected string such as: #<(({type})): "(({success}))", "(({failed}))">
|
||
=end
|
||
*/
|
||
static VALUE
|
||
iconv_failure_inspect
|
||
#ifdef HAVE_PROTOTYPES
|
||
(VALUE self)
|
||
#else /* HAVE_PROTOTYPES */
|
||
(self)
|
||
VALUE self;
|
||
#endif /* HAVE_PROTOTYPES */
|
||
{
|
||
char *cname = rb_class2name(CLASS_OF(self));
|
||
VALUE success = rb_attr_get(self, rb_success);
|
||
VALUE failed = rb_attr_get(self, rb_failed);
|
||
VALUE str = rb_str_buf_cat2(rb_str_new2("#<"), cname);
|
||
str = rb_str_buf_cat(str, ": ", 2);
|
||
str = rb_str_buf_append(str, rb_inspect(success));
|
||
str = rb_str_buf_cat(str, ", ", 2);
|
||
str = rb_str_buf_append(str, rb_inspect(failed));
|
||
return rb_str_buf_cat(str, ">", 1);
|
||
}
|
||
|
||
/*
|
||
Hmmm, I don't like to write RD inside of function :-<.
|
||
|
||
=begin
|
||
== Iconv::IllegalSequence
|
||
Input conversion stopped due to an input byte that does not belong to
|
||
the input codeset, or the output codeset does not contain the
|
||
character.
|
||
=== Superclass
|
||
(({ArgumentError}))
|
||
=== Included Modules
|
||
((<Iconv::Failure>))
|
||
|
||
== Iconv::InvalidCharacter
|
||
Input conversion stopped due to an incomplete character or shift
|
||
sequence at the end of the input buffer.
|
||
=== Superclass
|
||
(({ArgumentError}))
|
||
=== Included Modules
|
||
((<Iconv::Failure>))
|
||
|
||
== Iconv::OutOfRange
|
||
Iconv library internal error. Must not occur.
|
||
=== Superclass
|
||
(({RuntimeError}))
|
||
=== Included Modules
|
||
((<Iconv::Failure>))
|
||
=end
|
||
*/
|
||
|
||
void
|
||
Init_iconv _((void))
|
||
{
|
||
VALUE rb_cIconv = rb_define_class("Iconv", rb_cData);
|
||
|
||
rb_define_alloc_func(rb_cIconv, iconv_s_allocate);
|
||
rb_define_singleton_method(rb_cIconv, "open", iconv_s_open, 2);
|
||
rb_define_singleton_method(rb_cIconv, "iconv", iconv_s_iconv, -1);
|
||
rb_define_singleton_method(rb_cIconv, "conv", iconv_s_conv, 3);
|
||
rb_define_method(rb_cIconv, "initialize", iconv_initialize, 2);
|
||
rb_define_method(rb_cIconv, "close", iconv_finish, 0);
|
||
rb_define_method(rb_cIconv, "iconv", iconv_iconv, -1);
|
||
|
||
rb_eIconvFailure = rb_define_module_under(rb_cIconv, "Failure");
|
||
rb_define_method(rb_eIconvFailure, "initialize", iconv_failure_initialize, 3);
|
||
rb_define_method(rb_eIconvFailure, "success", iconv_failure_success, 0);
|
||
rb_define_method(rb_eIconvFailure, "failed", iconv_failure_failed, 0);
|
||
rb_define_method(rb_eIconvFailure, "inspect", iconv_failure_inspect, 0);
|
||
|
||
rb_eIconvInvalidEncoding = rb_define_class_under(rb_cIconv, "InvalidEncoding", rb_eArgError);
|
||
rb_eIconvIllegalSeq = rb_define_class_under(rb_cIconv, "IllegalSequence", rb_eArgError);
|
||
rb_eIconvInvalidChar = rb_define_class_under(rb_cIconv, "InvalidCharacter", rb_eArgError);
|
||
rb_eIconvOutOfRange = rb_define_class_under(rb_cIconv, "OutOfRange", rb_eRuntimeError);
|
||
rb_include_module(rb_eIconvIllegalSeq, rb_eIconvFailure);
|
||
rb_include_module(rb_eIconvInvalidChar, rb_eIconvFailure);
|
||
rb_include_module(rb_eIconvOutOfRange, rb_eIconvFailure);
|
||
|
||
rb_success = rb_intern("success");
|
||
rb_failed = rb_intern("failed");
|
||
|
||
charset_map = rb_hash_new();
|
||
rb_gc_register_address(&charset_map);
|
||
rb_define_singleton_method(rb_cIconv, "charset_map", charset_map_get, 0);
|
||
}
|
||
|
||
|
||
/*
|
||
=begin
|
||
== Example
|
||
(1) Instantiate a new ((<Iconv>)), use method ((<Iconv#iconv>)).
|
||
cd = Iconv.new(to, from)
|
||
begin
|
||
input.each {|s| output << cd.iconv(s)}
|
||
output << cd.iconv(nil) # don't forget this
|
||
ensure
|
||
cd.close
|
||
end
|
||
(2) Invoke ((<Iconv.open>)) with a block.
|
||
Iconv.open(to, from) do |cd|
|
||
input.each {|s| output << cd.iconv(s)}
|
||
output << cd.iconv(nil)
|
||
end
|
||
(3) Shorthand for (2).
|
||
Iconv.iconv(to, from, *input.to_a)
|
||
=end
|
||
*/
|