mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* encoding.c (rb_enc_alias): allow an encoding to have multiple aliases.
* encoding.c (rb_enc_find_index): search the encoding which has the given name and return its index if found, or -1. * st.c (type_strcasehash): case-insensitive string hash type. * string.c (rb_str_force_encoding): force encoding of self. this name comes from [ruby-dev:31894] by Martin Duerst. [ruby-dev:31744] * include/ruby/encoding.h (rb_enc_find_index, rb_enc_associate_index): prototyped. * include/ruby/encoding.h (rb_enc_isctype): direct interface to ctype. * include/ruby/st.h (st_init_strcasetable): prototyped. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13556 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
534d057e58
commit
c351afc372
6 changed files with 128 additions and 12 deletions
19
ChangeLog
19
ChangeLog
|
@ -1,3 +1,22 @@
|
|||
Sat Sep 29 04:27:08 2007 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* encoding.c (rb_enc_alias): allow an encoding to have multiple aliases.
|
||||
|
||||
* encoding.c (rb_enc_find_index): search the encoding which has the
|
||||
given name and return its index if found, or -1.
|
||||
|
||||
* st.c (type_strcasehash): case-insensitive string hash type.
|
||||
|
||||
* string.c (rb_str_force_encoding): force encoding of self. this name
|
||||
comes from [ruby-dev:31894] by Martin Duerst. [ruby-dev:31744]
|
||||
|
||||
* include/ruby/encoding.h (rb_enc_find_index, rb_enc_associate_index):
|
||||
prototyped.
|
||||
|
||||
* include/ruby/encoding.h (rb_enc_isctype): direct interface to ctype.
|
||||
|
||||
* include/ruby/st.h (st_init_strcasetable): prototyped.
|
||||
|
||||
Sat Sep 29 03:53:26 2007 Koichi Sasada <ko1@atdot.net>
|
||||
|
||||
* cont.c: Thread local storage should be fiber local.
|
||||
|
|
51
encoding.c
51
encoding.c
|
@ -23,6 +23,7 @@ struct rb_encoding_entry {
|
|||
|
||||
static struct rb_encoding_entry *enc_table;
|
||||
static int enc_table_size;
|
||||
static st_table *enc_table_alias;
|
||||
|
||||
void
|
||||
rb_enc_register(const char *name, rb_encoding *encoding)
|
||||
|
@ -42,13 +43,26 @@ rb_enc_register(const char *name, rb_encoding *encoding)
|
|||
ent->enc = encoding;
|
||||
}
|
||||
|
||||
void
|
||||
rb_enc_alias(const char *alias, const char *orig)
|
||||
{
|
||||
if (!enc_table_alias) {
|
||||
enc_table_alias = st_init_strcasetable();
|
||||
}
|
||||
st_insert(enc_table_alias, (st_data_t)alias, (st_data_t)orig);
|
||||
}
|
||||
|
||||
void
|
||||
rb_enc_init(void)
|
||||
{
|
||||
rb_enc_register("ascii", ONIG_ENCODING_ASCII);
|
||||
rb_enc_register("sjis", ONIG_ENCODING_SJIS);
|
||||
rb_enc_register("euc-jp", ONIG_ENCODING_EUC_JP);
|
||||
rb_enc_register("utf-8", ONIG_ENCODING_UTF8);
|
||||
#define ENC_REGISTER(enc) rb_enc_register(rb_enc_name(enc), enc)
|
||||
ENC_REGISTER(ONIG_ENCODING_ASCII);
|
||||
ENC_REGISTER(ONIG_ENCODING_SJIS);
|
||||
ENC_REGISTER(ONIG_ENCODING_EUC_JP);
|
||||
ENC_REGISTER(ONIG_ENCODING_UTF8);
|
||||
#undef ENC_REGISTER
|
||||
rb_enc_alias("binary", "ascii");
|
||||
rb_enc_alias("sjis", "shift_jis");
|
||||
}
|
||||
|
||||
rb_encoding *
|
||||
|
@ -63,20 +77,37 @@ rb_enc_from_index(int index)
|
|||
return enc_table[index].enc;
|
||||
}
|
||||
|
||||
rb_encoding *
|
||||
rb_enc_find(const char *name)
|
||||
int
|
||||
rb_enc_find_index(const char *name)
|
||||
{
|
||||
int i;
|
||||
st_data_t alias = 0;
|
||||
|
||||
if (!name) return -1;
|
||||
if (!enc_table) {
|
||||
rb_enc_init();
|
||||
}
|
||||
find:
|
||||
for (i=0; i<enc_table_size; i++) {
|
||||
if (strcmp(name, enc_table[i].name) == 0) {
|
||||
return enc_table[i].enc;
|
||||
if (strcasecmp(name, enc_table[i].name) == 0) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return ONIG_ENCODING_ASCII;
|
||||
if (!alias && enc_table_alias) {
|
||||
if (st_lookup(enc_table_alias, (st_data_t)name, &alias)) {
|
||||
name = (const char *)alias;
|
||||
goto find;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
rb_encoding *
|
||||
rb_enc_find(const char *name)
|
||||
{
|
||||
rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(name));
|
||||
if (!enc) enc = ONIG_ENCODING_ASCII;
|
||||
return enc;
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -163,7 +194,7 @@ rb_enc_get_index(VALUE obj)
|
|||
{
|
||||
int i;
|
||||
|
||||
enc_check_capable(obj);
|
||||
if (!enc_capable(obj)) return -1;
|
||||
i = ENCODING_GET(obj);
|
||||
if (i == ENCODING_INLINE_MAX) {
|
||||
VALUE iv;
|
||||
|
|
|
@ -39,8 +39,10 @@ typedef OnigEncodingType rb_encoding;
|
|||
|
||||
int rb_enc_to_index(rb_encoding*);
|
||||
int rb_enc_get_index(VALUE obj);
|
||||
int rb_enc_find_index(const char *name);
|
||||
rb_encoding* rb_enc_get(VALUE);
|
||||
rb_encoding* rb_enc_check(VALUE,VALUE);
|
||||
void rb_enc_associate_index(VALUE, int);
|
||||
void rb_enc_associate(VALUE, rb_encoding*);
|
||||
void rb_enc_copy(VALUE, VALUE);
|
||||
|
||||
|
@ -76,6 +78,7 @@ int rb_enc_codelen(int, rb_encoding*);
|
|||
/* ptr, ptr, encoding -> prev_char */
|
||||
#define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p)
|
||||
|
||||
#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
|
||||
#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
|
||||
#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c)
|
||||
#define rb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER(enc,c)
|
||||
|
|
|
@ -71,6 +71,8 @@ st_table *st_init_numtable(void);
|
|||
st_table *st_init_numtable_with_size(int);
|
||||
st_table *st_init_strtable(void);
|
||||
st_table *st_init_strtable_with_size(int);
|
||||
st_table *st_init_strcasetable(void);
|
||||
st_table *st_init_strcasetable_with_size(int);
|
||||
int st_delete(st_table *, st_data_t *, st_data_t *);
|
||||
int st_delete_safe(st_table *, st_data_t *, st_data_t *, st_data_t);
|
||||
int st_insert(st_table *, st_data_t, st_data_t);
|
||||
|
|
37
st.c
37
st.c
|
@ -52,6 +52,12 @@ static const struct st_hash_type type_strhash = {
|
|||
strhash,
|
||||
};
|
||||
|
||||
static int strcasehash(const char *);
|
||||
static const struct st_hash_type type_strcasehash = {
|
||||
strcasecmp,
|
||||
strcasehash,
|
||||
};
|
||||
|
||||
static void rehash(st_table *);
|
||||
|
||||
#ifdef RUBY
|
||||
|
@ -202,6 +208,18 @@ st_init_strtable_with_size(int size)
|
|||
return st_init_table_with_size(&type_strhash, size);
|
||||
}
|
||||
|
||||
st_table*
|
||||
st_init_strcasetable(void)
|
||||
{
|
||||
return st_init_table(&type_strcasehash);
|
||||
}
|
||||
|
||||
st_table*
|
||||
st_init_strcasetable_with_size(int size)
|
||||
{
|
||||
return st_init_table_with_size(&type_strcasehash, size);
|
||||
}
|
||||
|
||||
void
|
||||
st_clear(st_table *table)
|
||||
{
|
||||
|
@ -814,6 +832,25 @@ strhash(register const char *string)
|
|||
return hval;
|
||||
}
|
||||
|
||||
static int
|
||||
strcasehash(register const char *string)
|
||||
{
|
||||
register unsigned int hval = FNV1_32A_INIT;
|
||||
|
||||
/*
|
||||
* FNV-1a hash each octet in the buffer
|
||||
*/
|
||||
while (*string) {
|
||||
unsigned int c = (unsigned char)*string++;
|
||||
if ((unsigned int)(c - 'A') > ('Z' - 'A')) c += 'a' - 'A';
|
||||
hval ^= c;
|
||||
|
||||
/* multiply by the 32 bit FNV magic prime mod 2^32 */
|
||||
hval *= FNV_32_PRIME;
|
||||
}
|
||||
return hval;
|
||||
}
|
||||
|
||||
int
|
||||
st_numcmp(st_data_t x, st_data_t y)
|
||||
{
|
||||
|
|
28
string.c
28
string.c
|
@ -228,7 +228,7 @@ rb_tainted_str_new2(const char *ptr)
|
|||
}
|
||||
|
||||
static VALUE
|
||||
str_new3(VALUE klass, VALUE str)
|
||||
str_new_shared(VALUE klass, VALUE str)
|
||||
{
|
||||
VALUE str2 = str_alloc(klass);
|
||||
|
||||
|
@ -244,11 +244,19 @@ str_new3(VALUE klass, VALUE str)
|
|||
RSTRING(str2)->as.heap.aux.shared = str;
|
||||
FL_SET(str2, ELTS_SHARED);
|
||||
}
|
||||
rb_enc_copy((VALUE)str2, str);
|
||||
|
||||
return str2;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
str_new3(VALUE klass, VALUE str)
|
||||
{
|
||||
VALUE str2 = str_new_shared(klass, str);
|
||||
|
||||
rb_enc_copy(str2, str);
|
||||
return str2;
|
||||
}
|
||||
|
||||
VALUE
|
||||
rb_str_new3(VALUE str)
|
||||
{
|
||||
|
@ -5108,6 +5116,21 @@ str_encoding(VALUE str)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* str.force_encoding(encoding) => str
|
||||
*
|
||||
* Changes the encoding to +encoding+ and returns self.
|
||||
*/
|
||||
|
||||
static VALUE
|
||||
rb_str_force_encoding(VALUE str, VALUE encname)
|
||||
{
|
||||
str_modifiable(str);
|
||||
rb_enc_associate(str, rb_enc_find(StringValueCStr(encname)));
|
||||
return str;
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* Document-class: Symbol
|
||||
*
|
||||
|
@ -5519,6 +5542,7 @@ Init_String(void)
|
|||
rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
|
||||
|
||||
rb_define_method(rb_cString, "encoding", str_encoding, 0);
|
||||
rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
|
||||
|
||||
id_to_s = rb_intern("to_s");
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue