mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
hash: keep fstrings of tainted strings for string keys
The same hash keys may be loaded from tainted data sources frequently (e.g. parsing headers from socket or loading YAML data from a file). If a non-tainted fstring already exists (because the application expects the hash key), cache and deduplicate the tainted version in the new tainted_frozen_strings table. For non-embedded strings, this also allows sharing with the underlying malloc-ed data. * vm_core.h (rb_vm_struct): add tainted_frozen_strings * vm.c (ruby_vm_destruct): free tainted_frozen_strings (Init_vm_objects): initialize tainted_frozen_strings (rb_vm_tfstring_table): accessor for tainted_frozen_strings * internal.h: declare rb_fstring_existing, rb_vm_tfstring_table * hash.c (fstring_existing_str): remove (moved to string.c) (hash_aset_str): use rb_fstring_existing * string.c (rb_fstring_existing): new, based on fstring_existing_str (tainted_fstr_update): new (rb_fstring_existing0): new, based on fstring_existing_str (rb_tainted_fstring_existing): new, special case for tainted strings (rb_str_free): delete from tainted_frozen_strings table * test/ruby/test_optimization.rb (test_hash_reuse_fstring): new test [ruby-core:82012] [Bug #13737] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59354 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
ed831a6c5b
commit
d04c085b3c
6 changed files with 141 additions and 23 deletions
23
hash.c
23
hash.c
|
@ -18,7 +18,6 @@
|
|||
#include "probes.h"
|
||||
#include "id.h"
|
||||
#include "symbol.h"
|
||||
#include "gc.h"
|
||||
|
||||
#ifdef __APPLE__
|
||||
# ifdef HAVE_CRT_EXTERNS_H
|
||||
|
@ -1516,33 +1515,13 @@ hash_aset(st_data_t *key, st_data_t *val, struct update_arg *arg, int existing)
|
|||
return ST_CONTINUE;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
fstring_existing_str(VALUE str)
|
||||
{
|
||||
st_data_t fstr;
|
||||
st_table *tbl = rb_vm_fstring_table();
|
||||
|
||||
if (st_lookup(tbl, str, &fstr)) {
|
||||
if (rb_objspace_garbage_object_p(fstr)) {
|
||||
return rb_fstring(str);
|
||||
}
|
||||
else {
|
||||
return (VALUE)fstr;
|
||||
}
|
||||
}
|
||||
else {
|
||||
return Qnil;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
hash_aset_str(st_data_t *key, st_data_t *val, struct update_arg *arg, int existing)
|
||||
{
|
||||
if (!existing && !RB_OBJ_FROZEN(*key)) {
|
||||
VALUE k;
|
||||
|
||||
if (!RB_OBJ_TAINTED(*key) &&
|
||||
(k = fstring_existing_str(*key)) != Qnil) {
|
||||
if ((k = rb_fstring_existing(*key)) != Qnil) {
|
||||
*key = k;
|
||||
}
|
||||
else {
|
||||
|
|
|
@ -1574,6 +1574,7 @@ VALUE rb_strftime(const char *format, size_t format_len, rb_encoding *enc,
|
|||
/* string.c */
|
||||
VALUE rb_fstring(VALUE);
|
||||
VALUE rb_fstring_new(const char *ptr, long len);
|
||||
VALUE rb_fstring_existing(VALUE);
|
||||
#define rb_fstring_lit(str) rb_fstring_new((str), rb_strlen_lit(str))
|
||||
#define rb_fstring_literal(str) rb_fstring_lit(str)
|
||||
VALUE rb_fstring_cstr(const char *str);
|
||||
|
@ -1726,7 +1727,7 @@ void rb_vm_check_redefinition_by_prepend(VALUE klass);
|
|||
VALUE rb_yield_refine_block(VALUE refinement, VALUE refinements);
|
||||
VALUE ruby_vm_special_exception_copy(VALUE);
|
||||
PUREFUNC(st_table *rb_vm_fstring_table(void));
|
||||
|
||||
PUREFUNC(st_table *rb_vm_tfstring_table(void));
|
||||
|
||||
/* vm_dump.c */
|
||||
void rb_print_backtrace(void);
|
||||
|
|
94
string.c
94
string.c
|
@ -349,6 +349,99 @@ register_fstring(VALUE str)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
tainted_fstr_update(st_data_t *key, st_data_t *val, st_data_t arg, int existing)
|
||||
{
|
||||
VALUE *fstr = (VALUE *)arg;
|
||||
VALUE str = (VALUE)*key;
|
||||
|
||||
if (existing) {
|
||||
/* because of lazy sweep, str may be unmarked already and swept
|
||||
* at next time */
|
||||
if (rb_objspace_garbage_object_p(str)) {
|
||||
*fstr = Qundef;
|
||||
return ST_DELETE;
|
||||
}
|
||||
|
||||
*fstr = str;
|
||||
return ST_STOP;
|
||||
}
|
||||
else {
|
||||
str = rb_str_resurrect(str);
|
||||
RB_OBJ_TAINT_RAW(str);
|
||||
RB_FL_SET_RAW(str, RSTRING_FSTR);
|
||||
RB_OBJ_FREEZE_RAW(str);
|
||||
|
||||
*key = *val = *fstr = str;
|
||||
return ST_CONTINUE;
|
||||
}
|
||||
}
|
||||
|
||||
static VALUE
|
||||
rb_fstring_existing0(VALUE str)
|
||||
{
|
||||
st_table *frozen_strings = rb_vm_fstring_table();
|
||||
st_data_t fstr;
|
||||
|
||||
if (st_lookup(frozen_strings, str, &fstr)) {
|
||||
if (rb_objspace_garbage_object_p(fstr)) {
|
||||
return register_fstring(str);
|
||||
}
|
||||
else {
|
||||
return (VALUE)fstr;
|
||||
}
|
||||
}
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
rb_tainted_fstring_existing(VALUE str)
|
||||
{
|
||||
VALUE ret;
|
||||
st_data_t fstr;
|
||||
st_table *tfstrings = rb_vm_tfstring_table();
|
||||
|
||||
if (st_lookup(tfstrings, str, &fstr)) {
|
||||
ret = (VALUE)fstr;
|
||||
if (!rb_objspace_garbage_object_p(ret)) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
ret = rb_fstring_existing0(str);
|
||||
if (NIL_P(ret)) {
|
||||
return Qnil;
|
||||
}
|
||||
if (!RB_FL_TEST_RAW(ret, RSTRING_FSTR)) {
|
||||
return Qnil;
|
||||
}
|
||||
do {
|
||||
fstr = (st_data_t)ret;
|
||||
st_update(tfstrings, fstr, tainted_fstr_update, (st_data_t)&fstr);
|
||||
} while ((VALUE)fstr == Qundef);
|
||||
|
||||
ret = (VALUE)fstr;
|
||||
assert(OBJ_FROZEN_RAW(ret));
|
||||
assert(!FL_TEST_RAW(ret, STR_FAKESTR));
|
||||
assert(!FL_TEST_RAW(ret, FL_EXIVAR));
|
||||
assert(FL_TEST_RAW(ret, RSTRING_FSTR));
|
||||
assert(FL_TEST_RAW(ret, FL_TAINT));
|
||||
assert(RBASIC_CLASS(ret) == rb_cString);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
VALUE
|
||||
rb_fstring_existing(VALUE str)
|
||||
{
|
||||
if (FL_TEST_RAW(str, RSTRING_FSTR))
|
||||
return str;
|
||||
|
||||
if (!RB_OBJ_TAINTED_RAW(str))
|
||||
return rb_fstring_existing0(str);
|
||||
|
||||
return rb_tainted_fstring_existing(str);
|
||||
}
|
||||
|
||||
static VALUE
|
||||
setup_fake_str(struct RString *fake_str, const char *name, long len, int encidx)
|
||||
{
|
||||
|
@ -1311,6 +1404,7 @@ rb_str_free(VALUE str)
|
|||
if (FL_TEST(str, RSTRING_FSTR)) {
|
||||
st_data_t fstr = (st_data_t)str;
|
||||
st_delete(rb_vm_fstring_table(), &fstr, NULL);
|
||||
st_delete(rb_vm_tfstring_table(), &fstr, NULL);
|
||||
RB_DEBUG_COUNTER_INC(obj_str_fstr);
|
||||
}
|
||||
|
||||
|
|
|
@ -181,6 +181,37 @@ class TestRubyOptimization < Test::Unit::TestCase
|
|||
assert_redefine_method('Hash', 'empty?', 'assert_nil({}.empty?); assert_nil({1=>1}.empty?)')
|
||||
end
|
||||
|
||||
def test_hash_reuse_fstring
|
||||
embed = -'h e l l o'
|
||||
shared = -'this string is too long to be embedded and should be shared'
|
||||
assert_not_equal GC::INTERNAL_CONSTANTS[:RVALUE_SIZE],
|
||||
ObjectSpace.memsize_of(shared),
|
||||
'update this test if string embedding changed'
|
||||
|
||||
[ embed, shared ].each do |exp|
|
||||
key = exp.split(' ').join(' ')
|
||||
h = {}
|
||||
h[key] = true
|
||||
assert_not_same key, h.keys[0]
|
||||
assert_predicate h.keys[0], :frozen?
|
||||
assert_same exp, h.keys[0]
|
||||
|
||||
h1 = {}
|
||||
h2 = {}
|
||||
key.taint
|
||||
h1[key] = true
|
||||
h2[key] = true
|
||||
k1 = h1.keys[0]
|
||||
k2 = h2.keys[0]
|
||||
assert_same k1, k2
|
||||
assert_predicate k1, :tainted?
|
||||
|
||||
assert_equal GC::INTERNAL_CONSTANTS[:RVALUE_SIZE],
|
||||
ObjectSpace.memsize_of(k1),
|
||||
'tainted string should share with untainted fstring'
|
||||
end
|
||||
end
|
||||
|
||||
def test_hash_aref_with
|
||||
h = { "foo" => 1 }
|
||||
assert_equal 1, h["foo"]
|
||||
|
|
12
vm.c
12
vm.c
|
@ -2206,6 +2206,10 @@ ruby_vm_destruct(rb_vm_t *vm)
|
|||
st_free_table(vm->frozen_strings);
|
||||
vm->frozen_strings = 0;
|
||||
}
|
||||
if (vm->tainted_frozen_strings) {
|
||||
st_free_table(vm->tainted_frozen_strings);
|
||||
vm->tainted_frozen_strings = 0;
|
||||
}
|
||||
rb_vm_gvl_destroy(vm);
|
||||
if (objspace) {
|
||||
rb_objspace_free(objspace);
|
||||
|
@ -3142,6 +3146,8 @@ Init_vm_objects(void)
|
|||
vm->mark_object_ary = rb_ary_tmp_new(128);
|
||||
vm->loading_table = st_init_strtable();
|
||||
vm->frozen_strings = st_init_table_with_size(&rb_fstring_hash_type, 1000);
|
||||
vm->tainted_frozen_strings =
|
||||
st_init_table_with_size(&rb_fstring_hash_type, 1000);
|
||||
}
|
||||
|
||||
/* top self */
|
||||
|
@ -3203,6 +3209,12 @@ rb_vm_fstring_table(void)
|
|||
return GET_VM()->frozen_strings;
|
||||
}
|
||||
|
||||
st_table *
|
||||
rb_vm_tfstring_table(void)
|
||||
{
|
||||
return GET_VM()->tainted_frozen_strings;
|
||||
}
|
||||
|
||||
#if VM_COLLECT_USAGE_DETAILS
|
||||
|
||||
#define HASH_ASET(h, k, v) rb_hash_aset((h), (st_data_t)(k), (st_data_t)(v))
|
||||
|
|
|
@ -574,6 +574,7 @@ typedef struct rb_vm_struct {
|
|||
|
||||
VALUE *defined_strings;
|
||||
st_table *frozen_strings;
|
||||
st_table *tainted_frozen_strings;
|
||||
|
||||
/* params */
|
||||
struct { /* size in byte */
|
||||
|
|
Loading…
Add table
Reference in a new issue