1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Import StringScanner 1.0.3 (#2553)

This commit is contained in:
Sutou Kouhei 2019-10-14 12:40:50 +09:00 committed by GitHub
parent 6fa3492362
commit 95c420c4a6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
Notes: git 2019-10-14 12:41:16 +09:00
Merged-By: kou <kou@clear-code.com>
7 changed files with 451 additions and 142 deletions

5
NEWS
View file

@ -416,6 +416,11 @@ RubyGems::
* Upgrade to RubyGems 3.1.0.pre1
Bundled from https://github.com/rubygems/rubygems/commit/97b264f0fa248c864b6ee9a23d3ff1cdd217dddb
StringScanner::
* Upgrade to 1.0.3.
See https://github.com/ruby/strscan/blob/master/NEWS.md.
=== Compatibility issues (excluding feature bug fixes)
* Removed unmaintained libraries.

View file

@ -1,4 +1,5 @@
# frozen_string_literal: true
require 'mkmf'
$INCFLAGS << " -I$(top_srcdir)"
$INCFLAGS << " -I$(top_srcdir)" if $extmk
have_func("onig_region_memsize", "ruby.h")
create_makefile 'strscan'

View file

@ -11,9 +11,18 @@
#include "ruby/ruby.h"
#include "ruby/re.h"
#include "ruby/encoding.h"
#include "regint.h"
#define STRSCAN_VERSION "0.7.0"
#ifdef RUBY_EXTCONF_H
# include RUBY_EXTCONF_H
#endif
#ifdef HAVE_ONIG_REGION_MEMSIZE
extern size_t onig_region_memsize(const struct re_registers *regs);
#endif
#include <stdbool.h>
#define STRSCAN_VERSION "1.0.3"
/* =======================================================================
Data Type Definitions
@ -41,6 +50,9 @@ struct strscanner
/* regexp used for last scan */
VALUE regex;
/* anchor mode */
bool fixed_anchor_p;
};
#define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
@ -186,7 +198,11 @@ static size_t
strscan_memsize(const void *ptr)
{
const struct strscanner *p = ptr;
return sizeof(*p) - sizeof(p->regs) + onig_region_memsize(&p->regs);
size_t size = sizeof(*p) - sizeof(p->regs);
#ifdef HAVE_ONIG_REGION_MEMSIZE
size += onig_region_memsize(&p->regs);
#endif
return size;
}
static const rb_data_type_t strscanner_type = {
@ -208,19 +224,41 @@ strscan_s_allocate(VALUE klass)
}
/*
* call-seq: StringScanner.new(string, dup = false)
* call-seq:
* StringScanner.new(string, fixed_anchor: false)
* StringScanner.new(string, dup = false)
*
* Creates a new StringScanner object to scan over the given +string+.
*
* If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
* the string. Otherwise, +\A+ always matches the current position.
*
* +dup+ argument is obsolete and not used now.
*/
static VALUE
strscan_initialize(int argc, VALUE *argv, VALUE self)
{
struct strscanner *p;
VALUE str, need_dup;
VALUE str, options;
p = check_strscan(self);
rb_scan_args(argc, argv, "11", &str, &need_dup);
rb_scan_args(argc, argv, "11", &str, &options);
options = rb_check_hash_type(options);
if (!NIL_P(options)) {
VALUE fixed_anchor;
ID keyword_ids[1];
keyword_ids[0] = rb_intern("fixed_anchor");
rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
if (fixed_anchor == Qundef) {
p->fixed_anchor_p = false;
}
else {
p->fixed_anchor_p = RTEST(fixed_anchor);
}
}
else {
p->fixed_anchor_p = false;
}
StringValue(str);
p->str = str;
@ -294,7 +332,7 @@ strscan_reset(VALUE self)
* terminate
* clear
*
* Set the scan pointer to the end of the string and clear matching data.
* Sets the scan pointer to the end of the string and clears matching data.
*/
static VALUE
strscan_terminate(VALUE self)
@ -425,7 +463,7 @@ strscan_get_charpos(VALUE self)
/*
* call-seq: pos=(n)
*
* Set the byte position of the scan pointer.
* Sets the byte position of the scan pointer.
*
* s = StringScanner.new('test string')
* s.pos = 7 # -> 7
@ -446,16 +484,79 @@ strscan_set_pos(VALUE self, VALUE v)
return INT2NUM(i);
}
static VALUE
strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
static inline UChar *
match_target(struct strscanner *p)
{
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
struct strscanner *p;
regex_t *re;
long ret;
int tmpreg;
if (p->fixed_anchor_p) {
return (UChar *)S_PBEG(p);
}
else
{
return (UChar *)CURPTR(p);
}
}
Check_Type(regex, T_REGEXP);
/*
 * Records a match of +length+ bytes starting at the current scan
 * position as capture group 0, discarding any previous registers.
 *
 * In fixed anchor mode the group spans curr .. curr + length; otherwise
 * it spans 0 .. length.
 *
 * onig_region_set() takes its offsets as int, so it is only used here to
 * make sure slot 0 exists.  The real offsets are then written straight
 * into the register arrays, which keeps positions that do not fit in an
 * int from being truncated.
 */
static inline void
set_registers(struct strscanner *p, size_t length)
{
    struct re_registers *regs = &(p->regs);

    onig_region_clear(regs);
    if (onig_region_set(regs, 0, 0, 0) != 0) {
        /* Slot 0 could not be allocated; do not touch the arrays. */
        rb_memerror();
    }
    if (p->fixed_anchor_p) {
        regs->beg[0] = p->curr;
        regs->end[0] = p->curr + (long)length;
    }
    else {
        /* beg[0] is already 0 from the onig_region_set() call above. */
        regs->end[0] = (long)length;
    }
}
/*
 * Advances the scan pointer using the end of capture group 0.
 *
 * Without fixed anchor mode end[0] is added to the current position;
 * with it, end[0] itself becomes the new position.
 */
static inline void
succ(struct strscanner *p)
{
    if (!p->fixed_anchor_p) {
        p->curr += p->regs.end[0];
        return;
    }
    p->curr = p->regs.end[0];
}
/*
 * Returns the length derived from capture group 0: end[0] - prev in
 * fixed anchor mode, end[0] itself otherwise.
 */
static inline long
last_match_length(struct strscanner *p)
{
    return p->fixed_anchor_p ? p->regs.end[0] - p->prev : p->regs.end[0];
}
/*
 * Converts a register offset into the position handed to the extract
 * helpers: unchanged in fixed anchor mode, shifted by p->prev otherwise.
 */
static inline long
adjust_register_position(struct strscanner *p, long position)
{
    const long base = p->fixed_anchor_p ? 0 : p->prev;

    return base + position;
}
static VALUE
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
{
struct strscanner *p;
if (headonly) {
if (!RB_TYPE_P(pattern, T_REGEXP)) {
StringValue(pattern);
}
}
else {
Check_Type(pattern, T_REGEXP);
}
GET_SCANNER(self, p);
CLEAR_MATCH_STATUS(p);
@ -463,30 +564,42 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
return Qnil;
}
p->regex = regex;
re = rb_reg_prepare_re(regex, p->str);
tmpreg = re != RREGEXP_PTR(regex);
if (!tmpreg) RREGEXP(regex)->usecnt++;
if (RB_TYPE_P(pattern, T_REGEXP)) {
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
regex_t *re;
long ret;
int tmpreg;
p->regex = pattern;
re = rb_reg_prepare_re(pattern, p->str);
tmpreg = re != RREGEXP_PTR(pattern);
if (!tmpreg) RREGEXP(pattern)->usecnt++;
if (headonly) {
ret = onig_match(re, (UChar* )CURPTR(p),
ret = onig_match(re,
match_target(p),
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
(UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
(UChar* )CURPTR(p),
&(p->regs),
ONIG_OPTION_NONE);
}
else {
ret = onig_search(re,
(UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
(UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
&(p->regs), ONIG_OPTION_NONE);
match_target(p),
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
(UChar* )CURPTR(p),
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
&(p->regs),
ONIG_OPTION_NONE);
}
if (!tmpreg) RREGEXP(regex)->usecnt--;
if (!tmpreg) RREGEXP(pattern)->usecnt--;
if (tmpreg) {
if (RREGEXP(regex)->usecnt) {
if (RREGEXP(pattern)->usecnt) {
onig_free(re);
}
else {
onig_free(RREGEXP_PTR(regex));
RREGEXP_PTR(regex) = re;
onig_free(RREGEXP_PTR(pattern));
RREGEXP_PTR(pattern) = re;
}
}
@ -495,17 +608,32 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
/* not matched */
return Qnil;
}
}
else {
rb_enc_check(p->str, pattern);
if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
return Qnil;
}
if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
return Qnil;
}
set_registers(p, RSTRING_LEN(pattern));
}
MATCHED(p);
p->prev = p->curr;
if (succptr) {
p->curr += p->regs.end[0];
succ(p);
}
{
const long length = last_match_length(p);
if (getstr) {
return extract_beg_len(p, p->prev, p->regs.end[0]);
return extract_beg_len(p, p->prev, length);
}
else {
return INT2FIX(p->regs.end[0]);
return INT2FIX(length);
}
}
}
@ -520,7 +648,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
* p s.scan(/\w+/) # -> "test"
* p s.scan(/\w+/) # -> nil
* p s.scan(/\s+/) # -> " "
* p s.scan(/\w+/) # -> "string"
* p s.scan("str") # -> "str"
* p s.scan(/\w+/) # -> "ing"
* p s.scan(/./) # -> nil
*
*/
@ -539,6 +668,7 @@ strscan_scan(VALUE self, VALUE re)
* s = StringScanner.new('test string')
* p s.match?(/\w+/) # -> 4
* p s.match?(/\w+/) # -> 4
* p s.match?("test") # -> 4
* p s.match?(/\s+/) # -> nil
*/
static VALUE
@ -560,7 +690,8 @@ strscan_match_p(VALUE self, VALUE re)
* p s.skip(/\w+/) # -> 4
* p s.skip(/\w+/) # -> nil
* p s.skip(/\s+/) # -> 1
* p s.skip(/\w+/) # -> 6
* p s.skip("st") # -> 2
* p s.skip(/\w+/) # -> 4
* p s.skip(/./) # -> nil
*
*/
@ -704,8 +835,13 @@ static void
adjust_registers_to_matched(struct strscanner *p)
{
/* Rebuild capture group 0 from p->prev and p->curr after clearing the old registers. */
onig_region_clear(&(p->regs));
if (p->fixed_anchor_p) {
/* Fixed anchor mode: store prev and curr themselves. */
onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
}
else {
/* Otherwise: store 0 and the distance between prev and curr. */
onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
}
}
/*
* Scans one character and returns it.
@ -738,8 +874,9 @@ strscan_getch(VALUE self)
p->curr += len;
MATCHED(p);
adjust_registers_to_matched(p);
return extract_range(p, p->prev + p->regs.beg[0],
p->prev + p->regs.end[0]);
return extract_range(p,
adjust_register_position(p, p->regs.beg[0]),
adjust_register_position(p, p->regs.end[0]));
}
/*
@ -772,8 +909,9 @@ strscan_get_byte(VALUE self)
p->curr++;
MATCHED(p);
adjust_registers_to_matched(p);
return extract_range(p, p->prev + p->regs.beg[0],
p->prev + p->regs.end[0]);
return extract_range(p,
adjust_register_position(p, p->regs.beg[0]),
adjust_register_position(p, p->regs.end[0]));
}
/*
@ -826,7 +964,7 @@ strscan_peep(VALUE self, VALUE vlen)
}
/*
* Set the scan pointer to the previous position. Only one previous position is
* Sets the scan pointer to the previous position. Only one previous position is
* remembered, and it changes with each scanning operation.
*
* s = StringScanner.new('test string')
@ -951,8 +1089,9 @@ strscan_matched(VALUE self)
GET_SCANNER(self, p);
if (! MATCHED_P(p)) return Qnil;
return extract_range(p, p->prev + p->regs.beg[0],
p->prev + p->regs.end[0]);
return extract_range(p,
adjust_register_position(p, p->regs.beg[0]),
adjust_register_position(p, p->regs.end[0]));
}
/*
@ -1048,8 +1187,9 @@ strscan_aref(VALUE self, VALUE idx)
if (i >= p->regs.num_regs) return Qnil;
if (p->regs.beg[i] == -1) return Qnil;
return extract_range(p, p->prev + p->regs.beg[i],
p->prev + p->regs.end[i]);
return extract_range(p,
adjust_register_position(p, p->regs.beg[i]),
adjust_register_position(p, p->regs.end[i]));
}
/*
@ -1098,8 +1238,9 @@ strscan_captures(VALUE self)
new_ary = rb_ary_new2(num_regs);
for (i = 1; i < num_regs; i++) {
VALUE str = extract_range(p, p->prev + p->regs.beg[i],
p->prev + p->regs.end[i]);
VALUE str = extract_range(p,
adjust_register_position(p, p->regs.beg[i]),
adjust_register_position(p, p->regs.end[i]));
rb_ary_push(new_ary, str);
}
@ -1154,7 +1295,9 @@ strscan_pre_match(VALUE self)
GET_SCANNER(self, p);
if (! MATCHED_P(p)) return Qnil;
return extract_range(p, 0, p->prev + p->regs.beg[0]);
return extract_range(p,
0,
adjust_register_position(p, p->regs.beg[0]));
}
/*
@ -1173,7 +1316,9 @@ strscan_post_match(VALUE self)
GET_SCANNER(self, p);
if (! MATCHED_P(p)) return Qnil;
return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
return extract_range(p,
adjust_register_position(p, p->regs.end[0]),
S_LEN(p));
}
/*
@ -1302,6 +1447,23 @@ inspect2(struct strscanner *p)
return rb_str_dump(str);
}
/*
 * call-seq:
 *   scanner.fixed_anchor? -> true or false
 *
 * Returns +true+ when +scanner+ uses fixed anchor mode, +false+
 * otherwise.
 *
 * If fixed anchor mode is used, +\A+ always matches the beginning of
 * the string. Otherwise, +\A+ always matches the current position.
 */
static VALUE
strscan_fixed_anchor_p(VALUE self)
{
    struct strscanner *p = check_strscan(self);

    if (p->fixed_anchor_p) {
        return Qtrue;
    }
    return Qfalse;
}
/* =======================================================================
Ruby Interface
======================================================================= */
@ -1488,4 +1650,6 @@ Init_strscan(void)
rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
}

View file

@ -1,19 +1,20 @@
# frozen_string_literal: true
Gem::Specification.new do |s|
s.name = "strscan"
s.version = '1.0.0'
s.version = '1.0.3'
s.summary = "Provides lexical scanning operations on a String."
s.description = "Provides lexical scanning operations on a String."
s.require_path = %w{lib}
s.files = %w{ext/strscan/extconf.rb ext/strscan/strscan.c ext/strscan/regenc.h ext/strscan/regint.h}
s.files = %w{ext/strscan/extconf.rb ext/strscan/strscan.c}
s.extensions = %w{ext/strscan/extconf.rb}
s.required_ruby_version = ">= 2.4.0"
s.authors = ["Minero Aoki"]
s.email = [nil]
s.authors = ["Minero Aoki", "Sutou Kouhei"]
s.email = [nil, "kou@cozmixng.org"]
s.homepage = "https://github.com/ruby/strscan"
s.license = "BSD-2-Clause"
s.add_development_dependency "rake-compiler"
s.add_development_dependency "benchmark-driver"
end

View file

@ -12,7 +12,7 @@ describe "StringScanner#dup" do
s.string.should == @string
end
it "copies the passed StringSCanner's position to self" do
it "copies the passed StringScanner's position to self" do
@orig_s.pos = 5
s = @orig_s.dup
s.pos.should eql(5)

View file

@ -50,10 +50,48 @@ describe "StringScanner#scan" do
@s.scan(/./).should be_nil
end
it "raises a TypeError if pattern isn't a Regexp" do
ruby_version_is ""..."2.7" do
it "raises a TypeError if pattern is a String" do
-> { @s.scan("aoeu") }.should raise_error(TypeError)
end
end
ruby_version_is "2.7" do
it "treats String as the pattern itself" do
@s.scan("this").should be_nil
@s.scan("This").should == "This"
end
end
it "raises a TypeError if pattern isn't a Regexp nor String" do
-> { @s.scan(5) }.should raise_error(TypeError)
-> { @s.scan(:test) }.should raise_error(TypeError)
-> { @s.scan(mock('x')) }.should raise_error(TypeError)
end
end
# Examples for #scan on a scanner created with fixed_anchor: true.
describe "StringScanner#scan with fixed_anchor: true" do
# Every example gets a fresh fixed-anchor scanner over a four-line string.
before :each do
@s = StringScanner.new("This\nis\na\ntest", fixed_anchor: true)
end
# The examples are guarded so they only run on Ruby 2.7 and later.
ruby_version_is "2.7" do
it "returns the matched string" do
@s.scan(/\w+/).should == "This"
@s.scan(/.../m).should == "\nis"
@s.scan(//).should == ""
@s.scan(/\s+/).should == "\n"
end
# ^ succeeds right after the consumed "\n" but not in the middle of a line.
it "treats ^ as matching from the beginning of line" do
@s.scan(/\w+\n/).should == "This\n"
@s.scan(/^\w/).should == "i"
@s.scan(/^\w/).should be_nil
end
# \A succeeds only while the scan pointer is still at the start of the string.
it "treats \\A as matching from the beginning of string" do
@s.scan(/\A\w/).should == "T"
@s.scan(/\A\w/).should be_nil
end
end
end

View file

@ -8,15 +8,19 @@ require 'strscan'
require 'test/unit'
class TestStringScanner < Test::Unit::TestCase
# Builds the scanner used by the tests, forwarding any extra arguments to
# StringScanner.new unchanged.
def create_string_scanner(string, *args)
StringScanner.new(string, *args)
end
def test_s_new
s = StringScanner.new('test string')
s = create_string_scanner('test string')
assert_instance_of StringScanner, s
assert_equal false, s.eos?
assert_equal false, s.tainted?
str = 'test string'.dup
str.taint
s = StringScanner.new(str, false)
s = create_string_scanner(str, false)
assert_instance_of StringScanner, s
assert_equal false, s.eos?
assert_same str, s.string
@ -24,7 +28,7 @@ class TestStringScanner < Test::Unit::TestCase
str = 'test string'.dup
str.taint
s = StringScanner.new(str)
s = create_string_scanner(str)
assert_equal true, s.string.tainted?
end
@ -48,7 +52,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_dup
s = StringScanner.new('test string')
s = create_string_scanner('test string')
d = s.dup
assert_equal s.inspect, d.inspect
assert_equal s.string, d.string
@ -56,7 +60,7 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal s.matched?, d.matched?
assert_equal s.eos?, d.eos?
s = StringScanner.new('test string')
s = create_string_scanner('test string')
s.scan(/test/)
d = s.dup
assert_equal s.inspect, d.inspect
@ -65,7 +69,7 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal s.matched?, d.matched?
assert_equal s.eos?, d.eos?
s = StringScanner.new('test string')
s = create_string_scanner('test string')
s.scan(/test/)
s.scan(/NOT MATCH/)
d = s.dup
@ -75,7 +79,7 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal s.matched?, d.matched?
assert_equal s.eos?, d.eos?
s = StringScanner.new('test string')
s = create_string_scanner('test string')
s.terminate
d = s.dup
assert_equal s.inspect, d.inspect
@ -98,7 +102,7 @@ class TestStringScanner < Test::Unit::TestCase
def test_inspect
str = 'test string'.dup
str.taint
s = StringScanner.new(str, false)
s = create_string_scanner(str, false)
assert_instance_of String, s.inspect
assert_equal s.inspect, s.inspect
assert_equal '#<StringScanner 0/11 @ "test ...">', s.inspect.sub(/StringScanner_C/, 'StringScanner')
@ -106,12 +110,12 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal '#<StringScanner 1/11 "t" @ "est s...">', s.inspect.sub(/StringScanner_C/, 'StringScanner')
assert_equal true, s.inspect.tainted?
s = StringScanner.new("\n")
s = create_string_scanner("\n")
assert_equal '#<StringScanner 0/1 @ "\n">', s.inspect
end
def test_eos?
s = StringScanner.new('test string')
s = create_string_scanner('test string')
assert_equal false, s.eos?
assert_equal false, s.eos?
s.scan(/\w+/)
@ -124,14 +128,14 @@ class TestStringScanner < Test::Unit::TestCase
s.scan(/\w+/)
assert_equal true, s.eos?
s = StringScanner.new('test'.dup)
s = create_string_scanner('test'.dup)
s.scan(/te/)
s.string.replace ''
assert_equal true, s.eos?
end
def test_bol?
s = StringScanner.new("a\nbbb\n\ncccc\nddd\r\neee")
s = create_string_scanner("a\nbbb\n\ncccc\nddd\r\neee")
assert_equal true, s.bol?
assert_equal true, s.bol?
s.scan(/a/)
@ -161,7 +165,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_string
s = StringScanner.new('test')
s = create_string_scanner('test')
assert_equal 'test', s.string
s.string = 'a'
assert_equal 'a', s.string
@ -173,7 +177,7 @@ class TestStringScanner < Test::Unit::TestCase
def test_string_set_is_equal
name = 'tenderlove'
s = StringScanner.new(name)
s = create_string_scanner(name)
assert_equal name.object_id, s.string.object_id
s.string = name
@ -181,7 +185,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_string_append
s = StringScanner.new('tender'.dup)
s = create_string_scanner('tender'.dup)
s << 'love'
assert_equal 'tenderlove', s.string
@ -191,7 +195,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_pos
s = StringScanner.new('test string')
s = create_string_scanner('test string')
assert_equal 0, s.pos
s.get_byte
assert_equal 1, s.pos
@ -202,7 +206,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_pos_unicode
s = StringScanner.new("abcädeföghi")
s = create_string_scanner("abcädeföghi")
assert_equal 0, s.charpos
assert_equal "abcä", s.scan_until(/ä/)
assert_equal 4, s.charpos
@ -213,7 +217,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_concat
s = StringScanner.new('a'.dup)
s = create_string_scanner('a'.dup)
s.scan(/a/)
s.concat 'b'
assert_equal false, s.eos?
@ -226,7 +230,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_scan
s = StringScanner.new('stra strb strc', true)
s = create_string_scanner('stra strb strc', true)
tmp = s.scan(/\w+/)
assert_equal 'stra', tmp
assert_equal false, tmp.tainted?
@ -248,7 +252,7 @@ class TestStringScanner < Test::Unit::TestCase
str = 'stra strb strc'.dup
str.taint
s = StringScanner.new(str, false)
s = create_string_scanner(str, false)
tmp = s.scan(/\w+/)
assert_equal 'stra', tmp
assert_equal true, tmp.tainted?
@ -267,7 +271,7 @@ class TestStringScanner < Test::Unit::TestCase
assert_nil s.scan(/\w+/)
assert_nil s.scan(/\w+/)
s = StringScanner.new('test'.dup)
s = create_string_scanner('test'.dup)
s.scan(/te/)
# This assumes #string does not duplicate string,
# but it is implementation specific issue.
@ -277,13 +281,29 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal nil, s.scan(/test/)
# [ruby-bugs:4361]
s = StringScanner.new("")
s = create_string_scanner("")
assert_equal "", s.scan(//)
assert_equal "", s.scan(//)
end
def test_scan_string
s = create_string_scanner('stra strb strc')
assert_equal 'str', s.scan('str')
assert_equal 'str', s[0]
assert_equal 3, s.pos
assert_equal false, s.tainted?
assert_equal 'a ', s.scan('a ')
str = 'stra strb strc'.dup
str.taint
s = create_string_scanner(str, false)
matched = s.scan('str')
assert_equal 'str', matched
assert_equal true, matched.tainted?
end
def test_skip
s = StringScanner.new('stra strb strc', true)
s = create_string_scanner('stra strb strc', true)
assert_equal 4, s.skip(/\w+/)
assert_equal 1, s.skip(/\s+/)
assert_equal 4, s.skip(/\w+/)
@ -293,19 +313,32 @@ class TestStringScanner < Test::Unit::TestCase
assert_nil s.skip(/\s+/)
assert_equal true, s.eos?
s = StringScanner.new('test'.dup)
s = create_string_scanner('test'.dup)
s.scan(/te/)
s.string.replace ''
assert_equal nil, s.skip(/./)
# [ruby-bugs:4361]
s = StringScanner.new("")
s = create_string_scanner("")
assert_equal 0, s.skip(//)
assert_equal 0, s.skip(//)
end
def test_skip_with_begenning_of_string_anchor_match
s = create_string_scanner("a\nb")
assert_equal 2, s.skip(/a\n/)
assert_equal 1, s.skip(/\Ab/)
end
def test_skip_with_begenning_of_line_anchor_match
s = create_string_scanner("a\nbc")
assert_equal 2, s.skip(/a\n/)
assert_equal 1, s.skip(/^b/)
assert_equal 1, s.skip(/^c/)
end
def test_getch
s = StringScanner.new('abcde')
s = create_string_scanner('abcde')
assert_equal 'a', s.getch
assert_equal 'b', s.getch
assert_equal 'c', s.getch
@ -315,24 +348,24 @@ class TestStringScanner < Test::Unit::TestCase
str = 'abc'.dup
str.taint
s = StringScanner.new(str)
s = create_string_scanner(str)
assert_equal true, s.getch.tainted?
assert_equal true, s.getch.tainted?
assert_equal true, s.getch.tainted?
assert_nil s.getch
s = StringScanner.new("\244\242".dup.force_encoding("euc-jp"))
s = create_string_scanner("\244\242".dup.force_encoding("euc-jp"))
assert_equal "\244\242".dup.force_encoding("euc-jp"), s.getch
assert_nil s.getch
s = StringScanner.new('test'.dup)
s = create_string_scanner('test'.dup)
s.scan(/te/)
s.string.replace ''
assert_equal nil, s.getch
end
def test_get_byte
s = StringScanner.new('abcde')
s = create_string_scanner('abcde')
assert_equal 'a', s.get_byte
assert_equal 'b', s.get_byte
assert_equal 'c', s.get_byte
@ -343,32 +376,34 @@ class TestStringScanner < Test::Unit::TestCase
str = 'abc'.dup
str.taint
s = StringScanner.new(str)
s = create_string_scanner(str)
assert_equal true, s.get_byte.tainted?
assert_equal true, s.get_byte.tainted?
assert_equal true, s.get_byte.tainted?
assert_nil s.get_byte
s = StringScanner.new("\244\242".dup.force_encoding("euc-jp"))
s = create_string_scanner("\244\242".dup.force_encoding("euc-jp"))
assert_equal "\244".dup.force_encoding("euc-jp"), s.get_byte
assert_equal "\242".dup.force_encoding("euc-jp"), s.get_byte
assert_nil s.get_byte
s = StringScanner.new('test'.dup)
s = create_string_scanner('test'.dup)
s.scan(/te/)
s.string.replace ''
assert_equal nil, s.get_byte
end
def test_matched
s = StringScanner.new('stra strb strc')
s = create_string_scanner('stra strb strc')
s.scan(/\w+/)
assert_equal 'stra', s.matched
assert_equal false, s.matched.tainted?
s.scan(/\s+/)
assert_equal ' ', s.matched
s.scan('st')
assert_equal 'st', s.matched
s.scan(/\w+/)
assert_equal 'strb', s.matched
assert_equal 'rb', s.matched
s.scan(/\s+/)
assert_equal ' ', s.matched
s.scan(/\w+/)
@ -378,7 +413,7 @@ class TestStringScanner < Test::Unit::TestCase
s.getch
assert_nil s.matched
s = StringScanner.new('stra strb strc')
s = create_string_scanner('stra strb strc')
s.getch
assert_equal 's', s.matched
assert_equal false, s.matched.tainted?
@ -389,14 +424,14 @@ class TestStringScanner < Test::Unit::TestCase
str = 'test'.dup
str.taint
s = StringScanner.new(str)
s = create_string_scanner(str)
s.scan(/\w+/)
assert_equal true, s.matched.tainted?
assert_equal true, s.matched.tainted?
end
def test_AREF
s = StringScanner.new('stra strb strc')
s = create_string_scanner('stra strb strc')
s.scan(/\w+/)
assert_nil s[-2]
@ -447,13 +482,13 @@ class TestStringScanner < Test::Unit::TestCase
assert_nil s[0]
s = StringScanner.new("\244\242".dup.force_encoding("euc-jp"))
s = create_string_scanner("\244\242".dup.force_encoding("euc-jp"))
s.getch
assert_equal "\244\242".dup.force_encoding("euc-jp"), s[0]
str = 'test'.dup
str.taint
s = StringScanner.new(str)
s = create_string_scanner(str)
s.scan(/(t)(e)(s)(t)/)
assert_equal true, s[0].tainted?
assert_equal true, s[1].tainted?
@ -461,7 +496,7 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal true, s[3].tainted?
assert_equal true, s[4].tainted?
s = StringScanner.new("foo bar baz")
s = create_string_scanner("foo bar baz")
s.scan(/(?<a>\w+) (?<b>\w+) (\w+)/)
assert_equal 'foo', s[1]
assert_equal 'bar', s[2]
@ -476,14 +511,14 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_pre_match
s = StringScanner.new('a b c d e')
s = create_string_scanner('a b c d e')
s.scan(/\w/)
assert_equal '', s.pre_match
assert_equal false, s.pre_match.tainted?
s.skip(/\s/)
assert_equal 'a', s.pre_match
assert_equal false, s.pre_match.tainted?
s.scan(/\w/)
s.scan('b')
assert_equal 'a ', s.pre_match
s.scan_until(/c/)
assert_equal 'a b ', s.pre_match
@ -498,7 +533,7 @@ class TestStringScanner < Test::Unit::TestCase
str = 'test string'.dup
str.taint
s = StringScanner.new(str)
s = create_string_scanner(str)
s.scan(/\w+/)
assert_equal true, s.pre_match.tainted?
s.scan(/\s+/)
@ -508,12 +543,12 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_post_match
s = StringScanner.new('a b c d e')
s = create_string_scanner('a b c d e')
s.scan(/\w/)
assert_equal ' b c d e', s.post_match
s.skip(/\s/)
assert_equal 'b c d e', s.post_match
s.scan(/\w/)
s.scan('b')
assert_equal ' c d e', s.post_match
s.scan_until(/c/)
assert_equal ' d e', s.post_match
@ -532,7 +567,7 @@ class TestStringScanner < Test::Unit::TestCase
str = 'test string'.dup
str.taint
s = StringScanner.new(str)
s = create_string_scanner(str)
s.scan(/\w+/)
assert_equal true, s.post_match.tainted?
s.scan(/\s+/)
@ -542,7 +577,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_terminate
s = StringScanner.new('ssss')
s = create_string_scanner('ssss')
s.getch
s.terminate
assert_equal true, s.eos?
@ -551,7 +586,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_reset
s = StringScanner.new('ssss')
s = create_string_scanner('ssss')
s.getch
s.reset
assert_equal 0, s.pos
@ -563,7 +598,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_matched_size
s = StringScanner.new('test string')
s = create_string_scanner('test string')
assert_nil s.matched_size
s.scan(/test/)
assert_equal 4, s.matched_size
@ -576,7 +611,7 @@ class TestStringScanner < Test::Unit::TestCase
s.terminate
assert_nil s.matched_size
s = StringScanner.new('test string')
s = create_string_scanner('test string')
assert_nil s.matched_size
s.scan(/test/)
assert_equal 4, s.matched_size
@ -585,31 +620,45 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_encoding
ss = StringScanner.new("\xA1\xA2".dup.force_encoding("euc-jp"))
ss = create_string_scanner("\xA1\xA2".dup.force_encoding("euc-jp"))
assert_equal(Encoding::EUC_JP, ss.scan(/./e).encoding)
end
def test_encoding_string
str = "\xA1\xA2".dup.force_encoding("euc-jp")
ss = create_string_scanner(str)
assert_equal(str.dup, ss.scan(str.dup))
end
def test_invalid_encoding_string
str = "\xA1\xA2".dup.force_encoding("euc-jp")
ss = create_string_scanner(str)
assert_raise(Encoding::CompatibilityError) do
ss.scan(str.encode("UTF-8"))
end
end
def test_generic_regexp
ss = StringScanner.new("\xA1\xA2".dup.force_encoding("euc-jp"))
ss = create_string_scanner("\xA1\xA2".dup.force_encoding("euc-jp"))
t = ss.scan(/./)
assert_equal("\xa1\xa2".dup.force_encoding("euc-jp"), t)
end
def test_set_pos
s = StringScanner.new("test string")
s = create_string_scanner("test string")
s.pos = 7
assert_equal("ring", s.rest)
end
def test_match_p
s = StringScanner.new("test string")
s = create_string_scanner("test string")
assert_equal(4, s.match?(/\w+/))
assert_equal(4, s.match?(/\w+/))
assert_equal(nil, s.match?(/\s+/))
end
def test_check
s = StringScanner.new("Foo Bar Baz")
s = create_string_scanner("Foo Bar Baz")
assert_equal("Foo", s.check(/Foo/))
assert_equal(0, s.pos)
assert_equal("Foo", s.matched)
@ -618,7 +667,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_scan_full
s = StringScanner.new("Foo Bar Baz")
s = create_string_scanner("Foo Bar Baz")
assert_equal(4, s.scan_full(/Foo /, false, false))
assert_equal(0, s.pos)
assert_equal(nil, s.scan_full(/Baz/, false, false))
@ -634,7 +683,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_exist_p
s = StringScanner.new("test string")
s = create_string_scanner("test string")
assert_equal(3, s.exist?(/s/))
assert_equal(0, s.pos)
s.scan(/test/)
@ -643,8 +692,15 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal(nil, s.exist?(/e/))
end
# exist? is expected to reject a String pattern with TypeError.
def test_exist_p_string
s = create_string_scanner("test string")
assert_raise(TypeError) do
s.exist?(" ")
end
end
def test_skip_until
s = StringScanner.new("Foo Bar Baz")
s = create_string_scanner("Foo Bar Baz")
assert_equal(3, s.skip_until(/Foo/))
assert_equal(3, s.pos)
assert_equal(4, s.skip_until(/Bar/))
@ -653,7 +709,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_check_until
s = StringScanner.new("Foo Bar Baz")
s = create_string_scanner("Foo Bar Baz")
assert_equal("Foo", s.check_until(/Foo/))
assert_equal(0, s.pos)
assert_equal("Foo Bar", s.check_until(/Bar/))
@ -662,7 +718,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_search_full
s = StringScanner.new("Foo Bar Baz")
s = create_string_scanner("Foo Bar Baz")
assert_equal(8, s.search_full(/Bar /, false, false))
assert_equal(0, s.pos)
assert_equal("Foo Bar ", s.search_full(/Bar /, false, true))
@ -674,7 +730,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_peek
s = StringScanner.new("test string")
s = create_string_scanner("test string")
assert_equal("test st", s.peek(7))
assert_equal("test st", s.peek(7))
s.scan(/test/)
@ -685,7 +741,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_unscan
s = StringScanner.new('test string')
s = create_string_scanner('test string')
assert_equal("test", s.scan(/\w+/))
s.unscan
assert_equal("te", s.scan(/../))
@ -694,7 +750,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_rest
s = StringScanner.new('test string')
s = create_string_scanner('test string')
assert_equal("test string", s.rest)
s.scan(/test/)
assert_equal(" string", s.rest)
@ -704,7 +760,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_rest_size
s = StringScanner.new('test string')
s = create_string_scanner('test string')
assert_equal(11, s.rest_size)
s.scan(/test/)
assert_equal(7, s.rest_size)
@ -714,13 +770,13 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_inspect2
s = StringScanner.new('test string test')
s = create_string_scanner('test string test')
s.scan(/test strin/)
assert_equal('#<StringScanner 10/16 "...strin" @ "g tes...">', s.inspect)
end
def test_aref_without_regex
s = StringScanner.new('abc')
s = create_string_scanner('abc')
s.get_byte
assert_nil(s[:c])
assert_nil(s["c"])
@ -730,13 +786,14 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_size
s = StringScanner.new("Fri Dec 12 1975 14:39")
s = create_string_scanner("Fri Dec 12 1975 14:39")
s.scan(/(\w+) (\w+) (\d+) /)
assert_equal(4, s.size)
end
def test_captures
s = StringScanner.new("Fri Dec 12 1975 14:39")
s = create_string_scanner("Timestamp: Fri Dec 12 1975 14:39")
s.scan("Timestamp: ")
s.scan(/(\w+) (\w+) (\d+) /)
assert_equal(["Fri", "Dec", "12"], s.captures)
s.scan(/(\w+) (\w+) (\d+) /)
@ -744,10 +801,53 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_values_at
s = StringScanner.new("Fri Dec 12 1975 14:39")
s = create_string_scanner("Timestamp: Fri Dec 12 1975 14:39")
s.scan("Timestamp: ")
s.scan(/(\w+) (\w+) (\d+) /)
assert_equal(["Fri Dec 12 ", "12", nil, "Dec"], s.values_at(0, -1, 5, 2))
s.scan(/(\w+) (\w+) (\d+) /)
assert_nil(s.values_at(0, -1, 5, 2))
end
def test_fixed_anchor_true
assert_equal(true, StringScanner.new("a", fixed_anchor: true).fixed_anchor?)
end
def test_fixed_anchor_false
assert_equal(false, StringScanner.new("a").fixed_anchor?)
assert_equal(false, StringScanner.new("a", true).fixed_anchor?)
assert_equal(false, StringScanner.new("a", false).fixed_anchor?)
assert_equal(false, StringScanner.new("a", {}).fixed_anchor?)
assert_equal(false, StringScanner.new("a", fixed_anchor: nil).fixed_anchor?)
assert_equal(false, StringScanner.new("a", fixed_anchor: false).fixed_anchor?)
end
end
# Runs the tests inherited from TestStringScanner against scanners created
# with fixed_anchor: true, and defines the anchor-specific expectations for
# that mode.
class TestStringScannerFixedAnchor < TestStringScanner
# Always builds a fixed-anchor scanner; extra positional arguments are ignored.
def create_string_scanner(string, *args)
StringScanner.new(string, fixed_anchor: true)
end
# \A matches while the scan pointer is at the start of the string.
def test_skip_with_begenning_of_string_anchor_match
s = create_string_scanner("a")
assert_equal 1, s.skip(/\Aa/)
end
# After "a\n" has been consumed, \A no longer matches.
def test_skip_with_begenning_of_string_anchor_not_match
s = create_string_scanner("a\nb")
assert_equal 2, s.skip(/a\n/)
assert_nil s.skip(/\Ab/)
end
# ^ matches directly after the consumed newline.
def test_skip_with_begenning_of_line_anchor_match
s = create_string_scanner("a\nb")
assert_equal 2, s.skip(/a\n/)
assert_equal 1, s.skip(/^b/)
end
# ^ does not match in the middle of a line.
def test_skip_with_begenning_of_line_anchor_not_match
s = create_string_scanner("ab")
assert_equal 1, s.skip(/a/)
assert_nil s.skip(/^b/)
end
end