1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/spec/ruby/core/string/undump_spec.rb
2020-05-03 12:28:29 +02:00

451 lines
17 KiB
Ruby

# encoding: utf-8
require_relative '../../spec_helper'
require_relative 'fixtures/classes'
describe "String#undump" do
ruby_version_is ''...'2.7' do
it "taints the result if self is tainted" do
  # Undumping a tainted receiver is expected to produce a tainted string.
  tainted_dump = '"foo"'.taint
  tainted_dump.undump.should.tainted?
end
it "untrusts the result if self is untrusted" do
  # Undumping an untrusted receiver is expected to produce an untrusted string.
  untrusted_dump = '"foo"'.untrust
  untrusted_dump.undump.should.untrusted?
end
end
it "does not take into account if a string is frozen" do
  # The frozen state of the receiver must not carry over to the result.
  frozen_dump = '"foo"'.freeze
  frozen_dump.undump.should_not.frozen?
end
it "always returns String instance" do
  # The receiver is a StringSpecs::MyString; the result must be a plain String.
  subclass_dump = StringSpecs::MyString.new('"foo"')
  subclass_dump.undump.should be_an_instance_of(String)
end
it "strips outer \"" do
  # The surrounding double quotes are removed from the dumped form.
  dumped = '"foo"'
  dumped.undump.should == 'foo'
end
it "returns a string with special characters in \\<char> notation replaced with the characters" do
  # Escape letter => the character its backslash notation stands for.
  escapes = {
    'a' => "\a",
    'b' => "\b",
    't' => "\t",
    'n' => "\n",
    'v' => "\v",
    'f' => "\f",
    'r' => "\r",
    'e' => "\e"
  }

  # Each row is [dumped form such as "\a" (with a literal backslash), expected character].
  table = escapes.map { |letter, char| [format('"\\%s"', letter), char] }
  table.should be_computed_by(:undump)
end
it "returns a string with unescaped sequences \" and \\" do
  # Rows are [dumped form, expected result]: an escaped quote and an escaped backslash.
  table = [
    ['"\\""', "\""],
    ['"\\\\"', "\\"]
  ]
  table.should be_computed_by(:undump)
end
it "returns a string with unescaped sequences \\#<char> when # is followed by $, @, {" do
  # Rows are [dumped form, expected result]; the expected values are written as
  # single-quoted literals so that nothing is interpolated.
  table = [
    ['"\\#$PATH"', '#$PATH'],
    ['"\\#@a"', '#@a'],
    ['"\\#@@a"', '#@@a'],
    ['"\\#{a}"', '#{a}']
  ]
  table.should be_computed_by(:undump)
end
it "returns a string with # not escaped when followed by any other character" do
  # Rows are [dumped form, expected result]: a lone "#" and "#" followed by a digit.
  table = [
    ['"#"', '#'],
    ['"#1"', '#1']
  ]
  table.should be_computed_by(:undump)
end
it "returns a string with printable non-alphanumeric characters" do
  # The 30 printable ASCII symbols under test (space included); the double quote,
  # '#' and backslash are not part of this table.
  symbols = ' !$%&\'()*+,-./:;<=>?@[]^_`{|}~'.chars

  # Each row is [symbol wrapped in double quotes, the symbol itself].
  table = symbols.map { |char| [format('"%s"', char), char] }
  table.should be_computed_by(:undump)
end
it "returns a string with numeric characters unescaped" do
  # Each row is [digit wrapped in double quotes, the digit itself] for "0" through "9".
  table = ('0'..'9').map { |digit| [format('"%s"', digit), digit] }
  table.should be_computed_by(:undump)
end
it "returns a string with upper-case alpha characters unescaped" do
  # Each row is [letter wrapped in double quotes, the letter itself] for "A" through "Z".
  table = ('A'..'Z').map { |letter| [format('"%s"', letter), letter] }
  table.should be_computed_by(:undump)
end
it "returns a string with lower-case alpha characters unescaped" do
  # Each row is [letter wrapped in double quotes, the letter itself] for "a" through "z".
  table = ('a'..'z').map { |letter| [format('"%s"', letter), letter] }
  table.should be_computed_by(:undump)
end
it "returns a string with \\x notation replaced with non-printing ASCII character" do
  # Byte values under test: 0x00-0x06, 0x0E-0x1A, 0x1C-0x1F and 0x7F-0xFF.
  # 0x07-0x0D and 0x1B are not part of this table.
  bytes = [*(0x00..0x06), *(0x0E..0x1A), *(0x1C..0x1F), *(0x7F..0xFF)]

  # Each row is [dumped form such as "\x0E" (two upper-case hex digits),
  # the single byte re-tagged as UTF-8].
  table = bytes.map do |byte|
    [format('"\\x%02X"', byte), byte.chr.force_encoding('utf-8')]
  end
  table.should be_computed_by(:undump)
end
it "returns a string with \\u{} notation replaced with multi-byte UTF-8 characters" do
  # Code points under test: U+0080-U+0084 and U+0086-U+009F (U+0085 is not in the table).
  code_points = [*(0x80..0x84), *(0x86..0x9f)]

  # Each row is [dumped form such as "\u{8a}" (lower-case hex), the UTF-8 character].
  table = code_points.map do |code_point|
    [format('"\\u{%x}"', code_point), code_point.chr('utf-8')]
  end
  table.should be_computed_by(:undump)
end
it "returns a string with \\uXXXX notation replaced with multi-byte UTF-8 characters" do
  # Code points under test: U+0080-U+0084 and U+0086-U+009F (U+0085 is not in the table).
  code_points = [*(0x80..0x84), *(0x86..0x9f)]

  # Each row is [dumped form such as "\u008a" (four lower-case hex digits),
  # the UTF-8 character].
  table = code_points.map do |code_point|
    [format('"\\u%04x"', code_point), code_point.chr('utf-8')]
  end
  table.should be_computed_by(:undump)
end
it "undumps correctly string produced from non ASCII-compatible one" do
  # Round trip: dumping a UTF-16BE string and undumping it gives the string back.
  utf16_string = "\u{876}".encode('utf-16be')
  utf16_string.dump.undump.should == utf16_string

  # A hand-written dump carrying a force_encoding suffix is decoded as well.
  dumped = '"\\bv".force_encoding("UTF-16BE")'
  expected = "\u0876".encode('utf-16be')
  dumped.undump.should == expected
end
it "keeps origin encoding" do
  # Encoding name used for the receiver => encoding expected on the result.
  encodings = {
    "ISO-8859-1" => Encoding::ISO_8859_1,
    'windows-1251' => Encoding::Windows_1251
  }

  encodings.each do |name, encoding|
    '"foo"'.encode(name).undump.encoding.should == encoding
  end
end
describe "Limitations" do
it "cannot undump non ASCII-compatible string" do
  # A receiver encoded as UTF-16LE is rejected with a compatibility error.
  utf16le_dump = '"foo"'.encode('utf-16le')
  -> { utf16le_dump.undump }.should raise_error(Encoding::CompatibilityError)
end
end
describe "invalid dump" do
it "raises RuntimeError exception if wrapping \" are missing" do
  # Malformed input => pattern the RuntimeError message must match.
  expectations = {
    'foo' => /invalid dumped string/,
    '"foo' => /unterminated dumped string/,
    'foo"' => /invalid dumped string/,
    "'foo'" => /invalid dumped string/
  }

  expectations.each do |dumped, message|
    -> { dumped.undump }.should raise_error(RuntimeError, message)
  end
end
it "raises RuntimeError if there is incorrect \\x sequence" do
  # A \x with no digits at all, and a \x whose second digit is not hexadecimal.
  malformed = ['"\\x"', '"\\x3y"']

  malformed.each do |dumped|
    -> { dumped.undump }.should raise_error(RuntimeError, /invalid hex escape/)
  end
end
it "raises RuntimeError in there is incorrect \\u sequence" do
  # NOTE(review): the description reads "in" where "if" is presumably meant; it is
  # kept verbatim in case external tooling keys on spec descriptions.
  #
  # Malformed \u escapes, each listed once: no hex digits, an opening brace with
  # no digits, and a \u{...} form whose closing brace is missing.
  malformed = ['"\\u"', '"\\u{"', '"\\u{3042"']

  malformed.each do |dumped|
    -> { dumped.undump }.should raise_error(RuntimeError, /invalid Unicode escape/)
  end
end
it "raises RuntimeError if there is malformed dump of non ASCII-compatible string" do
  # Malformed force_encoding suffix => pattern the RuntimeError message must match.
  expectations = {
    '"".force_encoding("BINARY"' => /invalid dumped string/,
    '"".force_encoding("Unknown")' => /dumped string has unknown encoding name/,
    '"".force_encoding()' => /invalid dumped string/
  }

  expectations.each do |dumped, message|
    -> { dumped.undump }.should raise_error(RuntimeError, message)
  end
end
it "raises RuntimeError if string contains \0 character" do
  # The receiver holds a raw NUL byte between the quotes.
  with_null_byte = "\"foo\0\""
  -> { with_null_byte.undump }.should raise_error(RuntimeError, /string contains null byte/)
end
it "raises RuntimeError if string contains non ASCII character" do
  # The receiver holds the character U+3042 itself rather than an escape for it.
  with_non_ascii = "\"\u3042\""
  -> { with_non_ascii.undump }.should raise_error(RuntimeError, /non-ASCII character detected/)
end
it "raises RuntimeError if there are some excessive \"" do
  # Unescaped double quotes appear in the middle of the dumped text.
  extra_quotes = '" "" "'
  -> { extra_quotes.undump }.should raise_error(RuntimeError, /invalid dumped string/)
end
end
end