From 4fb434e351c0ebb529e102eeceeb1846664875b3 Mon Sep 17 00:00:00 2001 From: Andy Brody Date: Tue, 4 Jul 2017 23:46:03 -0400 Subject: [PATCH] Add dependency on http-accept for Content-Type. The previous "Content-Type" header parser was ported from Python and was not very idiomatic Ruby. While fast and correct per the RFCs, it was triggering bugs in Ruby MRI that we could probably work around. Ideally someone would fix the bugs in Ruby MRI, but I haven't had time to track them down. Switch to using HTTP::Accept for parsing the media-type charset out of the "Content-Type" header. Also relax the tests since HTTP::Accept is somewhat stricter in the input it will accept. As a result, rest-client will now ignore Content-Type headers with a trailing `;` character. (This is invalid per the RFCs, so impact is likely to be small in comparison to fixing the Ruby 2.4 memory leak.) I also tried using https://github.com/httprb/content_type.rb but it is significantly slower (caused 2x blowup in time on a simple benchmark) and doesn't correctly handle content-types containing '.' characters. Fixes: #523 (occasional MRI segfault) Fixes: #611 (MRI 2.4.* memory leak) * Unfortunately, HTTP::Accept does not support Ruby 2.0 due to its use of named capture groups in StringScanner, which was added in Ruby 2.1. Because rest-client still supports Ruby 2.0, fall back on the old logic when running on Ruby 2.0. Even though Ruby 2.0 is unsupported, it probably still sees wide use since it is the system Ruby even on macOS Sierra. * Don't bother running tests for .cgi_parse_header on Ruby 2.0. Still keep the higher level tests that will exercise the deprecated code. --- lib/restclient/utils.rb | 55 +++++++++++++++++++++++++++++++++++------ rest-client.gemspec | 1 + spec/unit/utils_spec.rb | 20 +++++++-------- 3 files changed, 58 insertions(+), 18 deletions(-) diff --git a/lib/restclient/utils.rb b/lib/restclient/utils.rb index d41eefa..aeaba1f 100644 --- a/lib/restclient/utils.rb +++ b/lib/restclient/utils.rb @@ -1,3 +1,5 @@ +require 'http/accept' + module RestClient # Various utility methods module Utils @@ -13,8 +15,8 @@ module RestClient # # @param headers [Hash] # - # @return [String, nil] encoding Return the string encoding or nil if no - # header is found. + # @return [String, nil] Return the string encoding or nil if no header is + # found. # # @example # >> get_encoding_from_headers({:content_type => 'text/plain; charset=UTF-8'}) @@ -24,19 +26,52 @@ module RestClient type_header = headers[:content_type] return nil unless type_header - _content_type, params = cgi_parse_header(type_header) + # TODO: remove this hack once we drop support for Ruby 2.0 + if RUBY_VERSION.start_with?('2.0') + _content_type, params = deprecated_cgi_parse_header(type_header) - if params.include?('charset') - return params.fetch('charset').gsub(/(\A["']*)|(["']*\z)/, '') + if params.include?('charset') + return params.fetch('charset').gsub(/(\A["']*)|(["']*\z)/, '') + end + + else + + begin + _content_type, params = cgi_parse_header(type_header) + rescue HTTP::Accept::ParseError + return nil + else + params['charset'] + end + end + end + + # Parse a Content-Type like header. + # + # Return the main content-type and a hash of params. + # + # @param [String] line + # @return [Array(String, Hash)] + # + def self.cgi_parse_header(line) + types = HTTP::Accept::MediaTypes.parse(line) + + if types.empty? + raise HTTP::Accept::ParseError.new("Found no types in header line") end - nil + [types.first.mime_type, types.first.parameters] end # Parse semi-colon separated, potentially quoted header string iteratively. # # @private # + # @deprecated This method is deprecated and only exists to support Ruby + # 2.0, which is not supported by HTTP::Accept. + # + # @todo remove this method when dropping support for Ruby 2.0 + # def self._cgi_parseparam(s) return enum_for(__method__, s) unless block_given? @@ -66,11 +101,15 @@ module RestClient # probably doesn't read or perform particularly well in ruby. # https://github.com/python/cpython/blob/3.4/Lib/cgi.py#L301-L331 # - # # @param [String] line # @return [Array(String, Hash)] # - def self.cgi_parse_header(line) + # @deprecated This method is deprecated and only exists to support Ruby + # 2.0, which is not supported by HTTP::Accept. + # + # @todo remove this method when dropping support for Ruby 2.0 + # + def self.deprecated_cgi_parse_header(line) parts = _cgi_parseparam(';' + line) key = parts.next pdict = {} diff --git a/rest-client.gemspec b/rest-client.gemspec index e780a12..a26dcf8 100644 --- a/rest-client.gemspec +++ b/rest-client.gemspec @@ -23,6 +23,7 @@ Gem::Specification.new do |s| s.add_development_dependency('rdoc', '>= 2.4.2', '< 6.0') s.add_development_dependency('rubocop', '~> 0') + s.add_dependency('http-accept', '>= 1.7.0', '< 2.0') s.add_dependency('http-cookie', '>= 1.0.2', '< 2.0') s.add_dependency('mime-types', '>= 1.16', '< 4.0') s.add_dependency('netrc', '~> 0.8') diff --git a/spec/unit/utils_spec.rb b/spec/unit/utils_spec.rb index 9c90ec0..36970b8 100644 --- a/spec/unit/utils_spec.rb +++ b/spec/unit/utils_spec.rb @@ -34,11 +34,11 @@ describe RestClient::Utils do end describe '.cgi_parse_header' do - it 'parses headers' do + it 'parses headers', :unless => RUBY_VERSION.start_with?('2.0') do expect(RestClient::Utils.cgi_parse_header('text/plain')). to eq ['text/plain', {}] - expect(RestClient::Utils.cgi_parse_header('text/vnd.just.made.this.up ; ')). + expect(RestClient::Utils.cgi_parse_header('text/vnd.just.made.this.up')). to eq ['text/vnd.just.made.this.up', {}] expect(RestClient::Utils.cgi_parse_header('text/plain;charset=us-ascii')). @@ -52,20 +52,20 @@ describe RestClient::Utils do to eq ['text/plain', {'charset' => 'us-ascii', 'another' => 'opt'}] expect(RestClient::Utils.cgi_parse_header( - 'attachment; filename="silly.txt"')). - to eq ['attachment', {'filename' => 'silly.txt'}] + 'foo/bar; filename="silly.txt"')). + to eq ['foo/bar', {'filename' => 'silly.txt'}] expect(RestClient::Utils.cgi_parse_header( - 'attachment; filename="strange;name"')). - to eq ['attachment', {'filename' => 'strange;name'}] + 'foo/bar; filename="strange;name"')). + to eq ['foo/bar', {'filename' => 'strange;name'}] expect(RestClient::Utils.cgi_parse_header( - 'attachment; filename="strange;name";size=123;')).to eq \ - ['attachment', {'filename' => 'strange;name', 'size' => '123'}] + 'foo/bar; filename="strange;name";size=123')).to eq \ + ['foo/bar', {'filename' => 'strange;name', 'size' => '123'}] expect(RestClient::Utils.cgi_parse_header( - 'form-data; name="files"; filename="fo\\"o;bar"')).to eq \ - ['form-data', {'name' => 'files', 'filename' => 'fo"o;bar'}] + 'foo/bar; name="files"; filename="fo\\"o;bar"')).to eq \ + ['foo/bar', {'name' => 'files', 'filename' => 'fo"o;bar'}] end end