Normalize values, reusing DN normalization code

I first attempted to extract logic from the code that normalizes DNs, but I was unsuccessful. This is a hack but it works.
2017-10-03 14:38:55 -07:00 · 2017-10-03 14:38:55 -07:00 · 6b9229466d
commit 6b9229466d
parent ed07faf284
5 changed files with 111 additions and 61 deletions
--- a/lib/gitlab/ldap/dn.rb
+++ b/lib/gitlab/ldap/dn.rb
@ -25,6 +25,12 @@ module Gitlab
    UnsupportedDnFormatError = Class.new(StandardError)

    class DN
+      def self.normalize_value(given_value)
+        dummy_dn = "placeholder=#{given_value}"
+        normalized_dn = new(*dummy_dn).to_normalized_s
+        normalized_dn.sub(/\Aplaceholder=/, '')
+      end
+
      ##
      # Initialize a DN, escaping as required. Pass in attributes in name/value
      # pairs. If there is a left over argument, it will be appended to the dn
--- a/lib/gitlab/ldap/person.rb
+++ b/lib/gitlab/ldap/person.rb
@ -41,8 +41,8 @@ module Gitlab
      # 1. Excess spaces are stripped
      # 2. The string is downcased (for case-insensitivity)
      def self.normalize_uid(uid)
-        normalize_dn_part(uid)
-      rescue StandardError => e
+        ::Gitlab::LDAP::DN.normalize_value(uid)
+      rescue ::Gitlab::LDAP::MalformedDnError, ::Gitlab::LDAP::UnsupportedDnFormatError => e
        Rails.logger.info("Returning original UID \"#{uid}\" due to error during normalization attempt: #{e.message}")
        Rails.logger.info(e.backtrace.join("\n"))

@ -77,37 +77,6 @@ module Gitlab

      private

-      def self.normalize_dn_part(part)
-        cleaned = part.strip.downcase
-
-        if cleaned.ends_with?('\\')
-          # If it ends with an escape character that is not followed by a
-          # character to be escaped, then this part may be malformed. But let's
-          # not worry too much about it, and just return it unmodified.
-          #
-          # Why? Because the reason we clean DNs is to make our simplistic
-          # string comparisons work better, even though there are all kinds of
-          # ways that equivalent DNs can vary as strings. If we run into a
-          # strange DN, we should just try to work with it.
-          #
-          # See https://www.ldap.com/ldap-dns-and-rdns for more.
-          return part unless part.ends_with?(' ')
-
-          # Ends with an escaped space (which is valid).
-          cleaned = cleaned + ' '
-        end
-
-        # Get rid of blanks. This can happen if a split character is followed by
-        # whitespace and then another split character.
-        #
-        # E.g. this DN: 'uid=john+telephoneNumber= +1 555-555-5555'
-        #
-        # Should be returned as: 'uid=john+telephoneNumber=+1 555-555-5555'
-        cleaned = '' if cleaned.blank?
-
-        cleaned
-      end
-
      def entry
        @entry
      end
--- a/spec/lib/gitlab/ldap/dn_spec.rb
+++ b/spec/lib/gitlab/ldap/dn_spec.rb
@ -3,6 +3,78 @@ require 'spec_helper'
 describe Gitlab::LDAP::DN do
  using RSpec::Parameterized::TableSyntax

+  describe '#normalize_value' do
+    subject { described_class.normalize_value(given) }
+
+    it_behaves_like 'normalizes a DN attribute value'
+
+    context 'when the given DN is malformed' do
+      context 'when ending with a comma' do
+        let(:given) { 'John Smith,' }
+
+        it 'raises MalformedDnError' do
+          expect { subject }.to raise_error(Gitlab::LDAP::MalformedDnError, 'DN string ended unexpectedly')
+        end
+      end
+
+      context 'when given a BER encoded attribute value with a space in it' do
+        let(:given) { '#aa aa' }
+
+        it 'raises MalformedDnError' do
+          expect { subject }.to raise_error(Gitlab::LDAP::MalformedDnError, "Expected the end of an attribute value, but got \"a\"")
+        end
+      end
+
+      context 'when given a BER encoded attribute value with a non-hex character in it' do
+        let(:given) { '#aaXaaa' }
+
+        it 'raises MalformedDnError' do
+          expect { subject }.to raise_error(Gitlab::LDAP::MalformedDnError, "Expected the first character of a hex pair, but got \"X\"")
+        end
+      end
+
+      context 'when given a BER encoded attribute value with a non-hex character in it' do
+        let(:given) { '#aaaYaa' }
+
+        it 'raises MalformedDnError' do
+          expect { subject }.to raise_error(Gitlab::LDAP::MalformedDnError, "Expected the second character of a hex pair, but got \"Y\"")
+        end
+      end
+
+      context 'when given a hex pair with a non-hex character in it, inside double quotes' do
+        let(:given) { '"Sebasti\\cX\\a1n"' }
+
+        it 'raises MalformedDnError' do
+          expect { subject }.to raise_error(Gitlab::LDAP::MalformedDnError, "Expected the second character of a hex pair inside a double quoted value, but got \"X\"")
+        end
+      end
+
+      context 'with an open (as opposed to closed) double quote' do
+        let(:given) { '"James' }
+
+        it 'raises MalformedDnError' do
+          expect { subject }.to raise_error(Gitlab::LDAP::MalformedDnError, 'DN string ended unexpectedly')
+        end
+      end
+
+      context 'with an invalid escaped hex code' do
+        let(:given) { 'J\ames' }
+
+        it 'raises MalformedDnError' do
+          expect { subject }.to raise_error(Gitlab::LDAP::MalformedDnError, 'Invalid escaped hex code "\am"')
+        end
+      end
+
+      context 'with a value ending with the escape character' do
+        let(:given) { 'foo\\' }
+
+        it 'raises MalformedDnError' do
+          expect { subject }.to raise_error(Gitlab::LDAP::MalformedDnError, 'DN string ended unexpectedly')
+        end
+      end
+    end
+  end
+
  describe '#to_normalized_s' do
    subject { described_class.new(given).to_normalized_s }

--- a/spec/lib/gitlab/ldap/person_spec.rb
+++ b/spec/lib/gitlab/ldap/person_spec.rb
@ -1,7 +1,6 @@
 require 'spec_helper'

 describe Gitlab::LDAP::Person do
-  using RSpec::Parameterized::TableSyntax
  include LdapHelpers

  let(:entry) { ldap_user_entry('john.doe') }
@ -17,38 +16,13 @@ describe Gitlab::LDAP::Person do
    )
  end

-  shared_examples_for 'normalizes the UID' do
-    where(:test_description, :given, :expected) do
-      'strips extraneous whitespace'                                        | ' John C. Smith   '                     | 'john c. smith'
-      'strips extraneous whitespace without changing escaped characters'    | '  Sebasti\\c3\\a1n\\ C.\\20Smith\\   ' | 'sebasti\\c3\\a1n\\ c.\\20smith\\ '
-      'downcases the whole string'                                          | 'John Smith'                            | 'john smith'
-      'does not strip the escaped leading space in an attribute value'      | '   \\ John Smith '                     | '\\ john smith'
-      'does not strip the escaped trailing space in an attribute value'     | '    John Smith\\   '                   | 'john smith\\ '
-      'does not strip the escaped leading newline in an attribute value'    | '     \\\nJohn Smith  '                 | '\\\njohn smith'
-      'does not strip the escaped trailing newline in an attribute value'   | '    John Smith\\\n  '                  | 'john smith\\\n'
-      'does not strip the unescaped leading newline in an attribute value'  | '   \nJohn Smith '                      | '\njohn smith'
-      'does not strip the unescaped trailing newline in an attribute value' | '  John Smith\n   '                     | 'john smith\n'
-      'does not strip non whitespace'                                       | 'John Smith'                            | 'john smith'
-      'does not treat escaped equal signs as attribute delimiters'          | ' foo  \\=  bar'                        | 'foo  \\=  bar'
-      'does not treat escaped hex equal signs as attribute delimiters'      | ' foo  \\3D  bar'                       | 'foo  \\3d  bar'
-      'does not treat escaped commas as attribute delimiters'               | ' Smith\\, John C.'                     | 'smith\\, john c.'
-      'does not treat escaped hex commas as attribute delimiters'           | ' Smith\\2C John C.'                    | 'smith\\2c john c.'
-    end
-
-    with_them do
-      it 'normalizes the UID' do
-        assert_generic_test(test_description, subject, expected)
-      end
-    end
-  end
-
  describe '.normalize_uid' do
    subject { described_class.normalize_uid(given) }

-    it_behaves_like 'normalizes the UID'
+    it_behaves_like 'normalizes a DN attribute value'

    context 'with an exception during normalization' do
-      let(:given) { described_class } # just something that will cause an exception
+      let(:given) { 'John "Smith,' } # just something that will cause an exception

      it 'returns the given UID unmodified' do
        expect(subject).to eq(given)
--- a/spec/support/ldap_shared_examples.rb
+++ b/spec/support/ldap_shared_examples.rb
@ -0,0 +1,29 @@
+shared_examples_for 'normalizes a DN attribute value' do
+  using RSpec::Parameterized::TableSyntax
+
+  where(:test_description, :given, :expected) do
+    'strips extraneous whitespace'                                                      | '   John Smith   '                 | 'john smith'
+    'unescapes non-reserved, non-special Unicode characters'                            | 'Sebasti\\c3\\a1n\\ C.\\20Smith'   | 'sebastián c. smith'
+    'downcases the whole string'                                                        | 'JoHn C. Smith'                    | 'john c. smith'
+    'does not strip an escaped leading space in an attribute value'                     | '\\ John Smith'                    | '\\ john smith'
+    'does not strip an escaped trailing space in an attribute value'                    | 'John Smith\\ '                    | 'john smith\\ '
+    'hex-escapes an escaped leading newline in an attribute value'                      | "\\\nJohn Smith"                   | "\\0ajohn smith"
+    'hex-escapes and does not strip an escaped trailing newline in an attribute value'  | "John Smith\\\n"                   | "john smith\\0a"
+    'hex-escapes an unescaped leading newline (actually an invalid DN value?)'          | "\nJohn Smith"                     | "\\0ajohn smith"
+    'strips an unescaped trailing newline (actually an invalid DN value?)'              | "John Smith\n"                     | "john smith"
+    'does not strip if no extraneous whitespace'                                        | 'John Smith'                       | 'john smith'
+    'does not modify an escaped equal sign in an attribute value'                       | ' foo  \\=  bar'                   | 'foo  \\=  bar'
+    'converts an escaped hex equal sign to an escaped equal sign in an attribute value' | ' foo  \\3D  bar'                  | 'foo  \\=  bar'
+    'does not modify an escaped comma in an attribute value'                            | 'San Francisco\\, CA'              | 'san francisco\\, ca'
+    'converts an escaped hex comma to an escaped comma in an attribute value'           | 'San Francisco\\2C CA'             | 'san francisco\\, ca'
+    'does not modify an escaped hex carriage return character in an attribute value'    | 'San Francisco\\,\\0DCA'           | 'san francisco\\,\\0dca'
+    'does not modify an escaped hex line feed character in an attribute value'          | 'San Francisco\\,\\0ACA'           | 'san francisco\\,\\0aca'
+    'does not modify an escaped hex CRLF in an attribute value'                         | 'San Francisco\\,\\0D\\0ACA'       | 'san francisco\\,\\0d\\0aca'
+  end
+
+  with_them do
+    it 'normalizes the DN attribute value' do
+      assert_generic_test(test_description, subject, expected)
+    end
+  end
+end