Optimized file search to work without limits

* Removed the limit of 100 on file search results, because all results are loaded anyway.
* Expensive processing (parsing match content, UTF-8 encoding) is now done only for the selected page of the paginated output.
2018-12-02 22:47:33 +01:00 · 2018-12-02 22:47:33 +01:00 · 58bfd73331
commit 58bfd73331
parent 00acef4340
14 changed files with 380 additions and 254 deletions
--- a/app/views/search/results/_blob.html.haml
+++ b/app/views/search/results/_blob.html.haml
@ -1,7 +1,7 @@
 - project = find_project_for_result_blob(blob)
 - return unless project

- file_name, blob = parse_search_result(blob)
- blob_link = project_blob_path(project, tree_join(blob.ref, file_name))
+- blob = parse_search_result(blob)
+- blob_link = project_blob_path(project, tree_join(blob.ref, blob.filename))

-= render partial: 'search/results/blob_data', locals: { blob: blob, project: project, file_name: file_name, blob_link: blob_link }
+= render partial: 'search/results/blob_data', locals: { blob: blob, project: project, file_name: blob.filename, blob_link: blob_link }
--- a/app/views/search/results/_wiki_blob.html.haml
+++ b/app/views/search/results/_wiki_blob.html.haml
@ -1,5 +1,5 @@
 - project = find_project_for_result_blob(wiki_blob)
- file_name, wiki_blob = parse_search_result(wiki_blob)
+- wiki_blob = parse_search_result(wiki_blob)
 - wiki_blob_link = project_wiki_path(project, wiki_blob.basename)

-= render partial: 'search/results/blob_data', locals: { blob: wiki_blob, project: project, file_name: file_name, blob_link: wiki_blob_link }
+= render partial: 'search/results/blob_data', locals: { blob: wiki_blob, project: project, file_name: wiki_blob.filename, blob_link: wiki_blob_link }
--- a/changelogs/unreleased/remove-blob-search-limit.yml
+++ b/changelogs/unreleased/remove-blob-search-limit.yml
@ -0,0 +1,5 @@
+---
+title: Remove limit of 100 when searching repository code.
+merge_request: 8671
+author:
+type: fixed
--- a/doc/api/search.md
+++ b/doc/api/search.md
@ -722,6 +722,17 @@ Example response:

 ### Scope: wiki_blobs

+Wiki blob searches are performed on both filenames and contents. Search
+results:
+
+- Found in filenames are displayed before results found in contents.
+- May contain multiple matches for the same blob because the search string
+  might be found in both the filename and content, and matches of the different
+  types are displayed separately.
+- May contain multiple matches for the same blob because the search string
+  might appear multiple times in the content.
+
+
 ```bash
 curl --request GET --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/projects/6/search?scope=wiki_blobs&search=bye
 ```
@ -783,6 +794,15 @@ Filters are available for this scope:

 to use a filter simply include it in your query like so: `a query filename:some_name*`.

+Blob searches are performed on both filenames and contents. Search results:
+
+- Found in filenames are displayed before results found in contents.
+- May contain multiple matches for the same blob because the search string
+  might be found in both the filename and content, and matches of the different
+  types are displayed separately.
+- May contain multiple matches for the same blob because the search string
+  might appear multiple times in the content.
+
 You may use wildcards (`*`) to use glob matching.

 ```bash
--- a/lib/api/search.rb
+++ b/lib/api/search.rb
@ -35,12 +35,7 @@ module API
      end

      def process_results(results)
-        case params[:scope]
-        when 'blobs', 'wiki_blobs'
-          paginate(results).map { |blob| blob[1] }
-        else
-          paginate(results)
-        end
+        paginate(results)
      end

      def snippets?
--- a/lib/gitlab/file_finder.rb
+++ b/lib/gitlab/file_finder.rb
@ -4,8 +4,6 @@
 # the result is joined and sorted by file name
 module Gitlab
  class FileFinder
-    BATCH_SIZE = 100
-
    attr_reader :project, :ref

    delegate :repository, to: :project
@ -16,60 +14,35 @@ module Gitlab
    end

    def find(query)
-      query = Gitlab::Search::Query.new(query) do
-        filter :filename, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}$/i }
-        filter :path, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}/i }
-        filter :extension, matcher: ->(filter, blob) { blob.filename =~ /\.#{filter[:regex_value]}$/i }
+      query = Gitlab::Search::Query.new(query, encode_binary: true) do
+        filter :filename, matcher: ->(filter, blob) { blob.binary_filename =~ /#{filter[:regex_value]}$/i }
+        filter :path, matcher: ->(filter, blob) { blob.binary_filename =~ /#{filter[:regex_value]}/i }
+        filter :extension, matcher: ->(filter, blob) { blob.binary_filename =~ /\.#{filter[:regex_value]}$/i }
      end

-      by_content = find_by_content(query.term)
+      files = find_by_filename(query.term) + find_by_content(query.term)

-      already_found = Set.new(by_content.map(&:filename))
-      by_filename = find_by_filename(query.term, except: already_found)
+      files = query.filter_results(files) if query.filters.any?

-      files = (by_content + by_filename)
-              .sort_by(&:filename)
-
-      query.filter_results(files).map { |blob| [blob.filename, blob] }
+      files
    end

    private

    def find_by_content(query)
-      results = repository.search_files_by_content(query, ref).first(BATCH_SIZE)
-      results.map { |result| Gitlab::ProjectSearchResults.parse_search_result(result, project) }
-    end
-
-    def find_by_filename(query, except: [])
-      filenames = search_filenames(query, except)
-
-      blobs(filenames).map do |blob|
-        Gitlab::SearchResults::FoundBlob.new(
-          id: blob.id,
-          filename: blob.path,
-          basename: File.basename(blob.path, File.extname(blob.path)),
-          ref: ref,
-          startline: 1,
-          data: blob.data,
-          project: project
-        )
+      repository.search_files_by_content(query, ref).map do |result|
+        Gitlab::Search::FoundBlob.new(content_match: result, project: project, ref: ref, repository: repository)
      end
    end

-    def search_filenames(query, except)
-      filenames = repository.search_files_by_name(query, ref).first(BATCH_SIZE)
-
-      filenames.delete_if { |filename| except.include?(filename) } unless except.empty?
-
-      filenames
+    def find_by_filename(query)
+      search_filenames(query).map do |filename|
+        Gitlab::Search::FoundBlob.new(blob_filename: filename, project: project, ref: ref, repository: repository)
+      end
    end

-    def blob_refs(filenames)
-      filenames.map { |filename| [ref, filename] }
-    end
-
-    def blobs(filenames)
-      Gitlab::Git::Blob.batch(repository, blob_refs(filenames), blob_size_limit: 1024)
+    def search_filenames(query)
+      repository.search_files_by_name(query, ref)
    end
  end
 end
--- a/lib/gitlab/project_search_results.rb
+++ b/lib/gitlab/project_search_results.rb
@ -17,9 +17,9 @@ module Gitlab
      when 'notes'
        notes.page(page).per(per_page)
      when 'blobs'
-        Kaminari.paginate_array(blobs).page(page).per(per_page)
+        paginated_blobs(blobs, page)
      when 'wiki_blobs'
-        Kaminari.paginate_array(wiki_blobs).page(page).per(per_page)
+        paginated_blobs(wiki_blobs, page)
      when 'commits'
        Kaminari.paginate_array(commits).page(page).per(per_page)
      else
@ -55,37 +55,6 @@ module Gitlab
      @commits_count ||= commits.count
    end

-    def self.parse_search_result(result, project = nil)
-      ref = nil
-      filename = nil
-      basename = nil
-
-      data = []
-      startline = 0
-
-      result.each_line.each_with_index do |line, index|
-        prefix ||= line.match(/^(?<ref>[^:]*):(?<filename>[^\x00]*)\x00(?<startline>\d+)\x00/)&.tap do |matches|
-          ref = matches[:ref]
-          filename = matches[:filename]
-          startline = matches[:startline]
-          startline = startline.to_i - index
-          extname = Regexp.escape(File.extname(filename))
-          basename = filename.sub(/#{extname}$/, '')
-        end
-
-        data << line.sub(prefix.to_s, '')
-      end
-
-      FoundBlob.new(
-        filename: filename,
-        basename: basename,
-        ref: ref,
-        startline: startline,
-        data: data.join,
-        project: project
-      )
-    end
-
    def single_commit_result?
      return false if commits_count != 1

@ -97,6 +66,14 @@ module Gitlab

    private

+    def paginated_blobs(blobs, page)
+      results = Kaminari.paginate_array(blobs).page(page).per(per_page)
+
+      Gitlab::Search::FoundBlob.preload_blobs(results)
+
+      results
+    end
+
    def blobs
      return [] unless Ability.allowed?(@current_user, :download_code, @project)

--- a/lib/gitlab/search/found_blob.rb
+++ b/lib/gitlab/search/found_blob.rb
@ -0,0 +1,162 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Search
+    class FoundBlob
+      include EncodingHelper
+      include Presentable
+      include BlobLanguageFromGitAttributes
+      include Gitlab::Utils::StrongMemoize
+
+      attr_reader :project, :content_match, :blob_filename
+
+      FILENAME_REGEXP = /\A(?<ref>[^:]*):(?<filename>[^\x00]*)\x00/.freeze
+      CONTENT_REGEXP = /^(?<ref>[^:]*):(?<filename>[^\x00]*)\x00(?<startline>\d+)\x00/.freeze
+
+      def self.preload_blobs(blobs)
+        to_fetch = blobs.select { |blob| blob.is_a?(self) && blob.blob_filename }
+
+        to_fetch.each { |blob| blob.fetch_blob }
+      end
+
+      def initialize(opts = {})
+        @id = opts.fetch(:id, nil)
+        @binary_filename = opts.fetch(:filename, nil)
+        @binary_basename = opts.fetch(:basename, nil)
+        @ref = opts.fetch(:ref, nil)
+        @startline = opts.fetch(:startline, nil)
+        @binary_data = opts.fetch(:data, nil)
+        @per_page = opts.fetch(:per_page, 20)
+        @project = opts.fetch(:project, nil)
+        # Some callers do not have a project object (e.g. elastic search),
+        # yet they can trigger many calls in one go,
+        # causing duplicated queries.
+        # Allow those to just pass project_id instead.
+        @project_id = opts.fetch(:project_id, nil)
+        @content_match = opts.fetch(:content_match, nil)
+        @blob_filename = opts.fetch(:blob_filename, nil)
+        @repository = opts.fetch(:repository, nil)
+      end
+
+      def id
+        @id ||= parsed_content[:id]
+      end
+
+      def ref
+        @ref ||= parsed_content[:ref]
+      end
+
+      def startline
+        @startline ||= parsed_content[:startline]
+      end
+
+      # binary_filename is used for running filters on all matches.
+      # For grepped results (which use content_match), we get the
+      # filename from the beginning of the grepped result, which is faster
+      # than parsing the whole snippet.
+      def binary_filename
+        @binary_filename ||= content_match ? search_result_filename : parsed_content[:binary_filename]
+      end
+
+      def filename
+        @filename ||= encode_utf8(@binary_filename || parsed_content[:binary_filename])
+      end
+
+      def basename
+        @basename ||= encode_utf8(@binary_basename || parsed_content[:binary_basename])
+      end
+
+      def data
+        @data ||= encode_utf8(@binary_data || parsed_content[:binary_data])
+      end
+
+      def path
+        filename
+      end
+
+      def project_id
+        @project_id || @project&.id
+      end
+
+      def present
+        super(presenter_class: BlobPresenter)
+      end
+
+      def fetch_blob
+        path = [ref, blob_filename]
+        missing_blob = { binary_filename: blob_filename }
+
+        BatchLoader.for(path).batch(default_value: missing_blob) do |refs, loader|
+          Gitlab::Git::Blob.batch(repository, refs, blob_size_limit: 1024).each do |blob|
+            # blobs that couldn't be fetched keep the missing_blob default
+            # value, so at least the blob filename is shown for them
+            data = {
+              id: blob.id,
+              binary_filename: blob.path,
+              binary_basename: File.basename(blob.path, File.extname(blob.path)),
+              ref: ref,
+              startline: 1,
+              binary_data: blob.data,
+              project: project
+            }
+
+            loader.call([ref, blob.path], data)
+          end
+        end
+      end
+
+      private
+
+      def search_result_filename
+        content_match.match(FILENAME_REGEXP) { |matches| matches[:filename] }
+      end
+
+      def parsed_content
+        strong_memoize(:parsed_content) do
+          if content_match
+            parse_search_result
+          elsif blob_filename
+            fetch_blob
+          else
+            {}
+          end
+        end
+      end
+
+      def parse_search_result
+        ref = nil
+        filename = nil
+        basename = nil
+
+        data = []
+        startline = 0
+
+        content_match.each_line.each_with_index do |line, index|
+          prefix ||= line.match(CONTENT_REGEXP)&.tap do |matches|
+            ref = matches[:ref]
+            filename = matches[:filename]
+            startline = matches[:startline]
+            startline = startline.to_i - index
+            extname = Regexp.escape(File.extname(filename))
+            basename = filename.sub(/#{extname}$/, '')
+          end
+
+          data << line.sub(prefix.to_s, '')
+        end
+
+        {
+          binary_filename: filename,
+          binary_basename: basename,
+          ref: ref,
+          startline: startline,
+          binary_data: data.join,
+          project: project
+        }
+      end
+
+      def repository
+        @repository ||= project.repository
+      end
+    end
+  end
+end
--- a/lib/gitlab/search/query.rb
+++ b/lib/gitlab/search/query.rb
@ -3,6 +3,8 @@
 module Gitlab
  module Search
    class Query < SimpleDelegator
+      include EncodingHelper
+
      def initialize(query, filter_opts = {}, &block)
        @raw_query = query.dup
        @filters = []
@ -50,7 +52,9 @@ module Gitlab
      end

      def parse_filter(filter, input)
-        filter[:parser].call(input)
+        result = filter[:parser].call(input)
+
+        @filter_options[:encode_binary] ? encode_binary(result) : result
      end
    end
  end
--- a/lib/gitlab/search_results.rb
+++ b/lib/gitlab/search_results.rb
@ -2,42 +2,6 @@

 module Gitlab
  class SearchResults
-    class FoundBlob
-      include EncodingHelper
-      include Presentable
-      include BlobLanguageFromGitAttributes
-
-      attr_reader :id, :filename, :basename, :ref, :startline, :data, :project
-
-      def initialize(opts = {})
-        @id = opts.fetch(:id, nil)
-        @filename = encode_utf8(opts.fetch(:filename, nil))
-        @basename = encode_utf8(opts.fetch(:basename, nil))
-        @ref = opts.fetch(:ref, nil)
-        @startline = opts.fetch(:startline, nil)
-        @data = encode_utf8(opts.fetch(:data, nil))
-        @per_page = opts.fetch(:per_page, 20)
-        @project = opts.fetch(:project, nil)
-        # Some caller does not have project object (e.g. elastic search),
-        # yet they can trigger many calls in one go,
-        # causing duplicated queries.
-        # Allow those to just pass project_id instead.
-        @project_id = opts.fetch(:project_id, nil)
-      end
-
-      def path
-        filename
-      end
-
-      def project_id
-        @project_id || @project&.id
-      end
-
-      def present
-        super(presenter_class: BlobPresenter)
-      end
-    end
-
    attr_reader :current_user, :query, :per_page

    # Limit search results by passed projects
--- a/lib/gitlab/wiki_file_finder.rb
+++ b/lib/gitlab/wiki_file_finder.rb
@ -2,6 +2,8 @@

 module Gitlab
  class WikiFileFinder < FileFinder
+    BATCH_SIZE = 100
+
    attr_reader :repository

    def initialize(project, ref)
@ -12,13 +14,11 @@ module Gitlab

    private

-    def search_filenames(query, except)
+    def search_filenames(query)
      safe_query = Regexp.escape(query.tr(' ', '-'))
      safe_query = Regexp.new(safe_query, Regexp::IGNORECASE)
      filenames = repository.ls_files(ref)

-      filenames.delete_if { |filename| except.include?(filename) } unless except.empty?
-
      filenames.grep(safe_query).first(BATCH_SIZE)
    end
  end
--- a/spec/lib/gitlab/project_search_results_spec.rb
+++ b/spec/lib/gitlab/project_search_results_spec.rb
@ -54,11 +54,18 @@ describe Gitlab::ProjectSearchResults do
    end

    it 'finds by name' do
-      expect(results.map(&:first)).to include(expected_file_by_name)
+      expect(results.map(&:filename)).to include(expected_file_by_name)
+    end
+
+    it "loads all blobs for filename matches in single batch" do
+      expect(Gitlab::Git::Blob).to receive(:batch).once.and_call_original
+
+      expected = project.repository.search_files_by_name(query, 'master')
+      expect(results.map(&:filename)).to include(*expected)
    end

    it 'finds by content' do
-      blob = results.select { |result| result.first == expected_file_by_content }.flatten.last
+      blob = results.select { |result| result.filename == expected_file_by_content }.flatten.last

      expect(blob.filename).to eq(expected_file_by_content)
    end
@ -122,126 +129,6 @@ describe Gitlab::ProjectSearchResults do
      let(:blob_type) { 'blobs' }
      let(:entity) { project }
    end
-
-    describe 'parsing results' do
-      let(:results) { project.repository.search_files_by_content('feature', 'master') }
-      let(:search_result) { results.first }
-
-      subject { described_class.parse_search_result(search_result) }
-
-      it "returns a valid FoundBlob" do
-        is_expected.to be_an Gitlab::SearchResults::FoundBlob
-        expect(subject.id).to be_nil
-        expect(subject.path).to eq('CHANGELOG')
-        expect(subject.filename).to eq('CHANGELOG')
-        expect(subject.basename).to eq('CHANGELOG')
-        expect(subject.ref).to eq('master')
-        expect(subject.startline).to eq(188)
-        expect(subject.data.lines[2]).to eq("  - Feature: Replace teams with group membership\n")
-      end
-
-      context 'when the matching filename contains a colon' do
-        let(:search_result) { "master:testdata/project::function1.yaml\x001\x00---\n" }
-
-        it 'returns a valid FoundBlob' do
-          expect(subject.filename).to eq('testdata/project::function1.yaml')
-          expect(subject.basename).to eq('testdata/project::function1')
-          expect(subject.ref).to eq('master')
-          expect(subject.startline).to eq(1)
-          expect(subject.data).to eq("---\n")
-        end
-      end
-
-      context 'when the matching content contains a number surrounded by colons' do
-        let(:search_result) { "master:testdata/foo.txt\x001\x00blah:9:blah" }
-
-        it 'returns a valid FoundBlob' do
-          expect(subject.filename).to eq('testdata/foo.txt')
-          expect(subject.basename).to eq('testdata/foo')
-          expect(subject.ref).to eq('master')
-          expect(subject.startline).to eq(1)
-          expect(subject.data).to eq('blah:9:blah')
-        end
-      end
-
-      context 'when the matching content contains multiple null bytes' do
-        let(:search_result) { "master:testdata/foo.txt\x001\x00blah\x001\x00foo" }
-
-        it 'returns a valid FoundBlob' do
-          expect(subject.filename).to eq('testdata/foo.txt')
-          expect(subject.basename).to eq('testdata/foo')
-          expect(subject.ref).to eq('master')
-          expect(subject.startline).to eq(1)
-          expect(subject.data).to eq("blah\x001\x00foo")
-        end
-      end
-
-      context 'when the search result ends with an empty line' do
-        let(:results) { project.repository.search_files_by_content('Role models', 'master') }
-
-        it 'returns a valid FoundBlob that ends with an empty line' do
-          expect(subject.filename).to eq('files/markdown/ruby-style-guide.md')
-          expect(subject.basename).to eq('files/markdown/ruby-style-guide')
-          expect(subject.ref).to eq('master')
-          expect(subject.startline).to eq(1)
-          expect(subject.data).to eq("# Prelude\n\n> Role models are important. <br/>\n> -- Officer Alex J. Murphy / RoboCop\n\n")
-        end
-      end
-
-      context 'when the search returns non-ASCII data' do
-        context 'with UTF-8' do
-          let(:results) { project.repository.search_files_by_content('файл', 'master') }
-
-          it 'returns results as UTF-8' do
-            expect(subject.filename).to eq('encoding/russian.rb')
-            expect(subject.basename).to eq('encoding/russian')
-            expect(subject.ref).to eq('master')
-            expect(subject.startline).to eq(1)
-            expect(subject.data).to eq("Хороший файл\n")
-          end
-        end
-
-        context 'with UTF-8 in the filename' do
-          let(:results) { project.repository.search_files_by_content('webhook', 'master') }
-
-          it 'returns results as UTF-8' do
-            expect(subject.filename).to eq('encoding/テスト.txt')
-            expect(subject.basename).to eq('encoding/テスト')
-            expect(subject.ref).to eq('master')
-            expect(subject.startline).to eq(3)
-            expect(subject.data).to include('WebHookの確認')
-          end
-        end
-
-        context 'with ISO-8859-1' do
-          let(:search_result) { "master:encoding/iso8859.txt\x001\x00\xC4\xFC\nmaster:encoding/iso8859.txt\x002\x00\nmaster:encoding/iso8859.txt\x003\x00foo\n".force_encoding(Encoding::ASCII_8BIT) }
-
-          it 'returns results as UTF-8' do
-            expect(subject.filename).to eq('encoding/iso8859.txt')
-            expect(subject.basename).to eq('encoding/iso8859')
-            expect(subject.ref).to eq('master')
-            expect(subject.startline).to eq(1)
-            expect(subject.data).to eq("Äü\n\nfoo\n")
-          end
-        end
-      end
-
-      context "when filename has extension" do
-        let(:search_result) { "master:CONTRIBUTE.md\x005\x00- [Contribute to GitLab](#contribute-to-gitlab)\n" }
-
-        it { expect(subject.path).to eq('CONTRIBUTE.md') }
-        it { expect(subject.filename).to eq('CONTRIBUTE.md') }
-        it { expect(subject.basename).to eq('CONTRIBUTE') }
-      end
-
-      context "when file under directory" do
-        let(:search_result) { "master:a/b/c.md\x005\x00a b c\n" }
-
-        it { expect(subject.path).to eq('a/b/c.md') }
-        it { expect(subject.filename).to eq('a/b/c.md') }
-        it { expect(subject.basename).to eq('a/b/c') }
-      end
-    end
  end

  describe 'wiki search' do
--- a/spec/lib/gitlab/search/found_blob_spec.rb
+++ b/spec/lib/gitlab/search/found_blob_spec.rb
@ -0,0 +1,138 @@
+# coding: utf-8
+
+require 'spec_helper'
+
+describe Gitlab::Search::FoundBlob do
+  describe 'parsing results' do
+    let(:project) { create(:project, :public, :repository) }
+    let(:results) { project.repository.search_files_by_content('feature', 'master') }
+    let(:search_result) { results.first }
+
+    subject { described_class.new(content_match: search_result, project: project) }
+
+    it "returns a valid FoundBlob" do
+      is_expected.to be_an described_class
+      expect(subject.id).to be_nil
+      expect(subject.path).to eq('CHANGELOG')
+      expect(subject.filename).to eq('CHANGELOG')
+      expect(subject.basename).to eq('CHANGELOG')
+      expect(subject.ref).to eq('master')
+      expect(subject.startline).to eq(188)
+      expect(subject.data.lines[2]).to eq("  - Feature: Replace teams with group membership\n")
+    end
+
+    it "doesn't parses content if not needed" do
+      expect(subject).not_to receive(:parse_search_result)
+      expect(subject.project_id).to eq(project.id)
+      expect(subject.binary_filename).to eq('CHANGELOG')
+    end
+
+    it "parses content only once when needed" do
+      expect(subject).to receive(:parse_search_result).once.and_call_original
+      expect(subject.filename).to eq('CHANGELOG')
+      expect(subject.startline).to eq(188)
+    end
+
+    context 'when the matching filename contains a colon' do
+      let(:search_result) { "master:testdata/project::function1.yaml\x001\x00---\n" }
+
+      it 'returns a valid FoundBlob' do
+        expect(subject.filename).to eq('testdata/project::function1.yaml')
+        expect(subject.basename).to eq('testdata/project::function1')
+        expect(subject.ref).to eq('master')
+        expect(subject.startline).to eq(1)
+        expect(subject.data).to eq("---\n")
+      end
+    end
+
+    context 'when the matching content contains a number surrounded by colons' do
+      let(:search_result) { "master:testdata/foo.txt\x001\x00blah:9:blah" }
+
+      it 'returns a valid FoundBlob' do
+        expect(subject.filename).to eq('testdata/foo.txt')
+        expect(subject.basename).to eq('testdata/foo')
+        expect(subject.ref).to eq('master')
+        expect(subject.startline).to eq(1)
+        expect(subject.data).to eq('blah:9:blah')
+      end
+    end
+
+    context 'when the matching content contains multiple null bytes' do
+      let(:search_result) { "master:testdata/foo.txt\x001\x00blah\x001\x00foo" }
+
+      it 'returns a valid FoundBlob' do
+        expect(subject.filename).to eq('testdata/foo.txt')
+        expect(subject.basename).to eq('testdata/foo')
+        expect(subject.ref).to eq('master')
+        expect(subject.startline).to eq(1)
+        expect(subject.data).to eq("blah\x001\x00foo")
+      end
+    end
+
+    context 'when the search result ends with an empty line' do
+      let(:results) { project.repository.search_files_by_content('Role models', 'master') }
+
+      it 'returns a valid FoundBlob that ends with an empty line' do
+        expect(subject.filename).to eq('files/markdown/ruby-style-guide.md')
+        expect(subject.basename).to eq('files/markdown/ruby-style-guide')
+        expect(subject.ref).to eq('master')
+        expect(subject.startline).to eq(1)
+        expect(subject.data).to eq("# Prelude\n\n> Role models are important. <br/>\n> -- Officer Alex J. Murphy / RoboCop\n\n")
+      end
+    end
+
+    context 'when the search returns non-ASCII data' do
+      context 'with UTF-8' do
+        let(:results) { project.repository.search_files_by_content('файл', 'master') }
+
+        it 'returns results as UTF-8' do
+          expect(subject.filename).to eq('encoding/russian.rb')
+          expect(subject.basename).to eq('encoding/russian')
+          expect(subject.ref).to eq('master')
+          expect(subject.startline).to eq(1)
+          expect(subject.data).to eq("Хороший файл\n")
+        end
+      end
+
+      context 'with UTF-8 in the filename' do
+        let(:results) { project.repository.search_files_by_content('webhook', 'master') }
+
+        it 'returns results as UTF-8' do
+          expect(subject.filename).to eq('encoding/テスト.txt')
+          expect(subject.basename).to eq('encoding/テスト')
+          expect(subject.ref).to eq('master')
+          expect(subject.startline).to eq(3)
+          expect(subject.data).to include('WebHookの確認')
+        end
+      end
+
+      context 'with ISO-8859-1' do
+        let(:search_result) { "master:encoding/iso8859.txt\x001\x00\xC4\xFC\nmaster:encoding/iso8859.txt\x002\x00\nmaster:encoding/iso8859.txt\x003\x00foo\n".force_encoding(Encoding::ASCII_8BIT) }
+
+        it 'returns results as UTF-8' do
+          expect(subject.filename).to eq('encoding/iso8859.txt')
+          expect(subject.basename).to eq('encoding/iso8859')
+          expect(subject.ref).to eq('master')
+          expect(subject.startline).to eq(1)
+          expect(subject.data).to eq("Äü\n\nfoo\n")
+        end
+      end
+    end
+
+    context "when filename has extension" do
+      let(:search_result) { "master:CONTRIBUTE.md\x005\x00- [Contribute to GitLab](#contribute-to-gitlab)\n" }
+
+      it { expect(subject.path).to eq('CONTRIBUTE.md') }
+      it { expect(subject.filename).to eq('CONTRIBUTE.md') }
+      it { expect(subject.basename).to eq('CONTRIBUTE') }
+    end
+
+    context "when file under directory" do
+      let(:search_result) { "master:a/b/c.md\x005\x00a b c\n" }
+
+      it { expect(subject.path).to eq('a/b/c.md') }
+      it { expect(subject.filename).to eq('a/b/c.md') }
+      it { expect(subject.basename).to eq('a/b/c') }
+    end
+  end
+end
--- a/spec/support/shared_examples/file_finder.rb
+++ b/spec/support/shared_examples/file_finder.rb
@ -3,18 +3,19 @@ shared_examples 'file finder' do
  let(:search_results) { subject.find(query) }

  it 'finds by name' do
-    filename,  blob = search_results.find { |_, blob| blob.filename == expected_file_by_name }
-    expect(filename).to eq(expected_file_by_name)
-    expect(blob).to be_a(Gitlab::SearchResults::FoundBlob)
+    blob = search_results.find { |blob| blob.filename == expected_file_by_name }
+
+    expect(blob.filename).to eq(expected_file_by_name)
+    expect(blob).to be_a(Gitlab::Search::FoundBlob)
    expect(blob.ref).to eq(subject.ref)
    expect(blob.data).not_to be_empty
  end

  it 'finds by content' do
-    filename, blob = search_results.find { |_, blob| blob.filename == expected_file_by_content }
+    blob = search_results.find { |blob| blob.filename == expected_file_by_content }

-    expect(filename).to eq(expected_file_by_content)
-    expect(blob).to be_a(Gitlab::SearchResults::FoundBlob)
+    expect(blob.filename).to eq(expected_file_by_content)
+    expect(blob).to be_a(Gitlab::Search::FoundBlob)
    expect(blob.ref).to eq(subject.ref)
    expect(blob.data).not_to be_empty
  end