diff --git a/app/models/merge_request_diff.rb b/app/models/merge_request_diff.rb index a3029a54604..712347e76ed 100644 --- a/app/models/merge_request_diff.rb +++ b/app/models/merge_request_diff.rb @@ -7,6 +7,7 @@ class MergeRequestDiff < ActiveRecord::Base include IgnorableColumn include EachBatch include Gitlab::Utils::StrongMemoize + include ObjectStorage::BackgroundMove # Don't display more than 100 commits at once COMMITS_SAFE_SIZE = 100 @@ -15,9 +16,13 @@ class MergeRequestDiff < ActiveRecord::Base :st_diffs belongs_to :merge_request + manual_inverse_association :merge_request, :merge_request_diff - has_many :merge_request_diff_files, -> { order(:merge_request_diff_id, :relative_order) } + has_many :merge_request_diff_files, + -> { order(:merge_request_diff_id, :relative_order) }, + inverse_of: :merge_request_diff + has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) } state_machine :state, initial: :empty do @@ -45,10 +50,14 @@ class MergeRequestDiff < ActiveRecord::Base scope :recent, -> { order(id: :desc).limit(100) } + mount_uploader :external_diff, ExternalDiffUploader + # All diff information is collected from repository after object is created. # It allows you to override variables like head_commit_sha before getting diff. after_create :save_git_content, unless: :importing? + after_save :update_external_diff_store, if: :external_diff_changed? + def self.find_by_diff_refs(diff_refs) find_by(start_commit_sha: diff_refs.start_sha, head_commit_sha: diff_refs.head_sha, base_commit_sha: diff_refs.base_sha) end @@ -241,10 +250,97 @@ class MergeRequestDiff < ActiveRecord::Base end end + # Carrierwave defines `write_uploader` dynamically on this class, so `super` + # does not work. Alias the carrierwave method so we can call it when needed + alias_method :carrierwave_write_uploader, :write_uploader + + # The `external_diff`, `external_diff_store`, and `stored_externally` + # columns were introduced in GitLab 11.8, but some background migration specs + # use factories that rely on current code with an old schema. Without these + # `has_attribute?` guards, they fail with a `MissingAttributeError`. + # + # For more details, see: https://gitlab.com/gitlab-org/gitlab-ce/issues/44990 + + def write_uploader(column, identifier) + carrierwave_write_uploader(column, identifier) if has_attribute?(column) + end + + def update_external_diff_store + update_column(:external_diff_store, external_diff.object_store) if + has_attribute?(:external_diff_store) + end + + def external_diff_changed? + super if has_attribute?(:external_diff) + end + + def stored_externally + super if has_attribute?(:stored_externally) + end + alias_method :stored_externally?, :stored_externally + + # If enabled, yields the external file containing the diff. Otherwise, yields + # nil. This method is not thread-safe, but it *is* re-entrant, which allows + # multiple merge_request_diff_files to load their data efficiently + def opening_external_diff + return yield(nil) unless stored_externally? + return yield(@external_diff_file) if @external_diff_file + + external_diff.open do |file| + begin + @external_diff_file = file + + yield(@external_diff_file) + ensure + @external_diff_file = nil + end + end + end + private def create_merge_request_diff_files(diffs) - rows = diffs.map.with_index do |diff, index| + rows = + if has_attribute?(:external_diff) && Gitlab.config.external_diffs.enabled + build_external_merge_request_diff_files(diffs) + else + build_merge_request_diff_files(diffs) + end + + # Faster inserts + Gitlab::Database.bulk_insert('merge_request_diff_files', rows) + end + + def build_external_merge_request_diff_files(diffs) + rows = build_merge_request_diff_files(diffs) + tempfile = build_external_diff_tempfile(rows) + + self.external_diff = tempfile + self.stored_externally = true + + rows + ensure + tempfile&.unlink + end + + def build_external_diff_tempfile(rows) + Tempfile.open(external_diff.filename) do |file| + rows.inject(0) do |offset, row| + data = row.delete(:diff) + row[:external_diff_offset] = offset + row[:external_diff_size] = data.size + + file.write(data) + + offset + data.size + end + + file + end + end + + def build_merge_request_diff_files(diffs) + diffs.map.with_index do |diff, index| diff_hash = diff.to_hash.merge( binary: false, merge_request_diff_id: self.id, @@ -261,18 +357,20 @@ class MergeRequestDiff < ActiveRecord::Base end end end - - Gitlab::Database.bulk_insert('merge_request_diff_files', rows) end def load_diffs(options) - collection = merge_request_diff_files + # Ensure all diff files operate on the same external diff file instance if + # present. This reduces file open/close overhead. + opening_external_diff do + collection = merge_request_diff_files - if paths = options[:paths] - collection = collection.where('old_path IN (?) OR new_path IN (?)', paths, paths) + if paths = options[:paths] + collection = collection.where('old_path IN (?) OR new_path IN (?)', paths, paths) + end + + Gitlab::Git::DiffCollection.new(collection.map(&:to_hash), options) end - - Gitlab::Git::DiffCollection.new(collection.map(&:to_hash), options) end def load_commits diff --git a/app/models/merge_request_diff_file.rb b/app/models/merge_request_diff_file.rb index a9f110bec5c..e8d936e265c 100644 --- a/app/models/merge_request_diff_file.rb +++ b/app/models/merge_request_diff_file.rb @@ -4,7 +4,7 @@ class MergeRequestDiffFile < ActiveRecord::Base include Gitlab::EncodingHelper include DiffFile - belongs_to :merge_request_diff + belongs_to :merge_request_diff, inverse_of: :merge_request_diff_files def utf8_diff return '' if diff.blank? @@ -13,6 +13,16 @@ class MergeRequestDiffFile < ActiveRecord::Base end def diff - binary? ? super.unpack('m0').first : super + content = + if merge_request_diff&.stored_externally? + merge_request_diff.opening_external_diff do |file| + file.seek(external_diff_offset) + file.read(external_diff_size) + end + else + super + end + + binary? ? content.unpack('m0').first : content end end diff --git a/app/uploaders/external_diff_uploader.rb b/app/uploaders/external_diff_uploader.rb new file mode 100644 index 00000000000..d2707cd0777 --- /dev/null +++ b/app/uploaders/external_diff_uploader.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +class ExternalDiffUploader < GitlabUploader + include ObjectStorage::Concern + + storage_options Gitlab.config.external_diffs + + alias_method :upload, :model + + def filename + "diff-#{model.id}" + end + + def store_dir + dynamic_segment + end + + private + + def dynamic_segment + File.join(model.model_name.plural, "mr-#{model.merge_request_id}") + end +end diff --git a/changelogs/unreleased/52568-external-mr-diffs.yml b/changelogs/unreleased/52568-external-mr-diffs.yml new file mode 100644 index 00000000000..b1c9d5cb809 --- /dev/null +++ b/changelogs/unreleased/52568-external-mr-diffs.yml @@ -0,0 +1,5 @@ +--- +title: Allow merge request diffs to be placed into an object store +merge_request: 24276 +author: +type: added diff --git a/config/gitlab.yml.example b/config/gitlab.yml.example index 6fc33e8971e..be23166cb7b 100644 --- a/config/gitlab.yml.example +++ b/config/gitlab.yml.example @@ -166,6 +166,23 @@ production: &base # aws_signature_version: 4 # For creation of signed URLs. Set to 2 if provider does not support v4. # endpoint: 'https://s3.amazonaws.com' # default: nil - Useful for S3 compliant services such as DigitalOcean Spaces + ## Merge request external diff storage + external_diffs: + # If disabled (the default), the diffs are in-database. Otherwise, they can + # be stored on disk, or in object storage + enabled: false + # The location where external diffs are stored (default: shared/lfs-external-diffs). + # storage_path: shared/external-diffs + # object_store: + # enabled: false + # remote_directory: external-diffs + # background_upload: false + # proxy_download: false + # connection: + # provider: AWS + # aws_access_key_id: AWS_ACCESS_KEY_ID + # aws_secret_access_key: AWS_SECRET_ACCESS_KEY + # region: us-east-1 ## Git LFS lfs: @@ -733,6 +750,18 @@ test: <<: *base gravatar: enabled: true + external_diffs: + enabled: false + # The location where external diffs are stored (default: shared/external-diffs). + # storage_path: shared/external-diffs + object_store: + enabled: false + remote_directory: external-diffs # The bucket name + connection: + provider: AWS # Only AWS supported at the moment + aws_access_key_id: AWS_ACCESS_KEY_ID + aws_secret_access_key: AWS_SECRET_ACCESS_KEY + region: us-east-1 lfs: enabled: false # The location where LFS objects are stored (default: shared/lfs-objects). diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 1aed41e02ab..dfcf1e648b4 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -215,6 +215,14 @@ Settings.pages['artifacts_server'] ||= Settings.pages['enabled'] if Settings.pa Settings.pages['admin'] ||= Settingslogic.new({}) Settings.pages.admin['certificate'] ||= '' +# +# External merge request diffs +# +Settings['external_diffs'] ||= Settingslogic.new({}) +Settings.external_diffs['enabled'] = false if Settings.external_diffs['enabled'].nil? +Settings.external_diffs['storage_path'] = Settings.absolute(Settings.external_diffs['storage_path'] || File.join(Settings.shared['path'], 'external-diffs')) +Settings.external_diffs['object_store'] = ObjectStoreSettings.parse(Settings.external_diffs['object_store']) + # # Git LFS # diff --git a/db/migrate/20190109153125_add_merge_request_external_diffs.rb b/db/migrate/20190109153125_add_merge_request_external_diffs.rb new file mode 100644 index 00000000000..c67903c7f67 --- /dev/null +++ b/db/migrate/20190109153125_add_merge_request_external_diffs.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddMergeRequestExternalDiffs < ActiveRecord::Migration[5.0] + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + def change + # Allow the merge request diff to store details about an external file + add_column :merge_request_diffs, :external_diff, :string + add_column :merge_request_diffs, :external_diff_store, :integer + add_column :merge_request_diffs, :stored_externally, :boolean + + # The diff for each file is mapped to a range in the external file + add_column :merge_request_diff_files, :external_diff_offset, :integer + add_column :merge_request_diff_files, :external_diff_size, :integer + + # If the diff is in object storage, it will be null in the database + change_column_null :merge_request_diff_files, :diff, true + end +end diff --git a/db/schema.rb b/db/schema.rb index 4b6e4992056..20c8dab4c3e 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -1203,8 +1203,10 @@ ActiveRecord::Schema.define(version: 20190131122559) do t.string "b_mode", null: false t.text "new_path", null: false t.text "old_path", null: false - t.text "diff", null: false + t.text "diff" t.boolean "binary" + t.integer "external_diff_offset" + t.integer "external_diff_size" t.index ["merge_request_diff_id", "relative_order"], name: "index_merge_request_diff_files_on_mr_diff_id_and_order", unique: true, using: :btree end @@ -1218,6 +1220,9 @@ ActiveRecord::Schema.define(version: 20190131122559) do t.string "head_commit_sha" t.string "start_commit_sha" t.integer "commits_count" + t.string "external_diff" + t.integer "external_diff_store" + t.boolean "stored_externally" t.index ["merge_request_id", "id"], name: "index_merge_request_diffs_on_merge_request_id_and_id", using: :btree end diff --git a/doc/administration/index.md b/doc/administration/index.md index 0b673d61139..184754cd467 100644 --- a/doc/administration/index.md +++ b/doc/administration/index.md @@ -48,6 +48,7 @@ Learn how to install, configure, update, and maintain your GitLab instance. - [Third party offers](../user/admin_area/settings/third_party_offers.md) - [Compliance](compliance.md): A collection of features from across the application that you may configure to help ensure that your GitLab instance and DevOps workflow meet compliance standards. - [Diff limits](../user/admin_area/diff_limits.md): Configure the diff rendering size limits of branch comparison pages. +- [Merge request diffs](merge_request_diffs.md): Configure the diffs shown on merge requests - [Broadcast Messages](../user/admin_area/broadcast_messages.md): Send messages to GitLab users through the UI. #### Customizing GitLab's appearance diff --git a/doc/administration/merge_request_diffs.md b/doc/administration/merge_request_diffs.md new file mode 100644 index 00000000000..94620c3d3a0 --- /dev/null +++ b/doc/administration/merge_request_diffs.md @@ -0,0 +1,154 @@ +# Merge request diffs administration + +> **Notes:** +> - External merge request diffs introduced in GitLab 11.8 + +Merge request diffs are size-limited copies of diffs associated with merge +requests. When viewing a merge request, diffs are sourced from these copies +wherever possible as a performance optimization. + +By default, merge request diffs are stored in the database, in a table named +`merge_request_diff_files`. Larger installations may find this table grows too +large, in which case, switching to external storage is recommended. + +### Using external storage + +Merge request diffs can be stored on disk, or in object storage. In general, it +is better to store the diffs in the database than on disk. + +To enable external storage of merge request diffs: + +--- + +**In Omnibus installations:** + +1. Edit `/etc/gitlab/gitlab.rb` and add the following line: + + ```ruby + gitlab_rails['external_diffs_enabled'] = true + ``` + +1. _The external diffs will be stored in in + `/var/opt/gitlab/gitlab-rails/shared/external-diffs`._ To change the path, + for example to `/mnt/storage/external-diffs`, edit `/etc/gitlab/gitlab.rb` + and add the following line: + + ```ruby + gitlab_rails['external_diffs_storage_path'] = "/mnt/storage/external-diffs" + ``` + +1. Save the file and [reconfigure GitLab][] for the changes to take effect. + +--- + +**In installations from source:** + +1. Edit `/home/git/gitlab/config/gitlab.yml` and add or amend the following + lines: + + ```yaml + external_diffs: + enabled: true + ``` + +1. _The external diffs will be stored in + `/home/git/gitlab/shared/external-diffs`._ To change the path, for example + to `/mnt/storage/external-diffs`, edit `/home/git/gitlab/config/gitlab.yml` + and add or amend the following lines: + + ```yaml + external_diffs: + enabled: true + storage_path: /mnt/storage/external-diffs + ``` + +1. Save the file and [restart GitLab][] for the changes to take effect. + +### Using object storage + +Instead of storing the external diffs on disk, we recommended you use an object +store like AWS S3 instead. This configuration relies on valid AWS credentials to +be configured already. + +### Object Storage Settings + +For source installations, these settings are nested under `external_diffs:` and +then `object_store:`. On omnibus installs, they are prefixed by +`external_diffs_object_store_`. + +| Setting | Description | Default | +|---------|-------------|---------| +| `enabled` | Enable/disable object storage | `false` | +| `remote_directory` | The bucket name where external diffs will be stored| | +| `direct_upload` | Set to true to enable direct upload of external diffs without the need of local shared storage. Option may be removed once we decide to support only single storage for all files. | `false` | +| `background_upload` | Set to false to disable automatic upload. Option may be removed once upload is direct to S3 | `true` | +| `proxy_download` | Set to true to enable proxying all files served. Option allows to reduce egress traffic as this allows clients to download directly from remote storage instead of proxying all data | `false` | +| `connection` | Various connection options described below | | + +#### S3 compatible connection settings + +The connection settings match those provided by [Fog](https://github.com/fog), and are as follows: + +| Setting | Description | Default | +|---------|-------------|---------| +| `provider` | Always `AWS` for compatible hosts | AWS | +| `aws_access_key_id` | AWS credentials, or compatible | | +| `aws_secret_access_key` | AWS credentials, or compatible | | +| `aws_signature_version` | AWS signature version to use. 2 or 4 are valid options. Digital Ocean Spaces and other providers may need 2. | 4 | +| `region` | AWS region | us-east-1 | +| `host` | S3 compatible host for when not using AWS, e.g. `localhost` or `storage.example.com` | s3.amazonaws.com | +| `endpoint` | Can be used when configuring an S3 compatible service such as [Minio](https://www.minio.io), by entering a URL such as `http://127.0.0.1:9000` | (optional) | +| `path_style` | Set to true to use `host/bucket_name/object` style paths instead of `bucket_name.host/object`. Leave as false for AWS S3 | false | +| `use_iam_profile` | Set to true to use IAM profile instead of access keys | false + +**In Omnibus installations:** + +1. Edit `/etc/gitlab/gitlab.rb` and add the following lines by replacing with + the values you want: + + ```ruby + gitlab_rails['external_diffs_enabled'] = true + gitlab_rails['external_diffs_object_store_enabled'] = true + gitlab_rails['external_diffs_object_store_remote_directory'] = "external-diffs" + gitlab_rails['external_diffs_object_store_connection'] = { + 'provider' => 'AWS', + 'region' => 'eu-central-1', + 'aws_access_key_id' => 'AWS_ACCESS_KEY_ID', + 'aws_secret_access_key' => 'AWS_SECRET_ACCESS_KEY' + } + ``` + + NOTE: if you are using AWS IAM profiles, be sure to omit the + AWS access key and secret access key/value pairs. For example: + + ```ruby + gitlab_rails['external_diffs_object_store_connection'] = { + 'provider' => 'AWS', + 'region' => 'eu-central-1', + 'use_iam_profile' => true + } + ``` + +1. Save the file and [reconfigure GitLab][] for the changes to take effect. + +--- + +**In installations from source:** + +1. Edit `/home/git/gitlab/config/gitlab.yml` and add or amend the following + lines: + + ```yaml + external_diffs: + enabled: true + object_store: + enabled: true + remote_directory: "external-diffs" # The bucket name + connection: + provider: AWS # Only AWS supported at the moment + aws_access_key_id: AWS_ACCESS_KEY_ID + aws_secret_access_key: AWS_SECRET_ACCESS_KEY + region: eu-central-1 + ``` + +1. Save the file and [restart GitLab][] for the changes to take effect. diff --git a/doc/development/file_storage.md b/doc/development/file_storage.md index b90dc90e424..597812c8c49 100644 --- a/doc/development/file_storage.md +++ b/doc/development/file_storage.md @@ -18,6 +18,7 @@ There are many places where file uploading is used, according to contexts: - Issues/MR/Notes Legacy Markdown attachments - CI Artifacts (archive, metadata, trace) - LFS Objects + - Merge request diffs ## Disk storage @@ -37,6 +38,7 @@ they are still not 100% standardized. You can see them below: | Issues/MR/Notes Legacy Markdown attachments | no | uploads/-/system/note/attachment/:id/:filename | `AttachmentUploader` | Note | | CI Artifacts (CE) | yes | shared/artifacts/:disk_hash[0..1]/:disk_hash[2..3]/:disk_hash/:year_:month_:date/:job_id/:job_artifact_id (:disk_hash is SHA256 digest of project_id) | `JobArtifactUploader` | Ci::JobArtifact | | LFS Objects (CE) | yes | shared/lfs-objects/:hex/:hex/:object_hash | `LfsObjectUploader` | LfsObject | +| External merge request diffs | yes | shared/external-diffs/merge_request_diffs/mr-:parent_id/diff-:id | `ExternalDiffUploader` | MergeRequestDiff | CI Artifacts and LFS Objects behave differently in CE and EE. In CE they inherit the `GitlabUploader` while in EE they inherit the `ObjectStorage` and store files in and S3 API compatible object store. diff --git a/lib/gitlab/import_export/import_export.yml b/lib/gitlab/import_export/import_export.yml index add7ee58da6..099677a791c 100644 --- a/lib/gitlab/import_export/import_export.yml +++ b/lib/gitlab/import_export/import_export.yml @@ -130,9 +130,14 @@ excluded_attributes: snippets: - :expired_at merge_request_diff: + - :external_diff + - :stored_externally + - :external_diff_store - :st_diffs merge_request_diff_files: - :diff + - :external_diff_offset + - :external_diff_size issues: - :milestone_id merge_requests: diff --git a/spec/models/merge_request_diff_spec.rb b/spec/models/merge_request_diff_spec.rb index 33e984dc399..1849d3bac12 100644 --- a/spec/models/merge_request_diff_spec.rb +++ b/spec/models/merge_request_diff_spec.rb @@ -46,7 +46,7 @@ describe MergeRequestDiff do it { expect(first_diff.reload).not_to be_latest } end - describe '#diffs' do + shared_examples_for 'merge request diffs' do let(:merge_request) { create(:merge_request, :with_diffs) } let!(:diff) { merge_request.merge_request_diff.reload } @@ -91,98 +91,110 @@ describe MergeRequestDiff do diff.diffs.diff_files end end - end - describe '#raw_diffs' do - context 'when the :ignore_whitespace_change option is set' do - it 'creates a new compare object instead of loading from the DB' do - expect(diff_with_commits).not_to receive(:load_diffs) - expect(diff_with_commits.compare).to receive(:diffs).and_call_original + describe '#raw_diffs' do + context 'when the :ignore_whitespace_change option is set' do + it 'creates a new compare object instead of using preprocessed data' do + expect(diff_with_commits).not_to receive(:load_diffs) + expect(diff_with_commits.compare).to receive(:diffs).and_call_original - diff_with_commits.raw_diffs(ignore_whitespace_change: true) + diff_with_commits.raw_diffs(ignore_whitespace_change: true) + end + end + + context 'when the raw diffs are empty' do + before do + MergeRequestDiffFile.where(merge_request_diff_id: diff_with_commits.id).delete_all + end + + it 'returns an empty DiffCollection' do + expect(diff_with_commits.raw_diffs).to be_a(Gitlab::Git::DiffCollection) + expect(diff_with_commits.raw_diffs).to be_empty + end + end + + context 'when the raw diffs exist' do + it 'returns the diffs' do + expect(diff_with_commits.raw_diffs).to be_a(Gitlab::Git::DiffCollection) + expect(diff_with_commits.raw_diffs).not_to be_empty + end + + context 'when the :paths option is set' do + let(:diffs) { diff_with_commits.raw_diffs(paths: ['files/ruby/popen.rb', 'files/ruby/popen.rb']) } + + it 'only returns diffs that match the (old path, new path) given' do + expect(diffs.map(&:new_path)).to contain_exactly('files/ruby/popen.rb') + end + + it 'only serializes diff files found by query' do + expect(diff_with_commits.merge_request_diff_files.count).to be > 10 + expect_any_instance_of(MergeRequestDiffFile).to receive(:to_hash).once + + diffs + end + + it 'uses the preprocessed diffs' do + expect(diff_with_commits).to receive(:load_diffs) + + diffs + end + end end end - context 'when the raw diffs are empty' do - before do - MergeRequestDiffFile.where(merge_request_diff_id: diff_with_commits.id).delete_all + describe '#save_diffs' do + it 'saves collected state' do + mr_diff = create(:merge_request).merge_request_diff + + expect(mr_diff.collected?).to be_truthy end - it 'returns an empty DiffCollection' do - expect(diff_with_commits.raw_diffs).to be_a(Gitlab::Git::DiffCollection) - expect(diff_with_commits.raw_diffs).to be_empty - end - end + it 'saves overflow state' do + allow(Commit).to receive(:max_diff_options) + .and_return(max_lines: 0, max_files: 0) - context 'when the raw diffs exist' do - it 'returns the diffs' do - expect(diff_with_commits.raw_diffs).to be_a(Gitlab::Git::DiffCollection) - expect(diff_with_commits.raw_diffs).not_to be_empty + mr_diff = create(:merge_request).merge_request_diff + + expect(mr_diff.overflow?).to be_truthy end - context 'when the :paths option is set' do - let(:diffs) { diff_with_commits.raw_diffs(paths: ['files/ruby/popen.rb', 'files/ruby/popen.rb']) } + it 'saves empty state' do + allow_any_instance_of(described_class).to receive_message_chain(:compare, :commits) + .and_return([]) - it 'only returns diffs that match the (old path, new path) given' do - expect(diffs.map(&:new_path)).to contain_exactly('files/ruby/popen.rb') - end + mr_diff = create(:merge_request).merge_request_diff - it 'only serializes diff files found by query' do - expect(diff_with_commits.merge_request_diff_files.count).to be > 10 - expect_any_instance_of(MergeRequestDiffFile).to receive(:to_hash).once + expect(mr_diff.empty?).to be_truthy + end - diffs - end + it 'expands collapsed diffs before saving' do + mr_diff = create(:merge_request, source_branch: 'expand-collapse-lines', target_branch: 'master').merge_request_diff + diff_file = mr_diff.merge_request_diff_files.find_by(new_path: 'expand-collapse/file-5.txt') - it 'uses the diffs from the DB' do - expect(diff_with_commits).to receive(:load_diffs) + expect(diff_file.diff).not_to be_empty + end - diffs - end + it 'saves binary diffs correctly' do + path = 'files/images/icn-time-tracking.pdf' + mr_diff = create(:merge_request, source_branch: 'add-pdf-text-binary', target_branch: 'master').merge_request_diff + diff_file = mr_diff.merge_request_diff_files.find_by(new_path: path) + + expect(diff_file).to be_binary + expect(diff_file.diff).to eq(mr_diff.compare.diffs(paths: [path]).to_a.first.diff) end end end - describe '#save_diffs' do - it 'saves collected state' do - mr_diff = create(:merge_request).merge_request_diff + describe 'internal diffs configured' do + include_examples 'merge request diffs' + end - expect(mr_diff.collected?).to be_truthy + describe 'external diffs configured' do + before do + stub_external_diffs_setting(enabled: true) end - it 'saves overflow state' do - allow(Commit).to receive(:max_diff_options) - .and_return(max_lines: 0, max_files: 0) - - mr_diff = create(:merge_request).merge_request_diff - - expect(mr_diff.overflow?).to be_truthy - end - - it 'saves empty state' do - allow_any_instance_of(described_class).to receive_message_chain(:compare, :commits) - .and_return([]) - - mr_diff = create(:merge_request).merge_request_diff - - expect(mr_diff.empty?).to be_truthy - end - - it 'expands collapsed diffs before saving' do - mr_diff = create(:merge_request, source_branch: 'expand-collapse-lines', target_branch: 'master').merge_request_diff - diff_file = mr_diff.merge_request_diff_files.find_by(new_path: 'expand-collapse/file-5.txt') - - expect(diff_file.diff).not_to be_empty - end - - it 'saves binary diffs correctly' do - path = 'files/images/icn-time-tracking.pdf' - mr_diff = create(:merge_request, source_branch: 'add-pdf-text-binary', target_branch: 'master').merge_request_diff - diff_file = mr_diff.merge_request_diff_files.find_by(new_path: path) - - expect(diff_file).to be_binary - expect(diff_file.diff).to eq(mr_diff.compare.diffs(paths: [path]).to_a.first.diff) - end + include_examples 'merge request diffs' end describe '#commit_shas' do @@ -245,4 +257,55 @@ describe MergeRequestDiff do expect(subject.modified_paths).to eq(%w{foo bar baz}) end end + + describe '#opening_external_diff' do + subject(:diff) { diff_with_commits } + + context 'external diffs disabled' do + it { expect(diff.external_diff).not_to be_exists } + + it 'yields nil' do + expect { |b| diff.opening_external_diff(&b) }.to yield_with_args(nil) + end + end + + context 'external diffs enabled' do + let(:test_dir) { 'tmp/tests/external-diffs' } + + around do |example| + FileUtils.mkdir_p(test_dir) + + begin + example.run + ensure + FileUtils.rm_rf(test_dir) + end + end + + before do + stub_external_diffs_setting(enabled: true, storage_path: test_dir) + end + + it { expect(diff.external_diff).to be_exists } + + it 'yields an open file' do + expect { |b| diff.opening_external_diff(&b) }.to yield_with_args(File) + end + + it 'is re-entrant' do + outer_file_a = + diff.opening_external_diff do |outer_file| + diff.opening_external_diff do |inner_file| + expect(outer_file).to eq(inner_file) + end + + outer_file + end + + diff.opening_external_diff do |outer_file_b| + expect(outer_file_a).not_to eq(outer_file_b) + end + end + end + end end diff --git a/spec/support/helpers/stub_configuration.rb b/spec/support/helpers/stub_configuration.rb index 2851cd9733c..ff21bbe28ca 100644 --- a/spec/support/helpers/stub_configuration.rb +++ b/spec/support/helpers/stub_configuration.rb @@ -56,6 +56,10 @@ module StubConfiguration allow(Gitlab.config.lfs).to receive_messages(to_settings(messages)) end + def stub_external_diffs_setting(messages) + allow(Gitlab.config.external_diffs).to receive_messages(to_settings(messages)) + end + def stub_artifacts_setting(messages) allow(Gitlab.config.artifacts).to receive_messages(to_settings(messages)) end diff --git a/spec/support/helpers/stub_object_storage.rb b/spec/support/helpers/stub_object_storage.rb index 58b5c6a6435..e0c50e533a6 100644 --- a/spec/support/helpers/stub_object_storage.rb +++ b/spec/support/helpers/stub_object_storage.rb @@ -42,6 +42,13 @@ module StubObjectStorage **params) end + def stub_external_diffs_object_storage(uploader = described_class, **params) + stub_object_storage_uploader(config: Gitlab.config.external_diffs.object_store, + uploader: uploader, + remote_directory: 'external_diffs', + **params) + end + def stub_lfs_object_storage(**params) stub_object_storage_uploader(config: Gitlab.config.lfs.object_store, uploader: LfsObjectUploader, diff --git a/spec/uploaders/external_diff_uploader_spec.rb b/spec/uploaders/external_diff_uploader_spec.rb new file mode 100644 index 00000000000..1c959770dc4 --- /dev/null +++ b/spec/uploaders/external_diff_uploader_spec.rb @@ -0,0 +1,67 @@ +require 'spec_helper' + +describe ExternalDiffUploader do + let(:diff) { create(:merge_request).merge_request_diff } + let(:path) { Gitlab.config.external_diffs.storage_path } + + subject(:uploader) { described_class.new(diff, :external_diff) } + + it_behaves_like "builds correct paths", + store_dir: %r[merge_request_diffs/mr-\d+], + cache_dir: %r[/external-diffs/tmp/cache], + work_dir: %r[/external-diffs/tmp/work] + + context "object store is REMOTE" do + before do + stub_external_diffs_object_storage + end + + include_context 'with storage', described_class::Store::REMOTE + + it_behaves_like "builds correct paths", + store_dir: %r[merge_request_diffs/mr-\d+] + end + + describe 'migration to object storage' do + context 'with object storage disabled' do + it "is skipped" do + expect(ObjectStorage::BackgroundMoveWorker).not_to receive(:perform_async) + + diff + end + end + + context 'with object storage enabled' do + before do + stub_external_diffs_setting(enabled: true) + stub_external_diffs_object_storage(background_upload: true) + end + + it 'is scheduled to run after creation' do + expect(ObjectStorage::BackgroundMoveWorker).to receive(:perform_async).with(described_class.name, 'MergeRequestDiff', :external_diff, kind_of(Numeric)) + + diff + end + end + end + + describe 'remote file' do + context 'with object storage enabled' do + before do + stub_external_diffs_setting(enabled: true) + stub_external_diffs_object_storage + + diff.update!(external_diff_store: described_class::Store::REMOTE) + end + + it 'can store file remotely' do + allow(ObjectStorage::BackgroundMoveWorker).to receive(:perform_async) + + diff + + expect(diff.external_diff_store).to eq(described_class::Store::REMOTE) + expect(diff.external_diff.path).not_to be_blank + end + end + end +end