diff --git a/app/controllers/explore/projects_controller.rb b/app/controllers/explore/projects_controller.rb index 38e5943eb76..a62c6211372 100644 --- a/app/controllers/explore/projects_controller.rb +++ b/app/controllers/explore/projects_controller.rb @@ -21,8 +21,7 @@ class Explore::ProjectsController < Explore::ApplicationController end def trending - @projects = TrendingProjectsFinder.new.execute - @projects = filter_projects(@projects) + @projects = filter_projects(Project.trending) @projects = @projects.page(params[:page]) respond_to do |format| diff --git a/app/finders/trending_projects_finder.rb b/app/finders/trending_projects_finder.rb deleted file mode 100644 index c1e434d9926..00000000000 --- a/app/finders/trending_projects_finder.rb +++ /dev/null @@ -1,16 +0,0 @@ -# Finder for retrieving public trending projects in a given time range. -class TrendingProjectsFinder - # current_user - The currently logged in User, if any. - # last_months - The number of months to limit the trending data to. - def execute(months_limit = 1) - Rails.cache.fetch(cache_key_for(months_limit), expires_in: 1.day) do - Project.public_only.trending(months_limit.months.ago) - end - end - - private - - def cache_key_for(months) - "trending_projects/#{months}" - end -end diff --git a/app/models/project.rb b/app/models/project.rb index 88e4bd14860..74d54e69648 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -375,19 +375,9 @@ class Project < ActiveRecord::Base %r{(?#{name_pattern}/#{name_pattern})} end - def trending(since = 1.month.ago) - # By counting in the JOIN we don't expose the GROUP BY to the outer query. - # This means that calls such as "any?" and "count" just return a number of - # the total count, instead of the counts grouped per project as a Hash. - join_body = "INNER JOIN ( - SELECT project_id, COUNT(*) AS amount - FROM notes - WHERE created_at >= #{sanitize(since)} - AND system IS FALSE - GROUP BY project_id - ) join_note_counts ON projects.id = join_note_counts.project_id" - - joins(join_body).reorder('join_note_counts.amount DESC') + def trending + joins('INNER JOIN trending_projects ON projects.id = trending_projects.project_id'). + reorder('trending_projects.id ASC') end def cached_count diff --git a/app/models/trending_project.rb b/app/models/trending_project.rb new file mode 100644 index 00000000000..27e3732da17 --- /dev/null +++ b/app/models/trending_project.rb @@ -0,0 +1,35 @@ +class TrendingProject < ActiveRecord::Base + belongs_to :project + + # The number of months to include in the trending calculation. + MONTHS_TO_INCLUDE = 1 + + # The maximum number of projects to include in the trending set. + PROJECTS_LIMIT = 100 + + # Populates the trending projects table with the current list of trending + # projects. + def self.refresh! + # The calculation **must** run in a transaction. If the removal of data and + # insertion of new data were to run separately a user might end up with an + # empty list of trending projects for a short period of time. + transaction do + delete_all + + timestamp = connection.quote(MONTHS_TO_INCLUDE.months.ago) + + connection.execute <<-EOF.strip_heredoc + INSERT INTO #{table_name} (project_id) + SELECT project_id + FROM notes + INNER JOIN projects ON projects.id = notes.project_id + WHERE notes.created_at >= #{timestamp} + AND notes.system IS FALSE + AND projects.visibility_level = #{Gitlab::VisibilityLevel::PUBLIC} + GROUP BY project_id + ORDER BY count(*) DESC + LIMIT #{PROJECTS_LIMIT}; + EOF + end + end +end diff --git a/app/workers/trending_projects_worker.rb b/app/workers/trending_projects_worker.rb new file mode 100644 index 00000000000..df4c4a6628b --- /dev/null +++ b/app/workers/trending_projects_worker.rb @@ -0,0 +1,11 @@ +class TrendingProjectsWorker + include Sidekiq::Worker + + sidekiq_options queue: :trending_projects + + def perform + Rails.logger.info('Refreshing trending projects') + + TrendingProject.refresh! + end +end diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index c5ed2162c92..efe0ac9c965 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -304,6 +304,10 @@ Settings.cron_jobs['prune_old_events_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['prune_old_events_worker']['cron'] ||= '* */6 * * *' Settings.cron_jobs['prune_old_events_worker']['job_class'] = 'PruneOldEventsWorker' +Settings.cron_jobs['trending_projects_worker'] ||= Settingslogic.new({}) +Settings.cron_jobs['trending_projects_worker']['cron'] = '0 1 * * *' +Settings.cron_jobs['trending_projects_worker']['job_class'] = 'TrendingProjectsWorker' + # # GitLab Shell # diff --git a/db/migrate/20161007133303_precalculate_trending_projects.rb b/db/migrate/20161007133303_precalculate_trending_projects.rb new file mode 100644 index 00000000000..b324cd94268 --- /dev/null +++ b/db/migrate/20161007133303_precalculate_trending_projects.rb @@ -0,0 +1,38 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class PrecalculateTrendingProjects < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + def up + create_table :trending_projects do |t| + t.references :project, index: true, foreign_key: { on_delete: :cascade }, null: false + end + + timestamp = connection.quote(1.month.ago) + + # We're hardcoding the visibility level (public) here so that if it ever + # changes this query doesn't suddenly use the new value (which may break + # later migrations). + visibility = 20 + + execute <<-EOF.strip_heredoc + INSERT INTO trending_projects (project_id) + SELECT project_id + FROM notes + INNER JOIN projects ON projects.id = notes.project_id + WHERE notes.created_at >= #{timestamp} + AND notes.system IS FALSE + AND projects.visibility_level = #{visibility} + GROUP BY project_id + ORDER BY count(*) DESC + LIMIT 100; + EOF + end + + def down + drop_table :trending_projects + end +end diff --git a/db/schema.rb b/db/schema.rb index 56da70b3c02..c5ddf9eee32 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20160926145521) do +ActiveRecord::Schema.define(version: 20161007133303) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -1070,6 +1070,12 @@ ActiveRecord::Schema.define(version: 20160926145521) do add_index "todos", ["target_type", "target_id"], name: "index_todos_on_target_type_and_target_id", using: :btree add_index "todos", ["user_id"], name: "index_todos_on_user_id", using: :btree + create_table "trending_projects", force: :cascade do |t| + t.integer "project_id", null: false + end + + add_index "trending_projects", ["project_id"], name: "index_trending_projects_on_project_id", using: :btree + create_table "u2f_registrations", force: :cascade do |t| t.text "certificate" t.string "key_handle" @@ -1212,5 +1218,6 @@ ActiveRecord::Schema.define(version: 20160926145521) do add_foreign_key "personal_access_tokens", "users" add_foreign_key "protected_branch_merge_access_levels", "protected_branches" add_foreign_key "protected_branch_push_access_levels", "protected_branches" + add_foreign_key "trending_projects", "projects", on_delete: :cascade add_foreign_key "u2f_registrations", "users" end diff --git a/spec/finders/trending_projects_finder_spec.rb b/spec/finders/trending_projects_finder_spec.rb deleted file mode 100644 index cfe15b9defa..00000000000 --- a/spec/finders/trending_projects_finder_spec.rb +++ /dev/null @@ -1,48 +0,0 @@ -require 'spec_helper' - -describe TrendingProjectsFinder do - let(:user) { create(:user) } - let(:public_project1) { create(:empty_project, :public) } - let(:public_project2) { create(:empty_project, :public) } - let(:private_project) { create(:empty_project, :private) } - let(:internal_project) { create(:empty_project, :internal) } - - before do - 3.times do - create(:note_on_commit, project: public_project1) - end - - 2.times do - create(:note_on_commit, project: public_project2, created_at: 5.weeks.ago) - end - - create(:note_on_commit, project: private_project) - create(:note_on_commit, project: internal_project) - end - - describe '#execute', caching: true do - context 'without an explicit time range' do - it 'returns public trending projects' do - projects = described_class.new.execute - - expect(projects).to eq([public_project1]) - end - end - - context 'with an explicit time range' do - it 'returns public trending projects' do - projects = described_class.new.execute(2) - - expect(projects).to eq([public_project1, public_project2]) - end - end - - it 'caches the list of projects' do - projects = described_class.new - - expect(Project).to receive(:trending).once - - 2.times { projects.execute } - end - end -end diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index 8aadfcb439b..dae546a0cdc 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -800,32 +800,14 @@ describe Project, models: true do end create(:note_on_commit, project: project2) + + TrendingProject.refresh! end - describe 'without an explicit start date' do - subject { described_class.trending.to_a } + subject { described_class.trending.to_a } - it 'sorts Projects by the amount of notes in descending order' do - expect(subject).to eq([project1, project2]) - end - end - - describe 'with an explicit start date' do - let(:date) { 2.months.ago } - - subject { described_class.trending(date).to_a } - - before do - 2.times do - # Little fix for special issue related to Fractional Seconds support for MySQL. - # See: https://github.com/rails/rails/pull/14359/files - create(:note_on_commit, project: project2, created_at: date + 1) - end - end - - it 'sorts Projects by the amount of notes in descending order' do - expect(subject).to eq([project2, project1]) - end + it 'sorts projects by the amount of notes in descending order' do + expect(subject).to eq([project1, project2]) end it 'does not take system notes into account' do diff --git a/spec/models/trending_project_spec.rb b/spec/models/trending_project_spec.rb new file mode 100644 index 00000000000..cc28c6d4004 --- /dev/null +++ b/spec/models/trending_project_spec.rb @@ -0,0 +1,56 @@ +require 'spec_helper' + +describe TrendingProject do + let(:user) { create(:user) } + let(:public_project1) { create(:empty_project, :public) } + let(:public_project2) { create(:empty_project, :public) } + let(:public_project3) { create(:empty_project, :public) } + let(:private_project) { create(:empty_project, :private) } + let(:internal_project) { create(:empty_project, :internal) } + + before do + 3.times do + create(:note_on_commit, project: public_project1) + end + + 2.times do + create(:note_on_commit, project: public_project2) + end + + create(:note_on_commit, project: public_project3, created_at: 5.weeks.ago) + create(:note_on_commit, project: private_project) + create(:note_on_commit, project: internal_project) + end + + describe '.refresh!' do + before do + described_class.refresh! + end + + it 'populates the trending projects table' do + expect(described_class.count).to eq(2) + end + + it 'removes existing rows before populating the table' do + described_class.refresh! + + expect(described_class.count).to eq(2) + end + + it 'stores the project IDs for every trending project' do + rows = described_class.order(id: :asc).all + + expect(rows[0].project_id).to eq(public_project1.id) + expect(rows[1].project_id).to eq(public_project2.id) + end + + it 'does not store projects that fall out of the trending time range' do + expect(described_class.where(project_id: public_project3).any?).to eq(false) + end + + it 'stores only public projects' do + expect(described_class.where(project_id: [public_project1.id, public_project2.id]).count).to eq(2) + expect(described_class.where(project_id: [private_project.id, internal_project.id]).count).to eq(0) + end + end +end diff --git a/spec/workers/trending_projects_worker_spec.rb b/spec/workers/trending_projects_worker_spec.rb new file mode 100644 index 00000000000..c3c6fdcf2d5 --- /dev/null +++ b/spec/workers/trending_projects_worker_spec.rb @@ -0,0 +1,11 @@ +require 'spec_helper' + +describe TrendingProjectsWorker do + describe '#perform' do + it 'refreshes the trending projects' do + expect(TrendingProject).to receive(:refresh!) + + described_class.new.perform + end + end +end