Add repository languages for projects

Our friends at GitHub have shown the programming languages of a repository
for a long time, and inspired by that, this commit aims to provide roughly
the same functionality.

Language detection is done through Linguist, as before; the difference is
that we now cache the result in the database. Also, Gitaly can
incrementally scan a repository. This is done through a shell out, which
creates an overhead of about 3s per run. For now this won't be improved.

Scans are triggered by pushes to the default branch, usually `master`.
However, one exception to this rule is the charts page. If we're requesting
this expensive data anyway, we just cache it in the database.

Edge cases where there is no repository, or it is empty, are caught in the
Repository model. This makes use of Redis caching, which is probably
already loaded.

The added model is called RepositoryLanguage, which will make it harder
if/when GitLab supports multiple repositories per project. However, for
now I think this shouldn't be a concern. Also, Language could be
confused with the i18n languages, and I felt the current name was
suitable too.

Design of the Project#Show page is done with help from @dimitrieh. This
change is not visible to the end user unless detections are done.
This commit is contained in:
Zeger-Jan van de Weg 2018-06-06 13:10:59 +02:00
parent f175014067
commit 79a5d76801
No known key found for this signature in database
GPG key ID: 65F6A8D64A88ABAC
29 changed files with 478 additions and 4 deletions

View file

@ -754,6 +754,11 @@
}
}
// Fixed-height bar for the repository languages breakdown; this class is
// emitted on the wrapper element by RepositoryLanguagesHelper.
.repository-languages-bar {
height: 6px;
margin-bottom: 8px;
}
pre.light-well {
border-color: $well-light-border;
}

View file

@ -0,0 +1,16 @@
# View helpers rendering the per-language share bar of a repository.
module RepositoryLanguagesHelper
  # Renders a wrapper div containing one segment per language.
  # Returns nil (renders nothing) when no languages are given.
  def repository_languages_bar(languages)
    return if languages.none?

    content_tag(:div, class: 'progress repository-languages-bar') do
      segments = languages.map { |language| language_progress(language) }
      safe_join(segments)
    end
  end

  # Renders a single segment: its width is the language's share in percent,
  # its background the language's color, and its tooltip the language name.
  def language_progress(lang)
    options = {
      class: "progress-bar has-tooltip",
      style: "width: #{lang.share}%; background-color:#{lang.color}",
      title: lang.name
    }

    content_tag(:div, nil, options)
  end
end

View file

@ -122,6 +122,7 @@ class Namespace < ActiveRecord::Base
# Uses the namespace's full_path as its parameter representation.
def to_param
full_path
end
# Exposes the same value as #to_param under the name `flipper_id`.
# NOTE(review): presumably so a namespace can be passed as the `thing`
# argument of Feature.enabled?/disabled? — confirm against Flipper's actor API.
alias_method :flipper_id, :to_param
def human_name
owner_name

View file

@ -0,0 +1,4 @@
# A programming language that can be detected in a repository, stored with
# the color used to display it.
class ProgrammingLanguage < ActiveRecord::Base
validates :name, presence: true
# `color: true` invokes a custom color validator that is not shown here.
# NOTE(review): per the model spec, "#000000" is accepted while "000000"
# and "#0z0000" are rejected.
validates :color, allow_blank: false, color: true
end

View file

@ -192,6 +192,7 @@ class Project < ActiveRecord::Base
has_many :hooks, class_name: 'ProjectHook'
has_many :protected_branches
has_many :protected_tags
# Detected languages of this project's repository, largest share first.
has_many :repository_languages, -> { order "share DESC" }
has_many :project_authorizations
has_many :authorized_users, through: :project_authorizations, source: :user, class_name: 'User'

View file

@ -235,6 +235,12 @@ class Repository
false
end
def languages
return [] if empty?
raw_repository.languages(root_ref)
end
# Makes sure a commit is kept around when Git garbage collection runs.
# Git GC will delete commits from the repository that are no longer in any
# branches or tags, but we want to keep some of these commits around, for
@ -432,6 +438,8 @@ class Repository
# Runs code after a repository has been forked/imported: expires the content
# cache, then enqueues a language detection for the project on behalf of the
# project owner.
def after_import
  expire_content_cache

  project_id = project.id
  owner_id = project.owner.id
  DetectRepositoryLanguagesWorker.perform_async(project_id, owner_id)
end
# Runs code after a new commit has been pushed.

View file

@ -0,0 +1,12 @@
# Stores the share of one programming language within a project's repository.
class RepositoryLanguage < ActiveRecord::Base
  belongs_to :project
  belongs_to :programming_language

  # name and color are delegated to the programming language (see below), so
  # it is always loaded together with the repository language.
  default_scope { includes(:programming_language) }

  validates :project, presence: true
  validates :share, inclusion: { in: 0..100, message: "The share of a language is between 0 and 100" }
  # A programming language may only be recorded once per project.
  validates :programming_language, uniqueness: { scope: :project_id }

  delegate :name, :color, to: :programming_language
end

View file

@ -85,6 +85,8 @@ class GitPushService < BaseService
types = Gitlab::FileDetector.types_in_paths(paths.to_a)
end
DetectRepositoryLanguagesWorker.perform_async(@project.id, current_user.id)
else
types = []
end

View file

@ -0,0 +1,53 @@
module Projects
  # Detects the languages used in the project's repository and synchronises
  # the project's RepositoryLanguage rows with the result: rows for languages
  # that are no longer detected are deleted, changed shares are updated and
  # newly detected languages are inserted.
  class DetectRepositoryLanguagesService < BaseService
    # Upper bound on attempts to create a ProgrammingLanguage when a
    # concurrent job inserts the same name at the same time.
    MAX_CREATE_ATTEMPTS = 3

    # NOTE(review): these readers are never assigned within this class; they
    # are kept so the public interface stays unchanged.
    attr_reader :detected_repository_languages, :programming_languages

    # Runs the detection and persists the outcome.
    #
    # Returns the reloaded repository languages of the project.
    def execute
      repository_languages = project.repository_languages
      detection = Gitlab::LanguageDetection.new(repository, repository_languages)

      matching_programming_languages = ensure_programming_languages(detection)

      RepositoryLanguage.transaction do
        project.repository_languages.where(programming_language_id: detection.deletions).delete_all

        # The previous implementation only built an Arel update manager and
        # never executed it, so shares were never updated. `update_all`
        # issues the UPDATE directly.
        detection.updates.each do |update|
          RepositoryLanguage
            .where(project_id: project.id)
            .where(programming_language_id: update[:programming_language_id])
            .update_all(share: update[:share])
        end

        Gitlab::Database.bulk_insert(
          RepositoryLanguage.table_name,
          detection.insertions(matching_programming_languages)
        )
      end

      project.repository_languages.reload
    end

    private

    # Makes sure a ProgrammingLanguage record exists for every detected
    # language and returns those records.
    def ensure_programming_languages(detection)
      existing_languages = ProgrammingLanguage.where(name: detection.languages)
      return existing_languages if detection.languages.size == existing_languages.size

      missing_languages = detection.languages - existing_languages.map(&:name)
      created_languages = missing_languages.map do |name|
        create_language(name, detection.language_color(name))
      end

      existing_languages + created_languages
    end

    # Finds or creates the language with the given name. A unique-index
    # violation caused by a concurrent insert is retried a bounded number of
    # times (the retry then finds the existing row); after that the error is
    # re-raised instead of looping forever.
    def create_language(name, color)
      attempts ||= 0

      ProgrammingLanguage.transaction do
        ProgrammingLanguage.where(name: name).first_or_create(color: color)
      end
    rescue ActiveRecord::RecordNotUnique
      attempts += 1
      retry if attempts < MAX_CREATE_ATTEMPTS

      raise
    end
  end
end

View file

@ -18,10 +18,11 @@
= render "home_panel"
- if can?(current_user, :download_code, @project)
%nav.project-stats{ class: container_class }
%nav.project-stats{ class: [container_class, ("limit-container-width" unless fluid_layout)] }
= render 'stat_anchor_list', anchors: @project.statistics_anchors(show_auto_devops_callout: show_auto_devops_callout)
= render 'stat_anchor_list', anchors: @project.statistics_buttons(show_auto_devops_callout: show_auto_devops_callout)
- if Feature.enabled?(:repository_languages, @project.namespace.becomes(Namespace))
= repository_languages_bar(@project.repository_languages)
%div{ class: [container_class, ("limit-container-width" unless fluid_layout)] }
- if @project.archived?

View file

@ -123,3 +123,4 @@
- repository_update_remote_mirror
- create_note_diff_file
- delete_diff_files
- detect_repository_languages

View file

@ -0,0 +1,33 @@
# Background worker that runs the repository language detection service for
# a project, guarded by an exclusive lease keyed on the project id.
class DetectRepositoryLanguagesWorker
  include ApplicationWorker
  include ExceptionBacktrace
  include ExclusiveLeaseGuard

  sidekiq_options retry: 1

  # Value returned by #lease_timeout.
  # NOTE(review): presumably seconds — confirm against ExclusiveLeaseGuard.
  LEASE_TIMEOUT = 300

  attr_reader :project

  # Looks up the project and user and runs the detection service for them.
  # Does nothing when either record is missing or when the
  # :repository_languages feature is disabled for the project's namespace.
  def perform(project_id, user_id)
    @project = Project.find_by(id: project_id)
    user = User.find_by(id: user_id)

    return if project.nil? || user.nil?
    return if Feature.disabled?(:repository_languages, project.namespace)

    try_obtain_lease do
      detector = ::Projects::DetectRepositoryLanguagesService.new(project, user)
      detector.execute
    end
  end

  private

  def lease_timeout
    LEASE_TIMEOUT
  end

  def lease_key
    "gitlab:detect_repository_languages:#{project.id}"
  end
end

View file

@ -0,0 +1,5 @@
---
title: Show repository languages for projects
merge_request: 19480
author:
type: added

View file

@ -77,3 +77,4 @@
- [repository_remove_remote, 1]
- [create_note_diff_file, 1]
- [delete_diff_files, 1]
- [detect_repository_languages, 1]

View file

@ -0,0 +1,28 @@
# Creates the tables backing repository language detection:
# `programming_languages` (name and color) and `repository_languages`
# (the share of a programming language within a project).
class AddRepositoryLanguages < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def up
create_table(:programming_languages) do |t|
t.string :name, null: false
t.string :color, null: false
t.datetime_with_timezone :created_at, null: false
end
# Created without an `id` column; rows are identified by the unique
# (project_id, programming_language_id) index added below.
create_table(:repository_languages, id: false) do |t|
# Rows are removed together with their project.
t.references :project, null: false, foreign_key: { on_delete: :cascade }
# NOTE(review): no foreign key is declared for programming_language —
# confirm this is intentional.
t.references :programming_language, null: false
t.float :share, null: false
end
add_index :programming_languages, :name, unique: true
add_index :repository_languages, [:project_id, :programming_language_id],
unique: true, name: "index_repository_languages_on_project_and_languages_id"
end
# Drops repository_languages first, then programming_languages.
def down
drop_table :repository_languages
drop_table :programming_languages
end
end

View file

@ -1502,6 +1502,14 @@ ActiveRecord::Schema.define(version: 20180726172057) do
add_index "personal_access_tokens", ["token"], name: "index_personal_access_tokens_on_token", unique: true, using: :btree
add_index "personal_access_tokens", ["user_id"], name: "index_personal_access_tokens_on_user_id", using: :btree
create_table "programming_languages", force: :cascade do |t|
t.string "name", null: false
t.string "color", null: false
t.datetime_with_timezone "created_at", null: false
end
add_index "programming_languages", ["name"], name: "index_programming_languages_on_name", unique: true, using: :btree
create_table "project_authorizations", id: false, force: :cascade do |t|
t.integer "user_id", null: false
t.integer "project_id", null: false
@ -1788,6 +1796,14 @@ ActiveRecord::Schema.define(version: 20180726172057) do
add_index "remote_mirrors", ["last_successful_update_at"], name: "index_remote_mirrors_on_last_successful_update_at", using: :btree
add_index "remote_mirrors", ["project_id"], name: "index_remote_mirrors_on_project_id", using: :btree
create_table "repository_languages", id: false, force: :cascade do |t|
t.integer "project_id", null: false
t.integer "programming_language_id", null: false
t.float "share", null: false
end
add_index "repository_languages", ["project_id", "programming_language_id"], name: "index_repository_languages_on_project_and_languages_id", unique: true, using: :btree
create_table "resource_label_events", id: :bigserial, force: :cascade do |t|
t.integer "action", null: false
t.integer "issue_id"
@ -2359,6 +2375,7 @@ ActiveRecord::Schema.define(version: 20180726172057) do
add_foreign_key "push_event_payloads", "events", name: "fk_36c74129da", on_delete: :cascade
add_foreign_key "releases", "projects", name: "fk_47fe2a0596", on_delete: :cascade
add_foreign_key "remote_mirrors", "projects", on_delete: :cascade
add_foreign_key "repository_languages", "projects", on_delete: :cascade
add_foreign_key "resource_label_events", "issues", on_delete: :cascade
add_foreign_key "resource_label_events", "labels", on_delete: :nullify
add_foreign_key "resource_label_events", "merge_requests", on_delete: :cascade

View file

@ -46,6 +46,10 @@ class Feature
get(key).enabled?(thing)
end
# Inverse of enabled?: true when the feature `key` is not enabled for `thing`.
def disabled?(key, thing = nil)
  currently_enabled = enabled?(key, thing)
  !currently_enabled
end
# Looks up the feature `key` and enables it for `thing`; `thing` defaults
# to true.
def enable(key, thing = true)
  feature = get(key)
  feature.enable(thing)
end

View file

@ -107,6 +107,7 @@ excluded_attributes:
- :storage_version
- :remote_mirror_available_overridden
- :description_html
- :repository_languages
snippets:
- :expired_at
merge_request_diff:

View file

@ -0,0 +1,68 @@
module Gitlab
  # Compares the languages detected in a repository with the repository
  # languages already stored, and reports which stored rows have to be
  # inserted, updated or deleted to match the detection.
  class LanguageDetection
    # Only the first MAX_LANGUAGES entries of the detection are considered.
    MAX_LANGUAGES = 5

    # repository           - object responding to #languages and #project
    # repository_languages - the currently stored languages, each responding
    #                        to #name, #share and #programming_language_id
    def initialize(repository, repository_languages)
      @repository = repository
      @repository_languages = repository_languages
    end

    # Names of the detected languages, in detection order.
    def languages
      detection.keys
    end

    # Color of the detected language with the given name, nil when the
    # language was not detected.
    def language_color(name)
      detection.dig(name, :color)
    end

    # Rows for languages that were detected but are not stored yet, in the
    # structure accepted by Gitlab::Database.bulk_insert.
    #
    # programming_languages - objects responding to #name and #id, used to
    #                         resolve the id for each new language name
    def insertions(programming_languages)
      ids_by_name = programming_languages.each_with_object({}) do |language, ids|
        ids[language.name] = language.id
      end

      new_names = languages - previous_language_names
      new_names.map do |name|
        {
          project_id: @repository.project.id,
          share: detection[name][:value],
          programming_language_id: ids_by_name[name]
        }
      end
    end

    # Stored languages that are still detected but whose share changed,
    # as hashes holding the programming language id and the new share.
    def updates
      changed = @repository_languages.select { |current| share_changed?(current) }

      changed.map do |current|
        { programming_language_id: current.programming_language_id, share: detection[current.name][:value] }
      end
    end

    # Ids of the programming languages that are stored for the repository
    # but no longer occur in the detection.
    def deletions
      undetected = @repository_languages.reject { |current| detection.key?(current.name) }
      undetected.map(&:programming_language_id).compact
    end

    private

    # True when the stored language is still detected with a different share.
    def share_changed?(current)
      detection.key?(current.name) && detection[current.name][:value] != current.share
    end

    def previous_language_names
      @previous_language_names ||= @repository_languages.map(&:name)
    end

    # Detected languages keyed by their label, limited to MAX_LANGUAGES.
    def detection
      @detection ||= begin
        top_entries = @repository.languages.first(MAX_LANGUAGES)
        top_entries.each_with_object({}) { |entry, by_label| by_label[entry[:label]] = entry }
      end
    end
  end
end

View file

@ -0,0 +1,6 @@
# Factory for ProgrammingLanguage records; defaults to 'Ruby' with a
# placeholder hex color.
FactoryBot.define do
factory :programming_language do
name 'Ruby'
color '#123456'
end
end

View file

@ -0,0 +1,7 @@
# Factory for RepositoryLanguage records, built with an associated project
# and programming language and a default share of 98.5.
FactoryBot.define do
factory :repository_language do
project
programming_language
share 98.5
end
end

View file

@ -297,6 +297,7 @@ project:
- settings
- ci_cd_settings
- import_export_upload
- repository_languages
award_emoji:
- awardable
- user

View file

@ -0,0 +1,85 @@
require 'spec_helper'
# Specs for Gitlab::LanguageDetection. The repository's #languages result is
# stubbed, so no real language detection runs in these examples.
describe Gitlab::LanguageDetection do
set(:project) { create(:project, :repository) }
set(:ruby) { create(:programming_language, name: 'Ruby') }
set(:haskell) { create(:programming_language, name: 'Haskell') }
let(:repository) { project.repository }
# Six detected entries: one more than the five names expected by the
# '#languages' example, so the last entry must be cut off.
let(:detection) do
[{ value: 66.63, label: "Ruby", color: "#701516", highlight: "#701516" },
{ value: 12.96, label: "JavaScript", color: "#f1e05a", highlight: "#f1e05a" },
{ value: 7.9, label: "Elixir", color: "#e34c26", highlight: "#e34c26" },
{ value: 2.51, label: "CoffeeScript", color: "#244776", highlight: "#244776" },
{ value: 1.51, label: "Go", color: "#2a4776", highlight: "#244776" },
{ value: 1.1, label: "MepmepLang", color: "#2a4776", highlight: "#244776" }]
end
# Previously stored state: only Ruby, with a share that differs from the
# detected one.
let(:repository_languages) do
[RepositoryLanguage.new(share: 10, programming_language: ruby)]
end
subject { described_class.new(repository, repository_languages) }
before do
allow(repository).to receive(:languages).and_return(detection)
end
describe '#languages' do
it 'returns the language names' do
expect(subject.languages).to eq(%w[Ruby JavaScript Elixir CoffeeScript Go])
end
end
describe '#insertions' do
let(:programming_languages) { [ruby, haskell] }
# Only Haskell is detected here; it is not part of the stored languages,
# so it has to be inserted.
let(:detection) do
[{ value: 10, label: haskell.name, color: haskell.color }]
end
it 'only includes new languages' do
insertions = subject.insertions(programming_languages)
expect(insertions).not_to be_empty
expect(insertions.first[:project_id]).to be(project.id)
expect(insertions.first[:programming_language_id]).to be(haskell.id)
expect(insertions.first[:share]).to be(10)
end
end
describe '#updates' do
it 'updates the share of languages' do
first_update = subject.updates.first
expect(first_update).not_to be_nil
expect(first_update[:programming_language_id]).to eq(ruby.id)
expect(first_update[:share]).to eq(66.63)
end
it 'does not include languages to be removed' do
ids = subject.updates.map { |h| h[:programming_language_id] }
expect(ids).not_to include(haskell.id)
end
# The stored share already equals the detected share, so no update is
# expected.
context 'when silent writes occur' do
let(:repository_languages) do
[RepositoryLanguage.new(share: 66.63, programming_language: ruby)]
end
it "doesn't include them in the result" do
expect(subject.updates).to be_empty
end
end
end
describe '#deletions' do
# Haskell is stored but absent from the detection, so it must be deleted.
let(:repository_languages) do
[RepositoryLanguage.new(share: 10, programming_language: ruby),
RepositoryLanguage.new(share: 5, programming_language: haskell)]
end
it 'lists undetected languages' do
expect(subject.deletions).not_to be_empty
expect(subject.deletions).to include(haskell.id)
end
end
end

View file

@ -0,0 +1,11 @@
require 'spec_helper'
# Specs for the ProgrammingLanguage model: attribute presence and the
# format accepted for the color.
describe ProgrammingLanguage do
it { is_expected.to respond_to(:name) }
it { is_expected.to respond_to(:color) }
it { is_expected.to validate_presence_of(:name) }
# A color needs a leading '#' and only hexadecimal digits.
it { is_expected.to allow_value("#000000").for(:color) }
it { is_expected.not_to allow_value("000000").for(:color) }
it { is_expected.not_to allow_value("#0z0000").for(:color) }
end

View file

@ -69,6 +69,7 @@ describe Project do
it { is_expected.to have_many(:pages_domains) }
it { is_expected.to have_many(:labels).class_name('ProjectLabel') }
it { is_expected.to have_many(:users_star_projects) }
it { is_expected.to have_many(:repository_languages) }
it { is_expected.to have_many(:environments) }
it { is_expected.to have_many(:deployments) }
it { is_expected.to have_many(:todos) }

View file

@ -0,0 +1,16 @@
require 'spec_helper'
# Specs for the RepositoryLanguage model: its associations and the allowed
# range of the share.
describe RepositoryLanguage do
# NOTE(review): not referenced by the examples below, which rely on the
# implicit subject — confirm whether this `let` is still needed.
let(:repository_language) { build(:repository_language) }
describe 'associations' do
it { is_expected.to belong_to(:project) }
it { is_expected.to belong_to(:programming_language) }
end
describe 'validations' do
# Both bounds of 0..100 are valid; anything above 100 is rejected.
it { is_expected.to allow_value(0).for(:share) }
it { is_expected.to allow_value(100.0).for(:share) }
it { is_expected.not_to allow_value(100.1).for(:share) }
end
end

View file

@ -3,8 +3,8 @@ require 'spec_helper'
describe GitPushService, services: true do
include RepoHelpers
let(:user) { create(:user) }
let(:project) { create(:project, :repository) }
set(:user) { create(:user) }
set(:project) { create(:project, :repository) }
let(:blankrev) { Gitlab::Git::BLANK_SHA }
let(:oldrev) { sample_commit.parent_id }
let(:newrev) { sample_commit.id }

View file

@ -0,0 +1,54 @@
require 'spec_helper'

# Specs for Projects::DetectRepositoryLanguagesService, run against the
# repository created by the :repository project trait.
describe Projects::DetectRepositoryLanguagesService, :clean_gitlab_redis_shared_state do
  set(:project) { create(:project, :repository) }

  subject { described_class.new(project, project.owner) }

  before do
    allow(Feature).to receive(:disabled?).and_return(false)
  end

  describe '#execute' do
    context 'without previous detection' do
      it 'inserts new programming languages in the database' do
        subject.execute

        expect(ProgrammingLanguage.exists?(name: 'Ruby')).to be(true)
        expect(ProgrammingLanguage.count).to be(4)
      end

      it 'inserts the repository languages' do
        names = subject.execute.map(&:name)

        expect(names).to eq(%w[Ruby JavaScript HTML CoffeeScript])
      end
    end

    context 'with a previous detection' do
      before do
        # The first run stores the real detection; the stubbed second
        # detection then keeps Ruby (with a new share) and introduces D.
        subject.execute

        allow(project.repository).to receive(:languages).and_return(
          [{ value: 99.63, label: "Ruby", color: "#701516", highlight: "#701516" },
           { value: 0.3, label: "D", color: "#701516", highlight: "#701516" }]
        )
      end

      # NOTE(review): only the names are asserted here, not the shares, so
      # this example does not verify that Ruby's share changed to 99.63.
      it 'updates the repository languages' do
        repository_languages = subject.execute.map(&:name)

        expect(repository_languages).to eq(%w[Ruby D])
      end
    end

    context 'when no repository exists' do
      set(:project) { create(:project) }

      it 'has no languages' do
        expect(subject.execute).to be_empty
        expect(project.repository_languages).to be_empty
      end
    end
  end
end

View file

@ -0,0 +1,32 @@
require 'spec_helper'

# Specs for DetectRepositoryLanguagesWorker: it delegates to the detection
# service and tolerates ids that do not resolve to a record.
describe DetectRepositoryLanguagesWorker do
  set(:project) { create(:project) }
  let(:user) { project.owner }

  subject { described_class.new }

  describe '#perform' do
    it 'calls the DetectRepositoryLanguages service' do
      service = double
      allow(::Projects::DetectRepositoryLanguagesService).to receive(:new).and_return(service)

      expect(service).to receive(:execute)

      subject.perform(project.id, user.id)
    end

    context 'when invalid ids are used' do
      it 'does not raise when the project could not be found' do
        expect do
          subject.perform(-1, user.id)
        end.not_to raise_error
      end

      it 'does not raise when the user could not be found' do
        expect do
          subject.perform(project.id, -1)
        end.not_to raise_error
      end
    end
  end
end