Merge branch 'zj-remove-linguist' into 'master'

Remove dependencies on Linguist

Closes #35450

See merge request gitlab-org/gitlab-ce!21008
This commit is contained in:
Robert Speicher 2018-10-15 15:27:15 +00:00
commit f5d088eb11
23 changed files with 5846 additions and 90 deletions

View file

@ -83,9 +83,6 @@ gem 'net-ldap'
# Only used to compute wiki page slugs
gem 'gitlab-gollum-lib', '~> 4.2', require: false
# Language detection
gem 'github-linguist', '~> 5.3.3', require: 'linguist'
# API
gem 'grape', '~> 1.1'
gem 'grape-entity', '~> 0.7.1'
@ -146,6 +143,7 @@ gem 'rouge', '~> 3.1'
gem 'truncato', '~> 0.7.9'
gem 'bootstrap_form', '~> 2.7.0'
gem 'nokogiri', '~> 1.8.2'
gem 'escape_utils', '~> 1.1'
# Calendar rendering
gem 'icalendar'

View file

@ -277,11 +277,6 @@ GEM
gitaly-proto (0.118.1)
google-protobuf (~> 3.1)
grpc (~> 1.10)
github-linguist (5.3.3)
charlock_holmes (~> 0.7.5)
escape_utils (~> 1.1.0)
mime-types (>= 1.19)
rugged (>= 0.25.1)
github-markup (1.7.0)
gitlab-flowdock-git-hook (1.0.1)
flowdock (~> 0.7)
@ -1006,6 +1001,7 @@ DEPENDENCIES
ed25519 (~> 1.2)
email_reply_trimmer (~> 0.1)
email_spec (~> 2.2.0)
escape_utils (~> 1.1)
factory_bot_rails (~> 4.8.2)
faraday (~> 0.12)
fast_blank
@ -1028,7 +1024,6 @@ DEPENDENCIES
gettext_i18n_rails (~> 1.8.0)
gettext_i18n_rails_js (~> 1.3)
gitaly-proto (~> 0.118.1)
github-linguist (~> 5.3.3)
github-markup (~> 1.7.0)
gitlab-flowdock-git-hook (~> 1.0.1)
gitlab-gollum-lib (~> 4.2)
@ -1187,4 +1182,4 @@ DEPENDENCIES
wikicloth (= 0.8.1)
BUNDLED WITH
1.16.4
1.16.6

View file

@ -280,11 +280,6 @@ GEM
gitaly-proto (0.118.1)
google-protobuf (~> 3.1)
grpc (~> 1.10)
github-linguist (5.3.3)
charlock_holmes (~> 0.7.5)
escape_utils (~> 1.1.0)
mime-types (>= 1.19)
rugged (>= 0.25.1)
github-markup (1.7.0)
gitlab-flowdock-git-hook (1.0.1)
flowdock (~> 0.7)
@ -1015,6 +1010,7 @@ DEPENDENCIES
ed25519 (~> 1.2)
email_reply_trimmer (~> 0.1)
email_spec (~> 2.2.0)
escape_utils (~> 1.1)
factory_bot_rails (~> 4.8.2)
faraday (~> 0.12)
fast_blank
@ -1037,7 +1033,6 @@ DEPENDENCIES
gettext_i18n_rails (~> 1.8.0)
gettext_i18n_rails_js (~> 1.3)
gitaly-proto (~> 0.118.1)
github-linguist (~> 5.3.3)
github-markup (~> 1.7.0)
gitlab-flowdock-git-hook (~> 1.0.1)
gitlab-gollum-lib (~> 4.2)
@ -1196,4 +1191,4 @@ DEPENDENCIES
wikicloth (= 0.8.1)
BUNDLED WITH
1.16.4
1.16.6

View file

@ -13,11 +13,11 @@ export default () => {
if (editBlobForm.length) {
const urlRoot = editBlobForm.data('relativeUrlRoot');
const assetsPath = editBlobForm.data('assetsPrefix');
const blobLanguage = editBlobForm.data('blobLanguage');
const filePath = editBlobForm.data('blobFilename')
const currentAction = $('.js-file-title').data('currentAction');
const projectId = editBlobForm.data('project-id');
new EditBlob(`${urlRoot}${assetsPath}`, blobLanguage, currentAction, projectId);
new EditBlob(`${urlRoot}${assetsPath}`, filePath, currentAction, projectId);
new NewCommitForm(editBlobForm);
}

View file

@ -5,6 +5,7 @@ import axios from '~/lib/utils/axios_utils';
import createFlash from '~/flash';
import { __ } from '~/locale';
import TemplateSelectorMediator from '../blob/file_template_mediator';
import getModeByFileExtension from '~/lib/utils/ace_utils';
export default class EditBlob {
constructor(assetsPath, aceMode, currentAction, projectId) {
@ -14,9 +15,10 @@ export default class EditBlob {
this.initFileSelectors(currentAction, projectId);
}
configureAceEditor(aceMode, assetsPath) {
configureAceEditor(filePath, assetsPath) {
ace.config.set('modePath', `${assetsPath}/ace`);
ace.config.loadModule('ace/ext/searchbox');
ace.config.loadModule('ace/ext/modelist');
this.editor = ace.edit('editor');
@ -25,8 +27,8 @@ export default class EditBlob {
this.editor.focus();
if (aceMode) {
this.editor.getSession().setMode(`ace/mode/${aceMode}`);
if (filePath) {
this.editor.getSession().setMode(getModeByFileExtension(filePath));
}
}

View file

@ -1,3 +1,4 @@
/*= require ace/ace */
/*= require ace/ext-modelist */
/*= require ace/ext-searchbox */
/*= require ./ace/ace_config_paths */

View file

@ -0,0 +1,6 @@
/* global ace */
/**
 * Resolve the Ace editor mode for a file path.
 *
 * Delegates to the Ace `modelist` extension, which must already be loaded
 * on the global `ace` object.
 *
 * @param {string} path - file path (or name) to look up
 * @returns {string} the `mode` property of the entry returned by `getModeForPath`
 */
export default function getModeByFileExtension(path) {
  const { mode } = ace.require('ace/ext/modelist').getModeForPath(path);

  return mode;
}

View file

@ -5,6 +5,7 @@ import Vue from 'vue';
import axios from '~/lib/utils/axios_utils';
import flash from '~/flash';
import { __ } from '~/locale';
import getModeByFileExtension from '~/lib/utils/ace_utils';
(global => {
global.mergeConflicts = global.mergeConflicts || {};
@ -72,7 +73,7 @@ import { __ } from '~/locale';
this.fileLoaded = true;
this.editor = ace.edit(content);
this.editor.$blockScrolling = Infinity; // Turn off annoying warning
this.editor.getSession().setMode(`ace/mode/${data.blob_ace_mode}`);
this.editor.getSession().setMode(getModeByFileExtension(data.new_path));
this.editor.on('change', () => {
this.saveDiffResolution();
});

View file

@ -195,7 +195,7 @@ module BlobHelper
{
'relative-url-root' => Rails.application.config.relative_url_root,
'assets-prefix' => Gitlab::Application.config.assets.prefix,
'blob-language' => @blob && @blob.language.try(:ace_mode),
'blob-filename' => @blob && @blob.path,
'project-id' => project.id
}
end

View file

@ -162,7 +162,7 @@ class Blob < SimpleDelegator
if stored_externally?
if rich_viewer
rich_viewer.binary?
elsif Linguist::Language.find_by_extension(name).any?
elsif known_extension?
false
elsif _mime_type
_mime_type.binary?

View file

@ -2,7 +2,7 @@
module BlobLike
extend ActiveSupport::Concern
include Linguist::BlobHelper
include Gitlab::BlobHelper
def id
raise NotImplementedError

View file

@ -0,0 +1,5 @@
---
title: Remove Linguist gem, reducing Rails memory usage by 128MB per process
merge_request: 21008
author:
type: changed

145
lib/gitlab/blob_helper.rb Normal file
View file

@ -0,0 +1,145 @@
# This has been extracted from https://github.com/github/linguist/blob/master/lib/linguist/blob_helper.rb
module Gitlab
  # Blob inspection helpers: extension lookup, size and binary/text checks,
  # MIME type resolution, line splitting and encoding detection.
  #
  # The including object is expected to respond to `name`, `data` and `size`.
  module BlobHelper
    MEGABYTE = 1024 * 1024

    # Returns the file extension of `name` (including the leading dot), or an
    # empty String when there is none.
    def extname
      File.extname(name.to_s)
    end

    # Returns true when the blob's extension is listed in
    # LanguageData.extensions.
    #
    # That set only holds lower-cased extensions, so the lookup key is
    # lower-cased as well; otherwise `FOO.RB` would not be recognised.
    def known_extension?
      LanguageData.extensions.include?(extname.downcase)
    end

    # A blob is viewable when it is neither large nor binary.
    def viewable?
      !large? && text?
    end

    # Returns true when `size` exceeds one megabyte.
    def large?
      size.to_i > MEGABYTE
    end

    # Returns true when the blob should be treated as binary content.
    def binary?
      # Large blobs aren't even loaded into memory
      if data.nil?
        true

      # Treat blank files as text
      elsif data == ""
        false

      # Charlock doesn't know what to think
      elsif encoding.nil?
        true

      # If Charlock says it's binary
      else
        detect_encoding[:type] == :binary
      end
    end

    # Inverse of #binary?.
    def text?
      !binary?
    end

    # Returns true for the image extensions listed below (case-insensitive).
    def image?
      ['.png', '.jpg', '.jpeg', '.gif'].include?(extname.downcase)
    end

    # Internal: Lookup mime type for extension.
    #
    # The result is cached with `defined?` rather than `||=` so that a nil
    # lookup result is remembered too.
    #
    # Returns a MIME::Type
    # rubocop:disable Gitlab/ModuleWithInstanceVariables
    def _mime_type
      if defined? @_mime_type
        @_mime_type
      else
        guesses = ::MIME::Types.type_for(extname.to_s)

        # Prefer text mime types over binary
        @_mime_type = guesses.detect { |type| type.ascii? } || guesses.first
      end
    end
    # rubocop:enable Gitlab/ModuleWithInstanceVariables

    # Public: Get the actual blob mime type
    #
    # Examples
    #
    #   # => 'text/plain'
    #   # => 'text/html'
    #
    # Returns a mime type String ('text/plain' when no type matched).
    def mime_type
      _mime_type ? _mime_type.to_s : 'text/plain'
    end

    # Returns true when a MIME type was found for the extension and it is
    # binary; false otherwise.
    def binary_mime_type?
      _mime_type ? _mime_type.binary? : false
    end

    # Returns the blob content split into an Array of lines, or an empty Array
    # when the blob is not viewable or has no data.
    def lines
      @lines ||=
        if viewable? && data
          # `data` is usually encoded as ASCII-8BIT even when the content has
          # been detected as a different encoding. However, we are not allowed
          # to change the encoding of `data` because we've made the implicit
          # guarantee that each entry in `lines` is encoded the same way as
          # `data`.
          #
          # Instead, we re-encode each possible newline sequence as the
          # detected encoding, then force them back to the encoding of `data`
          # (usually a binary encoding like ASCII-8BIT). This means that the
          # byte sequence will match how newlines are likely encoded in the
          # file, but we don't have to change the encoding of `data` as far as
          # Ruby is concerned. This allows us to correctly parse out each line
          # without changing the encoding of `data`, and
          # also--importantly--without having to duplicate many (potentially
          # large) strings.
          begin
            data.split(encoded_newlines_re, -1)
          rescue Encoding::ConverterNotFoundError
            # The data is not splittable in the detected encoding. Assume it's
            # one big line.
            [data]
          end
        else
          []
        end
    end

    # Returns the content type String: the MIME type for binary content,
    # otherwise "text/plain" with the detected charset appended when known.
    def content_type
      # rubocop:disable Style/MultilineTernaryOperator
      # rubocop:disable Style/NestedTernaryOperator
      @content_type ||= binary_mime_type? || binary? ? mime_type :
        (encoding ? "text/plain; charset=#{encoding.downcase}" : "text/plain")
      # rubocop:enable Style/NestedTernaryOperator
      # rubocop:enable Style/MultilineTernaryOperator
    end

    # Returns a Regexp matching "\r\n", "\r" or "\n" as encoded in the
    # detected encoding, tagged with the encoding of `data`.
    def encoded_newlines_re
      @encoded_newlines_re ||=
        Regexp.union(["\r\n", "\r", "\n"].map { |nl| nl.encode(ruby_encoding, "ASCII-8BIT").force_encoding(data.encoding) })
    end

    # Returns the :ruby_encoding entry of the detection result, or nil when
    # nothing was detected.
    def ruby_encoding
      if hash = detect_encoding
        hash[:ruby_encoding]
      end
    end

    # Returns the :encoding entry of the detection result, or nil when nothing
    # was detected.
    def encoding
      if hash = detect_encoding
        hash[:encoding]
      end
    end

    # Runs CharlockHolmes over `data` and caches the result. Returns nil when
    # there is no data.
    def detect_encoding
      @detect_encoding ||= CharlockHolmes::EncodingDetector.new.detect(data) if data # rubocop:disable Gitlab/ModuleWithInstanceVariables
    end

    # Returns true when there is no data or the data is an empty String.
    def empty?
      data.nil? || data == ""
    end
  end
end

View file

@ -158,7 +158,6 @@ module Gitlab
json_hash.tap do |json_hash|
if opts[:full_content]
json_hash[:content] = content
json_hash[:blob_ace_mode] = our_blob && our_blob.language.try(:ace_mode)
else
json_hash[:sections] = sections if type.text?
json_hash[:type] = type

View file

@ -3,13 +3,13 @@
module Gitlab
module Git
class Blob
include Linguist::BlobHelper
include Gitlab::BlobHelper
include Gitlab::EncodingHelper
# This number is the maximum amount of data that we want to display to
# the user. We load as much as we can for encoding detection
# (Linguist) and LFS pointer parsing. All other cases where we need full
# blob data should use load_all_data!.
# the user. We load as much as we can for encoding detection and LFS
# pointer parsing. All other cases where we need full blob data should
# use load_all_data!.
MAX_DATA_DISPLAY_SIZE = 10.megabytes
# These limits are used as a heuristic to ignore files which can't be LFS

View file

@ -1,34 +0,0 @@
# Gitaly note: JV: no RPC's here.
module Gitlab
  module Git
    # Holds a run of lines taken from a file at a given ref and exposes the
    # `data`, `name`, `size` and `mode` readers alongside Linguist::BlobHelper.
    class BlobSnippet
      include Linguist::BlobHelper

      attr_accessor :ref, :lines, :filename, :startline

      def initialize(ref, lines, startline, filename)
        @ref = ref
        @lines = lines
        @startline = startline
        @filename = filename
      end

      # The lines joined with "\n", or nil when `lines` is nil.
      def data
        return if lines.nil?

        lines.join("\n")
      end

      # The snippet is named after the file it came from.
      def name
        filename
      end

      # Length of the joined data.
      def size
        data.length
      end

      # Always nil for a snippet.
      def mode
        nil
      end
    end
  end
end

View file

@ -0,0 +1,33 @@
# frozen_string_literal: true
module Gitlab
  # Lookup data derived from the vendored `vendor/languages.yml` file.
  module LanguageData
    EXTENSION_MUTEX = Mutex.new

    class << self
      include Gitlab::Utils::StrongMemoize

      # Returns a Set of the lower-cased file extensions listed in
      # `vendor/languages.yml`. Entries that do not start with a dot are
      # ignored.
      #
      # The YAML file is read inside the memoized block, and the whole lookup
      # runs under EXTENSION_MUTEX.
      def extensions
        EXTENSION_MUTEX.synchronize do
          strong_memoize(:extensions) do
            languages = YAML.load_file(Rails.root.join('vendor', 'languages.yml'))

            languages.each_with_object(Set.new) do |(_language, attributes), known|
              attributes['extensions']&.each do |extension|
                known << extension.downcase if extension.start_with?('.')
              end
            end
          end
        end
      end

      # Drops the memoized extension set so the next call to `extensions`
      # rebuilds it.
      def clear_extensions!
        EXTENSION_MUTEX.synchronize { clear_memoization(:extensions) }
      end
    end
  end
end

View file

@ -150,7 +150,6 @@ describe Projects::MergeRequests::ConflictsController do
'new_path' => path,
'blob_icon' => 'file-text-o',
'blob_path' => a_string_ending_with(path),
'blob_ace_mode' => 'ruby',
'content' => content)
end
end

View file

@ -0,0 +1,125 @@
# frozen_string_literal: true
require 'spec_helper'
# Exercises every public helper of Gitlab::BlobHelper against two fake blobs:
# a small text file and a large binary PDF.
describe Gitlab::BlobHelper do
  include FakeBlobHelpers

  let(:project) { create(:project) }
  let(:blob) { fake_blob(path: 'file.txt') }
  let(:large_blob) { fake_blob(path: 'test.pdf', size: 2.megabytes, binary: true) }

  describe '#extname' do
    it 'returns the extension' do
      expect(blob.extname).to eq('.txt')
    end
  end

  describe '#known_extension?' do
    it 'returns true' do
      expect(blob.known_extension?).to be_truthy
    end
  end

  describe '#viewable?' do
    it 'returns true' do
      expect(blob.viewable?).to be_truthy
    end

    it 'returns false' do
      expect(large_blob.viewable?).to be_falsey
    end
  end

  describe '#large?' do
    it 'returns false' do
      expect(blob.large?).to be_falsey
    end

    it 'returns true' do
      expect(large_blob.large?).to be_truthy
    end
  end

  describe '#binary?' do
    it 'returns true' do
      expect(large_blob.binary?).to be_truthy
    end

    it 'returns false' do
      expect(blob.binary?).to be_falsey
    end
  end

  describe '#text?' do
    it 'returns true' do
      expect(blob.text?).to be_truthy
    end

    it 'returns false' do
      expect(large_blob.text?).to be_falsey
    end
  end

  describe '#image?' do
    it 'returns false' do
      expect(blob.image?).to be_falsey
    end
  end

  describe '#mime_type' do
    it 'returns text/plain' do
      expect(blob.mime_type).to eq('text/plain')
    end

    it 'returns application/pdf' do
      expect(large_blob.mime_type).to eq('application/pdf')
    end
  end

  describe '#binary_mime_type?' do
    it 'returns false' do
      expect(blob.binary_mime_type?).to be_falsey
    end
  end

  describe '#lines' do
    it 'returns the payload in an Array' do
      expect(blob.lines).to eq(['foo'])
    end
  end

  describe '#content_type' do
    it 'returns text/plain' do
      expect(blob.content_type).to eq('text/plain; charset=utf-8')
    end

    # The binary blob reports its MIME type, not a text/plain charset string.
    it 'returns application/pdf' do
      expect(large_blob.content_type).to eq('application/pdf')
    end
  end

  describe '#encoded_newlines_re' do
    it 'returns a regular expression' do
      expect(blob.encoded_newlines_re).to eq(/\r\n|\r|\n/)
    end
  end

  describe '#ruby_encoding' do
    it 'returns UTF-8' do
      expect(blob.ruby_encoding).to eq('UTF-8')
    end
  end

  describe '#encoding' do
    # Must call #encoding itself; asserting on #ruby_encoding here would
    # leave #encoding untested.
    it 'returns UTF-8' do
      expect(blob.encoding).to eq('UTF-8')
    end
  end

  describe '#empty?' do
    it 'returns false' do
      expect(blob.empty?).to be_falsey
    end
  end
end

View file

@ -267,11 +267,6 @@ FILE
it 'includes the full content of the conflict' do
expect(conflict_file.as_json(full_content: true)).to have_key(:content)
end
it 'includes the detected language of the conflict file' do
expect(conflict_file.as_json(full_content: true)[:blob_ace_mode])
.to eq('ruby')
end
end
end
end

View file

@ -1,19 +0,0 @@
# encoding: UTF-8
require "spec_helper"
# Covers BlobSnippet#data for both a missing and a populated line list.
describe Gitlab::Git::BlobSnippet, :seed_helper do
  describe '#data' do
    context 'empty lines' do
      it do
        snippet = described_class.new('master', nil, nil, nil)

        expect(snippet.data).to be_nil
      end
    end

    context 'present lines' do
      it do
        snippet = described_class.new('master', ['wow', 'much'], 1, 'wow.rb')

        expect(snippet.data).to eq("wow\nmuch")
      end
    end
  end
end

View file

@ -0,0 +1,22 @@
# frozen_string_literal: true
require 'spec_helper'
# Verifies that the extension set is built from the YAML file only once and
# contains a few well-known extensions.
describe Gitlab::LanguageData do
  describe '#extensions' do
    # Start every example from an empty memo so the YAML load is observable.
    before { described_class.clear_extensions! }

    it 'loads the extensions once' do
      expect(YAML).to receive(:load_file).once.and_call_original

      2.times do
        expect(described_class.extensions).to be_a(Set)
        expect(described_class.extensions.count).to be > 0

        # Sanity check for known extensions
        expect(described_class.extensions).to include('.rb', '.yml', '.json')
      end
    end
  end
end

5488
vendor/languages.yml vendored Executable file

File diff suppressed because it is too large Load diff