Added code for defining SHA attributes
These attributes are stored in binary in the database, but exposed as strings. This allows one to query/create data using plain SHA1 hashes as Strings, while storing them more efficiently as binary.
This commit is contained in:
parent
88c60307bd
commit
af1f6844c9
|
@ -0,0 +1,18 @@
|
|||
module ShaAttribute
|
||||
extend ActiveSupport::Concern
|
||||
|
||||
module ClassMethods
|
||||
def sha_attribute(name)
|
||||
column = columns.find { |c| c.name == name.to_s }
|
||||
|
||||
# In case the table doesn't exist we won't be able to find the column,
|
||||
# thus we will only check the type if the column is present.
|
||||
if column && column.type != :binary
|
||||
raise ArgumentError,
|
||||
"sha_attribute #{name.inspect} is invalid since the column type is not :binary"
|
||||
end
|
||||
|
||||
attribute(name, Gitlab::Database::ShaAttribute.new)
|
||||
end
|
||||
end
|
||||
end
|
|
@ -54,6 +54,7 @@
|
|||
- [Polymorphic Associations](polymorphic_associations.md)
|
||||
- [Single Table Inheritance](single_table_inheritance.md)
|
||||
- [Background Migrations](background_migrations.md)
|
||||
- [Storing SHA1 Hashes As Binary](sha1_as_binary.md)
|
||||
|
||||
## i18n
|
||||
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
# Storing SHA1 Hashes As Binary
|
||||
|
||||
Storing SHA1 hashes as strings is not very space efficient. A SHA1 as a string
|
||||
requires at least 40 bytes, an additional byte to store the encoding, and
|
||||
perhaps more space depending on the internals of PostgreSQL and MySQL.
|
||||
|
||||
On the other hand, if one were to store a SHA1 as binary one would only need 20
|
||||
bytes for the actual SHA1, and 1 or 4 bytes of additional space (again depending
|
||||
on database internals). This means that in the best case scenario we can reduce
|
||||
the space usage by 50%.
|
||||
|
||||
To make this easier to work with you can include the concern `ShaAttribute` into
|
||||
a model and define a SHA attribute using the `sha_attribute` class method. For
|
||||
example:
|
||||
|
||||
```ruby
|
||||
class Commit < ActiveRecord::Base
|
||||
include ShaAttribute
|
||||
|
||||
sha_attribute :sha
|
||||
end
|
||||
```
|
||||
|
||||
This allows you to use the value of the `sha` attribute as if it were a string,
|
||||
while storing it as binary. This means that you can do something like this,
|
||||
without having to worry about converting data to the right binary format:
|
||||
|
||||
```ruby
|
||||
commit = Commit.find_by(sha: '88c60307bd1f215095834f09a1a5cb18701ac8ad')
|
||||
commit.sha = '971604de4cfa324d91c41650fabc129420c8d1cc'
|
||||
commit.save
|
||||
```
|
||||
|
||||
There is however one requirement: the column used to store the SHA has _must_ be
|
||||
a binary type. For Rails this means you need to use the `:binary` type instead
|
||||
of `:text` or `:string`.
|
|
@ -0,0 +1,34 @@
|
|||
module Gitlab
|
||||
module Database
|
||||
BINARY_TYPE = if Gitlab::Database.postgresql?
|
||||
# PostgreSQL defines its own class with slightly different
|
||||
# behaviour from the default Binary type.
|
||||
ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Bytea
|
||||
else
|
||||
ActiveRecord::Type::Binary
|
||||
end
|
||||
|
||||
# Class for casting binary data to hexadecimal SHA1 hashes (and vice-versa).
|
||||
#
|
||||
# Using ShaAttribute allows you to store SHA1 values as binary while still
|
||||
# using them as if they were stored as string values. This gives you the
|
||||
# ease of use of string values, but without the storage overhead.
|
||||
class ShaAttribute < BINARY_TYPE
|
||||
PACK_FORMAT = 'H*'.freeze
|
||||
|
||||
# Casts binary data to a SHA1 in hexadecimal.
|
||||
def type_cast_from_database(value)
|
||||
value = super
|
||||
|
||||
value ? value.unpack(PACK_FORMAT)[0] : nil
|
||||
end
|
||||
|
||||
# Casts a SHA1 in hexadecimal to the proper binary format.
|
||||
def type_cast_for_database(value)
|
||||
arg = value ? [value].pack(PACK_FORMAT) : nil
|
||||
|
||||
super(arg)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,33 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::Database::ShaAttribute do
|
||||
let(:sha) do
|
||||
'9a573a369a5bfbb9a4a36e98852c21af8a44ea8b'
|
||||
end
|
||||
|
||||
let(:binary_sha) do
|
||||
[sha].pack('H*')
|
||||
end
|
||||
|
||||
let(:binary_from_db) do
|
||||
if Gitlab::Database.postgresql?
|
||||
"\\x#{sha}"
|
||||
else
|
||||
binary_sha
|
||||
end
|
||||
end
|
||||
|
||||
let(:attribute) { described_class.new }
|
||||
|
||||
describe '#type_cast_from_database' do
|
||||
it 'converts the binary SHA to a String' do
|
||||
expect(attribute.type_cast_from_database(binary_from_db)).to eq(sha)
|
||||
end
|
||||
end
|
||||
|
||||
describe '#type_cast_for_database' do
|
||||
it 'converts a SHA String to binary data' do
|
||||
expect(attribute.type_cast_for_database(sha).to_s).to eq(binary_sha)
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,27 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe ShaAttribute do
|
||||
let(:model) { Class.new { include ShaAttribute } }
|
||||
|
||||
before do
|
||||
columns = [
|
||||
double(:column, name: 'name', type: :text),
|
||||
double(:column, name: 'sha1', type: :binary)
|
||||
]
|
||||
|
||||
allow(model).to receive(:columns).and_return(columns)
|
||||
end
|
||||
|
||||
describe '#sha_attribute' do
|
||||
it 'defines a SHA attribute for a binary column' do
|
||||
expect(model).to receive(:attribute)
|
||||
.with(:sha1, an_instance_of(Gitlab::Database::ShaAttribute))
|
||||
|
||||
model.sha_attribute(:sha1)
|
||||
end
|
||||
|
||||
it 'raises ArgumentError when the column type is not :binary' do
|
||||
expect { model.sha_attribute(:name) }.to raise_error(ArgumentError)
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue