pg/sample/warehouse_partitions.rb

312 lines
7.2 KiB
Ruby

# -*- ruby -*-
# vim: set nosta noet ts=4 sw=4:
#
# Script to automatically move partitioned tables and their indexes
# to a separate area on disk.
#
# Mahlon E. Smith <mahlon@martini.nu>
#
# Example use case:
#
# - You've got a heavy insert table, such as syslog data.
# - This table has a partitioning trigger (or is manually partitioned)
# by date, to separate incoming stuff from archival/report stuff.
# - You have a tablespace on cheap or slower disk (maybe even
# ZFS compressed, or some such!)
#
# The only assumption this script makes is that your tables are dated, and
# the tablespace they're moving into already exists.
#
# A full example, using the syslog idea from above, where each child
# table is date partitioned by a convention of "syslog_YEAR-WEEKOFYEAR":
#
# syslog # <--- parent
# syslog_2012_06 # <--- inherited
# syslog_2012_07 # <--- inherited
# syslog_2012_08 # <--- inherited
# ...
#
# You'd run this script like so:
#
# ./warehouse_partitions.rb -F syslog_%Y_%U
#
# Assuming this was week 12 of the year, tables syslog_2012_06 through
# syslog_2012_11 would start sequentially migrating into the tablespace
# called 'warehouse'.
#
require 'date'
require 'ostruct'
require 'optparse'
require 'pathname'
require 'etc'
require 'pg'
### A tablespace migration class.
###
class PGWarehouse
def initialize( opts )
@opts = opts
@db = PG.connect(
:dbname => opts.database,
:host => opts.host,
:port => opts.port,
:user => opts.user,
:password => opts.pass,
:sslmode => 'prefer'
)
@db.exec "SET search_path TO %s" % [ opts.schema ] if opts.schema
@relations = self.relations
end
attr_reader :db
######
public
######
### Perform the tablespace moves.
###
def migrate
if @relations.empty?
$stderr.puts 'No tables were found for warehousing.'
return
end
$stderr.puts "Found %d relation%s to move." % [ relations.length, relations.length == 1 ? '' : 's' ]
@relations.sort_by{|_,v| v[:name] }.each do |_, val|
$stderr.print " - Moving table '%s' to '%s'... " % [
val[:name], @opts.tablespace
]
if @opts.dryrun
$stderr.puts '(not really)'
else
age = self.timer do
db.exec "ALTER TABLE %s SET TABLESPACE %s;" % [
val[:name], @opts.tablespace
]
end
puts age
end
val[ :indexes ].each do |idx|
$stderr.print " - Moving index '%s' to '%s'... " % [
idx, @opts.tablespace
]
if @opts.dryrun
$stderr.puts '(not really)'
else
age = self.timer do
db.exec "ALTER INDEX %s SET TABLESPACE %s;" % [
idx, @opts.tablespace
]
end
puts age
end
end
end
end
#########
protected
#########
### Get OIDs and current tablespaces for everything under the
### specified schema.
###
def relations
return @relations if @relations
relations = {}
query = %q{
SELECT c.oid AS oid,
c.relname AS name,
c.relkind AS kind,
t.spcname AS tspace
FROM pg_class AS c
LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
LEFT JOIN pg_tablespace t ON t.oid = c.reltablespace
WHERE c.relkind = 'r' }
query << "AND n.nspname='#{@opts.schema}'" if @opts.schema
# Get the relations list, along with each element's current tablespace.
#
self.db.exec( query ) do |res|
res.each do |row|
relations[ row['oid'] ] = {
:name => row['name'],
:tablespace => row['tspace'],
:indexes => [],
:parent => nil
}
end
end
# Add table inheritance information.
#
db.exec 'SELECT inhrelid AS oid, inhparent AS parent FROM pg_inherits' do |res|
res.each do |row|
relations[ row['oid'] ][ :parent ] = row['parent']
end
end
# Remove tables that don't qualify for warehousing.
#
# - Tables that are not children of a parent
# - Tables that are already in the warehouse tablespace
# - The currently active child (it's likely being written to!)
# - Any table that can't be parsed into the specified format
#
relations.reject! do |oid, val|
begin
val[:parent].nil? ||
val[:tablespace] == @opts.tablespace ||
val[:name] == Time.now.strftime( @opts.format ) ||
! DateTime.strptime( val[:name], @opts.format )
rescue ArgumentError
true
end
end
query = %q{
SELECT c.oid AS oid,
i.indexname AS name
FROM pg_class AS c
INNER JOIN pg_indexes AS i
ON i.tablename = c.relname }
query << "AND i.schemaname='#{@opts.schema}'" if @opts.schema
# Attach index names to tables.
#
db.exec( query ) do |res|
res.each do |row|
relations[ row['oid'] ][ :indexes ] << row['name'] if relations[ row['oid'] ]
end
end
return relations
end
### Wrap arbitrary commands in a human readable timer.
###
def timer
start = Time.now
yield
age = Time.now - start
diff = age
secs = diff % 60
diff = ( diff - secs ) / 60
mins = diff % 60
diff = ( diff - mins ) / 60
hour = diff % 24
return "%02d:%02d:%02d" % [ hour, mins, secs ]
end
end
### Parse command line arguments. Return a struct of global options.
###
def parse_args( args )
options = OpenStruct.new
options.database = Etc.getpwuid( Process.uid ).name
options.host = '127.0.0.1'
options.port = 5432
options.user = Etc.getpwuid( Process.uid ).name
options.sslmode = 'prefer'
options.tablespace = 'warehouse'
opts = OptionParser.new do |opts|
opts.banner = "Usage: #{$0} [options]"
opts.separator ''
opts.separator 'Connection options:'
opts.on( '-d', '--database DBNAME',
"specify the database to connect to (default: \"#{options.database}\")" ) do |db|
options.database = db
end
opts.on( '-h', '--host HOSTNAME', 'database server host' ) do |host|
options.host = host
end
opts.on( '-p', '--port PORT', Integer,
"database server port (default: \"#{options.port}\")" ) do |port|
options.port = port
end
opts.on( '-n', '--schema SCHEMA', String,
"operate on the named schema only (default: none)" ) do |schema|
options.schema = schema
end
opts.on( '-T', '--tablespace SPACE', String,
"move old tables to this tablespace (default: \"#{options.tablespace}\")" ) do |tb|
options.tablespace = tb
end
opts.on( '-F', '--tableformat FORMAT', String,
"The naming format (strftime) for the inherited tables (default: none)" ) do |format|
options.format = format
end
opts.on( '-U', '--user NAME',
"database user name (default: \"#{options.user}\")" ) do |user|
options.user = user
end
opts.on( '-W', 'force password prompt' ) do |pw|
print 'Password: '
begin
system 'stty -echo'
options.pass = gets.chomp
ensure
system 'stty echo'
puts
end
end
opts.separator ''
opts.separator 'Other options:'
opts.on_tail( '--dry-run', "don't actually do anything" ) do
options.dryrun = true
end
opts.on_tail( '--help', 'show this help, then exit' ) do
$stderr.puts opts
exit
end
opts.on_tail( '--version', 'output version information, then exit' ) do
puts Stats::VERSION
exit
end
end
opts.parse!( args )
return options
end
if __FILE__ == $0
opts = parse_args( ARGV )
raise ArgumentError, "A naming format (-F) is required." unless opts.format
$stdout.sync = true
PGWarehouse.new( opts ).migrate
end