[vsphere] (#10644) Add servers filter to improve clone performance

The behavior without this patch is that the performance of the vm_clone
operation in unacceptably slow for VMware vCenter deployments with
multiple hundreds of virtual machines.

Performance is unacceptable because the vm_clone operation makes
multiple API calls to list _all_ of the VM's in the inventory.  This
patch eliminates the need to list all VM's by adding path and folder
filters to limit our API calls to subtrees of the VMware inventory.

= API Changes =

 * New datacenters request that caches the Datacenter objects for the
   life of the process.
 * New clone() method on the server model that returns a server model of
   the new VM even if it is not yet done cloning.
 * Ability to limit collections to inventory paths by passing the
 * 'folder' filter to the servers collection.  For example:
   `conn = Fog::Compute[:vsphere];
    conn.servers('path' => '/Datacenters/DC1/vm/Templates')`
   this filter will greatly reduce the number of SOAP API calls by
   limiting the server models in the collection to only those in the
   Templates inventory folder.  Note, this is not recursive yet.

= Tests =

Tests have been updated.  The vm_clone request no longer takes an
instance_uuid because we cannot actually use this to search the
inventory efficiently.  Instead, the vm_clone request now requires a
path attribute to allow Fog to search only a subset of the inventory.
This commit is contained in:
Jeff McCune 2011-11-10 16:01:22 -08:00
parent cf84d2d4be
commit a36f3888e3
7 changed files with 186 additions and 51 deletions

View File

@ -21,6 +21,7 @@ module Fog
request :vm_reboot
request :vm_clone
request :vm_destroy
request :datacenters
module Shared

View File

@ -57,6 +57,24 @@ module Fog
connection.vm_destroy('instance_uuid' => instance_uuid)
end
def clone(options = {})
requires :name, :path
# Convert symbols to strings
req_options = options.inject({}) { |hsh, (k,v)| hsh[k.to_s] = v; hsh }
# Give our path to the request
req_options['path'] ="#{path}/#{name}"
# Perform the actual clone
clone_results = connection.vm_clone(req_options)
# Create the new VM model.
new_vm = self.class.new(clone_results['vm_attributes'])
# We need to assign the collection and the connection otherwise we
# cannot reload the model.
new_vm.collection = self.collection
new_vm.connection = self.connection
# Return the new VM model.
new_vm
end
end
end

View File

@ -9,8 +9,14 @@ module Fog
model Fog::Compute::Vsphere::Server
def all
response = connection.list_virtual_machines
# 'path' => '/Datacenters/vm/Jeff/Templates' will be MUCH faster.
# than simply listing everything.
def all(filters = {})
# REVISIT: I'm not sure if this is the best way to implement search
# filters on a collection but it does work. I need to study the AWS
# code more to make sure this matches up.
filters['folder'] ||= attributes['folder']
response = connection.list_virtual_machines(filters)
load(response['virtual_machines'])
end

View File

@ -0,0 +1,34 @@
module Fog
module Compute
class Vsphere
class Real
def datacenters
@datacenters ||= datacenters_reload
# Hide the values which are the RbVmomi instances
@datacenters.keys
end
private
def datacenters_reload
@rootfolder ||= @connection.rootFolder
inventory = @rootfolder.inventory(:Datacenter => [ 'name' ])[@rootfolder]
# Convert the inventory into a Hash of the form: We remove the
# property selectors. { "<dc_name>" => #<RbVmomi::VIM::Datacenter> }
# The Datacenter instance itself is at index 0 and the properties we
# collected are at index 1.
inventory.inject({}) do |memo, (name,dc_ary)|
memo[name] = dc_ary[0]
memo
end
end
end
class Mock
def datacenters
[ "Solutions", "Solutions2", "Solutions3" ]
end
end
end
end
end

View File

@ -1,13 +1,15 @@
module Fog
module Compute
class Vsphere
class Real
def list_virtual_machines(options = {})
# First, determine if there's a search filter
# Listing all VM's can be quite slow and expensive. Try and optimize
# based on the available options we have. These conditions are in
# ascending order of time to complete for large deployments.
if options['instance_uuid'] then
list_all_virtual_machines_by_instance_uuid(options)
elsif options['folder'] then
list_all_virtual_machines_in_folder(options)
else
list_all_virtual_machines
end
@ -15,6 +17,48 @@ module Fog
private
def list_all_virtual_machines_in_folder(options = {})
# Tap gets rid of the leading empty string and "Datacenters" element
# and returns the array.
path_elements = options['folder'].split('/').tap { |ary| ary.shift 2 }
# The DC name itself.
dc_name = path_elements.shift
# If the first path element contains "vm" this denotes the vmFolder
# and needs to be shifted out since each DC contains only one
# vmFolder
path_elements.shift if path_elements[0] = 'vm'
# Make sure @datacenters is populated (the keys are DataCenter instances)
self.datacenters.include? dc_name or raise ArgumentError, "Could not find a Datacenter named #{dc_name}"
# Get the datacenter managed object
dc = @datacenters[dc_name]
# Get the VM Folder (Group) efficiently
vm_folder = dc.vmFolder
# Walk the tree resetting the folder pointer as we go
folder = path_elements.inject(vm_folder) do |current_folder, sub_folder_name|
# JJM VIM::Folder#find appears to be quite efficient as it uses the
# searchIndex It certainly appears to be faster than
# VIM::Folder#inventory since that returns _all_ managed objects of
# a certain type _and_ their properties.
sub_folder = current_folder.find(sub_folder_name, RbVmomi::VIM::Folder)
raise ArgumentError, "Could not descend into #{sub_folder_name}. Please check your path." unless sub_folder
sub_folder
end
# This should be efficient since we're not obtaining properties
virtual_machines = folder.children.inject([]) do |ary, child|
if child.is_a? RbVmomi::VIM::VirtualMachine then
ary << convert_vm_mob_ref_to_attr_hash(child)
end
ary
end
# Return the managed objects themselves as an array. These may be converted
# to an attribute has using convert_vm_mob_ref_to_attr_hash
{ 'virtual_machines' => virtual_machines }
end
def list_all_virtual_machines_by_instance_uuid(options = {})
uuid = options['instance_uuid']
search_filter = { :uuid => uuid, 'vmSearch' => true, 'instanceUuid' => true }
@ -186,10 +230,7 @@ module Fog
{ 'virtual_machines' => [] }
end
end
end
end
end
end

View File

@ -6,14 +6,19 @@ module Fog
private
def vm_clone_check_options(options)
options = { 'force' => false }.merge(options)
required_options = %w{ instance_uuid name }
required_options = %w{ path name }
required_options.each do |param|
raise ArgumentError, "#{required_options.join(', ')} are required" unless options.has_key? param
end
# First, figure out if there's already a VM of the same name.
all_virtual_machines = list_virtual_machines['virtual_machines']
if not options['force'] and all_virtual_machines.detect { |vm| vm['name'] == options['name'] } then
raise Fog::Vsphere::Errors::ServiceError, "A VM already exists with name #{options['name']}"
# The tap removes the leading empty string
path_elements = options['path'].split('/').tap { |o| o.shift }
first_folder = path_elements.shift
if first_folder != 'Datacenters' then
raise ArgumentError, "vm_clone path option must start with /Datacenters. Got: #{options['path']}"
end
dc_name = path_elements.shift
if not self.datacenters.include? dc_name then
raise ArgumentError, "Datacenter #{dc_name} does not exist, only datacenters #{self.dacenters.join(",")} are accessible."
end
options
end
@ -25,23 +30,47 @@ module Fog
# Option handling
options = vm_clone_check_options(options)
notfound = lambda { raise Fog::Compute::Vsphere::NotFound, "Cloud not find VM template" }
notfound = lambda { raise Fog::Compute::Vsphere::NotFound, "Could not find VM template" }
# REVISIT: This will have horrible performance for large sites.
# Find the Managed Object reference of the template VM (Wish I could do this with the API)
vm_mob_ref = list_all_virtual_machine_mobs.find(notfound) do |vm|
convert_vm_mob_ref_to_attr_hash(vm)['instance_uuid'] == options['instance_uuid']
# Find the template in the folder. This is more efficient than
# searching ALL VM's looking for the template.
# Tap gets rid of the leading empty string and "Datacenters" element
# and returns the array.
path_elements = options['path'].split('/').tap { |ary| ary.shift 2 }
# The DC name itself.
template_dc = path_elements.shift
# If the first path element contains "vm" this denotes the vmFolder
# and needs to be shifted out
path_elements.shift if path_elements[0] = 'vm'
# The template name. The remaining elements are the folders in the
# datacenter.
template_name = path_elements.pop
# Make sure @datacenters is populated. We need the instances from the Hash keys.
self.datacenters
# Get the datacenter managed object from the hash
dc = @datacenters[template_dc]
# Get the VM Folder (Group) efficiently
vm_folder = dc.vmFolder
# Walk the tree resetting the folder pointer as we go
folder = path_elements.inject(vm_folder) do |current_folder, sub_folder_name|
# JJM VIM::Folder#find appears to be quite efficient as it uses the
# searchIndex It certainly appears to be faster than
# VIM::Folder#inventory since that returns _all_ managed objects of
# a certain type _and_ their properties.
sub_folder = current_folder.find(sub_folder_name, RbVmomi::VIM::Folder)
raise ArgumentError, "Could not descend into #{sub_folder_name}. Please check your path." unless sub_folder
sub_folder
end
# We need to locate the datacenter object to find the
# default resource pool.
container = vm_mob_ref.parent
until container.kind_of? RbVmomi::VIM::Datacenter
container = container.parent
end
dc = container
# With the Datacenter Object we can obtain the resource pool
resource_pool = dc.hostFolder.children.first.resourcePool
# Now find the template itself using the efficient find method
vm_mob_ref = folder.find(template_name, RbVmomi::VIM::VirtualMachine)
# Now find _a_ resource pool of the template's host (REVISIT: We need
# to support cloning into a specific RP)
esx_host = vm_mob_ref.collect!('runtime.host')['runtime.host']
# The parent of the ESX host itself is a ComputeResource which has a resourcePool
resource_pool = esx_host.parent.resourcePool
# Next, create a Relocation Spec instance
relocation_spec = RbVmomi::VIM.VirtualMachineRelocateSpec(:pool => resource_pool,
:transform => options['transform'] || 'sparse')
@ -50,27 +79,37 @@ module Fog
:powerOn => options['power_on'] || true,
:template => false)
task = vm_mob_ref.CloneVM_Task(:folder => vm_mob_ref.parent, :name => options['name'], :spec => clone_spec)
# REVISIT: The task object contains a reference to the template but does
# not appear to contain a reference to the newly created VM.
# This is a really naive way to find the managed object reference
# of the newly created VM.
tries = 0
new_vm = begin
list_virtual_machines['virtual_machines'].detect(lambda { raise Fog::Vsphere::Errors::NotFound }) do |vm|
vm['name'] == options['name']
# Waiting for the VM to complete allows us to get the VirtulMachine
# object of the new machine when it's done. It is HIGHLY recommended
# to set 'wait' => true if your app wants to wait. Otherwise, you're
# going to have to reload the server model over and over which
# generates a lot of time consuming API calls to vmware.
if options['wait'] then
# REVISIT: It would be awesome to call a block passed to this
# request to notify the application how far along in the process we
# are. I'm thinking of updating a progress bar, etc...
new_vm = task.wait_for_completion
else
tries = 0
new_vm = begin
# Try and find the new VM (folder.find is quite efficient)
folder.find(options['name'], RbVmomi::VIM::VirtualMachine) or raise Fog::Vsphere::Errors::NotFound
rescue Fog::Vsphere::Errors::NotFound
tries += 1
if tries <= 10 then
sleep 15
retry
end
nil
end
rescue Fog::Vsphere::Errors::NotFound
tries += 1
if tries <= 10 then
sleep 1
retry
end
nil
end
# Return hash
{
'vm_ref' => new_vm ? new_vm['mo_ref'] : nil,
'task_ref' => task._ref
'vm_ref' => new_vm ? new_vm._ref : nil,
'vm_attributes' => new_vm ? convert_vm_mob_ref_to_attr_hash(new_vm) : {},
'task_ref' => task._ref
}
end
@ -83,7 +122,7 @@ module Fog
options = vm_clone_check_options(options)
notfound = lambda { raise Fog::Compute::Vsphere::NotFound, "Cloud not find VM template" }
vm_mob_ref = list_virtual_machines['virtual_machines'].find(notfound) do |vm|
vm['instance_uuid'] == options['instance_uuid']
vm['name'] == options['path'].split("/")[-1]
end
{
'vm_ref' => 'vm-123',

View File

@ -1,10 +1,10 @@
Shindo.tests("Fog::Compute[:vsphere] | vm_clone request", 'vsphere') do
#require 'guid'
template = "50323f93-6835-1178-8b8f-9e2109890e1a"
template = "/Datacenters/Solutions/vm/Jeff/Templates/centos56gm2"
compute = Fog::Compute[:vsphere]
tests("The return value should") do
response = compute.vm_clone('instance_uuid' => template, 'name' => 'cloning_vm')
response = compute.vm_clone('path' => template, 'name' => 'cloning_vm')
test("be a kind of Hash") { response.kind_of? Hash }
%w{ vm_ref task_ref }.each do |key|
test("have a #{key} key") { response.has_key? key }
@ -12,10 +12,6 @@ Shindo.tests("Fog::Compute[:vsphere] | vm_clone request", 'vsphere') do
end
tests("When invalid input is presented") do
raises(ArgumentError, 'it should raise ArgumentError') { compute.vm_clone(:foo => 1) }
raises(Fog::Vsphere::Errors::ServiceError,
'it should raise ServiceError if a VM already exists with the provided name') do
compute.vm_clone('instance_uuid' => '123', 'name' => 'jefftest')
end
raises(Fog::Compute::Vsphere::NotFound, 'it should raise Fog::Compute::Vsphere::NotFound when the UUID is not a string') do
pending # require 'guid'
compute.vm_clone('instance_uuid' => Guid.from_s(template), 'name' => 'jefftestfoo')