From a36f3888e33bb8bf38c53df3e888e1d9aa47879b Mon Sep 17 00:00:00 2001 From: Jeff McCune Date: Thu, 10 Nov 2011 16:01:22 -0800 Subject: [PATCH] [vsphere] (#10644) Add servers filter to improve clone performance The behavior without this patch is that the performance of the vm_clone operation in unacceptably slow for VMware vCenter deployments with multiple hundreds of virtual machines. Performance is unacceptable because the vm_clone operation makes multiple API calls to list _all_ of the VM's in the inventory. This patch eliminates the need to list all VM's by adding path and folder filters to limit our API calls to subtrees of the VMware inventory. = API Changes = * New datacenters request that caches the Datacenter objects for the life of the process. * New clone() method on the server model that returns a server model of the new VM even if it is not yet done cloning. * Ability to limit collections to inventory paths by passing the * 'folder' filter to the servers collection. For example: `conn = Fog::Compute[:vsphere]; conn.servers('path' => '/Datacenters/DC1/vm/Templates')` this filter will greatly reduce the number of SOAP API calls by limiting the server models in the collection to only those in the Templates inventory folder. Note, this is not recursive yet. = Tests = Tests have been updated. The vm_clone request no longer takes an instance_uuid because we cannot actually use this to search the inventory efficiently. Instead, the vm_clone request now requires a path attribute to allow Fog to search only a subset of the inventory. --- lib/fog/vsphere/compute.rb | 1 + lib/fog/vsphere/models/compute/server.rb | 18 +++ lib/fog/vsphere/models/compute/servers.rb | 10 +- .../vsphere/requests/compute/datacenters.rb | 34 ++++++ .../requests/compute/list_virtual_machines.rb | 53 +++++++- lib/fog/vsphere/requests/compute/vm_clone.rb | 113 ++++++++++++------ .../requests/compute/vm_clone_tests.rb | 8 +- 7 files changed, 186 insertions(+), 51 deletions(-) create mode 100644 lib/fog/vsphere/requests/compute/datacenters.rb diff --git a/lib/fog/vsphere/compute.rb b/lib/fog/vsphere/compute.rb index 7292cb01e..a817a7915 100644 --- a/lib/fog/vsphere/compute.rb +++ b/lib/fog/vsphere/compute.rb @@ -21,6 +21,7 @@ module Fog request :vm_reboot request :vm_clone request :vm_destroy + request :datacenters module Shared diff --git a/lib/fog/vsphere/models/compute/server.rb b/lib/fog/vsphere/models/compute/server.rb index 0c7aa4738..76e332873 100644 --- a/lib/fog/vsphere/models/compute/server.rb +++ b/lib/fog/vsphere/models/compute/server.rb @@ -57,6 +57,24 @@ module Fog connection.vm_destroy('instance_uuid' => instance_uuid) end + def clone(options = {}) + requires :name, :path + # Convert symbols to strings + req_options = options.inject({}) { |hsh, (k,v)| hsh[k.to_s] = v; hsh } + # Give our path to the request + req_options['path'] ="#{path}/#{name}" + # Perform the actual clone + clone_results = connection.vm_clone(req_options) + # Create the new VM model. + new_vm = self.class.new(clone_results['vm_attributes']) + # We need to assign the collection and the connection otherwise we + # cannot reload the model. + new_vm.collection = self.collection + new_vm.connection = self.connection + # Return the new VM model. + new_vm + end + end end diff --git a/lib/fog/vsphere/models/compute/servers.rb b/lib/fog/vsphere/models/compute/servers.rb index e9ba1612f..b57783bd6 100644 --- a/lib/fog/vsphere/models/compute/servers.rb +++ b/lib/fog/vsphere/models/compute/servers.rb @@ -9,8 +9,14 @@ module Fog model Fog::Compute::Vsphere::Server - def all - response = connection.list_virtual_machines + # 'path' => '/Datacenters/vm/Jeff/Templates' will be MUCH faster. + # than simply listing everything. + def all(filters = {}) + # REVISIT: I'm not sure if this is the best way to implement search + # filters on a collection but it does work. I need to study the AWS + # code more to make sure this matches up. + filters['folder'] ||= attributes['folder'] + response = connection.list_virtual_machines(filters) load(response['virtual_machines']) end diff --git a/lib/fog/vsphere/requests/compute/datacenters.rb b/lib/fog/vsphere/requests/compute/datacenters.rb new file mode 100644 index 000000000..6b76614d5 --- /dev/null +++ b/lib/fog/vsphere/requests/compute/datacenters.rb @@ -0,0 +1,34 @@ +module Fog + module Compute + class Vsphere + class Real + def datacenters + @datacenters ||= datacenters_reload + # Hide the values which are the RbVmomi instances + @datacenters.keys + end + + private + + def datacenters_reload + @rootfolder ||= @connection.rootFolder + inventory = @rootfolder.inventory(:Datacenter => [ 'name' ])[@rootfolder] + # Convert the inventory into a Hash of the form: We remove the + # property selectors. { "" => # } + # The Datacenter instance itself is at index 0 and the properties we + # collected are at index 1. + inventory.inject({}) do |memo, (name,dc_ary)| + memo[name] = dc_ary[0] + memo + end + end + end + + class Mock + def datacenters + [ "Solutions", "Solutions2", "Solutions3" ] + end + end + end + end +end diff --git a/lib/fog/vsphere/requests/compute/list_virtual_machines.rb b/lib/fog/vsphere/requests/compute/list_virtual_machines.rb index 84a48cb40..7255faceb 100644 --- a/lib/fog/vsphere/requests/compute/list_virtual_machines.rb +++ b/lib/fog/vsphere/requests/compute/list_virtual_machines.rb @@ -1,13 +1,15 @@ module Fog module Compute class Vsphere - class Real - def list_virtual_machines(options = {}) - # First, determine if there's a search filter + # Listing all VM's can be quite slow and expensive. Try and optimize + # based on the available options we have. These conditions are in + # ascending order of time to complete for large deployments. if options['instance_uuid'] then list_all_virtual_machines_by_instance_uuid(options) + elsif options['folder'] then + list_all_virtual_machines_in_folder(options) else list_all_virtual_machines end @@ -15,6 +17,48 @@ module Fog private + def list_all_virtual_machines_in_folder(options = {}) + # Tap gets rid of the leading empty string and "Datacenters" element + # and returns the array. + path_elements = options['folder'].split('/').tap { |ary| ary.shift 2 } + # The DC name itself. + dc_name = path_elements.shift + # If the first path element contains "vm" this denotes the vmFolder + # and needs to be shifted out since each DC contains only one + # vmFolder + path_elements.shift if path_elements[0] = 'vm' + # Make sure @datacenters is populated (the keys are DataCenter instances) + self.datacenters.include? dc_name or raise ArgumentError, "Could not find a Datacenter named #{dc_name}" + # Get the datacenter managed object + dc = @datacenters[dc_name] + + # Get the VM Folder (Group) efficiently + vm_folder = dc.vmFolder + + # Walk the tree resetting the folder pointer as we go + folder = path_elements.inject(vm_folder) do |current_folder, sub_folder_name| + # JJM VIM::Folder#find appears to be quite efficient as it uses the + # searchIndex It certainly appears to be faster than + # VIM::Folder#inventory since that returns _all_ managed objects of + # a certain type _and_ their properties. + sub_folder = current_folder.find(sub_folder_name, RbVmomi::VIM::Folder) + raise ArgumentError, "Could not descend into #{sub_folder_name}. Please check your path." unless sub_folder + sub_folder + end + + # This should be efficient since we're not obtaining properties + virtual_machines = folder.children.inject([]) do |ary, child| + if child.is_a? RbVmomi::VIM::VirtualMachine then + ary << convert_vm_mob_ref_to_attr_hash(child) + end + ary + end + + # Return the managed objects themselves as an array. These may be converted + # to an attribute has using convert_vm_mob_ref_to_attr_hash + { 'virtual_machines' => virtual_machines } + end + def list_all_virtual_machines_by_instance_uuid(options = {}) uuid = options['instance_uuid'] search_filter = { :uuid => uuid, 'vmSearch' => true, 'instanceUuid' => true } @@ -186,10 +230,7 @@ module Fog { 'virtual_machines' => [] } end end - end - end end end - diff --git a/lib/fog/vsphere/requests/compute/vm_clone.rb b/lib/fog/vsphere/requests/compute/vm_clone.rb index 3b68b43b2..9b1add83f 100644 --- a/lib/fog/vsphere/requests/compute/vm_clone.rb +++ b/lib/fog/vsphere/requests/compute/vm_clone.rb @@ -6,14 +6,19 @@ module Fog private def vm_clone_check_options(options) options = { 'force' => false }.merge(options) - required_options = %w{ instance_uuid name } + required_options = %w{ path name } required_options.each do |param| raise ArgumentError, "#{required_options.join(', ')} are required" unless options.has_key? param end - # First, figure out if there's already a VM of the same name. - all_virtual_machines = list_virtual_machines['virtual_machines'] - if not options['force'] and all_virtual_machines.detect { |vm| vm['name'] == options['name'] } then - raise Fog::Vsphere::Errors::ServiceError, "A VM already exists with name #{options['name']}" + # The tap removes the leading empty string + path_elements = options['path'].split('/').tap { |o| o.shift } + first_folder = path_elements.shift + if first_folder != 'Datacenters' then + raise ArgumentError, "vm_clone path option must start with /Datacenters. Got: #{options['path']}" + end + dc_name = path_elements.shift + if not self.datacenters.include? dc_name then + raise ArgumentError, "Datacenter #{dc_name} does not exist, only datacenters #{self.dacenters.join(",")} are accessible." end options end @@ -25,23 +30,47 @@ module Fog # Option handling options = vm_clone_check_options(options) - notfound = lambda { raise Fog::Compute::Vsphere::NotFound, "Cloud not find VM template" } + notfound = lambda { raise Fog::Compute::Vsphere::NotFound, "Could not find VM template" } - # REVISIT: This will have horrible performance for large sites. - # Find the Managed Object reference of the template VM (Wish I could do this with the API) - vm_mob_ref = list_all_virtual_machine_mobs.find(notfound) do |vm| - convert_vm_mob_ref_to_attr_hash(vm)['instance_uuid'] == options['instance_uuid'] + # Find the template in the folder. This is more efficient than + # searching ALL VM's looking for the template. + # Tap gets rid of the leading empty string and "Datacenters" element + # and returns the array. + path_elements = options['path'].split('/').tap { |ary| ary.shift 2 } + # The DC name itself. + template_dc = path_elements.shift + # If the first path element contains "vm" this denotes the vmFolder + # and needs to be shifted out + path_elements.shift if path_elements[0] = 'vm' + # The template name. The remaining elements are the folders in the + # datacenter. + template_name = path_elements.pop + # Make sure @datacenters is populated. We need the instances from the Hash keys. + self.datacenters + # Get the datacenter managed object from the hash + dc = @datacenters[template_dc] + # Get the VM Folder (Group) efficiently + vm_folder = dc.vmFolder + # Walk the tree resetting the folder pointer as we go + folder = path_elements.inject(vm_folder) do |current_folder, sub_folder_name| + # JJM VIM::Folder#find appears to be quite efficient as it uses the + # searchIndex It certainly appears to be faster than + # VIM::Folder#inventory since that returns _all_ managed objects of + # a certain type _and_ their properties. + sub_folder = current_folder.find(sub_folder_name, RbVmomi::VIM::Folder) + raise ArgumentError, "Could not descend into #{sub_folder_name}. Please check your path." unless sub_folder + sub_folder end - # We need to locate the datacenter object to find the - # default resource pool. - container = vm_mob_ref.parent - until container.kind_of? RbVmomi::VIM::Datacenter - container = container.parent - end - dc = container - # With the Datacenter Object we can obtain the resource pool - resource_pool = dc.hostFolder.children.first.resourcePool + # Now find the template itself using the efficient find method + vm_mob_ref = folder.find(template_name, RbVmomi::VIM::VirtualMachine) + + # Now find _a_ resource pool of the template's host (REVISIT: We need + # to support cloning into a specific RP) + esx_host = vm_mob_ref.collect!('runtime.host')['runtime.host'] + # The parent of the ESX host itself is a ComputeResource which has a resourcePool + resource_pool = esx_host.parent.resourcePool + # Next, create a Relocation Spec instance relocation_spec = RbVmomi::VIM.VirtualMachineRelocateSpec(:pool => resource_pool, :transform => options['transform'] || 'sparse') @@ -50,27 +79,37 @@ module Fog :powerOn => options['power_on'] || true, :template => false) task = vm_mob_ref.CloneVM_Task(:folder => vm_mob_ref.parent, :name => options['name'], :spec => clone_spec) - # REVISIT: The task object contains a reference to the template but does - # not appear to contain a reference to the newly created VM. - # This is a really naive way to find the managed object reference - # of the newly created VM. - tries = 0 - new_vm = begin - list_virtual_machines['virtual_machines'].detect(lambda { raise Fog::Vsphere::Errors::NotFound }) do |vm| - vm['name'] == options['name'] + + # Waiting for the VM to complete allows us to get the VirtulMachine + # object of the new machine when it's done. It is HIGHLY recommended + # to set 'wait' => true if your app wants to wait. Otherwise, you're + # going to have to reload the server model over and over which + # generates a lot of time consuming API calls to vmware. + if options['wait'] then + # REVISIT: It would be awesome to call a block passed to this + # request to notify the application how far along in the process we + # are. I'm thinking of updating a progress bar, etc... + new_vm = task.wait_for_completion + else + tries = 0 + new_vm = begin + # Try and find the new VM (folder.find is quite efficient) + folder.find(options['name'], RbVmomi::VIM::VirtualMachine) or raise Fog::Vsphere::Errors::NotFound + rescue Fog::Vsphere::Errors::NotFound + tries += 1 + if tries <= 10 then + sleep 15 + retry + end + nil end - rescue Fog::Vsphere::Errors::NotFound - tries += 1 - if tries <= 10 then - sleep 1 - retry - end - nil end + # Return hash { - 'vm_ref' => new_vm ? new_vm['mo_ref'] : nil, - 'task_ref' => task._ref + 'vm_ref' => new_vm ? new_vm._ref : nil, + 'vm_attributes' => new_vm ? convert_vm_mob_ref_to_attr_hash(new_vm) : {}, + 'task_ref' => task._ref } end @@ -83,7 +122,7 @@ module Fog options = vm_clone_check_options(options) notfound = lambda { raise Fog::Compute::Vsphere::NotFound, "Cloud not find VM template" } vm_mob_ref = list_virtual_machines['virtual_machines'].find(notfound) do |vm| - vm['instance_uuid'] == options['instance_uuid'] + vm['name'] == options['path'].split("/")[-1] end { 'vm_ref' => 'vm-123', diff --git a/tests/vsphere/requests/compute/vm_clone_tests.rb b/tests/vsphere/requests/compute/vm_clone_tests.rb index 319da29df..28cc73d0e 100644 --- a/tests/vsphere/requests/compute/vm_clone_tests.rb +++ b/tests/vsphere/requests/compute/vm_clone_tests.rb @@ -1,10 +1,10 @@ Shindo.tests("Fog::Compute[:vsphere] | vm_clone request", 'vsphere') do #require 'guid' - template = "50323f93-6835-1178-8b8f-9e2109890e1a" + template = "/Datacenters/Solutions/vm/Jeff/Templates/centos56gm2" compute = Fog::Compute[:vsphere] tests("The return value should") do - response = compute.vm_clone('instance_uuid' => template, 'name' => 'cloning_vm') + response = compute.vm_clone('path' => template, 'name' => 'cloning_vm') test("be a kind of Hash") { response.kind_of? Hash } %w{ vm_ref task_ref }.each do |key| test("have a #{key} key") { response.has_key? key } @@ -12,10 +12,6 @@ Shindo.tests("Fog::Compute[:vsphere] | vm_clone request", 'vsphere') do end tests("When invalid input is presented") do raises(ArgumentError, 'it should raise ArgumentError') { compute.vm_clone(:foo => 1) } - raises(Fog::Vsphere::Errors::ServiceError, - 'it should raise ServiceError if a VM already exists with the provided name') do - compute.vm_clone('instance_uuid' => '123', 'name' => 'jefftest') - end raises(Fog::Compute::Vsphere::NotFound, 'it should raise Fog::Compute::Vsphere::NotFound when the UUID is not a string') do pending # require 'guid' compute.vm_clone('instance_uuid' => Guid.from_s(template), 'name' => 'jefftestfoo')