1
0
Fork 0
mirror of https://github.com/fog/fog.git synced 2022-11-09 13:51:43 -05:00

Merge pull request #520 from rbriski/master

EMR functionality for AWS
This commit is contained in:
Wesley Beary 2011-11-04 09:45:19 -07:00
commit 4662b2635d
21 changed files with 1207 additions and 2 deletions

View file

@ -13,6 +13,7 @@ module Fog
service(:dns, 'aws/dns', 'DNS')
service(:elasticache, 'aws/elasticache', 'Elasticache')
service(:elb, 'aws/elb', 'ELB')
service(:emr, 'aws/emr', 'EMR')
service(:iam, 'aws/iam', 'IAM')
service(:rds, 'aws/rds', 'RDS')
service(:ses, 'aws/ses', 'SES')
@ -27,10 +28,34 @@ module Fog
key << '.%d'
end
[*values].each_with_index do |value, index|
params[format(key, index + 1)] = value
if value.respond_to?('keys')
k = format(key, index + 1)
value.each do | vkey, vvalue |
params["#{k}.#{vkey}"] = vvalue
end
else
params[format(key, index + 1)] = value
end
end
params
end
def self.serialize_keys(key, value, options = {})
case value
when Hash
value.each do | k, v |
options.merge!(serialize_keys("#{key}.#{k}", v))
end
return options
when Array
value.each_with_index do | it, idx |
options.merge!(serialize_keys("#{key}.member.#{(idx + 1)}", it))
end
return options
else
return {key => value}
end
end
def self.indexed_filters(filters)
params = {}

133
lib/fog/aws/emr.rb Normal file
View file

@ -0,0 +1,133 @@
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'aws'))
module Fog
module AWS
class EMR < Fog::Service
class IdentifierTaken < Fog::Errors::Error; end
requires :aws_access_key_id, :aws_secret_access_key
recognizes :region, :host, :path, :port, :scheme, :persistent
request_path 'fog/aws/requests/emr'
request :add_instance_groups
request :add_job_flow_steps
request :describe_job_flows
request :modify_instance_groups
request :run_job_flow
request :set_termination_protection
request :terminate_job_flows
# model_path 'fog/aws/models/rds'
# model :server
# collection :servers
# model :snapshot
# collection :snapshots
# model :parameter_group
# collection :parameter_groups
#
# model :parameter
# collection :parameters
#
# model :security_group
# collection :security_groups
class Mock
def initialize(options={})
Fog::Mock.not_implemented
end
end
class Real
# Initialize connection to EMR
#
# ==== Notes
# options parameter must include values for :aws_access_key_id and
# :aws_secret_access_key in order to create a connection
#
# ==== Examples
# emr = EMR.new(
# :aws_access_key_id => your_aws_access_key_id,
# :aws_secret_access_key => your_aws_secret_access_key
# )
#
# ==== Parameters
# * options<~Hash> - config arguments for connection. Defaults to {}.
# * region<~String> - optional region to use, in ['eu-west-1', 'us-east-1', 'us-west-1'i, 'ap-southeast-1']
#
# ==== Returns
# * EMR object with connection to AWS.
def initialize(options={})
@aws_access_key_id = options[:aws_access_key_id]
@aws_secret_access_key = options[:aws_secret_access_key]
@connection_options = options[:connection_options] || {}
@hmac = Fog::HMAC.new('sha256', @aws_secret_access_key)
options[:region] ||= 'us-east-1'
@host = options[:host] || 'elasticmapreduce.amazonaws.com'
@path = options[:path] || '/'
@persistent = options[:persistent] || false
@port = options[:port] || 443
@scheme = options[:scheme] || 'https'
@connection = Fog::Connection.new("#{@scheme}://#{@host}:#{@port}#{@path}", @persistent, @connection_options)
end
def reload
@connection.reset
end
private
def request(params)
idempotent = params.delete(:idempotent)
parser = params.delete(:parser)
body = Fog::AWS.signed_params(
params,
{
:aws_access_key_id => @aws_access_key_id,
:hmac => @hmac,
:host => @host,
:path => @path,
:port => @port,
:version => '2009-03-31' #'2010-07-28'
}
)
begin
response = @connection.request({
:body => body,
:expects => 200,
:headers => { 'Content-Type' => 'application/x-www-form-urlencoded' },
:idempotent => idempotent,
:host => @host,
:method => 'POST',
:parser => parser
})
rescue Excon::Errors::HTTPStatusError => error
if match = error.message.match(/<Code>(.*)<\/Code>[\s\\\w]+<Message>(.*)<\/Message>/m)
# case match[1].split('.').last
# when 'DBInstanceNotFound', 'DBParameterGroupNotFound', 'DBSnapshotNotFound', 'DBSecurityGroupNotFound'
# raise Fog::AWS::RDS::NotFound.slurp(error, match[2])
# when 'DBParameterGroupAlreadyExists'
# raise Fog::AWS::RDS::IdentifierTaken.slurp(error, match[2])
# else
# raise
# end
raise
else
raise
end
end
response
end
end
end
end
end

View file

@ -0,0 +1,28 @@
module Fog
module Parsers
module AWS
module EMR
class AddInstanceGroups < Fog::Parsers::Base
def start_element(name, attrs = [])
super
case name
when 'InstanceGroupIds'
@response['InstanceGroupIds'] = []
end
end
def end_element(name)
case name
when 'JobFlowId'
@response[name] = value
when 'member'
@response['InstanceGroupIds'] << value
end
end
end
end
end
end
end

View file

@ -0,0 +1,17 @@
module Fog
module Parsers
module AWS
module EMR
class AddJobFlowSteps < Fog::Parsers::Base
def end_element(name)
case name
when 'RequestId'
@response[name] = value
end
end
end
end
end
end
end

View file

@ -0,0 +1,140 @@
module Fog
module Parsers
module AWS
module EMR
class DescribeJobFlows < Fog::Parsers::Base
def reset
@context = []
@contexts = ['BootstrapActions', 'ExecutionStatusDetail', 'Instances', 'Steps', 'InstanceGroups', 'Args']
@response = { 'JobFlows' => [] }
@bootstrap_actions = {'ScriptBootstrapActionConfig' => {'Args' => []}}
@instance = { 'InstanceGroups' => [], 'Placement' => {}}
@step = {
'ExecutionStatusDetail' => {},
'StepConfig' => {
'HadoopJarStepConfig' => {
'Args' => [],
'Properties' => []
}
}
}
@flow = {'Instances' => [], 'ExecutionStatusDetail' => {}, 'BootstrapActions' => [], 'Steps' => []}
@instance_group_detail = {}
@execution_status_detail = {}
end
def start_element(name, attrs = [])
super
if @contexts.include?(name)
@context.push(name)
end
end
def end_element(name)
if @context.last == 'BootstrapActions'
case name
when 'Name'
@bootstrap_actions[name] = value
when 'Path'
@bootstrap_actions['ScriptBootstrapActionConfig'][name] = value
when 'BootstrapActions'
@flow['BootstrapActions'] = @bootstrap_actions
@bootstrap_actions = {'ScriptBootstrapActionConfig' => {'Args' => []}}
end
end
if @context.last == 'ExecutionStatusDetail'
case name
when 'CreationDateTime', 'EndDateTime', 'LastStateChangeReason',
'ReadyDateTime', 'StartDateTime', 'State'
@execution_status_detail[name] = value
when 'ExecutionStatusDetail'
if @context.include?('Steps')
@step['ExecutionStatusDetail'] = @execution_status_detail
else
@flow['ExecutionStatusDetail'] = @execution_status_detail
end
@execution_status_detail = {}
end
end
if @context.last == 'Instances'
case name
when 'AvailabilityZone'
@instance['Placement'][name] = value
when 'Ec2KeyName', 'HadoopVersion', 'InstanceCount', 'KeepJobFlowAliveWhenNoSteps',
'MasterInstanceId', 'MasterInstanceType', 'MasterPublicDnsName', 'NormalizedInstanceHours',
'SlaveInstanceType', 'TerminationProtected'
@instance[name] = value
when 'member'
@instance['InstanceGroups'] << @instance_group_detail
@instance_group_detail = {}
when 'Instances'
@flow['Instances'] = @instance
@instance = { 'InstanceGroups' => [], 'Placement' => {}}
end
end
if @context.last == 'InstanceGroups'
case name
when 'member'
@instance['InstanceGroups'] << @instance_group_detail
@instance_group_detail = {}
else
@instance_group_detail[name] = value
end
end
if @context.last == 'Args'
if name == 'member'
if @context.include?('Steps')
@step['StepConfig']['HadoopJarStepConfig']['Args'] << value.strip
else
@bootstrap_actions['ScriptBootstrapActionConfig']['Args'] << value
end
end
end
if @context.last == 'Steps'
case name
when 'ActionOnFailure', 'Name'
@step[name] = value
when 'Jar', 'MainClass'
@step['StepConfig']['HadoopJarStepConfig'][name] = value
when 'member'
@flow['Steps'] << @step
@step = {
'ExecutionStatusDetail' => {},
'StepConfig' => {
'HadoopJarStepConfig' => {
'Args' => [],
'Properties' => []
}
}
}
end
end
if @context.empty?
case name
when 'AmiVersion', 'JobFlowId', 'LogUri', 'Name'
@flow[name] = value
when 'member'
@response['JobFlows'] << @flow
@flow = {'Instances' => [], 'ExecutionStatusDetail' => {}, 'BootstrapActions' => [], 'Steps' => []}
end
end
if @context.last == name
@context.pop
end
end
end
end
end
end
end

View file

@ -0,0 +1,17 @@
module Fog
module Parsers
module AWS
module EMR
class ModifyInstanceGroups < Fog::Parsers::Base
def end_element(name)
case name
when 'RequestId'
@response[name] = value
end
end
end
end
end
end
end

View file

@ -0,0 +1,19 @@
module Fog
module Parsers
module AWS
module EMR
class RunJobFlow < Fog::Parsers::Base
def end_element(name)
case name
when 'JobFlowId'
@response[name] = value
when 'RequestId'
@response[name] = value
end
end
end
end
end
end
end

View file

@ -0,0 +1,17 @@
module Fog
module Parsers
module AWS
module EMR
class SetTerminationProtection < Fog::Parsers::Base
def end_element(name)
case name
when 'RequestId'
@response[name] = value
end
end
end
end
end
end
end

View file

@ -0,0 +1,17 @@
module Fog
module Parsers
module AWS
module EMR
class TerminateJobFlows < Fog::Parsers::Base
def end_element(name)
case name
when 'RequestId'
@response[name] = value
end
end
end
end
end
end
end

View file

@ -0,0 +1,46 @@
module Fog
module AWS
class EMR
class Real
require 'fog/aws/parsers/emr/add_instance_groups'
# adds an instance group to a running cluster
# http://docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_AddInstanceGroups.html
# ==== Parameters
# * JobFlowId <~String> - Job flow in which to add the instance groups
# * InstanceGroups<~Array> - Instance Groups to add
# * 'BidPrice'<~String> - Bid price for each Amazon EC2 instance in the instance group when launching nodes as Spot Instances, expressed in USD.
# * 'InstanceCount'<~Integer> - Target number of instances for the instance group
# * 'InstanceRole'<~String> - MASTER | CORE | TASK The role of the instance group in the cluster
# * 'InstanceType'<~String> - The Amazon EC2 instance type for all instances in the instance group
# * 'MarketType'<~String> - ON_DEMAND | SPOT Market type of the Amazon EC2 instances used to create a cluster node
# * 'Name'<~String> - Friendly name given to the instance group.
#
# ==== Returns
# * response<~Excon::Response>:
# * body<~Hash>:
def add_instance_groups(job_flow_id, options={})
if instance_groups = options.delete('InstanceGroups')
options.merge!(Fog::AWS.indexed_param('InstanceGroups.member.%d', [*instance_groups]))
end
request({
'Action' => 'AddInstanceGroups',
'JobFlowId' => job_flow_id,
:parser => Fog::Parsers::AWS::EMR::AddInstanceGroups.new,
}.merge(options))
end
end
class Mock
def add_instance_groups(job_flow_id, options={})
Fog::Mock.not_implemented
end
end
end
end
end

View file

@ -0,0 +1,49 @@
module Fog
module AWS
class EMR
class Real
require 'fog/aws/parsers/emr/add_job_flow_steps'
# adds new steps to a running job flow.
# http://docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_AddJobFlowSteps.html
# ==== Parameters
# * JobFlowId <~String> - A string that uniquely identifies the job flow
# * Steps <~Array> - A list of steps to be executed by the job flow
# * 'ActionOnFailure'<~String> - TERMINATE_JOB_FLOW | CANCEL_AND_WAIT | CONTINUE Specifies the action to take if the job flow step fails
# * 'HadoopJarStep'<~Array> - Specifies the JAR file used for the job flow step
# * 'Args'<~String list> - A list of command line arguments passed to the JAR file's main function when executed.
# * 'Jar'<~String> - A path to a JAR file run during the step.
# * 'MainClass'<~String> - The name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file
# * 'Properties'<~Array> - A list of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function
# * 'Key'<~String> - The unique identifier of a key value pair
# * 'Value'<~String> - The value part of the identified key
# * 'Name'<~String> - The name of the job flow step
#
# ==== Returns
# * response<~Excon::Response>:
# * body<~Hash>:
def add_job_flow_steps(job_flow_id, options={})
if steps = options.delete('Steps')
options.merge!(Fog::AWS.serialize_keys('Steps', steps))
end
request({
'Action' => 'AddJobFlowSteps',
'JobFlowId' => job_flow_id,
:parser => Fog::Parsers::AWS::EMR::AddJobFlowSteps.new,
}.merge(options))
end
end
class Mock
def add_job_flow_steps(db_name, options={})
Fog::Mock.not_implemented
end
end
end
end
end

View file

@ -0,0 +1,108 @@
module Fog
module AWS
class EMR
class Real
require 'fog/aws/parsers/emr/describe_job_flows'
# returns a list of job flows that match all of the supplied parameters.
# http://docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_DescribeJobFlows.html
# ==== Parameters
# * CreatedAfter <~DateTime> - Return only job flows created after this date and time
# * CreatedBefore <~DateTime> - Return only job flows created before this date and time
# * JobFlowIds <~String list> - Return only job flows whose job flow ID is contained in this list
# * JobFlowStates <~String list> - RUNNING | WAITING | SHUTTING_DOWN | STARTING Return only job flows whose state is contained in this list
#
# ==== Returns
# * response<~Excon::Response>:
# * body<~Hash>:
# * JobFlows <~Array> - A list of job flows matching the parameters supplied.
# * AmiVersion <~String> - A list of bootstrap actions that will be run before Hadoop is started on the cluster nodes.
# * 'BootstrapActions'<~Array> - A list of the bootstrap actions run by the job flow
# * 'BootstrapConfig <~Array> - A description of the bootstrap action
# * 'Name' <~String> - The name of the bootstrap action
# * 'ScriptBootstrapAction' <~Array> - The script run by the bootstrap action.
# * 'Args' <~String list> - A list of command line arguments to pass to the bootstrap action script.
# * 'Path' <~String> - Location of the script to run during a bootstrap action.
# * 'ExecutionStatusDetail'<~Array> - Describes the execution status of the job flow
# * 'CreationDateTime <~DateTime> - The creation date and time of the job flow.
# * 'EndDateTime <~DateTime> - The completion date and time of the job flow.
# * 'LastStateChangeReason <~String> - Description of the job flow last changed state.
# * 'ReadyDateTime <~DateTime> - The date and time when the job flow was ready to start running bootstrap actions.
# * 'StartDateTime <~DateTime> - The start date and time of the job flow.
# * 'State <~DateTime> - COMPLETED | FAILED | TERMINATED | RUNNING | SHUTTING_DOWN | STARTING | WAITING | BOOTSTRAPPING The state of the job flow.
# * Instances <~Array> - A specification of the number and type of Amazon EC2 instances on which to run the job flow.
# * 'Ec2KeyName'<~String> - Specifies the name of the Amazon EC2 key pair that can be used to ssh to the master node as the user called "hadoop.
# * 'HadoopVersion'<~String> - "0.18" | "0.20" Specifies the Hadoop version for the job flow
# * 'InstanceCount'<~Integer> - The number of Amazon EC2 instances used to execute the job flow
# * 'InstanceGroups'<~Array> - Configuration for the job flow's instance groups
# * 'BidPrice' <~String> - Bid price for each Amazon EC2 instance in the instance group when launching nodes as Spot Instances, expressed in USD.
# * 'CreationDateTime' <~DateTime> - The date/time the instance group was created.
# * 'EndDateTime' <~DateTime> - The date/time the instance group was terminated.
# * 'InstanceGroupId' <~String> - Unique identifier for the instance group.
# * 'InstanceRequestCount'<~Integer> - Target number of instances for the instance group
# * 'InstanceRole'<~String> - MASTER | CORE | TASK The role of the instance group in the cluster
# * 'InstanceRunningCount'<~Integer> - Actual count of running instances
# * 'InstanceType'<~String> - The Amazon EC2 instance type for all instances in the instance group
# * 'LastStateChangeReason'<~String> - Details regarding the state of the instance group
# * 'Market'<~String> - ON_DEMAND | SPOT Market type of the Amazon EC2 instances used to create a cluster
# * 'Name'<~String> - Friendly name for the instance group
# * 'ReadyDateTime'<~DateTime> - The date/time the instance group was available to the cluster
# * 'StartDateTime'<~DateTime> - The date/time the instance group was started
# * 'State'<~String> - PROVISIONING | STARTING | BOOTSTRAPPING | RUNNING | RESIZING | ARRESTED | SHUTTING_DOWN | TERMINATED | FAILED | ENDED State of instance group
# * 'KeepJobFlowAliveWhenNoSteps' <~Boolean> - Specifies whether the job flow should terminate after completing all steps
# * 'MasterInstanceId'<~String> - The Amazon EC2 instance identifier of the master node
# * 'MasterInstanceType'<~String> - The EC2 instance type of the master node
# * 'MasterPublicDnsName'<~String> - The DNS name of the master node
# * 'NormalizedInstanceHours'<~Integer> - An approximation of the cost of the job flow, represented in m1.small/hours.
# * 'Placement'<~Array> - Specifies the Availability Zone the job flow will run in
# * 'AvailabilityZone' <~String> - The Amazon EC2 Availability Zone for the job flow.
# * 'SlaveInstanceType'<~String> - The EC2 instance type of the slave nodes
# * 'TerminationProtected'<~Boolean> - Specifies whether to lock the job flow to prevent the Amazon EC2 instances from being terminated by API call, user intervention, or in the event of a job flow error
# * LogUri <~String> - Specifies the location in Amazon S3 to write the log files of the job flow. If a value is not provided, logs are not created
# * Name <~String> - The name of the job flow
# * Steps <~Array> - A list of steps to be executed by the job flow
# * 'ExecutionStatusDetail'<~Array> - Describes the execution status of the job flow
# * 'CreationDateTime <~DateTime> - The creation date and time of the job flow.
# * 'EndDateTime <~DateTime> - The completion date and time of the job flow.
# * 'LastStateChangeReason <~String> - Description of the job flow last changed state.
# * 'ReadyDateTime <~DateTime> - The date and time when the job flow was ready to start running bootstrap actions.
# * 'StartDateTime <~DateTime> - The start date and time of the job flow.
# * 'State <~DateTime> - COMPLETED | FAILED | TERMINATED | RUNNING | SHUTTING_DOWN | STARTING | WAITING | BOOTSTRAPPING The state of the job flow.
# * StepConfig <~Array> - The step configuration
# * 'ActionOnFailure'<~String> - TERMINATE_JOB_FLOW | CANCEL_AND_WAIT | CONTINUE Specifies the action to take if the job flow step fails
# * 'HadoopJarStep'<~Array> - Specifies the JAR file used for the job flow step
# * 'Args'<~String list> - A list of command line arguments passed to the JAR file's main function when executed.
# * 'Jar'<~String> - A path to a JAR file run during the step.
# * 'MainClass'<~String> - The name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file
# * 'Properties'<~Array> - A list of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function
# * 'Key'<~String> - The unique identifier of a key value pair
# * 'Value'<~String> - The value part of the identified key
# * 'Name'<~String> - The name of the job flow step
def describe_job_flows(options={})
if job_ids = options.delete('JobFlowIds')
options.merge!(Fog::AWS.serialize_keys('JobFlowIds', job_ids))
end
if job_states = options.delete('JobFlowStates')
options.merge!(Fog::AWS.serialize_keys('JobFlowStates', job_states))
end
request({
'Action' => 'DescribeJobFlows',
:parser => Fog::Parsers::AWS::EMR::DescribeJobFlows.new,
}.merge(options))
end
end
class Mock
def describe_job_flows(db_name, options={})
Fog::Mock.not_implemented
end
end
end
end
end

View file

@ -0,0 +1,40 @@
module Fog
module AWS
class EMR
class Real
require 'fog/aws/parsers/emr/modify_instance_groups'
# modifies the number of nodes and configuration settings of an instance group..
# http://docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_ModifyInstanceGroups.html
# ==== Parameters
# * InstanceGroups <~InstanceGroupModifyConfig list> - Instance groups to change
# * InstanceCount <~Integer> - Target size for instance group
# * InstanceGroupId <~String> - Unique ID of the instance group to expand or shrink
#
# ==== Returns
# * response<~Excon::Response>:
# * body<~Hash>
def modify_instance_groups(options={})
if job_ids = options.delete('InstanceGroups')
options.merge!(Fog::AWS.serialize_keys('InstanceGroups', job_ids))
end
request({
'Action' => 'ModifyInstanceGroups',
:parser => Fog::Parsers::AWS::EMR::ModifyInstanceGroups.new,
}.merge(options))
end
end
class Mock
def modify_instance_groups(options={})
Fog::Mock.not_implemented
end
end
end
end
end

View file

@ -0,0 +1,106 @@
module Fog
module AWS
class EMR
class Real
require 'fog/aws/parsers/emr/run_job_flow'
# creates and starts running a new job flow
# http://docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_RunJobFlow.html
# ==== Parameters
# * AdditionalInfo <~String> - A JSON string for selecting additional features.
# * BootstrapActions <~Array> - A list of bootstrap actions that will be run before Hadoop is started on the cluster nodes.
# * 'Name'<~String> - The name of the bootstrap action
# * 'ScriptBootstrapAction'<~Array> - The script run by the bootstrap action
# * 'Args' <~Array> - A list of command line arguments to pass to the bootstrap action script
# * 'Path' <~String> - Location of the script to run during a bootstrap action. Can be either a location in Amazon S3 or on a local file system.
# * Instances <~Array> - A specification of the number and type of Amazon EC2 instances on which to run the job flow.
# * 'Ec2KeyName'<~String> - Specifies the name of the Amazon EC2 key pair that can be used to ssh to the master node as the user called "hadoop.
# * 'HadoopVersion'<~String> - "0.18" | "0.20" Specifies the Hadoop version for the job flow
# * 'InstanceCount'<~Integer> - The number of Amazon EC2 instances used to execute the job flow
# * 'InstanceGroups'<~Array> - Configuration for the job flow's instance groups
# * 'BidPrice' <~String> - Bid price for each Amazon EC2 instance in the instance group when launching nodes as Spot Instances, expressed in USD.
# * 'InstanceCount'<~Integer> - Target number of instances for the instance group
# * 'InstanceRole'<~String> - MASTER | CORE | TASK The role of the instance group in the cluster
# * 'InstanceType'<~String> - The Amazon EC2 instance type for all instances in the instance group
# * 'MarketType'<~String> - ON_DEMAND | SPOT Market type of the Amazon EC2 instances used to create a cluster node
# * 'Name'<~String> - Friendly name given to the instance group.
# * 'KeepJobFlowAliveWhenNoSteps' <~Boolean> - Specifies whether the job flow should terminate after completing all steps
# * 'MasterInstanceType'<~String> - The EC2 instance type of the master node
# * 'Placement'<~Array> - Specifies the Availability Zone the job flow will run in
# * 'AvailabilityZone' <~String> - The Amazon EC2 Availability Zone for the job flow.
# * 'SlaveInstanceType'<~String> - The EC2 instance type of the slave nodes
# * 'TerminationProtected'<~Boolean> - Specifies whether to lock the job flow to prevent the Amazon EC2 instances from being terminated by API call, user intervention, or in the event of a job flow error
# * LogUri <~String> - Specifies the location in Amazon S3 to write the log files of the job flow. If a value is not provided, logs are not created
# * Name <~String> - The name of the job flow
# * Steps <~Array> - A list of steps to be executed by the job flow
# * 'ActionOnFailure'<~String> - TERMINATE_JOB_FLOW | CANCEL_AND_WAIT | CONTINUE Specifies the action to take if the job flow step fails
# * 'HadoopJarStep'<~Array> - Specifies the JAR file used for the job flow step
# * 'Args'<~String list> - A list of command line arguments passed to the JAR file's main function when executed.
# * 'Jar'<~String> - A path to a JAR file run during the step.
# * 'MainClass'<~String> - The name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file
# * 'Properties'<~Array> - A list of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function
# * 'Key'<~String> - The unique identifier of a key value pair
# * 'Value'<~String> - The value part of the identified key
# * 'Name'<~String> - The name of the job flow step
#
# ==== Returns
# * response<~Excon::Response>:
# * body<~Hash>:
def run_job_flow(name, options={})
if bootstrap_actions = options.delete('BootstrapActions')
options.merge!(Fog::AWS.serialize_keys('BootstrapActions', bootstrap_actions))
end
if instances = options.delete('Instances')
options.merge!(Fog::AWS.serialize_keys('Instances', instances))
end
if steps = options.delete('Steps')
options.merge!(Fog::AWS.serialize_keys('Steps', steps))
end
request({
'Action' => 'RunJobFlow',
'Name' => name,
:parser => Fog::Parsers::AWS::EMR::RunJobFlow.new,
}.merge(options))
end
def run_hive(name, options={})
steps = []
steps << {
'Name' => 'Setup Hive',
'HadoopJarStep' => {
'Jar' => 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar',
'Args' => ['s3://us-east-1.elasticmapreduce/libs/hive/hive-script', '--base-path', 's3://us-east-1.elasticmapreduce/libs/hive/', '--install-hive']},
'ActionOnFailure' => 'TERMINATE_JOB_FLOW'
}
steps << {
'Name' => 'Install Hive Site Configuration',
'HadoopJarStep' => {
'Jar' => 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar',
'Args' => ['s3://us-east-1.elasticmapreduce/libs/hive/hive-script', '--base-path', 's3://us-east-1.elasticmapreduce/libs/hive/', '--install-hive-site', '--hive-site=s3://raybeam.okl/prod/hive/hive-site.xml']},
'ActionOnFailure' => 'TERMINATE_JOB_FLOW'
}
options['Steps'] = steps
if not options['Instances'].nil?
options['Instances']['KeepJobFlowAliveWhenNoSteps'] = true
end
run_job_flow name, options
end
end
class Mock
def run_job_flow(db_name, options={})
Fog::Mock.not_implemented
end
end
end
end
end

View file

@ -0,0 +1,39 @@
module Fog
module AWS
class EMR
class Real
require 'fog/aws/parsers/emr/set_termination_protection'
# locks a job flow so the Amazon EC2 instances in the cluster cannot be terminated by user intervention.
# http://docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_SetTerminationProtection.html
# ==== Parameters
# * JobFlowIds <~String list> - list of strings that uniquely identify the job flows to protect
# * TerminationProtected <~Boolean> - indicates whether to protect the job flow
#
# ==== Returns
# * response<~Excon::Response>:
# * body<~Hash>
def set_termination_protection(is_protected, options={})
if job_ids = options.delete('JobFlowIds')
options.merge!(Fog::AWS.serialize_keys('JobFlowIds', job_ids))
end
request({
'Action' => 'SetTerminationProtection',
'TerminationProtected' => is_protected,
:parser => Fog::Parsers::AWS::EMR::SetTerminationProtection.new,
}.merge(options))
end
end
class Mock
def set_termination_protection(db_name, options={})
Fog::Mock.not_implemented
end
end
end
end
end

View file

@ -0,0 +1,37 @@
module Fog
module AWS
class EMR
class Real
require 'fog/aws/parsers/emr/terminate_job_flows'
# shuts a list of job flows down.
# http://docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_TerminateJobFlows.html
# ==== Parameters
# * JobFlowIds <~String list> - list of strings that uniquely identify the job flows to protect
#
# ==== Returns
# * response<~Excon::Response>:
# * body<~Hash>
def terminate_job_flows(options={})
if job_ids = options.delete('JobFlowIds')
options.merge!(Fog::AWS.serialize_keys('JobFlowIds', job_ids))
end
request({
'Action' => 'TerminateJobFlows',
:parser => Fog::Parsers::AWS::EMR::TerminateJobFlows.new,
}.merge(options))
end
end
class Mock
def terminate_job_flows(db_name, options={})
Fog::Mock.not_implemented
end
end
end
end
end

View file

@ -19,6 +19,8 @@ class AWS < Fog::Bin
Fog::AWS::Elasticache
when :elb
Fog::AWS::ELB
when :emr
Fog::AWS::EMR
when :iam
Fog::AWS::IAM
when :sdb, :simpledb
@ -63,6 +65,8 @@ class AWS < Fog::Bin
Fog::AWS::Elasticache.new
when :elb
Fog::AWS::ELB.new
when :emr
Fog::AWS::EMR.new
when :iam
Fog::AWS::IAM.new
when :rds

View file

@ -18,7 +18,7 @@ module Fog
end
response = @excon.request(params, &block)
if parser
body.finish
response.body = parser.response

View file

@ -0,0 +1,172 @@
class AWS
module EMR
module Formats
BASIC = {
'RequestId' => String
}
RUN_JOB_FLOW = BASIC.merge({
'JobFlowId' => String
})
ADD_INSTANCE_GROUPS = {
'JobFlowId' => String,
'InstanceGroupIds' => Array
}
SIMPLE_DESCRIBE_JOB_FLOW = {
'JobFlows' => [{
'Name' => String,
'BootstrapActions' => {
'ScriptBootstrapActionConfig' => {
'Args' => Array
}
},
'ExecutionStatusDetail' => {
'CreationDateTime' => String,
'State' => String,
'LastStateChangeReason' => String
},
'Steps' => [{
'ActionOnFailure' => String,
'Name' => String,
'StepConfig' => {
'HadoopJarStepConfig' => {
'MainClass' => String,
'Jar' => String,
'Args' => Array,
'Properties' => Array
}
},
'ExecutionStatusDetail' => {
'CreationDateTime' => String,
'State' => String
}
}],
'JobFlowId' => String,
'Instances' => {
'InstanceCount' => String,
'NormalizedInstanceHours' => String,
'KeepJobFlowAliveWhenNoSteps' => String,
'Placement' => {
'AvailabilityZone' => String
},
'MasterInstanceType' => String,
'SlaveInstanceType' => String,
'InstanceGroups' => Array,
'TerminationProtected' => String,
'HadoopVersion' => String
}
}]
}
JOB_FLOW_WITHOUT_CHANGE = {
'JobFlows' => [{
'Name' => String,
'BootstrapActions' => {
'ScriptBootstrapActionConfig' => {
'Args' => Array
}
},
'ExecutionStatusDetail' => {
'CreationDateTime' => String,
'State' => String,
'LastStateChangeReason' => NilClass
},
'Steps' => [{
'ActionOnFailure' => String,
'Name' => String,
'StepConfig' => {
'HadoopJarStepConfig' => {
'MainClass' => String,
'Jar' => String,
'Args' => Array,
'Properties' => Array
}
},
'ExecutionStatusDetail' => {
'CreationDateTime' => String,
'State' => String
}
}],
'JobFlowId' => String,
'Instances' => {
'InstanceCount' => String,
'NormalizedInstanceHours' => String,
'KeepJobFlowAliveWhenNoSteps' => String,
'Placement' => {
'AvailabilityZone' => String
},
'MasterInstanceType' => String,
'SlaveInstanceType' => String,
'InstanceGroups' => Array,
'TerminationProtected' => String,
'HadoopVersion' => String
}
}]
}
DESCRIBE_JOB_FLOW_WITH_INSTANCE_GROUPS = {
'JobFlows' => [{
'Name' => String,
'BootstrapActions' => {
'ScriptBootstrapActionConfig' => {
'Args' => Array
}
},
'ExecutionStatusDetail' => {
'CreationDateTime' => String,
'State' => String,
'LastStateChangeReason' => NilClass
},
'Steps' => [{
'ActionOnFailure' => String,
'Name' => String,
'StepConfig' => {
'HadoopJarStepConfig' => {
'MainClass' => String,
'Jar' => String,
'Args' => Array,
'Properties' => Array
}
},
'ExecutionStatusDetail' => {
'CreationDateTime' => String,
'State' => String
}
}],
'JobFlowId' => String,
'Instances' => {
'InstanceCount' => String,
'NormalizedInstanceHours' => String,
'KeepJobFlowAliveWhenNoSteps' => String,
'Placement' => {
'AvailabilityZone' => String
},
'InstanceGroups' => [{
'Name' => String,
'InstanceRole' => String,
'CreationDateTime' => String,
'LastStateChangeReason' => nil,
'InstanceGroupId' => String,
'Market' => String,
'InstanceType' => String,
'State' => String,
'InstanceRunningCount' => String,
'InstanceRequestCount' => String
}],
'MasterInstanceType' => String,
'SlaveInstanceType' => String,
'InstanceGroups' => Array,
'TerminationProtected' => String,
'HadoopVersion' => String
}
}]
}
end
end
end

View file

@ -0,0 +1,105 @@
Shindo.tests('AWS::EMR | instance groups', ['aws', 'emr']) do
@job_flow_name = "fog_job_flow_#{Time.now.to_f.to_s.gsub('.','')}"
@job_flow_options = {
'Instances' => {
'MasterInstanceType' => 'm1.small',
'SlaveInstanceType' => 'm1.small',
'InstanceCount' => 2,
'Placement' => {
'AvailabilityZone' => 'us-east-1a'
},
'KeepJobFlowAliveWhenNoSteps' => false,
'TerminationProtected' => false,
'HadoopVersion' => '0.20'
}
}
@job_flow_steps = {
'Steps' => [{
'Name' => 'Dummy streaming job',
'ActionOnFailure' => 'CONTINUE',
'HadoopJarStep' => {
'Jar' => '/home/hadoop/contrib/streaming/hadoop-streaming.jar',
'MainClass' => nil,
'Args' => %w(-input s3n://elasticmapreduce/samples/wordcount/input -output hdfs:///examples/output/2011-11-03T090856 -mapper s3n://elasticmapreduce/samples/wordcount/wordSplitter.py -reducer aggregate)
}
}]
}
@instance_group_name = "fog_instance_group_#{Time.now.to_f.to_s.gsub('.','')}"
@instance_groups = {
'InstanceGroups' => [{
'Name' => @instance_group_name,
'InstanceRole' => 'TASK',
'InstanceType' => 'm1.small',
'InstanceCount' => 2
}]
}
result = AWS[:emr].run_job_flow(@job_flow_name, @job_flow_options).body
@job_flow_id = result['JobFlowId']
tests('success') do
tests("#add_instance_groups").formats(AWS::EMR::Formats::ADD_INSTANCE_GROUPS) do
pending if Fog.mocking?
result = AWS[:emr].add_instance_groups(@job_flow_id, @instance_groups).body
@instance_group_id = result['InstanceGroupIds'].first
result
end
tests("#describe_job_flows_with_instance_groups").formats(AWS::EMR::Formats::DESCRIBE_JOB_FLOW_WITH_INSTANCE_GROUPS) do
pending if Fog.mocking?
result = AWS[:emr].describe_job_flows('JobFlowIds' => [@job_flow_id]).body
result
end
tests("#modify_instance_groups").formats(AWS::EMR::Formats::BASIC) do
pending if Fog.mocking?
# Add a step so the state doesn't go directly from STARTING to SHUTTING_DOWN
AWS[:emr].add_job_flow_steps(@job_flow_id, @job_flow_steps)
# Wait until job has started before modifying the instance group
begin
sleep 10
result = AWS[:emr].describe_job_flows('JobFlowIds' => [@job_flow_id]).body
job_flow = result['JobFlows'].first
state = job_flow['ExecutionStatusDetail']['State']
print "."
end while(state == 'STARTING')
# Check results
result = AWS[:emr].modify_instance_groups('InstanceGroups' => [{'InstanceGroupId' => @instance_group_id, 'InstanceCount' => 4}]).body
# Check the it actually modified the instance count
tests("modify worked?") do
ig_res = AWS[:emr].describe_job_flows('JobFlowIds' => [@job_flow_id]).body
matched = false
jf = ig_res['JobFlows'].first
jf['Instances']['InstanceGroups'].each do | ig |
if ig['InstanceGroupId'] == @instance_group_id
matched = true if ig['InstanceRequestCount'].to_i == 4
end
end
matched
end
result
end
end
AWS[:emr].terminate_job_flows('JobFlowIds' => [@job_flow_id])
end

View file

@ -0,0 +1,86 @@
Shindo.tests('AWS::EMR | job flows', ['aws', 'emr']) do
@job_flow_name = "fog_job_flow_#{Time.now.to_f.to_s.gsub('.','')}"
@job_flow_options = {
'Instances' => {
'MasterInstanceType' => 'm1.small',
'SlaveInstanceType' => 'm1.small',
'InstanceCount' => 2,
'Placement' => {
'AvailabilityZone' => 'us-east-1a'
},
'KeepJobFlowAliveWhenNoSteps' => false,
'TerminationProtected' => false,
'HadoopVersion' => '0.20'
}
}
@step_name = "fog_job_flow_step_#{Time.now.to_f.to_s.gsub('.','')}"
@job_flow_steps = {
'Steps' => [{
'Name' => @step_name,
'ActionOnFailure' => 'CONTINUE',
'HadoopJarStep' => {
'Jar' => 'FakeJar',
'MainClass' => 'FakeMainClass',
'Args' => ['arg1', 'arg2']
}
}]
}
@job_flow_id = nil
tests('success') do
tests("#run_job_flow").formats(AWS::EMR::Formats::RUN_JOB_FLOW) do
pending if Fog.mocking?
result = AWS[:emr].run_job_flow(@job_flow_name, @job_flow_options).body
@job_flow_id = result['JobFlowId']
result
end
tests("#add_job_flow_steps").formats(AWS::EMR::Formats::BASIC) do
pending if Fog.mocking?
result = AWS[:emr].add_job_flow_steps(@job_flow_id, @job_flow_steps).body
result
end
tests("#set_termination_protection").formats(AWS::EMR::Formats::BASIC) do
result = AWS[:emr].set_termination_protection(true, 'JobFlowIds' => [@job_flow_id]).body
test("protected?") do
res = AWS[:emr].describe_job_flows('JobFlowIds' => [@job_flow_id]).body
jf = res['JobFlows'].first
jf['Instances']['TerminationProtected'] == 'true'
end
result
end
tests("#terminate_job_flow").formats(AWS::EMR::Formats::BASIC) do
pending if Fog.mocking?
AWS[:emr].set_termination_protection(false, 'JobFlowIds' => [@job_flow_id])
result = AWS[:emr].terminate_job_flows('JobFlowIds' => [@job_flow_id]).body
result
end
tests("#describe_job_flows").formats(AWS::EMR::Formats::SIMPLE_DESCRIBE_JOB_FLOW) do
pending if Fog.mocking?
result = AWS[:emr].describe_job_flows('JobFlowIds' => [@job_flow_id]).body
result
end
end
end