mirror of
				https://github.com/ruby/ruby.git
				synced 2022-11-09 12:17:21 -05:00 
			
		
		
		
	Initial commit of scanf.rb.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4103 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
		
							parent
							
								
									d675dbc279
								
							
						
					
					
						commit
						e3556affad
					
				
					 1 changed files with 697 additions and 0 deletions
				
			
		
							
								
								
									
										697
									
								
								lib/scanf.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										697
									
								
								lib/scanf.rb
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,697 @@ | |||
| # scanf for Ruby | ||||
| # | ||||
| # $Release Version: 1.1.2 $ | ||||
| # $Revision$ | ||||
| # $Id$ | ||||
| # $Author$ | ||||
| # $Date$ | ||||
| # | ||||
| # A product of the Austin Ruby Codefest (Austin, Texas, August 2002) | ||||
| 
 | ||||
| =begin | ||||
| 
 | ||||
| =scanf for Ruby | ||||
| 
 | ||||
| ==Description | ||||
| 
 | ||||
| scanf for Ruby is an implementation of the C function scanf(3), | ||||
| modified as necessary for Ruby compatibility. | ||||
| 
 | ||||
| The methods provided are String#scanf, IO#scanf, and | ||||
| Kernel#scanf. Kernel#scanf is a wrapper around STDIN.scanf.  IO#scanf | ||||
| can be used on any IO stream, including file handles and sockets. | ||||
| scanf can be called either with or without a block. | ||||
| 
 | ||||
| scanf for Ruby scans an input string or stream according to a | ||||
| <b>format</b>, as described below ("Conversions"), and returns an | ||||
| array of matches between the format and the input.  The format is | ||||
| defined in a string, and is similar (though not identical) to the | ||||
| formats used in Kernel#printf and Kernel#sprintf. | ||||
| 
 | ||||
| The format may contain <b>conversion specifiers</b>, which tell scanf | ||||
| what form (type) each particular matched substring should be converted | ||||
| to (e.g., decimal integer, floating point number, literal string, | ||||
| etc.)  The matches and conversions take place from left to right, and | ||||
| the conversions themselves are returned as an array. | ||||
| 
 | ||||
| The format string may also contain characters other than those in the | ||||
| conversion specifiers.  White space (blanks, tabs, or newlines) in the | ||||
| format string matches any amount of white space, including none, in | ||||
| the input.  Everything else matches only itself. | ||||
| 
 | ||||
| Scanning stops, and scanf returns, when any input character fails to | ||||
| match the specifications in the format string, or when input is | ||||
| exhausted, or when everything in the format string has been | ||||
| matched. All matches found up to the stopping point are returned in | ||||
| the return array (or yielded to the block, if a block was given). | ||||
| 
 | ||||
| 
 | ||||
| ==Basic usage | ||||
| 
 | ||||
|    require 'scanf.rb' | ||||
| 
 | ||||
|    # String#scanf and IO#scanf take a single argument (a format string) | ||||
|    array = aString.scanf("%d%s") | ||||
|    array = anIO.scanf("%d%s") | ||||
| 
 | ||||
|    # Kernel#scanf reads from STDIN | ||||
|    array = scanf("%d%s") | ||||
| 
 | ||||
| ==Block usage | ||||
| 
 | ||||
| When called with a block, scanf keeps scanning the input, cycling back | ||||
| to the beginning of the format string, and yields a new array of | ||||
| conversions to the block every time the format string is matched | ||||
| (including partial matches, but not including complete failures).  The | ||||
| actual return value of scanf when called with a block is an array | ||||
| containing the results of all the executions of the block.  | ||||
| 
 | ||||
|    str = "123 abc 456 def 789 ghi" | ||||
|    str.scanf("%d%s") { |num,str| [ num * 2, str.upcase ] } | ||||
|      # => [[246, "ABC"], [912, "DEF"], [1578, "GHI"]] | ||||
| 
 | ||||
| ==Conversions | ||||
| 
 | ||||
| The single argument to scanf is a format string, which generally | ||||
| includes one or more conversion specifiers. Conversion specifiers | ||||
| begin with the percent character ('%') and include information about | ||||
| what scanf should next scan for (string, decimal number, single | ||||
| character, etc.). | ||||
| 
 | ||||
| There may be an optional maximum field width, expressed as a decimal | ||||
| integer, between the % and the conversion. If no width is given, a | ||||
| default of `infinity' is used (with the exception of the %c specifier; | ||||
| see below).  Otherwise, given a field width of <em>n</em> for a given | ||||
| conversion, at most <em>n</em> characters are scanned in processing | ||||
| that conversion.  Before conversion begins, most conversions skip | ||||
| white space in the input string; this white space is not counted | ||||
| against the field width. | ||||
| 
 | ||||
| The following conversions are available. (See the files EXAMPLES | ||||
| and <tt>tests/scanftests.rb</tt> for examples.) | ||||
| 
 | ||||
| [%] | ||||
|   Matches a literal `%'. That is, `%%' in the format string matches a | ||||
|   single input `%' character. No conversion is done, and the resulting | ||||
|   '%' is not included in the return array. | ||||
| 
 | ||||
| [d] | ||||
|   Matches an optionally signed decimal integer. | ||||
| 
 | ||||
| [u] | ||||
|   Same as d. | ||||
| 
 | ||||
| [i]  | ||||
|   Matches an optionally signed integer. The integer is read in base | ||||
|   16 if it begins with `0x' or `0X', in base 8 if it begins with `0', | ||||
|   and in base 10 other- wise. Only characters that correspond to the | ||||
|   base are recognized. | ||||
| 
 | ||||
| [o] | ||||
|   Matches an optionally signed octal integer. | ||||
| 
 | ||||
| [x,X] | ||||
|   Matches an optionally signed hexadecimal integer, | ||||
| 
 | ||||
| [f,g,e,E] | ||||
|   Matches an optionally signed floating-point number. | ||||
| 
 | ||||
| [s] | ||||
|   Matches a sequence of non-white-space character. The input string stops at | ||||
|   white space or at the maximum field width, whichever occurs first. | ||||
| 
 | ||||
| [c] | ||||
|   Matches a single character, or a sequence of <em>n</em> characters if a | ||||
|   field width of <em>n</em> is specified. The usual skip of leading white | ||||
|   space is suppressed. To skip white space first, use an explicit space in | ||||
|   the format. | ||||
| 
 | ||||
| [<tt>[</tt>] | ||||
|   Matches a nonempty sequence of characters from the specified set | ||||
|   of accepted characters.  The usual skip of leading white space is | ||||
|   suppressed.  This bracketed sub-expression is interpreted exactly like a | ||||
|   character class in a Ruby regular expression.  (In fact, it is placed as-is | ||||
|   in a regular expression.)  The matching against the input string ends with | ||||
|   the appearance of a character not in (or, with a circumflex, in) the set, | ||||
|   or when the field width runs out, whichever comes first. | ||||
| 
 | ||||
| ===Assignment suppression | ||||
| 
 | ||||
| To require that a particular match occur, but without including the result | ||||
| in the return array, place the <b>assignment suppression flag</b>, which is | ||||
| the star character ('*'), immediately after the leading '%' of a format | ||||
| specifier (just before the field width, if any). | ||||
| 
 | ||||
| ==Examples | ||||
| 
 | ||||
| See the files <tt>EXAMPLES</tt> and <tt>tests/scanftests.rb</tt>. | ||||
| 
 | ||||
| ==scanf for Ruby compared with scanf in C | ||||
| 
 | ||||
| scanf for Ruby is based on the C function scanf(3), but with modifications, | ||||
| dictated mainly by the underlying differences between the languages. | ||||
| 
 | ||||
| ===Unimplemented flags and specifiers | ||||
| 
 | ||||
| * The only flag implemented in scanf for Ruby is '<tt>*</tt>' (ignore | ||||
|   upcoming conversion). Many of the flags available in C versions of scanf(4) | ||||
|   have to do with the type of upcoming pointer arguments, and are literally | ||||
|   meaningless in Ruby. | ||||
| 
 | ||||
| * The <tt>n</tt> specifier (store number of characters consumed so far in | ||||
|   next pointer) is not implemented. | ||||
| 
 | ||||
| * The <tt>p</tt> specifier (match a pointer value) is not implemented. | ||||
| 
 | ||||
| ===Altered specifiers | ||||
| 
 | ||||
| [o,u,x,X] | ||||
|   In scanf for Ruby, all of these specifiers scan for an optionally signed | ||||
|   integer, rather than for an unsigned integer like their C counterparts. | ||||
| 
 | ||||
| ===Return values | ||||
| 
 | ||||
| scanf for Ruby returns an array of successful conversions, whereas | ||||
| scanf(3) returns the number of conversions successfully | ||||
| completed. (See below for more details on scanf for Ruby's return | ||||
| values.) | ||||
| 
 | ||||
| ==Return values | ||||
| 
 | ||||
| Without a block, scanf returns an array containing all the conversions | ||||
| it has found. If none are found, scanf will return an empty array. An | ||||
| unsuccesful match is never ignored, but rather always signals the end | ||||
| of the scanning operation. If the first unsuccessful match takes place | ||||
| after one or more successful matches have already taken place, the | ||||
| returned array will contain the results of those successful matches. | ||||
| 
 | ||||
| With a block scanf returns a 'map'-like array of transformations from | ||||
| the block -- that is, an array reflecting what the block did with each | ||||
| yielded result from the iterative scanf operation.  (See "Block | ||||
| usage", above.) | ||||
| 
 | ||||
| ==Test suite | ||||
| 
 | ||||
| scanf for Ruby includes a suite of unit tests (requiring the | ||||
| <tt>TestUnit</tt> package), which can be run with the command <tt>ruby | ||||
| tests/scanftests.rb</tt> or the command <tt>make test</tt>. | ||||
| 
 | ||||
| ==Current limitations and bugs | ||||
| 
 | ||||
| When using IO#scanf under Windows, make sure you open your files in | ||||
| binary mode: | ||||
| 
 | ||||
|     File.open("filename", "rb") | ||||
| 
 | ||||
| so that scanf can keep track of characters correctly. | ||||
| 
 | ||||
| Support for character classes is reasonably complete (since it | ||||
| essentially piggy-backs on Ruby's regular expression handling of | ||||
| character classes), but users are advised that character class testing | ||||
| has not been exhaustive, and that they should exercise some caution | ||||
| in using any of the more complex and/or arcane character class | ||||
| idioms. | ||||
| 
 | ||||
| 
 | ||||
| ==Technical notes | ||||
| 
 | ||||
| ===Rationale behind scanf for Ruby | ||||
| 
 | ||||
| The impetus for a scanf implementation in Ruby comes chiefly from the fact | ||||
| that existing pattern matching operations, such as Regexp#match and | ||||
| String#scan, return all results as strings, which have to be converted to | ||||
| integers or floats explicitly in cases where what's ultimately wanted are | ||||
| integer or float values. | ||||
| 
 | ||||
| ===Design of scanf for Ruby | ||||
| 
 | ||||
| scanf for Ruby is essentially a <format string>-to-<regular | ||||
| expression> converter. | ||||
| 
 | ||||
| When scanf is called, a FormatString object is generated from the | ||||
| format string ("%d%s...") argument. The FormatString object breaks the | ||||
| format string down into atoms ("%d", "%5f", "blah", etc.), and from | ||||
| each atom it creates a FormatSpecifier object, which it | ||||
| saves. | ||||
| 
 | ||||
| Each FormatSpecifier has a regular expression fragment and a "handler" | ||||
| associated with it. For example, the regular expression fragment | ||||
| associated with the format "%d" is "([-+]?\d+)", and the handler | ||||
| associated with it is a wrapper around String#to_i. scanf itself calls | ||||
| FormatString#match, passing in the input string. FormatString#match | ||||
| iterates through its FormatSpecifiers; for each one, it matches the | ||||
| corresponding regular expression fragment against the string. If | ||||
| there's a match, it sends the matched string to the handler associated | ||||
| with the FormatSpecifier. | ||||
| 
 | ||||
| Thus, to follow up the "%d" example: if "123" occurs in the input | ||||
| string when a FormatSpecifier consisting of "%d" is reached, the "123" | ||||
| will be matched against "([-+]?\d+)", and the matched string will be | ||||
| rendered into an integer by a call to to_i. | ||||
| 
 | ||||
| The rendered match is then saved to an accumulator array, and the | ||||
| input string is reduced to the post-match substring. Thus the string | ||||
| is "eaten" from the left as the FormatSpecifiers are applied in | ||||
| sequence.  (This is done to a duplicate string; the original string is | ||||
| not altered.) | ||||
| 
 | ||||
| As soon as a regular expression fragment fails to match the string, or | ||||
| when the FormatString object runs out of FormatSpecifiers, scanning | ||||
| stops and results accumulated so far are returned in an array. | ||||
| 
 | ||||
| ==License and copyright | ||||
| 
 | ||||
| Copyright:: (c) 2002-2003 David Alan Black | ||||
| License:: Distributed on the same licensing terms as Ruby itself | ||||
| 
 | ||||
| ==Warranty disclaimer | ||||
| 
 | ||||
| This software is provided "as is" and without any express or implied | ||||
| warranties, including, without limitation, the implied warranties of | ||||
| merchantibility and fitness for a particular purpose. | ||||
| 
 | ||||
| ==Credits and acknowledgements | ||||
| 
 | ||||
| scanf for Ruby was developed as the major activity of the Austin | ||||
| Ruby Codefest (Austin, Texas, August 2002). | ||||
| 
 | ||||
| Principal author:: David Alan Black (mailto:dblack@superlink.net) | ||||
| Co-author:: Hal Fulton (mailto:hal9000@hypermetrics.com) | ||||
| Project contributors:: Nolan Darilek, Jason Johnston | ||||
| 
 | ||||
| Thanks to Hal Fulton for hosting the Codefest. | ||||
| 
 | ||||
| Thanks to Matz for suggestions about the class design.   | ||||
| 
 | ||||
| Thanks to Gavin Sinclair for some feedback on the documentation. | ||||
| 
 | ||||
| The text for parts of this document, especially the Description and | ||||
| Conversions sections, above, were adapted from the Linux Programmer's | ||||
| Manual manpage for scanf(3), dated 1995-11-01. | ||||
| 
 | ||||
| ==Bugs and bug reports | ||||
| 
 | ||||
| scanf for Ruby is based on something of an amalgam of C scanf | ||||
| implementations and documentation, rather than on a single canonical | ||||
| description. Suggestions for features and behaviors which appear in | ||||
| other scanfs, and would be meaningful in Ruby, are welcome, as are | ||||
| reports of suspicious behaviors and/or bugs. (Please see "Credits and | ||||
| acknowledgements", above, for email addresses.) | ||||
| 
 | ||||
| =end | ||||
| 
 | ||||
| module Scanf | ||||
| 
 | ||||
|   class FormatSpecifier | ||||
| 
 | ||||
|     attr_reader :re_string, :matched_string, :conversion | ||||
|     attr_writer :i | ||||
| 
 | ||||
|     private | ||||
| 
 | ||||
|     def skip;  /^\s*%\*/.match(@spec_string); end | ||||
| 
 | ||||
|     def extract_float(s); s.to_f if s &&! skip; end | ||||
|     def extract_decimal(s); s.to_i if s &&! skip; end | ||||
|     def extract_hex(s); s.hex if s &&! skip; end | ||||
|     def extract_octal(s); s.oct if s &&! skip; end | ||||
|     def extract_integer(s); Integer(s) if s &&! skip; end | ||||
|     def extract_plain(s); s unless skip; end | ||||
| 
 | ||||
|     def nil_proc(s); nil; end | ||||
| 
 | ||||
|     public | ||||
| 
 | ||||
|     def to_s | ||||
|       @spec_string | ||||
|     end | ||||
| 
 | ||||
|     def count_space? | ||||
|       /(?:\A|\S)%\*?\d*c|\[/.match(@spec_string) | ||||
|     end | ||||
| 
 | ||||
|     def initialize(str) | ||||
|       @spec_string = str | ||||
| 
 | ||||
|       h = '[A-Fa-f0-9]' | ||||
| 
 | ||||
|       @re_string, @handler =  | ||||
|         case @spec_string | ||||
| 
 | ||||
|           # %[[:...:]] | ||||
|         when /%\*?(\[\[:[a-z]+:\]\])/ | ||||
|           [ "(#{$1}+)", :extract_plain ] | ||||
| 
 | ||||
|           # %5[[:...:]] | ||||
|         when /%\*?(\d+)(\[\[:[a-z]+:\]\])/ | ||||
|           [ "(#{$2}{1,#{$1}})", :extract_plain ] | ||||
| 
 | ||||
|           # %[...] | ||||
|         when /%\*?\[([^\]]*)\]/ | ||||
|           yes = $1 | ||||
|           if /^\^/.match(yes) then no = yes[1..-1] else no = '^' + yes end | ||||
|           [ "([#{yes}]+)(?=[#{no}]|\\z)", :extract_plain ] | ||||
| 
 | ||||
|           # %5[...] | ||||
|         when /%\*?(\d+)\[([^\]]*)\]/ | ||||
|           yes = $2 | ||||
|           w = $1 | ||||
|           [ "([#{yes}]{1,#{w}})", :extract_plain ] | ||||
| 
 | ||||
|           # %i | ||||
|         when /%\*?i/ | ||||
|           [ "([-+]?(?:(?:0[0-7]+)|(?:0[Xx]#{h}+)|(?:[1-9]\\d+)))", :extract_integer ] | ||||
| 
 | ||||
|           # %5i | ||||
|         when /%\*?(\d+)i/ | ||||
|           n = $1.to_i | ||||
|           s = "(" | ||||
|           if n > 1 then s += "[1-9]\\d{1,#{n-1}}|" end | ||||
|           if n > 1 then s += "0[0-7]{1,#{n-1}}|" end | ||||
|           if n > 2 then s += "[-+]0[0-7]{1,#{n-2}}|" end | ||||
|           if n > 2 then s += "[-+][1-9]\\d{1,#{n-2}}|" end | ||||
|           if n > 2 then s += "0[Xx]#{h}{1,#{n-2}}|" end | ||||
|           if n > 3 then s += "[-+]0[Xx]#{h}{1,#{n-3}}|" end | ||||
|           s += "\\d" | ||||
|           s += ")" | ||||
|           [ s, :extract_integer ] | ||||
| 
 | ||||
|           # %d, %u | ||||
|         when /%\*?[du]/ | ||||
|           [ '([-+]?\d+)', :extract_decimal ] | ||||
| 
 | ||||
|           # %5d, %5u | ||||
|         when /%\*?(\d+)[du]/ | ||||
|           n = $1.to_i | ||||
|           s = "(" | ||||
|           if n > 1 then s += "[-+]\\d{1,#{n-1}}|" end | ||||
|           s += "\\d{1,#{$1}})" | ||||
|           [ s, :extract_decimal ] | ||||
| 
 | ||||
|           # %x | ||||
|         when /%\*?[Xx]/ | ||||
|           [ "([-+]?(?:0[Xx])?#{h}+)", :extract_hex ] | ||||
| 
 | ||||
|           # %5x | ||||
|         when /%\*?(\d+)[Xx]/ | ||||
|           n = $1.to_i | ||||
|           s = "(" | ||||
|           if n > 3 then s += "[-+]0[Xx]#{h}{1,#{n-3}}|" end | ||||
|           if n > 2 then s += "0[Xx]#{h}{1,#{n-2}}|" end | ||||
|           if n > 1 then s += "[-+]#{h}{1,#{n-1}}|" end | ||||
|           s += "#{h}{1,#{n}}" | ||||
|           s += ")" | ||||
|           [ s, :extract_hex ] | ||||
| 
 | ||||
|           # %o | ||||
|         when /%\*?o/ | ||||
|           [ '([-+]?[0-7]+)', :extract_octal ] | ||||
| 
 | ||||
|           # %5o | ||||
|         when /%\*?(\d+)o/ | ||||
|           [ "([-+][0-7]{1,#{$1.to_i-1}}|[0-7]{1,#{$1}})", :extract_octal ] | ||||
| 
 | ||||
|           # %f | ||||
|         when /%\*?f/ | ||||
|           [ '([-+]?((\d+(?>(?=[^\d.]|$)))|(\d*(\.(\d*([eE][-+]?\d+)?)))))', :extract_float ] | ||||
| 
 | ||||
|           # %5f | ||||
|         when /%\*?(\d+)f/ | ||||
|           [ "(\\S{1,#{$1}})", :extract_float ] | ||||
| 
 | ||||
|           # %5s | ||||
|         when /%\*?(\d+)s/ | ||||
|           [ "(\\S{1,#{$1}})", :extract_plain ] | ||||
| 
 | ||||
|           # %s | ||||
|         when /%\*?s/ | ||||
|           [ '(\S+)', :extract_plain ] | ||||
| 
 | ||||
|           # %c | ||||
|         when /\s%\*?c/ | ||||
|           [ "\\s*(.)", :extract_plain ] | ||||
| 
 | ||||
|           # %c | ||||
|         when /%\*?c/ | ||||
|           [ "(.)", :extract_plain ] | ||||
| 
 | ||||
|           # %5c (whitespace issues are handled by the count_*_space? methods) | ||||
|         when /%\*?(\d+)c/ | ||||
|           [ "(.{1,#{$1}})", :extract_plain ] | ||||
| 
 | ||||
|           # %% | ||||
|         when /%%/ | ||||
|           [ '(\s*%)', :nil_proc ] | ||||
| 
 | ||||
|           # literal characters | ||||
|         else | ||||
|           [ "(#{Regexp.escape(@spec_string)})", :nil_proc ] | ||||
|         end | ||||
| 
 | ||||
|       @re_string = '\A' + @re_string | ||||
|     end | ||||
| 
 | ||||
|     def to_re | ||||
|       Regexp.new(@re_string,Regexp::MULTILINE) | ||||
|     end | ||||
| 
 | ||||
|     def match(str) | ||||
|       s = str.dup | ||||
|       s.sub!(/\A\s+/,'') unless count_space? | ||||
|       res = to_re.match(s) | ||||
|       if res | ||||
|         @conversion = send(@handler, res[1]) | ||||
|         @matched_string = @matched_item.to_s | ||||
|       end | ||||
|       res | ||||
|     end | ||||
| 
 | ||||
|     def letter | ||||
|       /%\*?\d*([a-z\[])/.match(@spec_string).to_a[1] | ||||
|     end | ||||
| 
 | ||||
|     def width | ||||
|       w = /%\*?(\d+)/.match(@spec_string).to_a[1] | ||||
|       w && w.to_i || 0 | ||||
|     end | ||||
| 
 | ||||
|     def mid_match? | ||||
|       cc_no_width    =   letter == '[' && width.zero? | ||||
|       c_or_cc_width  =   (letter == 'c' || letter == '[') &&! width.zero? | ||||
|       c_or_cc_open   =   c_or_cc_width && (matched_string.size < width) | ||||
|        | ||||
|       return c_or_cc_open || cc_no_width | ||||
|     end | ||||
|      | ||||
|   end | ||||
| 
 | ||||
|   class FormatString | ||||
| 
 | ||||
|     attr_reader :string_left, :last_spec_tried, :last_match_tried, :matched_count, :space | ||||
| 
 | ||||
|     SPECIFIERS = 'diuXxofeEgsc' | ||||
|     REGEX = / | ||||
|         # possible space, followed by... | ||||
|           (?:\s* | ||||
|           # percent sign, followed by... | ||||
|             % | ||||
|             # another percent sign, or... | ||||
|               (?:%| | ||||
|         	 # optional assignment suppression flag | ||||
|         	 \*? | ||||
|         	 # optional maximum field width | ||||
|         	 \d* | ||||
|         	   # named character class, ... | ||||
|         	   (?:\[\[:\w+:\]\]| | ||||
|         	   # traditional character class, or... | ||||
|         	      \[[^\]]*\]| | ||||
|         	   # specifier letter. | ||||
|         	      [#{SPECIFIERS}])))| | ||||
|             # or miscellaneous characters | ||||
|               [^%\s]+/ix | ||||
| 
 | ||||
|     def initialize(str) | ||||
|       @specs = [] | ||||
|       s = str.to_s | ||||
|       return unless /\S/.match(s) | ||||
|       @space = true if /\s\z/.match(s) | ||||
|       @specs.replace s.scan(REGEX).map {|spec| FormatSpecifier.new(spec) } | ||||
|     end | ||||
| 
 | ||||
|     def to_s | ||||
|       @spec_string | ||||
|     end | ||||
| 
 | ||||
|     def prune(n=matched_count) | ||||
|       n.times { @specs.shift } | ||||
|     end | ||||
| 
 | ||||
|     def spec_count | ||||
|       @specs.size | ||||
|     end | ||||
| 
 | ||||
|     def last_spec | ||||
|       @i == spec_count - 1 | ||||
|     end | ||||
| 
 | ||||
|     def match(str) | ||||
|       accum = [] | ||||
|       @string_left = str | ||||
|       @matched_count = 0 | ||||
| 
 | ||||
|       @specs.each_with_index do |spec,@i| | ||||
|         @last_spec_tried = spec | ||||
|         @last_match_tried = spec.match(@string_left) | ||||
|         break unless @last_match_tried | ||||
|         @matched_count += 1 | ||||
| 
 | ||||
|         accum << spec.conversion | ||||
| 
 | ||||
|         @string_left = @last_match_tried.post_match | ||||
|         break if @string_left.empty? | ||||
|       end | ||||
|       return accum.compact | ||||
|     end | ||||
|   end | ||||
| end | ||||
| 
 | ||||
| class IO | ||||
| 
 | ||||
| # The trick here is doing a match where you grab one *line* | ||||
| # of input at a time.  The linebreak may or may not occur | ||||
| # at the boundary where the string matches a format specifier. | ||||
| # And if it does, some rule about whitespace may or may not | ||||
| # be in effect... | ||||
| # | ||||
| # That's why this is much more elaborate than the string | ||||
| # version. | ||||
| # | ||||
| # Match succeeds (non-emptily) | ||||
| # and the last attempted spec/string sub-match succeeded: | ||||
| # | ||||
| # is the current matched spec a '%[...]' or '%c' with a width? | ||||
| #   yes: is current.string.size < available width? | ||||
| #     yes: save interim results | ||||
| #     no: width is used up, so move on (next) | ||||
| #   no: is it a '%[...]' with no width? | ||||
| #     yes: evidently nothing violated it yet, so store | ||||
| #          interim results and continue (next) | ||||
| # | ||||
| # The last attempted spec/string did not match: | ||||
| # | ||||
| # are we on the next-to-last spec in the string? | ||||
| #   yes: | ||||
| #     is fmt_string.string_left all spaces? | ||||
| #       yes: does current spec care about input space? | ||||
| #         yes: fatal failure | ||||
| #         no: save interim results and continue | ||||
| #   no: continue  [this state could be analyzed further] | ||||
| # | ||||
| # | ||||
|   def scanf(str,&b) | ||||
|     return block_scanf(str,&b) if b | ||||
|     return [] unless str.size > 0 | ||||
| 
 | ||||
|     start_position = pos | ||||
|     matched_so_far = 0 | ||||
|     source_buffer = "" | ||||
|     result_buffer = [] | ||||
|     final_result = [] | ||||
| 
 | ||||
|     fstr = Scanf::FormatString.new(str) | ||||
| 
 | ||||
|     loop do | ||||
|       if eof | ||||
|         final_result.concat(result_buffer) | ||||
|         break | ||||
|       end | ||||
| 
 | ||||
|       source_buffer << gets | ||||
|       current_match = fstr.match(source_buffer) | ||||
| 
 | ||||
|       spec = fstr.last_spec_tried | ||||
| 
 | ||||
|       if fstr.last_match_tried | ||||
|         if spec.mid_match? | ||||
|           result_buffer.replace(current_match) | ||||
|           next | ||||
|         end | ||||
|       elsif (fstr.matched_count == fstr.spec_count - 1) | ||||
|         if /\A\s*\z/.match(fstr.string_left) | ||||
|           break if spec.count_space? | ||||
|           result_buffer.replace(current_match) | ||||
|           next | ||||
|         end | ||||
|       end | ||||
| 
 | ||||
|       final_result.concat(current_match) | ||||
| 
 | ||||
|       matched_so_far += source_buffer.size | ||||
|       source_buffer.replace(fstr.string_left) | ||||
|       matched_so_far -= source_buffer.size | ||||
|       break if fstr.last_spec | ||||
|       fstr.prune | ||||
|     end | ||||
|      | ||||
|     seek(start_position + matched_so_far, IO::SEEK_SET) rescue Errno::ESPIPE | ||||
|     soak_up_spaces if fstr.last_spec && fstr.space | ||||
|      | ||||
|     return final_result | ||||
|   end | ||||
| 
 | ||||
|   private | ||||
| 
 | ||||
|   def soak_up_spaces | ||||
|     c = getc | ||||
|     ungetc(c) if c | ||||
|     until eof ||! c || /\S/.match(c.chr) | ||||
|       c = getc | ||||
|     end | ||||
|     ungetc(c) if c | ||||
|   end | ||||
| 
 | ||||
|   def block_scanf(str) | ||||
|     final = [] | ||||
|     begin | ||||
|       current = scanf(str) | ||||
|       final.push(yield(current)) unless current.empty? | ||||
|     end until current.empty? || eof | ||||
|     return final | ||||
|   end | ||||
| end | ||||
| 
 | ||||
| class String | ||||
| 
 | ||||
|   def scanf(fstr,&b) | ||||
|     if b | ||||
|       block_scanf(fstr,&b) | ||||
|     else | ||||
|       fs =  | ||||
|         if fstr.is_a? Scanf::FormatString | ||||
|           fstr  | ||||
|         else  | ||||
|           Scanf::FormatString.new(fstr) | ||||
|         end | ||||
|       fs.match(self) | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   def block_scanf(fstr,&b) | ||||
|     fs = Scanf::FormatString.new(fstr) | ||||
|     str = self.dup | ||||
|     final = [] | ||||
|     begin | ||||
|       current = str.scanf(fs) | ||||
|       final.push(yield(current)) unless current.empty? | ||||
|       str = fs.string_left | ||||
|     end until current.empty? || str.empty? | ||||
|     return final | ||||
|   end | ||||
| end | ||||
| 
 | ||||
| module Kernel | ||||
|   private | ||||
|   def scanf(fs) | ||||
|     STDIN.scanf(fs) | ||||
|   end | ||||
| end | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 dblack
						dblack