mirror of
				https://github.com/ruby/ruby.git
				synced 2022-11-09 12:17:21 -05:00 
			
		
		
		
	* include/ruby/encoding.h (rb_econv_output): declared.
* transcode_data.h (rb_transcoder): add resetsize_func field. * enc/trans/iso2022.trans (iso2022jp_reset_sequence_size): defined. (rb_EUC_JP_to_ISO_2022_JP): provide resetsize_func. * tool/transcode-tblgen.rb: set NULL for resetsize_func. * transcode.c (rb_econv_output): new function for inserting output. (output_replacement_character): use rb_econv_output. (transcode_loop): check return value of output_replacement_character. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18628 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
		
							parent
							
								
									e0e39e0db8
								
							
						
					
					
						commit
						74a36d5d1f
					
				
					 6 changed files with 114 additions and 49 deletions
				
			
		
							
								
								
									
										16
									
								
								ChangeLog
									
										
									
									
									
								
							
							
						
						
									
										16
									
								
								ChangeLog
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -1,3 +1,19 @@
 | 
			
		|||
Fri Aug 15 00:52:40 2008  Tanaka Akira  <akr@fsij.org>
 | 
			
		||||
 | 
			
		||||
	* include/ruby/encoding.h (rb_econv_output): declared.
 | 
			
		||||
 | 
			
		||||
	* transcode_data.h (rb_transcoder): add resetsize_func field.
 | 
			
		||||
 | 
			
		||||
	* enc/trans/iso2022.trans (iso2022jp_reset_sequence_size): defined.
 | 
			
		||||
	  (rb_EUC_JP_to_ISO_2022_JP): provide resetsize_func.
 | 
			
		||||
 | 
			
		||||
	* tool/transcode-tblgen.rb: set NULL for resetsize_func.
 | 
			
		||||
 | 
			
		||||
	* transcode.c (rb_econv_output): new function for inserting output.
 | 
			
		||||
	  (output_replacement_character): use rb_econv_output.
 | 
			
		||||
	  (transcode_loop): check return value of
 | 
			
		||||
	  output_replacement_character.
 | 
			
		||||
 | 
			
		||||
Thu Aug 14 23:47:21 2008  Tanaka Akira  <akr@fsij.org>
 | 
			
		||||
 | 
			
		||||
	* include/ruby/encoding.h (ECONV_UNIVERSAL_NEWLINE_DECODER): defined.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -83,7 +83,7 @@ fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, u
 | 
			
		|||
 | 
			
		||||
    if (t->stateful[0] == 0) {
 | 
			
		||||
        t->stateful[0] = 1; /* initialized flag */
 | 
			
		||||
        t->stateful[1] = 1; /* ASCII mode */
 | 
			
		||||
        t->stateful[1] = 1; /* G0 = ASCII */
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (l != t->stateful[1]) {
 | 
			
		||||
| 
						 | 
				
			
			@ -91,13 +91,13 @@ fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, u
 | 
			
		|||
            *o++ = 0x1b;
 | 
			
		||||
            *o++ = '(';
 | 
			
		||||
            *o++ = 'B';
 | 
			
		||||
            t->stateful[1] = 1;
 | 
			
		||||
            t->stateful[1] = 1; /* G0 = ASCII */
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            *o++ = 0x1b;
 | 
			
		||||
            *o++ = '$';
 | 
			
		||||
            *o++ = 'B';
 | 
			
		||||
            t->stateful[1] = 2;
 | 
			
		||||
            t->stateful[1] = 2; /* G0 = JIS X 0208 1983 */
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -112,6 +112,14 @@ fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, u
 | 
			
		|||
    return o - output0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
iso2022jp_reset_sequence_size(rb_transcoding *t)
 | 
			
		||||
{
 | 
			
		||||
    if (t->stateful[1] == 2)
 | 
			
		||||
        return 3;
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
finish_eucjp_to_iso2022jp(rb_transcoding *t, unsigned char *o)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -137,7 +145,8 @@ rb_EUC_JP_to_ISO_2022_JP = {
 | 
			
		|||
    3, /* max_input */
 | 
			
		||||
    5, /* max_output */
 | 
			
		||||
    NULL, NULL, NULL, fun_so_eucjp_to_iso2022jp,
 | 
			
		||||
    finish_eucjp_to_iso2022jp, finish_eucjp_to_iso2022jp
 | 
			
		||||
    finish_eucjp_to_iso2022jp,
 | 
			
		||||
    iso2022jp_reset_sequence_size, finish_eucjp_to_iso2022jp
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -230,11 +230,15 @@ typedef struct {
 | 
			
		|||
    rb_encoding *destination_encoding;
 | 
			
		||||
} rb_econv_t;
 | 
			
		||||
 | 
			
		||||
rb_econv_t *rb_econv_open(const char *from, const char *to, int flags);
 | 
			
		||||
rb_econv_t *rb_econv_open(const char *source_encoding, const char *destination_encoding, int flags);
 | 
			
		||||
rb_econv_result_t rb_econv_convert(rb_econv_t *ec,
 | 
			
		||||
    const unsigned char **input_ptr, const unsigned char *input_stop,
 | 
			
		||||
    unsigned char **output_ptr, unsigned char *output_stop,
 | 
			
		||||
    const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end,
 | 
			
		||||
    unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
 | 
			
		||||
    int flags);
 | 
			
		||||
int rb_econv_output(rb_econv_t *ec,
 | 
			
		||||
    const unsigned char *str, size_t len,
 | 
			
		||||
    unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
 | 
			
		||||
    size_t *required_size);
 | 
			
		||||
void rb_econv_close(rb_econv_t *ec);
 | 
			
		||||
 | 
			
		||||
/* flags for rb_econv_open */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -449,7 +449,8 @@ static const rb_transcoder
 | 
			
		|||
    #{input_unit_length}, /* input_unit_length */
 | 
			
		||||
    #{max_input}, /* max_input */
 | 
			
		||||
    #{max_output}, /* max_output */
 | 
			
		||||
    NULL, NULL, NULL, NULL, NULL, NULL
 | 
			
		||||
    NULL, NULL, NULL, NULL,
 | 
			
		||||
    NULL, NULL, NULL
 | 
			
		||||
};
 | 
			
		||||
End
 | 
			
		||||
  tree_code + "\n" + transcoder_code
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										106
									
								
								transcode.c
									
										
									
									
									
								
							
							
						
						
									
										106
									
								
								transcode.c
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -937,6 +937,58 @@ rb_econv_convert(rb_econv_t *ec,
 | 
			
		|||
    return res;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int
 | 
			
		||||
rb_econv_output(rb_econv_t *ec,
 | 
			
		||||
    const unsigned char *str, size_t len, /* string in destination encoding */
 | 
			
		||||
    unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
 | 
			
		||||
    size_t *required_size)
 | 
			
		||||
{
 | 
			
		||||
    size_t reset_len, total_len;
 | 
			
		||||
    rb_transcoding *tc = ec->last_tc;
 | 
			
		||||
    const rb_transcoder *tr = tc->transcoder;
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
     * Assumption for stateful encoding:
 | 
			
		||||
     *
 | 
			
		||||
     * - str can be output in the reset state and doesn't change the state.
 | 
			
		||||
     * - it is acceptable that extra state changing sequence if str contains
 | 
			
		||||
     *   a state changing sequence.
 | 
			
		||||
     *
 | 
			
		||||
     * Currently the replacement character for stateful encoding such as
 | 
			
		||||
     * ISO-2022-JP is "?" and it has no state changing sequence.
 | 
			
		||||
     * So the extra state changing sequence doesn't occur when
 | 
			
		||||
     * rb_econv_output is used for replacement characters.
 | 
			
		||||
     *
 | 
			
		||||
     * These assumptions may be removed in the future.
 | 
			
		||||
     * It needs to scan str to check state changing sequences in it.
 | 
			
		||||
     */
 | 
			
		||||
 | 
			
		||||
    reset_len = 0;
 | 
			
		||||
    if (tr->resetsize_func) {
 | 
			
		||||
        reset_len = tr->resetsize_func(tc);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    total_len = reset_len + len;
 | 
			
		||||
    if (total_len < len)
 | 
			
		||||
        return -1;
 | 
			
		||||
 | 
			
		||||
    if (required_size) {
 | 
			
		||||
        *required_size = total_len;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (destination_buffer_end - *destination_buffer_ptr < total_len)
 | 
			
		||||
        return -1;
 | 
			
		||||
 | 
			
		||||
    if (reset_len) {
 | 
			
		||||
        *destination_buffer_ptr += tr->resetstate_func(tc, *destination_buffer_ptr);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    memcpy(*destination_buffer_ptr, str, len);
 | 
			
		||||
    *destination_buffer_ptr += len;
 | 
			
		||||
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void
 | 
			
		||||
rb_econv_close(rb_econv_t *ec)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -968,58 +1020,40 @@ more_output_buffer(
 | 
			
		|||
    *out_stop_ptr = *out_start_ptr + new_len;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
static int
 | 
			
		||||
output_replacement_character(
 | 
			
		||||
        VALUE destination,
 | 
			
		||||
        unsigned char *(*resize_destination)(VALUE, int, int),
 | 
			
		||||
        rb_transcoding *tc,
 | 
			
		||||
        rb_econv_t *ec,
 | 
			
		||||
        unsigned char **out_start_ptr,
 | 
			
		||||
        unsigned char **out_pos,
 | 
			
		||||
        unsigned char **out_stop_ptr)
 | 
			
		||||
 | 
			
		||||
{
 | 
			
		||||
    rb_transcoding *tc = ec->last_tc;
 | 
			
		||||
    const rb_transcoder *tr;
 | 
			
		||||
    int max_output;
 | 
			
		||||
    rb_encoding *enc;
 | 
			
		||||
    const char *replacement;
 | 
			
		||||
    const unsigned char *replacement;
 | 
			
		||||
    int len;
 | 
			
		||||
    size_t required_size;
 | 
			
		||||
 | 
			
		||||
    tr = tc->transcoder;
 | 
			
		||||
    max_output = tr->max_output;
 | 
			
		||||
    enc = rb_enc_find(tr->to_encoding);
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
     * Assumption for stateful encoding:
 | 
			
		||||
     *
 | 
			
		||||
     * - The replacement character can be output on resetted state and doesn't
 | 
			
		||||
     *   change the state.
 | 
			
		||||
     * - it is acceptable that extra state changing sequence if the replacement
 | 
			
		||||
     *   character contains a state changing sequence.
 | 
			
		||||
     *
 | 
			
		||||
     * Currently the replacement character for stateful encoding such as
 | 
			
		||||
     * ISO-2022-JP is "?" and it has no state changing sequence.
 | 
			
		||||
     * So the extra state changing sequence don't occur.
 | 
			
		||||
     *
 | 
			
		||||
     * Thease assumption may be removed in future.
 | 
			
		||||
     * It needs to scan the replacement character to check
 | 
			
		||||
     * state changing sequences in the replacement character.
 | 
			
		||||
     */
 | 
			
		||||
    replacement = (const unsigned char *)get_replacement_character(enc, &len);
 | 
			
		||||
 | 
			
		||||
    if (tr->resetstate_func) {
 | 
			
		||||
        if (*out_stop_ptr - *out_pos < max_output)
 | 
			
		||||
            more_output_buffer(destination, resize_destination, max_output, out_start_ptr, out_pos, out_stop_ptr);
 | 
			
		||||
        *out_pos += tr->resetstate_func(tc, *out_pos);
 | 
			
		||||
    }
 | 
			
		||||
    if (rb_econv_output(ec, replacement, len, out_pos, *out_stop_ptr, &required_size) == 0)
 | 
			
		||||
        return 0;
 | 
			
		||||
 | 
			
		||||
    if (*out_stop_ptr - *out_pos < max_output)
 | 
			
		||||
        more_output_buffer(destination, resize_destination, max_output, out_start_ptr, out_pos, out_stop_ptr);
 | 
			
		||||
    if (required_size < len)
 | 
			
		||||
        return -1; /* overflow */
 | 
			
		||||
 | 
			
		||||
    replacement = get_replacement_character(enc, &len);
 | 
			
		||||
    more_output_buffer(destination, resize_destination, required_size, out_start_ptr, out_pos, out_stop_ptr);
 | 
			
		||||
 | 
			
		||||
    memcpy(*out_pos, replacement, len);
 | 
			
		||||
    if (rb_econv_output(ec, replacement, len, out_pos, *out_stop_ptr, &required_size) == 0)
 | 
			
		||||
        return 0;
 | 
			
		||||
 | 
			
		||||
    *out_pos += len;
 | 
			
		||||
    return;
 | 
			
		||||
    return -1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#if 1
 | 
			
		||||
| 
						 | 
				
			
			@ -1054,7 +1088,7 @@ resume:
 | 
			
		|||
            goto resume;
 | 
			
		||||
	}
 | 
			
		||||
	else if (opt&INVALID_REPLACE) {
 | 
			
		||||
	    output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
 | 
			
		||||
	    if (output_replacement_character(destination, resize_destination, ec, &out_start, out_pos, &out_stop) == 0)
 | 
			
		||||
                goto resume;
 | 
			
		||||
	}
 | 
			
		||||
        rb_econv_close(ec);
 | 
			
		||||
| 
						 | 
				
			
			@ -1068,7 +1102,7 @@ resume:
 | 
			
		|||
	    goto resume;
 | 
			
		||||
	}
 | 
			
		||||
	else if (opt&UNDEF_REPLACE) {
 | 
			
		||||
	    output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
 | 
			
		||||
	    if (output_replacement_character(destination, resize_destination, ec, &out_start, out_pos, &out_stop) == 0)
 | 
			
		||||
                goto resume;
 | 
			
		||||
	}
 | 
			
		||||
        rb_econv_close(ec);
 | 
			
		||||
| 
						 | 
				
			
			@ -1135,7 +1169,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
 | 
			
		|||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            else if (opt&INVALID_REPLACE) {
 | 
			
		||||
                output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
 | 
			
		||||
                if (output_replacement_character(destination, resize_destination, ec, &out_start, out_pos, &out_stop) == 0)
 | 
			
		||||
                    break;
 | 
			
		||||
            }
 | 
			
		||||
            rb_econv_close(ec);
 | 
			
		||||
| 
						 | 
				
			
			@ -1150,7 +1184,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
 | 
			
		|||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            else if (opt&UNDEF_REPLACE) {
 | 
			
		||||
                output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
 | 
			
		||||
                if (output_replacement_character(destination, resize_destination, ec, &out_start, out_pos, &out_stop) == 0)
 | 
			
		||||
                    break;
 | 
			
		||||
            }
 | 
			
		||||
            rb_econv_close(ec);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -107,8 +107,9 @@ struct rb_transcoder {
 | 
			
		|||
    VALUE (*func_si)(rb_transcoding*, const unsigned char*, size_t); /* start -> info   */
 | 
			
		||||
    int (*func_io)(rb_transcoding*, VALUE, const unsigned char*); /* info  -> output */
 | 
			
		||||
    int (*func_so)(rb_transcoding*, const unsigned char*, size_t, unsigned char*); /* start -> output */
 | 
			
		||||
    int (*resetstate_func)(rb_transcoding*, unsigned char*); /* -> output */
 | 
			
		||||
    int (*finish_func)(rb_transcoding*, unsigned char*); /* -> output */
 | 
			
		||||
    int (*resetsize_func)(rb_transcoding*); /* -> len */
 | 
			
		||||
    int (*resetstate_func)(rb_transcoding*, unsigned char*); /* -> output */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue