1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* follow original nkf.c 1.40.(improve mime encode/decode support)

* add test for mime encode/decode


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7327 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2004-11-19 20:38:24 +00:00
parent 77a23fba35
commit 6caa49e45f
2 changed files with 412 additions and 82 deletions

View file

@ -39,9 +39,9 @@
** E-Mail: furukawa@tcp-ip.or.jp
** $B$^$G8fO"Mm$r$*4j$$$7$^$9!#(B
***********************************************************************/
/* $NKF_Id: nkf.c,v 1.38 2004/11/09 13:08:39 naruse Exp $ */
/* $Id$ */
#define NKF_VERSION "2.0.4"
#define NKF_RELEASE_DATE "2004-11-09"
#define NKF_RELEASE_DATE "2004-11-15"
#include "config.h"
static char *CopyRight =
@ -110,6 +110,8 @@ static char *CopyRight =
#include <stdio.h>
#endif
#include <stdlib.h>
#if defined(MSDOS) || defined(__OS2__)
#include <fcntl.h>
#include <io.h>
@ -142,7 +144,6 @@ static char *CopyRight =
#ifdef OVERWRITE
/* added by satoru@isoternet.org */
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#ifndef MSDOS /* UNIX, OS/2 */
@ -206,6 +207,7 @@ static char *CopyRight =
/* ASCII CODE */
#define BS 0x08
#define TAB 0x09
#define NL 0x0a
#define CR 0x0d
#define ESC 0x1b
@ -216,6 +218,7 @@ static char *CopyRight =
#define SI 0x0f
#define SO 0x0e
#define SSO 0x8e
#define SS3 0x8f
/* Non-zero when c is an ASCII letter or decimal digit.
   Every use of the argument is parenthesized so that expressions such as
   is_alnum(x & 0x7f) group correctly.  The argument is still evaluated more
   than once, so it must not have side effects. */
#define is_alnum(c) \
((('a'<=(c)) && ((c)<='z'))||(('A'<=(c)) && ((c)<='Z'))||(('0'<=(c)) && ((c)<='9')))
@ -2066,6 +2069,32 @@ kanji_convert(f)
} else if ((c1 == NL || c1 == CR) && broken_f&4) {
input_mode = ASCII; set_iconv(FALSE, 0);
SEND;
} else if (c1 == NL && mime_f && !mime_decode_mode ) {
if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
i_ungetc(SPACE,f);
continue;
} else {
i_ungetc(c1,f);
}
c1 = NL;
SEND;
} else if (c1 == CR && mime_f && !mime_decode_mode ) {
if ((c1=(*i_getc)(f))!=EOF) {
if (c1==SPACE) {
i_ungetc(SPACE,f);
continue;
} else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
i_ungetc(SPACE,f);
continue;
} else {
i_ungetc(c1,f);
}
i_ungetc(NL,f);
} else {
i_ungetc(c1,f);
}
c1 = CR;
SEND;
} else
SEND;
}
@ -2716,11 +2745,12 @@ base64_conv(c2, c1)
c1;
{
if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
(*o_putc)(EOF);
(*o_putc)(NL);
} else if (base64_count>66 && mimeout_mode) {
(*o_base64conv)(EOF,0);
(*o_putc)(NL);
(*o_putc)('\t'); base64_count += 7;
(*o_base64conv)(NL,0);
(*o_base64conv)(SPACE,0);
}
(*o_base64conv)(c2,c1);
}
@ -3567,6 +3597,10 @@ FILE *f;
{
int c1, c2, c3, c4, cc;
int t1, t2, t3, t4, mode, exit_mode;
int lwsp_count;
char *lwsp_buf;
char *lwsp_buf_new;
int lwsp_size = 128;
if (mime_top != mime_last) { /* Something is in FIFO */
return Fifo(mime_top++);
@ -3595,8 +3629,69 @@ restart_mime_q:
if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
/* end Q encoding */
input_mode = exit_mode;
while((c1=(*i_getc)(f))!=EOF && c1==SPACE
/* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
lwsp_count = 0;
lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
if (lwsp_buf==NULL) {
perror("can't malloc");
return -1;
}
while ((c1=(*i_getc)(f))!=EOF) {
switch (c1) {
case NL:
case CR:
if (c1==NL) {
if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
i_ungetc(SPACE,f);
continue;
} else {
i_ungetc(c1,f);
}
c1 = NL;
} else {
if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
i_ungetc(SPACE,f);
continue;
} else {
i_ungetc(c1,f);
}
i_ungetc(NL,f);
} else {
i_ungetc(c1,f);
}
c1 = CR;
}
break;
case SPACE:
case TAB:
lwsp_buf[lwsp_count] = c1;
if (lwsp_count++>lwsp_size){
lwsp_size *= 2;
lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
if (lwsp_buf_new==NULL) {
free(lwsp_buf);
lwsp_buf = NULL;
perror("can't realloc");
return -1;
}
lwsp_buf = lwsp_buf_new;
}
continue;
}
break;
}
if (lwsp_count > 0) {
if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
lwsp_count = 0;
} else {
i_ungetc(c1,f);
for(lwsp_count--;lwsp_count>0;lwsp_count--)
i_ungetc(lwsp_buf[lwsp_count],f);
c1 = lwsp_buf[0];
}
}
free(lwsp_buf);
lwsp_buf = NULL;
return c1;
}
if (c1=='='&&c2<' ') { /* this is soft wrap */
@ -3650,8 +3745,72 @@ mime_c2_retry:
}
if ((c1 == '?') && (c2 == '=')) {
input_mode = ASCII;
while((c1=(*i_getc)(f))!=EOF && c1==SPACE
/* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
lwsp_count = 0;
lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
if (lwsp_buf==NULL) {
perror("can't malloc");
return -1;
}
while ((c1=(*i_getc)(f))!=EOF) {
switch (c1) {
case NL:
case CR:
if (c1==NL) {
if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
i_ungetc(SPACE,f);
continue;
} else {
i_ungetc(c1,f);
}
c1 = NL;
} else {
if ((c1=(*i_getc)(f))!=EOF) {
if (c1==SPACE) {
i_ungetc(SPACE,f);
continue;
} else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
i_ungetc(SPACE,f);
continue;
} else {
i_ungetc(c1,f);
}
i_ungetc(NL,f);
} else {
i_ungetc(c1,f);
}
c1 = CR;
}
break;
case SPACE:
case TAB:
lwsp_buf[lwsp_count] = c1;
if (lwsp_count++>lwsp_size){
lwsp_size *= 2;
lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
if (lwsp_buf_new==NULL) {
free(lwsp_buf);
lwsp_buf = NULL;
perror("can't realloc");
return -1;
}
lwsp_buf = lwsp_buf_new;
}
continue;
}
break;
}
if (lwsp_count > 0) {
if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
lwsp_count = 0;
} else {
i_ungetc(c1,f);
for(lwsp_count--;lwsp_count>0;lwsp_count--)
i_ungetc(lwsp_buf[lwsp_count],f);
c1 = lwsp_buf[0];
}
}
free(lwsp_buf);
lwsp_buf = NULL;
return c1;
}
mime_c3_retry:
@ -3769,6 +3928,11 @@ static char basis_64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
static int b64c;
/* Threshold used by mime_putc() to decide when the pending-output buffer
   below has to be flushed. */
#define MIMEOUT_BUF_LENGTH (60)
/* Bytes that mime_putc() has accepted but not yet emitted; open_mime()
   replays them once an encoded-word has been started. */
char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
/* Number of bytes currently held in mimeout_buf. */
int mimeout_buf_count = 0;
/* Set to TRUE by mime_putc() after it breaks the line inside an active
   encoded-word.  While it is zero, open_mime() writes leading whitespace
   from mimeout_buf unencoded before the encoded-word prefix; open_mime()
   clears the flag each time it runs. */
int mimeout_preserve_space = 0;
#define itoh4(c) (c>=10?c+'A'-10:c+'0')
void
open_mime(mode)
@ -3776,6 +3940,7 @@ int mode;
{
unsigned char *p;
int i;
int j;
p = mime_pattern[0];
for(i=0;mime_encode[i];i++) {
if (mode == mime_encode[i]) {
@ -3785,11 +3950,39 @@ int mode;
}
mimeout_mode = mime_encode_method[i];
/* (*o_mputc)(' '); */
i = 0;
if (base64_count>45) {
(*o_mputc)(NL);
(*o_mputc)(SPACE);
base64_count = 1;
if (!mimeout_preserve_space && mimeout_buf_count>0
&& (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
|| mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
i++;
}
}
if (!mimeout_preserve_space) {
for (;i<mimeout_buf_count;i++) {
if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
|| mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
(*o_mputc)(mimeout_buf[i]);
base64_count ++;
} else {
break;
}
}
}
mimeout_preserve_space = FALSE;
while(*p) {
(*o_mputc)(*p++);
base64_count ++;
}
j = mimeout_buf_count;
mimeout_buf_count = 0;
for (;i<j;i++) {
mime_putc(mimeout_buf[i]);
}
}
void
@ -3797,48 +3990,13 @@ close_mime()
{
(*o_mputc)('?');
(*o_mputc)('=');
(*o_mputc)(' ');
base64_count += 3;
base64_count += 2;
mimeout_mode = 0;
}
#define itoh4(c) (c>=10?c+'A'-10:c+'0')
void
mime_putc(c)
int c;
eof_mime()
{
if (mimeout_f==FIXED_MIME) {
if (base64_count>71) {
(*o_mputc)('\n');
base64_count=0;
}
} else if (c==NL) {
base64_count=0;
}
if (c!=EOF) {
if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 )
&& mimeout_f!=FIXED_MIME) {
if (mimeout_mode=='Q') {
if (c<=SPACE) {
close_mime();
}
(*o_mputc)(c);
return;
}
if (mimeout_mode!='B' || c!=SPACE) {
if (mimeout_mode) {
mime_putc(EOF);
mimeout_mode=0;
}
(*o_mputc)(c);
base64_count ++;
return;
}
} else if (!mimeout_mode && mimeout_f!=FIXED_MIME) {
open_mime(output_mode);
}
} else { /* c==EOF */
switch(mimeout_mode) {
case 'Q':
case 'B':
@ -3861,16 +4019,22 @@ mime_putc(c)
} else if (mimeout_mode != 'Q')
mimeout_mode = 'B';
}
return;
}
}
void
mimeout_addchar(c)
int c;
{
switch(mimeout_mode) {
case 'Q':
if(c>=DEL) {
(*o_mputc)('=');
(*o_mputc)(itoh4(((c>>4)&0xf)));
(*o_mputc)(itoh4((c&0xf)));
base64_count += 3;
} else {
(*o_mputc)(c);
base64_count++;
}
break;
case 'B':
@ -3894,6 +4058,125 @@ mime_putc(c)
}
}
/*
 * mime_putc -- output filter that puts text which needs encoding into MIME
 * encoded-words and lets plain ASCII words through unencoded.
 *
 *   c : next output byte, or EOF to finish whatever is pending.
 *
 * File-scope state used: mimeout_f, mimeout_mode ('Q', another non-zero
 * value while an encoded-word is open, 0 otherwise), output_mode,
 * base64_count (incremented per emitted byte and reset at line breaks --
 * apparently the current output column) and mimeout_buf/mimeout_buf_count,
 * which hold bytes whose treatment (raw or encoded) is not decided yet.
 * Raw bytes are written with (*o_mputc)(); encoded bytes go through
 * mimeout_addchar().
 *
 * mimeout_buf has MIMEOUT_BUF_LENGTH+1 elements; every store below is
 * followed by a flush as soon as the count exceeds MIMEOUT_BUF_LENGTH, so
 * the highest index written is MIMEOUT_BUF_LENGTH.
 */
void
mime_putc(c)
    int c;
{
    int i = 0;
    int j = 0;

    if (mimeout_f==FIXED_MIME && base64_count>50) {
        /* fixed-encoding mode: finish the current group and wrap the line */
        eof_mime();
        (*o_mputc)(NL);
        base64_count=0;
    } else if (c==CR||c==NL) {
        base64_count=0;
    }

    if (c==EOF) {
        /* Feed the leading non-blank part of the buffer back through this
           function, close the encoded-word, then write the rest raw.
           NOTE(review): the recursive calls see the still-populated buffer
           (mimeout_buf_count is not cleared here); confirm the behaviour
           for input that ends in a buffered word without trailing
           whitespace. */
        j = mimeout_buf_count;
        for (i=0;i<j;i++) {
            if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
                    || mimeout_buf[i]==CR || mimeout_buf[i]==NL)
                break;
            mime_putc(mimeout_buf[i]);
        }
        eof_mime();
        for (;i<j;i++) {
            (*o_mputc)(mimeout_buf[i]);
            base64_count++;
        }
        return;
    }

    /* With FIXED_MIME every non-EOF byte goes straight to the encoder at
       the bottom; previously such bytes fell into the EOF handling above
       and were never encoded. */
    if (mimeout_f!=FIXED_MIME) {
        if (c<=DEL && (output_mode==ASCII || output_mode==ISO8859_1)) {
            if (mimeout_mode=='Q') {
                if (c<=SPACE) {
                    /* blank or control byte ends the Q encoded-word */
                    close_mime();
                    (*o_mputc)(SPACE);
                    base64_count++;
                }
                (*o_mputc)(c);
                base64_count++;
                return;
            }
            if (mimeout_mode) {
                if (base64_count>63) {
                    /* line is getting long: close the encoded-word and
                       continue on a folded line */
                    eof_mime();
                    (*o_mputc)(NL);
                    (*o_mputc)(SPACE);
                    base64_count=1;
                    mimeout_preserve_space = TRUE;
                }
                if (c==SPACE || c==TAB || c==CR || c==NL) {
                    /* A complete plain word is buffered: close the
                       encoded-word and write the buffer unencoded. */
                    for (i=0;i<mimeout_buf_count;i++) {
                        if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL)
                            break;
                    }
                    if (i<mimeout_buf_count) {
                        eof_mime();
                        for (i=0;i<mimeout_buf_count;i++) {
                            (*o_mputc)(mimeout_buf[i]);
                            base64_count++;
                        }
                        mimeout_buf_count = 0;
                    }
                    mimeout_buf[mimeout_buf_count++] = c;
                    if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
                        eof_mime();
                        base64_count = 0;
                        for (i=0;i<mimeout_buf_count;i++) {
                            (*o_mputc)(mimeout_buf[i]);
                            base64_count++;
                        }
                        /* the buffer has been written out; forget it so the
                           next store starts at index 0 again */
                        mimeout_buf_count = 0;
                    }
                    return;
                }
                if (mimeout_buf_count>0 && SPACE<c) {
                    mimeout_buf[mimeout_buf_count++] = c;
                    if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
                        /* Buffer full: encode it.  c is already its last
                           element, so it must not be encoded a second
                           time by the common tail below. */
                        j = mimeout_buf_count;
                        mimeout_buf_count = 0;
                        for (i=0;i<j;i++) {
                            mimeout_addchar(mimeout_buf[i]);
                        }
                    }
                    return;
                }
                /* otherwise fall through to the encoder below */
            } else {
                /* no encoded-word is open */
                if (c==SPACE || c==TAB || c==CR || c==NL) {
                    /* drop one trailing blank in front of a line break;
                       only look at the last element when there is one */
                    if ((c==CR || c==NL)
                            && mimeout_buf_count>0
                            && (mimeout_buf[mimeout_buf_count-1]==SPACE
                                || mimeout_buf[mimeout_buf_count-1]==TAB)) {
                        mimeout_buf_count--;
                    }
                    for (i=0;i<mimeout_buf_count;i++) {
                        (*o_mputc)(mimeout_buf[i]);
                        base64_count++;
                    }
                    mimeout_buf_count = 0;
                }
                mimeout_buf[mimeout_buf_count++] = c;
                /* flush before the next store could run past the array */
                if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
                    open_mime(output_mode);
                }
                return;
            }
        } else if (!mimeout_mode) {
            /* first byte that needs encoding: write the buffered text raw,
               keeping a single trailing space for open_mime() to handle */
            if (mimeout_buf_count>0 && mimeout_buf[mimeout_buf_count-1]==SPACE) {
                for (i=0;i<mimeout_buf_count-1;i++) {
                    (*o_mputc)(mimeout_buf[i]);
                    base64_count++;
                }
                mimeout_buf[0] = SPACE;
                mimeout_buf_count = 1;
            }
            open_mime(output_mode);
        }
    }

    /* encode whatever is still buffered, then the current byte */
    if (mimeout_buf_count>0) {
        j = mimeout_buf_count;
        mimeout_buf_count = 0;
        for (i=0;i<j;i++) {
            mimeout_addchar(mimeout_buf[i]);
        }
    }
    mimeout_addchar(c);
}
#ifdef PERL_XS
void

View file

@ -83,8 +83,8 @@ def test(opt, input, expects)
end
end
puts "Fail"
# puts result.unpack('H*').first
# puts expects.map{|x|x.unpack('H*').first}.join("\n")
puts result.unpack('H*').first
puts expects.map{|x|x.unpack('H*').first}.join("\n\n")
end
@ -650,19 +650,66 @@ eofeof
$example['test_data/mime_out.ans'] = <<'eofeof'.unpack('u')[0]
M"BTM+2T*4W5B:F5C=#H@86%A82!A86%A(&%A86$@86%A82!A86%A(&%A86$@
M86%A82!A86%A(&%A86$*(&%A86$@86%A82!A86%A(&%A86$@86%A80HM+2TM
M"E-U8FIE8W0Z(#T_25-/+3(P,C(M2E`_0C]'>5)#2D-):TI#46U*0V=K2VE1
M<DI#,&M,>5%X2D1-:TY343-*1&MK3WAS;U%G/3T_/2`*"3T_25-/+3(P,C(M
M2E`_0C]'>5)#2D0P:U!Y4D)*15%K4FE224I%;VM3>5)-2D4P:U1I4E!*1DEK
M5E-264=Y:$,_/2`*"3T_25-/+3(P,C(M2E`_0C]'>5)#2D9S:UAI4F9*1T%K
M65-2:4I'46M*0U)M2D-G:V%"<V]19ST]/ST@"BTM+2T*4W5B:F5C=#H@86%A
M82!A86%A(&%A86$@86%A82!A86%A(&%A86$@86%A82`]/TE33RTR,#(R+4I0
M/T(_1WE20TI#26)+14D]/ST@"@D]/TE33RTR,#(R+4I0/T(_1WE20TI#46M*
J:5%O2D-O8DM%23T_/2`@86%A80H@86%A82!A86%A(&%A86$*+2TM+0H*
M86%A82!A86%A(&%A86$@86%A80H@86%A82!A86%A(&%A86$@86%A82!A86%A
M"BTM+2T*4W5B:F5C=#H@/3])4T\M,C`R,BU*4#]"/T=Y4D-*0TEK2D-1;4I#
M9VM+:5%R2D,P:TQY47A*1$UK3E-1,T=Y:$,_/0H@/3])4T\M,C`R,BU*4#]"
M/T=Y4D-*1&MK3WE1.4I$.&M14U)%2D59:U-#4DM*17-K5$-23DI%-&M4>5)3
M2D95:U=#4F)'>6A#/ST*(#T_25-/+3(P,C(M2E`_0C]'>5)#2D8T:UAY4F=*
M1T5K66E2:TI#46M::5%O2D=G8DM%23T_/0HM+2TM"E-U8FIE8W0Z(&%A86$@
M86%A82!A86%A(&%A86$@86%A82!A86%A(&%A86$*(#T_25-/+3(P,C(M2E`_
M0C]'>5)#2D-):TI#46U*0V=K2VAS;U%G/3T_/2!A86%A(&%A86$@86%A82!A
086%A"B!A86%A"BTM+2T*"@``
eofeof
print "test_data/mime_out ";
test("-jM",$example['test_data/mime_out'],[$example['test_data/mime_out.ans']])
# test_data/mime_out2
$example['test_data/mime_out2'] = <<'eofeof'.unpack('u')[0]
M5&AI<R!M96UO(&1E<V-R:6)E<R!S:6UI;&%R('1E8VAN:7%U97,@=&\@86QL
M;W<@=&AE(&5N8V]D:6YG(&]F(&YO;BU!4T-)22!T97AT(&EN('9A<FEO=7,@
M<&]R=&EO;G,@;V8@82!21D,@.#(R(%LR72!M97-S86=E(&AE861E<BP@:6X@
M82!M86YN97(@=VAI8V@@:7,@=6YL:6ME;'D@=&\@8V]N9G5S92!E>&ES=&EN
M9R!M97-S86=E(&AA;F1L:6YG('-O9G1W87)E+@H*4W5B:F5C=#H@=&5S=#$@
M=&5S=#(@@L2"MX+&@J<@=&5S=#,@@L2"MX+&@O$@=&5S=#0*"E-U8FIE8W0Z
M('1E<W0Q("!T97-T,B""Q"""MR""QB""IR!T97-T,R`@@L2"MX+&@O$@('1E
M<W0T"@I!4T-)22"3^I9[C.H@05-#24D@05-#24D@D_J6>XSJ()/ZEGN,ZB!!
M4T-)22!!4T-)29/ZEGN,ZB!!4T-)20H*@J`@@J(@@J0@@J8@@J@@@JD@@JL@
M@JT@@J\@@K$@@K,@@K4@@K<@@KD@@KL@@KT@@K\@@L(@@L0@@L8@@L@@@LD@
8@LH@@LL@@LP*"@H*"@H*"@H*"@H*"@H*
eofeof
$example['test_data/mime_out2.ans'] = <<'eofeof'.unpack('u')[0]
M5&AI<R!M96UO(&1E<V-R:6)E<R!S:6UI;&%R('1E8VAN:7%U97,@=&\@86QL
M;W<@=&AE(&5N8V]D:6YG(&5N8V]D:6YG"B!O9B!N;VXM05-#24D@=&5X="!I
M;B!V87)I;W5S('!O<G1I;VYS(&]F(&$@80H@4D9#(#@R,B!;,ET@;65S<V%G
M92!H96%D97(L(&EN(&$@;6%N;F5R('=H:6-H(&ES('5N;&EK96QY('5N;&EK
M96QY"B!T;R!C;VYF=7-E(&5X:7-T:6YG(&UE<W-A9V4@:&%N9&QI;F<@<V]F
M='=A<F4N"@I3=6)J96-T.B!T97-T,2!T97-T,B`]/TE33RTR,#(R+4I0/T(_
M1WE20TI%66M/4U))2D-K8DM%23T_/2!T97-T,PH@/3])4T\M,C`R,BU*4#]"
M/T=Y4D-*15EK3U-224I(36)+14D]/ST@=&5S=#0*"E-U8FIE8W0Z('1E<W0Q
M("!T97-T,B`]/TE33RTR,#(R+4I0/T(_1WE20TI%66)+14EG1WE20TI$:V)+
M14EG1WE20TI%9V)+14D]/ST*(#T_25-/+3(P,C(M2E`_0C]'>5)#1WEH0TE"
M<VM1:5%P1WEH0S\]('1E<W0S(`H@/3])4T\M,C`R,BU*4#]"/T=Y4D-*15EK
M3U-224I(36)+14D]/ST@('1E<W0T"@I!4T-)22`]/TE33RTR,#(R+4I0/T(_
M1WE20U)N>$Q81&AS1WEH0S\]($%30TE)($%30TE)"B`]/TE33RTR,#(R+4I0
M/T(_1WE20U)N>$Q81&AS1WEH0TE"<VM1:UHX4S%W-&)"<V]19ST]/ST@05-#
M24D*(#T_25-/+3(P,C(M2E`_0C]15DY$4U5K8DI%2D=F171C3T=W8DM%23T_
M/2!!4T-)20H*/3])4T\M,C`R,BU*4#]"/T=Y4D-*0TEB2T5)9T=Y4D-*0U%B
M2T5)9T=Y4D-*0UEB2T5)9T=Y4D-*0V=B2T5)/3\]"B`]/TE33RTR,#(R+4I0
M/T(_24)S:U%I47%'>6A#24)S:U%I47)'>6A#24)S:U%I471'>6A#24)S:U%I
M479'>6A#/ST*(#T_25-/+3(P,C(M2E`_0C])0G-K46E1>$=Y:$-)0G-K46E1
M>D=Y:$-)0G-K46E1,4=Y:$-)0G-K46E1,T=Y:$,_/0H@/3])4T\M,C`R,BU*
M4#]"/TE"<VM1:5$U1WEH0TE"<VM1:5$W1WEH0TE"<VM1:5$Y1WEH0TE"<VM1
M:5$O1WEH0S\]"B`]/TE33RTR,#(R+4I0/T(_24)S:U%I4D)'>6A#24)S:U%I
M4D5'>6A#24)S:U%I4D='>6A#24)S:U%I4DE'>6A#/ST*(#T_25-/+3(P,C(M
M2E`_0C])0G-K46E22T=Y:$-)0G-K46E23$=Y:$-)0G-K46E234=Y:$-)0G-K
M46E23D=Y:$,_/0H@/3])4T\M,C`R,BU*4#]"/TE"<VM1:5)/1WEH0S\]"@H*
-"@H*"@H*"@H*"@H*"@``
eofeof
print "test_data/mime_out2 ";
test("-jM",$example['test_data/mime_out2'],["\x0a"+$example['test_data/mime_out2.ans']])
# test_data/multi-line
$example['test_data/multi-line'] = <<'eofeof'.unpack('u')[0]