1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* dir.c: preserve encoding of strings in glob and fnmatch.

* include/ruby/ruby.h: related changes.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@17950 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2008-07-07 17:29:44 +00:00
parent f10c88827e
commit 0ad460dde8
4 changed files with 103 additions and 157 deletions

View file

@ -1,3 +1,9 @@
Tue Jul 8 02:27:23 2008 NARUSE, Yui <naruse@ruby-lang.org>
* dir.c: preserve encoding of strings in glob and fnmatch.
* include/ruby/ruby.h: related changes.
Tue Jul 8 00:22:58 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
* string.c (rb_str_succ): limit carrying in an alphanumeric region if

243
dir.c
View file

@ -80,96 +80,35 @@ char *strchr(char*,char);
#define FNM_NOMATCH 1
#define FNM_ERROR 2
#define downcase(c) (nocase && ISUPPER(c) ? TOLOWER(c) : (c))
#define compare(c1, c2) (((unsigned char)(c1)) - ((unsigned char)(c2)))
# define Next(p, enc) (rb_enc_right_char_head(p, p+1, enc))
# define Inc(p, enc) ((p) = Next(p, enc))
/* caution: in case *p == '\0'
Next(p) == p + 1 in single byte environment
Next(p) == p in multi byte environment
*/
#if defined(CharNext)
# define Next(p) CharNext(p)
#elif defined(DJGPP)
# define Next(p) ((p) + mblen(p, RUBY_MBCHAR_MAXSIZE))
#elif defined(__EMX__)
# define Next(p) ((p) + emx_mblen(p))
static inline int
emx_mblen(const char *p)
{
int n = mblen(p, RUBY_MBCHAR_MAXSIZE);
return (n < 0) ? 1 : n;
}
#endif
#ifndef Next /* single byte environment */
# define Next(p) ((p) + 1)
# define Inc(p) (++(p))
# define Compare(p1, p2) (compare(downcase(*(p1)), downcase(*(p2))))
#else /* multi byte environment */
# define Inc(p) ((p) = Next(p))
# define Compare(p1, p2) (CompareImpl(p1, p2, nocase))
static int
CompareImpl(const char *p1, const char *p2, int nocase)
char_casecmp(const char *p1, const char *p2, rb_encoding *enc, const int nocase)
{
const int len1 = Next(p1) - p1;
const int len2 = Next(p2) - p2;
#ifdef _WIN32
char buf1[10], buf2[10]; /* large enough? */
#endif
const char *p1end, *p2end;
if (len1 < 0 || len2 < 0) {
rb_fatal("CompareImpl: negative len");
if (!*p1) return *p1;
if (!*p2) return -*p2;
p1end = p1 + strlen(p1);
p2end = p2 + strlen(p2);
int c1 = rb_enc_codepoint(p1, p1end, enc);
int c2 = rb_enc_codepoint(p2, p2end, enc);
if (c1 == c2) return 0;
if (nocase) {
c1 = rb_enc_toupper(c1, enc);
c2 = rb_enc_toupper(c2, enc);
}
if (len1 == 0) return len2;
if (len2 == 0) return -len1;
#ifdef _WIN32
if (nocase && rb_w32_iswinnt()) {
if (len1 > 1) {
if (len1 >= sizeof(buf1)) {
rb_fatal("CompareImpl: too large len");
}
memcpy(buf1, p1, len1);
buf1[len1] = '\0';
CharLower(buf1);
p1 = buf1; /* trick */
}
if (len2 > 1) {
if (len2 >= sizeof(buf2)) {
rb_fatal("CompareImpl: too large len");
}
memcpy(buf2, p2, len2);
buf2[len2] = '\0';
CharLower(buf2);
p2 = buf2; /* trick */
}
}
#endif
if (len1 == 1)
if (len2 == 1)
return compare(downcase(*p1), downcase(*p2));
else {
const int ret = compare(downcase(*p1), *p2);
return ret ? ret : -1;
}
else
if (len2 == 1) {
const int ret = compare(*p1, downcase(*p2));
return ret ? ret : 1;
}
else {
const int ret = memcmp(p1, p2, len1 < len2 ? len1 : len2);
return ret ? ret : len1 - len2;
}
return c1 - c2;
}
#endif /* environment */
static char *
bracket(
const char *p, /* pattern (next to '[') */
const char *s, /* string */
int flags)
int flags,
rb_encoding *enc)
{
const int nocase = flags & FNM_CASEFOLD;
const int escape = !(flags & FNM_NOESCAPE);
@ -187,19 +126,19 @@ bracket(
t1++;
if (!*t1)
return NULL;
p = Next(t1);
p = Next(t1, enc);
if (p[0] == '-' && p[1] != ']') {
const char *t2 = p + 1;
if (escape && *t2 == '\\')
t2++;
if (!*t2)
return NULL;
p = Next(t2);
if (!ok && Compare(t1, s) <= 0 && Compare(s, t2) <= 0)
p = Next(t2, enc);
if (!ok && char_casecmp(t1, s, enc, nocase) <= 0 && char_casecmp(s, t2, enc, nocase) <= 0)
ok = 1;
}
else
if (!ok && Compare(t1, s) == 0)
if (!ok && char_casecmp(t1, s, enc, nocase) == 0)
ok = 1;
}
@ -219,7 +158,8 @@ static int
fnmatch_helper(
const char **pcur, /* pattern */
const char **scur, /* string */
int flags)
int flags,
rb_encoding *enc)
{
const int period = !(flags & FNM_DOTMATCH);
const int pathname = flags & FNM_PATHNAME;
@ -253,16 +193,16 @@ fnmatch_helper(
if (ISEND(s))
RETURN(FNM_NOMATCH);
p++;
Inc(s);
Inc(s, enc);
continue;
case '[': {
const char *t;
if (ISEND(s))
RETURN(FNM_NOMATCH);
if ((t = bracket(p + 1, s, flags)) != 0) {
if ((t = bracket(p + 1, s, flags, enc)) != 0) {
p = t;
Inc(s);
Inc(s, enc);
continue;
}
goto failed;
@ -275,16 +215,16 @@ fnmatch_helper(
RETURN(ISEND(p) ? 0 : FNM_NOMATCH);
if (ISEND(p))
goto failed;
if (Compare(p, s) != 0)
if (char_casecmp(p, s, enc, nocase) != 0)
goto failed;
Inc(p);
Inc(s);
Inc(p, enc);
Inc(s, enc);
continue;
failed: /* try next '*' position */
if (ptmp && stmp) {
p = ptmp;
Inc(stmp); /* !ISEND(*stmp) */
Inc(stmp, enc); /* !ISEND(*stmp) */
s = stmp;
continue;
}
@ -294,12 +234,15 @@ fnmatch_helper(
static int
fnmatch(
const char *p, /* pattern */
const char *s, /* string */
VALUE pattern,
VALUE string,
int flags)
{
const char *p = RSTRING_PTR(pattern); /* pattern */
const char *s = RSTRING_PTR(string); /* string */
const int period = !(flags & FNM_DOTMATCH);
const int pathname = flags & FNM_PATHNAME;
rb_encoding *enc = rb_enc_get(pattern);
const char *ptmp = 0;
const char *stmp = 0;
@ -311,8 +254,8 @@ fnmatch(
ptmp = p;
stmp = s;
}
if (fnmatch_helper(&p, &s, flags) == 0) {
while (*s && *s != '/') Inc(s);
if (fnmatch_helper(&p, &s, flags, enc) == 0) {
while (*s && *s != '/') Inc(s, enc);
if (*p && *s) {
p++;
s++;
@ -323,7 +266,7 @@ fnmatch(
}
/* failed : try next recursion */
if (ptmp && stmp && !(period && *stmp == '.')) {
while (*stmp && *stmp != '/') Inc(stmp);
while (*stmp && *stmp != '/') Inc(stmp, enc);
if (*stmp) {
p = ptmp;
stmp++;
@ -335,7 +278,7 @@ fnmatch(
}
}
else
return fnmatch_helper(&p, &s, flags);
return fnmatch_helper(&p, &s, flags, enc);
}
VALUE rb_cDir;
@ -1030,7 +973,7 @@ do_opendir(const char *path, int flags)
/* Return nonzero if S has any special globbing chars in it. */
static int
has_magic(const char *s, int flags)
has_magic(const char *s, int flags, rb_encoding *enc)
{
const int escape = !(flags & FNM_NOESCAPE);
const int nocase = flags & FNM_CASEFOLD;
@ -1055,7 +998,7 @@ has_magic(const char *s, int flags)
return 1;
}
p = Next(p-1);
p = Next(p-1, enc);
}
return 0;
@ -1063,7 +1006,7 @@ has_magic(const char *s, int flags)
/* Find separator in globbing pattern. */
static char *
find_dirsep(const char *s, int flags)
find_dirsep(const char *s, int flags, rb_encoding *enc)
{
const int escape = !(flags & FNM_NOESCAPE);
@ -1091,7 +1034,7 @@ find_dirsep(const char *s, int flags)
continue;
}
p = Next(p-1);
p = Next(p-1, enc);
}
return (char *)p-1;
@ -1099,7 +1042,7 @@ find_dirsep(const char *s, int flags)
/* Remove escaping backslashes */
static void
remove_backslashes(char *p)
remove_backslashes(char *p, rb_encoding *enc)
{
char *t = p;
char *s = p;
@ -1112,7 +1055,7 @@ remove_backslashes(char *p)
s = ++p;
if (!*p) break;
}
Inc(p);
Inc(p, enc);
}
while (*p++);
@ -1133,7 +1076,7 @@ struct glob_pattern {
static void glob_free_pattern(struct glob_pattern *list);
static struct glob_pattern *
glob_make_pattern(const char *p, int flags)
glob_make_pattern(const char *p, int flags, rb_encoding *enc)
{
struct glob_pattern *list, *tmp, **tail = &list;
int dirsep = 0; /* pattern is terminated with '/' */
@ -1149,7 +1092,7 @@ glob_make_pattern(const char *p, int flags)
dirsep = 1;
}
else {
const char *m = find_dirsep(p, flags);
const char *m = find_dirsep(p, flags, enc);
char *buf = GLOB_ALLOC_N(char, m-p+1);
if (!buf) {
GLOB_FREE(tmp);
@ -1157,7 +1100,7 @@ glob_make_pattern(const char *p, int flags)
}
memcpy(buf, p, m-p);
buf[m-p] = '\0';
tmp->type = has_magic(buf, flags) ? MAGICAL : PLAIN;
tmp->type = has_magic(buf, flags, enc) ? MAGICAL : PLAIN;
tmp->str = buf;
if (*m) {
dirsep = 1;
@ -1230,8 +1173,8 @@ enum answer { YES, NO, UNKNOWN };
#endif
struct glob_args {
void (*func)(const char *, VALUE);
const char *path;
void (*func)(VALUE, VALUE);
VALUE path;
VALUE value;
};
@ -1248,7 +1191,7 @@ glob_func_caller(VALUE val)
static int
glob_helper(
const char *path,
VALUE vpath,
int dirsep, /* '/' should be placed before appending child entry's name to 'path'. */
enum answer exist, /* Does 'path' indicate an existing entry? */
enum answer isdir, /* Does 'path' indicate a directory or a symlink to a directory? */
@ -1258,6 +1201,7 @@ glob_helper(
ruby_glob_func *func,
VALUE arg)
{
const char *path = RSTRING_PTR(vpath);
struct stat st;
int status = 0;
struct glob_pattern **cur, **new_beg, **new_end;
@ -1310,13 +1254,13 @@ glob_helper(
}
}
if (match_all && exist == YES) {
status = glob_call_func(func, path, arg);
status = glob_call_func(func, rb_tainted_str_new2(path), arg);
if (status) return status;
}
if (match_dir && isdir == YES) {
char *tmp = join_path(path, dirsep, "");
if (!tmp) return -1;
status = glob_call_func(func, tmp, arg);
status = glob_call_func(func, rb_tainted_str_new2(tmp), arg);
GLOB_FREE(tmp);
if (status) return status;
}
@ -1338,7 +1282,7 @@ glob_helper(
break;
}
if (recursive && strcmp(dp->d_name, ".") != 0 && strcmp(dp->d_name, "..") != 0
&& fnmatch("*", dp->d_name, flags) == 0) {
&& fnmatch(rb_usascii_str_new2("*"), rb_str_new2(dp->d_name), flags) == 0) {
#ifndef _WIN32
if (do_lstat(buf, &st, flags) == 0)
new_isdir = S_ISDIR(st.st_mode) ? YES : S_ISLNK(st.st_mode) ? UNKNOWN : NO;
@ -1364,12 +1308,13 @@ glob_helper(
p = p->next; /* 0 times recursion */
}
if (p->type == PLAIN || p->type == MAGICAL) {
if (fnmatch(p->str, dp->d_name, flags) == 0)
if (fnmatch(rb_str_new2(p->str), rb_str_new2(dp->d_name), flags) == 0)
*new_end++ = p->next;
}
}
status = glob_helper(buf, 1, YES, new_isdir, new_beg, new_end, flags, func, arg);
status = glob_helper(rb_enc_str_new(buf, strlen(buf), rb_enc_get(vpath)), 1,
YES, new_isdir, new_beg, new_end, flags, func, arg);
GLOB_FREE(buf);
GLOB_FREE(new_beg);
if (status) break;
@ -1395,7 +1340,7 @@ glob_helper(
break;
}
strcpy(name, (*cur)->str);
if (escape) remove_backslashes(name);
if (escape) remove_backslashes(name, rb_enc_get(vpath));
new_beg = new_end = GLOB_ALLOC_N(struct glob_pattern *, end - beg);
if (!new_beg) {
@ -1405,7 +1350,7 @@ glob_helper(
}
*new_end++ = (*cur)->next;
for (cur2 = cur + 1; cur2 < copy_end; ++cur2) {
if (*cur2 && fnmatch((*cur2)->str, name, flags) == 0) {
if (*cur2 && fnmatch(rb_str_new2((*cur2)->str), rb_str_new2(name), flags) == 0) {
*new_end++ = (*cur2)->next;
*cur2 = 0;
}
@ -1418,7 +1363,8 @@ glob_helper(
status = -1;
break;
}
status = glob_helper(buf, 1, UNKNOWN, UNKNOWN, new_beg, new_end, flags, func, arg);
status = glob_helper(rb_enc_str_new(buf, strlen(buf), rb_enc_get(vpath)), 1,
UNKNOWN, UNKNOWN, new_beg, new_end, flags, func, arg);
GLOB_FREE(buf);
GLOB_FREE(new_beg);
if (status) break;
@ -1432,15 +1378,14 @@ glob_helper(
}
static int
ruby_glob0(const char *path, int flags, ruby_glob_func *func, VALUE arg)
ruby_glob0(VALUE path, int flags, ruby_glob_func *func, VALUE arg)
{
struct glob_pattern *list;
const char *root, *start;
char *buf;
int n;
int status;
VALUE buf;
start = root = path;
start = root = RSTRING_PTR(path);
flags |= FNM_SYSCASE;
#if defined DOSISH
root = rb_path_skip_prefix(root);
@ -1448,20 +1393,14 @@ ruby_glob0(const char *path, int flags, ruby_glob_func *func, VALUE arg)
if (root && *root == '/') root++;
n = root - start;
buf = GLOB_ALLOC_N(char, n + 1);
if (!buf) return -1;
MEMCPY(buf, start, char, n);
buf[n] = '\0';
buf = rb_enc_str_new(start, root - start, rb_enc_get(path));
list = glob_make_pattern(root, flags);
list = glob_make_pattern(root, flags, rb_enc_get(path));
if (!list) {
GLOB_FREE(buf);
return -1;
}
status = glob_helper(buf, 0, UNKNOWN, UNKNOWN, &list, &list + 1, flags, func, arg);
glob_free_pattern(list);
GLOB_FREE(buf);
return status;
}
@ -1469,11 +1408,11 @@ ruby_glob0(const char *path, int flags, ruby_glob_func *func, VALUE arg)
int
ruby_glob(const char *path, int flags, ruby_glob_func *func, VALUE arg)
{
return ruby_glob0(path, flags & ~GLOB_VERBOSE, func, arg);
return ruby_glob0(rb_str_new2(path), flags & ~GLOB_VERBOSE, func, arg);
}
static int
rb_glob_caller(const char *path, VALUE a)
rb_glob_caller(VALUE path, VALUE a)
{
int status;
struct glob_args *args = (struct glob_args *)a;
@ -1484,7 +1423,7 @@ rb_glob_caller(const char *path, VALUE a)
}
static int
rb_glob2(const char *path, int flags, void (*func)(const char *, VALUE), VALUE arg)
rb_glob2(VALUE path, int flags, void (*func)(VALUE, VALUE), VALUE arg)
{
struct glob_args args;
@ -1499,20 +1438,21 @@ rb_glob2(const char *path, int flags, void (*func)(const char *, VALUE), VALUE a
}
void
rb_glob(const char *path, void (*func)(const char *, VALUE), VALUE arg)
rb_glob(const char *path, void (*func)(VALUE, VALUE), VALUE arg)
{
int status = rb_glob2(path, 0, func, arg);
int status = rb_glob2(rb_str_new2(path), 0, func, arg);
if (status) GLOB_JUMP_TAG(status);
}
static void
push_pattern(const char *path, VALUE ary)
push_pattern(VALUE path, VALUE ary)
{
rb_ary_push(ary, rb_tainted_str_new2(path));
OBJ_TAINT(path);
rb_ary_push(ary, path);
}
int
ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg)
static int
ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg, rb_encoding *enc)
{
const int escape = !(flags & FNM_NOESCAPE);
const char *p = str;
@ -1531,7 +1471,7 @@ ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg)
if (*p == '\\' && escape) {
if (!*++p) break;
}
Inc(p);
Inc(p, enc);
}
if (lbrace && rbrace) {
@ -1551,17 +1491,17 @@ ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg)
if (*p == '\\' && escape) {
if (++p == rbrace) break;
}
Inc(p);
Inc(p, enc);
}
memcpy(buf+shift, t, p-t);
strcpy(buf+shift+(p-t), rbrace+1);
status = ruby_brace_expand(buf, flags, func, arg);
status = ruby_brace_expand(buf, flags, func, arg, enc);
if (status) break;
}
GLOB_FREE(buf);
}
else if (!lbrace && !rbrace) {
status = (*func)(s, arg);
status = (*func)(rb_enc_str_new(s, strlen(s), enc), arg);
}
return status;
@ -1574,7 +1514,7 @@ struct brace_args {
};
static int
glob_brace(const char *path, VALUE val)
glob_brace(VALUE path, VALUE val)
{
struct brace_args *arg = (struct brace_args *)val;
@ -1582,24 +1522,24 @@ glob_brace(const char *path, VALUE val)
}
static int
ruby_brace_glob0(const char *str, int flags, ruby_glob_func *func, VALUE arg)
ruby_brace_glob0(VALUE str, int flags, ruby_glob_func *func, VALUE arg)
{
struct brace_args args;
args.func = func;
args.value = arg;
args.flags = flags;
return ruby_brace_expand(str, flags, glob_brace, (VALUE)&args);
return ruby_brace_expand(RSTRING_PTR(str), flags, glob_brace, (VALUE)&args, rb_enc_get(str));
}
int
ruby_brace_glob(const char *str, int flags, ruby_glob_func *func, VALUE arg)
{
return ruby_brace_glob0(str, flags & ~GLOB_VERBOSE, func, arg);
return ruby_brace_glob0(rb_str_new2(str), flags & ~GLOB_VERBOSE, func, arg);
}
static int
push_glob(VALUE ary, const char *str, int flags)
push_glob(VALUE ary, VALUE str, int flags)
{
struct glob_args args;
@ -1618,11 +1558,12 @@ rb_push_glob(VALUE str, int flags) /* '\0' is delimiter */
ary = rb_ary_new();
while (offset < RSTRING_LEN(str)) {
int status = push_glob(ary, RSTRING_PTR(str) + offset, flags);
char *p, *pend;
p = RSTRING_PTR(str) + offset;
int status = push_glob(ary,
rb_enc_str_new(p, strlen(p), rb_enc_get(str)), flags);
if (status) GLOB_JUMP_TAG(status);
if (offset >= RSTRING_LEN(str)) break;
p = RSTRING_PTR(str) + offset;
p += strlen(p) + 1;
pend = RSTRING_PTR(str) + RSTRING_LEN(str);
while (p < pend && !*p)
@ -1643,7 +1584,7 @@ dir_globs(long argc, VALUE *argv, int flags)
int status;
VALUE str = argv[i];
StringValue(str);
status = push_glob(ary, RSTRING_PTR(str), flags);
status = push_glob(ary, str, flags);
if (status) GLOB_JUMP_TAG(status);
}
@ -1914,7 +1855,7 @@ file_s_fnmatch(int argc, VALUE *argv, VALUE obj)
StringValue(pattern);
FilePathStringValue(path);
if (fnmatch(RSTRING_PTR(pattern), RSTRING_PTR(path), flags) == 0)
if (fnmatch(pattern, path, flags) == 0)
return Qtrue;
return Qfalse;

View file

@ -725,10 +725,9 @@ struct RBignum {
void rb_obj_infect(VALUE,VALUE);
typedef int ruby_glob_func(const char*,VALUE);
void rb_glob(const char*,void(*)(const char*,VALUE),VALUE);
typedef int ruby_glob_func(VALUE,VALUE);
void rb_glob(const char*,void(*)(VALUE,VALUE),VALUE);
int ruby_glob(const char*,int,ruby_glob_func*,VALUE);
int ruby_brace_expand(const char*,int,ruby_glob_func*,VALUE);
int ruby_brace_glob(const char*,int,ruby_glob_func*,VALUE);
VALUE rb_define_class(const char*,VALUE);

View file

@ -1,7 +1,7 @@
#define RUBY_VERSION "1.9.0"
#define RUBY_RELEASE_DATE "2008-07-07"
#define RUBY_RELEASE_DATE "2008-07-08"
#define RUBY_VERSION_CODE 190
#define RUBY_RELEASE_CODE 20080707
#define RUBY_RELEASE_CODE 20080708
#define RUBY_PATCHLEVEL 0
#define RUBY_VERSION_MAJOR 1
@ -9,7 +9,7 @@
#define RUBY_VERSION_TEENY 0
#define RUBY_RELEASE_YEAR 2008
#define RUBY_RELEASE_MONTH 7
#define RUBY_RELEASE_DAY 7
#define RUBY_RELEASE_DAY 8
#ifdef RUBY_EXTERN
RUBY_EXTERN const char ruby_version[];