Remove custom code, replace it with GLib, and fix fuzzy matching for Unicode.

- Also fixes char endianness issues (i.e. the reliance on plain `char` being signed).
2025-03-17 17:14:42 -04:00 · 2015-12-01 22:09:34 +01:00 · 2015-12-01 22:09:34 +01:00 · 4b77a1728d
commit 4b77a1728d
parent ff52140697
8 changed files with 17 additions and 87 deletions
--- a/include/helper.h
+++ b/include/helper.h
@@ -151,10 +151,4 @@ char helper_parse_char ( const char *arg );
 */
 void cmd_set_arguments ( int argc, char **argv );

-/**
- * @param str a UTF8 string
- * @return 1 if the string contains any non-ascii codepoints
- */
-int is_not_ascii ( const char *str );
-
 #endif // ROFI_HELPER_H
--- a/source/dialogs/dmenu.c
+++ b/source/dialogs/dmenu.c
@@ -298,7 +298,7 @@ static int dmenu_token_match ( const Mode *sw, char **tokens, int not_ascii, int
 static int dmenu_is_not_ascii ( const Mode *sw, unsigned int index )
 {
    DmenuModePrivateData *rmpd = (DmenuModePrivateData *) sw->private_data;
-    return is_not_ascii ( rmpd->cmd_list[index] );
+    return !g_str_is_ascii ( rmpd->cmd_list[index] );
 }

 Mode dmenu_mode =
@@ -345,7 +345,7 @@ int dmenu_switcher_dialog ( void )
        char         **tokens = tokenize ( select, config.case_sensitive );
        unsigned int i        = 0;
        for ( i = 0; i < cmd_list_length; i++ ) {
-            if ( token_match ( tokens, cmd_list[i], is_not_ascii ( cmd_list[i] ), config.case_sensitive ) ) {
+            if ( token_match ( tokens, cmd_list[i], !g_str_is_ascii( cmd_list[i] ), config.case_sensitive ) ) {
                pd->selected_line = i;
                break;
            }
@@ -356,7 +356,7 @@ int dmenu_switcher_dialog ( void )
        char         **tokens = tokenize ( config.filter ? config.filter : "", config.case_sensitive );
        unsigned int i        = 0;
        for ( i = 0; i < cmd_list_length; i++ ) {
-            if ( token_match ( tokens, cmd_list[i], is_not_ascii ( cmd_list[i] ), config.case_sensitive ) ) {
+            if ( token_match ( tokens, cmd_list[i], !g_str_is_ascii( cmd_list[i] ), config.case_sensitive ) ) {
                dmenu_output_formatted_line ( pd->format, cmd_list[i], i, config.filter );
            }
        }
--- a/source/dialogs/drun.c
+++ b/source/dialogs/drun.c
@@ -338,9 +338,9 @@ static int drun_is_not_ascii ( const Mode *sw, unsigned int index )
 {
    DRunModePrivateData *pd = (DRunModePrivateData *) sw->private_data;
    if ( pd->entry_list[index].generic_name ) {
-        return is_not_ascii ( pd->entry_list[index].name ) || is_not_ascii ( pd->entry_list[index].generic_name );
+        return !g_str_is_ascii ( pd->entry_list[index].name ) || !g_str_is_ascii ( pd->entry_list[index].generic_name );
    }
-    return is_not_ascii ( pd->entry_list[index].name );
+    return !g_str_is_ascii ( pd->entry_list[index].name );
 }

 Mode drun_mode =
--- a/source/dialogs/run.c
+++ b/source/dialogs/run.c
@@ -340,7 +340,7 @@ static int run_token_match ( const Mode *sw, char **tokens, int not_ascii, int c
 static int run_is_not_ascii ( const Mode *sw, unsigned int index )
 {
    const RunModePrivateData *rmpd = (const RunModePrivateData *) sw->private_data;
-    return is_not_ascii ( rmpd->cmd_list[index] );
+    return !g_str_is_ascii ( rmpd->cmd_list[index] );
 }
 Mode run_mode =
 {
--- a/source/dialogs/script.c
+++ b/source/dialogs/script.c
@@ -171,7 +171,7 @@ static int script_token_match ( const Mode *sw, char **tokens, int not_ascii, in
 static int script_is_not_ascii ( const Mode *sw, unsigned int index )
 {
    ScriptModePrivateData *rmpd = sw->private_data;
-    return is_not_ascii ( rmpd->cmd_list[index] );
+    return !g_str_is_ascii ( rmpd->cmd_list[index] );
 }

 Mode *script_switcher_parse_setup ( const char *str )
--- a/source/dialogs/ssh.c
+++ b/source/dialogs/ssh.c
@@ -390,7 +390,7 @@ static int ssh_token_match ( const Mode *sw, char **tokens, int not_ascii, int c
 static int ssh_is_not_ascii ( const Mode *sw, unsigned int index )
 {
    SSHModePrivateData *rmpd = (SSHModePrivateData *) sw->private_data;
-    return is_not_ascii ( rmpd->cmd_list[index] );
+    return !g_str_is_ascii ( rmpd->cmd_list[index] );
 }

 Mode ssh_mode =
--- a/source/dialogs/window.c
+++ b/source/dialogs/window.c
@@ -579,7 +579,7 @@ static int window_is_not_ascii ( const Mode *sw, unsigned int index )
    int                       idx = winlist_find ( cache_client, ids->array[index] );
    g_assert ( idx >= 0 );
    client                    *c = cache_client->data[idx];
-    return is_not_ascii ( c->role ) || is_not_ascii ( c->class ) || is_not_ascii ( c->title ) || is_not_ascii ( c->name );
+    return !g_str_is_ascii ( c->role ) || !g_str_is_ascii ( c->class ) || !g_str_is_ascii ( c->title ) || !g_str_is_ascii ( c->name );
 }

 Mode window_mode =
--- a/source/helper.c
+++ b/source/helper.c
@@ -46,10 +46,6 @@
 static int  stored_argc   = 0;
 static char **stored_argv = NULL;

-// TODO: is this safe?
-#define NON_ASCII_NON_NULL( x )    ( ( ( x ) < 0 ) )
-#define ASCII_NON_NULL( x )        ( ( ( x ) > 0 ) )
-
 void cmd_set_arguments ( int argc, char **argv )
 {
    stored_argc = argc;
@@ -280,7 +276,7 @@ int find_arg_uint ( const char * const key, unsigned int *val )

 char helper_parse_char ( const char *arg )
 {
-    char retv = -1;
+    char retv = 0x80;
    int  len  = strlen ( arg );
    // If the length is 1, it is not escaped.
    if ( len == 1 ) {
@@ -327,7 +323,7 @@ char helper_parse_char ( const char *arg )
    else if ( len > 2 && arg[0] == '\\' && arg[1] == 'x' ) {
        retv = (char) strtol ( &arg[2], NULL, 16 );
    }
-    if ( retv < 0 ) {
+    if ( (retv&0x80) != 0 ) {
        fprintf ( stderr, "Failed to parse character string: \"%s\"\n", arg );
        // for now default to newline.
        retv = '\n';
@@ -346,39 +342,6 @@ int find_arg_char ( const char * const key, char *val )
    return FALSE;
 }

-/*
- * auxiliary to `fuzzy-token-match' below;
- */
-static void advance_unicode_glyph ( char** token_in, char** input_in )
-{
-    // determine the end of the glyph from token
-
-    char *token = *token_in;
-    char *input = *input_in;
-
-    while ( NON_ASCII_NON_NULL ( *token ) ) {
-        token++;
-    }
-
-    // now we know the glyph length, we can scan for that substring in input
-    // temporarily add a null-terminator in case:
-    char glyph_end = *token;
-    *token = 0;
-    char *match = strstr ( input, *token_in );
-    *token = glyph_end;
-
-    if ( match ) {
-        *token_in = token;
-        *input_in = match;
-    }
-    else {
-        // wind input along to the end so that we fail
-        while ( **input_in ) {
-            ( *input_in )++;
-        }
-    }
-}
-
 /**
 * Shared 'token_match' function.
 * Matches tokenized.
@@ -389,37 +352,21 @@ static int fuzzy_token_match ( char **tokens, const char *input, __attribute__(

    // Do a tokenized match.

-    // TODO: this doesn't work for unicode input, because it may split a codepoint which is over two bytes.
-    //       mind you, it didn't work before I fiddled with it.
-
-    // this could perhaps be a bit more efficient by iterating over all the tokens at once.
-
    if ( tokens ) {
-        char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input;
+        char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) g_ascii_strdown(input,-1);
        for ( int j = 0; match && tokens[j]; j++ ) {
            char *t     = compk;
            char *token = tokens[j];

            while ( *t && *token ) {
-                if ( *token > 0 ) { // i.e. we are at an ascii codepoint
-                    if ( ( case_sensitive && ( *t == *token ) ) ||
-                         ( !case_sensitive && ( tolower ( *t ) == tolower ( *token ) ) ) ) {
-                        token++;
-                    }
+                if (  ( g_utf8_get_char(t) == g_utf8_get_char(token) ) ) {
+                    token = g_utf8_next_char(token);
                }
-                else{
-                    // we are not at an ascii codepoint, and so we need to do something
-                    // complicated
-                    advance_unicode_glyph ( &token, &t );
-                }
-                t++;
+                t = g_utf8_next_char(t);
            }
-
            match = !( *token );
        }
-        if ( not_ascii ) {
-            g_free ( compk );
-        }
+        g_free ( compk );
    }

    return match;
@@ -569,7 +516,7 @@ void config_sanity_check ( Display *display )
        config.threads = 1;
        long procs = sysconf ( _SC_NPROCESSORS_CONF );
        if ( procs > 0 ) {
-            config.threads = MIN ( procs, UINT_MAX );
+            config.threads = MIN ( procs, 128l );
        }
    }
    // If alternative row is not set, copy the normal background color.
@@ -652,17 +599,6 @@ void config_sanity_check ( Display *display )
    g_string_free ( msg, TRUE );
 }

-int is_not_ascii ( const char * str )
-{
-    while ( ASCII_NON_NULL ( *str ) ) {
-        str++;
-    }
-    if ( *str ) {
-        return 1;
-    }
-    return 0;
-}
-
 char *rofi_expand_path ( const char *input )
 {
    char **str = g_strsplit ( input, G_DIR_SEPARATOR_S, -1 );