Merge remote-tracking branch 'faf/fast-ascii-filtering'

2024-10-20 05:22:08 -04:00 · 2015-10-11 13:42:56 +02:00 · 2015-10-11 13:42:56 +02:00 · c9746e8543
commit c9746e8543
parent 01aa2e32cc a53061b890
7 changed files with 153 additions and 53 deletions
--- a/include/helper.h
+++ b/include/helper.h
@ -16,7 +16,7 @@ int helper_parse_setup ( char * string, char ***output, int *length, ... );
 /**
 * Implementation of fgets with custom separator.
 */
-char* fgets_s ( char* s, int n, FILE *iop, char sep );
+char* fgets_s ( char* s, unsigned int n, FILE *iop, char sep );
 /**
 * @param token The string for which we want a collation key.
@ -102,7 +102,7 @@ int find_arg ( const char * const key );
 *
 * @returns 1 when matches, 0 otherwise
 */
-int token_match ( char **tokens, const char *input, int case_sensitive,
+int token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive,
                  __attribute__( ( unused ) ) unsigned int index,
                  __attribute__( ( unused ) ) Switcher * data );
@ -152,4 +152,11 @@ char helper_parse_char ( const char *arg );
 * Set the application arguments.
 */
 void cmd_set_arguments ( int argc, char **argv );
 /**
 * @param str a NUL-terminated UTF-8 string
 *
 * @returns 1 when the string contains any non-ASCII codepoint, 0 otherwise
 */
 int is_not_ascii ( const char *str );
 #endif // ROFI_HELPER_H
--- a/include/rofi.h
+++ b/include/rofi.h
@ -78,7 +78,7 @@ typedef enum
 *
 * @returns 1 when it matches, 0 if not.
 */
-typedef int ( *menu_match_cb )( char **tokens, const char *input, int case_sensitive, unsigned int index, Switcher *data );
+typedef int ( *menu_match_cb )( char **tokens, const char *input, int not_ascii, int case_sensitive, unsigned int index, Switcher *data );
 /**
 * @param sw the Switcher to show.
--- a/source/dialogs/combi.c
+++ b/source/dialogs/combi.c
@ -169,7 +169,7 @@ static SwitcherMode combi_mode_result ( int mretv, char **input, unsigned int se
    }
    return MODE_EXIT;
 }
-static int combi_mode_match ( char **tokens, const char *input,
+static int combi_mode_match ( char **tokens, const char *input, int not_ascii,
                              int case_sensitive, unsigned int index, Switcher *sw )
 {
    CombiModePrivateData *pd = sw->private_data;
@ -178,13 +178,13 @@ static int combi_mode_match ( char **tokens, const char *input,
        if ( index >= pd->starts[i] && index < ( pd->starts[i] + pd->lengths[i] ) ) {
            if ( tokens && input[0] && tokens[0][0] == '!' ) {
                if ( tokens[0][1] == pd->switchers[i]->name[0] ) {
-                    return pd->switchers[i]->token_match ( &tokens[1], input, case_sensitive,
+                    return pd->switchers[i]->token_match ( &tokens[1], input, not_ascii, case_sensitive,
                                                           index - pd->starts[i], pd->switchers[i] );
                }
                return 0;
            }
            else {
-                return pd->switchers[i]->token_match ( tokens, input, case_sensitive,
+                return pd->switchers[i]->token_match ( tokens, input, not_ascii, case_sensitive,
                                                       index - pd->starts[i], pd->switchers[i] );
            }
        }
--- a/source/dialogs/dmenu.c
+++ b/source/dialogs/dmenu.c
@ -59,27 +59,40 @@ typedef struct _DmenuModePrivateData
 static char **get_dmenu ( unsigned int *length )
 {
-    char buffer[1024];
+    const unsigned int buf_size = 1024;
    char buffer[buf_size];
    char **retv = NULL;
    char *buffer_end = NULL;
    unsigned int rvlength = 1;
    *length = 0;
-    while ( fgets_s ( buffer, 1024, stdin, (char) config.separator ) != NULL ) {
+    while ( ( buffer_end = fgets_s ( buffer, buf_size, stdin, (char) config.separator ) ) != NULL ) {
-        retv                  = g_realloc ( retv, ( ( *length ) + 2 ) * sizeof ( char* ) );
+        if (rvlength < (*length + 2)) {
-        retv[( *length )]     = g_strdup ( buffer );
+          rvlength *= 2;
-        retv[( *length ) + 1] = NULL;
+          retv      = g_realloc ( retv, ( rvlength ) * sizeof ( char* ) );
        }
        size_t blength = buffer_end - &(buffer[0]);
        char *copy = g_malloc( blength + 1 );
        memcpy(copy, buffer, blength);
        // Filter out line-end.
-        if ( retv[( *length )][strlen ( buffer ) - 1] == '\n' ) {
+        if ( copy[blength] == '\n' ) {
-            retv[( *length )][strlen ( buffer ) - 1] = '\0';
+            copy[blength] = '\0';
        }
        retv[( *length )]     = copy;
        retv[( *length ) + 1] = NULL;
        ( *length )++;
        // Stop when we hit 2³¹ entries.
        if ( ( *length ) == INT_MAX ) {
            return retv;
        }
    }
    retv      = g_realloc ( retv, ( *length + 1 ) * sizeof ( char* ) );
    return retv;
 }
@ -303,7 +316,7 @@ int dmenu_switcher_dialog ( void )
        char         **tokens = tokenize ( select, config.case_sensitive );
        unsigned int i        = 0;
        for ( i = 0; i < cmd_list_length; i++ ) {
-            if ( token_match ( tokens, cmd_list[i], config.case_sensitive, 0, NULL ) ) {
+            if ( token_match ( tokens, cmd_list[i], is_not_ascii(cmd_list[i]), config.case_sensitive, 0, NULL ) ) {
                pd->selected_line = i;
                break;
            }
--- a/source/dialogs/window.c
+++ b/source/dialogs/window.c
@ -322,6 +322,7 @@ typedef struct _SwitcherModePrivateData
 } SwitcherModePrivateData;
 static int window_match ( char **tokens, __attribute__( ( unused ) ) const char *input,
                          __attribute__( ( unused) ) int not_ascii,
                          int case_sensitive, unsigned int index, Switcher *sw )
 {
    SwitcherModePrivateData *rmpd = (SwitcherModePrivateData *) sw->private_data;
@ -338,19 +339,19 @@ static int window_match ( char **tokens, __attribute__( ( unused ) ) const char
            // e.g. when searching 'title element' and 'class element'
            char *ftokens[2] = { tokens[j], NULL };
            if ( !test && c->title[0] != '\0' ) {
-                test = token_match ( ftokens, c->title, case_sensitive, 0, NULL );
+                test = token_match ( ftokens, c->title, is_not_ascii(c->title), case_sensitive, 0, NULL );
            }
            if ( !test && c->class[0] != '\0' ) {
-                test = token_match ( ftokens, c->class, case_sensitive, 0, NULL );
+                test = token_match ( ftokens, c->class, is_not_ascii(c->class), case_sensitive, 0, NULL );
            }
            if ( !test && c->role[0] != '\0' ) {
-                test = token_match ( ftokens, c->role, case_sensitive, 0, NULL );
+                test = token_match ( ftokens, c->role, is_not_ascii(c->role), case_sensitive, 0, NULL );
            }
            if ( !test && c->name[0] != '\0' ) {
-                test = token_match ( ftokens, c->name, case_sensitive, 0, NULL );
+                test = token_match ( ftokens, c->name, is_not_ascii(c->name), case_sensitive, 0, NULL );
            }
            if ( test == 0 ) {
--- a/source/helper.c
+++ b/source/helper.c
@ -37,12 +37,17 @@
 #include <sys/types.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <ctype.h>
 #include "helper.h"
 #include "rofi.h"
 static int  stored_argc   = 0;
 static char **stored_argv = NULL;
 // Byte classifiers for UTF-8 text.
 // The signedness of plain `char` is implementation-defined, so a bare
 // `< 0` / `> 0` test only works where `char` happens to be signed. Both
 // macros therefore classify the byte through `unsigned char`, and both
 // evaluate their argument exactly once.
 //
 // True for a byte with the high bit set, i.e. anything outside 7-bit ASCII.
 #define NON_ASCII_NON_NULL( x ) ( ( (unsigned char) ( x ) ) >= 0x80 )
 // True for a 7-bit ASCII byte other than the NUL terminator. Subtracting one
 // wraps NUL around to 0xFF, so a single comparison rejects both NUL and every
 // high-bit byte.
 #define ASCII_NON_NULL( x ) ( ( (unsigned char) ( ( x ) - 1 ) ) < 0x7F )
 void cmd_set_arguments ( int argc, char **argv )
 {
    stored_argc = argc;
@ -52,7 +57,7 @@ void cmd_set_arguments ( int argc, char **argv )
 /**
 *  `fgets` implementation with custom separator.
 */
-char* fgets_s ( char* s, int n, FILE *iop, char sep )
+char* fgets_s ( char* s, unsigned int n, FILE *iop, char sep )
 {
    // Map these to registers.
    register int c = EOF;
@ -72,7 +77,7 @@ char* fgets_s ( char* s, int n, FILE *iop, char sep )
    *cs = '\0';
    // if last read was end of file and current index is start, we are done:
    // Return NULL.
-    return ( c == EOF && cs == s ) ? NULL : s;
+    return ( c == EOF && cs == s ) ? NULL : cs;
 }
 /**
@ -326,49 +331,102 @@ int find_arg_char ( const char * const key, char *val )
    return FALSE;
 }
 /*
 * Auxiliary to the token matching below.
 *
 * Treats the run of non-ASCII (high-bit) bytes at the start of *token_in as
 * one glyph and looks for its first occurrence in *input_in.
 *
 * On a hit, *token_in is moved just past the glyph and *input_in to the start
 * of the occurrence. On a miss, *token_in is left alone and *input_in is moved
 * to its terminating NUL so that the caller's scan fails.
 * Neither string is written to.
 */
 static void advance_unicode_glyph( char** token_in, char** input_in ) {
   char *glyph = *token_in;
   char *input = *input_in;
   // Measure the glyph. Bytes are classified through `unsigned char` because
   // the signedness of plain `char` is implementation-defined.
   size_t glyph_len = 0;
   while ( (unsigned char) glyph[glyph_len] >= 0x80 ) {
     glyph_len++;
   }
   // Length-bounded scan, so no temporary terminator has to be patched into
   // the token. strncmp() stops at input's NUL and never reads past it; an
   // empty glyph matches at the very start, exactly like strstr() would.
   char *match = NULL;
   for ( char *candidate = input;; candidate++ ) {
     if ( strncmp ( candidate, glyph, glyph_len ) == 0 ) {
       match = candidate;
       break;
     }
     if ( *candidate == '\0' ) {
       break;
     }
   }
   if ( match != NULL ) {
     *token_in = glyph + glyph_len;
     *input_in = match;
   }
   else {
     // wind input along to the end so that we fail
     *input_in = input + strlen ( input );
   }
 }
 /**
 * Shared 'token_match' function.
 * Matches tokenized.
 */
-static int fuzzy_token_match ( char **tokens, const char *input, int case_sensitive )
+static int fuzzy_token_match ( char **tokens, const char *input, __attribute__( (unused) ) int not_ascii,  int case_sensitive )
 {
    int  match  = 1;
    char *compk = token_collate_key ( input, case_sensitive );
    // Do a tokenized match.
    if ( tokens ) {
        for ( int j = 0; match && tokens[j]; j++ ) {
            char *t        = compk;
            int  token_len = strlen ( tokens[j] );
            for ( int id = 0; match && t != NULL && id < token_len; id++ ) {
                match = ( ( t = strchr ( t, tokens[j][id] ) ) != NULL );
                // next should match the next character.
                if ( t != NULL ) {
                    t++;
                }
            }
        }
    }
    g_free ( compk );
    return match;
 }
 static int normal_token_match ( char **tokens, const char *input, int case_sensitive )
 {
    int  match  = 1;
    char *compk = token_collate_key ( input, case_sensitive );
    // Do a tokenized match.
    // TODO: this doesn't work for unicode input, because it may split a codepoint which is over two bytes.
    //       mind you, it didn't work before I fiddled with it.
    // this could perhaps be a bit more efficient by iterating over all the tokens at once.
    if ( tokens ) {
        char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input;
        for ( int j = 0; match && tokens[j]; j++ ) {
-            match = ( strstr ( compk, tokens[j] ) != NULL );
+            char *t        = compk;
            char *token    = tokens[j];
            while (*t && *token) {
              if ( *token > 0 ) // i.e. we are at an ascii codepoint
                {
                  if ( ( case_sensitive && (*t == *token)) ||
                       (!case_sensitive && (tolower(*t) == tolower(*token))) )
                    token++;
                }
              else
                {
                  // we are not at an ascii codepoint, and so we need to do something
                  // complicated
                  advance_unicode_glyph( &token, &t );
                }
              t++;
            }
            match = !(*token);
        }
        if (not_ascii) g_free ( compk );
    }
-    g_free ( compk );
+
    return match;
 }
-static int glob_token_match ( char **tokens, const char *input, int case_sensitive )
+static int normal_token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive )
 {
    int  match  = 1;
-    char *compk = token_collate_key ( input, case_sensitive );
+
    // Do a tokenized match.
    if ( tokens ) {
      char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input;
      char *(*comparison)(const char *, const char *);
      comparison = (case_sensitive || not_ascii) ? strstr : strcasestr;
      for ( int j = 0; match && tokens[j]; j++ ) {
        match = (comparison( compk, tokens[j] ) != NULL );
      }
      if (not_ascii) g_free ( compk );
    }
    return match;
 }
 static int glob_token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive )
 {
    int  match  = 1;
    char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input;
    // Do a tokenized match.
    if ( tokens ) {
@ -376,20 +434,21 @@ static int glob_token_match ( char **tokens, const char *input, int case_sensiti
            match = g_pattern_match_simple (  tokens[j], compk );
        }
    }
-    g_free ( compk );
+    if (not_ascii) g_free ( compk );
    return match;
 }
-int token_match ( char **tokens, const char *input, int case_sensitive,
+
 int token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive,
                  __attribute__( ( unused ) ) unsigned int index,
                  __attribute__( ( unused ) ) Switcher *data )
 {
    if ( config.glob ) {
-        return glob_token_match ( tokens, input, case_sensitive );
+        return glob_token_match ( tokens, input, not_ascii, case_sensitive );
    }
    else if ( config.fuzzy ) {
-        return fuzzy_token_match ( tokens, input, case_sensitive );
+        return fuzzy_token_match ( tokens, input, not_ascii, case_sensitive );
    }
-    return normal_token_match ( tokens, input, case_sensitive );
+    return normal_token_match ( tokens, input, not_ascii, case_sensitive );
 }
 int execute_generator ( const char * cmd )
@ -515,3 +574,12 @@ void config_sanity_check (  )
        config.menu_bg_alt = config.menu_bg;
    }
 }
 /**
 * Report whether a NUL-terminated string holds any byte outside 7-bit ASCII.
 *
 * @param str NUL-terminated string to scan.
 *
 * @returns 1 when at least one byte has its high bit set, 0 otherwise
 *          (including for the empty string).
 */
 int is_not_ascii ( const char * str )
 {
   // Inspect the bytes as `unsigned char`: the signedness of plain `char` is
   // implementation-defined, so a `> 0` test on it would silently accept
   // high-bit bytes on targets where `char` is unsigned.
   for ( const unsigned char *byte = (const unsigned char *) str; *byte != '\0'; byte++ ) {
     if ( *byte >= 0x80 ) {
       return 1;
     }
   }
   return 0;
 }
--- a/source/rofi.c
+++ b/source/rofi.c
@ -190,6 +190,7 @@ typedef struct MenuState
    unsigned int *selected_line;
    MenuReturn   retv;
    char         **lines;
    int          *lines_not_ascii;
    int          line_height;
 }MenuState;
@ -252,6 +253,7 @@ static void menu_free_state ( MenuState *state )
    g_free ( state->boxes );
    g_free ( state->line_map );
    g_free ( state->lines_not_ascii );
 }
 /**
@ -669,7 +671,7 @@ static void menu_refilter ( MenuState *state )
        // input changed
        for ( unsigned int i = 0; i < state->num_lines; i++ ) {
-            int match = state->sw->token_match ( tokens, state->lines[i], config.case_sensitive, i, state->sw );
+            int match = state->sw->token_match ( tokens, state->lines[i], state->lines_not_ascii[i], config.case_sensitive, i, state->sw );
            // If each token was matched, add it to list.
            if ( match ) {
@ -688,6 +690,7 @@ static void menu_refilter ( MenuState *state )
        }
        state->filtered_lines = state->num_lines;
    }
    state->selected = MIN ( state->selected, state->filtered_lines - 1 );
    if ( config.auto_select == TRUE && state->filtered_lines == 1 && state->num_lines > 1 ) {
@ -951,6 +954,14 @@ MenuReturn menu ( Switcher *sw, char **input, char *prompt, unsigned int *select
    };
    // Request the lines to show.
    state.lines = sw->get_data ( &( state.num_lines ), sw );
    state.lines_not_ascii = g_malloc0_n( state.num_lines, sizeof( int ) );
    // find out which lines contain non-ascii codepoints, so we can be faster in some cases.
    for (unsigned int line = 0; state.lines[line]; line++) {
      state.lines_not_ascii[line] = is_not_ascii(state.lines[line]);
    }
    if ( next_pos ) {
        *next_pos = *selected_line;
    }