1
0
Fork 0
mirror of https://github.com/davatorium/rofi.git synced 2024-10-20 05:22:08 -04:00

Merge remote-tracking branch 'faf/fast-ascii-filtering'

This commit is contained in:
Dave Davenport 2015-10-11 13:42:56 +02:00
commit c9746e8543
7 changed files with 153 additions and 53 deletions

View file

@ -16,7 +16,7 @@ int helper_parse_setup ( char * string, char ***output, int *length, ... );
/** /**
* Implementation of fgets with custom separator. * Implementation of fgets with custom separator.
*/ */
char* fgets_s ( char* s, int n, FILE *iop, char sep ); char* fgets_s ( char* s, unsigned int n, FILE *iop, char sep );
/** /**
* @param token The string for which we want a collation key. * @param token The string for which we want a collation key.
@ -102,7 +102,7 @@ int find_arg ( const char * const key );
* *
* @returns 1 when matches, 0 otherwise * @returns 1 when matches, 0 otherwise
*/ */
int token_match ( char **tokens, const char *input, int case_sensitive, int token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive,
__attribute__( ( unused ) ) unsigned int index, __attribute__( ( unused ) ) unsigned int index,
__attribute__( ( unused ) ) Switcher * data ); __attribute__( ( unused ) ) Switcher * data );
@ -152,4 +152,11 @@ char helper_parse_char ( const char *arg );
* Set the application arguments. * Set the application arguments.
*/ */
void cmd_set_arguments ( int argc, char **argv ); void cmd_set_arguments ( int argc, char **argv );
/**
 * Check whether a string contains any byte outside the 7-bit ASCII range.
 *
 * @param str a UTF8 string
 *
 * @return 1 if the string contains any non-ascii codepoints, 0 otherwise
 */
int is_not_ascii ( const char *str );
#endif // ROFI_HELPER_H #endif // ROFI_HELPER_H

View file

@ -78,7 +78,7 @@ typedef enum
* *
* @returns 1 when it matches, 0 if not. * @returns 1 when it matches, 0 if not.
*/ */
typedef int ( *menu_match_cb )( char **tokens, const char *input, int case_sensitive, unsigned int index, Switcher *data ); typedef int ( *menu_match_cb )( char **tokens, const char *input, int not_ascii, int case_sensitive, unsigned int index, Switcher *data );
/** /**
* @param sw the Switcher to show. * @param sw the Switcher to show.

View file

@ -169,7 +169,7 @@ static SwitcherMode combi_mode_result ( int mretv, char **input, unsigned int se
} }
return MODE_EXIT; return MODE_EXIT;
} }
static int combi_mode_match ( char **tokens, const char *input, static int combi_mode_match ( char **tokens, const char *input, int not_ascii,
int case_sensitive, unsigned int index, Switcher *sw ) int case_sensitive, unsigned int index, Switcher *sw )
{ {
CombiModePrivateData *pd = sw->private_data; CombiModePrivateData *pd = sw->private_data;
@ -178,13 +178,13 @@ static int combi_mode_match ( char **tokens, const char *input,
if ( index >= pd->starts[i] && index < ( pd->starts[i] + pd->lengths[i] ) ) { if ( index >= pd->starts[i] && index < ( pd->starts[i] + pd->lengths[i] ) ) {
if ( tokens && input[0] && tokens[0][0] == '!' ) { if ( tokens && input[0] && tokens[0][0] == '!' ) {
if ( tokens[0][1] == pd->switchers[i]->name[0] ) { if ( tokens[0][1] == pd->switchers[i]->name[0] ) {
return pd->switchers[i]->token_match ( &tokens[1], input, case_sensitive, return pd->switchers[i]->token_match ( &tokens[1], input, not_ascii, case_sensitive,
index - pd->starts[i], pd->switchers[i] ); index - pd->starts[i], pd->switchers[i] );
} }
return 0; return 0;
} }
else { else {
return pd->switchers[i]->token_match ( tokens, input, case_sensitive, return pd->switchers[i]->token_match ( tokens, input, not_ascii, case_sensitive,
index - pd->starts[i], pd->switchers[i] ); index - pd->starts[i], pd->switchers[i] );
} }
} }

View file

@ -59,27 +59,40 @@ typedef struct _DmenuModePrivateData
static char **get_dmenu ( unsigned int *length ) static char **get_dmenu ( unsigned int *length )
{ {
char buffer[1024]; const unsigned int buf_size = 1024;
char buffer[buf_size];
char **retv = NULL; char **retv = NULL;
char *buffer_end = NULL;
unsigned int rvlength = 1;
*length = 0; *length = 0;
while ( fgets_s ( buffer, 1024, stdin, (char) config.separator ) != NULL ) { while ( ( buffer_end = fgets_s ( buffer, buf_size, stdin, (char) config.separator ) ) != NULL ) {
retv = g_realloc ( retv, ( ( *length ) + 2 ) * sizeof ( char* ) ); if (rvlength < (*length + 2)) {
retv[( *length )] = g_strdup ( buffer ); rvlength *= 2;
retv[( *length ) + 1] = NULL; retv = g_realloc ( retv, ( rvlength ) * sizeof ( char* ) );
}
size_t blength = buffer_end - &(buffer[0]);
char *copy = g_malloc( blength + 1 );
memcpy(copy, buffer, blength);
// Filter out line-end. // Filter out line-end.
if ( retv[( *length )][strlen ( buffer ) - 1] == '\n' ) { if ( copy[blength] == '\n' ) {
retv[( *length )][strlen ( buffer ) - 1] = '\0'; copy[blength] = '\0';
} }
retv[( *length )] = copy;
retv[( *length ) + 1] = NULL;
( *length )++; ( *length )++;
// Stop when we hit 2³¹ entries. // Stop when we hit 2³¹ entries.
if ( ( *length ) == INT_MAX ) { if ( ( *length ) == INT_MAX ) {
return retv; return retv;
} }
} }
retv = g_realloc ( retv, ( *length + 1 ) * sizeof ( char* ) );
return retv; return retv;
} }
@ -303,7 +316,7 @@ int dmenu_switcher_dialog ( void )
char **tokens = tokenize ( select, config.case_sensitive ); char **tokens = tokenize ( select, config.case_sensitive );
unsigned int i = 0; unsigned int i = 0;
for ( i = 0; i < cmd_list_length; i++ ) { for ( i = 0; i < cmd_list_length; i++ ) {
if ( token_match ( tokens, cmd_list[i], config.case_sensitive, 0, NULL ) ) { if ( token_match ( tokens, cmd_list[i], is_not_ascii(cmd_list[i]), config.case_sensitive, 0, NULL ) ) {
pd->selected_line = i; pd->selected_line = i;
break; break;
} }

View file

@ -322,6 +322,7 @@ typedef struct _SwitcherModePrivateData
} SwitcherModePrivateData; } SwitcherModePrivateData;
static int window_match ( char **tokens, __attribute__( ( unused ) ) const char *input, static int window_match ( char **tokens, __attribute__( ( unused ) ) const char *input,
__attribute__( ( unused) ) int not_ascii,
int case_sensitive, unsigned int index, Switcher *sw ) int case_sensitive, unsigned int index, Switcher *sw )
{ {
SwitcherModePrivateData *rmpd = (SwitcherModePrivateData *) sw->private_data; SwitcherModePrivateData *rmpd = (SwitcherModePrivateData *) sw->private_data;
@ -338,19 +339,19 @@ static int window_match ( char **tokens, __attribute__( ( unused ) ) const char
// e.g. when searching 'title element' and 'class element' // e.g. when searching 'title element' and 'class element'
char *ftokens[2] = { tokens[j], NULL }; char *ftokens[2] = { tokens[j], NULL };
if ( !test && c->title[0] != '\0' ) { if ( !test && c->title[0] != '\0' ) {
test = token_match ( ftokens, c->title, case_sensitive, 0, NULL ); test = token_match ( ftokens, c->title, is_not_ascii(c->title), case_sensitive, 0, NULL );
} }
if ( !test && c->class[0] != '\0' ) { if ( !test && c->class[0] != '\0' ) {
test = token_match ( ftokens, c->class, case_sensitive, 0, NULL ); test = token_match ( ftokens, c->class, is_not_ascii(c->class), case_sensitive, 0, NULL );
} }
if ( !test && c->role[0] != '\0' ) { if ( !test && c->role[0] != '\0' ) {
test = token_match ( ftokens, c->role, case_sensitive, 0, NULL ); test = token_match ( ftokens, c->role, is_not_ascii(c->role), case_sensitive, 0, NULL );
} }
if ( !test && c->name[0] != '\0' ) { if ( !test && c->name[0] != '\0' ) {
test = token_match ( ftokens, c->name, case_sensitive, 0, NULL ); test = token_match ( ftokens, c->name, is_not_ascii(c->name), case_sensitive, 0, NULL );
} }
if ( test == 0 ) { if ( test == 0 ) {

View file

@ -37,12 +37,17 @@
#include <sys/types.h> #include <sys/types.h>
#include <sys/file.h> #include <sys/file.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <ctype.h>
#include "helper.h" #include "helper.h"
#include "rofi.h" #include "rofi.h"
static int stored_argc = 0; static int stored_argc = 0;
static char **stored_argv = NULL; static char **stored_argv = NULL;
/*
 * Byte classification helpers for UTF-8 strings.
 *
 * Whether plain `char' is signed is implementation-defined, so comparing a
 * `char' against zero only singles out bytes >= 0x80 where `char' happens to
 * be signed. Both tests therefore go through `unsigned char', and each
 * evaluates its argument exactly once.
 */
// True for bytes 0x80..0xFF; false for NUL and for ASCII bytes.
#define NON_ASCII_NON_NULL( x )    ( ( (unsigned int) (unsigned char) ( x ) ) >= 0x80u )
// True for bytes 0x01..0x7F; false for NUL and for bytes >= 0x80.
// NUL wraps around to UINT_MAX after the subtraction and so fails the range test.
#define ASCII_NON_NULL( x )        ( ( (unsigned int) (unsigned char) ( x ) ) - 1u < 0x7Fu )
void cmd_set_arguments ( int argc, char **argv ) void cmd_set_arguments ( int argc, char **argv )
{ {
stored_argc = argc; stored_argc = argc;
@ -52,7 +57,7 @@ void cmd_set_arguments ( int argc, char **argv )
/** /**
* `fgets` implementation with custom separator. * `fgets` implementation with custom separator.
*/ */
char* fgets_s ( char* s, int n, FILE *iop, char sep ) char* fgets_s ( char* s, unsigned int n, FILE *iop, char sep )
{ {
// Map these to registers. // Map these to registers.
register int c = EOF; register int c = EOF;
@ -72,7 +77,7 @@ char* fgets_s ( char* s, int n, FILE *iop, char sep )
*cs = '\0'; *cs = '\0';
// if last read was end of file and current index is start, we are done: // if last read was end of file and current index is start, we are done:
// Return NULL. // Return NULL.
return ( c == EOF && cs == s ) ? NULL : s; return ( c == EOF && cs == s ) ? NULL : cs;
} }
/** /**
@ -326,49 +331,102 @@ int find_arg_char ( const char * const key, char *val )
return FALSE; return FALSE;
} }
/*
 * Auxiliary to `fuzzy_token_match' below.
 *
 * Takes the run of consecutive non-ASCII bytes (>= 0x80) that starts at
 * *token_in and searches for that exact byte sequence in *input_in.
 *
 * @param token_in in/out: start of the run inside the token. On a hit it is
 *                 moved just past the run; on a miss it is left untouched.
 * @param input_in in/out: current position in the input. On a hit it is moved
 *                 to the first byte of the occurrence; on a miss it is moved to
 *                 the last byte of the input (or left alone if the input is empty).
 *
 * Neither string is written to.
 */
static void advance_unicode_glyph( char** token_in, char** input_in ) {
    char *run_start = *token_in;
    char *input     = *input_in;

    // Find the end of the non-ASCII run. The test goes through unsigned char
    // because the signedness of plain char is implementation-defined.
    char *run_end = run_start;
    while ( ( (unsigned char) *run_end ) >= 0x80u ) {
        run_end++;
    }
    size_t run_len = (size_t) ( run_end - run_start );

    // Length-bounded search, so the token does not need a temporary terminator
    // patched into it.
    char *match = NULL;
    if ( run_len == 0 ) {
        // An empty needle matches at the current position, as strstr would.
        match = input;
    }
    else {
        for ( char *pos = input; *pos != '\0'; pos++ ) {
            if ( strncmp ( pos, run_start, run_len ) == 0 ) {
                match = pos;
                break;
            }
        }
    }

    if ( match ) {
        *token_in = run_end;
        *input_in = match;
    }
    else {
        // The caller steps the input forward by one byte after this call, so
        // park it on the LAST byte rather than on the terminator: the step then
        // lands exactly on the terminator instead of one past it, and the
        // caller's scan stops with the token unconsumed.
        size_t remaining = strlen ( input );
        if ( remaining > 0 ) {
            *input_in = input + remaining - 1;
        }
    }
}
/** /**
* Shared 'token_match' function. * Shared 'token_match' function.
* Matches tokenized. * Matches tokenized.
*/ */
static int fuzzy_token_match ( char **tokens, const char *input, int case_sensitive ) static int fuzzy_token_match ( char **tokens, const char *input, __attribute__( (unused) ) int not_ascii, int case_sensitive )
{ {
int match = 1; int match = 1;
char *compk = token_collate_key ( input, case_sensitive );
// Do a tokenized match.
if ( tokens ) {
for ( int j = 0; match && tokens[j]; j++ ) {
char *t = compk;
int token_len = strlen ( tokens[j] );
for ( int id = 0; match && t != NULL && id < token_len; id++ ) {
match = ( ( t = strchr ( t, tokens[j][id] ) ) != NULL );
// next should match the next character.
if ( t != NULL ) {
t++;
}
}
}
}
g_free ( compk );
return match;
}
static int normal_token_match ( char **tokens, const char *input, int case_sensitive )
{
int match = 1;
char *compk = token_collate_key ( input, case_sensitive );
// Do a tokenized match. // Do a tokenized match.
// TODO: this doesn't work for unicode input, because it may split a codepoint which is over two bytes.
// mind you, it didn't work before I fiddled with it.
// this could perhaps be a bit more efficient by iterating over all the tokens at once.
if ( tokens ) { if ( tokens ) {
char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input;
for ( int j = 0; match && tokens[j]; j++ ) { for ( int j = 0; match && tokens[j]; j++ ) {
match = ( strstr ( compk, tokens[j] ) != NULL ); char *t = compk;
char *token = tokens[j];
while (*t && *token) {
if ( *token > 0 ) // i.e. we are at an ascii codepoint
{
if ( ( case_sensitive && (*t == *token)) ||
(!case_sensitive && (tolower(*t) == tolower(*token))) )
token++;
}
else
{
// we are not at an ascii codepoint, and so we need to do something
// complicated
advance_unicode_glyph( &token, &t );
}
t++;
}
match = !(*token);
} }
if (not_ascii) g_free ( compk );
} }
g_free ( compk );
return match; return match;
} }
static int glob_token_match ( char **tokens, const char *input, int case_sensitive ) static int normal_token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive )
{ {
int match = 1; int match = 1;
char *compk = token_collate_key ( input, case_sensitive );
// Do a tokenized match.
if ( tokens ) {
char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input;
char *(*comparison)(const char *, const char *);
comparison = (case_sensitive || not_ascii) ? strstr : strcasestr;
for ( int j = 0; match && tokens[j]; j++ ) {
match = (comparison( compk, tokens[j] ) != NULL );
}
if (not_ascii) g_free ( compk );
}
return match;
}
static int glob_token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive )
{
int match = 1;
char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input;
// Do a tokenized match. // Do a tokenized match.
if ( tokens ) { if ( tokens ) {
@ -376,20 +434,21 @@ static int glob_token_match ( char **tokens, const char *input, int case_sensiti
match = g_pattern_match_simple ( tokens[j], compk ); match = g_pattern_match_simple ( tokens[j], compk );
} }
} }
g_free ( compk ); if (not_ascii) g_free ( compk );
return match; return match;
} }
int token_match ( char **tokens, const char *input, int case_sensitive,
int token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive,
__attribute__( ( unused ) ) unsigned int index, __attribute__( ( unused ) ) unsigned int index,
__attribute__( ( unused ) ) Switcher *data ) __attribute__( ( unused ) ) Switcher *data )
{ {
if ( config.glob ) { if ( config.glob ) {
return glob_token_match ( tokens, input, case_sensitive ); return glob_token_match ( tokens, input, not_ascii, case_sensitive );
} }
else if ( config.fuzzy ) { else if ( config.fuzzy ) {
return fuzzy_token_match ( tokens, input, case_sensitive ); return fuzzy_token_match ( tokens, input, not_ascii, case_sensitive );
} }
return normal_token_match ( tokens, input, case_sensitive ); return normal_token_match ( tokens, input, not_ascii, case_sensitive );
} }
int execute_generator ( const char * cmd ) int execute_generator ( const char * cmd )
@ -515,3 +574,12 @@ void config_sanity_check ( )
config.menu_bg_alt = config.menu_bg; config.menu_bg_alt = config.menu_bg;
} }
} }
/**
 * Check whether a string contains any byte outside the 7-bit ASCII range.
 *
 * Bytes are inspected as `unsigned char': the signedness of plain `char' is
 * implementation-defined, so a signed comparison against zero would never
 * flag a byte >= 0x80 on platforms where `char' is unsigned.
 *
 * @param str a UTF8 string; NULL is treated like the empty string.
 *
 * @return 1 if the string contains any non-ascii codepoints, 0 otherwise
 */
int is_not_ascii ( const char * str )
{
    if ( str == NULL ) {
        return 0;
    }
    for ( const unsigned char *byte = (const unsigned char *) str; *byte != '\0'; byte++ ) {
        if ( *byte >= 0x80u ) {
            return 1;
        }
    }
    return 0;
}

View file

@ -190,6 +190,7 @@ typedef struct MenuState
unsigned int *selected_line; unsigned int *selected_line;
MenuReturn retv; MenuReturn retv;
char **lines; char **lines;
int *lines_not_ascii;
int line_height; int line_height;
}MenuState; }MenuState;
@ -252,6 +253,7 @@ static void menu_free_state ( MenuState *state )
g_free ( state->boxes ); g_free ( state->boxes );
g_free ( state->line_map ); g_free ( state->line_map );
g_free ( state->lines_not_ascii );
} }
/** /**
@ -669,7 +671,7 @@ static void menu_refilter ( MenuState *state )
// input changed // input changed
for ( unsigned int i = 0; i < state->num_lines; i++ ) { for ( unsigned int i = 0; i < state->num_lines; i++ ) {
int match = state->sw->token_match ( tokens, state->lines[i], config.case_sensitive, i, state->sw ); int match = state->sw->token_match ( tokens, state->lines[i], state->lines_not_ascii[i], config.case_sensitive, i, state->sw );
// If each token was matched, add it to list. // If each token was matched, add it to list.
if ( match ) { if ( match ) {
@ -688,6 +690,7 @@ static void menu_refilter ( MenuState *state )
} }
state->filtered_lines = state->num_lines; state->filtered_lines = state->num_lines;
} }
state->selected = MIN ( state->selected, state->filtered_lines - 1 ); state->selected = MIN ( state->selected, state->filtered_lines - 1 );
if ( config.auto_select == TRUE && state->filtered_lines == 1 && state->num_lines > 1 ) { if ( config.auto_select == TRUE && state->filtered_lines == 1 && state->num_lines > 1 ) {
@ -951,6 +954,14 @@ MenuReturn menu ( Switcher *sw, char **input, char *prompt, unsigned int *select
}; };
// Request the lines to show. // Request the lines to show.
state.lines = sw->get_data ( &( state.num_lines ), sw ); state.lines = sw->get_data ( &( state.num_lines ), sw );
state.lines_not_ascii = g_malloc0_n( state.num_lines, sizeof( int ) );
// find out which lines contain non-ascii codepoints, so we can be faster in some cases.
for (unsigned int line = 0; state.lines[line]; line++) {
state.lines_not_ascii[line] = is_not_ascii(state.lines[line]);
}
if ( next_pos ) { if ( next_pos ) {
*next_pos = *selected_line; *next_pos = *selected_line;
} }