From d661a515f0d4519b7da9d3cd4332313bdb9fd9b9 Mon Sep 17 00:00:00 2001
From: Dave Davenport
Date: Mon, 4 Jan 2016 17:14:15 +0100
Subject: [PATCH] Make levenshtein sort utf8 aware and obey case sensitive setting.

- Add tests.
- Use Glibs unichar for comparison.
---
Review notes (free text after the "---" separator; not part of the commit
message):

 * levenshtein() is removed from source/rofi.c, where it was static, added
   to source/helper.c and declared in include/helper.h. test/helper-test.c
   now calls it.
 * String lengths are counted with g_utf8_strlen() and characters are
   compared as gunichar values. Both sides are lower-cased with
   g_unichar_tolower() unless config.case_sensitive is set.
 * filter_elements() computes the distance against the string returned by
   sw->get_completion() when that callback is set, and falls back to
   sw->mgrv() otherwise.
 * NOTE(review): column[] is a variable-length array sized by the needle's
   character count.
 * NOTE(review): TASSERTE() expands `a` in both the comparison and the
   printf(), so every check calls levenshtein() twice, and the int literals
   passed as `b` are printed with %u.
 * NOTE(review): indentation and column alignment in the hunks below are
   reconstructed and may not match the tree byte for byte;
   `git apply --ignore-whitespace` may be needed.

 include/helper.h   |  1 +
 source/helper.c    | 33 +++++++++++++++++++++++++++++++++
 source/rofi.c      | 30 +++++++-----------------------
 test/helper-test.c | 19 ++++++++++++++++++-
 4 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/include/helper.h b/include/helper.h
index fa4c338f..f21fd51a 100644
--- a/include/helper.h
+++ b/include/helper.h
@@ -154,4 +154,5 @@ void cmd_set_arguments ( int argc, char **argv );
  * @returns path
  */
 char *rofi_expand_path ( const char *input );
+unsigned int levenshtein ( const char *needle, const char *haystack );
 #endif // ROFI_HELPER_H
diff --git a/source/helper.c b/source/helper.c
index f94ef6bc..cc5387eb 100644
--- a/source/helper.c
+++ b/source/helper.c
@@ -624,3 +624,36 @@ char *rofi_expand_path ( const char *input )
     g_strfreev ( str );
     return retv;
 }
+
+#define MIN3( a, b, c ) ( ( a ) < ( b ) ? ( ( a ) < ( c ) ? ( a ) : ( c ) ) : ( ( b ) < ( c ) ? ( b ) : ( c ) ) )
+
+unsigned int levenshtein ( const char *needle, const char *haystack )
+{
+    unsigned int x, y, lastdiag, olddiag;
+    size_t needlelen = g_utf8_strlen ( needle, -1 );
+    size_t haystacklen = g_utf8_strlen ( haystack, -1 );
+    unsigned int column[needlelen + 1];
+    for ( y = 0; y <= needlelen; y++ ) {
+        column[y] = y;
+    }
+    for ( x = 1; x <= haystacklen; x++ ) {
+        const char *needles = needle;
+        column[0] = x;
+        gunichar haystackc = g_utf8_get_char ( haystack );
+        if ( !config.case_sensitive ) {
+            haystackc = g_unichar_tolower ( haystackc );
+        }
+        for ( y = 1, lastdiag = x - 1; y <= needlelen; y++ ) {
+            gunichar needlec = g_utf8_get_char ( needles );
+            if ( !config.case_sensitive ) {
+                needlec = g_unichar_tolower ( needlec );
+            }
+            olddiag = column[y];
+            column[y] = MIN3 ( column[y] + 1, column[y - 1] + 1, lastdiag + ( needlec == haystackc ? 0 : 1 ) );
+            lastdiag = olddiag;
+            needles = g_utf8_next_char ( needles );
+        }
+        haystack = g_utf8_next_char ( haystack );
+    }
+    return column[needlelen];
+}
diff --git a/source/rofi.c b/source/rofi.c
index 0714b1cd..a95ddbb6 100644
--- a/source/rofi.c
+++ b/source/rofi.c
@@ -173,28 +173,6 @@ static int lev_sort ( const void *p1, const void *p2, void *arg )
     return distances[*a] - distances[*b];
 }
 
-#define MIN3( a, b, c ) ( ( a ) < ( b ) ? ( ( a ) < ( c ) ? ( a ) : ( c ) ) : ( ( b ) < ( c ) ? ( b ) : ( c ) ) )
-
-static unsigned int levenshtein ( const char *s1, const char *s2 )
-{
-    unsigned int x, y, lastdiag, olddiag;
-    size_t s1len = strlen ( s1 );
-    size_t s2len = strlen ( s2 );
-    unsigned int column[s1len + 1];
-    for ( y = 0; y <= s1len; y++ ) {
-        column[y] = y;
-    }
-    for ( x = 1; x <= s2len; x++ ) {
-        column[0] = x;
-        for ( y = 1, lastdiag = x - 1; y <= s1len; y++ ) {
-            olddiag = column[y];
-            column[y] = MIN3 ( column[y] + 1, column[y - 1] + 1, lastdiag + ( s1[y - 1] == s2[x - 1] ? 0 : 1 ) );
-            lastdiag = olddiag;
-        }
-    }
-    return column[s1len];
-}
-
 // State of the menu.
 
 typedef struct MenuState
@@ -861,7 +839,13 @@ static void filter_elements ( thread_state *t, G_GNUC_UNUSED gpointer user_data
             t->state->line_map[t->start + t->count] = i;
             if ( config.levenshtein_sort ) {
                 // This is inefficient, need to fix it.
-                char * str = t->state->sw->mgrv ( t->state->sw, i, &st, TRUE );
+                char * str = NULL;
+                if ( t->state->sw->get_completion ) {
+                    str = t->state->sw->get_completion ( t->state->sw, i );
+                }
+                else{
+                    str = t->state->sw->mgrv ( t->state->sw, i, &st, TRUE );
+                }
                 t->state->distance[i] = levenshtein ( t->state->text->text, str );
                 g_free ( str );
             }
diff --git a/test/helper-test.c b/test/helper-test.c
index a5b98903..1e4d9f55 100644
--- a/test/helper-test.c
+++ b/test/helper-test.c
@@ -7,10 +7,18 @@
 
 static int test = 0;
 
-#define TASSERT( a )    { \
+#define TASSERT( a )        { \
         assert ( a ); \
         printf ( "Test %i passed (%s)\n", ++test, # a ); \
 }
+#define TASSERTE( a, b )    { \
+        if ( ( a ) == ( b ) ) { \
+            printf ( "Test %i passed (%s == %s) (%u == %u)\n", ++test, # a, # b, a, b ); \
+        }else { \
+            printf ( "Test %i failed (%s == %s) (%u != %u)\n", ++test, # a, # b, a, b ); \
+            abort ( ); \
+        } \
+}
 
 void error_dialog ( const char *msg, G_GNUC_UNUSED int markup )
 {
@@ -127,4 +135,13 @@ int main ( int argc, char ** argv )
     TASSERT ( retv[2] && strcmp ( retv[2], "bEp" ) == 0 );
     TASSERT ( retv[3] && strcmp ( retv[3], "bEE" ) == 0 );
     tokenize_free ( retv );
+
+    TASSERT ( levenshtein ( "aap", "aap" ) == 0 );
+    TASSERT ( levenshtein ( "aap", "aap " ) == 1 );
+    TASSERT ( levenshtein ( "aap ", "aap" ) == 1 );
+    TASSERTE ( levenshtein ( "aap", "aap noot" ), 5 );
+    TASSERTE ( levenshtein ( "aap", "noot aap" ), 5 );
+    TASSERTE ( levenshtein ( "aap", "noot aap mies" ), 10 );
+    TASSERTE ( levenshtein ( "noot aap mies", "aap" ), 10 );
+    TASSERTE ( levenshtein ( "otp", "noot aap" ), 5 );
 }