Make levenshtein sort utf8 aware and obey case sensitive setting.

- Add tests.
        - Use GLib's gunichar for comparison.
This commit is contained in:
Dave Davenport 2016-01-04 17:14:15 +01:00
parent 017f9e47ed
commit d661a515f0
4 changed files with 59 additions and 24 deletions

View File

@ -154,4 +154,5 @@ void cmd_set_arguments ( int argc, char **argv );
* @returns path
*/
char *rofi_expand_path ( const char *input );
/**
 * @param needle   The string to measure (UTF-8 encoded).
 * @param haystack The string to measure against (UTF-8 encoded).
 *
 * Calculate the Levenshtein (edit) distance between needle and haystack.
 * The distance is counted in unicode characters, not in bytes. Unless
 * config.case_sensitive is set, characters are lower-cased before they are
 * compared.
 *
 * @returns the edit distance between the two strings.
 */
unsigned int levenshtein ( const char *needle, const char *haystack );
#endif // ROFI_HELPER_H

View File

@ -624,3 +624,36 @@ char *rofi_expand_path ( const char *input )
g_strfreev ( str );
return retv;
}
/** Smallest of three values. Every argument may be evaluated more than once. */
#define MIN3( a, b, c ) ( ( a ) < ( b ) ? ( ( a ) < ( c ) ? ( a ) : ( c ) ) : ( ( b ) < ( c ) ? ( b ) : ( c ) ) )
/**
 * @param needle   The string to measure (UTF-8 encoded).
 * @param haystack The string to measure against (UTF-8 encoded).
 *
 * Calculate the Levenshtein (edit) distance between needle and haystack.
 * The distance is counted in unicode characters, not in bytes. Unless
 * config.case_sensitive is set, characters are lower-cased before they are
 * compared.
 *
 * @returns the number of single character insertions, deletions and
 * substitutions needed to turn one string into the other.
 */
unsigned int levenshtein ( const char *needle, const char *haystack )
{
    const size_t needlelen   = g_utf8_strlen ( needle, -1 );
    const size_t haystacklen = g_utf8_strlen ( haystack, -1 );
    // Read the setting once, so every comparison in this call uses the same rule.
    const int    fold_case = !config.case_sensitive;

    // Decode (and case-fold) the needle once, instead of once for every
    // character in the haystack.
    // One spare slot keeps the array length non-zero for an empty needle.
    gunichar   needlechars[needlelen + 1];
    const char *iter = needle;
    for ( size_t i = 0; i < needlelen; i++ ) {
        gunichar c = g_utf8_get_char ( iter );
        needlechars[i] = fold_case ? g_unichar_tolower ( c ) : c;
        iter           = g_utf8_next_char ( iter );
    }

    // column[y] is the distance between the first y characters of needle and
    // the part of haystack that has been processed so far.
    unsigned int column[needlelen + 1];
    for ( size_t y = 0; y <= needlelen; y++ ) {
        column[y] = y;
    }

    for ( size_t x = 1; x <= haystacklen; x++ ) {
        gunichar haystackc = g_utf8_get_char ( haystack );
        if ( fold_case ) {
            haystackc = g_unichar_tolower ( haystackc );
        }
        // Value of column[y - 1] from before this haystack character was processed.
        unsigned int lastdiag = x - 1;
        column[0] = x;
        for ( size_t y = 1; y <= needlelen; y++ ) {
            const unsigned int olddiag    = column[y];
            const unsigned int substitute = lastdiag + ( needlechars[y - 1] == haystackc ? 0 : 1 );
            column[y] = MIN3 ( olddiag + 1, column[y - 1] + 1, substitute );
            lastdiag  = olddiag;
        }
        haystack = g_utf8_next_char ( haystack );
    }
    return column[needlelen];
}

View File

@ -173,28 +173,6 @@ static int lev_sort ( const void *p1, const void *p2, void *arg )
return distances[*a] - distances[*b];
}
/** Smallest of three values. Every argument may be evaluated more than once. */
#define MIN3( a, b, c ) ( ( a ) < ( b ) ? ( ( a ) < ( c ) ? ( a ) : ( c ) ) : ( ( b ) < ( c ) ? ( b ) : ( c ) ) )
/**
 * @param s1 First string.
 * @param s2 Second string.
 *
 * Calculate the Levenshtein (edit) distance between s1 and s2, comparing the
 * strings byte by byte.
 *
 * @returns the edit distance between the two strings.
 */
static unsigned int levenshtein ( const char *s1, const char *s2 )
{
    const size_t rows = strlen ( s1 );
    const size_t cols = strlen ( s2 );
    // dist[r] is the distance between the first r bytes of s1 and the part of
    // s2 that has been processed so far.
    unsigned int dist[rows + 1];
    unsigned int row, col;

    row = 0;
    while ( row <= rows ) {
        dist[row] = row;
        row++;
    }

    for ( col = 0; col < cols; col++ ) {
        // Value of dist[row] from before this byte of s2 was processed.
        unsigned int diagonal = col;
        dist[0] = col + 1;
        for ( row = 0; row < rows; row++ ) {
            const unsigned int previous = dist[row + 1];
            const unsigned int cost     = ( s1[row] == s2[col] ) ? 0 : 1;
            const unsigned int removal  = previous + 1;
            const unsigned int insert   = dist[row] + 1;
            const unsigned int replace  = diagonal + cost;
            dist[row + 1] = MIN3 ( removal, insert, replace );
            diagonal      = previous;
        }
    }
    return dist[rows];
}
// State of the menu.
typedef struct MenuState
@ -861,7 +839,13 @@ static void filter_elements ( thread_state *t, G_GNUC_UNUSED gpointer user_data
t->state->line_map[t->start + t->count] = i;
if ( config.levenshtein_sort ) {
// This is inefficient, need to fix it.
char * str = t->state->sw->mgrv ( t->state->sw, i, &st, TRUE );
char * str = NULL;
if ( t->state->sw->get_completion ) {
str = t->state->sw->get_completion ( t->state->sw, i );
}
else{
str = t->state->sw->mgrv ( t->state->sw, i, &st, TRUE );
}
t->state->distance[i] = levenshtein ( t->state->text->text, str );
g_free ( str );
}

View File

@ -7,10 +7,18 @@
static int test = 0;
#define TASSERT( a ) { \
#define TASSERT( a ) { \
assert ( a ); \
printf ( "Test %i passed (%s)\n", ++test, # a ); \
}
/**
 * Test that two unsigned integer expressions are equal.
 *
 * Both expressions are evaluated exactly once and converted to unsigned int,
 * so the values that are printed are the values that were compared and any
 * side effect of an expression happens only once.
 * Prints the result, increments the test counter, and aborts on mismatch.
 */
#define TASSERTE( a, b )    {                                                                                   \
        const unsigned int tasserte_lhs = ( a );                                                                \
        const unsigned int tasserte_rhs = ( b );                                                                \
        if ( tasserte_lhs == tasserte_rhs ) {                                                                   \
            printf ( "Test %i passed (%s == %s) (%u == %u)\n", ++test, # a, # b, tasserte_lhs, tasserte_rhs ); \
        }else {                                                                                                 \
            printf ( "Test %i failed (%s == %s) (%u != %u)\n", ++test, # a, # b, tasserte_lhs, tasserte_rhs ); \
            abort ( );                                                                                          \
        }                                                                                                       \
}
void error_dialog ( const char *msg, G_GNUC_UNUSED int markup )
{
@ -127,4 +135,13 @@ int main ( int argc, char ** argv )
TASSERT ( retv[2] && strcmp ( retv[2], "bEp" ) == 0 );
TASSERT ( retv[3] && strcmp ( retv[3], "bEE" ) == 0 );
tokenize_free ( retv );
TASSERT ( levenshtein ( "aap", "aap" ) == 0 );
TASSERT ( levenshtein ( "aap", "aap " ) == 1 );
TASSERT ( levenshtein ( "aap ", "aap" ) == 1 );
TASSERTE ( levenshtein ( "aap", "aap noot" ), 5 );
TASSERTE ( levenshtein ( "aap", "noot aap" ), 5 );
TASSERTE ( levenshtein ( "aap", "noot aap mies" ), 10 );
TASSERTE ( levenshtein ( "noot aap mies", "aap" ), 10 );
TASSERTE ( levenshtein ( "otp", "noot aap" ), 5 );
}