From 4452b08288c25fa833e22c97b57045ec5888b547 Mon Sep 17 00:00:00 2001 From: Dave Davenport Date: Wed, 11 Jan 2017 09:20:19 +0100 Subject: [PATCH] Move fzf matcher into helper --- include/helper.h | 14 ++++ source/helper.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++ source/view.c | 172 +--------------------------------------------- 3 files changed, 190 insertions(+), 171 deletions(-) diff --git a/include/helper.h b/include/helper.h index 3398c0ef..d580c4df 100644 --- a/include/helper.h +++ b/include/helper.h @@ -206,5 +206,19 @@ char * rofi_latin_to_utf8_strdup ( const char *input, gssize length ); * @returns the updated retv list. */ PangoAttrList *token_match_get_pango_attr ( ThemeHighlight th, GRegex **tokens, const char *input, PangoAttrList *retv ); + + + +/** + * @param pattern The user input to match against. + * @param plen Pattern length. + * @param str The input to match against pattern. + * @param slen Lenght of str. + * + * FZF like fuzzy sorting algorithm. + * + * @returns the sorting weight. + */ +int rofi_scorer_fuzzy_evaluate ( const char *pattern, glong plen, const char *str, glong slen ); /*@}*/ #endif // ROFI_HELPER_H diff --git a/source/helper.c b/source/helper.c index cb30a22d..11073d66 100644 --- a/source/helper.c +++ b/source/helper.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -720,3 +721,177 @@ char * rofi_force_utf8 ( gchar *start, ssize_t length ) return g_string_free ( string, FALSE ); } + +/**** + * FZF like scorer + */ + +/** Max length of input to score. */ +#define FUZZY_SCORER_MAX_LENGTH 256 +/** minimum score */ +#define MIN_SCORE ( INT_MIN / 2 ) +/** Leading gap score */ +#define LEADING_GAP_SCORE -4 +/** gap score */ +#define GAP_SCORE -5 +/** start of word score */ +#define WORD_START_SCORE 50 +/** non-word score */ +#define NON_WORD_SCORE 40 +/** CamelCase score */ +#define CAMEL_SCORE ( WORD_START_SCORE + GAP_SCORE - 1 ) +/** Consecutive score */ +#define CONSECUTIVE_SCORE ( WORD_START_SCORE + GAP_SCORE ) +/** non-start multiplier */ +#define PATTERN_NON_START_MULTIPLIER 1 +/** start multiplier */ +#define PATTERN_START_MULTIPLIER 2 + +/** + * Character classification. + */ +enum CharClass +{ + /* Lower case */ + LOWER, + /* Upper case */ + UPPER, + /* Number */ + DIGIT, + /* non word character */ + NON_WORD +}; + +/** + * @param c The character to determine class of + * + * @returns the class of the character c. + */ +static enum CharClass rofi_scorer_get_character_class ( gunichar c ) +{ + if ( g_unichar_islower ( c ) ) { + return LOWER; + } + if ( g_unichar_isupper ( c ) ) { + return UPPER; + } + if ( g_unichar_isdigit ( c ) ) { + return DIGIT; + } + return NON_WORD; +} + +/** + * @param prev The previous character. + * @param curr The current character + * + * Scrore the transition. + * + * @returns score of the transition. + */ +static int rofi_scorer_get_score_for ( enum CharClass prev, enum CharClass curr ) +{ + if ( prev == NON_WORD && curr != NON_WORD ) { + return WORD_START_SCORE; + } + if ( ( prev == LOWER && curr == UPPER ) || + ( prev != DIGIT && curr == DIGIT ) ) { + return CAMEL_SCORE; + } + if ( curr == NON_WORD ) { + return NON_WORD_SCORE; + } + return 0; +} + +/** + * @param pattern The user input to match against. + * @param plen Pattern length. + * @param str The input to match against pattern. + * @param slen Lenght of str. + * + * rofi_scorer_fuzzy_evaluate implements a global sequence alignment algorithm to find the maximum accumulated score by + * aligning `pattern` to `str`. It applies when `pattern` is a subsequence of `str`. + * + * Scoring criteria + * - Prefer matches at the start of a word, or the start of subwords in CamelCase/camelCase/camel123 words. See WORD_START_SCORE/CAMEL_SCORE. + * - Non-word characters matter. See NON_WORD_SCORE. + * - The first characters of words of `pattern` receive bonus because they usually have more significance than the rest. + * See PATTERN_START_MULTIPLIER/PATTERN_NON_START_MULTIPLIER. + * - Superfluous characters in `str` will reduce the score (gap penalty). See GAP_SCORE. + * - Prefer early occurrence of the first character. See LEADING_GAP_SCORE/GAP_SCORE. + * + * The recurrence of the dynamic programming: + * dp[i][j]: maximum accumulated score by aligning pattern[0..i] to str[0..j] + * dp[0][j] = leading_gap_penalty(0, j) + score[j] + * dp[i][j] = max(dp[i-1][j-1] + CONSECUTIVE_SCORE, max(dp[i-1][k] + gap_penalty(k+1, j) + score[j] : k < j)) + * + * The first dimension can be suppressed since we do not need a matching scheme, which reduces the space complexity from + * O(N*M) to O(M) + * + * @returns the sorting weight. + */ +int rofi_scorer_fuzzy_evaluate ( const char *pattern, glong plen, const char *str, glong slen ) +{ + if ( slen > FUZZY_SCORER_MAX_LENGTH ) { + return -MIN_SCORE; + } + if ( plen == 5 ) { + plen = plen; + } + glong pi, si; + // whether we are aligning the first character of pattern + gboolean pfirst = TRUE; + // whether the start of a word in pattern + gboolean pstart = TRUE; + // score for each position + int *score = g_malloc_n ( slen, sizeof ( int ) ); + // dp[i]: maximum value by aligning pattern[0..pi] to str[0..si] + int *dp = g_malloc_n ( slen, sizeof ( int ) ); + // uleft: value of the upper left cell; ulefts: maximum value of uleft and cells on the left. The arbitrary initial + // values suppress warnings. + int uleft = 0, ulefts = 0, left, lefts; + const gchar *pit = pattern, *sit; + enum CharClass prev = NON_WORD, cur; + for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) { + cur = rofi_scorer_get_character_class ( g_utf8_get_char ( sit ) ); + score[si] = rofi_scorer_get_score_for ( prev, cur ); + prev = cur; + dp[si] = MIN_SCORE; + } + for ( pi = 0; pi < plen; pi++, pit = g_utf8_next_char ( pit ) ) { + gunichar pc = g_utf8_get_char ( pit ), sc; + if ( g_unichar_isspace ( pc ) ) { + pstart = TRUE; + continue; + } + lefts = MIN_SCORE; + for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) { + left = dp[si]; + lefts = MAX ( lefts + GAP_SCORE, left ); + sc = g_utf8_get_char ( sit ); + if ( config.case_sensitive + ? pc == sc + : g_unichar_tolower ( pc ) == g_unichar_tolower ( sc ) ) { + int t = score[si] * ( pstart ? PATTERN_START_MULTIPLIER : PATTERN_NON_START_MULTIPLIER ); + dp[si] = pfirst + ? LEADING_GAP_SCORE * si + t + : MAX ( uleft + CONSECUTIVE_SCORE, ulefts + t ); + } + else { + dp[si] = MIN_SCORE; + } + uleft = left; + ulefts = lefts; + } + pfirst = pstart = FALSE; + } + lefts = MIN_SCORE; + for ( si = 0; si < slen; si++ ) { + lefts = MAX ( lefts + GAP_SCORE, dp[si] ); + } + g_free ( score ); + g_free ( dp ); + return -lefts; +} + diff --git a/source/view.c b/source/view.c index 075bd4c7..8b60dabc 100644 --- a/source/view.c +++ b/source/view.c @@ -25,7 +25,6 @@ */ #include -#include #include #include #include @@ -64,26 +63,6 @@ /** The Rofi View log domain */ #define LOG_DOMAIN "View" -/** Max length of input to score. */ -#define FUZZY_SCORER_MAX_LENGTH 256 -/** minimum score */ -#define MIN_SCORE ( INT_MIN / 2 ) -/** Leading gap score */ -#define LEADING_GAP_SCORE -4 -/** gap score */ -#define GAP_SCORE -5 -/** start of word score */ -#define WORD_START_SCORE 50 -/** non-word score */ -#define NON_WORD_SCORE 40 -/** CamelCase score */ -#define CAMEL_SCORE ( WORD_START_SCORE + GAP_SCORE - 1 ) -/** Consecutive score */ -#define CONSECUTIVE_SCORE ( WORD_START_SCORE + GAP_SCORE ) -/** non-start multiplier */ -#define PATTERN_NON_START_MULTIPLIER 1 -/** start multiplier */ -#define PATTERN_START_MULTIPLIER 2 #include "xcb.h" /** @@ -562,154 +541,6 @@ static void rofi_view_call_thread ( gpointer data, gpointer user_data ) g_mutex_unlock ( t->mutex ); } -/** - * Character classification. - */ -enum CharClass -{ - /* Lower case */ - LOWER, - /* Upper case */ - UPPER, - /* Number */ - DIGIT, - /* non word character */ - NON_WORD -}; - -/** - * @param c The character to determine class of - * - * @returns the class of the character c. - */ -static enum CharClass rofi_scorer_get_character_class ( gunichar c ) -{ - if ( g_unichar_islower ( c ) ) { - return LOWER; - } - if ( g_unichar_isupper ( c ) ) { - return UPPER; - } - if ( g_unichar_isdigit ( c ) ) { - return DIGIT; - } - return NON_WORD; -} - -/** - * @param prev The previous character. - * @param curr The current character - * - * Scrore the transition. - * - * @returns score of the transition. - */ -static int rofi_scorer_get_score_for ( enum CharClass prev, enum CharClass curr ) -{ - if ( prev == NON_WORD && curr != NON_WORD ) { - return WORD_START_SCORE; - } - if ( ( prev == LOWER && curr == UPPER ) || - ( prev != DIGIT && curr == DIGIT ) ) { - return CAMEL_SCORE; - } - if ( curr == NON_WORD ) { - return NON_WORD_SCORE; - } - return 0; -} - -/** - * @param pattern The user input to match against. - * @param plen Pattern length. - * @param str The input to match against pattern. - * @param slen Lenght of str. - * - * rofi_scorer_fuzzy_evaluate implements a global sequence alignment algorithm to find the maximum accumulated score by - * aligning `pattern` to `str`. It applies when `pattern` is a subsequence of `str`. - * - * Scoring criteria - * - Prefer matches at the start of a word, or the start of subwords in CamelCase/camelCase/camel123 words. See WORD_START_SCORE/CAMEL_SCORE. - * - Non-word characters matter. See NON_WORD_SCORE. - * - The first characters of words of `pattern` receive bonus because they usually have more significance than the rest. - * See PATTERN_START_MULTIPLIER/PATTERN_NON_START_MULTIPLIER. - * - Superfluous characters in `str` will reduce the score (gap penalty). See GAP_SCORE. - * - Prefer early occurrence of the first character. See LEADING_GAP_SCORE/GAP_SCORE. - * - * The recurrence of the dynamic programming: - * dp[i][j]: maximum accumulated score by aligning pattern[0..i] to str[0..j] - * dp[0][j] = leading_gap_penalty(0, j) + score[j] - * dp[i][j] = max(dp[i-1][j-1] + CONSECUTIVE_SCORE, max(dp[i-1][k] + gap_penalty(k+1, j) + score[j] : k < j)) - * - * The first dimension can be suppressed since we do not need a matching scheme, which reduces the space complexity from - * O(N*M) to O(M) - * - * @returns the sorting weight. - */ -static int rofi_scorer_fuzzy_evaluate ( const char *pattern, glong plen, const char *str, glong slen ) -{ - if ( slen > FUZZY_SCORER_MAX_LENGTH ) { - return MIN_SCORE; - } - if ( plen == 5 ) { - plen = plen; - } - glong pi, si; - // whether we are aligning the first character of pattern - gboolean pfirst = TRUE; - // whether the start of a word in pattern - gboolean pstart = TRUE; - // score for each position - int *score = g_malloc_n ( slen, sizeof ( int ) ); - // dp[i]: maximum value by aligning pattern[0..pi] to str[0..si] - int *dp = g_malloc_n ( slen, sizeof ( int ) ); - // uleft: value of the upper left cell; ulefts: maximum value of uleft and cells on the left. The arbitrary initial - // values suppress warnings. - int uleft = 0, ulefts = 0, left, lefts; - const gchar *pit = pattern, *sit; - enum CharClass prev = NON_WORD, cur; - for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) { - cur = rofi_scorer_get_character_class ( g_utf8_get_char ( sit ) ); - score[si] = rofi_scorer_get_score_for ( prev, cur ); - prev = cur; - dp[si] = MIN_SCORE; - } - for ( pi = 0; pi < plen; pi++, pit = g_utf8_next_char ( pit ) ) { - gunichar pc = g_utf8_get_char ( pit ), sc; - if ( g_unichar_isspace ( pc ) ) { - pstart = TRUE; - continue; - } - lefts = MIN_SCORE; - for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) { - left = dp[si]; - lefts = MAX ( lefts + GAP_SCORE, left ); - sc = g_utf8_get_char ( sit ); - if ( config.case_sensitive - ? pc == sc - : g_unichar_tolower ( pc ) == g_unichar_tolower ( sc ) ) { - int t = score[si] * ( pstart ? PATTERN_START_MULTIPLIER : PATTERN_NON_START_MULTIPLIER ); - dp[si] = pfirst - ? LEADING_GAP_SCORE * si + t - : MAX ( uleft + CONSECUTIVE_SCORE, ulefts + t ); - } - else { - dp[si] = MIN_SCORE; - } - uleft = left; - ulefts = lefts; - } - pfirst = pstart = FALSE; - } - lefts = MIN_SCORE; - for ( si = 0; si < slen; si++ ) { - lefts = MAX ( lefts + GAP_SCORE, dp[si] ); - } - g_free ( score ); - g_free ( dp ); - return lefts; -} - static void filter_elements ( thread_state *t, G_GNUC_UNUSED gpointer user_data ) { char *pattern = NULL; @@ -726,7 +557,7 @@ static void filter_elements ( thread_state *t, G_GNUC_UNUSED gpointer user_data if ( config.matching_method == MM_FUZZY ) { char *str = mode_get_completion ( t->state->sw, i ); glong slen = g_utf8_strlen ( str, -1 ); - t->state->distance[i] = -rofi_scorer_fuzzy_evaluate ( pattern, plen, str, slen ); + t->state->distance[i] = rofi_scorer_fuzzy_evaluate ( pattern, plen, str, slen ); g_free ( str ); } else if ( config.levenshtein_sort ) { @@ -742,7 +573,6 @@ static void filter_elements ( thread_state *t, G_GNUC_UNUSED gpointer user_data g_free ( pattern ); } } - static void rofi_view_setup_fake_transparency ( const char const *fake_background ) { if ( CacheState.fake_bg == NULL ) {