mirror of
https://gitlab.com/sortix/sortix.git
synced 2023-02-13 20:55:38 -05:00
555 lines
14 KiB
C
555 lines
14 KiB
C
/*******************************************************************************

    Copyright(C) Jonas 'Sortie' Termansen 2013, 2014, 2015.

    This program is free software: you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the Free
    Software Foundation, either version 3 of the License, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
    more details.

    You should have received a copy of the GNU General Public License along with
    this program. If not, see <http://www.gnu.org/licenses/>.

    highlight.c
    Syntax highlighting.

*******************************************************************************/
|
|
|
|
#include <stdbool.h>
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <wchar.h>
|
|
#include <wctype.h>
|
|
|
|
#include "editor.h"
|
|
#include "highlight.h"
|
|
|
|
enum language language_of_path(const char* path)
|
|
{
|
|
size_t path_length = strlen(path);
|
|
if ( 2 <= path_length &&
|
|
(!strcmp(path+path_length-2, ".c") ||
|
|
!strcmp(path+path_length-2, ".h")) )
|
|
return LANGUAGE_C_CXX;
|
|
if ( 4 <= path_length &&
|
|
(!strcmp(path+path_length-4, ".c++") ||
|
|
!strcmp(path+path_length-4, ".h++") ||
|
|
!strcmp(path+path_length-4, ".cxx") ||
|
|
!strcmp(path+path_length-4, ".hxx") ||
|
|
!strcmp(path+path_length-4, ".cpp") ||
|
|
!strcmp(path+path_length-4, ".hpp")) )
|
|
return LANGUAGE_C_CXX;
|
|
if ( (5 <= path_length && !strcmp(path+path_length-5, ".diff")) ||
|
|
(6 <= path_length && !strcmp(path+path_length-6, ".patch")) )
|
|
return LANGUAGE_DIFF;
|
|
return LANGUAGE_NONE;
|
|
}
|
|
|
|
// Recognizes a C-style numeric constant at the start of `string`, looking at
// no more than `string_length` characters.
//
// Returns the number of characters that make up the constant (including any
// recognized suffix), or 0 if the text does not begin with a constant.
//
// Accepted forms:
//  - decimal integers, with an optional `u`/`U` followed by up to two `l`/`L`;
//  - `0x`/`0X` hexadecimal and `0b`/`0B` binary integers (same suffixes);
//  - decimal floating constants with at most one `.` and at most one
//    exponent (`e`/`E`, optionally signed), with an optional `f`/`F`/`l`/`L`.
//
// At least one digit is required, so a lone `.` (member access, `...`) or a
// bare `0x`/`0b` prefix is not a constant.
static size_t recognize_constant(const wchar_t* string, size_t string_length)
{
	bool binary = false;
	bool hex = false;
	size_t result = 0;
	size_t digits = 0;
	if ( result < string_length && string[result] == L'0' )
	{
		result++;
		if ( result < string_length && (string[result] == L'x' ||
		                                string[result] == L'X') )
		{
			result++;
			hex = true;
		}
		// The prefixes are mutually exclusive: after `0x`, a following `b`
		// is a hexadecimal digit, not a binary prefix.
		else if ( result < string_length && (string[result] == L'b' ||
		                                     string[result] == L'B') )
		{
			result++;
			binary = true;
		}
		else
		{
			// A leading zero without a prefix is itself a digit.
			digits++;
		}
	}
	bool floating = false;
	bool exponent = false;
	while ( result < string_length )
	{
		wchar_t c = string[result];
		if ( (binary && L'0' <= c && c <= L'1') ||
		     (!binary && L'0' <= c && c <= L'9') ||
		     (hex && L'a' <= c && c <= L'f') ||
		     (hex && L'A' <= c && c <= L'F') )
		{
			digits++;
			result++;
			continue;
		}
		if ( c == L'.' )
		{
			// Only one decimal point, never after an exponent (which also
			// sets floating), and never in hexadecimal or binary constants.
			if ( binary || hex || floating )
				return 0;
			floating = true;
			result++;
			continue;
		}
		if ( !(hex || binary) && (c == L'e' || c == L'E') )
		{
			// An exponent needs a mantissa digit before it, and there can
			// only be one exponent.
			if ( !digits )
				return 0;
			if ( exponent )
				return 0;
			exponent = true;
			floating = true;
			result++;
			// Consume an exponent sign only when a digit follows it, so the
			// operator in `1e` `+x` style text is not swallowed.
			if ( result + 1 < string_length &&
			     (string[result] == L'+' || string[result] == L'-') &&
			     L'0' <= string[result + 1] && string[result + 1] <= L'9' )
				result++;
			continue;
		}
		break;
	}
	if ( !digits )
		return 0;
	if ( floating )
	{
		if ( result < string_length && (string[result] == L'l' ||
		                                string[result] == L'L') )
			result++;
		else if ( result < string_length && (string[result] == L'f' ||
		                                     string[result] == L'F') )
			result++;
	}
	else
	{
		if ( result < string_length && (string[result] == L'u' ||
		                                string[result] == L'U') )
			result++;
		if ( result < string_length && (string[result] == L'l' ||
		                                string[result] == L'L') )
			result++;
		if ( result < string_length && (string[result] == L'l' ||
		                                string[result] == L'L') )
			result++;
	}
	return result;
}
|
|
|
|
static void editor_colorize_c_cxx(struct editor* editor);
|
|
static void editor_colorize_diff(struct editor* editor);
|
|
|
|
void editor_colorize(struct editor* editor)
|
|
{
|
|
if ( editor->color_lines_length != editor->lines_used ||
|
|
editor->highlight_source == LANGUAGE_NONE )
|
|
{
|
|
for ( size_t i = 0; i < editor->color_lines_used; i++ )
|
|
free(editor->color_lines[i].data);
|
|
free(editor->color_lines);
|
|
editor->color_lines_used = 0;
|
|
editor->color_lines_length = 0;
|
|
editor->color_lines = NULL;
|
|
}
|
|
|
|
if ( editor->highlight_source == LANGUAGE_NONE )
|
|
return;
|
|
|
|
if ( !editor->color_lines )
|
|
{
|
|
editor->color_lines = (struct color_line*)
|
|
malloc(sizeof(struct color_line) * editor->lines_used);
|
|
if ( !editor->color_lines )
|
|
return;
|
|
editor->color_lines_used = editor->lines_used;
|
|
editor->color_lines_length = editor->lines_used;
|
|
for ( size_t i = 0; i < editor->lines_used; i++ )
|
|
editor->color_lines[i].data = NULL,
|
|
editor->color_lines[i].length = 0;
|
|
}
|
|
|
|
for ( size_t i = 0; i < editor->lines_used; i++ )
|
|
{
|
|
if ( editor->color_lines[i].length == editor->lines[i].used )
|
|
continue;
|
|
|
|
editor->color_lines[i].data = (uint8_t*) malloc(editor->lines[i].used);
|
|
if ( !editor->color_lines[i].data )
|
|
{
|
|
for ( size_t n = 0; n < i; i++ )
|
|
free(editor->color_lines[n].data);
|
|
free(editor->color_lines);
|
|
editor->color_lines_used = 0;
|
|
editor->color_lines_length = 0;
|
|
editor->color_lines = NULL;
|
|
return;
|
|
}
|
|
|
|
editor->color_lines[i].length = editor->lines[i].used;
|
|
}
|
|
|
|
switch ( editor->highlight_source )
|
|
{
|
|
case LANGUAGE_NONE: break;
|
|
case LANGUAGE_C_CXX: editor_colorize_c_cxx(editor); break;
|
|
case LANGUAGE_DIFF: editor_colorize_diff(editor); break;
|
|
}
|
|
}
|
|
|
|
static void editor_colorize_c_cxx(struct editor* editor)
|
|
{
|
|
enum
|
|
{
|
|
STATE_INIT,
|
|
STATE_LINE_COMMENT,
|
|
STATE_MULTI_LINE_COMMENT,
|
|
STATE_PREPROCESSOR,
|
|
STATE_PREPROCESSOR_VALUE,
|
|
STATE_SINGLE_QUOTE,
|
|
STATE_DOUBLE_QUOTE,
|
|
STATE_NUMBER,
|
|
STATE_KEYWORD,
|
|
STATE_TYPE,
|
|
} state = STATE_INIT, prev_state = STATE_INIT;
|
|
|
|
bool escaped = false;
|
|
size_t fixed_state = 0;
|
|
size_t multi_expiration = 0;
|
|
for ( size_t y = 0; y < editor->lines_used; y++ )
|
|
{
|
|
struct line* line = &editor->lines[y];
|
|
for ( size_t x = 0; x < line->used; x++ )
|
|
{
|
|
wchar_t pc = x ? line->data[x-1] : '\0';
|
|
wchar_t c = line->data[x];
|
|
wchar_t nc = x+1 < line->used ? line->data[x+1] : L'\0';
|
|
uint8_t color = 7;
|
|
|
|
// The character makes you leave this state.
|
|
|
|
if ( !fixed_state && (state == STATE_KEYWORD ||
|
|
state == STATE_TYPE ||
|
|
state == STATE_NUMBER ) )
|
|
state = STATE_INIT;
|
|
|
|
// The character makes you enter a new state.
|
|
if ( !fixed_state )
|
|
{
|
|
if ( state == STATE_INIT && c == L'#' )
|
|
state = STATE_PREPROCESSOR;
|
|
if ( state == STATE_PREPROCESSOR && c == '<' )
|
|
state = STATE_PREPROCESSOR_VALUE, fixed_state = 1;
|
|
if ( state == STATE_PREPROCESSOR && c == '"' )
|
|
state = STATE_PREPROCESSOR_VALUE, fixed_state = 1;
|
|
}
|
|
|
|
// TODO: Detect NULL as a value.
|
|
|
|
if ( !fixed_state && state == STATE_INIT &&
|
|
!(x && (iswalnum(pc) || pc == L'_')) )
|
|
{
|
|
size_t number_length = recognize_constant(line->data + x,
|
|
line->used - x);
|
|
if ( number_length )
|
|
{
|
|
state = STATE_NUMBER;
|
|
fixed_state = number_length;
|
|
}
|
|
}
|
|
|
|
if ( !fixed_state && state == STATE_INIT && c == L'\'' )
|
|
state = STATE_SINGLE_QUOTE, fixed_state = 1, escaped = false;
|
|
|
|
if ( !fixed_state && state == STATE_INIT && c == L'"' )
|
|
state = STATE_DOUBLE_QUOTE, fixed_state = 1, escaped = false;
|
|
|
|
if ( !fixed_state && (state == STATE_INIT ||
|
|
state == STATE_PREPROCESSOR) )
|
|
{
|
|
if ( c == L'/' && nc == L'/' )
|
|
state = STATE_LINE_COMMENT, fixed_state = 2;
|
|
else if ( c == L'/' && nc == L'*' )
|
|
{
|
|
prev_state = state;
|
|
multi_expiration = 0;
|
|
state = STATE_MULTI_LINE_COMMENT;
|
|
fixed_state = 2;
|
|
}
|
|
}
|
|
|
|
if ( !fixed_state && state == STATE_INIT )
|
|
{
|
|
const wchar_t* keywords[] =
|
|
{
|
|
L"alignas",
|
|
L"alignof",
|
|
L"and",
|
|
L"and_eq",
|
|
L"asm",
|
|
L"bitand",
|
|
L"bitor",
|
|
L"break",
|
|
L"case",
|
|
L"catch",
|
|
L"class",
|
|
L"compl",
|
|
L"const_cast",
|
|
L"constexpr",
|
|
L"continue",
|
|
L"decltype",
|
|
L"default",
|
|
L"delete",
|
|
L"do",
|
|
L"dynamic_cast",
|
|
L"else",
|
|
L"enum",
|
|
L"false",
|
|
L"final",
|
|
L"for",
|
|
L"friend",
|
|
L"goto",
|
|
L"if",
|
|
L"namespace",
|
|
L"new",
|
|
L"not",
|
|
L"not_eq",
|
|
L"nullptr",
|
|
L"operator",
|
|
L"or",
|
|
L"or_eq",
|
|
L"override",
|
|
L"private",
|
|
L"protected",
|
|
L"public",
|
|
L"reinterpret_cast",
|
|
L"return",
|
|
L"sizeof",
|
|
L"static_assert",
|
|
L"static_cast",
|
|
L"struct",
|
|
L"switch",
|
|
L"template",
|
|
L"this",
|
|
L"thread_local",
|
|
L"throw",
|
|
L"true",
|
|
L"try",
|
|
L"typedef",
|
|
L"typeid",
|
|
L"typename",
|
|
L"union",
|
|
L"using",
|
|
L"virtual",
|
|
L"while",
|
|
L"xor",
|
|
L"xor_eq",
|
|
};
|
|
|
|
bool cannot_be_keyword = x && (iswalnum(pc) || pc == L'_');
|
|
for ( size_t i = 0;
|
|
!cannot_be_keyword && i < sizeof(keywords) / sizeof(keywords[0]);
|
|
i++ )
|
|
{
|
|
const wchar_t* keyword = keywords[i];
|
|
if ( c != keyword[0] )
|
|
continue;
|
|
size_t keyword_length = wcslen(keyword);
|
|
if ( (x - line->used) < keyword_length )
|
|
continue;
|
|
if ( wcsncmp(line->data + x, keyword, keyword_length) != 0 )
|
|
continue;
|
|
|
|
if ( keyword_length < line->used - x )
|
|
{
|
|
wchar_t wc = line->data[x + keyword_length];
|
|
if ( iswalnum(wc) || wc == L'_' )
|
|
continue;
|
|
}
|
|
|
|
state = STATE_KEYWORD;
|
|
fixed_state = keyword_length;
|
|
}
|
|
}
|
|
|
|
if ( !fixed_state && state == STATE_INIT )
|
|
{
|
|
const wchar_t* types[] =
|
|
{
|
|
L"auto",
|
|
L"blkcnt_t",
|
|
L"blksize_t",
|
|
L"bool",
|
|
L"char",
|
|
L"char16_t",
|
|
L"char32_t",
|
|
L"clockid_t",
|
|
L"clock_t",
|
|
L"const",
|
|
L"dev_t",
|
|
L"double",
|
|
L"explicit",
|
|
L"extern",
|
|
L"FILE",
|
|
L"float",
|
|
L"fpos_t",
|
|
L"fsblkcnt_t",
|
|
L"fsfilcnt_t",
|
|
L"gid_t",
|
|
L"id_t",
|
|
L"inline",
|
|
L"ino_t",
|
|
L"int",
|
|
L"int16_t",
|
|
L"int32_t",
|
|
L"int64_t",
|
|
L"int8_t",
|
|
L"intmax_t",
|
|
L"intptr_t",
|
|
L"locale_t",
|
|
L"long",
|
|
L"mode_t",
|
|
L"mutable",
|
|
L"nlink_t",
|
|
L"noexcept",
|
|
L"off_t",
|
|
L"pid_t",
|
|
L"ptrdiff_t",
|
|
L"register",
|
|
L"restrict",
|
|
L"short",
|
|
L"signed",
|
|
L"size_t",
|
|
L"ssize_t",
|
|
L"static",
|
|
L"suseconds_t",
|
|
L"thread_local",
|
|
L"timer_t",
|
|
L"time_t",
|
|
L"trace_t",
|
|
L"uid_t",
|
|
L"uint16_t",
|
|
L"uint32_t",
|
|
L"uint64_t",
|
|
L"uint8_t",
|
|
L"uintmax_t",
|
|
L"uintptr_t",
|
|
L"unsigned",
|
|
L"useconds_t",
|
|
L"va_list",
|
|
L"void",
|
|
L"volatile",
|
|
L"wchar_t",
|
|
};
|
|
|
|
bool cannot_be_type = x && (iswalnum(pc) || pc == L'_');
|
|
for ( size_t i = 0;
|
|
!cannot_be_type && i < sizeof(types) / sizeof(types[0]);
|
|
i++ )
|
|
{
|
|
const wchar_t* type = types[i];
|
|
if ( c != type[0] )
|
|
continue;
|
|
size_t type_length = wcslen(type);
|
|
if ( (x - line->used) < type_length )
|
|
continue;
|
|
if ( wcsncmp(line->data + x, type, type_length) != 0 )
|
|
continue;
|
|
if ( (x - line->used) != type_length &&
|
|
(iswalnum(line->data[x+type_length]) ||
|
|
line->data[x+type_length] == L'_') )
|
|
continue;
|
|
state = STATE_TYPE;
|
|
fixed_state = type_length;
|
|
}
|
|
}
|
|
|
|
// The current state uses a non-default color.
|
|
|
|
if ( state == STATE_SINGLE_QUOTE ||
|
|
state == STATE_DOUBLE_QUOTE ||
|
|
state == STATE_NUMBER ||
|
|
state == STATE_PREPROCESSOR_VALUE )
|
|
color = 5;
|
|
|
|
if ( state == STATE_PREPROCESSOR )
|
|
color = 3;
|
|
|
|
if ( state == STATE_LINE_COMMENT ||
|
|
state == STATE_MULTI_LINE_COMMENT )
|
|
color = 6;
|
|
|
|
if ( state == STATE_KEYWORD )
|
|
color = 1;
|
|
|
|
if ( state == STATE_TYPE )
|
|
color = 2;
|
|
|
|
// The character is the last character in this state.
|
|
|
|
if ( !fixed_state )
|
|
{
|
|
if ( state == STATE_SINGLE_QUOTE && !escaped && c == L'\'' )
|
|
state = STATE_INIT, fixed_state = 1;
|
|
if ( state == STATE_DOUBLE_QUOTE && !escaped && c == L'"' )
|
|
state = STATE_INIT, fixed_state = 1;
|
|
if ( state == STATE_PREPROCESSOR_VALUE && c == '>' )
|
|
state = STATE_PREPROCESSOR;
|
|
if ( state == STATE_PREPROCESSOR_VALUE && c == '"' )
|
|
state = STATE_PREPROCESSOR;
|
|
}
|
|
|
|
if ( (state == STATE_SINGLE_QUOTE || state == STATE_DOUBLE_QUOTE) )
|
|
{
|
|
if ( !escaped && c == L'\\' )
|
|
escaped = true;
|
|
else if ( escaped )
|
|
escaped = false;
|
|
}
|
|
|
|
if ( !fixed_state && state == STATE_MULTI_LINE_COMMENT )
|
|
{
|
|
if ( multi_expiration == 1 )
|
|
state = prev_state, multi_expiration = 0;
|
|
else if ( c == L'*' && nc == L'/' )
|
|
multi_expiration = 1;
|
|
}
|
|
|
|
if ( state == STATE_PREPROCESSOR )
|
|
escaped = c == L'\\' && !nc;
|
|
|
|
editor->color_lines[y].data[x] = color;
|
|
|
|
if ( fixed_state )
|
|
fixed_state--;
|
|
}
|
|
|
|
if ( state == STATE_LINE_COMMENT ||
|
|
state == STATE_PREPROCESSOR ||
|
|
state == STATE_PREPROCESSOR_VALUE ||
|
|
state == STATE_SINGLE_QUOTE ||
|
|
state == STATE_DOUBLE_QUOTE )
|
|
{
|
|
if ( state == STATE_PREPROCESSOR && escaped )
|
|
escaped = false;
|
|
else
|
|
state = STATE_INIT;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void editor_colorize_diff(struct editor* editor)
|
|
{
|
|
for ( size_t y = 0; y < editor->lines_used; y++ )
|
|
{
|
|
struct line* line = &editor->lines[y];
|
|
uint8_t color = 7;
|
|
if ( line->used && line->data[0] == L'-' )
|
|
color = 1;
|
|
else if ( line->used && line->data[0] == L'+' )
|
|
color = 2;
|
|
else if ( line->used && line->data[0] == L'@' )
|
|
color = 6;
|
|
else if ( line->used && !iswblank(line->data[0]) )
|
|
color = 4 + 8;
|
|
for ( size_t x = 0; x < line->used; x++ )
|
|
{
|
|
editor->color_lines[y].data[x] = color;
|
|
}
|
|
}
|
|
}
|