diff --git a/compare50/_data.py b/compare50/_data.py
index c9b17d7..2d0dbd9 100644
--- a/compare50/_data.py
+++ b/compare50/_data.py
@@ -1,12 +1,12 @@
 import abc
 from collections.abc import Mapping, Sequence
-import os
 import pathlib
 import numbers
 
 import attr
 import pygments
 import pygments.lexers
+from .lexers import WordLexer
 
 
 __all__ = ["Pass", "Comparator", "File", "Submission",
@@ -191,14 +191,18 @@ def lexer(self):
 
         # get lexer for this file type
         try:
-            lexer = pygments.lexers.get_lexer_for_filename(self.name.name)
+            if ext == ".txt":
+                lexer = WordLexer()
+            else:
+                lexer = pygments.lexers.get_lexer_for_filename(self.name.name)
+
             self._lexer_cache[ext] = lexer
             return lexer
         except pygments.util.ClassNotFound:
             try:
                 return pygments.lexers.guess_lexer(self.read())
             except pygments.util.ClassNotFound:
-                return pygments.lexers.special.TextLexer()
+                return WordLexer()
 
     @classmethod
     def get(cls, id):
diff --git a/compare50/lexers.py b/compare50/lexers.py
new file mode 100644
index 0000000..fadf605
--- /dev/null
+++ b/compare50/lexers.py
@@ -0,0 +1,16 @@
+from pygments.lexer import RegexLexer
+from pygments.token import Text, Name
+
+class WordLexer(RegexLexer):
+    """Custom compare50 lexer that creates a token based on each 'word'."""
+    name = "WordLexer"
+    aliases = ["word"]
+    filenames = ["*.txt"]
+
+    tokens = {
+        "root": [
+            (r"\s+", Text), # whitespace
+            (r"\w+", Name), # word (alphanumeric)
+            (r"\W", Text), # punctuation or other
+        ]
+    }