#lexer #dfa #regex

nightly macro enum-lexer-macro

A proc_macro lexer generator. using enum-like syntax.

1 unstable release

0.1.0 Aug 5, 2020

#25 in #dfa


Used in enum-lexer

MIT license

58KB
1.5K SLoC

Enum Lexer

A proc_macro lexer generator. using enum-like syntax.

Write a lexer

#![feature(exclusive_range_pattern)]
use enum_lexer::enum_lexer;

enum_lexer! {
    #[derive(Debug, Eq, PartialEq)]
    enum lexer {
        Ident(String): {
            r"[A-Za-z_][A-Za-z_0-9]*" => Ident(text),
        }
        LitInt(usize): {
            r"[0-9][0-9]*" =>
                LitInt(text.parse::<usize>()?), // default error type is Box<dyn Error>
        }
        Op(char): {
            r"\+" => Op('+'),
            r"\-" => Op('-'),
        }
        Def: r"def",
        Let: r"let",
        Group(Vec<Token>, char) : {
            r"\(" => {
                Group(read_group()?, '(')       // construct a token tree within '(', ')'.
            }
            r"\)" => { panic!("error") }
        }
        COMMENTS: {                             // COMMENTS will be ignored
            r"//.*?\n" => !,
            r"/\*.*?\*/" => !,
        }
    }
}

This will generate struct and enum like:

mod lexer {
     #[derive(Debug, Eq, PartialEq)]
     pub struct Token {
         pub inner: TokenInner,
         pub span: Span,
     }
     
     #[derive(Debug, Eq, PartialEq)]
     pub enum TokenInner {
         Ident(String),
         LitInt(usize),
         Op(char),
         Def,
         Let,
         Group(Vec<Token>, char),
     }
     pub struct TokenIterator{...}
     pub type LexError = Box<&dyn Error>;
     pub fn parse_str(src: &str) -> Result<TokenIterator>;
}

Usage

let vec: lexer::Result<Vec<_>> =
    lexer::parse_str(r#"
        let a = 10 + (1 + 2) // alpha
    "#).unwrap().collect();

println!("{:?}", vec);

Customizing Error Types

enum_lexer! {
    type LexError = MyError;
    enum lexer {
        LitStr: "\".*?\""
    }
}

Dfa Generation

enum_lexer_test will write generated DFA to a dfa.dot file.

Dependencies

~3–11MB
~117K SLoC