inkblot/InkBlot/InkBlotAntlrLexer.g4

lexer grammar InkBlotAntlrLexer;

// Arrow tokens: '<-' (thread), '->' (divert) and '->->' (tunnel).
// '->' is a prefix of '->->', but ANTLR lexers prefer the longest match, so the
// input "->->" yields a single TUNNEL_ARROW rather than two DIVERT_ARROWs, even
// though DIVERT_ARROW is declared first.
THREAD_ARROW: '<-' ;
DIVERT_ARROW: '->' ;
TUNNEL_ARROW: '->->' ;

// One or more ASCII letters, digits or underscores. A leading digit is allowed,
// so purely numeric text such as "42" also lexes as IDENTIFIER.
IDENTIFIER: [a-zA-Z0-9_]+;

// Horizontal whitespace: a run of one or more spaces and/or tabs.
WS: (' ' | '\t')+ ;
// End of line: LF or CRLF. Ink tolerates whitespace in front of the line break,
// so any leading spaces/tabs are folded into the same NL token.
NL: [ \t]* '\r'? '\n' ;
//
//// one or more (potential whitespace followed by) newline(s); used e.g. for block sequencing
//MULTILINE_WS: NL+ ;
//
//// see InkParser_Content.cs, ContentTextNoEscape and ContentTextAllowingEcapeChar for the escape case
//// this works for the base case where we're not parsing a string, nor a choice
//// We ALSO have to remove all other tokens from here, otherwise this will gobble them all up, since it will become
//// the longest-matching token
//CONTENT_TEXT_NO_ESCAPE_NO_IDENT_SIMPLE:
//    (
//      // any character is valid, except for:
//      // - {}                   ==> identifies embedded logic
//      // - |                    ==> text alternatives, is forbidden even in non-logic text for some reason
//      // - \n\r                 ==> a new line of content
//      // - #                    ==> a tag
//      // - \, < and - with exceptions (see below)
//      // - space and \t         ==> these are used to parse spaces
//      // - a-z, A-Z, 0-9 and _  ==> these are used to parse identifiers TODO: add missing characters
//      // - !&$                  ==> these are used as sequence type symbol annotations; "~" too but that is special
//      ~[{}|\n\r\\#\-< \ta-zA-Z0-9_!&$]
//      // any character can be escaped
//    | '\\' [\u0000-\uFFFF] // TODO: is there a better way to say "any character"?
//      // accept a - only if not followed by a > (->, a divert)
//    | '-' { InputStream.LA(1) != '>' }?
//      // same for threads (<-) and glue (<>)
//    | '<' { InputStream.LA(1) != '-' && InputStream.LA(1) != '>' }?
//    )+ ;
//
//INLINE_LOGIC_START: '{' ;
//INLINE_LOGIC_END: '}' ;
//
//// All symbols for sequencing: either using the short-hand symbols (https://github.com/inkle/ink/blob/master/Documentation/WritingWithInk.md#types-of-alternatives)
//// or using the multiline blocks (https://github.com/inkle/ink/blob/master/Documentation/WritingWithInk.md#multiline-blocks)
//SEQUENCE_TYPE_SYMBOL_ANNOTATION: [!&~$ ] ;
//
//THREAD_ARROW: '<-' ;
//DIVERT_ARROW: '->' ;
//TUNNEL_ARROW: '->->' ;
//
//// TODO: add all extra character ranges from InkParser_CharacterRanges (LatinBasic, LatinExtendedA, ...), and also remove them from CONTENT_TEXT_NO_ESCAPE_NO_IDENT_SIMPLE
//IDENTIFIER: [a-zA-Z0-9_]+;