// Lexer for the ink scripting language: whitespace/newlines, plain content text,
// inline-logic braces, sequence annotations and keywords, arrows, and identifiers.
//
// NOTE(review): several rules can match the same input (e.g. letters match both
// CONTENT_TEXT_NO_ESCAPE_SIMPLE and IDENTIFIER). ANTLR picks the longest match and,
// on a tie, the rule declared first, so the rule order in this file is significant.
lexer grammar InkBlotAntlrLexer;

// Classic "white space" and "new line". ink's new line also allows for some
// whitespace before the line terminator.
WS: [ \t]+ ;
NL: WS? '\r'? '\n' ;

// One or more newlines, each optionally preceded by whitespace.
// Used e.g. for block sequencing.
MULTILINE_WS: NL+ ;

// See InkParser_Content.cs: ContentTextNoEscape, and ContentTextAllowingEcapeChar
// for the escape case.
// This covers the base case where we are parsing neither a string nor a choice.
CONTENT_TEXT_NO_ESCAPE_SIMPLE:
    (
        // Any character is valid, except for:
        //   - { }    ==> delimit embedded logic
        //   - |      ==> text alternatives (forbidden even in non-logic text)
        //   - \n \r  ==> a new line of content
        //   - #      ==> a tag
        //   - \ < -  ==> excluded here, re-admitted by the alternatives below
        //   - space  ==> also listed in the set, so spaces are never part of this token
        //
        // The hyphen is escaped on purpose: an unescaped "#-<" inside a set is the
        // RANGE U+0023..U+003C, which would also exclude digits and most ASCII
        // punctuation ($ % & ' ( ) * + , . / : ;) from content text.
        // NOTE(review): with the range removed, ':', '$', '&' and digits are content
        // characters, so input such as "once:" or "&" can now tie with (or be
        // out-matched by) this rule instead of the keyword/annotation rules below.
        // Confirm against the parser, or isolate those rules with lexer modes.
        ~[{}|\n\r\\#\-< ]

        // Any character can be escaped with a backslash.
        // TODO: is there a better way to say "any character"?
        //       (ANTLR's '.' wildcard matches any single character.)
        | '\\' [\u0000-\uFFFF]

        // Accept a '-' only if it is not followed by '>' ("->" is a divert).
        // The predicate runs after '-' has been consumed, so LA(1) is the next character.
        | '-' { InputStream.LA(1) != '>' }?

        // Same idea for threads (<-) and glue (<>); currently disabled, so '<' is
        // never accepted as content text.
        // | '<' { InputStream.LA(1) != '-' && InputStream.LA(1) != '>' }?
    )+
    ;

// Delimiters of inline logic.
INLINE_LOGIC_START: '{' ;
INLINE_LOGIC_END:   '}' ;

// All symbols for sequencing, either using the short-hand symbols
// (https://github.com/inkle/ink/blob/master/Documentation/WritingWithInk.md#types-of-alternatives)
// or using the multiline blocks
// (https://github.com/inkle/ink/blob/master/Documentation/WritingWithInk.md#multiline-blocks).
// NOTE(review): the set below contains a literal space; a lone space is already
// claimed by WS (declared earlier), so confirm whether the space is intended.
SEQUENCE_TYPE_SYMBOL_ANNOTATION: [!&~$ ] ;

// Multiline-block keywords; each token includes the trailing ':'.
ONCE:             'once' WS? ':' ;
CYCLE:            'cycle' WS? ':' ;
SHUFFLE:          'shuffle' WS? ':' ;
STOPPING:         'stopping' WS? ':' ;
SHUFFLE_ONCE:     'shuffle' WS 'once' WS? ':' ;
SHUFFLE_STOPPING: 'shuffle' WS 'stopping' WS? ':' ;

// Arrows. "->->" wins over "->" through longest-match, regardless of rule order.
THREAD_ARROW: '<-' ;
DIVERT_ARROW: '->' ;
TUNNEL_ARROW: '->->' ;

// TODO: add all extra character ranges from InkParser_CharacterRanges
//       (LatinBasic, LatinExtendedA, ...)
IDENTIFIER: [A-Za-z0-9_]+ ;