// Lexer for the InkBlot ink grammar.
//
// Live tokens: the three arrow symbols, identifiers, and the two whitespace
// tokens (WS, NL). Everything below the "DISABLED DRAFT RULES" banner is
// commented out and kept for reference only.
//
// NOTE: every rule must stay on its own physical line. A `//` comment runs to
// the end of the line, so a rule that shares a line with a preceding comment
// is silently dropped from the grammar.
lexer grammar InkBlotAntlrLexer;

// Arrow symbols.
// ANTLR lexers pick the longest match, so the input '->->' is emitted as a
// single TUNNEL_ARROW rather than as two DIVERT_ARROW tokens, even though
// DIVERT_ARROW is declared first.
THREAD_ARROW: '<-' ;
DIVERT_ARROW: '->' ;
TUNNEL_ARROW: '->->' ;

// One or more ASCII letters, digits or underscores.
// Note that this also matches digit-only input such as "123".
IDENTIFIER: [a-zA-Z0-9_]+ ;

// Classic "white space" and "new line" - ink's new line also allows for some
// whitespace at its start.
// WS: a run of spaces and/or tabs.
WS: [ \t]+ ;
// NL: optional leading WS, an optional '\r', then '\n'.
NL: WS? '\r'? '\n' ;

// ---------------------------------------------------------------------------
// DISABLED DRAFT RULES (kept for reference; none of the rules below is active)
// ---------------------------------------------------------------------------
//
//// one or more (potential whitespace followed by) newline(s); used e.g. for block sequencing
//MULTILINE_WS: NL+ ;
//
//// see InkParser_Content.cs, ContentTextNoEscape and ContentTextAllowingEcapeChar for the escape case
//// this works for the base case where we're not parsing a string, nor a choice
//// We ALSO have to remove all other tokens from here, otherwise this will gobble them all up, since it will become
//// the longest-matching token
//CONTENT_TEXT_NO_ESCAPE_NO_IDENT_SIMPLE:
//    (
//        // any character is valid, except for:
//        //   - {}   ==> identifies embedded logic
//        //   - |    ==> text alternatives, is forbidden even in non-logic text for some reason
//        //   - \n\r ==> a new line of content
//        //   - #    ==> a tag
//        //   - \, < and - with exceptions (see below)
//        //   - space and \t ==> these are used to parse spaces
//        //   a-z, A-Z, 0-9 and _ ==> these are used to parse identifiers TODO: add missing characters
//        //   !&$ ==> these are used as sequence type symbol annotations; "~" too but that is special
//        ~[{}|\n\r\\#\-< \ta-zA-Z0-9_!&$]
//        // any character can be escaped
//        | '\\' [\u0000-\uFFFF] // TODO: is there a better way to say "any character"?
//        // accept a - only if not followed by a > (->, a divert)
//        | '-' { InputStream.LA(1) != '>' }?
//        // same for threads (<-) and glue (<>)
//        | '<' { InputStream.LA(1) != '-' && InputStream.LA(1) != '>' }?
//    )+ ;
//
//INLINE_LOGIC_START: '{' ;
//INLINE_LOGIC_END: '}' ;
//
//// All symbols for sequencing: either using the short-hand symbols (https://github.com/inkle/ink/blob/master/Documentation/WritingWithInk.md#types-of-alternatives)
//// or using the multiline blocks (https://github.com/inkle/ink/blob/master/Documentation/WritingWithInk.md#multiline-blocks)
//SEQUENCE_TYPE_SYMBOL_ANNOTATION: [!&~$ ] ;
//
//THREAD_ARROW: '<-' ;
//DIVERT_ARROW: '->' ;
//TUNNEL_ARROW: '->->' ;
//
//// TODO: add all extra character ranges from InkParser_CharacterRanges (LatinBasic, LatinExtendedA, ...), and also remove them from CONTENT_TEXT_NO_ESCAPE_NO_IDENT_SIMPLE
//IDENTIFIER: [a-zA-Z0-9_]+;