lexer grammar InkBlotAntlrLexer;

// A run of horizontal whitespace (spaces and tabs only; line breaks are not included).
Whitespace: [ \t]+ ;

// Plain content text, for the base case where we are parsing neither a string
// nor a choice.
// See InkParser_Content.cs: ContentTextNoEscape, and ContentTextAllowingEcapeChar
// for the escape case.
CONTENT_TEXT_NO_ESCAPE_SIMPLE:
    (
        // Any character is valid, except for:
        //   - { }    ==> identifies embedded logic
        //   - |      ==> text alternatives; forbidden even in non-logic text
        //   - \n \r  ==> a new line of content
        //   - #      ==> a tag
        //   - \ < -  ==> allowed only through the dedicated alternatives below
        //
        // NOTE: the hyphen must be escaped inside the set. Written unescaped
        // between '#' and '<' it forms the range U+0023..U+003C, which would
        // also exclude digits and most ASCII punctuation ($ % & ' ( ) * + , . / : ;).
        ~[{}|\n\r\\#\-<]

        // Any character can be escaped with a backslash.
        // TODO: the wildcard '.' is the usual way to say "any character", but it
        // would also accept code points above U+FFFF; kept as-is to preserve
        // what is matched today.
        | '\\' [\u0000-\uFFFF]

        // Accept a '-' only if it is not followed by '>' ("->" is a divert).
        | '-' { InputStream.LA(1) != '>' }?

        // Likewise accept a '<' only if it starts neither a thread ("<-")
        // nor glue ("<>").
        | '<' { InputStream.LA(1) != '-' && InputStream.LA(1) != '>' }?
    )+
    ;