feat: parse content text (base case).

2025-02-16 18:26:28 +01:00
parent b7aae9a04f
commit d386c50499
32 changed files with 940 additions and 6 deletions
--- a/InkBlot/InkBlotAntlrLexer.g4
+++ b/InkBlot/InkBlotAntlrLexer.g4
@@ -0,0 +1,22 @@
+lexer grammar InkBlotAntlrLexer;
+
+Whitespace: ( ' ' | '\t' )+ ; // one or more spaces/tabs; line breaks are not part of this token
+
+// see InkParser_Content.cs, ContentTextNoEscape and ContentTextAllowingEcapeChar for the escape case
+// this works for the base case where we're not parsing a string, nor a choice
+CONTENT_TEXT_NO_ESCAPE_SIMPLE:
+    (
+      // any character is valid, except for:
+      // - {}   ==> identifies embedded logic
+      // - |    ==> text alternatives, is forbidden even in non-logic text for some reason
+      // - \n\r ==> a new line of content
+      // - #    ==> a tag
+      // - \, < and - with exceptions (see below); '-' must be written \- or #-< is read as the range '#'..'<'
+      ~[{}|\n\r\\#\-<]
+      // any character can be escaped ('.' is the lexer wildcard and matches exactly one character)
+    | '\\' .
+      // accept a - only if not followed by a > (->, a divert)
+    | '-' { InputStream.LA(1) != '>' }?
+      // same for threads (<-) and glue (<>)
+    | '<' { InputStream.LA(1) != '-' && InputStream.LA(1) != '>' }?
+    )+ ;