feat: finally able to overcome the base of the base of this f*cked up mess
This commit is contained in:
@@ -1,47 +1,52 @@
|
||||
lexer grammar InkBlotAntlrLexer;
|
||||
|
||||
// classic "white space" and "new line" - ink's new line also allows for some whitespace at start
|
||||
// Horizontal whitespace: one or more consecutive spaces and/or tabs.
WS : ( ' ' | '\t' )+ ;
|
||||
// Line break: optional leading WS, an optional carriage return, then a line feed.
NL : ( WS )? ( '\r' )? '\n' ;
|
||||
|
||||
// one or more (potential whitespace followed by) newline(s); used e.g. for block sequencing
|
||||
// A run of one or more consecutive NL matches.
// NOTE(review): for a single line break NL matches the same text and is declared earlier,
// so ANTLR's first-rule-wins tie-break presumably yields NL there — confirm this is intended.
MULTILINE_WS : ( NL )+ ;
|
||||
|
||||
// see InkParser_Content.cs, ContentTextNoEscape and ContentTextAllowingEcapeChar for the escape case
|
||||
// this works for the base case where we're not parsing a string, nor a choice
|
||||
CONTENT_TEXT_NO_ESCAPE_SIMPLE:
    (
        // any character is valid, except for:
        //  - {}    ==> identifies embedded logic
        //  - |     ==> text alternatives, is forbidden even in non-logic text for some reason
        //  - \n\r  ==> a new line of content
        //  - #     ==> a tag
        //  - space ==> also excluded by the set below
        //  - \, < and - with exceptions (see below)
        //
        // NOTE: '-' has to be written as '\-' inside a character set. An unescaped "#-<"
        // is read by ANTLR as the range '#'..'<', which would additionally exclude the
        // digits and most ASCII punctuation from content text.
        ~[{}|\n\r\\#\-< ]
        // any character can be escaped
        // TODO: is there a better way to say "any character"? (the lexer wildcard '.' may be
        // what is wanted here — confirm how it treats characters above \uFFFF first)
        | '\\' [\u0000-\uFFFF]
        // accept a - only if not followed by a > (->, a divert);
        // the predicate runs after '-' was consumed, so LA(1) is the character that follows it
        | '-' { InputStream.LA(1) != '>' }?
        // same for threads (<-) and glue (<>)
        // (currently disabled, so a bare '<' is never part of this token)
        // | '<' { InputStream.LA(1) != '-' && InputStream.LA(1) != '>' }?
    )+ ;
|
||||
|
||||
// Opening brace of an embedded (inline) logic section.
INLINE_LOGIC_START : '{' ;
|
||||
// Closing brace of an embedded (inline) logic section.
INLINE_LOGIC_END : '}' ;
|
||||
|
||||
// All symbols for sequencing: either using the short-hand symbols (https://github.com/inkle/ink/blob/master/Documentation/WritingWithInk.md#types-of-alternatives)
|
||||
// or using the multiline blocks (https://github.com/inkle/ink/blob/master/Documentation/WritingWithInk.md#multiline-blocks)
|
||||
// Short-hand sequence type symbol: exactly one of '!', '&', '~', '$' or a space.
// NOTE(review): the space alternative looks unintentional — WS is declared earlier and
// matches a single space as well, so it presumably always wins; confirm and drop if so.
SEQUENCE_TYPE_SYMBOL_ANNOTATION : ( '!' | '&' | '~' | '$' | ' ' ) ;
|
||||
// Multiline-block keyword "once", optional whitespace, then the terminating ':'.
ONCE : 'once' ( WS )? ':' ;
|
||||
// Multiline-block keyword "cycle", optional whitespace, then the terminating ':'.
CYCLE : 'cycle' ( WS )? ':' ;
|
||||
// Multiline-block keyword "shuffle", optional whitespace, then the terminating ':'.
SHUFFLE : 'shuffle' ( WS )? ':' ;
|
||||
// Multiline-block keyword "stopping", optional whitespace, then the terminating ':'.
STOPPING : 'stopping' ( WS )? ':' ;
|
||||
// Combined keyword "shuffle once": mandatory whitespace between the two words,
// optional whitespace before the terminating ':'.
SHUFFLE_ONCE : 'shuffle' WS 'once' ( WS )? ':' ;
|
||||
// Combined keyword "shuffle stopping": mandatory whitespace between the two words,
// optional whitespace before the terminating ':'.
SHUFFLE_STOPPING : 'shuffle' WS 'stopping' ( WS )? ':' ;
|
||||
|
||||
// Thread marker: the two-character sequence "<-".
THREAD_ARROW : '<' '-' ;
|
||||
// Divert marker: the two-character sequence "->".
DIVERT_ARROW : '-' '>' ;
|
||||
// Tunnel marker: two "->" sequences back to back. Being the longer match, it is
// preferred over DIVERT_ARROW whenever the input actually reads "->->".
TUNNEL_ARROW : '->' '->' ;
|
||||
|
||||
// TODO: add all extra character ranges from InkParser_CharacterRanges (LatinBasic, LatinExtendedA, ...)
|
||||
// Identifier: one or more ASCII letters, digits or underscores.
IDENTIFIER : ( [A-Z] | [a-z] | [0-9] | '_' )+ ;
|
||||
// Identifier: one or more ASCII letters, digits or underscores.
IDENTIFIER : ( [a-z] | [A-Z] | [0-9] | '_' )+ ;
|
||||
|
||||
//// classic "white space" and "new line" - ink's new line also allows for some whitespace at start
|
||||
// Horizontal whitespace: one or more consecutive spaces and/or tabs.
WS : ( ' ' | '\t' )+ ;
|
||||
// Line break: optional leading WS, an optional carriage return, then a line feed.
NL : ( WS )? ( '\r' )? '\n' ;
|
||||
//
|
||||
//// one or more (potential whitespace followed by) newline(s); used e.g. for block sequencing
|
||||
//MULTILINE_WS: NL+ ;
|
||||
//
|
||||
//// see InkParser_Content.cs, ContentTextNoEscape and ContentTextAllowingEcapeChar for the escape case
|
||||
//// this works for the base case where we're not parsing a string, nor a choice
|
||||
//// We ALSO have to remove all other tokens from here, otherwise this will gobble them all up, since it will become
|
||||
//// the longest-matching token
|
||||
//CONTENT_TEXT_NO_ESCAPE_NO_IDENT_SIMPLE:
|
||||
// (
|
||||
// // any character is valid, except for:
|
||||
// // - {} ==> identifies embedded logic
|
||||
// // - | ==> text alternatives, is forbidden even in non-logic text for some reason
|
||||
// // - \n\r ==> a new line of content
|
||||
// // - # ==> a tag
|
||||
// // - \, < and - with exceptions (see below)
|
||||
// // - space and \t ==> these are used to parse spaces
|
||||
// // - a-z, A-Z, 0-9 and _ ==> these are used to parse identifiers TODO: add missing characters
|
||||
// // - !&$ ==> these are used as sequence type symbol annotations; "~" too, but that is special
|
||||
// ~[{}|\n\r\\#\-< \ta-zA-Z0-9_!&$]
|
||||
// // any character can be escaped
|
||||
// | '\\' [\u0000-\uFFFF] // TODO: is there a better way to say "any character"?
|
||||
// // accept a - only if not followed by a > (->, a divert)
|
||||
// | '-' { InputStream.LA(1) != '>' }?
|
||||
// // same for threads (<-) and glue (<>)
|
||||
// | '<' { InputStream.LA(1) != '-' && InputStream.LA(1) != '>' }?
|
||||
// )+ ;
|
||||
//
|
||||
//INLINE_LOGIC_START: '{' ;
|
||||
//INLINE_LOGIC_END: '}' ;
|
||||
//
|
||||
//// All symbols for sequencing: either using the short-hand symbols (https://github.com/inkle/ink/blob/master/Documentation/WritingWithInk.md#types-of-alternatives)
|
||||
//// or using the multiline blocks (https://github.com/inkle/ink/blob/master/Documentation/WritingWithInk.md#multiline-blocks)
|
||||
//SEQUENCE_TYPE_SYMBOL_ANNOTATION: [!&~$ ] ;
|
||||
//
|
||||
//THREAD_ARROW: '<-' ;
|
||||
//DIVERT_ARROW: '->' ;
|
||||
//TUNNEL_ARROW: '->->' ;
|
||||
//
|
||||
//// TODO: add all extra character ranges from InkParser_CharacterRanges (LatinBasic, LatinExtendedA, ...), and also remove them from CONTENT_TEXT_NO_ESCAPE_SIMPLE
|
||||
//IDENTIFIER: [a-zA-Z0-9_]+;
|
||||
Reference in New Issue
Block a user