diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2022-02-08 11:54:37 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-08 11:54:37 +0000 |
commit | 69e10976b2e7682c6d57f4272932ebc19f8e8859 (patch) | |
tree | 58a1716aab40f00e50fcd6e8abea7ae294e960c0 /Parser | |
parent | bpo-46675: Allow object value arrays and split key dictionaries larger than 1... (diff) | |
download | cpython-69e10976b2e7682c6d57f4272932ebc19f8e8859.tar.gz cpython-69e10976b2e7682c6d57f4272932ebc19f8e8859.tar.bz2 cpython-69e10976b2e7682c6d57f4272932ebc19f8e8859.zip |
bpo-46521: Fix codeop to use a new partial-input mode of the parser (GH-31010)
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/pegen.c | 15 | ||||
-rw-r--r-- | Parser/pegen.h | 1 | ||||
-rw-r--r-- | Parser/tokenizer.c | 26 |
3 files changed, 31 insertions, 11 deletions
```diff
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 470c2cbd743..6adde843230 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -726,6 +726,9 @@ compute_parser_flags(PyCompilerFlags *flags)
     if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
         parser_flags |= PyPARSE_ASYNC_HACKS;
     }
+    if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) {
+        parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT;
+    }
     return parser_flags;
 }
 
@@ -811,16 +814,26 @@ reset_parser_state_for_error_pass(Parser *p)
     p->tok->interactive_underflow = IUNDERFLOW_STOP;
 }
 
+static inline int
+_is_end_of_source(Parser *p) {
+    int err = p->tok->done;
+    return err == E_EOF || err == E_EOFS || err == E_EOLS;
+}
+
 void *
 _PyPegen_run_parser(Parser *p)
 {
     void *res = _PyPegen_parse(p);
     assert(p->level == 0);
     if (res == NULL) {
+        if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) {
+            PyErr_Clear();
+            return RAISE_SYNTAX_ERROR("incomplete input");
+        }
         if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
             return NULL;
         }
-        // Make a second parser pass. In this pass we activate heavier and slower checks 
+        // Make a second parser pass. In this pass we activate heavier and slower checks
         // to produce better error messages and more complete diagnostics. Extra "invalid_*"
         // rules will be active during parsing.
         Token *last_token = p->tokens[p->fill - 1];
diff --git a/Parser/pegen.h b/Parser/pegen.h
index caba34e535b..061ca3a2013 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -22,6 +22,7 @@
 #define PyPARSE_BARRY_AS_BDFL 0x0020
 #define PyPARSE_TYPE_COMMENTS 0x0040
 #define PyPARSE_ASYNC_HACKS 0x0080
+#define PyPARSE_ALLOW_INCOMPLETE_INPUT 0x0100
 
 #define CURRENT_POS (-5)
 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 5b5cbdb809e..d38df66c69e 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -40,7 +40,7 @@
 static struct tok_state *tok_new(void);
 static int tok_nextc(struct tok_state *tok);
 static void tok_backup(struct tok_state *tok, int c);
-
+static int syntaxerror(struct tok_state *tok, const char *format, ...);
 
 /* Spaces in this constant are treated as "zero or more spaces or tabs" when
    tokenizing. */
@@ -1031,8 +1031,9 @@ tok_nextc(struct tok_state *tok)
         if (tok->cur != tok->inp) {
             return Py_CHARMASK(*tok->cur++); /* Fast path */
         }
-        if (tok->done != E_OK)
-            return EOF;
+        if (tok->done != E_OK) {
+            return EOF;
+        }
         if (tok->fp == NULL) {
             rc = tok_underflow_string(tok);
         }
@@ -1964,16 +1965,21 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
                 tok->line_start = tok->multi_line_start;
                 int start = tok->lineno;
                 tok->lineno = tok->first_lineno;
-
                 if (quote_size == 3) {
-                    return syntaxerror(tok,
-                                       "unterminated triple-quoted string literal"
-                                       " (detected at line %d)", start);
+                    syntaxerror(tok, "unterminated triple-quoted string literal"
+                                " (detected at line %d)", start);
+                    if (c != '\n') {
+                        tok->done = E_EOFS;
+                    }
+                    return ERRORTOKEN;
                 }
                 else {
-                    return syntaxerror(tok,
-                                       "unterminated string literal (detected at"
-                                       " line %d)", start);
+                    syntaxerror(tok, "unterminated string literal (detected at"
+                                " line %d)", start);
+                    if (c != '\n') {
+                        tok->done = E_EOLS;
+                    }
+                    return ERRORTOKEN;
                 }
             }
             if (c == quote) {
```