bpo-46521: Fix codeop to use a new partial-input mode of the parser (GH-31010)

author: Pablo Galindo Salgado <Pablogsal@gmail.com> 2022-02-08 11:54:37 +0000
committer: GitHub <noreply@github.com> 2022-02-08 11:54:37 +0000
commit: 69e10976b2e7682c6d57f4272932ebc19f8e8859 (patch)
tree: 58a1716aab40f00e50fcd6e8abea7ae294e960c0 /Parser
parent: bpo-46675: Allow object value arrays and split key dictionaries larger than 1... (diff)
download: cpython-69e10976b2e7682c6d57f4272932ebc19f8e8859.tar.gz cpython-69e10976b2e7682c6d57f4272932ebc19f8e8859.tar.bz2 cpython-69e10976b2e7682c6d57f4272932ebc19f8e8859.zip
3 files changed, 31 insertions, 11 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 470c2cbd743..6adde843230 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -726,6 +726,9 @@ compute_parser_flags(PyCompilerFlags *flags)
     if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
         parser_flags |= PyPARSE_ASYNC_HACKS;
     }
+    if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) {
+        parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT;
+    }
     return parser_flags;
 }
 
@@ -811,16 +814,26 @@ reset_parser_state_for_error_pass(Parser *p)
     p->tok->interactive_underflow = IUNDERFLOW_STOP;
 }
 
+static inline int
+_is_end_of_source(Parser *p) {
+    int err = p->tok->done;
+    return err == E_EOF || err == E_EOFS || err == E_EOLS;
+}
+
 void *
 _PyPegen_run_parser(Parser *p)
 {
     void *res = _PyPegen_parse(p);
     assert(p->level == 0);
     if (res == NULL) {
+        if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) &&  _is_end_of_source(p)) {
+            PyErr_Clear();
+            return RAISE_SYNTAX_ERROR("incomplete input");
+        }
         if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
             return NULL;
         }
-        // Make a second parser pass. In this pass we activate heavier and slower checks
+       // Make a second parser pass. In this pass we activate heavier and slower checks
         // to produce better error messages and more complete diagnostics. Extra "invalid_*"
         // rules will be active during parsing.
         Token *last_token = p->tokens[p->fill - 1];
diff --git a/Parser/pegen.h b/Parser/pegen.h
index caba34e535b..061ca3a2013 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -22,6 +22,7 @@
 #define PyPARSE_BARRY_AS_BDFL 0x0020
 #define PyPARSE_TYPE_COMMENTS 0x0040
 #define PyPARSE_ASYNC_HACKS   0x0080
+#define PyPARSE_ALLOW_INCOMPLETE_INPUT 0x0100
 
 #define CURRENT_POS (-5)
 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 5b5cbdb809e..d38df66c69e 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -40,7 +40,7 @@
 static struct tok_state *tok_new(void);
 static int tok_nextc(struct tok_state *tok);
 static void tok_backup(struct tok_state *tok, int c);
-
+static int syntaxerror(struct tok_state *tok, const char *format, ...);
 
 /* Spaces in this constant are treated as "zero or more spaces or tabs" when
    tokenizing. */
@@ -1031,8 +1031,9 @@ tok_nextc(struct tok_state *tok)
         if (tok->cur != tok->inp) {
             return Py_CHARMASK(*tok->cur++); /* Fast path */
         }
-        if (tok->done != E_OK)
-            return EOF;
+        if (tok->done != E_OK) {
+           return EOF;
+        }
         if (tok->fp == NULL) {
             rc = tok_underflow_string(tok);
         }
@@ -1964,16 +1965,21 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
                 tok->line_start = tok->multi_line_start;
                 int start = tok->lineno;
                 tok->lineno = tok->first_lineno;
-
                 if (quote_size == 3) {
-                    return syntaxerror(tok,
-                                       "unterminated triple-quoted string literal"
-                                       " (detected at line %d)", start);
+                    syntaxerror(tok, "unterminated triple-quoted string literal"
+                                     " (detected at line %d)", start);
+                    if (c != '\n') {
+                        tok->done = E_EOFS;
+                    }
+                    return ERRORTOKEN;
                 }
                 else {
-                    return syntaxerror(tok,
-                                       "unterminated string literal (detected at"
-                                       " line %d)", start);
+                    syntaxerror(tok, "unterminated string literal (detected at"
+                                     " line %d)", start);
+                    if (c != '\n') {
+                        tok->done = E_EOLS;
+                    }
+                    return ERRORTOKEN;
                 }
             }
             if (c == quote) {
author	Pablo Galindo Salgado <Pablogsal@gmail.com>	2022-02-08 11:54:37 +0000
committer	GitHub <noreply@github.com>	2022-02-08 11:54:37 +0000
commit	69e10976b2e7682c6d57f4272932ebc19f8e8859 (patch)
tree	58a1716aab40f00e50fcd6e8abea7ae294e960c0 /Parser
parent	bpo-46675: Allow object value arrays and split key dictionaries larger than 1... (diff)
download	cpython-69e10976b2e7682c6d57f4272932ebc19f8e8859.tar.gz cpython-69e10976b2e7682c6d57f4272932ebc19f8e8859.tar.bz2 cpython-69e10976b2e7682c6d57f4272932ebc19f8e8859.zip