From fb975d478611be3c71341de7ec6b43f44aca689d Mon Sep 17 00:00:00 2001 From: SAY-5 Date: Mon, 11 May 2026 22:16:04 -0700 Subject: [PATCH 1/2] fix: don't count '=' inside string literals when parsing parameters PythonTranslator.inspect treated any line with more than one '=' as unparseable, which incorrectly rejected parameter lines like s = "a=b" where the second '=' is inside a string literal. Count only top-level assignment operators using tokenize so the existing PARAMETER_PATTERN regex (which already handles '=' in values correctly) is given a chance to match. Closes #864 --- papermill/tests/test_translators.py | 3 +++ papermill/translators.py | 21 ++++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/papermill/tests/test_translators.py b/papermill/tests/test_translators.py index 0edc1f07..a14f5099 100644 --- a/papermill/tests/test_translators.py +++ b/papermill/tests/test_translators.py @@ -107,6 +107,9 @@ def test_translate_comment_python(test_input, expected): Parameter("b", "float", "-2.3432", "My b variable"), ], ), + # Regression test for #864: '=' inside string literals shouldn't trip parsing. + ('s = "a=b"', [Parameter("s", "None", '"a=b"', "")]), + ("s = 'a=b'", [Parameter("s", "None", "'a=b'", "")]), ], ) def test_inspect_python(test_input, expected): diff --git a/papermill/translators.py b/papermill/translators.py index 1cb43d89..15ae76f2 100644 --- a/papermill/translators.py +++ b/papermill/translators.py @@ -1,7 +1,9 @@ +import io import logging import math import re import shlex +import tokenize from .exceptions import PapermillException from .models import Parameter @@ -9,6 +11,23 @@ logger = logging.getLogger(__name__) +def _count_assignment_operators(line): + """Count top-level assignment operators in a Python source line. + + Uses ``tokenize`` so that ``=`` characters appearing inside string + literals (e.g. ``s = "a=b"``) are not counted as assignment + operators. Falls back to a naive ``line.count('=')`` if tokenization + fails (e.g. for incomplete multiline definitions). + """ + try: + tokens = tokenize.tokenize(io.BytesIO(line.encode("utf-8")).readline) + return sum( + 1 for tok in tokens if tok.type == tokenize.OP and tok.string == "=" + ) + except (tokenize.TokenError, SyntaxError): + return line.count("=") + + class PapermillTranslators: ''' The holder which houses any translator registered with the system. @@ -242,7 +261,7 @@ def flatten_accumulator(accumulator): if len(line.strip()) == 0 or line.strip().startswith('#'): continue # Skip blank and comment - nequal = line.count("=") + nequal = _count_assignment_operators(line) if nequal > 0: grouped_variable.append(flatten_accumulator(accumulator)) accumulator = [] From 58dd73b22ac7a0a8dae8fcedc6cc3649fac7b4c5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 May 2026 05:16:31 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- papermill/translators.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/papermill/translators.py b/papermill/translators.py index 15ae76f2..6c2466a6 100644 --- a/papermill/translators.py +++ b/papermill/translators.py @@ -21,9 +21,7 @@ def _count_assignment_operators(line): """ try: tokens = tokenize.tokenize(io.BytesIO(line.encode("utf-8")).readline) - return sum( - 1 for tok in tokens if tok.type == tokenize.OP and tok.string == "=" - ) + return sum(1 for tok in tokens if tok.type == tokenize.OP and tok.string == "=") except (tokenize.TokenError, SyntaxError): return line.count("=")