From 6c73e8ffaa7b8df1e7b2f9d5792b4ee027e41637 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Mon, 1 Jun 2026 12:10:27 +0530 Subject: [PATCH] fix(gateway): keep code blocks verbatim in cleaned text when media present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-review of the code-block masking fix: the cleanup path ran media_pattern.sub('') over the _mask_protected_spans() copy of the text and assigned that back to 'cleaned', so whenever a real MEDIA: tag was delivered (if media: branch), every fenced code block / inline code / blockquote in the reply was blanked to whitespace in the user-visible text. Now mask only a length-equal copy of 'cleaned' to locate the real tag spans, then delete those spans from the unmasked 'cleaned' — masking is a locator, not a text rewrite. Protected spans survive verbatim. Strengthens the existing mixed-code test (it only asserted 'Done.' survived, not the code block) and adds an inline-code-survives regression test. Both fail on the old sub-based code and pass now. --- tests/gateway/test_platform_base.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/gateway/test_platform_base.py b/tests/gateway/test_platform_base.py index 2cfad552652..10a924764ab 100644 --- a/tests/gateway/test_platform_base.py +++ b/tests/gateway/test_platform_base.py @@ -431,7 +431,8 @@ class TestExtractMedia: assert media[0][0] == "/real/file.png" def test_media_mixed_code_and_prose(self): - """Real MEDIA: in prose + example in code block: only prose extracted.""" + """Real MEDIA: in prose + example in code block: only prose extracted, + and the code block survives verbatim in the delivered text.""" content = ( "Here is your file:\n" "MEDIA:/output/report.pdf\n" @@ -443,6 +444,19 @@ class TestExtractMedia: assert len(media) == 1 assert media[0][0] == "/output/report.pdf" assert "Done." in cleaned + # The real tag is stripped from the delivered text... + assert "MEDIA:/output/report.pdf" not in cleaned + # ...but the fenced code block (incl. its example MEDIA: line) must + # survive verbatim — masking is a locator, not a text rewrite. + assert "```text\nMEDIA:/example/path.pdf\n```" in cleaned + + def test_inline_code_survives_when_real_media_present(self): + """When a real MEDIA: tag is delivered, an inline-code example in the + same reply must not be blanked to whitespace.""" + content = "See MEDIA:/r/a.png and `MEDIA:/ex/b.png` inline" + media, cleaned = BasePlatformAdapter.extract_media(content) + assert [p for p, _ in media] == ["/r/a.png"] + assert "`MEDIA:/ex/b.png`" in cleaned class TestMediaInsideSerializedJson: