fix(gateway): keep code blocks verbatim in cleaned text when media present

Self-review of the code-block masking fix. The cleanup path ran media_pattern.sub('') over the masked copy of the text produced by _mask_protected_spans() and assigned the result back to 'cleaned'. As a result, whenever a real MEDIA: tag was delivered (the 'if media:' branch), every fenced code block, inline code span, and blockquote in the reply was blanked to whitespace in the user-visible text. The fix masks a length-equal copy of 'cleaned' only to locate the real tag spans, then deletes those spans from the unmasked 'cleaned' — masking is a locator, not a text rewrite — so protected spans survive verbatim. This commit strengthens the existing mixed-code test (which only asserted that 'Done.' survived, not the code block) and adds a regression test asserting that inline code survives. Both tests fail on the old sub()-based code and pass now.
2026-07-26 17:38:36 +00:00 · 2026-06-01 12:10:27 +05:30 · 2026-06-01 12:10:27 +05:30 · 6c73e8ffaa
commit 6c73e8ffaa
parent ec6261ae2f
1 changed file with 15 additions and 1 deletion
--- a/tests/gateway/test_platform_base.py
+++ b/tests/gateway/test_platform_base.py
@@ -431,7 +431,8 @@ class TestExtractMedia:
        assert media[0][0] == "/real/file.png"

    def test_media_mixed_code_and_prose(self):
-        """Real MEDIA: in prose + example in code block: only prose extracted."""
+        """Real MEDIA: in prose + example in code block: only prose extracted,
+        and the code block survives verbatim in the delivered text."""
        content = (
            "Here is your file:\n"
            "MEDIA:/output/report.pdf\n"
@@ -443,6 +444,19 @@ class TestExtractMedia:
        assert len(media) == 1
        assert media[0][0] == "/output/report.pdf"
        assert "Done." in cleaned
+        # The real tag is stripped from the delivered text...
+        assert "MEDIA:/output/report.pdf" not in cleaned
+        # ...but the fenced code block (incl. its example MEDIA: line) must
+        # survive verbatim — masking is a locator, not a text rewrite.
+        assert "```text\nMEDIA:/example/path.pdf\n```" in cleaned
+
+    def test_inline_code_survives_when_real_media_present(self):
+        """When a real MEDIA: tag is delivered, an inline-code example in the
+        same reply must not be blanked to whitespace."""
+        content = "See MEDIA:/r/a.png and `MEDIA:/ex/b.png` inline"
+        media, cleaned = BasePlatformAdapter.extract_media(content)
+        assert [p for p, _ in media] == ["/r/a.png"]
+        assert "`MEDIA:/ex/b.png`" in cleaned


 class TestMediaInsideSerializedJson: