"""Tests for the cm_bot scraper resilience helpers. The CM_BOT class currently uses bare `soup.find(...)['value']` calls that throw cryptic TypeErrors when cm99.net returns an unexpected response. R3 introduces three pieces: - ScraperError: typed exception so callers can distinguish scraper failures from network errors. - _dump_html(context, content): writes the failing response to logs/scraper-failures/-.html and returns the path. - _find_input_value(soup, ident, *, context, raw, by='name'): the dominant extraction pattern. Returns the value on success, dumps + raises ScraperError on miss. These tests do NOT exercise the live cm99.net integration. They use small inline HTML fixtures and patch filesystem side effects so the tests stay hermetic. """ import os import shutil import tempfile import unittest from unittest import mock from bs4 import BeautifulSoup from app.cm_bot import CM_BOT, ScraperError class ScraperHelpersTests(unittest.TestCase): def setUp(self): # CM_BOT.__init__ reads CM_BOT_BASE_URL from the env. Patch it # in setUp (mock.patch.dict as a class decorator only wraps # test_* methods, so setUp would see an unpatched env). self._env_patcher = mock.patch.dict( os.environ, {"CM_BOT_BASE_URL": "https://example.invalid"} ) self._env_patcher.start() self.addCleanup(self._env_patcher.stop) # Each test gets a fresh tmpdir so the dump helper writes # somewhere predictable. We chdir into it for the duration of # the test because _dump_html writes to a relative # logs/scraper-failures path. self._old_cwd = os.getcwd() self._tmp = tempfile.mkdtemp(prefix="r3-test-") os.chdir(self._tmp) self.bot = CM_BOT() def tearDown(self): os.chdir(self._old_cwd) shutil.rmtree(self._tmp, ignore_errors=True) # ---- _dump_html ---- def test_dump_html_creates_dir_and_writes_bytes(self): path = self.bot._dump_html("ctx-test", b"hi") self.assertTrue(os.path.isfile(path), f"file should exist: {path}") with open(path, "rb") as f: self.assertEqual(f.read(), b"hi") self.assertTrue(path.startswith(os.path.join("logs", "scraper-failures"))) def test_dump_html_accepts_str_content(self): path = self.bot._dump_html("ctx-test", "hi") with open(path, "rb") as f: self.assertEqual(f.read(), b"hi") def test_dump_html_includes_context_and_timestamp_in_filename(self): path = self.bot._dump_html("register_form_token", b"x") basename = os.path.basename(path) self.assertTrue(basename.startswith("register_form_token-"), basename) self.assertTrue(basename.endswith(".html"), basename) # ---- _find_input_value ---- def test_find_input_value_returns_value_when_present(self): html = '
' soup = BeautifulSoup(html, "html.parser") result = self.bot._find_input_value( soup, "token", context="happy_path", raw=html.encode() ) self.assertEqual(result, "abc123") def test_find_input_value_raises_and_dumps_when_missing(self): html = '
' soup = BeautifulSoup(html, "html.parser") with self.assertRaises(ScraperError) as cm: self.bot._find_input_value( soup, "token", context="missing_input", raw=html.encode() ) msg = str(cm.exception) self.assertIn("missing_input", msg) self.assertIn("token", msg) dumped = os.listdir(os.path.join("logs", "scraper-failures")) self.assertEqual(len(dumped), 1, f"expected one dump, got {dumped}") self.assertTrue(dumped[0].startswith("missing_input-")) def test_find_input_value_raises_when_input_has_no_value_attr(self): html = '
' soup = BeautifulSoup(html, "html.parser") with self.assertRaises(ScraperError): self.bot._find_input_value( soup, "token", context="no_value_attr", raw=html.encode() ) def test_find_input_value_does_not_dump_on_success(self): html = '
' soup = BeautifulSoup(html, "html.parser") self.bot._find_input_value( soup, "token", context="should_not_dump", raw=html.encode() ) self.assertFalse( os.path.isdir(os.path.join("logs", "scraper-failures")), "happy path should not create the failure dir", ) def test_find_input_value_supports_by_id(self): html = '
' soup = BeautifulSoup(html, "html.parser") result = self.bot._find_input_value( soup, "toUserId", context="by_id", raw=html.encode(), by="id", ) self.assertEqual(result, "42") if __name__ == "__main__": unittest.main()