From 3e1c9a0efcfde0662c9d0cd043a4eace213cb578 Mon Sep 17 00:00:00 2001 From: C5H12O5 Date: Sat, 12 Aug 2023 12:44:32 +0800 Subject: [PATCH] Parse json string in non-strict mode --- scraper/scraper.py | 4 ++-- scraper/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scraper/scraper.py b/scraper/scraper.py index cf9f19e..1f126a4 100644 --- a/scraper/scraper.py +++ b/scraper/scraper.py @@ -83,8 +83,8 @@ def _start(flow: "ScrapeFlow", limit: int): _results.append(next(result_gen)) except StopIteration: break - except ScrapeError as e: - _logger.error("Error while scraping: %s", e) + except ScrapeError: + _logger.error("Failed to scrape from %s", flow.site, exc_info=True) class ScrapeFlow: diff --git a/scraper/utils.py b/scraper/utils.py index 76ce309..d4415c2 100644 --- a/scraper/utils.py +++ b/scraper/utils.py @@ -33,7 +33,7 @@ def str_to_etree(string: str) -> Optional[ElementTree.Element]: """Convert a string to an ElementTree.""" string = string.strip() if string.startswith("{") or string.startswith("["): - return json_to_etree(json.loads(string)) + return json_to_etree(json.loads(string, strict=False)) elif string.startswith("<"): return html_to_etree(string) return None