From 00b76aaaee6e96f541211b974962c041208361b4 Mon Sep 17 00:00:00 2001
From: eric <eric@gupi.fr>
Date: Thu, 5 Sep 2024 09:45:58 +0200
Subject: [PATCH] fix issue-109 (use unique temp files for input/output of
 ExtractArticle.js), increase verbosity of tests (tests for above don't pass
 due to some extra \n)

---
 readabilipy/simple_json.py | 16 ++++++++++------
 tests/checks.py            |  2 +-
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/readabilipy/simple_json.py b/readabilipy/simple_json.py
index 83e83e9..b462097 100644
--- a/readabilipy/simple_json.py
+++ b/readabilipy/simple_json.py
@@ -44,13 +44,17 @@ def simple_json_from_html_string(html, content_digests=False, node_indexes=False
 
     if use_readability:
         # Write input HTML to temporary file so it is available to the node.js script
-        with tempfile.NamedTemporaryFile(delete=False, mode="w+", encoding="utf-8") as f_html:
+        # It is important that this file be unique in case this function is called concurrently
+        with tempfile.NamedTemporaryFile(delete=False, mode="w+", encoding="utf-8", prefix="readabilipy") as f_html:
             f_html.write(html)
             f_html.close()
         html_path = f_html.name
 
+        # Derive some output name
+        # (making the assumption this will be unique too)
+        json_path = html_path + ".json"
+
         # Call Mozilla's Readability.js Readability.parse() function via node, writing output to a temporary file
-        article_json_path = f_html.name + ".json"
         jsdir = os.path.join(os.path.dirname(__file__), 'javascript')
         try:
             result = subprocess.run(
@@ -64,12 +68,12 @@ def simple_json_from_html_string(html, content_digests=False, node_indexes=False
             print(e.stderr)
             raise
 
-        # Read output of call to Readability.parse() from JSON file and return as Python dictionary
-        with open(article_json_path, "r", encoding="utf-8") as json_file:
+        # Read output of call to Readability.parse() from JSON file as Python dictionary
+        with open(json_path, "r", encoding="utf-8") as json_file:
             input_json = json.load(json_file)
 
-        # Deleting files after processing
-        os.unlink(article_json_path)
+        # Delete temporary input and output files after processing
+        os.unlink(json_path)
         os.unlink(f_html.name)
     else:
         input_json = {
diff --git a/tests/checks.py b/tests/checks.py
index aec404d..a70bd7d 100644
--- a/tests/checks.py
+++ b/tests/checks.py
@@ -51,7 +51,7 @@ def check_extract_article(test_filename, expected_filename, content_digests=Fals
         expected_article_json = json.loads(h.read())
 
     # Test full JSON matches (checks for unexpected fields in either actual or expected JSON)
-    assert article_json == expected_article_json
+    assert article_json == expected_article_json, f"{article_json=} != {expected_article_json=}"
 
 
 def check_extract_paragraphs_as_plain_text(test_filename, expected_filename):