From b1b28950cef194889f52b318f43bf9bc33620658 Mon Sep 17 00:00:00 2001 From: palewire Date: Sun, 27 Aug 2023 08:48:29 -0400 Subject: [PATCH] Lets try to consolidate the robots files --- newshomepages/extract/consolidate.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/newshomepages/extract/consolidate.py b/newshomepages/extract/consolidate.py index d9158deb400..e79e0f8b432 100644 --- a/newshomepages/extract/consolidate.py +++ b/newshomepages/extract/consolidate.py @@ -93,7 +93,10 @@ def consolidate( for p in item_data["files"] if ( handle.lower() in p["name"].lower() - and p["format"] in ["JSON", "JPEG", "HTML"] + and ( + p["format"] in ["JSON", "JPEG", "HTML"] + or p["name"].lower().endswith("robots.txt") + ) ) ] @@ -121,6 +124,7 @@ def consolidate( lighthouse_list = [] wayback_list = [] html_list = [] + robotstxt_list = [] print("🪆 Extracting files") for f in track(file_list): name = f["file_name"] @@ -140,6 +144,8 @@ def consolidate( wayback_list.append(f) elif name.endswith(".html"): html_list.append(f) + elif name.endswith("robots.txt"): + robotstxt_list.append(f) else: raise ValueError(f"File name {name} doesn't have an output file") @@ -150,6 +156,7 @@ def consolidate( utils.write_csv(lighthouse_list, output_path / "lighthouse-files.csv") utils.write_csv(wayback_list, output_path / "wayback-files.csv") utils.write_csv(html_list, output_path / "html-files.csv") + utils.write_csv(robotstxt_list, output_path / "robotstxt-files.csv") # Delete the zip file zip_path = output_path / "latest.zip"