Skip to content

Commit

Permalink
Merge pull request #36 from mlibrary/in-s3-bucket-report
Browse files Browse the repository at this point in the history
create report of barcodes in the s3 bucket
  • Loading branch information
niquerio authored Jan 16, 2025
2 parents c2d0404 + cc3f2c6 commit 791e5c4
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 1 deletion.
10 changes: 9 additions & 1 deletion .config/rclone/rclone.conf.example
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,12 @@ host = YOUR_HOST
user = YOUR_USERNAME
port = YOUR_PORT
pass = YOUR_ENCRYPTED_HASH
shell_type = cmd
shell_type = cmd

[digifeeds_reports]
type = alias
remote = digifeeds_dropbox:YOUR_REPORTS_FOLDER

[digifeeds_dropbox]
type = dropbox
token = {"access_token":"YOUR_ACCESS_TOKEN","token_type":"bearer","expiry":"0001-01-01T00:00:00Z"}
10 changes: 10 additions & 0 deletions aim/cli/digifeeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,13 @@ def process_barcodes(
for barcode in barcodes:
item = get_item(barcode)
process_item(item)


@app.command()
def generate_barcodes_in_s3_report() -> None:
    """
    Generates a report of barcodes that have been moved to the google pickup
    location in the last two weeks. It is based on the files in the processed
    location in the s3 bucket. This report is sent to a dropbox folder.
    """
    # NOTE(review): the docstring above may be surfaced as the command's help
    # text by the CLI framework behind `app` -- confirm before rewording it.
    # All of the work is delegated to the digifeeds functions module.
    functions.generate_barcodes_added_in_last_two_weeks_report()
53 changes: 53 additions & 0 deletions aim/digifeeds/functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from aim.services import S
import boto3
from pathlib import Path
from rclone_python import rclone
from datetime import datetime, timedelta
import csv
import tempfile


def list_barcodes_in_input_bucket():
Expand All @@ -13,3 +17,52 @@ def list_barcodes_in_input_bucket():
response = s3.list_objects_v2(Bucket=S.digifeeds_s3_bucket, Prefix=prefix)
barcodes = [Path(object["Key"]).stem for object in response["Contents"]]
return barcodes


def last_two_weeks_rclone_filter(start_date: "datetime | None" = None):
    """
    Build an rclone filter pattern matching names that start with any of the
    14 dates ending on ``start_date``.

    Args:
        start_date: The most recent date to include. When omitted, the
            current date is looked up at call time.

    Returns:
        A brace-delimited, comma-separated list of ``YYYY-MM-DD*`` globs,
        newest date first, e.g. ``{2025-01-02*,2025-01-01*,...}``.
    """
    # A `datetime.today()` default would be evaluated once, when the module is
    # imported, and go stale in a long-running process; resolve it per call.
    if start_date is None:
        start_date = datetime.today()

    day_count = 14
    dates = [
        f"{(start_date - timedelta(days=n)).strftime('%Y-%m-%d')}*"
        for n in range(day_count)
    ]
    return "{" + ",".join(dates) + "}"


def barcodes_added_in_last_two_weeks():
    """
    List the files in the processed location of the digifeeds s3 bucket whose
    names match the last-two-weeks filter, and return one ``[date, barcode]``
    pair per file.

    The date is the text before the first underscore of the file name; the
    barcode is the third underscore-separated piece, up to its first dot.
    Each barcode is logged as it is added.
    """
    include_arg = f'--include "{last_two_weeks_rclone_filter()}"'
    listing = rclone.ls(
        path=f"{S.digifeeds_s3_bucket}:{S.digifeeds_s3_processed_path}",
        args=[include_arg],
    )

    rows = []
    for entry in listing:
        pieces = entry["Name"].split("_")
        barcode = pieces[2].split(".")[0]
        date = pieces[0]
        S.logger.info(
            "added_to_barcode_report",
            barcode=barcode,
            message="Added to barcode report",
        )
        rows.append([date, barcode])
    return rows


def write_barcodes_added_in_last_two_weeks_report(outfile):
    """
    Write the ``[date, barcode]`` rows from
    ``barcodes_added_in_last_two_weeks`` to ``outfile`` as tab-separated
    lines, each terminated by a newline.

    Args:
        outfile: A writable text file-like object that receives the rows.
    """
    rows = barcodes_added_in_last_two_weeks()
    tsv_writer = csv.writer(outfile, delimiter="\t", lineterminator="\n")
    S.logger.info("writing_report_rows_to_file")
    for row in rows:
        tsv_writer.writerow(row)


def generate_barcodes_added_in_last_two_weeks_report():
    """
    Write the last-two-weeks barcode report to a temporary file and copy it
    to the digifeeds reports rclone remote.

    The destination file is named ``<YYYY-MM-DD>_barcodes_in_s3_processed.tsv``
    using today's date.
    """
    # The context manager closes the temporary file (which also removes it)
    # even if writing or uploading fails, instead of leaving the handle open
    # until garbage collection.
    with tempfile.NamedTemporaryFile(mode="w") as report_file:
        write_barcodes_added_in_last_two_weeks_report(report_file)
        # rclone reads the file by name, so buffered rows must be on disk
        # before the copy starts.
        report_file.flush()

        today = datetime.today().strftime("%Y-%m-%d")
        S.logger.info("writing report to dropbox")
        rclone.copyto(
            in_path=report_file.name,
            out_path=f"{S.digifeeds_reports_rclone_remote}:{today}_barcodes_in_s3_processed.tsv",
        )
5 changes: 5 additions & 0 deletions aim/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ class Services:
#: The name of the rclone remote for the place where Google picks up the digifeeds files
digifeeds_pickup_rclone_remote: str

#: The name of the rclone remote where reports from digifeeds are sent
digifeeds_reports_rclone_remote: str


S = Services(
logger=structlog.get_logger(),
Expand Down Expand Up @@ -116,4 +119,6 @@ class Services:
or "digifeeds_bucket",
digifeeds_pickup_rclone_remote=os.getenv("DIGIFEEDS_PICKUP_RCLONE_REMOTE")
or "digifeeds_pickup",
digifeeds_reports_rclone_remote=os.getenv("DIGIFEEDS_REPORTS_RCLONE_REMOTE")
or "digifeeds_reports",
)
12 changes: 12 additions & 0 deletions tests/cli/test_digifeeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,15 @@ def test_process_barcodes(mocker, item_in_zephir_too_recent):
assert "some_barcode" in result.stdout
assert "other_barcode" in result.stdout
assert result.exit_code == 0


def test_generate_barcodes_in_s3_report(mocker):
    """The CLI command calls the report generator and exits with code 0."""
    report_stub = mocker.patch.object(
        functions, "generate_barcodes_added_in_last_two_weeks_report"
    )
    cli_args = ["digifeeds", "generate-barcodes-in-s3-report"]

    outcome = runner.invoke(app, cli_args)

    report_stub.assert_called()
    assert outcome.exit_code == 0
81 changes: 81 additions & 0 deletions tests/digifeeds/test_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from datetime import datetime
import json
from aim.digifeeds.functions import (
rclone,
barcodes_added_in_last_two_weeks,
write_barcodes_added_in_last_two_weeks_report,
generate_barcodes_added_in_last_two_weeks_report,
last_two_weeks_rclone_filter,
)
from io import StringIO


def test_last_two_weeks_rclone_filter():
    """The filter lists 14 date globs, newest first, ending on the start date."""
    anchor_date = datetime.strptime("2025-01-02", "%Y-%m-%d")
    expected_filter_string = (
        "{2025-01-02*,2025-01-01*,2024-12-31*,2024-12-30*,2024-12-29*"
        ",2024-12-28*,2024-12-27*,2024-12-26*,2024-12-25*,2024-12-24*"
        ",2024-12-23*,2024-12-22*,2024-12-21*,2024-12-20*}"
    )

    assert last_two_weeks_rclone_filter(start_date=anchor_date) == expected_filter_string


def test_barcodes_added_in_last_two_weeks(mocker):
    """Each listed file name is turned into a [date, barcode] pair."""
    # Canned `rclone.ls` output for two processed zip files.
    ls_data_raw = """
[
{
"Path": "2024-12-01_07-10-02_35112203951670.zip",
"Name": "2024-12-01_07-10-02_35112203951670.zip",
"Size": 554562627,
"MimeType": "application/zip",
"ModTime": "2024-12-14T02:01:05.093051502-05:00",
"IsDir": false,
"Tier": "STANDARD"
},
{
"Path": "2024-12-01_07-10-02_39015004707009.zip",
"Name": "2024-12-01_07-10-02_39015004707009.zip",
"Size": 232895588,
"MimeType": "application/zip",
"ModTime": "2024-12-14T02:02:29.111076546-05:00",
"IsDir": false,
"Tier": "STANDARD"
}
]
"""
    listing = json.loads(ls_data_raw)
    mocker.patch.object(rclone, "ls", return_value=listing)

    expected_rows = [
        ["2024-12-01", "35112203951670"],
        ["2024-12-01", "39015004707009"],
    ]
    assert barcodes_added_in_last_two_weeks() == expected_rows


def test_write_barcodes_added_in_last_two_weeks_report(mocker):
    """Rows are written as tab-separated lines ending in a newline."""
    mocker.patch(
        "aim.digifeeds.functions.barcodes_added_in_last_two_weeks",
        return_value=[
            ["2024-12-14", "35112203951670"],
            ["2024-12-14", "39015004707009"],
        ],
    )
    buffer = StringIO()

    write_barcodes_added_in_last_two_weeks_report(buffer)

    written = buffer.getvalue()
    assert written == "2024-12-14\t35112203951670\n2024-12-14\t39015004707009\n"


def test_generate_barcodes_added_in_last_two_weeks_report(mocker):
    """Generating the report calls both the report writer and the rclone upload."""
    copyto_stub = mocker.patch.object(rclone, "copyto")
    writer_stub = mocker.patch(
        "aim.digifeeds.functions.write_barcodes_added_in_last_two_weeks_report",
    )

    generate_barcodes_added_in_last_two_weeks_report()

    copyto_stub.assert_called()
    writer_stub.assert_called()

0 comments on commit 791e5c4

Please sign in to comment.