-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathlink_subjects_resources.py
41 lines (38 loc) · 1.99 KB
/
link_subjects_resources.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# This script grabs subjects from the ArchivesSpace database and their links to all resources and generates a
# spreadsheet with that info
import csv
import mysql.connector
import pathlib
from openpyxl import load_workbook
from secrets import as_dbstag_host, as_dbstag_un, as_dbstag_pw
subjects = load_workbook(pathlib.Path.joinpath(pathlib.Path.cwd(), "reports", "subjects_agents-notes.xlsx"))
subjects_to_agents = subjects["Subjects to agents"]
subject_ids = []
for row in subjects_to_agents.iter_rows(min_col=2, values_only=True):
for cell in row:
subject_ids.append(cell.split("/")[-1])
uga_aspace_staging = mysql.connector.connect(host=as_dbstag_host, user=as_dbstag_un, password=as_dbstag_pw,
database="archivesspace")
astag_cursor = uga_aspace_staging.cursor(buffered=True)
for subject_id in subject_ids:
collections = [subject_id]
subres_query = f"SELECT subject.title, resource.identifier, repository.name " \
f"FROM subject_rlshp " \
f"JOIN subject ON subject.id = subject_rlshp.subject_id " \
f"JOIN resource ON resource.id = subject_rlshp.resource_id " \
f"JOIN repository ON repository.id = resource.repo_id " \
f"WHERE subject.id = {subject_id}"
astag_cursor.execute(subres_query)
results = astag_cursor.fetchall()
for result in results:
if result[0] not in collections:
collections.append(result[0])
collection = ""
for collection_id_frag in list(result[1].strip("][").replace('"', '').split(",")):
if collection_id_frag != "null":
collection += collection_id_frag + "-"
collection = collection[:-1]
collections.append(collection)
with open(pathlib.Path.joinpath(pathlib.Path.cwd(), "reports", "subject_to_agents.csv"), mode="a", newline="") as sub_to_ag:
subag_writer = csv.writer(sub_to_ag, delimiter=",")
subag_writer.writerow(collections)