-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_v5.py
108 lines (84 loc) · 4.91 KB
/
test_v5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# -*- coding: utf-8 -*-
# external imports
import pymarc
# Internal import
import marc_utils_5 as marc_utils
# Manual test script for marc_utils : reads every record of "test_records.mrc",
# applies the marc_utils helpers under test (some of them only to the record
# whose 001 matches a given number), prints the resulting record and writes it
# to "test_records_modified.mrc".
#
# Both files are opened in a single `with` statement so they are closed (and
# the output flushed) even if a helper raises while a record is processed.
with open("test_records.mrc", "rb") as marc_input, \
        open("test_records_modified.mrc", "wb") as MARC_WRITER:
    MARC_READER = pymarc.MARCReader(marc_input, to_unicode=True, force_utf8=True)

    # Loop through records
    for index, record in enumerate(MARC_READER):
        print(f"--- Starting Record {index}")

        # If record is invalid, report the reader's chunk & exception then skip it
        if record is None:
            print(f"Current chunk: {MARC_READER.current_chunk} was ignored because the following exception raised: {MARC_READER.current_exception}")
            continue

        # The 001 content selects which record-specific tests are run below
        record_nb = record["001"].data

        # Record 000001 : test sorting fields
        marc_utils.sort_fields_by_tag(record)

        # Record 000002 : test sorting subfields
        marc_utils.sort_subfields_for_tag(record, "610", ["9", "a", "*", "8", "z"])
        marc_utils.sort_subfields_for_tag(record, "615", ["0", "f"])
        marc_utils.sort_subfields_for_tag(record, "620", ["*", "5", "k"])

        # Record 000003 : test forcing indicators
        if record_nb == "000003":
            marc_utils.force_indicators(record, "200")
            marc_utils.force_indicators(record, "330", ind1="3", ind2=" ")
            marc_utils.force_indicators(record, "701", ind1="9")
            marc_utils.force_indicators(record, "702", ind2="8")

        # Record 000004 : test adding missing subfields
        if record_nb == "000004":
            marc_utils.add_missing_subfield_to_field(record, "701", "z", "fre", 5)

        # Record 000005 : test edit specific subfields with regexp (old & new func)
        if record_nb == "000005":
            marc_utils.edit_specific_repeatable_subfield_content_with_regexp(record["101"], ["a", "c"], r"^\s*([a-z]{3})\s*$", r"\1")
            marc_utils.edit_specific_repeatable_subfield_content_with_regexp(record["330"], ["a"], r"^\s+$", "")
            marc_utils.edit_repeatable_subf_content_with_regexp_for_tag(record, "102", ["a", "c"], r"^\s*([A-Z]{2})\s*$", r"\1")
            marc_utils.edit_repeatable_subf_content_with_regexp_for_tag(record, "200", ["e"], r"^\s+$", "")

        # Record 000006 : test replacing specific subfields not matching regexp (old & new func)
        if record_nb == "000006":
            marc_utils.replace_specific_repeatable_subfield_content_not_matching_regexp(record["101"], ["a", "c"], r"^[a-z]{3}$", "und")
            marc_utils.replace_specific_repeatable_subfield_content_not_matching_regexp(record["330"], ["a"], r"^Résumé \:", "Résumé invalide")
            marc_utils.replace_repeatable_subf_content_not_matching_regexp_for_tag(record, "102", ["a", "c"], r"^[A-Z]{2}$", r"??")
            marc_utils.replace_repeatable_subf_content_not_matching_regexp_for_tag(record, "200", ["e"], r"^in :", "ARA ARA ARA")

        # Record 000007 : test merging fields
        marc_utils.merge_all_fields_by_tag(record, "099")
        marc_utils.merge_all_fields_by_tag(record, "181", ["6", "*", "2"])

        # Record 000008 : test splitting a field if a specific subfield is repeated
        marc_utils.split_tags_if_multiple_specific_subfield(record, "463", "t")

        # Record 000009 : test splitting fields that have the same subfields multiple times
        marc_utils.split_merged_tags(record, "995")
        marc_utils.split_merged_tags(record, "777")

        # Record 000010 : test deleting empty subfields & empty fields
        if record_nb == "000010":
            marc_utils.delete_empty_subfields(record)
            marc_utils.delete_empty_fields(record)

        # Record 000011 : test deleting fields if a subfield matches a regexp
        marc_utils.delete_field_if_all_subfields_match_regexp(record, "410", "t", r"^\s+$", keep_if_no_subf=False)
        marc_utils.delete_field_if_all_subfields_match_regexp(record, "412", "t", r"^\s+$", keep_if_no_subf=True)

        # Record 000012 : test deleting multiple subfields in a field
        marc_utils.delete_multiple_subfield_for_tag(record, "725", "4")

        # Record 000013 : test date functions
        if record_nb == "000013":
            print("UNM 1000 publication date : ", marc_utils.get_year_from_UNM_100(record, creation=False))
            print("UNM 1000 creation date : ", marc_utils.get_year_from_UNM_100(record, creation=True))
            print("214$d date : ", marc_utils.get_years_in_specific_subfield(record, "214", "d"))
            print("330 date : ", marc_utils.get_years_less_accurate(record, "330"))
            print("214$d, 330, 615$a, 200, 100$a dates : ", marc_utils.get_years(record, [
                ("214", "d"),
                ("330", None),
                ("615", "a"),
                ("200", None),
                ("100", "a")
            ]))

        # Records 000014, 000015 & 000016 : test fix_7XX, with and without prioritize_71X
        if record_nb in ["000014", "000016"]:
            marc_utils.fix_7XX(record, prioritize_71X=False)
        elif record_nb == "000015":
            marc_utils.fix_7XX(record, prioritize_71X=True)

        # Record 000017 : test merge subfields
        marc_utils.merge_all_subfields_with_code(record, "324", "a", " ; ")

        # Print the record in the terminal
        print(marc_utils.record_as_string(record))

        # Write record
        MARC_WRITER.write(record.as_marc())