-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_parse.py
91 lines (74 loc) · 2.51 KB
/
test_parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import datetime
import json
from pathlib import Path
import pytest
from pytest import fixture
from parser import InterestParser
from utils import pdf_to_xml_dict
@fixture
def pdf_dict():
    """Return the parsed-PDF dictionary stored in ``testdata.json``.

    The path is relative, so the file is resolved against the current
    working directory of the test run.
    """
    # Read explicitly as UTF-8: the expected data contains non-ASCII
    # Norwegian characters, and the platform default encoding is not
    # guaranteed to be UTF-8.
    with Path("testdata.json").open(encoding="utf-8") as fp:
        return json.load(fp)
@fixture
def interest_parser(pdf_dict):
    """Provide an ``InterestParser`` built from the ``pdf_dict`` fixture."""
    parser = InterestParser(pdf_dict=pdf_dict)
    return parser
@pytest.mark.skip(reason="Too slow")
def test_pdf_to_xml_dict():
    """Convert the sample PDF and compare the result with ``testdata.json``.

    Skipped by default because the conversion is slow. Both paths are
    relative to the current working directory of the test run.
    """
    pdf_path = Path("pdfs/interests-2020-03-23.pdf")
    pdf_dict = pdf_to_xml_dict(pdf_path)
    assert pdf_dict
    # The serialized result must mention "23. mars" somewhere
    # (presumably the document's update date -- matches the PDF file name).
    assert "23. mars" in json.dumps(pdf_dict)
    # Read explicitly as UTF-8 so the comparison does not depend on the
    # platform default encoding.
    with Path("testdata.json").open(encoding="utf-8") as fp:
        expected = json.load(fp)
    assert pdf_dict == expected
def test_parse_date(interest_parser):
    """Check that the document metadata exposes the expected ``updated_at``."""
    document_meta = interest_parser.parse_document_meta()
    assert document_meta
    assert document_meta.get("updated_at")

    parsed_date = document_meta["updated_at"]
    assert isinstance(parsed_date, datetime.date)
    # Comparing the ISO string also rejects values carrying a time part.
    assert parsed_date.isoformat() == "2020-03-23"
def test_rep_data(interest_parser):
    """Check that parsing yields non-empty entries that all have a first name."""
    representatives = interest_parser.parse_pdf_data()
    assert representatives
    assert len(representatives) > 0

    for entry in representatives:
        # Every entry must be truthy and expose a "first_name" key.
        assert entry
        assert "first_name" in entry
def test_first_last_data(interest_parser):
    """Verify the first and last parsed entries and one entry in between.

    The first and last items returned by ``parse_pdf_data`` are compared
    against their full expected dictionaries. The entry for Jan Tore Sanner
    is then looked up by name and checked for a set of category keys.
    """
    data = interest_parser.parse_pdf_data()
    assert data
    assert len(data) > 0

    last_rep = data[-1]
    assert last_rep == {
        "first_name": "Johan",
        "last_name": "Aas",
        "party": "frp",
        "by_category": {"2": "Styreleder Gamle Bæreiavegen boligsameie (lønnet)"},
    }

    first_rep = data[0]
    assert first_rep == {
        "by_category": {
            "2": "Høy & Rodum Eiendom AS, styreleder\n"
            "KomRev Trøndelag IKS, styreleder (lønnet)\n"
            "HRE Holding AS, styreleder\n"
            "Dr. Agdestein AS, styremedlem (vara)\n"
            "Steinkjer Montessoribarnehage, styremedlem\n"
            "Naboer AB, styremedlem\n"
            "Steinkjer Montessoriforening, styremedlem",
            "8": "Fyrgt 3, Steinkjer\n"
            "Kongensgt 38, Steinkjer\n"
            "Otto Sverdrups vei 50, Steinkjer\n"
            "Åsveien 57-59, Steinkjer",
            "9": "Høy & Rodum Eiendom AS\n" "HRE Holding AS",
        },
        "first_name": "Elin Rodum",
        "last_name": "Agdestein",
        "party": "h",
    }

    # Look up Jan Tore Sanner by name; the first match is the one inspected.
    jt = [
        rep
        for rep in data
        if rep["first_name"] == "Jan Tore" and rep["last_name"] == "Sanner"
    ]
    assert jt
    categories = jt[0]["by_category"]
    expected_cats = ["4", "7", "10", "11"]
    # Collect the absent keys so a failure shows exactly which are missing.
    missing_cats = [cat for cat in expected_cats if cat not in categories]
    assert not missing_cats