-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtest_annotator.py
48 lines (42 loc) · 2.08 KB
/
test_annotator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# -*- coding: utf-8 -*-
import pytest
@pytest.mark.parametrize("test_input,labels,expected", [
    (
        '(a, A), (a,A), ("a", B), ("a , B), (a", , "B),(a", , \'B),(a"\',,,, , \'B), (a"\',,,, , \'B" ),(a,B/C)',
        ['A', 'B', 'B/C'],
        [
            ('a', 'A'),
            ('a', 'A'),
            ('"a"', 'B'),
            ('"a ', 'B'),
            ('a", ', 'B'),
            ('a", ', 'B'),
            ('a"\',,,, ', 'B'),
            ('a"\',,,, ', 'B'),
            ('a', 'B/C'),
        ],
    ),
])
def test_ner_regex(test_input, labels, expected):
    """Check the NER extraction regex against a noisy answer string.

    The pattern returned by ``GPTAnnotator.make_ner_extraction_regex(labels)``
    is scanned over ``test_input``; the ``entity`` and ``entity_type`` named
    groups of every match, in match order, must equal ``expected``.
    """
    from llano import GPTAnnotator

    pattern = GPTAnnotator.make_ner_extraction_regex(labels)
    # Collect one (entity, entity_type) pair per match, preserving scan order.
    found = [
        (hit.group('entity'), hit.group('entity_type'))
        for hit in pattern.finditer(test_input)
    ]
    assert found == expected
@pytest.mark.parametrize("test_input,labels,expected", [
    (
        '''[(Mr. Li, live in, Shanghai),("Mr. Li", live in, "Shanghai"),( Mr. Li, work at, HelloWorld )], others...(Note: The above output is in Chinese language as the given sentence is in Chinese.)''',
        ['live in', 'work at'],
        [("Mr. Li", "live in", "Shanghai"), ("Mr. Li", "live in", "Shanghai"), ("Mr. Li", "work at", "HelloWorld")],
    ),
    (
        "[('李华', '居住在', '上海'), ('李华', '工作在', 'HelloWorld公司')]",
        ['居住在', '工作在'],
        [('李华', '居住在', '上海'), ('李华', '工作在', 'HelloWorld公司')],
    ),
    (
        "[('Mr. Li', 'work at', 'HelloWorld Tech'), ('he', 'live in', 'Shanghai')]",
        ['live in', 'work at'],
        [('Mr. Li', 'work at', 'HelloWorld Tech'), ('he', 'live in', 'Shanghai')],
    ),
])
def test_relation_extraction_regex(test_input, labels, expected):
    """Check the relation-extraction regex against sample answer strings.

    The pattern returned by
    ``GPTAnnotator.make_relation_extraction_regex(labels)`` is scanned over
    ``test_input``. For every match, the ``subject``, ``predicate`` and
    ``object`` named groups are each passed through
    ``GPTAnnotator.re_strip``; the resulting triples, in match order, must
    equal ``expected``.
    """
    from llano import GPTAnnotator

    regex = GPTAnnotator.make_relation_extraction_regex(labels)
    strip = GPTAnnotator.re_strip
    # Build the triples directly so no local named ``object`` is needed,
    # which would shadow the builtin of the same name.
    triples = [
        (
            strip(match.group('subject')),
            strip(match.group('predicate')),
            strip(match.group('object')),
        )
        for match in regex.finditer(test_input)
    ]
    assert triples == expected