-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathPOSTagger.py
22 lines (18 loc) · 881 Bytes
/
POSTagger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import nltk
# Runs at import time: asks NLTK's downloader for the
# 'averaged_perceptron_tagger' resource.
# NOTE(review): presumably this is the model nltk.pos_tag relies on further
# down in this file -- confirm against the installed NLTK version.
nltk.download('averaged_perceptron_tagger')
class POSTagger(object):
    """Thin wrapper around NLTK's part-of-speech tagger.

    Converts NLTK's (word, tag) pairs into (form, lemma, tags) triples.
    """

    def __init__(self):
        pass

    def pos_tag(self, sentences):
        """Tag every token of every sentence with its part of speech.

        input format: iterable of lists of words
            e.g.: [['this', 'is', 'a', 'sentence'], ['this', 'is', 'another', 'one']]

        output format: list of lists of tagged tokens. Each tagged token is a
        (form, lemma, tags) triple where:
            - form is the word exactly as it was passed in;
            - lemma is a copy of the form -- no lemmatisation is performed
              here, the slot only exists to keep the triple shape;
            - tags is a one-element list holding the tag NLTK assigned.
            e.g. (the exact tag strings come from the NLTK model in use):
            [[('this', 'this', ['DT']), ('is', 'is', ['VBZ']), ...],
             [('this', 'this', ['DT']), ('is', 'is', ['VBZ']), ...]]

        An empty input yields an empty list without touching the tagger.
        """
        # Materialise once so generators can be both tested for emptiness
        # and handed on to NLTK.
        sentences = list(sentences)
        if not sentences:
            return []

        # Batch API: the tagger is set up once for the whole input instead
        # of once per sentence as with repeated nltk.pos_tag calls.
        tagged_sentences = nltk.pos_tag_sents(sentences)

        # adapt format: (word, tag) -> (form, lemma, [tag])
        return [
            [(word, word, [postag]) for (word, postag) in tagged]
            for tagged in tagged_sentences
        ]