-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmapping_keywords.py
22 lines (21 loc) · 1.1 KB
/
mapping_keywords.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from nltk.tokenize import sent_tokenize
def map_keywords_to_sentences(text, keywords, context_window_size):
    """Map each keyword to the sentences of *text* that mention it.

    The text is split into sentences with NLTK's ``sent_tokenize``. For every
    sentence that contains a keyword (plain ``in`` substring test, so the
    match is case-sensitive), that sentence is joined with up to
    ``context_window_size`` sentences that precede it. Sentences that follow
    the match are not included. When a keyword occurs in several sentences,
    the per-match context strings are concatenated in document order,
    separated by a single space; overlapping windows are therefore repeated.

    Args:
        text: The document to search.
        keywords: Iterable of keyword strings. A keyword listed more than
            once is processed only once, so its context is not duplicated.
        context_window_size: Number of preceding sentences to include with
            each matching sentence. Negative values are treated as 0.

    Returns:
        A dict mapping each keyword that was found to its combined context
        string. Keywords that match no sentence are omitted.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    sentences = sent_tokenize(text)
    # Diagnostic output goes through logging instead of stdout so library
    # callers can silence or redirect it.
    logging.getLogger(__name__).debug("Sentences: %s", sentences)

    # A negative window would put the slice start past the matching sentence
    # and yield empty contexts; clamp so the match itself is always included.
    window = max(0, context_window_size)

    keyword_sentence_mapping = {}
    # dict.fromkeys de-duplicates while preserving first-seen order.
    for keyword in dict.fromkeys(keywords):
        contexts = [
            " ".join(sentences[max(0, i - window):i + 1])
            for i, sentence in enumerate(sentences)
            if keyword in sentence
        ]
        if contexts:
            keyword_sentence_mapping[keyword] = " ".join(contexts)
    return keyword_sentence_mapping