-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathchatbot.py
58 lines (53 loc) · 3.07 KB
/
chatbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# The Shani Sir chatbot
from chatterbot import ChatBot
from chatterbot.comparisons import JaccardSimilarity
# Build the bot: statements are persisted through the SQL storage adapter into
# a local SQLite file, and replies come from a single BestMatch logic adapter
# that compares statements with JaccardSimilarity.
shanisirbot = ChatBot(
    'The Shani Sir Bot',
    storage_adapter='chatterbot.storage.SQLStorageAdapter',
    logic_adapters=[
        {
            'import_path': 'chatterbot.logic.BestMatch',
            'statement_comparison_function': JaccardSimilarity,
            'maximum_similarity_threshold': 0.99,
        },
    ],
    preprocessors=['chatterbot.preprocessors.clean_whitespace'],
    database_uri='sqlite:///db.sqlite3',
    # Set read_only to True to disable further learning from conversations.
    read_only=False,
)
# NOTE: no response_selection_method is given above; it can also be set to
# get_random_response or get_most_frequent_response.

# Does any work that needs to be done before the chatbot can process responses.
shanisirbot.initialize()

# Module-level shortcut to the storage tagger's get_bigram_pair_string method.
get_tags = shanisirbot.storage.tagger.get_bigram_pair_string
# def train_with(corpus: str) -> None:
# """
# Trains the bot using the specified corpus
# eng ---> chatterbot.corpus.english (standard English corpus from chatterbot_corpora)
# woz ---> ./data/MULTIWOZ2.1 (Multi-Domain Wizard-of-Oz dataset from http://dialogue.mi.eng.cam.ac.uk/index.php/corpus/)
# ubu ---> Will download and extract the Ubuntu dialog corpus if that has not already been done.
# """
#
# from chatterbot.trainers import ChatterBotCorpusTrainer, UbuntuCorpusTrainer
# import time
#
# if corpus == 'ubu': # WARNING: TAKES A REALLY LONG TIME
# start = time.time() # (TOOK 114000 SECONDS = 31 HRS TO EXTRACT & TRAIN FOR UNCLE SAM, NOT INCLUDING DL TIME)
# corpus_trainer = UbuntuCorpusTrainer(shanisirbot)
# corpus_trainer.train()
# else:
# start = time.time()
# corpus_trainer = ChatterBotCorpusTrainer(shanisirbot)
# if corpus == 'eng':
# corpus_trainer.train("chatterbot.corpus.english")
# elif corpus == 'woz':
# corpus_trainer.train('./data/MULTIWOZ2.1/attraction_db.json',
# './data/MULTIWOZ2.1/data.json',
# './data/MULTIWOZ2.1/dialogue_acts.json',
# './data/MULTIWOZ2.1/hospital_db.json',
# './data/MULTIWOZ2.1/hotel_db.json',
# './data/MULTIWOZ2.1/ontology.json',
# './data/MULTIWOZ2.1/police_db.json',
# './data/MULTIWOZ2.1/restaurant_db.json',
# './data/MULTIWOZ2.1/taxi_db.json',
# './data/MULTIWOZ2.1/testListFile.json',
# './data/MULTIWOZ2.1/train_db.json',
# './data/MULTIWOZ2.1/valListFile.json')
# else:
# print("Invalid corpus.")
# return
# end = time.time()
# time_taken = end - start
# print(f"\n\nThe Shani Sir chatbot has been trained using the corpus {corpus}. Time taken: {time_taken}s")