Adding a basic chatbot to be modified for our usage. This is partially adapted from: https://data-flair.training/blogs/python-chatbot-project/

snbenge
2020-04-17 20:50:40 -04:00
parent b8ee129f22
commit 15a77e7bcf
9 changed files with 408 additions and 0 deletions

Binary file not shown.

BIN
src/semantic/classes.pkl Normal file

Binary file not shown.

9
src/semantic/classes.txt Normal file

@@ -0,0 +1,9 @@
adverse_drug
blood_pressure
blood_pressure_search
goodbye
greeting
hospital_search
options
pharmacy_search
thanks

114
src/semantic/gui_chatbot.py Normal file

@@ -0,0 +1,114 @@
import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
import pickle
import numpy as np

from keras.models import load_model
model = load_model('chatbot_model.h5')
import json
import random

intents = json.loads(open('intents.json').read())
words = pickle.load(open('words.pkl', 'rb'))
classes = pickle.load(open('classes.pkl', 'rb'))


def clean_up_sentence(sentence):
    # tokenize the pattern - split the sentence into an array of words
    sentence_words = nltk.word_tokenize(sentence)
    # lemmatize every word - reduce it to its base form
    sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
    return sentence_words


# return a bag-of-words array: 0 or 1 for each vocabulary word present in the sentence
def bag_of_words(sentence, words, show_details=True):
    # tokenize the pattern
    sentence_words = clean_up_sentence(sentence)
    # bag of words - vocabulary matrix
    bag = [0] * len(words)
    for s in sentence_words:
        for i, word in enumerate(words):
            if word == s:
                # assign 1 at this position if the word is in the vocabulary
                bag[i] = 1
                if show_details:
                    print("found in bag: %s" % word)
    return np.array(bag)


def predict_class(sentence):
    # filter out predictions below the threshold
    p = bag_of_words(sentence, words, show_details=False)
    res = model.predict(np.array([p]))[0]
    ERROR_THRESHOLD = 0.25
    results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]
    # sort by probability, strongest first
    results.sort(key=lambda x: x[1], reverse=True)
    return_list = []
    for r in results:
        return_list.append({"intent": classes[r[0]], "probability": str(r[1])})
    return return_list


def getResponse(ints, intents_json):
    # fall back to the 'noanswer' intent if nothing scored above the threshold
    tag = ints[0]['intent'] if ints else 'noanswer'
    list_of_intents = intents_json['intents']
    # default in case the predicted tag has no entry in intents.json
    result = "Sorry, I don't understand."
    for i in list_of_intents:
        if i['tag'] == tag:
            result = random.choice(i['responses'])
            break
    return result


# Creating the tkinter GUI
from tkinter import *


def send():
    msg = EntryBox.get("1.0", 'end-1c').strip()
    EntryBox.delete("0.0", END)

    if msg != '':
        ChatBox.config(state=NORMAL)
        ChatBox.insert(END, "You: " + msg + '\n\n')
        ChatBox.config(foreground="#446665", font=("Verdana", 12))

        ints = predict_class(msg)
        res = getResponse(ints, intents)

        ChatBox.insert(END, "Bot: " + res + '\n\n')
        ChatBox.config(state=DISABLED)
        ChatBox.yview(END)


root = Tk()
root.title("Chatbot")
root.geometry("400x500")
root.resizable(width=FALSE, height=FALSE)

# Create the chat window
ChatBox = Text(root, bd=0, bg="white", height="8", width="50", font="Arial")
ChatBox.config(state=DISABLED)

# Bind a scrollbar to the chat window
scrollbar = Scrollbar(root, command=ChatBox.yview, cursor="heart")
ChatBox['yscrollcommand'] = scrollbar.set

# Create the button to send a message
SendButton = Button(root, font=("Verdana", 12, 'bold'), text="Send", width="12", height=5,
                    bd=0, bg="#f9a602", activebackground="#3c9d9b", fg='#000000',
                    command=send)

# Create the box to enter a message
EntryBox = Text(root, bd=0, bg="white", width="29", height="5", font="Arial")
#EntryBox.bind("<Return>", send)

# Place all components in the window
scrollbar.place(x=376, y=6, height=386)
ChatBox.place(x=6, y=6, height=386, width=370)
EntryBox.place(x=128, y=401, height=90, width=265)
SendButton.place(x=6, y=401, height=90)

root.mainloop()
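
For a quick check without launching the tkinter window, the same model files can be exercised from a throwaway console script. A minimal sketch (console_test.py is a hypothetical helper, not part of this commit; it assumes chatbot_model.h5, words.pkl, classes.pkl, and intents.json already exist in the working directory):

# console_test.py - hypothetical smoke test, not part of this commit
# assumes the model and pickle files produced by the training script exist
import json
import pickle
import random

import nltk
import numpy as np
from keras.models import load_model
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
model = load_model('chatbot_model.h5')
intents = json.loads(open('intents.json').read())
words = pickle.load(open('words.pkl', 'rb'))
classes = pickle.load(open('classes.pkl', 'rb'))


def respond(sentence):
    # same bag-of-words encoding as gui_chatbot.py, without the GUI
    tokens = [lemmatizer.lemmatize(w.lower()) for w in nltk.word_tokenize(sentence)]
    bag = np.array([1 if w in tokens else 0 for w in words])
    probs = model.predict(np.array([bag]))[0]
    tag = classes[int(np.argmax(probs))]
    for intent in intents['intents']:
        if intent['tag'] == tag:
            return random.choice(intent['responses'])
    return "(no response defined for tag %s)" % tag


for msg in ["Hi there", "Find me a pharmacy", "Bye"]:
    print(msg, "->", respond(msg))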

73
src/semantic/intents.json Normal file

@@ -0,0 +1,73 @@
{"intents": [
  {"tag": "greeting",
    "patterns": ["Hi there", "How are you", "Is anyone there?", "Hey", "Hola", "Hello", "Good day"],
    "responses": ["Hello, thanks for asking", "Good to see you again", "Hi there, how can I help?"],
    "context": [""]
  },
  {"tag": "goodbye",
    "patterns": ["Bye", "See you later", "Goodbye", "Nice chatting to you, bye", "Till next time"],
    "responses": ["See you!", "Have a nice day", "Bye! Come back again soon."],
    "context": [""]
  },
  {"tag": "thanks",
    "patterns": ["Thanks", "Thank you", "That's helpful", "Awesome, thanks", "Thanks for helping me"],
    "responses": ["Happy to help!", "Any time!", "My pleasure"],
    "context": [""]
  },
  {"tag": "noanswer",
    "patterns": [],
    "responses": ["Sorry, can't understand you", "Please give me more info", "Not sure I understand"],
    "context": [""]
  },
  {"tag": "options",
    "patterns": ["How you could help me?", "What you can do?", "What help you provide?", "How you can be helpful?", "What support is offered"],
    "responses": ["I can guide you through Adverse drug reaction list, Blood pressure tracking, Hospitals and Pharmacies", "Offering support for Adverse drug reaction, Blood pressure, Hospitals and Pharmacies"],
    "context": [""]
  },
  {"tag": "navigation",
    "patterns": ["How to check Adverse drug reaction?", "Open adverse drugs module", "Give me a list of drugs causing adverse behavior", "List all drugs suitable for patient with adverse reaction", "Which drugs dont have adverse reaction?"],
    "responses": ["Navigating to Adverse drug reaction module"],
    "context": [""]
  },
  {"tag": "exit",
    "patterns": ["Open blood pressure module", "Task related to blood pressure", "Blood pressure data entry", "I want to log blood pressure results", "Blood pressure data management"],
    "responses": ["Navigating to Blood Pressure module"],
    "context": [""]
  },
  {"tag": "blood_pressure_search",
    "patterns": ["I want to search for blood pressure result history", "Blood pressure for patient", "Load patient blood pressure result", "Show blood pressure results for patient", "Find blood pressure results by ID"],
    "responses": ["Please provide Patient ID", "Patient ID?"],
    "context": ["search_blood_pressure_by_patient_id"]
  },
  {"tag": "search_blood_pressure_by_patient_id",
    "patterns": [],
    "responses": ["Loading Blood pressure result for Patient"],
    "context": [""]
  },
  {"tag": "pharmacy_search",
    "patterns": ["Find me a pharmacy", "Find pharmacy", "List of pharmacies nearby", "Locate pharmacy", "Search pharmacy"],
    "responses": ["Please provide pharmacy name"],
    "context": ["search_pharmacy_by_name"]
  },
  {"tag": "search_pharmacy_by_name",
    "patterns": [],
    "responses": ["Loading pharmacy details"],
    "context": [""]
  },
  {"tag": "hospital_search",
    "patterns": ["Lookup for hospital", "Searching for hospital to transfer patient", "I want to search hospital data", "Hospital lookup for patient", "Looking up hospital details"],
    "responses": ["Please provide hospital name or location"],
    "context": ["search_hospital_by_params"]
  },
  {"tag": "search_hospital_by_params",
    "patterns": [],
    "responses": ["Please provide hospital type"],
    "context": ["search_hospital_by_type"]
  },
  {"tag": "search_hospital_by_type",
    "patterns": [],
    "responses": ["Loading hospital details"],
    "context": [""]
  }
]
}
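
Each intent pairs training patterns with canned responses, and a non-empty context value names the follow-up tag the conversation should hand off to (e.g. pharmacy_search leads to search_pharmacy_by_name). Since we will be editing this file, a small consistency check can catch dangling context references; a sketch, with check_intents.py as a hypothetical helper, not part of this commit:

# check_intents.py - hypothetical consistency check, not part of this commit
import json

intents = json.loads(open('intents.json').read())['intents']
tags = {intent['tag'] for intent in intents}

for intent in intents:
    for ctx in intent.get('context', []):
        # every non-empty context value should name an existing tag
        if ctx and ctx not in tags:
            print("dangling context %r in tag %r" % (ctx, intent['tag']))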


@@ -0,0 +1,33 @@
import pickle


# Print the contents of a pickle file to a matching text file.
# This lets us monitor the current vocabulary and class lists.
def printPickle(filename):
    pickle_in = open(filename + '.pkl', "rb")
    currDict = pickle.load(pickle_in)
    f = open(filename + '.txt', "w")
    for x in currDict:
        f.write('%s\n' % x)
    f.close()


# Create a new pickle file from the given collection.
# example: grades = {'Bart', 'Lisa', 'Milhouse', 'Nelson'}
def createPickle(filename, pklList):
    f = open(filename + '.pkl', 'wb')  # the pickle file is created next to this script
    pickle.dump(pklList, f)  # dump the data to f
    f.close()


# Merge new entries into an existing pickle file.
# Note: the | union below assumes both the stored object and pklList are sets.
def updatePickle(filename, pklList):
    pickle_in = open(filename + '.pkl', "rb")
    currDict = pickle.load(pickle_in)
    f = open(filename + '.pkl', 'wb')  # overwrite the existing pickle file
    pickle.dump(currDict | pklList, f)  # dump the merged data to f
    f.close()


printPickle("classes")
printPickle("words")

# Example usage
# createPickle('test', {'Bart', 'Lisa', 'Milhouse', 'Nelson'})
# updatePickle('test', {'Theo'})
# printPickle("test")
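
One caveat: updatePickle merges with the set-union operator |, but the training script pickles words and classes as plain lists, so updatePickle('words', {...}) would raise a TypeError as written. A set-safe variant might look like the sketch below (updatePickleSafe is hypothetical, not part of this commit):

# hypothetical set-safe variant of updatePickle, not part of this commit
import pickle

def updatePickleSafe(filename, newItems):
    with open(filename + '.pkl', 'rb') as f:
        current = set(pickle.load(f))  # tolerate either a list or a set pickle
    with open(filename + '.pkl', 'wb') as f:
        pickle.dump(sorted(current | set(newItems)), f)  # store as a sorted list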


@@ -0,0 +1,92 @@
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
import random

import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
import json
import pickle

words = []
classes = []
documents = []
ignore_letters = ['!', '?', ',', '.']
intents_file = open('intents.json').read()
intents = json.loads(intents_file)

for intent in intents['intents']:
    for pattern in intent['patterns']:
        # tokenize each word
        word = nltk.word_tokenize(pattern)
        words.extend(word)
        # add documents to the corpus
        documents.append((word, intent['tag']))
        # add to our classes list
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

print(documents)

# lemmatize and lowercase each word and remove duplicates
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_letters]
words = sorted(list(set(words)))
# sort classes
classes = sorted(list(set(classes)))
# documents = combinations of patterns and intents
print(len(documents), "documents")
# classes = intents
print(len(classes), "classes", classes)
# words = all words, the vocabulary
print(len(words), "unique lemmatized words", words)

pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))

# create our training data
training = []
# create an empty array for our output
output_empty = [0] * len(classes)
# training set: a bag of words for each sentence
for doc in documents:
    # initialize our bag of words
    bag = []
    # list of tokenized words for the pattern
    pattern_words = doc[0]
    # lemmatize each word - create the base word, in an attempt to represent related words
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_words]
    # create our bag-of-words array with 1 if a word match is found in the current pattern
    for word in words:
        bag.append(1 if word in pattern_words else 0)
    # output is a '0' for each tag and '1' for the current tag (for each pattern)
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    training.append([bag, output_row])

# shuffle our features and turn them into an np.array
# (dtype=object because bag and output_row have different lengths)
random.shuffle(training)
training = np.array(training, dtype=object)
# create training lists. X - patterns, Y - intents
train_x = list(training[:, 0])
train_y = list(training[:, 1])
print("Training data created")

# Create model - 3 layers. First layer 128 neurons, second layer 64 neurons, and the 3rd
# output layer contains a number of neurons equal to the number of intents, with softmax
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile model. Stochastic gradient descent with Nesterov accelerated gradient
# gives good results for this model
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# fit and save the model
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
model.save('chatbot_model.h5')
print("model created")
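
Given the files above, each training row pairs an 87-dimensional bag-of-words vector with a 9-way one-hot label, so the saved model's input and output shapes should match the pickled vocabularies. A quick post-training sanity check, as a sketch (not part of this commit):

# hypothetical sanity check after training, not part of this commit
import pickle
import numpy as np
from keras.models import load_model

model = load_model('chatbot_model.h5')
words = pickle.load(open('words.pkl', 'rb'))
classes = pickle.load(open('classes.pkl', 'rb'))

assert model.input_shape == (None, len(words))     # 87-dim bag of words
assert model.output_shape == (None, len(classes))  # 9 intent classes

# softmax outputs should sum to ~1 even for an all-zeros bag
probs = model.predict(np.zeros((1, len(words))))[0]
print("sum of softmax outputs:", probs.sum())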

BIN
src/semantic/words.pkl Normal file

Binary file not shown.

87
src/semantic/words.txt Normal file

@@ -0,0 +1,87 @@
's
a
adverse
all
anyone
are
awesome
be
behavior
blood
by
bye
can
causing
chatting
check
could
data
day
detail
do
dont
drug
entry
find
for
give
good
goodbye
have
hello
help
helpful
helping
hey
hi
history
hola
hospital
how
i
id
is
later
list
load
locate
log
looking
lookup
management
me
module
nearby
next
nice
of
offered
open
patient
pharmacy
pressure
provide
reaction
related
result
search
searching
see
show
suitable
support
task
thank
thanks
that
there
till
time
to
transfer
up
want
what
which
with
you