diff --git a/src/semantic/chatbot_model.h5 b/src/semantic/chatbot_model.h5
new file mode 100644
index 0000000..2449402
Binary files /dev/null and b/src/semantic/chatbot_model.h5 differ
diff --git a/src/semantic/classes.pkl b/src/semantic/classes.pkl
new file mode 100644
index 0000000..ccf71de
Binary files /dev/null and b/src/semantic/classes.pkl differ
diff --git a/src/semantic/classes.txt b/src/semantic/classes.txt
new file mode 100644
index 0000000..52d56a9
--- /dev/null
+++ b/src/semantic/classes.txt
@@ -0,0 +1,9 @@
+adverse_drug
+blood_pressure
+blood_pressure_search
+goodbye
+greeting
+hospital_search
+options
+pharmacy_search
+thanks
diff --git a/src/semantic/gui_chatbot.py b/src/semantic/gui_chatbot.py
new file mode 100644
index 0000000..d205db3
--- /dev/null
+++ b/src/semantic/gui_chatbot.py
@@ -0,0 +1,114 @@
+import nltk
+from nltk.stem import WordNetLemmatizer
+lemmatizer = WordNetLemmatizer()
+import pickle
+import numpy as np
+
+from keras.models import load_model
+model = load_model('chatbot_model.h5')
+import json
+import random
+intents = json.loads(open('intents.json').read())
+words = pickle.load(open('words.pkl', 'rb'))
+classes = pickle.load(open('classes.pkl', 'rb'))
+
+
+def clean_up_sentence(sentence):
+    # tokenize the pattern - split the sentence into a list of words
+    sentence_words = nltk.word_tokenize(sentence)
+    # lemmatize every word - reduce each word to its base form
+    sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
+    return sentence_words
+
+
+# return bag-of-words array: 0 or 1 for each vocabulary word present in the sentence
+def bag_of_words(sentence, words, show_details=True):
+    # tokenize the pattern
+    sentence_words = clean_up_sentence(sentence)
+    # bag of words - vocabulary vector
+    bag = [0] * len(words)
+    for s in sentence_words:
+        for i, word in enumerate(words):
+            if word == s:
+                # assign 1 at the position of the matching vocabulary word
+                bag[i] = 1
+                if show_details:
+                    print("found in bag: %s" % word)
+    return np.array(bag)
+
+def predict_class(sentence):
+    # filter out predictions below the threshold
+    p = bag_of_words(sentence, words, show_details=False)
+    res = model.predict(np.array([p]))[0]
+    ERROR_THRESHOLD = 0.25
+    results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]
+    # sort by probability, strongest first
+    results.sort(key=lambda x: x[1], reverse=True)
+    return_list = []
+    for r in results:
+        return_list.append({"intent": classes[r[0]], "probability": str(r[1])})
+    return return_list
+
+def getResponse(ints, intents_json):
+    # fall back to a default reply if no intent cleared the threshold,
+    # so we never hit an unbound 'result' below
+    result = "Sorry, I didn't understand that."
+    if not ints:
+        return result
+    tag = ints[0]['intent']
+    list_of_intents = intents_json['intents']
+    for i in list_of_intents:
+        if i['tag'] == tag:
+            result = random.choice(i['responses'])
+            break
+    return result
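+
+# A minimal sketch of the classification pipeline without the GUI (illustrative,
+# kept commented out so importing this module still just launches the window):
+# while True:
+#     message = input("You: ")
+#     print("Bot:", getResponse(predict_class(message), intents))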
+
+
+#Creating tkinter GUI
+from tkinter import *
+
+def send():
+    msg = EntryBox.get("1.0", 'end-1c').strip()
+    EntryBox.delete("1.0", END)
+
+    if msg != '':
+        ChatBox.config(state=NORMAL)
+        ChatBox.insert(END, "You: " + msg + '\n\n')
+        ChatBox.config(foreground="#446665", font=("Verdana", 12))
+
+        ints = predict_class(msg)
+        res = getResponse(ints, intents)
+
+        ChatBox.insert(END, "Bot: " + res + '\n\n')
+
+        ChatBox.config(state=DISABLED)
+        ChatBox.yview(END)
+
+
+root = Tk()
+root.title("Chatbot")
+root.geometry("400x500")
+root.resizable(width=FALSE, height=FALSE)
+
+#Create Chat window
+ChatBox = Text(root, bd=0, bg="white", height="8", width="50", font="Arial")
+ChatBox.config(state=DISABLED)
+
+#Bind scrollbar to Chat window
+scrollbar = Scrollbar(root, command=ChatBox.yview, cursor="heart")
+ChatBox['yscrollcommand'] = scrollbar.set
+
+#Create Button to send message
+SendButton = Button(root, font=("Verdana", 12, 'bold'), text="Send", width="12", height=5,
+                    bd=0, bg="#f9a602", activebackground="#3c9d9b", fg='#000000',
+                    command=send)
+
+#Create the box to enter message
+EntryBox = Text(root, bd=0, bg="white", width="29", height="5", font="Arial")
+#EntryBox.bind("<Return>", send)
+
+
+#Place all components on the screen
+scrollbar.place(x=376, y=6, height=386)
+ChatBox.place(x=6, y=6, height=386, width=370)
+EntryBox.place(x=128, y=401, height=90, width=265)
+SendButton.place(x=6, y=401, height=90)
+
+root.mainloop()
diff --git a/src/semantic/intents.json b/src/semantic/intents.json
new file mode 100644
index 0000000..3f97da9
--- /dev/null
+++ b/src/semantic/intents.json
@@ -0,0 +1,73 @@
+{"intents": [
+    {"tag": "greeting",
+     "patterns": ["Hi there", "How are you", "Is anyone there?", "Hey", "Hola", "Hello", "Good day"],
+     "responses": ["Hello, thanks for asking", "Good to see you again", "Hi there, how can I help?"],
+     "context": [""]
+    },
+    {"tag": "goodbye",
+     "patterns": ["Bye", "See you later", "Goodbye", "Nice chatting to you, bye", "Till next time"],
+     "responses": ["See you!", "Have a nice day", "Bye! Come back again soon."],
+     "context": [""]
+    },
+    {"tag": "thanks",
+     "patterns": ["Thanks", "Thank you", "That's helpful", "Awesome, thanks", "Thanks for helping me"],
+     "responses": ["Happy to help!", "Any time!", "My pleasure"],
+     "context": [""]
+    },
+    {"tag": "noanswer",
+     "patterns": [],
+     "responses": ["Sorry, can't understand you", "Please give me more info", "Not sure I understand"],
+     "context": [""]
+    },
+    {"tag": "options",
+     "patterns": ["How you could help me?", "What you can do?", "What help you provide?", "How you can be helpful?", "What support is offered"],
+     "responses": ["I can guide you through Adverse drug reaction list, Blood pressure tracking, Hospitals and Pharmacies", "Offering support for Adverse drug reaction, Blood pressure, Hospitals and Pharmacies"],
+     "context": [""]
+    },
+    {"tag": "navigation",
+     "patterns": ["How to check Adverse drug reaction?", "Open adverse drugs module", "Give me a list of drugs causing adverse behavior", "List all drugs suitable for patient with adverse reaction", "Which drugs dont have adverse reaction?"],
+     "responses": ["Navigating to Adverse drug reaction module"],
+     "context": [""]
+    },
+    {"tag": "exit",
+     "patterns": ["Open blood pressure module", "Task related to blood pressure", "Blood pressure data entry", "I want to log blood pressure results", "Blood pressure data management"],
+     "responses": ["Navigating to Blood Pressure module"],
+     "context": [""]
+    },
+    {"tag": "blood_pressure_search",
+     "patterns": ["I want to search for blood pressure result history", "Blood pressure for patient", "Load patient blood pressure result", "Show blood pressure results for patient", "Find blood pressure results by ID"],
+     "responses": ["Please provide Patient ID", "Patient ID?"],
+     "context": ["search_blood_pressure_by_patient_id"]
+    },
+    {"tag": "search_blood_pressure_by_patient_id",
+     "patterns": [],
+     "responses": ["Loading Blood pressure result for Patient"],
+     "context": [""]
+    },
+    {"tag": "pharmacy_search",
+     "patterns": ["Find me a pharmacy", "Find pharmacy", "List of pharmacies nearby", "Locate pharmacy", "Search pharmacy"],
+     "responses": ["Please provide pharmacy name"],
+     "context": ["search_pharmacy_by_name"]
+    },
+    {"tag": "search_pharmacy_by_name",
+     "patterns": [],
+     "responses": ["Loading pharmacy details"],
+     "context": [""]
+    },
+    {"tag": "hospital_search",
+     "patterns": ["Lookup for hospital", "Searching for hospital to transfer patient", "I want to search hospital data", "Hospital lookup for patient", "Looking up hospital details"],
+     "responses": ["Please provide hospital name or location"],
+     "context": ["search_hospital_by_params"]
+    },
+    {"tag": "search_hospital_by_params",
+     "patterns": [],
+     "responses": ["Please provide hospital type"],
+     "context": ["search_hospital_by_type"]
+    },
+    {"tag": "search_hospital_by_type",
+     "patterns": [],
+     "responses": ["Loading hospital details"],
+     "context": [""]
+    }
+  ]
+}
diff --git a/src/semantic/pickleManage.py b/src/semantic/pickleManage.py
new file mode 100644
index 0000000..daa8b3f
--- /dev/null
+++ b/src/semantic/pickleManage.py
@@ -0,0 +1,33 @@
+import pickle
+
+# dump a pickle file's contents to a text file of the same name
+# this lets us inspect the current word and class lists without unpickling by hand
+def printPickle(filename):
+    with open(filename + '.pkl', 'rb') as pickle_in:
+        currDict = pickle.load(pickle_in)
+    with open(filename + '.txt', 'w') as f:
+        for x in currDict:
+            f.write('%s\n' % x)
+
+# create a new pickle file from a collection of entries
+# example: grades = {'Bart', 'Lisa', 'Milhouse', 'Nelson'}
+def createPickle(filename, pklList):
+    with open(filename + '.pkl', 'wb') as f:
+        pickle.dump(pklList, f)
+
+# merge new entries into an existing pickle
+# note: the | union operator assumes both the stored object and pklList are sets;
+# the words/classes pickles written by train_chatbot.py hold lists, so convert
+# those with set(...) before updating this way
+def updatePickle(filename, pklList):
+    with open(filename + '.pkl', 'rb') as pickle_in:
+        currDict = pickle.load(pickle_in)
+    with open(filename + '.pkl', 'wb') as f:
+        pickle.dump(currDict | pklList, f)
+
+printPickle("classes")
+printPickle("words")
+
+# Example usage
+# createPickle('test', {'Bart', 'Lisa', 'Milhouse', 'Nelson'})
+# updatePickle('test', {'Theo'})
+# printPickle("test")
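+# Expected outcome of the example above (hypothetical 'test' file, safe to delete):
+# test.pkl ends up holding the set {'Bart', 'Lisa', 'Milhouse', 'Nelson', 'Theo'},
+# and printPickle("test") writes test.txt with one entry per line.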
diff --git a/src/semantic/train_chatbot.py b/src/semantic/train_chatbot.py
new file mode 100644
index 0000000..2c211ae
--- /dev/null
+++ b/src/semantic/train_chatbot.py
@@ -0,0 +1,92 @@
+import numpy as np
+from keras.models import Sequential
+from keras.layers import Dense, Activation, Dropout
+from keras.optimizers import SGD
+import random
+
+import nltk
+from nltk.stem import WordNetLemmatizer
+lemmatizer = WordNetLemmatizer()
+# first run only: nltk.download('punkt') and nltk.download('wordnet')
+import json
+import pickle
+
+words = []
+classes = []
+documents = []
+ignore_letters = ['!', '?', ',', '.']
+intents_file = open('intents.json').read()
+intents = json.loads(intents_file)
+
+for intent in intents['intents']:
+    for pattern in intent['patterns']:
+        # tokenize each pattern into words
+        word = nltk.word_tokenize(pattern)
+        words.extend(word)
+        # add the (pattern, tag) pair to the corpus
+        documents.append((word, intent['tag']))
+        # add the tag to our classes list
+        if intent['tag'] not in classes:
+            classes.append(intent['tag'])
+print(documents)
+# lemmatize and lowercase each word, then remove duplicates
+words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_letters]
+words = sorted(list(set(words)))
+# sort classes
+classes = sorted(list(set(classes)))
+# documents = combinations of patterns and intents
+print(len(documents), "documents")
+# classes = intent tags
+print(len(classes), "classes", classes)
+# words = all words, the vocabulary
+print(len(words), "unique lemmatized words", words)
+
+pickle.dump(words, open('words.pkl', 'wb'))
+pickle.dump(classes, open('classes.pkl', 'wb'))
+
+# create our training data
+training = []
+# create an empty array for our output
+output_empty = [0] * len(classes)
+# training set: a bag of words for each pattern
+for doc in documents:
+    # initialize our bag of words
+    bag = []
+    # list of tokenized words for the pattern
+    pattern_words = doc[0]
+    # lemmatize each word so it matches the vocabulary built above
+    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_words]
+    # bag-of-words vector: 1 where the vocabulary word appears in the current pattern
+    for word in words:
+        bag.append(1 if word in pattern_words else 0)
+
+    # output is '0' for every tag and '1' for the current pattern's tag
+    output_row = list(output_empty)
+    output_row[classes.index(doc[1])] = 1
+
+    training.append([bag, output_row])
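+# Illustration (hypothetical indices): for the pattern "Hi there" with tag
+# "greeting", bag is a 0/1 vector over the whole vocabulary with 1s at the
+# positions of "hi" and "there", and output_row is one-hot at the index of
+# "greeting" in classes.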
+# shuffle our features and turn them into a numpy array
+# (dtype=object because bag and output_row have different lengths)
+random.shuffle(training)
+training = np.array(training, dtype=object)
+# create training lists: X - patterns, Y - intents
+train_x = list(training[:, 0])
+train_y = list(training[:, 1])
+print("Training data created")
+
+# Create model - 3 layers: first layer 128 neurons, second layer 64 neurons, and the
+# output layer has one neuron per intent, predicting the output intent with softmax
+model = Sequential()
+model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
+model.add(Dropout(0.5))
+model.add(Dense(64, activation='relu'))
+model.add(Dropout(0.5))
+model.add(Dense(len(train_y[0]), activation='softmax'))
+
+# Compile model. Stochastic gradient descent with Nesterov accelerated gradient
+# gives good results for this model
+sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
+model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
+
+# fit and save the model (model.save takes only the path; the training history
+# returned by fit is kept separately in hist)
+hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
+model.save('chatbot_model.h5')
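+
+# Optional sanity check (a sketch, not part of the training flow): reload the
+# saved model and score one training pattern; uncomment to try it.
+# from keras.models import load_model
+# check = load_model('chatbot_model.h5')
+# print(check.predict(np.array([train_x[0]])))  # per-class probabilities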
+
+print("model created")
diff --git a/src/semantic/words.pkl b/src/semantic/words.pkl
new file mode 100644
index 0000000..71d696a
Binary files /dev/null and b/src/semantic/words.pkl differ
diff --git a/src/semantic/words.txt b/src/semantic/words.txt
new file mode 100644
index 0000000..724b6dd
--- /dev/null
+++ b/src/semantic/words.txt
@@ -0,0 +1,87 @@
+'s
+a
+adverse
+all
+anyone
+are
+awesome
+be
+behavior
+blood
+by
+bye
+can
+causing
+chatting
+check
+could
+data
+day
+detail
+do
+dont
+drug
+entry
+find
+for
+give
+good
+goodbye
+have
+hello
+help
+helpful
+helping
+hey
+hi
+history
+hola
+hospital
+how
+i
+id
+is
+later
+list
+load
+locate
+log
+looking
+lookup
+management
+me
+module
+nearby
+next
+nice
+of
+offered
+open
+patient
+pharmacy
+pressure
+provide
+reaction
+related
+result
+search
+searching
+see
+show
+suitable
+support
+task
+thank
+thanks
+that
+there
+till
+time
+to
+transfer
+up
+want
+what
+which
+with
+you