mirror of
https://github.com/Mobile-Robotics-W20-Team-9/UMICH-NCLT-SLAP.git
synced 2025-09-09 12:23:14 +00:00
Merge pull request #14 from Mobile-Robotics-W20-Team-9/semantic
Semantic
This commit is contained in:
26
docs/home.md
26
docs/home.md
@@ -36,6 +36,14 @@
|
|||||||
- [Scipy](https://www.scipy.org/)
|
- [Scipy](https://www.scipy.org/)
|
||||||
- [Matplotlib](https://matplotlib.org/)
|
- [Matplotlib](https://matplotlib.org/)
|
||||||
- [Natural Language Toolkit](https://www.nltk.org/)
|
- [Natural Language Toolkit](https://www.nltk.org/)
|
||||||
|
- [Cpython](https://pypi.org/project/cPython/)
|
||||||
|
- [NLTK](https://pypi.org/project/nltk/)
|
||||||
|
- [Setup Tools](https://pypi.org/project/setuptools/)
|
||||||
|
- [Pylint](https://pypi.org/project/pylint/)
|
||||||
|
- [Spacy](https://pypi.org/project/spacy/)
|
||||||
|
- [Pickle](https://pypi.org/project/pickle-mixin/)
|
||||||
|
- [TensorFlow](https://pypi.org/project/tensorflow/)
|
||||||
|
- [Keras](https://pypi.org/project/Keras/)
|
||||||
|
|
||||||
## Docker
|
## Docker
|
||||||
|
|
||||||
@@ -49,3 +57,21 @@ After cloning the repo, start your docker machine and following commands shown b
|
|||||||
|
|
||||||
1. `cd /PATH/TO/UMICH_NCLT_SLAP/src`
|
1. `cd /PATH/TO/UMICH_NCLT_SLAP/src`
|
||||||
2. `docker-compose run --rm python-dev`
|
2. `docker-compose run --rm python-dev`
|
||||||
|
|
||||||
|
### Semantic Language Parsing: Chatbot
|
||||||
|
|
||||||
|
For standalone testing of the chatbot, run the following commands:
|
||||||
|
|
||||||
|
1. `cd /PATH/TO/UMICH_NCLT_SLAP/semantic/src`
|
||||||
|
2. `docker-compose run --rm python-dev`
|
||||||
|
1. `cd app/semantic`
|
||||||
|
2. `python gui_chatbot.py`
|
||||||
|
|
||||||
|
You can update the models by changing the intent or pickle files. `intents.json` can be changed with a basic text editor, and pickles can be read and changed using `pickleManage.py`.
|
||||||
|
1. `cd /PATH/TO/UMICH_NCLT_SLAP/src/dataset/dataManipulation/pickles`
|
||||||
|
2. `python`
|
||||||
|
3. `from pickleManage import *`
|
||||||
|
4. Use desired functions. Functions are documented with examples in pickleManage.py file.
|
||||||
|
To update the models after making changes, run:
|
||||||
|
'python
|
||||||
|
|
||||||
|
@@ -120,7 +120,7 @@ def project_vel_to_cam(hits, cam_num):
|
|||||||
def main(args):
|
def main(args):
|
||||||
|
|
||||||
if len(args)<4:
|
if len(args)<4:
|
||||||
print """Incorrect usage.
|
print("""Incorrect usage.
|
||||||
|
|
||||||
To use:
|
To use:
|
||||||
|
|
||||||
@@ -129,7 +129,7 @@ To use:
|
|||||||
vel: The velodyne binary file (timestamp.bin)
|
vel: The velodyne binary file (timestamp.bin)
|
||||||
img: The undistorted image (timestamp.tiff)
|
img: The undistorted image (timestamp.tiff)
|
||||||
cam_num: The index (0 through 5) of the camera
|
cam_num: The index (0 through 5) of the camera
|
||||||
"""
|
""")
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
||||||
|
Binary file not shown.
@@ -4,7 +4,8 @@ RUN apt-get update && \
|
|||||||
apt-get install -y \
|
apt-get install -y \
|
||||||
build-essential \
|
build-essential \
|
||||||
python-opencv \
|
python-opencv \
|
||||||
libpcl-dev
|
libpcl-dev \
|
||||||
|
x11-apps
|
||||||
|
|
||||||
RUN pip install -U pip && \
|
RUN pip install -U pip && \
|
||||||
pip install -U \
|
pip install -U \
|
||||||
@@ -15,6 +16,13 @@ RUN pip install -U pip && \
|
|||||||
nltk \
|
nltk \
|
||||||
setuptools \
|
setuptools \
|
||||||
pylint \
|
pylint \
|
||||||
|
pickle-mixin \
|
||||||
|
spacy \
|
||||||
|
--upgrade setuptools \
|
||||||
|
--no-cache-dir tensorflow \
|
||||||
|
keras
|
||||||
|
|
||||||
|
RUN python -m spacy download en_core_web_sm
|
||||||
pickle-mixin
|
pickle-mixin
|
||||||
|
|
||||||
CMD ["/bin/bash"]
|
CMD ["/bin/bash"]
|
BIN
src/semantic/buildings_model.h5
Normal file
BIN
src/semantic/buildings_model.h5
Normal file
Binary file not shown.
BIN
src/semantic/chatbot_model.h5
Normal file
BIN
src/semantic/chatbot_model.h5
Normal file
Binary file not shown.
27
src/semantic/docker-compose_ubuntu
Normal file
27
src/semantic/docker-compose_ubuntu
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# Docker Compose
|
||||||
|
|
||||||
|
# docker-compose.yml format version
|
||||||
|
version: '3'
|
||||||
|
|
||||||
|
# Define services
|
||||||
|
services:
|
||||||
|
# Python Development Container
|
||||||
|
python-dev:
|
||||||
|
# Use Dockerfile in current folder
|
||||||
|
build: .
|
||||||
|
# Mount ros-dev folder on host to app folder in container
|
||||||
|
volumes:
|
||||||
|
- ./control:/app/control
|
||||||
|
- ./dataset:/app/dataset
|
||||||
|
- ./localization:/app/localization
|
||||||
|
- ./planning:/app/planning
|
||||||
|
- ./semantic:/app/semantic
|
||||||
|
- ./visualization:/app/visualization
|
||||||
|
- /tmp/.X11-unix/:/tmp/.X11-unix
|
||||||
|
# Set DISPLAY variable and network mode for GUIs
|
||||||
|
environment:
|
||||||
|
- DISPLAY=$DISPLAY
|
||||||
|
#- DISPLAY=${IP_ADDRESS}:0.0
|
||||||
|
network_mode: "host"
|
||||||
|
# Set working directory in container to app folder
|
||||||
|
working_dir: /app
|
201
src/semantic/gui_chatbot.py
Normal file
201
src/semantic/gui_chatbot.py
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
# from python example and tutorial here: https://data-flair.training/blogs/python-chatbot-project/
|
||||||
|
# also utilizes examples from spacy website
|
||||||
|
|
||||||
|
import nltk
|
||||||
|
from nltk.stem import WordNetLemmatizer
|
||||||
|
lemmatizer = WordNetLemmatizer()
|
||||||
|
import pickle
|
||||||
|
import numpy as np
|
||||||
|
import spacy
|
||||||
|
import tkinter
|
||||||
|
from tkinter import *
|
||||||
|
|
||||||
|
|
||||||
|
from keras.models import load_model
|
||||||
|
# Trained Keras classifiers: `model` ranks chat intents, `modelBuilding`
# ranks building names. Paths are relative to the working directory.
model = load_model('chatbot_model.h5')
modelBuilding = load_model('buildings_model.h5')
import json
import random


def _load_json(path):
    """Return the parsed contents of the JSON file at *path*."""
    # `with` closes the handle; the original left it to the garbage collector.
    with open(path) as handle:
        return json.load(handle)


def _load_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    # NOTE: pickle can execute arbitrary code while loading; only load
    # pickle files that were produced by this project's training scripts.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Intent definitions plus the vocabulary / label lists for the chat model.
intents = _load_json('intents/intents.json')
words = _load_pickle('pickles/words.pkl')
classes = _load_pickle('pickles/classes.pkl')
# Same three artefacts for the building-name model.
buildingsIntents = _load_json('intents/buildingIntents.json')
building_words = _load_pickle('pickles/building_words.pkl')
buildings = _load_pickle('pickles/buildings.pkl')

# Conversation state shared with send() through `global`.
confirmation = 0  # set to 1 by send() after it asks the user to confirm a route
startNav = 0 #TODO: START CONVERSION TO GPS COORDINATES
completedNav = 0 #TODO: Add response once complete
emergencyExit = 0 #TODO: OPTIONAL STOP EVERYTHING
|
||||||
|
|
||||||
|
def clean_up_sentence(sentence):
    """Tokenize *sentence* and return its lower-cased, lemmatized tokens.

    Tokenization uses ``nltk.word_tokenize``; each token is lower-cased and
    passed through the module-level ``lemmatizer``.
    """
    lemmas = []
    for token in nltk.word_tokenize(sentence):
        lemmas.append(lemmatizer.lemmatize(token.lower()))
    return lemmas
|
||||||
|
|
||||||
|
|
||||||
|
# return bag of words array: 0 or 1 for words that exist in sentence
|
||||||
|
def bag_of_words(sentence, wording, show_details=True):
    """Encode *sentence* as a 0/1 vector over the vocabulary *wording*.

    Args:
        sentence: Raw text; it is cleaned with clean_up_sentence first.
        wording: Vocabulary sequence; position i of the result refers to
            wording[i].
        show_details: When true, print a line for every vocabulary hit.

    Returns:
        A numpy array with len(wording) entries, 1 where the vocabulary
        word occurs among the cleaned sentence tokens and 0 elsewhere.
    """
    tokens = clean_up_sentence(sentence)
    flags = [0] * len(wording)
    for token in tokens:
        for position, vocab_word in enumerate(wording):
            if vocab_word != token:
                continue
            # mark the vocabulary slot that this token occupies
            flags[position] = 1
            if show_details:
                print ("found in bag: %s" % vocab_word)
    return np.array(flags)
|
||||||
|
|
||||||
|
def predict_class(sentence):
    """Rank the chat intents predicted for *sentence*.

    Returns:
        A list of ``{"intent": <tag>, "probability": <str>}`` dicts, best
        first, holding only the classes whose score exceeds 0.25. The list
        may be empty.
    """
    ERROR_THRESHOLD = 0.25
    features = bag_of_words(sentence, words, show_details=False)
    # predict() takes a batch; feed a batch of one and take its single row
    scores = model.predict(np.array([features]))[0]
    kept = [
        [index, score]
        for index, score in enumerate(scores)
        if score > ERROR_THRESHOLD
    ]
    ranked = sorted(kept, key=lambda pair: pair[1], reverse=True)
    return [
        {"intent": classes[index], "probability": str(score)}
        for index, score in ranked
    ]
|
||||||
|
|
||||||
|
def predict_building(currbuilding):
    """Rank the known buildings that the text *currbuilding* may refer to.

    Returns:
        A list of ``{"buildingIntents": <name>, "probability": <str>}``
        dicts, best first, holding only the buildings whose score exceeds
        0.5. The list may be empty.
    """
    ERROR_THRESHOLD = 0.5
    encoded = bag_of_words(currbuilding, building_words, show_details=False)
    batch = np.array([encoded])
    scores = modelBuilding.predict(batch)[0]

    # keep (index, score) pairs above the threshold
    candidates = []
    for index, score in enumerate(scores):
        if score > ERROR_THRESHOLD:
            candidates.append([index, score])
    # strongest match first
    candidates.sort(key=lambda item: item[1], reverse=True)

    return_list = []
    for index, score in candidates:
        return_list.append({"buildingIntents": buildings[index], "probability": str(score)})
    return return_list
|
||||||
|
|
||||||
|
def getResponse(ints, intents_json):
    """Pick a random canned response for the top-ranked predicted intent.

    Args:
        ints: Predictions, best first; each item is a dict with an
            'intent' key (the shape predict_class returns). May be empty.
        intents_json: Parsed intents file: a dict whose 'intents' list
            holds dicts with a 'tag' and a list of 'responses'.

    Returns:
        A response string. If ``ints`` is empty, or its tag has no entry
        with at least one response, a response of the 'noanswer' intent is
        used when available, otherwise a fixed apology. (The original
        raised IndexError / UnboundLocalError in those cases.)
    """
    def _responses_for(tag):
        # first entry with a matching tag wins
        for entry in intents_json['intents']:
            if entry['tag'] == tag:
                return entry['responses']
        return None

    candidates = _responses_for(ints[0]['intent']) if ints else None
    if not candidates:
        candidates = _responses_for('noanswer')
    if not candidates:
        return "Sorry, can't understand you"
    return random.choice(candidates)
|
||||||
|
|
||||||
|
def getBuildingInfo(sentence):
    """Extract the start and destination building names from a request.

    The sentence is run through the module-level spaCy pipeline ``nlp``.
    A proper noun (``pos_ == "PROPN"``) seen after the word "from" is taken
    as the start building, and one seen after "to" as the destination. When
    several proper nouns follow the same keyword, the last one wins.

    The keywords are matched case-insensitively so that a request such as
    "From BBB to FXB" is handled like "from BBB to FXB".

    Returns:
        ``[startBuilding, stopBuilding]``; an element is the placeholder
        "random location" when no matching proper noun was found.
    """
    startBuilding = "random location"
    stopBuilding = "random location"
    # Which slot the next proper noun fills: None until a keyword is seen.
    target = None
    for token in nlp(sentence):
        if token.pos_ == "PROPN" and target is not None:
            if target == "start":
                startBuilding = token.text
            else:
                stopBuilding = token.text
            continue
        keyword = token.text.lower()
        if keyword == "to":
            target = "stop"
        elif keyword == "from":
            target = "start"
    return [startBuilding, stopBuilding]
|
||||||
|
|
||||||
|
|
||||||
|
#Creating tkinter GUI
|
||||||
|
def send():
    """Handle the Send button: post the user's message and the bot's reply.

    Reads the text from EntryBox, clears the box, classifies the message
    with predict_class and appends both the message and the reply to
    ChatBox. On top of the classifier, a small rule layer tracks the
    navigation dialogue through the module-level flags:

    * ``confirmation`` is 1 while the bot waits for a yes/no on a route.
    * ``startNav`` is 1 once the user confirmed; new routes are refused
      until an "exit" intent resets it.
    * ``emergencyExit`` is 1 only right after an "exit" intent.

    predict_class and predict_building drop low-scoring results, so either
    can return an empty list; those cases get a fallback reply here rather
    than raising IndexError inside the Tk callback.
    """
    global confirmation
    global startNav
    global emergencyExit

    msgClean = EntryBox.get("1.0",'end-1c')
    msg = msgClean.strip()
    EntryBox.delete("0.0",END)

    if msg == '':
        return

    ChatBox.config(state=NORMAL)
    ChatBox.insert(END, "You: " + msg + '\n\n')
    ChatBox.config(foreground="#446665", font=("Verdana", 12 ))

    ints = predict_class(msg)
    # None when no intent cleared the classifier's threshold
    intent = ints[0]['intent'] if ints else None

    # adds rule based chatbot to confirm navigation
    if intent in ("yes", "no") and confirmation == 1 and startNav == 0:
        emergencyExit = 0
        if intent == "yes":
            res = "Starting navigation. Please wait for process to complete. This may take a couple minutes."
            startNav = 1
        else:
            res = "Cancelled operation"
            confirmation = 0
    elif intent == "navigation" and startNav == 0:
        emergencyExit = 0
        currbuilding = getBuildingInfo(msgClean)
        # Replace a missing endpoint with a random known building and make
        # sure the two endpoints differ.
        if currbuilding[0] == 'random location':
            currbuilding[0] = random.choice(buildings)
            while currbuilding[0] == currbuilding[1]:
                currbuilding[1] = random.choice(buildings)
        if currbuilding[1] == 'random location':
            currbuilding[1] = random.choice(buildings)
            while currbuilding[0] == currbuilding[1]:
                currbuilding[1] = random.choice(buildings)
        fromBuild = predict_building(currbuilding[0])
        toBuild = predict_building(currbuilding[1])
        if fromBuild and toBuild:
            res = "You chose navigating to " + toBuild[0]['buildingIntents'] + " building from " + fromBuild[0]['buildingIntents'] + " building. Is this correct?"
            confirmation = 1
        else:
            # at least one name matched no building with enough confidence
            res = "Sorry, I could not recognize one of the buildings. Please try again."
            confirmation = 0
    elif intent == "exit":
        res = getResponse(ints, intents)
        startNav = 0
        emergencyExit = 1
    elif startNav == 1:
        emergencyExit = 0
        res = "Please wait while the navigation is processing"
    else:
        emergencyExit = 0
        if ints:
            res = getResponse(ints, intents)
        else:
            res = "Sorry, can't understand you"
    ChatBox.insert(END, "Belatrix: " + res + '\n\n')

    ChatBox.config(state=DISABLED)
    ChatBox.yview(END)
|
||||||
|
|
||||||
|
|
||||||
|
# Main window: fixed-size 400x500 "Chatbot" window.
root = Tk()
root.title("Chatbot")
root.geometry("400x500")
root.resizable(width=FALSE, height=FALSE)

# Language resources: the spaCy pipeline used by getBuildingInfo and the
# NLTK data sets fetched for tokenizing and lemmatizing.
nlp = spacy.load("en_core_web_sm")
nltk.download('punkt')
nltk.download('wordnet')

# Conversation history; kept read-only except while send() writes to it.
ChatBox = Text(
    root,
    bd=0,
    bg="white",
    height="8",
    width="50",
    font="Arial",
)
ChatBox.configure(state=DISABLED)

# Scrollbar wired to the conversation history in both directions.
scrollbar = Scrollbar(root, command=ChatBox.yview, cursor="heart")
ChatBox.configure(yscrollcommand=scrollbar.set)

# Button that submits the typed message through send().
SendButton = Button(
    root,
    font=("Verdana",12,'bold'),
    text="Send",
    width="12",
    height=5,
    bd=0,
    bg="#f9a602",
    activebackground="#3c9d9b",
    fg='#000000',
    command=send,
)

# Multi-line box where the user types a message.
EntryBox = Text(root, bd=0, bg="white", width="29", height="5", font="Arial")
#EntryBox.bind("<Return>", send)

# Absolute placement of every widget inside the fixed-size window.
scrollbar.place(x=376, y=6, height=386)
ChatBox.place(x=6, y=6, height=386, width=370)
EntryBox.place(x=128, y=401, height=90, width=265)
SendButton.place(x=6, y=401, height=90)

root.mainloop()
|
28
src/semantic/intents/buildingIntents.json
Normal file
28
src/semantic/intents/buildingIntents.json
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
{"intents": [
|
||||||
|
{"tag": "Bob and Betty Beyster",
|
||||||
|
"patterns": ["BBB", "CSE", "CS","Computer Science", "Computer", "Bob", "Bob and Betty Beyster", "Betty", "Computer Science Department", "CS Department"],
|
||||||
|
"responses": ["Bob and Betty Beyster"],
|
||||||
|
"context": [""]
|
||||||
|
},
|
||||||
|
{"tag": "Duderstadt",
|
||||||
|
"patterns": ["Dude", "the Dude", "Duderstadt", "Mujos", "Library", "North Campus Library"],
|
||||||
|
"responses": ["Duderstadt"],
|
||||||
|
"context": [""]
|
||||||
|
},
|
||||||
|
{"tag": "FXB",
|
||||||
|
"patterns": ["FXB", "Francois-Xavier Bagnoud", "Aerospace", "aerospace", "Aerospace Engineering", "planes", "Aerospace Department", "Aerospace Engineering Department"],
|
||||||
|
"responses": ["FXB"],
|
||||||
|
"context": [""]
|
||||||
|
},
|
||||||
|
{"tag": "Electrical and Computer Engineering",
|
||||||
|
"patterns": ["Electrical and Computer Engineering","Electrical", "Electrical Engineering", "Computer Engineering", "EECS", "ECE", "Electrical Engineering Department", "EECS Department", "ECE Department"],
|
||||||
|
"responses": ["Electrical and Computer Engineering"],
|
||||||
|
"context": [""]
|
||||||
|
},
|
||||||
|
{"tag": "Pierpont Commons",
|
||||||
|
"patterns": ["Pierpont", "Pierpont Commons", "Commons", "Panda Express"],
|
||||||
|
"responses": ["Pierpont Commons"],
|
||||||
|
"context": [""]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
48
src/semantic/intents/intents.json
Normal file
48
src/semantic/intents/intents.json
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
{"intents": [
|
||||||
|
{"tag": "greeting",
|
||||||
|
"patterns": ["Hi there", "How are you", "Is anyone there?","Hey","Hola", "Hello", "Good day"],
|
||||||
|
"responses": ["Hello, thanks for asking", "Good to see you again", "Hi there, how can I help?"],
|
||||||
|
"context": [""]
|
||||||
|
},
|
||||||
|
{"tag": "goodbye",
|
||||||
|
"patterns": ["Bye", "See you later", "Goodbye", "Nice chatting to you, bye", "Till next time"],
|
||||||
|
"responses": ["See you!", "Have a nice day", "Bye! Come back again soon."],
|
||||||
|
"context": [""]
|
||||||
|
},
|
||||||
|
{"tag": "thanks",
|
||||||
|
"patterns": ["Thanks", "Thank you", "That's helpful", "Awesome, thanks", "Thanks for helping me"],
|
||||||
|
"responses": ["Happy to help!", "Any time!", "My pleasure"],
|
||||||
|
"context": [""]
|
||||||
|
},
|
||||||
|
{"tag": "noanswer",
|
||||||
|
"patterns": [],
|
||||||
|
"responses": ["Sorry, can't understand you", "Please give me more info", "Not sure I understand"],
|
||||||
|
"context": [""]
|
||||||
|
},
|
||||||
|
{"tag": "options",
|
||||||
|
"patterns": ["How you could help me?", "What you can do?", "What help you provide?", "How you can be helpful?", "What support is offered"],
|
||||||
|
"responses": ["I can take you to multiple buildings including BBB, EECS, and more on north campus."],
|
||||||
|
"context": [""]
|
||||||
|
},
|
||||||
|
{"tag": "navigation",
|
||||||
|
"patterns": ["Can you take me to the ", "Take me to the building", "Map me to the location", "Navigate me to the building from the building"],
|
||||||
|
"responses": ["Starting Navigation"],
|
||||||
|
"context": ["navigation_to_building"]
|
||||||
|
},
|
||||||
|
{"tag": "exit",
|
||||||
|
"patterns": ["stop", "quit", "end", "I want to stop navigation"],
|
||||||
|
"responses": ["Ending current navigation"],
|
||||||
|
"context": ["navigation_to_building"]
|
||||||
|
},
|
||||||
|
{"tag": "yes",
|
||||||
|
"patterns": ["yes", "y", "sure", "right", "correct"],
|
||||||
|
"responses": ["I am sorry. I don't understand"],
|
||||||
|
"context": ["navigation_to_building"]
|
||||||
|
},
|
||||||
|
{"tag": "no",
|
||||||
|
"patterns": ["no", "nope", "n", "wrong", "incorrect"],
|
||||||
|
"responses": ["I am sorry. I don't understand"],
|
||||||
|
"context": ["navigation_to_building"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
BIN
src/semantic/pickles/building_words.pkl
Normal file
BIN
src/semantic/pickles/building_words.pkl
Normal file
Binary file not shown.
14
src/semantic/pickles/building_words.txt
Normal file
14
src/semantic/pickles/building_words.txt
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
Dude
|
||||||
|
Computer Science
|
||||||
|
CSE
|
||||||
|
FXB
|
||||||
|
BBB
|
||||||
|
Aerospace Engineering
|
||||||
|
Electrical Engineering
|
||||||
|
EECS
|
||||||
|
ECE
|
||||||
|
Pierpont
|
||||||
|
Duderstadt
|
||||||
|
Francois-Xavier Bagnoud
|
||||||
|
Bob and Betty Beyster
|
||||||
|
Pierpont Commons
|
BIN
src/semantic/pickles/buildings.pkl
Normal file
BIN
src/semantic/pickles/buildings.pkl
Normal file
Binary file not shown.
5
src/semantic/pickles/buildings.txt
Normal file
5
src/semantic/pickles/buildings.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
Duderstadt
|
||||||
|
Electrical and Computer Engineering
|
||||||
|
FXB
|
||||||
|
Pierpont Commons
|
||||||
|
Bob and Betty Beyster
|
BIN
src/semantic/pickles/classes.pkl
Normal file
BIN
src/semantic/pickles/classes.pkl
Normal file
Binary file not shown.
11
src/semantic/pickles/classes.txt
Normal file
11
src/semantic/pickles/classes.txt
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
blood_pressure_search
|
||||||
|
exit
|
||||||
|
goodbye
|
||||||
|
greeting
|
||||||
|
hospital_search
|
||||||
|
navigation
|
||||||
|
options
|
||||||
|
pharmacy_search
|
||||||
|
thanks
|
||||||
|
navigation
|
||||||
|
exit
|
BIN
src/semantic/pickles/pickleManage.pyc
Normal file
BIN
src/semantic/pickles/pickleManage.pyc
Normal file
Binary file not shown.
BIN
src/semantic/pickles/words.pkl
Normal file
BIN
src/semantic/pickles/words.pkl
Normal file
Binary file not shown.
92
src/semantic/pickles/words.txt
Normal file
92
src/semantic/pickles/words.txt
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
's
|
||||||
|
a
|
||||||
|
adverse
|
||||||
|
all
|
||||||
|
anyone
|
||||||
|
are
|
||||||
|
awesome
|
||||||
|
be
|
||||||
|
behavior
|
||||||
|
blood
|
||||||
|
by
|
||||||
|
bye
|
||||||
|
can
|
||||||
|
causing
|
||||||
|
chatting
|
||||||
|
check
|
||||||
|
could
|
||||||
|
data
|
||||||
|
day
|
||||||
|
detail
|
||||||
|
do
|
||||||
|
dont
|
||||||
|
drug
|
||||||
|
entry
|
||||||
|
find
|
||||||
|
for
|
||||||
|
give
|
||||||
|
good
|
||||||
|
goodbye
|
||||||
|
have
|
||||||
|
hello
|
||||||
|
help
|
||||||
|
helpful
|
||||||
|
helping
|
||||||
|
hey
|
||||||
|
hi
|
||||||
|
history
|
||||||
|
hola
|
||||||
|
hospital
|
||||||
|
how
|
||||||
|
i
|
||||||
|
id
|
||||||
|
is
|
||||||
|
later
|
||||||
|
list
|
||||||
|
load
|
||||||
|
locate
|
||||||
|
log
|
||||||
|
looking
|
||||||
|
lookup
|
||||||
|
management
|
||||||
|
me
|
||||||
|
module
|
||||||
|
nearby
|
||||||
|
next
|
||||||
|
nice
|
||||||
|
of
|
||||||
|
offered
|
||||||
|
open
|
||||||
|
patient
|
||||||
|
pharmacy
|
||||||
|
pressure
|
||||||
|
provide
|
||||||
|
reaction
|
||||||
|
related
|
||||||
|
result
|
||||||
|
search
|
||||||
|
searching
|
||||||
|
see
|
||||||
|
show
|
||||||
|
suitable
|
||||||
|
support
|
||||||
|
task
|
||||||
|
thank
|
||||||
|
thanks
|
||||||
|
that
|
||||||
|
there
|
||||||
|
till
|
||||||
|
time
|
||||||
|
to
|
||||||
|
transfer
|
||||||
|
up
|
||||||
|
want
|
||||||
|
what
|
||||||
|
which
|
||||||
|
with
|
||||||
|
you
|
||||||
|
navigation
|
||||||
|
map
|
||||||
|
locate
|
||||||
|
navigate
|
||||||
|
building
|
96
src/semantic/train_buildings.py
Normal file
96
src/semantic/train_buildings.py
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
import numpy as np
|
||||||
|
from keras.models import Sequential
|
||||||
|
from keras.layers import Dense, Activation, Dropout
|
||||||
|
from keras.optimizers import SGD
|
||||||
|
import random
|
||||||
|
|
||||||
|
import nltk
|
||||||
|
from nltk.stem import WordNetLemmatizer
|
||||||
|
lemmatizer = WordNetLemmatizer()
|
||||||
|
import json
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
# Vocabulary, building labels and (tokens, label) training pairs collected
# from the building intents file.
building_words=[]
buildings = []
documents = []
ignore_letters = ['!', '?', ',', '.']
# `with` closes the handle; the original left it open.
with open('intents/buildingIntents.json') as intents_handle:
    buildingIntents_file = intents_handle.read()
buildingIntents = json.loads(buildingIntents_file)

# download nltk resources
nltk.download('punkt')
nltk.download('wordnet')

for intent in buildingIntents['intents']:
    for pattern in intent['patterns']:
        #tokenize each word
        word = nltk.word_tokenize(pattern)
        building_words.extend(word)
        #add documents in the corpus
        documents.append((word, intent['tag']))
        # add to our buildings list
        if intent['tag'] not in buildings:
            buildings.append(intent['tag'])
print(documents)
# lemmatize and lower each word and remove duplicates
building_words = [lemmatizer.lemmatize(w.lower()) for w in building_words if w not in ignore_letters]
building_words = sorted(set(building_words))
# sort buildings
buildings = sorted(set(buildings))
# documents = combination between patterns and buildingIntents
print (len(documents), "documents")
# buildings = buildingIntents
print (len(buildings), "buildings", buildings)
# building_words = all building_words, vocabulary
print (len(building_words), "unique lemmatized building_words", building_words)

# Persist the vocabulary and label order; gui_chatbot.py loads both files.
with open('pickles/building_words.pkl','wb') as words_handle:
    pickle.dump(building_words, words_handle)
with open('pickles/buildings.pkl','wb') as buildings_handle:
    pickle.dump(buildings, buildings_handle)

# create our training data
training = []
# create an empty array for our output
output_empty = [0] * len(buildings)
# training set, bag of building_words for each sentence
for doc in documents:
    # lemmatize each token of the pattern so it matches the vocabulary form
    pattern_building_words = [lemmatizer.lemmatize(word.lower()) for word in doc[0]]
    # 1 for every vocabulary word present in the pattern, 0 otherwise
    bag = [1 if word in pattern_building_words else 0 for word in building_words]

    # output is a '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(output_empty)
    output_row[buildings.index(doc[1])] = 1

    training.append([bag, output_row])
# Shuffle the (bag, output_row) pairs together, then split them. The pairs
# are not packed into one np.array: bag and output_row have different
# lengths, and NumPy rejects such ragged input.
random.shuffle(training)
# create train and test lists. X - patterns, Y - buildingIntents
train_x = [pair[0] for pair in training]
train_y = [pair[1] for pair in training]
print("Buildings Training data created")

# Create model - 3 layers. First layer 128 neurons, second layer 64 neurons and 3rd output layer contains number of neurons
# equal to number of buildingIntents to predict output intent with softmax
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
# NOTE(review): newer Keras releases rename `lr` to `learning_rate` and no
# longer accept `decay`; confirm against the Keras version in the image.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

#fitting and saving the model
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
# save() takes `overwrite` as its second argument, not the training history
model.save('buildings_model.h5')

print("building model created")
|
96
src/semantic/train_chatbot.py
Normal file
96
src/semantic/train_chatbot.py
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
import numpy as np
|
||||||
|
from keras.models import Sequential
|
||||||
|
from keras.layers import Dense, Activation, Dropout
|
||||||
|
from keras.optimizers import SGD
|
||||||
|
import random
|
||||||
|
|
||||||
|
import nltk
|
||||||
|
from nltk.stem import WordNetLemmatizer
|
||||||
|
lemmatizer = WordNetLemmatizer()
|
||||||
|
import json
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
# Vocabulary, intent tags and (tokens, tag) training pairs collected from
# the intents file.
words=[]
classes = []
documents = []
ignore_letters = ['!', '?', ',', '.']
# `with` closes the handle; the original left it open.
with open('intents/intents.json') as intents_handle:
    intents_file = intents_handle.read()
intents = json.loads(intents_file)

# download nltk resources
nltk.download('punkt')
nltk.download('wordnet')

for intent in intents['intents']:
    for pattern in intent['patterns']:
        #tokenize each word
        word = nltk.word_tokenize(pattern)
        words.extend(word)
        #add documents in the corpus
        documents.append((word, intent['tag']))
        # add to our classes list
        if intent['tag'] not in classes:
            classes.append(intent['tag'])
print(documents)
# lemmatize and lower each word and remove duplicates
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_letters]
words = sorted(set(words))
# sort classes
classes = sorted(set(classes))
# documents = combination between patterns and intents
print (len(documents), "documents")
# classes = intents
print (len(classes), "classes", classes)
# words = all words, vocabulary
print (len(words), "unique lemmatized words", words)

# Persist the vocabulary and label order; gui_chatbot.py loads both files.
with open('pickles/words.pkl','wb') as words_handle:
    pickle.dump(words, words_handle)
with open('pickles/classes.pkl','wb') as classes_handle:
    pickle.dump(classes, classes_handle)

# create our training data
training = []
# create an empty array for our output
output_empty = [0] * len(classes)
# training set, bag of words for each sentence
for doc in documents:
    # lemmatize each token of the pattern so it matches the vocabulary form
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in doc[0]]
    # 1 for every vocabulary word present in the pattern, 0 otherwise
    bag = [1 if word in pattern_words else 0 for word in words]

    # output is a '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])
# Shuffle the (bag, output_row) pairs together, then split them. The pairs
# are not packed into one np.array: bag and output_row have different
# lengths, and NumPy rejects such ragged input.
random.shuffle(training)
# create train and test lists. X - patterns, Y - intents
train_x = [pair[0] for pair in training]
train_y = [pair[1] for pair in training]
print("Training data created")

# Create model - 3 layers. First layer 128 neurons, second layer 64 neurons and 3rd output layer contains number of neurons
# equal to number of intents to predict output intent with softmax
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
# NOTE(review): newer Keras releases rename `lr` to `learning_rate` and no
# longer accept `decay`; confirm against the Keras version in the image.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

#fitting and saving the model
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
# save() takes `overwrite` as its second argument, not the training history
model.save('chatbot_model.h5')

print("model created")
|
Reference in New Issue
Block a user