Mirror of https://github.com/Mobile-Robotics-W20-Team-9/UMICH-NCLT-SLAP.git (synced 2025-09-09 04:13:14 +00:00)
added the ability for the chatbot to detect buildings for navigating from one location to another, including BBB, EECS, Pierpont, the Dude, and FXB
src/semantic/buildingIntents.json (new file, 28 lines)
@@ -0,0 +1,28 @@
{"intents": [
    {"tag": "Bob and Betty Beyster",
     "patterns": ["BBB", "CSE", "CS", "Computer Science", "Bob", "Bob and Betty Beyster", "Betty"],
     "responses": ["Bob and Betty Beyster"],
     "context": [""]
    },
    {"tag": "Duderstadt",
     "patterns": ["Dude", "the Dude", "Duderstadt", "Mujos", "Library", "North Campus Library"],
     "responses": ["Duderstadt"],
     "context": [""]
    },
    {"tag": "FXB",
     "patterns": ["FXB", "Francois-Xavier Bagnoud", "Aerospace", "Aerospace Engineering", "planes"],
     "responses": ["FXB"],
     "context": [""]
    },
    {"tag": "Electrical and Computer Engineering",
     "patterns": ["Electrical and Computer Engineering", "Electrical", "Electrical Engineering", "Computer Engineering", "Computer", "EECS", "ECE"],
     "responses": ["Electrical and Computer Engineering"],
     "context": [""]
    },
    {"tag": "Pierpont Commons",
     "patterns": ["Pierpont", "Pierpont Commons", "Commons", "Panda Express"],
     "responses": ["Pierpont Commons"],
     "context": [""]
    }
]
}
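Each intent in this file maps a set of free-form patterns (nicknames, department names, nearby landmarks) to one canonical building name in responses. As a minimal sketch of the mapping the file encodes, here is a direct pattern lookup; the chatbot itself resolves mentions through the Keras model trained by train_buildings.py further down, and canonical_building is a hypothetical helper used only for illustration, not part of this commit:

import json

intents = json.loads(open('buildingIntents.json').read())

def canonical_building(phrase):
    # hypothetical helper: exact-match a phrase against each intent's pattern list
    for intent in intents['intents']:
        if phrase in intent['patterns']:
            return intent['responses'][0]
    return None

print(canonical_building('Mujos'))          # Duderstadt
print(canonical_building('Panda Express'))  # Pierpont Commons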
src/semantic/building_words.pkl (new binary file)
Binary file not shown.
src/semantic/building_words.txt (new file, 14 lines)
@@ -0,0 +1,14 @@
Dude
Computer Science
CSE
FXB
BBB
Aerospace Engineering
Electrical Engineering
EECS
ECE
Pierpont
Duderstadt
Francois-Xavier Bagnoud
Bob and Betty Beyster
Pierpont Commons
src/semantic/buildings.pkl (new binary file)
Binary file not shown.
src/semantic/buildings.txt (new file, 5 lines)
@@ -0,0 +1,5 @@
Duderstadt
Electrical and Computer Engineering
FXB
Pierpont Commons
Bob and Betty Beyster
src/semantic/buildings_model.h5 (new binary file)
Binary file not shown.
@@ -9,11 +9,15 @@ import spacy
 
 from keras.models import load_model
 model = load_model('chatbot_model.h5')
+modelBuilding = load_model('buildings_model.h5')
 import json
 import random
 intents = json.loads(open('intents.json').read())
 words = pickle.load(open('words.pkl','rb'))
 classes = pickle.load(open('classes.pkl','rb'))
+buildingsIntents = json.loads(open('buildingIntents.json').read())
+building_words = pickle.load(open('building_words.pkl','rb'))
+buildings = pickle.load(open('buildings.pkl','rb'))
 
 def clean_up_sentence(sentence):
     # tokenize the pattern - splitting words into array
@@ -24,13 +28,13 @@ def clean_up_sentence(sentence):
 
 
 # return bag of words array: 0 or 1 for words that exist in sentence
-def bag_of_words(sentence, words, show_details=True):
+def bag_of_words(sentence, wording, show_details=True):
     # tokenizing patterns
     sentence_words = clean_up_sentence(sentence)
     # bag of words - vocabulary matrix
-    bag = [0]*len(words)
+    bag = [0]*len(wording)
     for s in sentence_words:
-        for i,word in enumerate(words):
+        for i,word in enumerate(wording):
            if word == s:
                # assign 1 if current word is in the vocabulary position
                bag[i] = 1
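bag_of_words above turns a sentence into a fixed-length 0/1 vector over the vocabulary, which is what both the existing intent model and the new building model consume. A self-contained sketch of that encoding against a toy vocabulary (the real vocabulary is the lemmatized list stored in building_words.pkl, and clean_up_sentence is approximated here by tokenize-then-lemmatize):

import nltk
from nltk.stem import WordNetLemmatizer

nltk.download('punkt')    # one-time resource downloads
nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()

vocab = ['aerospace', 'bbb', 'dude', 'duderstadt', 'eecs', 'pierpont']  # toy stand-in for building_words

def bow_demo(sentence, words):
    tokens = [lemmatizer.lemmatize(t.lower()) for t in nltk.word_tokenize(sentence)]
    return [1 if w in tokens else 0 for w in words]

print(bow_demo("Take me to the Dude", vocab))   # [0, 0, 1, 0, 0, 0]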
@@ -51,18 +55,29 @@ def predict_class(sentence):
         return_list.append({"intent": classes[r[0]], "probability": str(r[1])})
     return return_list
 
+def predict_building(currbuilding):
+    # filter below threshold predictions
+    p = bag_of_words(currbuilding, building_words, show_details=False)
+    res = modelBuilding.predict(np.array([p]))[0]
+    ERROR_THRESHOLD = 0.5
+    results = [[i,r] for i,r in enumerate(res) if r>ERROR_THRESHOLD]
+    # sorting strength probability
+    results.sort(key=lambda x: x[1], reverse=True)
+    return_list = []
+    for r in results:
+        return_list.append({"buildingIntents": buildings[r[0]], "probability": str(r[1])})
+    return return_list
+
 def getResponse(ints, intents_json):
     tag = ints[0]['intent']
     list_of_intents = intents_json['intents']
-    print("ints")
-    print(ints)
     for i in list_of_intents:
         if(i['tag']== tag):
             result = random.choice(i['responses'])
             break
     return result
 
-def getInfo(sentence):
+def getBuildingInfo(sentence):
     doc = nlp(sentence)
     start = 0
     end = 0
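predict_building above keeps only the classes whose score clears ERROR_THRESHOLD and returns them sorted by confidence, tagged with the matching entry from buildings.pkl. A toy, self-contained run of just that filtering step (the probabilities are invented for illustration; in the real function res comes from modelBuilding.predict):

import numpy as np

buildings_demo = ['Bob and Betty Beyster', 'Duderstadt', 'Electrical and Computer Engineering',
                  'FXB', 'Pierpont Commons']    # sorted tags, mirroring buildings.pkl
res = np.array([0.05, 0.78, 0.10, 0.04, 0.03])  # pretend softmax output for a "Mujos" query

ERROR_THRESHOLD = 0.5
results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]
results.sort(key=lambda x: x[1], reverse=True)
print([{"buildingIntents": buildings_demo[i], "probability": str(r)} for i, r in results])
# [{'buildingIntents': 'Duderstadt', 'probability': '0.78'}]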
@@ -101,9 +116,18 @@ def send():
 
         ints = predict_class(msg)
         if ints[0]['intent'] == "navigation":
-            building = getInfo(msgClean)
-            #TODO: Check if buildings are available
-            res = "Now navigating to " + building[1] + " from " + building[0]
+            currbuilding = getBuildingInfo(msgClean)
+            if currbuilding[0] == 'random location':
+                currbuilding[0] = buildings[random.randint(0, len(buildings)-1)]
+                while currbuilding[0] == currbuilding[1]:
+                    currbuilding[1] = buildings[random.randint(0, len(buildings)-1)]
+            if currbuilding[1] == 'random location':
+                currbuilding[1] = buildings[random.randint(0, len(buildings)-1)]
+                while currbuilding[0] == currbuilding[1]:
+                    currbuilding[1] = buildings[random.randint(0, len(buildings)-1)]
+            fromBuild = predict_building(currbuilding[0])
+            toBuild = predict_building(currbuilding[1])
+            res = "Now navigating to " + toBuild[0]['buildingIntents'] + " from " + fromBuild[0]['buildingIntents']
             #TODO: START CONVERSION TO GPS COORDINATES
         elif ints[0]['intent'] == "exit":
             res = getResponse(ints, intents)
src/semantic/train_buildings.py (new file, 96 lines)
@@ -0,0 +1,96 @@
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
import random

import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
import json
import pickle

building_words = []
buildings = []
documents = []
ignore_letters = ['!', '?', ',', '.']
buildingIntents_file = open('buildingIntents.json').read()
buildingIntents = json.loads(buildingIntents_file)

# download nltk resources
nltk.download('punkt')
nltk.download('wordnet')

for intent in buildingIntents['intents']:
    for pattern in intent['patterns']:
        # tokenize each word
        word = nltk.word_tokenize(pattern)
        building_words.extend(word)
        # add documents in the corpus
        documents.append((word, intent['tag']))
        # add to our buildings list
        if intent['tag'] not in buildings:
            buildings.append(intent['tag'])

print(documents)
# lemmatize and lower each word and remove duplicates
building_words = [lemmatizer.lemmatize(w.lower()) for w in building_words if w not in ignore_letters]
building_words = sorted(list(set(building_words)))
# sort buildings
buildings = sorted(list(set(buildings)))
# documents = combination between patterns and buildingIntents
print(len(documents), "documents")
# buildings = buildingIntents
print(len(buildings), "buildings", buildings)
# building_words = all building_words, vocabulary
print(len(building_words), "unique lemmatized building_words", building_words)

pickle.dump(building_words, open('building_words.pkl', 'wb'))
pickle.dump(buildings, open('buildings.pkl', 'wb'))

# create our training data
training = []
# create an empty array for our output
output_empty = [0] * len(buildings)
# training set, bag of building_words for each sentence
for doc in documents:
    # initialize our bag of building_words
    bag = []
    # list of tokenized building_words for the pattern
    pattern_building_words = doc[0]
    # lemmatize each word - create base word, in attempt to represent related building_words
    pattern_building_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_building_words]
    # create our bag of building_words array with 1, if word match found in current pattern
    for word in building_words:
        bag.append(1) if word in pattern_building_words else bag.append(0)

    # output is a '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(output_empty)
    output_row[buildings.index(doc[1])] = 1

    training.append([bag, output_row])

# shuffle our features and turn into np.array
random.shuffle(training)
training = np.array(training)
# create train and test lists. X - patterns, Y - buildingIntents
train_x = list(training[:, 0])
train_y = list(training[:, 1])
print("Buildings Training data created")

# Create model - 3 layers. First layer 128 neurons, second layer 64 neurons and 3rd output layer contains number of neurons
# equal to number of buildingIntents to predict output intent with softmax
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# fitting and saving the model
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
model.save('buildings_model.h5', hist)

print("building model created")
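A quick sanity check of the artifacts this script writes, assuming it has been run from src/semantic so building_words.pkl, buildings.pkl, and buildings_model.h5 exist on disk:

import pickle
from keras.models import load_model

building_words = pickle.load(open('building_words.pkl', 'rb'))
buildings = pickle.load(open('buildings.pkl', 'rb'))
model = load_model('buildings_model.h5')

print(len(building_words), "vocabulary entries,", len(buildings), "building classes")
model.summary()   # Dense 128 -> Dropout -> Dense 64 -> Dropout -> softmax over the building classes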