Added the ability for the chatbot to identify the origin and destination buildings in a navigation request, including BBB, EECS, Pierpont, the Dude, and FXB

2025-09-08 20:13:13 +00:00 · 2020-04-19 23:32:10 -04:00
parent a030c250c9
commit 2995b6ebdc
8 changed files with 176 additions and 9 deletions
--- a/src/semantic/buildingIntents.json
+++ b/src/semantic/buildingIntents.json
@@ -0,0 +1,28 @@
+{"intents": [
+    {"tag": "Bob and Betty Beyster",
+     "patterns": ["BBB", "CSE", "CS","Computer Science", "Bob", "Bob and Betty Beyster", "Betty"],
+     "responses": ["Bob and Betty Beyster"],
+     "context": [""]
+    },
+    {"tag": "Duderstadt",
+     "patterns": ["Dude", "the Dude", "Duderstadt", "Mujos", "Library", "North Campus Library"],
+     "responses": ["Duderstadt"],
+     "context": [""]
+    },
+    {"tag": "FXB",
+     "patterns": ["FXB", "Francois-Xavier Bagnoud", "Aerospace", "Aerospace Engineering", "planes"],
+     "responses": ["FXB"],
+     "context": [""]
+    },
+    {"tag": "Electrical and Computer Engineering",
+     "patterns": ["Electrical and Computer Engineering","Electrical", "Electrical Engineering", "Computer Engineering", "Computer", "EECS", "ECE"],
+     "responses": ["Electrical and Computer Engineering"],
+     "context": [""]
+    },
+    {"tag": "Pierpont Commons",
+     "patterns": ["Pierpont", "Pierpont Commons", "Commons", "Panda Express"],
+     "responses": ["Pierpont Commons"],
+     "context": [""]
+    }
+]
+}
--- a/src/semantic/building_words.pkl
+++ b/src/semantic/building_words.pkl
--- a/src/semantic/building_words.txt
+++ b/src/semantic/building_words.txt
@@ -0,0 +1,14 @@
+Dude
+Computer Science
+CSE
+FXB
+BBB
+Aerospace Engineering
+Electrical Engineering
+EECS
+ECE
+Pierpont
+Duderstadt
+Francois-Xavier Bagnoud
+Bob and Betty Beyster
+Pierpont Commons
--- a/src/semantic/buildings.pkl
+++ b/src/semantic/buildings.pkl
--- a/src/semantic/buildings.txt
+++ b/src/semantic/buildings.txt
@@ -0,0 +1,5 @@
+Duderstadt
+Electrical and Computer Engineering
+FXB
+Pierpont Commons
+Bob and Betty Beyster
--- a/src/semantic/buildings_model.h5
+++ b/src/semantic/buildings_model.h5
--- a/src/semantic/gui_chatbot.py
+++ b/src/semantic/gui_chatbot.py
@@ -9,11 +9,15 @@ import spacy

 from keras.models import load_model
 model = load_model('chatbot_model.h5')
+modelBuilding = load_model('buildings_model.h5')
 import json
 import random
 intents = json.loads(open('intents.json').read())
 words = pickle.load(open('words.pkl','rb'))
 classes = pickle.load(open('classes.pkl','rb'))
+buildingsIntents = json.loads(open('buildingIntents.json').read())
+building_words = pickle.load(open('building_words.pkl','rb'))
+buildings = pickle.load(open('buildings.pkl','rb'))

 def clean_up_sentence(sentence):
    # tokenize the pattern - splitting words into array
@@ -24,13 +28,13 @@ def clean_up_sentence(sentence):


 # return bag of words array: 0 or 1 for words that exist in sentence
-def bag_of_words(sentence, words, show_details=True):
+def bag_of_words(sentence, wording, show_details=True):
    # tokenizing patterns
    sentence_words = clean_up_sentence(sentence)
    # bag of words - vocabulary matrix
-    bag = [0]*len(words)
+    bag = [0]*len(wording)
    for s in sentence_words:
-        for i,word in enumerate(words):
+        for i,word in enumerate(wording):
            if word == s:
                # assign 1 if current word is in the vocabulary position
                bag[i] = 1
@@ -51,18 +55,29 @@ def predict_class(sentence):
        return_list.append({"intent": classes[r[0]], "probability": str(r[1])})
    return return_list

+def predict_building(currbuilding):
+    # discard predictions that do not exceed the threshold
+    p = bag_of_words(currbuilding, building_words,show_details=False)
+    res = modelBuilding.predict(np.array([p]))[0]
+    ERROR_THRESHOLD = 0.5
+    results = [[i,r] for i,r in enumerate(res) if r>ERROR_THRESHOLD]
+    # sort by probability, highest first
+    results.sort(key=lambda x: x[1], reverse=True)
+    return_list = []
+    for r in results:
+        return_list.append({"buildingIntents": buildings[r[0]], "probability": str(r[1])})
+    return return_list
+
 def getResponse(ints, intents_json):
    tag = ints[0]['intent']
    list_of_intents = intents_json['intents']
-    print("ints")
-    print(ints)
    for i in list_of_intents:
        if(i['tag']== tag):
            result = random.choice(i['responses'])
            break
    return result

-def getInfo(sentence):
+def getBuildingInfo(sentence):
    doc = nlp(sentence)
    start = 0
    end = 0
@@ -101,9 +116,18 @@ def send():

        ints = predict_class(msg)
        if ints[0]['intent'] == "navigation":
-            building = getInfo(msgClean)
-            #TODO: Check if buildings are available
-            res = "Now navigating to " + building[1] + " from " + building[0]
+            currbuilding = getBuildingInfo(msgClean)
+            if currbuilding[0] == 'random location':
+                currbuilding[0] = buildings[random.randint(0, len(buildings)-1)] 
+                while currbuilding[0] == currbuilding[1]:
+                    currbuilding[1] = buildings[random.randint(0, len(buildings)-1)]
+            if currbuilding[1] == 'random location':
+                currbuilding[1] = buildings[random.randint(0, len(buildings)-1)]
+                while currbuilding[0] == currbuilding[1]:
+                    currbuilding[1] = buildings[random.randint(0, len(buildings)-1)]
+            fromBuild = predict_building(currbuilding[0])
+            toBuild = predict_building(currbuilding[1])
+            res = "Now navigating to " + toBuild[0]['buildingIntents'] + " from " + fromBuild[0]['buildingIntents']
            #TODO: START CONVERSION TO GPS COORDINATES
        elif ints[0]['intent'] == "exit":
            res = getResponse(ints, intents)
--- a/src/semantic/train_buildings.py
+++ b/src/semantic/train_buildings.py
@@ -0,0 +1,96 @@
+import numpy as np
+from keras.models import Sequential
+from keras.layers import Dense, Activation, Dropout
+from keras.optimizers import SGD
+import random
+
+import nltk
+from nltk.stem import WordNetLemmatizer
+lemmatizer = WordNetLemmatizer()
+import json
+import pickle
+
+building_words=[]
+buildings = []
+documents = []
+ignore_letters = ['!', '?', ',', '.']
+buildingIntents_file = open('buildingIntents.json').read()
+buildingIntents = json.loads(buildingIntents_file)
+
+# download nltk resources
+nltk.download('punkt')
+nltk.download('wordnet')
+
+for intent in buildingIntents['intents']:
+    for pattern in intent['patterns']:
+        #tokenize each word
+        word = nltk.word_tokenize(pattern)
+        building_words.extend(word)
+        #add documents in the corpus
+        documents.append((word, intent['tag']))
+        # add to our buildings list
+        if intent['tag'] not in buildings:
+            buildings.append(intent['tag'])
+print(documents)
+# lemmatize and lowercase each word and remove duplicates
+building_words = [lemmatizer.lemmatize(w.lower()) for w in building_words if w not in ignore_letters]
+building_words = sorted(list(set(building_words)))
+# sort buildings
+buildings = sorted(list(set(buildings)))
+# documents = combination between patterns and buildingIntents
+print (len(documents), "documents")
+# buildings = building intent tags
+print (len(buildings), "buildings", buildings)
+# building_words = all building_words, vocabulary
+print (len(building_words), "unique lemmatized building_words", building_words)
+
+pickle.dump(building_words,open('building_words.pkl','wb'))
+pickle.dump(buildings,open('buildings.pkl','wb'))
+
+# create our training data
+training = []
+# create an empty array for our output
+output_empty = [0] * len(buildings)
+# training set, bag of building_words for each sentence
+for doc in documents:
+    # initialize our bag of building_words
+    bag = []
+    # list of tokenized building_words for the pattern
+    pattern_building_words = doc[0]
+    # lemmatize each word - create base word, in attempt to represent related building_words
+    pattern_building_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_building_words]
+    # create our bag of building_words array with 1, if word match found in current pattern
+    for word in building_words:
+        bag.append(1) if word in pattern_building_words else bag.append(0)
+        
+    # output is a '0' for each tag and '1' for current tag (for each pattern)
+    output_row = list(output_empty)
+    output_row[buildings.index(doc[1])] = 1
+    
+    training.append([bag, output_row])
+# shuffle our features and turn into np.array
+random.shuffle(training)
+training = np.array(training)
+# split into feature and label lists. X - pattern bags of words, Y - one-hot building tags
+train_x = list(training[:,0])
+train_y = list(training[:,1])
+print("Buildings Training data created")
+
+# Create model - 3 layers. First layer has 128 neurons, second layer has 64 neurons, and the 3rd (output) layer has
+# one neuron per building intent, using softmax to predict the building
+model = Sequential()
+model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
+model.add(Dropout(0.5))
+model.add(Dense(64, activation='relu'))
+model.add(Dropout(0.5))
+model.add(Dense(len(train_y[0]), activation='softmax'))
+
+# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
+sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
+model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
+
+#fitting and saving the model 
+hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
+model.save('buildings_model.h5', hist)
+
+print("building model created")