Merge pull request #14 from Mobile-Robotics-W20-Team-9/semantic

Semantic
2025-09-08 04:03:14 +00:00 · 2020-04-30 17:20:26 -04:00
parent edff80c147 2725e0a1e0
commit bcf4c9abbe
23 changed files with 655 additions and 3 deletions
--- a/docs/home.md
+++ b/docs/home.md
@@ -36,6 +36,14 @@
 - [Scipy](https://www.scipy.org/)
 - [Matplotlib](https://matplotlib.org/)
 - [Natural Language Toolkit](https://www.nltk.org/)
+- [Cpython](https://pypi.org/project/cPython/)
+- [NLTK](https://pypi.org/project/nltk/)
+- [Setup Tools](https://pypi.org/project/setuptools/)
+- [Pylint](https://pypi.org/project/pylint/)
+- [Spacy](https://pypi.org/project/spacy/)
+- [Pickle](https://pypi.org/project/pickle-mixin/)
+- [TensorFlow](https://pypi.org/project/tensorflow/)
+- [Keras](https://pypi.org/project/Keras/)

 ## Docker

@@ -49,3 +57,21 @@ After cloning the repo, start your docker machine and following commands shown b

 1. `cd /PATH/TO/UMICH_NCLT_SLAP/src`
 2. `docker-compose run --rm python-dev`
+
+### Semantic Language Parsing: Chatbot
+
+For standalone testing of the chatbot, run the following commands
+
+1. `cd /PATH/TO/UMICH_NCLT_SLAP/semantic/src`
+2. `docker-compose run --rm python-dev`
+1. `cd app/semantic`
+2. `python gui_chatbot.py`
+
+You can update the models by changing the intent or pickle files. intents.json can be changed with a basic text editor, and pickles can be read and changed using pickleManage.py.
+1. `cd /PATH/TO/UMICH_NCLT_SLAP/src/dataset/dataManipulation/pickles`
+2. `python`
+3. `from pickleManage import *`
+4. Use desired functions. Functions are documented with examples in pickleManage.py file.
+To update the models after making changes, run:
+'python 
+
--- a/src/dataset/ManageDataset/project_vel_to_cam.py
+++ b/src/dataset/ManageDataset/project_vel_to_cam.py
@@ -120,7 +120,7 @@ def project_vel_to_cam(hits, cam_num):
 def main(args):

    if len(args)<4:
-        print  """Incorrect usage.
+        print("""Incorrect usage.

 To use:

@@ -129,7 +129,7 @@ To use:
      vel:  The velodyne binary file (timestamp.bin)
      img:  The undistorted image (timestamp.tiff)
  cam_num:  The index (0 through 5) of the camera
-"""
+  """)
        return 1


--- a/src/dataset/dataManipulation/pycache/findBuildingCoord.cpython-36.pyc
+++ b/src/dataset/dataManipulation/pycache/findBuildingCoord.cpython-36.pyc
--- a/src/semantic/.env
+++ b/src/semantic/.env
--- a/src/semantic/Dockerfile
+++ b/src/semantic/Dockerfile
@@ -4,7 +4,8 @@ RUN apt-get update && \
    apt-get install -y \
        build-essential \
        python-opencv \
-        libpcl-dev
+        libpcl-dev \
+        x11-apps

 RUN pip install -U pip && \
    pip install -U \
@@ -15,6 +16,13 @@ RUN pip install -U pip && \
        nltk \
        setuptools \
        pylint \
+        pickle-mixin \
+        spacy \
+        --upgrade setuptools \
+        --no-cache-dir tensorflow \
+        keras
+
+RUN python -m spacy download en_core_web_sm
        pickle-mixin

 CMD ["/bin/bash"]
--- a/src/semantic/buildings_model.h5
+++ b/src/semantic/buildings_model.h5
--- a/src/semantic/chatbot_model.h5
+++ b/src/semantic/chatbot_model.h5
--- a/src/semantic/docker-compose.yml
+++ b/src/semantic/docker-compose.yml
--- a/src/semantic/docker-compose_ubuntu
+++ b/src/semantic/docker-compose_ubuntu
@@ -0,0 +1,27 @@
+# Docker Compose
+
+# docker-compose.yml format version
+version: '3'
+
+# Define services
+services:
+    # Python Development Container
+    python-dev:
+        # Use Dockerfile in current folder
+        build: .
+        # Mount each project module folder on the host into /app in the container, plus the X11 socket for GUIs
+        volumes:
+            - ./control:/app/control
+            - ./dataset:/app/dataset
+            - ./localization:/app/localization
+            - ./planning:/app/planning
+            - ./semantic:/app/semantic
+            - ./visualization:/app/visualization
+            - /tmp/.X11-unix/:/tmp/.X11-unix
+        # Set DISPLAY variable and network mode for GUIs
+        environment:
+            - DISPLAY=$DISPLAY
+            #- DISPLAY=${IP_ADDRESS}:0.0
+        network_mode: "host"
+        # Set working directory in container to app folder
+        working_dir: /app
--- a/src/semantic/gui_chatbot.py
+++ b/src/semantic/gui_chatbot.py
@@ -0,0 +1,201 @@
+# from python example and tutorial here: https://data-flair.training/blogs/python-chatbot-project/
+# also utilizes examples from spacy website
+
+import nltk
+from nltk.stem import WordNetLemmatizer
+lemmatizer = WordNetLemmatizer()
+import pickle
+import numpy as np
+import spacy
+import tkinter
+from tkinter import *
+
+
+from keras.models import load_model
+# Trained Keras models: one for chat intents, one for building names
+model = load_model('chatbot_model.h5')
+modelBuilding = load_model('buildings_model.h5')
+import json
+import random
+
+
+def _load_json(path):
+    """Parse the JSON file at *path*, closing the file once it is read."""
+    with open(path) as handle:
+        return json.load(handle)
+
+
+def _load_pickle(path):
+    """Unpickle the file at *path*, closing the file once it is read.
+
+    NOTE(review): pickle can execute arbitrary code while loading, so these
+    files must only ever come from this repository's own training scripts.
+    """
+    with open(path, 'rb') as handle:
+        return pickle.load(handle)
+
+
+# Intent definitions plus the vocabulary/class lists saved next to each model
+intents = _load_json('intents/intents.json')
+words = _load_pickle('pickles/words.pkl')
+classes = _load_pickle('pickles/classes.pkl')
+buildingsIntents = _load_json('intents/buildingIntents.json')
+building_words = _load_pickle('pickles/building_words.pkl')
+buildings = _load_pickle('pickles/buildings.pkl')
+
+# Conversation state flags shared with send()
+confirmation = 0
+startNav = 0 #TODO: START CONVERSION TO GPS COORDINATES
+completedNav = 0 #TODO: Add response once complete
+emergencyExit = 0 #TODO: OPTIONAL STOP EVERYTHING
+
+def clean_up_sentence(sentence):
+    """Tokenize *sentence* and return its lower-cased, lemmatized tokens.
+
+    The text is split with NLTK's ``word_tokenize`` and every token is
+    lower-cased and passed through the module-level ``WordNetLemmatizer``.
+    """
+    tokens = nltk.word_tokenize(sentence)
+    lemmas = []
+    for token in tokens:
+        # lemmatize (not stem): reduce each token to its base form
+        lemmas.append(lemmatizer.lemmatize(token.lower()))
+    return lemmas
+
+
+# build a 0/1 vocabulary-membership vector for a sentence
+def bag_of_words(sentence, wording, show_details=True):
+    """Return a NumPy array flagging which entries of *wording* occur in *sentence*.
+
+    sentence     -- raw text; it is normalized with ``clean_up_sentence``
+    wording      -- vocabulary list; the result has one slot per entry
+    show_details -- when true, print every vocabulary word that matched
+    """
+    tokens = clean_up_sentence(sentence)
+    bag = [0] * len(wording)
+    for token in tokens:
+        for idx, vocab_word in enumerate(wording):
+            if vocab_word != token:
+                continue
+            # mark the vocabulary position of the matching word
+            bag[idx] = 1
+            if show_details:
+                print ("found in bag: %s" % vocab_word)
+    return np.array(bag)
+
+def predict_class(sentence):
+    """Classify *sentence* with the chatbot model and return the likely intents.
+
+    Returns a list of ``{"intent": <class name>, "probability": <str>}``
+    dicts, one per class scoring above 0.25, strongest first. The list is
+    empty when no class clears the threshold.
+    """
+    ERROR_THRESHOLD = 0.25
+    features = bag_of_words(sentence, words, show_details=False)
+    scores = model.predict(np.array([features]))[0]
+    # keep only the (class index, score) pairs above the threshold
+    candidates = [(idx, score) for idx, score in enumerate(scores) if score > ERROR_THRESHOLD]
+    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
+    return [
+        {"intent": classes[idx], "probability": str(score)}
+        for idx, score in ranked
+    ]
+
+def predict_building(currbuilding):
+    """Classify the building text *currbuilding* with the buildings model.
+
+    Returns a list of ``{"buildingIntents": <building name>, "probability": <str>}``
+    dicts, one per building scoring above 0.5, strongest first. The list is
+    empty when no building clears the threshold.
+    """
+    ERROR_THRESHOLD = 0.5
+    features = bag_of_words(currbuilding, building_words, show_details=False)
+    scores = modelBuilding.predict(np.array([features]))[0]
+    # keep only the (building index, score) pairs above the threshold
+    candidates = [(idx, score) for idx, score in enumerate(scores) if score > ERROR_THRESHOLD]
+    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
+    return [
+        {"buildingIntents": buildings[idx], "probability": str(score)}
+        for idx, score in ranked
+    ]
+
+def getResponse(ints, intents_json):
+    """Pick a random canned response for the top predicted intent.
+
+    ints         -- prediction list as returned by ``predict_class``; only the
+                    first entry's ``'intent'`` is used
+    intents_json -- parsed intents file: ``{"intents": [{"tag", "responses", ...}]}``
+
+    When *ints* is empty, or its tag is unknown or has no responses, the
+    responses of the "noanswer" intent are used instead; if that is missing
+    too, a fixed apology string is returned. (Previously these cases raised
+    IndexError / UnboundLocalError.)
+    """
+    fallback_tag = "noanswer"
+    tag = ints[0]['intent'] if ints else fallback_tag
+    # first definition of a tag wins, as with the original first-match search
+    responses_by_tag = {}
+    for entry in intents_json['intents']:
+        responses_by_tag.setdefault(entry['tag'], entry['responses'])
+    responses = responses_by_tag.get(tag) or responses_by_tag.get(fallback_tag)
+    if not responses:
+        return "Sorry, can't understand you"
+    return random.choice(responses)
+
+def getBuildingInfo(sentence):
+    """Extract the start and destination building words from *sentence*.
+
+    The sentence is parsed with the module-level spaCy pipeline ``nlp``.
+    A proper noun seen after the word "from" becomes the start building and
+    one seen after "to" becomes the destination; later proper nouns replace
+    earlier ones. Returns ``[start, destination]``, each defaulting to
+    "random location" when nothing was found.
+    """
+    # index 0 holds the start building, index 1 the destination
+    names = ["random location", "random location"]
+    # which entry the next proper noun fills; None until "to"/"from" is seen
+    slot = None
+    for token in nlp(sentence):
+        if token.pos_ == "PROPN" and slot is not None:
+            names[slot] = token.text
+        elif token.text == "to":
+            slot = 1
+        elif token.text == "from":
+            slot = 0
+    return names
+
+
+#Creating tkinter GUI
+def _random_building(exclude):
+    """Return a random known building, different from *exclude* when possible."""
+    candidates = [b for b in buildings if b != exclude] or list(buildings)
+    return random.choice(candidates)
+
+
+def send():
+    """Handle the Send button: show the user's message and the bot's reply.
+
+    Reads the entry box, classifies the message with ``predict_class`` and
+    updates the module-level ``confirmation``, ``startNav`` and
+    ``emergencyExit`` flags according to the detected intent. Blank messages
+    are ignored.
+    """
+    global confirmation
+    global startNav
+    global emergencyExit
+
+    msgClean = EntryBox.get("1.0",'end-1c')
+    msg = msgClean.strip()
+    EntryBox.delete("0.0",END)
+
+    if msg == '':
+        return
+
+    ChatBox.config(state=NORMAL)
+    ChatBox.insert(END, "You: " + msg + '\n\n')
+    ChatBox.config(foreground="#446665", font=("Verdana", 12 ))
+
+    # predict_class returns [] when no intent clears its threshold; treat that
+    # as the "noanswer" intent instead of crashing on ints[0]
+    ints = predict_class(msg) or [{"intent": "noanswer", "probability": "0"}]
+    intent = ints[0]['intent']
+
+    # adds rule based chatbot to confirm navigation
+    if intent in ("yes", "no") and confirmation == 1 and startNav == 0:
+        emergencyExit = 0
+        if intent == "yes":
+            res = "Starting navigation. Please wait for process to complete. This may take a couple minutes."
+            startNav = 1
+        else:
+            res = "Cancelled operation"
+        confirmation = 0
+    elif intent == "navigation" and startNav == 0:
+        emergencyExit = 0
+        currbuilding = getBuildingInfo(msgClean)
+        # fill in whichever end the user left out, never overwriting the end
+        # they did supply and never choosing the same building for both
+        if currbuilding[0] == 'random location':
+            currbuilding[0] = _random_building(currbuilding[1])
+        if currbuilding[1] == 'random location':
+            currbuilding[1] = _random_building(currbuilding[0])
+        fromBuild = predict_building(currbuilding[0])
+        toBuild = predict_building(currbuilding[1])
+        if fromBuild and toBuild:
+            res = "You chose navigating to " + toBuild[0]['buildingIntents'] + " building from " + fromBuild[0]['buildingIntents'] + " building. Is this correct?"
+            confirmation = 1
+        else:
+            # the buildings model was not confident about at least one name
+            res = "Sorry, I could not recognize one of the buildings. Please try again."
+    elif intent == "exit":
+        res = getResponse(ints, intents)
+        startNav = 0
+        emergencyExit = 1
+    elif startNav == 1:
+        emergencyExit = 0
+        res = "Please wait while the navigation is processing"
+    else:
+        emergencyExit = 0
+        res = getResponse(ints, intents)
+    ChatBox.insert(END, "Belatrix: " + res + '\n\n')
+
+    # lock the transcript again and scroll to the newest message
+    ChatBox.config(state=DISABLED)
+    ChatBox.yview(END)
+
+
+# Build the fixed-size (400x500, non-resizable) main window of the chatbot
+root = Tk()
+root.title("Chatbot")
+root.geometry("400x500")
+root.resizable(width=FALSE, height=FALSE)
+
+#import nlp dictionary
+# nlp is read as a module-level name by getBuildingInfo(), so it must exist
+# before the event loop starts dispatching send()
+nlp = spacy.load("en_core_web_sm")
+# download the 'punkt' and 'wordnet' NLTK resources
+nltk.download('punkt')
+nltk.download('wordnet')
+
+#Create Chat window
+ChatBox = Text(root, bd=0, bg="white", height="8", width="50", font="Arial",)
+
+# start disabled; send() re-enables the widget only while inserting text
+ChatBox.config(state=DISABLED)
+
+#Bind scrollbar to Chat window
+scrollbar = Scrollbar(root, command=ChatBox.yview, cursor="heart")
+ChatBox['yscrollcommand'] = scrollbar.set
+
+#Create Button to send message
+SendButton = Button(root, font=("Verdana",12,'bold'), text="Send", width="12", height=5,
+                    bd=0, bg="#f9a602", activebackground="#3c9d9b",fg='#000000',
+                    command= send )
+
+#Create the box to enter message
+EntryBox = Text(root, bd=0, bg="white",width="29", height="5", font="Arial")
+#EntryBox.bind("<Return>", send)
+
+
+#Place all components on the screen
+scrollbar.place(x=376,y=6, height=386)
+ChatBox.place(x=6,y=6, height=386, width=370)
+EntryBox.place(x=128, y=401, height=90, width=265)
+SendButton.place(x=6, y=401, height=90)
+
+# hand control to tkinter; this call blocks until the window is closed
+root.mainloop()
--- a/src/semantic/intents/buildingIntents.json
+++ b/src/semantic/intents/buildingIntents.json
@@ -0,0 +1,28 @@
+{"intents": [
+    {"tag": "Bob and Betty Beyster",
+     "patterns": ["BBB", "CSE", "CS","Computer Science", "Computer", "Bob", "Bob and Betty Beyster", "Betty", "Computer Science Department", "CS Department"],
+     "responses": ["Bob and Betty Beyster"],
+     "context": [""]
+    },
+    {"tag": "Duderstadt",
+     "patterns": ["Dude", "the Dude", "Duderstadt", "Mujos", "Library", "North Campus Library"],
+     "responses": ["Duderstadt"],
+     "context": [""]
+    },
+    {"tag": "FXB",
+     "patterns": ["FXB", "Francois-Xavier Bagnoud", "Aerospace", "aerospace", "Aerospace Engineering", "planes", "Aerospace Department", "Aerospace Engineering Department"],
+     "responses": ["FXB"],
+     "context": [""]
+    },
+    {"tag": "Electrical and Computer Engineering",
+     "patterns": ["Electrical and Computer Engineering","Electrical", "Electrical Engineering", "Computer Engineering", "EECS", "ECE", "Electrical Engineering Department", "EECS Department", "ECE Department"],
+     "responses": ["Electrical and Computer Engineering"],
+     "context": [""]
+    },
+    {"tag": "Pierpont Commons",
+     "patterns": ["Pierpont", "Pierpont Commons", "Commons", "Panda Express"],
+     "responses": ["Pierpont Commons"],
+     "context": [""]
+    }
+]
+}
--- a/src/semantic/intents/intents.json
+++ b/src/semantic/intents/intents.json
@@ -0,0 +1,48 @@
+{"intents": [
+        {"tag": "greeting",
+         "patterns": ["Hi there", "How are you", "Is anyone there?","Hey","Hola", "Hello", "Good day"],
+         "responses": ["Hello, thanks for asking", "Good to see you again", "Hi there, how can I help?"],
+         "context": [""]
+        },
+        {"tag": "goodbye",
+         "patterns": ["Bye", "See you later", "Goodbye", "Nice chatting to you, bye", "Till next time"],
+         "responses": ["See you!", "Have a nice day", "Bye! Come back again soon."],
+         "context": [""]
+        },
+        {"tag": "thanks",
+         "patterns": ["Thanks", "Thank you", "That's helpful", "Awesome, thanks", "Thanks for helping me"],
+         "responses": ["Happy to help!", "Any time!", "My pleasure"],
+         "context": [""]
+        },
+        {"tag": "noanswer",
+         "patterns": [],
+         "responses": ["Sorry, can't understand you", "Please give me more info", "Not sure I understand"],
+         "context": [""]
+        },
+        {"tag": "options",
+         "patterns": ["How you could help me?", "What you can do?", "What help you provide?", "How you can be helpful?", "What support is offered"],
+         "responses": ["I can take you to multiple buildings including BBB, EECS, and more on north campus."],
+         "context": [""]
+        },
+        {"tag": "navigation",
+         "patterns": ["Can you take me to the ", "Take me to the building", "Map me to the location", "Navigate me to the building from the building"],
+         "responses": ["Starting Navigation"],
+         "context": ["navigation_to_building"]
+        },
+        {"tag": "exit",
+         "patterns": ["stop", "quit", "end", "I want to stop navigation"],
+         "responses": ["Ending current navigation"],
+         "context": ["navigation_to_building"]
+        },
+        {"tag": "yes",
+                "patterns": ["yes", "y", "sure", "right", "correct"],
+                "responses": ["I am sorry. I don't understand"],
+                "context": ["navigation_to_building"]
+        },
+        {"tag": "no",
+                "patterns": ["no", "nope", "n", "wrong", "incorrect"],
+                "responses": ["I am sorry. I don't understand"],
+                "context": ["navigation_to_building"]
+        }
+   ]
+}
--- a/src/semantic/pickles/building_words.pkl
+++ b/src/semantic/pickles/building_words.pkl
--- a/src/semantic/pickles/building_words.txt
+++ b/src/semantic/pickles/building_words.txt
@@ -0,0 +1,14 @@
+Dude
+Computer Science
+CSE
+FXB
+BBB
+Aerospace Engineering
+Electrical Engineering
+EECS
+ECE
+Pierpont
+Duderstadt
+Francois-Xavier Bagnoud
+Bob and Betty Beyster
+Pierpont Commons
--- a/src/semantic/pickles/buildings.pkl
+++ b/src/semantic/pickles/buildings.pkl
--- a/src/semantic/pickles/buildings.txt
+++ b/src/semantic/pickles/buildings.txt
@@ -0,0 +1,5 @@
+Duderstadt
+Electrical and Computer Engineering
+FXB
+Pierpont Commons
+Bob and Betty Beyster
--- a/src/semantic/pickles/classes.pkl
+++ b/src/semantic/pickles/classes.pkl
--- a/src/semantic/pickles/classes.txt
+++ b/src/semantic/pickles/classes.txt
@@ -0,0 +1,11 @@
+blood_pressure_search
+exit
+goodbye
+greeting
+hospital_search
+navigation
+options
+pharmacy_search
+thanks
+navigation
+exit
--- a/src/semantic/pickles/pickleManage.pyc
+++ b/src/semantic/pickles/pickleManage.pyc
--- a/src/semantic/pickles/words.pkl
+++ b/src/semantic/pickles/words.pkl
--- a/src/semantic/pickles/words.txt
+++ b/src/semantic/pickles/words.txt
@@ -0,0 +1,92 @@
+'s
+a
+adverse
+all
+anyone
+are
+awesome
+be
+behavior
+blood
+by
+bye
+can
+causing
+chatting
+check
+could
+data
+day
+detail
+do
+dont
+drug
+entry
+find
+for
+give
+good
+goodbye
+have
+hello
+help
+helpful
+helping
+hey
+hi
+history
+hola
+hospital
+how
+i
+id
+is
+later
+list
+load
+locate
+log
+looking
+lookup
+management
+me
+module
+nearby
+next
+nice
+of
+offered
+open
+patient
+pharmacy
+pressure
+provide
+reaction
+related
+result
+search
+searching
+see
+show
+suitable
+support
+task
+thank
+thanks
+that
+there
+till
+time
+to
+transfer
+up
+want
+what
+which
+with
+you
+navigation
+map
+locate
+navigate
+building
--- a/src/semantic/train_buildings.py
+++ b/src/semantic/train_buildings.py
@@ -0,0 +1,96 @@
+import numpy as np
+from keras.models import Sequential
+from keras.layers import Dense, Activation, Dropout
+from keras.optimizers import SGD
+import random
+
+import nltk
+from nltk.stem import WordNetLemmatizer
+lemmatizer = WordNetLemmatizer()
+import json
+import pickle
+
+# Training script for the building-name classifier: builds a bag-of-words
+# vocabulary from intents/buildingIntents.json, saves the vocabulary and the
+# building list as pickles, then trains and saves buildings_model.h5.
+building_words = []
+buildings = []
+documents = []
+ignore_letters = ['!', '?', ',', '.']
+# read the building intents and close the file as soon as it is parsed
+with open('intents/buildingIntents.json') as intents_handle:
+    buildingIntents_file = intents_handle.read()
+buildingIntents = json.loads(buildingIntents_file)
+
+# download nltk resources
+nltk.download('punkt')
+nltk.download('wordnet')
+
+for intent in buildingIntents['intents']:
+    for pattern in intent['patterns']:
+        #tokenize each word
+        word = nltk.word_tokenize(pattern)
+        building_words.extend(word)
+        #add documents in the corpus
+        documents.append((word, intent['tag']))
+        # add to our buildings list
+        if intent['tag'] not in buildings:
+            buildings.append(intent['tag'])
+print(documents)
+# lemmatize and lower each word and remove duplicates
+building_words = [lemmatizer.lemmatize(w.lower()) for w in building_words if w not in ignore_letters]
+building_words = sorted(set(building_words))
+# sort buildings
+buildings = sorted(set(buildings))
+# documents = combination between patterns and buildingIntents
+print (len(documents), "documents")
+# buildings = buildingIntents
+print (len(buildings), "buildings", buildings)
+# building_words = all building_words, vocabulary
+print (len(building_words), "unique lemmatized building_words", building_words)
+
+# persist the vocabulary and label order the GUI needs at prediction time
+with open('pickles/building_words.pkl', 'wb') as words_handle:
+    pickle.dump(building_words, words_handle)
+with open('pickles/buildings.pkl', 'wb') as buildings_handle:
+    pickle.dump(buildings, buildings_handle)
+
+# create our training data
+training = []
+# create an empty array for our output
+output_empty = [0] * len(buildings)
+# training set, bag of building_words for each sentence
+for doc in documents:
+    # lemmatize each word - create base word, in attempt to represent related building_words
+    pattern_building_words = [lemmatizer.lemmatize(word.lower()) for word in doc[0]]
+    # bag of building_words: 1 if the vocabulary word occurs in the current pattern
+    bag = [1 if word in pattern_building_words else 0 for word in building_words]
+
+    # output is a '0' for each tag and '1' for current tag (for each pattern)
+    output_row = list(output_empty)
+    output_row[buildings.index(doc[1])] = 1
+
+    training.append([bag, output_row])
+# shuffle our features
+random.shuffle(training)
+# Split into inputs and labels without going through np.array(training):
+# bag and output_row have different lengths, and NumPy refuses to build an
+# array from such ragged rows.
+# X - patterns, Y - buildingIntents
+train_x = [row[0] for row in training]
+train_y = [row[1] for row in training]
+print("Buildings Training data created")
+
+# Create model - 3 layers. First layer 128 neurons, second layer 64 neurons and 3rd output layer contains number of neurons
+# equal to number of buildingIntents to predict output intent with softmax
+model = Sequential()
+model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
+model.add(Dropout(0.5))
+model.add(Dense(64, activation='relu'))
+model.add(Dropout(0.5))
+model.add(Dense(len(train_y[0]), activation='softmax'))
+
+# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
+sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
+model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
+
+#fitting and saving the model
+hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
+# save() takes only the path here; the training history is not a save argument
+model.save('buildings_model.h5')
+
+print("building model created")
--- a/src/semantic/train_chatbot.py
+++ b/src/semantic/train_chatbot.py
@@ -0,0 +1,96 @@
+import numpy as np
+from keras.models import Sequential
+from keras.layers import Dense, Activation, Dropout
+from keras.optimizers import SGD
+import random
+
+import nltk
+from nltk.stem import WordNetLemmatizer
+lemmatizer = WordNetLemmatizer()
+import json
+import pickle
+
+# Training script for the chat intent classifier: builds a bag-of-words
+# vocabulary from intents/intents.json, saves the vocabulary and the class
+# list as pickles, then trains and saves chatbot_model.h5.
+words = []
+classes = []
+documents = []
+ignore_letters = ['!', '?', ',', '.']
+# read the intents and close the file as soon as it is parsed
+with open('intents/intents.json') as intents_handle:
+    intents_file = intents_handle.read()
+intents = json.loads(intents_file)
+
+# download nltk resources
+nltk.download('punkt')
+nltk.download('wordnet')
+
+for intent in intents['intents']:
+    for pattern in intent['patterns']:
+        #tokenize each word
+        word = nltk.word_tokenize(pattern)
+        words.extend(word)
+        #add documents in the corpus
+        documents.append((word, intent['tag']))
+        # add to our classes list
+        if intent['tag'] not in classes:
+            classes.append(intent['tag'])
+print(documents)
+# lemmatize and lower each word and remove duplicates
+words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_letters]
+words = sorted(set(words))
+# sort classes
+classes = sorted(set(classes))
+# documents = combination between patterns and intents
+print (len(documents), "documents")
+# classes = intents
+print (len(classes), "classes", classes)
+# words = all words, vocabulary
+print (len(words), "unique lemmatized words", words)
+
+# persist the vocabulary and label order the GUI needs at prediction time
+with open('pickles/words.pkl', 'wb') as words_handle:
+    pickle.dump(words, words_handle)
+with open('pickles/classes.pkl', 'wb') as classes_handle:
+    pickle.dump(classes, classes_handle)
+
+# create our training data
+training = []
+# create an empty array for our output
+output_empty = [0] * len(classes)
+# training set, bag of words for each sentence
+for doc in documents:
+    # lemmatize each word - create base word, in attempt to represent related words
+    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in doc[0]]
+    # bag of words: 1 if the vocabulary word occurs in the current pattern
+    bag = [1 if word in pattern_words else 0 for word in words]
+
+    # output is a '0' for each tag and '1' for current tag (for each pattern)
+    output_row = list(output_empty)
+    output_row[classes.index(doc[1])] = 1
+
+    training.append([bag, output_row])
+# shuffle our features
+random.shuffle(training)
+# Split into inputs and labels without going through np.array(training):
+# bag and output_row have different lengths, and NumPy refuses to build an
+# array from such ragged rows.
+# X - patterns, Y - intents
+train_x = [row[0] for row in training]
+train_y = [row[1] for row in training]
+print("Training data created")
+
+# Create model - 3 layers. First layer 128 neurons, second layer 64 neurons and 3rd output layer contains number of neurons
+# equal to number of intents to predict output intent with softmax
+model = Sequential()
+model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
+model.add(Dropout(0.5))
+model.add(Dense(64, activation='relu'))
+model.add(Dropout(0.5))
+model.add(Dense(len(train_y[0]), activation='softmax'))
+
+# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
+sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
+model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
+
+#fitting and saving the model
+hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
+# save() takes only the path here; the training history is not a save argument
+model.save('chatbot_model.h5')
+
+print("model created")