"""
|
|
In this example we are trying to predict color of a point only with the x and y values.
|
|
|
|
x,y,color
|
|
x : X coordinate on a cartisian coordinate plain
|
|
y : Y coordinate on a cartision coordinate plain
|
|
color: One of two values (0 , 1).
|
|
|
|
"""

import tensorflow as tf
from tensorflow import keras

import pandas as pd
import numpy as np

train_df = pd.read_csv('./data/train.csv')

# You always want to shuffle your training data. In this particular training set the
# colors are grouped together in sequence, and we want to avoid feeding the network
# highly correlated examples back to back. Shuffled data is also much closer to what
# we will see in the wild.

# This shuffle method works in place; you do not need to assign the result to another variable.
np.random.shuffle(train_df.values)
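# Note (a caveat about dtypes): shuffling .values in place only affects the DataFrame
# when .values is a view, i.e. when every column shares a single dtype; with mixed
# dtypes .values returns a copy and the shuffle is a no-op. A dtype-safe pandas idiom:
# train_df = train_df.sample(frac=1).reset_index(drop=True)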

print(train_df.head())

# Sequential here lets us define our layers in sequence from input to output.
# This example uses a fully connected feed-forward network (Dense layers):
# 2 inputs -> 4 hidden -> 2 outputs
model = keras.Sequential([
    # Our input data is x and y
    keras.layers.Dense(4, input_shape=(2,), activation='relu'),
    keras.layers.Dense(2, activation='sigmoid')])
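
# If you want to verify the architecture, model.summary() prints each layer's output
# shape and parameter count (the hidden layer has 2*4 weights + 4 biases = 12 parameters).
# model.summary()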

# You can view the available loss functions here: https://www.tensorflow.org/api_docs/python/tf
# CategoricalCrossentropy expects labels encoded in a "one hot" fashion, while
# SparseCategoricalCrossentropy expects two or more label classes encoded as integers.
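# For example, with two classes the same three labels could be encoded one-hot as
# [[1, 0], [0, 1], [1, 0]] for CategoricalCrossentropy, or as the integers [0, 1, 0]
# for SparseCategoricalCrossentropy.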

# Don't know what logits means? https://stackoverflow.com/questions/34240703/what-is-logits-softmax-and-softmax-cross-entropy-with-logits

# Our output layer is not followed by a softmax, so its values are not normalized
# probabilities; from_logits=True tells the loss function to apply the softmax itself.
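# A quick sanity check of what that does under the hood (hypothetical values):
# tf.nn.softmax([2.0, 0.5]) ~= [0.82, 0.18] is what the loss compares against the
# integer label when from_logits=True.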

model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

print(train_df.head())

# Convert the dataframe to a numpy array, stacking the two input columns together
x = np.column_stack((train_df.x.values, train_df.y.values))
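# For instance, np.column_stack(([1, 2], [3, 4])) yields [[1, 3], [2, 4]]:
# each row pairs one x value with its matching y value.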

# TUNING ###################

# Look up which argument values are expected or supported. Here we pass numpy arrays,
# but other input types are accepted as well.
# model.fit(x, train_df.color.values, batch_size=16) # 50-ish %
# model.fit(x, train_df.color.values, batch_size=4)  # 81-ish %

# Try going back and doubling the hidden layer size (to 8). I got 88% accuracy.

# Running 5 epochs showed that the NN can classify 100% of the training data correctly.
# This makes sense because our data is linearly separable and well labeled.
model.fit(x, train_df.color.values, batch_size=4, epochs=5)

test_df = pd.read_csv('./data/test.csv')
test_x = np.column_stack((test_df.x.values, test_df.y.values))

print("EVALUATION")

# This is a "manual" holdout: the test set was split off ahead of time instead of
# being carved out by Keras (e.g. via the validation_split argument to fit).
model.evaluate(test_x, test_df.color.values)
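
# To inspect individual predictions rather than aggregate metrics, something like the
# sketch below should work. Because the model was compiled with from_logits=True, we
# apply the softmax ourselves before reading off class probabilities.
# probs = tf.nn.softmax(model.predict(test_x), axis=-1)
# predicted_colors = np.argmax(probs, axis=1)
# print(predicted_colors[:10])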