"""
In this example we are trying to predict color of a point only with the x and y values.

x,y,color
x : X coordinate on a Cartesian coordinate plane
y : Y coordinate on a Cartesian coordinate plane
color: One of two values (0 , 1).

"""

import tensorflow as tf
from tensorflow import keras

import pandas as pd
import numpy as np

# Load the training set (columns: x, y, color).
train_df = pd.read_csv('./data/train.csv')

# You always want to shuffle your training data. In this particular training set the colors
# are all grouped in sequence, and we want to avoid having highly correlated samples next to
# each other -- shuffled data is much closer to what we will see in the wild.
#
# NOTE: np.random.shuffle(train_df.values) is NOT a reliable way to do this. `.values` may
# hand back a copy of the data (it does whenever the columns have mixed dtypes, e.g. float
# coordinates next to an integer label), in which case the shuffle is applied to a temporary
# array and train_df itself is left in its original order.
#
# Sampling 100% of the rows without replacement returns a new, row-shuffled DataFrame, so
# the result must be assigned back. reset_index(drop=True) discards the old row labels so
# the index runs 0..n-1 again.
train_df = train_df.sample(frac=1).reset_index(drop=True)

# Quick sanity check: the first rows should no longer be in file order.
print(train_df.head())

# Sequential lets us define our layers in sequence from input to output.
# This example uses a fully connected feed-forward network (Dense).
# 2 input -> 4 hidden -> 2 output
model = keras.Sequential([
    # Our input data is x and y.
    keras.layers.Dense(4, input_shape=(2,), activation='relu'),
    # One raw score (logit) per color class. There is deliberately no activation here:
    # the loss below is built with from_logits=True and applies the softmax itself.
    # Putting a sigmoid/softmax on this layer as well would squash the outputs twice
    # and hurt training.
    keras.layers.Dense(2),
])

# You can view what a loss function here is https://www.tensorflow.org/api_docs/python/tf
# CategoricalCrossentropy expects labels encoded in a "one hot" fashion.
# SparseCategoricalCrossentropy expects two or more labels encoded as integers, which
# matches our color column (0 or 1).

# Don't know what Logits means? https://stackoverflow.com/questions/34240703/what-is-logits-softmax-and-softmax-cross-entropy-with-logits

# from_logits=True tells the loss that the model outputs are unnormalized scores rather
# than probabilities between 0 and 1, so it must normalize them before computing the
# cross-entropy. This has to agree with the activation (none) of the output layer above.
model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])


print(train_df.head())

# Build the (n_samples, 2) feature matrix by selecting the two coordinate columns
# and converting them to a numpy array.
x = train_df[['x', 'y']].to_numpy()

# TUNING ###################

# Check the Keras docs for which argument types model.fit accepts. We pass numpy arrays
# here, but other input types are supported as well.
#
# Notes from earlier single-epoch experiments:
#   batch_size=16 -> 50-ish % accuracy
#   batch_size=4  -> 81-ish % accuracy
#   batch_size=4 with the hidden layer doubled to 8 units -> about 88% accuracy

# Running 5 epochs showed that the NN can classify 100% of the training data correctly.
# This makes sense because our data is linear and well labeled.
model.fit(
    x,
    train_df['color'].to_numpy(),
    batch_size=4,
    epochs=5,
)

# Load the test set and build its feature matrix the same way as for training:
# the x and y columns side by side.
test_df = pd.read_csv('./data/test.csv')
test_x = test_df[['x', 'y']].to_numpy()

print("EVALUATION")
# The model is scored on data loaded from a separate file. Assuming test.csv shares no
# rows with train.csv, this is effectively a "manual" holdout set.
model.evaluate(test_x, test_df['color'].to_numpy())