"""
In this example we are trying to predict color of a point only with the x and y values.
x,y,color
x : X coordinate on a cartisian coordinate plain
y : Y coordinate on a cartision coordinate plain
color: One of two values (0 , 1).
"""
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
train_df = pd.read_csv('./data/train.csv')
# You always want to shuffle your training data. In this particular training set the
# colors are grouped in sequence, and we want to avoid feeding the network long runs of
# highly correlated examples. Shuffled data is much more like what we will see in the wild.
# np.random.shuffle works in place, so there is no return value to assign. (Caveat: this
# only reaches the DataFrame itself when all columns share one dtype, as they do here.)
np.random.shuffle(train_df.values)
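Shuffling through `train_df.values` relies on `.values` being a view of the underlying array, which holds only while every column shares one dtype; with mixed dtypes the shuffle silently operates on a copy. A safer pandas-native sketch (with made-up data, not the tutorial's CSV):

```python
import pandas as pd

# Toy stand-in for train.csv: colors grouped in sequence, as in the tutorial.
df = pd.DataFrame({'x': [1, 2, 3, 4],
                   'y': [5, 6, 7, 8],
                   'color': [0, 0, 1, 1]})
# sample(frac=1) returns every row in random order; reset_index keeps a clean
# 0..n-1 index. Unlike shuffling .values, this never operates on a hidden copy.
df = df.sample(frac=1, random_state=0).reset_index(drop=True)
print(df)
```

`random_state` is fixed here only so the example is reproducible; drop it for a fresh shuffle each run.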
print(train_df.head())
# Sequential here lets us define our layers in sequence from input to output.
# This example is using a fully connected feed forward network (Dense).
# 2 Input -> 4 Hidden -> 2 output
model = keras.Sequential([
    # Our input data is the x and y coordinates of a point.
    keras.layers.Dense(4, input_shape=(2,), activation='relu'),
    keras.layers.Dense(2, activation='sigmoid'),
])
# You can browse the available loss functions at https://www.tensorflow.org/api_docs/python/tf
# CategoricalCrossentropy expects labels encoded in a "one hot" fashion;
# SparseCategoricalCrossentropy expects labels encoded as integers (here, 0 or 1).
# Don't know what logits means? https://stackoverflow.com/questions/34240703/what-is-logits-softmax-and-softmax-cross-entropy-with-logits
# from_logits=True tells the loss that the model's outputs are raw scores rather than
# normalized probabilities, so the loss applies softmax itself. (Strictly, that pairs
# with no activation on the final layer; the sigmoid above still trains here.)
model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
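To make "logits" concrete, here is a tiny standalone illustration (separate from the model, with made-up scores): softmax turns unnormalized class scores into probabilities, which is exactly what the loss does internally when `from_logits=True`.

```python
import numpy as np

# Two unnormalized class scores ("logits"), one per class.
logits = np.array([2.0, -1.0])
# Softmax: exponentiate, then normalize so the entries sum to 1.
probs = np.exp(logits) / np.exp(logits).sum()
print(probs)  # two probabilities that sum to 1.0
```

The larger logit always maps to the larger probability, so argmax over logits and argmax over probabilities agree.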
# Convert the two input columns of the dataframe into one (n_samples, 2) numpy array.
x = np.column_stack((train_df.x.values, train_df.y.values))
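A quick standalone sanity check of what `column_stack` produces (toy arrays, not the CSV data): it pairs the two 1-D arrays column-wise, one row per point.

```python
import numpy as np

xs = np.array([1.0, 2.0, 3.0])
ys = np.array([4.0, 5.0, 6.0])
pts = np.column_stack((xs, ys))
print(pts.shape)  # (3, 2): one (x, y) row per point
```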
# TUNING ###################
# Look up which argument values fit() expects or supports. A numpy array is used here,
# but other input types are accepted.
# model.fit(x, train_df.color.values, batch_size=16) # 50-ish %
# model.fit(x, train_df.color.values, batch_size=4) # 81-ish %
# Try going back and doubling the hidden layer size to 8 units. I got 88% accuracy.
# Running 5 epochs showed that the NN can classify 100% of the training data correctly.
# This makes sense because our data is linearly separable and cleanly labeled.
model.fit(x, train_df.color.values, batch_size=4, epochs=5)
test_df = pd.read_csv('./data/test.csv')
test_x = np.column_stack((test_df.x.values, test_df.y.values))
print("EVALUATION")
# Is this a "manual" holdout?
model.evaluate(test_x, test_df.color.values)
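Beyond the aggregate metrics from `evaluate`, you may want per-point predictions. As a sketch (not in the original script, with made-up scores standing in for `model.predict(test_x)`): the model emits two class scores per point, and argmax over the last axis picks the predicted color.

```python
import numpy as np

# Illustrative scores shaped like model.predict(test_x) output:
# one row of two class scores per test point (values are made up).
scores = np.array([[0.1, 0.9],
                   [0.8, 0.2]])
preds = np.argmax(scores, axis=1)  # higher-scoring class per row -> 0/1 color
print(preds)
```

In the script itself this would be `np.argmax(model.predict(test_x), axis=1)`, which you could then compare against `test_df.color.values`.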