You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			72 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			72 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
"""
In this example we try to predict the color of a point using only its x and y values.

The data has three columns:

x,y,color
x     : X coordinate on a Cartesian coordinate plane.
y     : Y coordinate on a Cartesian coordinate plane.
color : One of two class labels (0, 1).
"""

import tensorflow as tf
from tensorflow import keras

import pandas as pd
import numpy as np
 | |
| 
 | |
train_df = pd.read_csv('./data/train.csv')

# You always want to shuffle your training data. In this particular data set
# the colors are grouped in sequence, and highly correlated neighbouring rows
# hurt minibatch training — shuffled data is much more like what we see in
# the wild.
#
# BUG FIX: the original `np.random.shuffle(train_df.values)` does not reliably
# shuffle the DataFrame — with mixed dtypes, `.values` returns a *copy*, so
# shuffling it leaves train_df untouched. Shuffle the DataFrame itself instead.
train_df = train_df.sample(frac=1).reset_index(drop=True)

print(train_df.head())
 | |
| 
 | |
# Sequential lets us define our layers in order from input to output.
# This example is a fully connected feed-forward network (Dense):
# 2 input -> 4 hidden -> 2 output.
model = keras.Sequential([
    # Our input data is x and y.
    keras.layers.Dense(4, input_shape=(2,), activation='relu'),
    # BUG FIX: the final layer must emit raw logits (no activation) because the
    # loss below is constructed with from_logits=True — it applies softmax
    # internally. The original 'sigmoid' squashed the outputs first, so the
    # loss was computed on sigmoid outputs mislabeled as logits.
    keras.layers.Dense(2)])

# Loss functions: https://www.tensorflow.org/api_docs/python/tf
# CategoricalCrossentropy expects "one hot" encoded labels;
# SparseCategoricalCrossentropy expects labels encoded as integers
# (our `color` column is integer 0/1, hence the sparse variant).

# Don't know what logits means? https://stackoverflow.com/questions/34240703/what-is-logits-softmax-and-softmax-cross-entropy-with-logits

# from_logits=True tells the loss the model outputs unnormalized scores
# rather than probabilities between 0 and 1.

model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
 | |
| 
 | |
| 
 | |
print(train_df.head())

# Stack the two feature columns into a single (n_samples, 2) numpy array.
x = np.column_stack((train_df.x.values, train_df.y.values))

# TUNING ###################
# model.fit accepts numpy arrays here, but other input types are supported —
# check the docs for what the args expect.
# Observed: batch_size=16 gave ~50% accuracy; batch_size=4 gave ~81%;
# doubling the hidden layer to 8 units reached ~88%.
# Running 5 epochs classified 100% of the training data correctly — which
# makes sense, since the data is linear and well labeled.
model.fit(x, train_df.color.values, batch_size=4, epochs=5)
 | |
| 
 | |
test_df = pd.read_csv('./data/test.csv')
test_x = np.column_stack((test_df.x.values, test_df.y.values))

print("EVALUATION")
# Evaluate on a separate test CSV — a "manual" hold-out split.
model.evaluate(test_x, test_df.color.values)
 |