Added matrix operations, and descriptive stats functions.
							parent
							
								
									d2ace996da
								
							
						
					
					
						commit
						f3ef7bb169
					
				| @ -0,0 +1,43 @@ | ||||
| """ | ||||
| A matrix is a two-dimensional collection of numbers. Math convention uses capital | ||||
| letters to represent matrices. | ||||
| 
 | ||||
| 2 x 3 Matrix | ||||
| A = [[1, 2, 3], [4, 5, 6]] | ||||
| 
 | ||||
| 3 x 2 Matrix | ||||
| B = [[1, 2], [3, 4], [5, 6]] | ||||
| 
 | ||||
| """ | ||||
| from typing import List, Tuple, Callable | ||||
| 
 | ||||
| 
 | ||||
def shape(A: List[List[float]]) -> Tuple[int, int]:
    """Return the dimensions of matrix ``A`` as ``(rows, columns)``.

    A matrix with n rows and k columns is called an n x k matrix. The column
    count is taken from the first row; an empty matrix yields ``(0, 0)``.
    """
    if not A:
        return len(A), 0
    return len(A), len(A[0])
| 
 | ||||
| 
 | ||||
def get_row(A: List[List[float]], i) -> List[float]:
    """Return row ``i`` of matrix ``A`` (the row object itself, not a copy)."""
    row = A[i]
    return row
| 
 | ||||
| 
 | ||||
def get_column(A: List[List[float]], j) -> List[float]:
    """Return column ``j`` of matrix ``A`` as a new list, one entry per row."""
    return [row[j] for row in A]
| 
 | ||||
| 
 | ||||
def make_matrix(
    num_rows: int, num_colums: int, entry_fn: Callable
) -> List[List[float]]:
    """Build a ``num_rows`` x ``num_colums`` matrix from ``entry_fn``.

    The entry at row ``i``, column ``j`` is ``entry_fn(i, j)``.
    """

    def build_row(i):
        # One row: evaluate the entry function across every column index.
        return [entry_fn(i, j) for j in range(num_colums)]

    return [build_row(i) for i in range(num_rows)]
| 
 | ||||
| 
 | ||||
def is_diagonal(i, j) -> int:
    """Return 1 when ``i`` equals ``j`` (a diagonal position), otherwise 0."""
    if i == j:
        return 1
    return 0
| @ -0,0 +1,23 @@ | ||||
| from collections import Counter | ||||
| from matplotlib import pyplot as plt | ||||
| 
 | ||||
# Number of friends for each person in the data set, listed from largest to
# smallest.
num_friends = [100, 49, 41, 40, 25, 21, 21, 19, 19, 18, 18, 16, 15, 15, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 11, 10,
               10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
               9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6,
               6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4,
               4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
               3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
               1, 1, 1, 1, 1, 1, 1, 1]
| 
 | ||||
# Tally how many people share each friend count.
friend_counts = Counter(num_friends)

xs = range(101)  # x positions 0..100; the largest value in the data is 100
ys = [friend_counts[x] for x in xs]  # bar height = number of people with exactly x friends
plt.bar(xs, ys)
plt.axis([0, 101, 0, 25])  # axis limits given as [xmin, xmax, ymin, ymax]
plt.title('Histogram of friend counts')
plt.xlabel('# of Friends')
plt.ylabel('# of People')

# NOTE(review): the plotting calls above run whenever this module is imported;
# only the plt.show() call is limited to running this file directly.
if __name__ == "__main__":
    plt.show()
| @ -0,0 +1,96 @@ | ||||
| import math | ||||
| from collections import Counter | ||||
| from typing import List, Union | ||||
| from .friends3 import num_friends | ||||
| from ch4.vector import sum_of_squares | ||||
| 
 | ||||
# Summary values for the num_friends data set, computed once at import time.
num_points = len(num_friends)
largest_value = max(num_friends)
min_value = min(num_friends)
sorted_values = sorted(num_friends)  # ascending copy; num_friends is left untouched
smallest_value = sorted_values[0]  # same value as min_value
second_smallest_value = sorted_values[1]
second_largest_value = sorted_values[-2]
| 
 | ||||
| 
 | ||||
| # Central Tendency | ||||
| 
 | ||||
| 
 | ||||
def mean(seq: List[Union[int, float]]) -> float:
    """Return the arithmetic mean of ``seq``: its sum divided by its length."""
    total = sum(seq)
    count = len(seq)
    return total / count
| 
 | ||||
| 
 | ||||
# Arithmetic mean of the num_friends data set, computed once at import time.
mean_friendships = mean(num_friends)
| 
 | ||||
| 
 | ||||
def median(seq: List[Union[int, float]]) -> float:
    """Return the median (middle value) of ``seq``.

    The input is not modified; a sorted copy is used. For an odd number of
    values the single middle value is returned as-is. For an even number of
    values the result is the average of the two middle values.

    Raises:
        IndexError: if ``seq`` is empty.
    """
    ordered = sorted(seq)
    count = len(ordered)
    upper_mid = count // 2
    if count % 2:  # odd case: exactly one middle element
        return ordered[upper_mid]
    # Even case: the two middle elements sit at indices upper_mid - 1 and
    # upper_mid (e.g. indices 1 and 2 for four values).
    return (ordered[upper_mid - 1] + ordered[upper_mid]) / 2
| 
 | ||||
| 
 | ||||
def quantile(seq: List[Union[int, float]], pth: float) -> Union[int, float]:
    """Return the value of ``seq`` at the ``pth`` quantile.

    ``pth`` is a fraction between 0 and 1 inclusive (e.g. ``.25`` for the first
    quartile), not a percentage. The value is picked from a sorted copy of
    ``seq`` at index ``int(pth * len(seq))``; no interpolation is performed.

    Raises:
        ValueError: if ``pth`` is outside the range [0, 1].
        IndexError: if ``seq`` is empty.
    """
    if not 0 <= pth <= 1:
        raise ValueError(f"pth must be between 0 and 1 inclusive, got {pth!r}")
    ordered = sorted(seq)
    # pth == 1 would index one past the end, so cap at the last element.
    p_index = min(int(pth * len(ordered)), len(ordered) - 1)
    return ordered[p_index]
| 
 | ||||
| 
 | ||||
def mode(seq: List[Union[int, float]]) -> List[Union[int, float]]:
    """Return every value that occurs most often in ``seq``, in ascending order.

    More than one value is returned when several values tie for the highest
    number of occurrences.
    """
    tally = Counter(seq)
    top_count = max(tally.values())
    return sorted(value for value, freq in tally.items() if freq == top_count)
| 
 | ||||
| 
 | ||||
def data_range(seq: List[Union[int, float]]) -> float:
    """Return the spread of ``seq``: its largest value minus its smallest.

    This is a simple measure of dispersion. Because it depends only on the
    two extreme values, outliers in the data remain a concern.
    """
    highest = max(seq)
    lowest = min(seq)
    return highest - lowest
| 
 | ||||
| 
 | ||||
def _de_mean(seq: List[Union[int, float]]) -> List[Union[int, float]]:
    """Return each element's deviation from the mean of ``seq``.

    The result has one entry per input value: that value minus the mean.
    """
    center = mean(seq)
    deviations = [value - center for value in seq]
    return deviations
| 
 | ||||
| 
 | ||||
def variance(seq: List[Union[int, float]]) -> float:
    """Return the variance of ``seq`` around its mean.

    Computed as the sum of squared deviations from the mean divided by
    ``n - 1``. The result is in squared units of the input: observations in
    inches give a variance in inches squared.

    Requires at least two data points.
    """
    n = len(seq)
    # NOTE(review): assert statements are skipped when Python runs with -O.
    assert n >= 2
    return sum_of_squares(_de_mean(seq)) / (n - 1)
| 
 | ||||
| 
 | ||||
def standard_deviation(seq: List[Union[int, float]]) -> float:
    """Return the standard deviation of ``seq``: the square root of its variance.

    Unlike the variance, the result is in the same units as the data, which
    makes it easier to reason about (for example when the data is a count of
    'Number of friends').

    Outliers in the data remain a concern.
    """
    spread_squared = variance(seq)
    return math.sqrt(spread_squared)
| 
 | ||||
| 
 | ||||
def interquartile_range(seq: List[Union[int, float]]) -> float:
    """Return the difference between the 75% and 25% quantiles of ``seq``.

    A more robust measure of dispersion: a small number of outliers has
    little effect on it.
    """
    upper_quartile = quantile(seq, .75)
    lower_quartile = quantile(seq, .25)
    return float(upper_quartile - lower_quartile)
					Loading…
					
					
				
		Reference in New Issue