From f3ef7bb169fd38cc3d0ade1ab16dbb462db3c771 Mon Sep 17 00:00:00 2001
From: androiddrew
Date: Sun, 10 Mar 2019 09:54:49 -0400
Subject: [PATCH] Added matrix operations, and descriptive stats functions.

---
 ch4/matrix.py   | 43 ++++++++++++++++++++++
 ch5/__init__.py |  0
 ch5/friends3.py | 23 ++++++++++++
 ch5/stats.py    | 96 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 162 insertions(+)
 create mode 100644 ch4/matrix.py
 create mode 100644 ch5/__init__.py
 create mode 100644 ch5/friends3.py
 create mode 100644 ch5/stats.py

diff --git a/ch4/matrix.py b/ch4/matrix.py
new file mode 100644
index 0000000..9a0bac4
--- /dev/null
+++ b/ch4/matrix.py
@@ -0,0 +1,43 @@
+"""
+A matrix is a two-dimensional collection of numbers. Math convention uses capital
+letters to represent matrices.
+
+2 x 3 Matrix
+A = [[1, 2, 3], [4, 5, 6]]
+
+3 x 2 Matrix
+B = [[1, 2], [3, 4], [5, 6]]
+
+"""
+from typing import List, Tuple, Callable
+
+
+def shape(A: List[List[float]]) -> Tuple[int, int]:
+    """Calculates the shape of a matrix as (rows, columns); an empty matrix is (0, 0).
+
+    If matrix has n rows and k columns we call it a n x k matrix."""
+    num_rows = len(A)
+    num_columns = len(A[0]) if A else 0
+    return num_rows, num_columns
+
+
+def get_row(A: List[List[float]], i) -> List[float]:
+    """Returns the ith row from a matrix."""
+    return A[i]
+
+
+def get_column(A: List[List[float]], j) -> List[float]:
+    """Returns the jth column from a matrix (the jth entry of every row)."""
+    return [A_i[j] for A_i in A]
+
+
+def make_matrix(
+    num_rows: int, num_colums: int, entry_fn: Callable
+) -> List[List[float]]:
+    """Creates a num_rows x num_colums matrix whose (i, j)th entry is entry_fn(i, j)."""
+    return [[entry_fn(i, j) for j in range(num_colums)] for i in range(num_rows)]
+
+
+def is_diagonal(i , j) -> int:
+    """Returns 1 when i == j (on the 'diagonal') and 0 everywhere else."""
+    return 1 if i == j else 0
\ No newline at end of file
diff --git a/ch5/__init__.py b/ch5/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/ch5/friends3.py b/ch5/friends3.py
new file mode 100644
index 0000000..9a6bd30
--- /dev/null
+++ b/ch5/friends3.py
@@ -0,0 +1,23 @@
+from collections import Counter
+from matplotlib import pyplot as plt
+
+num_friends = [100, 49, 41, 40, 25, 21, 21, 19, 19, 18, 18, 16, 15, 15, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 11, 10,
+               10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+               9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6,
+               6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4,
+               4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+               3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+               1, 1, 1, 1, 1, 1, 1, 1]
+
+friend_counts = Counter(num_friends)
+
+xs = range(101)  # x values 0..100; the largest value in num_friends is 100
+ys = [friend_counts[x] for x in xs]  # Height is the number of people with x friends
+plt.bar(xs, ys)  # NOTE: runs at import time, outside the __main__ guard
+plt.axis([0, 101, 0, 25])
+plt.title('Histogram of friend counts')
+plt.xlabel('# of Friends')
+plt.ylabel('# of People')
+
+if __name__ == "__main__":
+    plt.show()
diff --git a/ch5/stats.py b/ch5/stats.py
new file mode 100644
index 0000000..2fe09f9
--- /dev/null
+++ b/ch5/stats.py
@@ -0,0 +1,96 @@
+import math
+from collections import Counter
+from typing import List, Union
+from .friends3 import num_friends
+from ch4.vector import sum_of_squares
+
+num_points = len(num_friends)
+largest_value = max(num_friends)
+min_value = min(num_friends)
+sorted_values = sorted(num_friends)
+smallest_value = sorted_values[0]
+second_smallest_value = sorted_values[1]
+second_largest_value = sorted_values[-2]
+
+
+# Central Tendency
+
+
+def mean(seq: List[Union[int, float]]) -> float:
+    """Returns the arithmetic mean of seq; raises ZeroDivisionError if seq is empty."""
+    return sum(seq) / len(seq)
+
+
+mean_friendships = mean(num_friends)
+
+
+def median(seq: List[Union[int, float]]) -> float:
+    """Returns the median of seq (mean of the two middle values when len is even)."""
+    sorted_seq = sorted(seq)
+    seq_len = len(sorted_seq)
+    mid_point_index = seq_len // 2
+    if seq_len % 2 == 0:  # even case: the middle pair sits at mid - 1 and mid
+        return (sorted_seq[mid_point_index - 1] + sorted_seq[mid_point_index]) / 2
+    else:
+        return sorted_seq[mid_point_index]
+
+
+def quantile(seq: List[Union[int, float]], pth: float) -> Union[int, float]:
+    """Returns the sorted value at position int(pth * len(seq)), for pth in 0 to 1."""
+    p_index = min(int(pth * len(seq)), len(seq) - 1)  # pth == 1 maps to the max
+    sorted_seq = sorted(seq)
+    return sorted_seq[p_index]
+
+
+def mode(seq: List[Union[int, float]]) -> List[Union[int, float]]:
+    """Returns a sorted list of the most common values (more than one on ties)."""
+    counts = Counter(seq)
+
+    max_value = max(counts.values())
+
+    results = [x_i for x_i, count in counts.items() if count == max_value]
+    return sorted(results)
+
+
+def data_range(seq: List[Union[int, float]]) -> float:
+    """A measure of data dispersion. The spread being the difference between
+    max and min values.
+
+    Outliers are still a concern within the provided data.
+    """
+    return max(seq) - min(seq)
+
+
+def _de_mean(seq: List[Union[int, float]]) -> List[Union[int, float]]:
+    """Translates a sequence of numbers by subtracting the mean, producing
+    a list of deviations from the mean."""
+    x_bar = mean(seq)
+    return [x_i - x_bar for x_i in seq]
+
+
+def variance(seq: List[Union[int, float]]) -> float:
+    """Determines the sample variance (n - 1 denominator) of a data set. Note
+    variance is returned as the square of whatever units were provided.
+    If observations were of inches this would return a float value
+    in inches squared. Needs at least two observations.
+    """
+    assert len(seq) >= 2  # with one observation the n - 1 divisor would be zero
+    n = len(seq)
+    deviations = _de_mean(seq)
+    return sum_of_squares(deviations) / (n - 1)
+
+
+def standard_deviation(seq: List[Union[int, float]]) -> float:
+    """A measure of dispersion with the same units as the data set.
+    Easier to reason about if for example your data set was the
+    count of 'Number of friends'.
+
+    Outliers are still a concern within the provided data.
+    """
+    return math.sqrt(variance(seq))
+
+
+def interquartile_range(seq: List[Union[int, float]]) -> float:
+    """A more robust measure of dispersion: the 75th minus the 25th percentile
+    value. Is less affected by a small number of outliers."""
+    return float(quantile(seq, .75) - quantile(seq, .25))
\ No newline at end of file