Added matrix operations, and descriptive stats functions.

master
androiddrew 6 years ago
parent d2ace996da
commit f3ef7bb169

@ -0,0 +1,43 @@
"""
A matrix is a two-dimensional collection of numbers. Math convention uses capital
letters to represent matrices.
2 x 3 Matrix
A = [[1, 2, 3], [4, 5, 6]]
3 x 2 Matrix
B = [[1, 2], [3, 4], [5, 6]]
"""
from typing import List, Tuple, Callable
def shape(A: List[List[float]]) -> Tuple[int, int]:
    """Report the dimensions of matrix ``A`` as ``(rows, columns)``.

    The column count is read from the first row only; a matrix with no
    rows is reported as having zero columns.
    """
    row_total = len(A)
    if A:
        return row_total, len(A[0])
    return row_total, 0
def get_row(A: List[List[float]], i: int) -> List[float]:
    """Return row ``i`` of matrix ``A``.

    ``i`` is a zero-based index; negative values count from the last row,
    as with ordinary list indexing. The row object itself is returned (not
    a copy), so mutating it mutates ``A``.

    Raises:
        IndexError: if ``i`` is outside the rows of ``A``.
    """
    return A[i]
def get_column(A: List[List[float]], j: int) -> List[float]:
    """Return column ``j`` of matrix ``A`` as a new list.

    ``j`` is a zero-based index applied to every row in turn, so the result
    has one element per row. An empty matrix yields an empty list.

    Raises:
        IndexError: if any row is too short to have an element at ``j``.
    """
    return [row[j] for row in A]
def make_matrix(
    num_rows: int, num_colums: int, entry_fn: Callable[[int, int], float]
) -> List[List[float]]:
    """Create a ``num_rows`` x ``num_colums`` matrix from an entry function.

    The (i, j)th entry is ``entry_fn(i, j)``, where ``i`` is the zero-based
    row index and ``j`` the zero-based column index. Entries are generated
    row by row, left to right.

    NOTE: the parameter name ``num_colums`` is misspelled but is kept as-is
    so that existing keyword callers continue to work.
    """
    return [
        [entry_fn(row, col) for col in range(num_colums)]
        for row in range(num_rows)
    ]
def is_diagonal(i, j) -> int:
    """Entry function for an identity-style matrix.

    Gives 1 when the row and column indices are equal and 0 otherwise.
    """
    if i == j:
        return 1
    return 0

@ -0,0 +1,23 @@
from collections import Counter
from matplotlib import pyplot as plt
num_friends = [100, 49, 41, 40, 25, 21, 21, 19, 19, 18, 18, 16, 15, 15, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 11, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1]
# Tally how many people share each friend count.
friend_counts = Counter(num_friends)
xs = range(101) # x positions 0..100; the largest value in the data is 100
ys = [friend_counts[x] for x in xs] # Bar height is the number of people with x friends (0 when absent)
# NOTE(review): the pyplot calls below run whenever this module is imported,
# not only when it is executed as a script; only plt.show() is guarded.
plt.bar(xs, ys)
# Axis limits are given as [xmin, xmax, ymin, ymax].
plt.axis([0, 101, 0, 25])
plt.title('Histogram of friend counts')
plt.xlabel('# of Friends')
plt.ylabel('# of People')
# Only open the plot window when run directly.
if __name__ == "__main__":
    plt.show()

@ -0,0 +1,96 @@
import math
from collections import Counter
from typing import List, Union
from .friends3 import num_friends
from ch4.vector import sum_of_squares
# Basic summary values for the num_friends sample, computed once at import time.
num_points = len(num_friends)
largest_value = max(num_friends)
min_value = min(num_friends)
# Sorting once lets the k-th smallest / largest values be read by index.
sorted_values = sorted(num_friends)
smallest_value = sorted_values[0]  # same value as min_value
second_smallest_value = sorted_values[1]
second_largest_value = sorted_values[-2]
# Central Tendency
def mean(seq: List[Union[int, float]]) -> float:
    """Compute the arithmetic mean of ``seq``: the sum divided by the count.

    An empty sequence results in a ZeroDivisionError.
    """
    total = sum(seq)
    count = len(seq)
    return total / count
# Arithmetic mean of the num_friends sample, computed once at import time.
mean_friendships = mean(num_friends)
def median(seq: List[Union[int, float]]) -> float:
    """Return the median (middle value) of ``seq``.

    The input is sorted into a new list, so ``seq`` itself is not modified.
    For an odd number of values the middle element is returned unchanged;
    for an even number the two middle elements are averaged.

    Raises:
        IndexError: if ``seq`` is empty.
    """
    sorted_seq = sorted(seq)
    seq_len = len(sorted_seq)
    mid_point_index = seq_len // 2
    if seq_len % 2 == 0:
        # With zero-based indexing the two middle elements of an even-length
        # list sit at mid - 1 and mid (not mid and mid + 1).
        lower = sorted_seq[mid_point_index - 1]
        upper = sorted_seq[mid_point_index]
        return (lower + upper) / 2
    return sorted_seq[mid_point_index]
def quantile(seq: List[Union[int, float]], pth: float) -> Union[int, float]:
    """Return the value at the ``pth`` quantile of ``seq``.

    ``pth`` is a fraction between 0 and 1 inclusive (e.g. 0.25 for the lower
    quartile), not a percentage. The result is always an element of ``seq``:
    the one at position ``int(pth * len(seq))`` of the sorted data, with no
    interpolation.

    Raises:
        ValueError: if ``pth`` is not within [0, 1].
        IndexError: if ``seq`` is empty.
    """
    if not 0 <= pth <= 1:
        raise ValueError("pth must be between 0 and 1 inclusive")
    sorted_seq = sorted(seq)
    last_index = len(sorted_seq) - 1
    # pth == 1.0 would otherwise index one past the end, so clamp to the
    # last element (the maximum).
    p_index = min(int(pth * len(sorted_seq)), last_index)
    return sorted_seq[p_index]
def mode(seq: List[Union[int, float]]) -> List[Union[int, float]]:
    """Find every value that occurs most often in ``seq``.

    All values tied for the highest frequency are returned, in ascending
    order. An empty sequence results in a ValueError.
    """
    tally = Counter(seq)
    highest = max(tally.values())
    winners = []
    for value, occurrences in tally.items():
        if occurrences == highest:
            winners.append(value)
    winners.sort()
    return winners
def data_range(seq: List[Union[int, float]]) -> float:
    """A simple measure of dispersion: the largest value minus the smallest.

    Because only the two extreme values are used, a single outlier in the
    data changes the result directly.
    """
    highest = max(seq)
    lowest = min(seq)
    return highest - lowest
def _de_mean(seq: List[Union[int, float]]) -> List[Union[int, float]]:
    """Shift ``seq`` so that its mean becomes zero.

    Each element has the mean of the whole sequence subtracted from it,
    giving the list of deviations from the mean in the original order.
    """
    centre = mean(seq)
    deviations = []
    for value in seq:
        deviations.append(value - centre)
    return deviations
def variance(seq: List[Union[int, float]]) -> float:
    """Measure how far the values of ``seq`` spread around their mean.

    The deviations from the mean are passed to ``sum_of_squares`` and the
    result is divided by ``n - 1``. The result is in the square of the input
    units: observations in inches give a variance in inches squared.

    Raises:
        ValueError: if ``seq`` has fewer than two observations.
    """
    n = len(seq)
    # Raise explicitly instead of using ``assert``: asserts are stripped under
    # ``python -O``, which would leave a division by zero for n == 1.
    if n < 2:
        raise ValueError("variance requires at least two observations")
    deviations = _de_mean(seq)
    # NOTE(review): sum_of_squares comes from ch4.vector; presumably it sums
    # the squared elements - confirm against that module.
    return sum_of_squares(deviations) / (n - 1)
def standard_deviation(seq: List[Union[int, float]]) -> float:
    """Dispersion expressed in the same units as the data.

    This is the square root of the variance, which makes it easier to
    interpret than the variance itself - for a data set counting
    'Number of friends' the result is again a number of friends.
    Outliers in the data still influence the result.
    """
    spread_squared = variance(seq)
    return math.sqrt(spread_squared)
def interquartile_range(seq: List[Union[int, float]]) -> float:
    """Dispersion measured as the gap between the 75% and 25% quantiles.

    More robust than the plain range, since a small number of outliers
    has less effect on the result.
    """
    upper_quartile = quantile(seq, .75)
    lower_quartile = quantile(seq, .25)
    return float(upper_quartile - lower_quartile)
Loading…
Cancel
Save