Added matrix operations and descriptive stats functions.
parent
d2ace996da
commit
f3ef7bb169
@ -0,0 +1,43 @@
|
|||||||
|
"""
|
||||||
|
A matrix is a two-dimensional collection of numbers. Math convention uses capital
|
||||||
|
letters to represent matrices.
|
||||||
|
|
||||||
|
2 x 3 Matrix
|
||||||
|
A = [[1, 2, 3], [4, 5, 6]]
|
||||||
|
|
||||||
|
3 x 2 Matrix
|
||||||
|
B = [[1, 2], [3, 4], [5, 6]]
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import List, Tuple, Callable
|
||||||
|
|
||||||
|
|
||||||
|
def shape(A: List[List[float]]) -> Tuple[int, int]:
    """Return the dimensions of matrix ``A`` as ``(rows, columns)``.

    A matrix with n rows and k columns is called an n x k matrix. The
    column count is taken from the first row only; an empty matrix is
    reported as ``(0, 0)``.
    """
    row_count = len(A)
    column_count = 0
    if A:
        # Only the first row is measured; rows are not checked for equal length.
        column_count = len(A[0])
    return row_count, column_count
|
||||||
|
|
||||||
|
|
||||||
|
def get_row(A: List[List[float]], i) -> List[float]:
    """Return row ``i`` of matrix ``A``.

    The returned list is the row object stored in ``A``; no copy is made.
    """
    selected_row = A[i]
    return selected_row
|
||||||
|
|
||||||
|
|
||||||
|
def get_column(A: List[List[float]], j) -> List[float]:
    """Return column ``j`` of matrix ``A`` as a new list.

    The result holds element ``j`` of every row, in row order.
    """
    column = []
    for row in A:
        column.append(row[j])
    return column
|
||||||
|
|
||||||
|
|
||||||
|
def make_matrix(
    num_rows: int, num_colums: int, entry_fn: Callable
) -> List[List[float]]:
    """Build a ``num_rows`` x ``num_colums`` matrix from ``entry_fn``.

    The entry at row ``i``, column ``j`` is ``entry_fn(i, j)``. Entries are
    generated row by row, left to right within each row.
    """
    matrix = []
    for i in range(num_rows):
        row = [entry_fn(i, j) for j in range(num_colums)]
        matrix.append(row)
    return matrix
|
||||||
|
|
||||||
|
|
||||||
|
def is_diagonal(i , j) -> int:
    """Return 1 when ``i`` equals ``j`` (a 'diagonal' position), else 0."""
    if i == j:
        return 1
    return 0
|
@ -0,0 +1,23 @@
|
|||||||
|
from collections import Counter
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
# Sample data set of friend counts, stored in non-increasing order.
num_friends = [100, 49, 41, 40, 25, 21, 21, 19, 19, 18, 18, 16, 15, 15, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 11, 10,
               10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
               9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6,
               6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4,
               4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
               3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
               1, 1, 1, 1, 1, 1, 1, 1]
|
||||||
|
|
||||||
|
# Tally how many times each friend count occurs in num_friends.
friend_counts = Counter(num_friends)

xs = range(101)  # largest value is just 100
ys = [friend_counts[x] for x in xs]  # Height is the number of entries whose friend count is x
# NOTE: the bar chart is built at import time; only plt.show() is guarded below.
plt.bar(xs, ys)
plt.axis([0, 101, 0, 25])
plt.title('Histogram of friend counts')
plt.xlabel('# of Friends')
plt.ylabel('# of People')

if __name__ == "__main__":
    plt.show()
|
@ -0,0 +1,96 @@
|
|||||||
|
import math
|
||||||
|
from collections import Counter
|
||||||
|
from typing import List, Union
|
||||||
|
from .friends3 import num_friends
|
||||||
|
from ch4.vector import sum_of_squares
|
||||||
|
|
||||||
|
# Order statistics read from a sorted copy of the data.
sorted_values = sorted(num_friends)
smallest_value = sorted_values[0]
second_smallest_value = sorted_values[1]
second_largest_value = sorted_values[-2]

# Size and extremes taken directly from the unsorted data.
num_points = len(num_friends)
largest_value = max(num_friends)
min_value = min(num_friends)
|
||||||
|
|
||||||
|
|
||||||
|
# Central Tendency
|
||||||
|
|
||||||
|
|
||||||
|
def mean(seq: List[Union[int, float]]) -> float:
    """Return the arithmetic mean of the numbers in ``seq``.

    An empty ``seq`` raises ``ZeroDivisionError``.
    """
    total = sum(seq)
    count = len(seq)
    return total / count
|
||||||
|
|
||||||
|
|
||||||
|
# Arithmetic mean of the num_friends data set, computed once at import time.
mean_friendships = mean(num_friends)
|
||||||
|
|
||||||
|
|
||||||
|
def median(seq: List[Union[int, float]]) -> float:
    """Return the median of the numbers in ``seq``.

    The values are sorted into a new list first, so ``seq`` is not modified.
    For an odd number of values the middle value is returned as-is; for an
    even number the result is the average of the two middle values.

    Raises:
        IndexError: If ``seq`` is empty.
    """
    sorted_seq = sorted(seq)
    seq_len = len(sorted_seq)
    if seq_len == 0:
        raise IndexError("median() of an empty sequence")

    mid = seq_len // 2
    if seq_len % 2 == 0:  # even case
        # With 0-based indexing the two middle values sit at mid - 1 and mid.
        return (sorted_seq[mid - 1] + sorted_seq[mid]) / 2
    return sorted_seq[mid]
|
||||||
|
|
||||||
|
|
||||||
|
def quantile(seq: List[Union[int, float]], pth: float) -> Union[int, float]:
    """Return the value found at fraction ``pth`` of the way through sorted ``seq``.

    ``pth`` is a fraction between 0 and 1 inclusive (e.g. ``0.25`` for the
    lower quartile). The values are sorted into a new list and the element
    at index ``int(pth * len(seq))`` is returned; ``pth == 1`` yields the
    largest value.

    Raises:
        ValueError: If ``pth`` is outside the range [0, 1].
        IndexError: If ``seq`` is empty.
    """
    if not 0 <= pth <= 1:
        # A negative index would silently wrap around to the other end.
        raise ValueError("pth must be between 0 and 1, got {!r}".format(pth))

    sorted_seq = sorted(seq)
    count = len(sorted_seq)
    if count == 0:
        raise IndexError("quantile() of an empty sequence")

    # int(pth * count) equals count when pth == 1, so clamp to the last index.
    p_index = min(int(pth * count), count - 1)
    return sorted_seq[p_index]
|
||||||
|
|
||||||
|
|
||||||
|
def mode(seq: List[Union[int, float]]) -> List[Union[int, float]]:
    """Return every value tied for the highest frequency, in ascending order.

    An empty ``seq`` raises ``ValueError`` (from ``max`` on an empty sequence).
    """
    tally = Counter(seq)
    highest_count = max(tally.values())

    most_common = []
    for value in tally:
        if tally[value] == highest_count:
            most_common.append(value)
    most_common.sort()
    return most_common
|
||||||
|
|
||||||
|
|
||||||
|
def data_range(seq: List[Union[int, float]]) -> float:
    """Return the spread of ``seq``: its largest value minus its smallest.

    A simple measure of dispersion. Because it depends only on the two
    extreme values, outliers in the data still affect it.
    """
    highest = max(seq)
    lowest = min(seq)
    return highest - lowest
|
||||||
|
|
||||||
|
|
||||||
|
def _de_mean(seq: List[Union[int, float]]) -> List[Union[int, float]]:
    """Return each value's deviation from the mean of ``seq``.

    Every element has the mean (as computed by ``mean``) subtracted from it;
    the result is a new list in the original order.
    """
    center = mean(seq)
    deviations = []
    for value in seq:
        deviations.append(value - center)
    return deviations
|
||||||
|
|
||||||
|
|
||||||
|
def variance(seq: List[Union[int, float]]) -> float:
    """Return the variance of ``seq`` about its mean, using an ``n - 1`` divisor.

    The deviations from the mean are passed to ``sum_of_squares`` (imported
    from ``ch4.vector``; presumably the sum of the squared elements) and the
    result is divided by ``n - 1``.

    Note that variance is expressed in the square of the input units: if the
    observations are in inches, the result is in inches squared.

    Raises:
        ValueError: If ``seq`` holds fewer than two values.
    """
    n = len(seq)
    if n < 2:
        # Explicit check rather than ``assert`` so it still runs under ``python -O``;
        # otherwise a single value would fall through to a division by zero.
        raise ValueError("variance requires at least two values")
    deviations = _de_mean(seq)
    return sum_of_squares(deviations) / (n - 1)
|
||||||
|
|
||||||
|
|
||||||
|
def standard_deviation(seq: List[Union[int, float]]) -> float:
    """Return the square root of ``variance(seq)``.

    Unlike variance, this measure of dispersion is in the same units as the
    data, which makes it easier to reason about (for example when the data
    is a count such as 'Number of friends').

    Outliers in the data still affect the result.
    """
    spread_squared = variance(seq)
    return math.sqrt(spread_squared)
|
||||||
|
|
||||||
|
|
||||||
|
def interquartile_range(seq: List[Union[int, float]]) -> float:
    """Return the gap between the 0.75 and 0.25 quantiles of ``seq`` as a float.

    A more robust measure of dispersion: it is less affected by a small
    number of outliers.
    """
    upper_quartile = quantile(seq, .75)
    lower_quartile = quantile(seq, .25)
    return float(upper_quartile - lower_quartile)
|
Loading…
Reference in New Issue