You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
96 lines
2.9 KiB
Python
96 lines
2.9 KiB
Python
6 years ago
|
import math
|
||
|
from collections import Counter
|
||
|
from typing import List, Union
|
||
|
from .friends3 import num_friends
|
||
|
from ch4.vector import sum_of_squares
|
||
|
|
||
|
# Size and extremes of the friend-count data set.
num_points = len(num_friends)
largest_value = max(num_friends)
min_value = min(num_friends)

# Order statistics are read by position from an ascending sorted copy.
sorted_values = sorted(num_friends)
smallest_value = sorted_values[0]
second_smallest_value = sorted_values[1]
second_largest_value = sorted_values[-2]
|
||
|
|
||
|
|
||
|
# Central Tendency
|
||
|
|
||
|
|
||
|
def mean(seq: List[Union[int, float]]) -> float:
    """Return the arithmetic mean of the numbers in ``seq``.

    The values are summed and divided by how many there are, so an empty
    ``seq`` raises ``ZeroDivisionError``.
    """
    total = sum(seq)
    count = len(seq)
    return total / count
|
||
|
|
||
|
|
||
|
# Average of the values in num_friends, computed once at import time.
mean_friendships = mean(num_friends)
|
||
|
|
||
|
|
||
|
def median(seq: List[Union[int, float]]) -> float:
    """Return the median (middle value) of ``seq``.

    The input is sorted into a new list; ``seq`` itself is not modified.
    For an odd number of values the single middle element is returned
    unchanged. For an even number of values the result is the average of
    the two central elements.

    Raises:
        IndexError: if ``seq`` is empty.
    """
    sorted_seq = sorted(seq)
    seq_len = len(sorted_seq)
    mid_point_index = seq_len // 2

    if seq_len % 2 == 0:  # even case
        # With an even length the two central elements sit at
        # mid_point_index - 1 and mid_point_index (0-based), e.g. for four
        # values they are indices 1 and 2.
        lower_middle = sorted_seq[mid_point_index - 1]
        upper_middle = sorted_seq[mid_point_index]
        return (lower_middle + upper_middle) / 2

    return sorted_seq[mid_point_index]
|
||
|
|
||
|
|
||
|
def quantile(seq: List[Union[int, float]], pth: float) -> Union[int, float]:
    """Return the value found at fraction ``pth`` of the sorted data.

    The position is ``int(pth * len(seq))`` in an ascending sorted copy of
    ``seq``, so ``pth`` is expected to lie between 0 and 1 (it is not
    validated). The position is capped at the last element, so ``pth`` of
    1.0 returns the maximum rather than indexing past the end of the list.

    Raises:
        IndexError: if ``seq`` is empty.
    """
    sorted_seq = sorted(seq)
    last_index = len(sorted_seq) - 1
    # int() truncates; pth == 1.0 would give len(seq), one past the end,
    # so clamp to the final valid position.
    p_index = min(int(pth * len(sorted_seq)), last_index)
    return sorted_seq[p_index]
|
||
|
|
||
|
|
||
|
def mode(seq: List[Union[int, float]]) -> List[Union[int, float]]:
    """Return every value that occurs most often in ``seq``, in ascending order.

    More than one value is returned when several share the highest
    occurrence count. An empty ``seq`` raises ``ValueError`` because there
    is no highest count to find.
    """
    tally = Counter(seq)
    highest = max(tally.values())

    winners = []
    for value, freq in tally.items():
        if freq == highest:
            winners.append(value)

    winners.sort()
    return winners
|
||
|
|
||
|
|
||
|
def data_range(seq: List[Union[int, float]]) -> float:
    """Return the spread of ``seq``: its largest value minus its smallest.

    This is a simple measure of dispersion. Because it depends only on the
    two extreme values, outliers within the provided data still affect it.
    """
    highest = max(seq)
    lowest = min(seq)
    return highest - lowest
|
||
|
|
||
|
|
||
|
def _de_mean(seq: List[Union[int, float]]) -> List[Union[int, float]]:
    """Return the deviation of each element of ``seq`` from the mean.

    The mean of ``seq`` is subtracted from every element, and the results
    are returned as a new list in the original order.
    """
    centre = mean(seq)
    deviations = []
    for value in seq:
        deviations.append(value - centre)
    return deviations
|
||
|
|
||
|
|
||
|
def variance(seq: List[Union[int, float]]) -> float:
    """Return the variance of ``seq`` around its mean.

    The deviations from the mean are passed to ``sum_of_squares`` and the
    result is divided by ``n - 1``, where ``n`` is the number of
    observations. The result is in squared units: observations measured in
    inches give a variance in inches squared.

    At least two observations are required; this is checked with an
    ``assert``.
    """
    n = len(seq)
    assert n >= 2

    squared_total = sum_of_squares(_de_mean(seq))
    return squared_total / (n - 1)
|
||
|
|
||
|
|
||
|
def standard_deviation(seq: List[Union[int, float]]) -> float:
    """Return the standard deviation of ``seq``: the square root of its variance.

    Unlike the variance, this is expressed in the same units as the data,
    which makes it easier to reason about (for example when the data set
    is a count of 'Number of friends').

    Outliers within the provided data are still a concern.
    """
    spread_squared = variance(seq)
    return math.sqrt(spread_squared)
|
||
|
|
||
|
|
||
|
def interquartile_range(seq: List[Union[int, float]]) -> float:
    """Return the difference between the 0.75 and 0.25 quantiles of ``seq``.

    This is a more robust measure of dispersion than the full range,
    because a small number of outliers has less effect on it.
    """
    upper_quartile = quantile(seq, .75)
    lower_quartile = quantile(seq, .25)
    return float(upper_quartile - lower_quartile)
|