You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

96 lines
2.9 KiB
Python

import math
from collections import Counter
from typing import List, Union
from .friends3 import num_friends
from ch4.vector import sum_of_squares
# Summary values derived once, at import time, from the imported num_friends data.
num_points = len(num_friends)

# A sorted copy lets the ranked values below be read off by position.
sorted_values = sorted(num_friends)

# Extremes taken directly from the unsorted data.
largest_value = max(num_friends)
min_value = min(num_friends)

# Ranked values taken from the sorted copy.
smallest_value = sorted_values[0]
second_smallest_value = sorted_values[1]
second_largest_value = sorted_values[-2]
# Central Tendency
def mean(seq: List[Union[int, float]]) -> float:
    """Return the arithmetic mean of the numbers in ``seq``.

    The total of the values is divided by how many there are, so an
    empty ``seq`` raises ``ZeroDivisionError``.
    """
    total = sum(seq)
    return total / len(seq)
# Arithmetic mean of the imported num_friends data, computed once at import time.
mean_friendships = mean(num_friends)
def median(seq: List[Union[int, float]]) -> float:
    """Return the median of the numbers in ``seq``.

    The input is sorted into a new list, so ``seq`` is not modified.
    With an odd number of values the single middle element is returned
    unchanged; with an even number the two middle elements are averaged.

    Raises:
        IndexError: if ``seq`` is empty.
    """
    ordered = sorted(seq)
    count = len(ordered)
    upper_mid = count // 2
    if count % 2 == 0:
        # For an even count the middle pair sits at upper_mid - 1 and
        # upper_mid (e.g. indices 1 and 2 of a 4-element list).
        return (ordered[upper_mid - 1] + ordered[upper_mid]) / 2
    return ordered[upper_mid]
def quantile(seq: List[Union[int, float]], pth: float) -> Union[int, float]:
    """Return the value found at fraction ``pth`` of the sorted data.

    The input is sorted into a new list and the element at index
    ``int(pth * len(seq))`` is returned. ``pth`` is expected to be a
    fraction between 0 and 1; ``pth == 1.0`` returns the largest value.

    Raises:
        IndexError: if ``seq`` is empty.
    """
    ordered = sorted(seq)
    last_index = len(ordered) - 1
    # pth == 1.0 would otherwise produce len(seq), one past the final
    # element, so the index is capped at the last valid position.
    p_index = min(int(pth * len(ordered)), last_index)
    return ordered[p_index]
def mode(seq: List[Union[int, float]]) -> List[Union[int, float]]:
    """Return every value that occurs most often in ``seq``, in ascending order.

    More than one value is returned when several share the highest
    count. An empty ``seq`` raises ``ValueError``.
    """
    tally = Counter(seq)
    highest_count = max(tally.values())
    winners = []
    for value, occurrences in tally.items():
        if occurrences == highest_count:
            winners.append(value)
    winners.sort()
    return winners
def data_range(seq: List[Union[int, float]]) -> float:
    """Return the spread of ``seq``: its largest value minus its smallest.

    This is a simple measure of dispersion. Because it depends only on
    the two extreme values, outliers in the data affect it directly.
    """
    highest = max(seq)
    lowest = min(seq)
    return highest - lowest
def _de_mean(seq: List[Union[int, float]]) -> List[Union[int, float]]:
    """Return each value's deviation from the mean of ``seq``.

    The mean is computed once and subtracted from every element, giving
    a new list of the same length as ``seq``.
    """
    centre = mean(seq)
    deviations = []
    for value in seq:
        deviations.append(value - centre)
    return deviations
def variance(seq: List[Union[int, float]]) -> float:
    """Return the variance of ``seq`` about its mean, dividing by ``n - 1``.

    The result is expressed in the square of the input's units: if the
    observations are in inches, the variance is in inches squared.

    At least two values are required.
    """
    # NOTE(review): this guard is an assert, so it is skipped when Python
    # runs with -O; kept as-is to preserve the exception callers see.
    assert len(seq) >= 2
    # presumably sum_of_squares adds up the squared elements - confirm in ch4.vector
    squared_total = sum_of_squares(_de_mean(seq))
    degrees_of_freedom = len(seq) - 1
    return squared_total / degrees_of_freedom
def standard_deviation(seq: List[Union[int, float]]) -> float:
    """Return the standard deviation of ``seq``: the square root of its variance.

    Unlike the variance, the result is in the same units as the data,
    which makes it easier to interpret (for example when the data is a
    count such as 'Number of friends'). Outliers in the data still
    affect it.
    """
    spread_squared = variance(seq)
    return math.sqrt(spread_squared)
def interquartile_range(seq: List[Union[int, float]]) -> float:
    """Return the gap between the 0.75 and 0.25 quantiles of ``seq``.

    This is a more robust measure of dispersion than the full range,
    since a small number of outliers has less effect on it.
    """
    upper_quartile = quantile(seq, .75)
    lower_quartile = quantile(seq, .25)
    return float(upper_quartile - lower_quartile)