import math
from collections import Counter
from typing import List, Union

from .friends3 import num_friends
from ch4.vector import sum_of_squares

# Summary values computed once, at import time, from the ``num_friends`` data.
num_points = len(num_friends)
largest_value = max(num_friends)
min_value = min(num_friends)
sorted_values = sorted(num_friends)
smallest_value = sorted_values[0]
second_smallest_value = sorted_values[1]
second_largest_value = sorted_values[-2]


# Central Tendency
def mean(seq: List[Union[int, float]]) -> float:
    """Return the arithmetic mean of a list of numbers.

    Args:
        seq: The observations to average.

    Returns:
        The sum of the observations divided by their count.

    Raises:
        ZeroDivisionError: If ``seq`` is empty.
    """
    return sum(seq) / len(seq)


mean_friendships = mean(num_friends)


def median(seq: List[Union[int, float]]) -> float:
    """Return the median value of a list of numbers.

    The input is not modified; a sorted copy is used.

    Args:
        seq: The observations, in any order.

    Returns:
        The middle element of the sorted data when the length is odd,
        otherwise the average of the two middle elements.

    Raises:
        IndexError: If ``seq`` is empty.
    """
    sorted_seq = sorted(seq)
    seq_len = len(sorted_seq)
    mid_point_index = seq_len // 2
    if seq_len % 2 == 0:
        # Even case: the two middle elements sit at mid - 1 and mid
        # (e.g. indices 1 and 2 for a length of 4).
        lower_mid = sorted_seq[mid_point_index - 1]
        upper_mid = sorted_seq[mid_point_index]
        return (lower_mid + upper_mid) / 2
    return sorted_seq[mid_point_index]


def quantile(seq: List[Union[int, float]], pth: float) -> Union[int, float]:
    """Return the pth-percentile value of a list of numbers.

    Args:
        seq: The observations, in any order.
        pth: The fraction of the data to fall below the returned value,
            expected to lie in ``[0, 1]``.

    Returns:
        The element of the sorted data at index ``int(pth * len(seq))``,
        limited to the last element so that ``pth == 1.0`` yields the maximum.

    Raises:
        IndexError: If ``seq`` is empty.
    """
    sorted_seq = sorted(seq)
    last_index = len(sorted_seq) - 1
    # Clamp so pth == 1.0 selects the maximum instead of indexing past the end.
    p_index = min(int(pth * len(sorted_seq)), last_index)
    return sorted_seq[p_index]


def mode(seq: List[Union[int, float]]) -> List[Union[int, float]]:
    """Return a sorted list of the most common values.

    Args:
        seq: The observations.

    Returns:
        Every value whose occurrence count equals the highest count, in
        ascending order (more than one value when there is a tie).

    Raises:
        ValueError: If ``seq`` is empty.
    """
    counts = Counter(seq)
    max_value = max(counts.values())
    results = [x_i for x_i, count in counts.items() if count == max_value]
    return sorted(results)


# Dispersion
def data_range(seq: List[Union[int, float]]) -> float:
    """Return the spread of the data: the maximum minus the minimum.

    A simple measure of dispersion. Because it depends only on the two
    extreme values, outliers are still a concern within the provided data.

    Raises:
        ValueError: If ``seq`` is empty.
    """
    return max(seq) - min(seq)


def _de_mean(seq: List[Union[int, float]]) -> List[Union[int, float]]:
    """Return the deviations of each element from the mean of ``seq``.

    Translates the sequence by subtracting its mean from every element.
    """
    x_bar = mean(seq)
    return [x_i - x_bar for x_i in seq]


def variance(seq: List[Union[int, float]]) -> float:
    """Return the variance of a data set around its mean.

    Computed as ``sum_of_squares`` of the deviations from the mean divided
    by ``n - 1``.

    Note variance is returned as the square of whatever units were provided.
    If observations were of inches this would return a float value in inches
    squared.

    Raises:
        AssertionError: If ``seq`` has fewer than two elements.
    """
    n = len(seq)
    # Dividing by n - 1 needs at least two observations.
    assert n >= 2, "variance requires at least two elements"
    deviations = _de_mean(seq)
    return sum_of_squares(deviations) / (n - 1)


def standard_deviation(seq: List[Union[int, float]]) -> float:
    """Return the standard deviation: the square root of the variance.

    A measure of dispersion with the same units as the data set, which makes
    it easier to reason about if, for example, your data set was the count of
    'Number of friends'. Outliers are still a concern within the provided
    data.

    Raises:
        AssertionError: If ``seq`` has fewer than two elements.
    """
    return math.sqrt(variance(seq))


def interquartile_range(seq: List[Union[int, float]]) -> float:
    """Return the difference between the 75th and 25th percentile values.

    A more robust measure of dispersion; it is less affected by a small
    number of outliers.
    """
    return float(quantile(seq, .75) - quantile(seq, .25))