"""Toy social-network analysis over a small, hard-coded data set.

The module holds a list of users, their friendships and their interests, and
provides helpers to count friends, suggest friends-of-friends and index users
by interest. Running it as a script prints a short report.
"""
from collections import Counter, defaultdict
from typing import Any, Dict, List, Optional, Tuple

USERS = [
    {"id": 0, "name": "Hero"},
    {"id": 1, "name": "Dunn"},
    {"id": 2, "name": "Sue"},
    {"id": 3, "name": "Chi"},
    {"id": 4, "name": "Thor"},
    {"id": 5, "name": "Clive"},
    {"id": 6, "name": "Hicks"},
    {"id": 7, "name": "Devin"},
    {"id": 8, "name": "Kate"},
    {"id": 9, "name": "Klein"},
]

FRIENDSHIPS = [
    (0, 1),
    (0, 2),
    (1, 2),
    (1, 3),
    (2, 3),
    (3, 4),
    (4, 5),
    (5, 6),
    (5, 7),
    (6, 8),
    (7, 8),
    (8, 9),
]

# NOTE: the name is misspelled, but it is the module's established public
# constant, so it is kept; INTERESTS below is a correctly spelled alias.
INTERETS = [
    (0, "Hadoop"),
    (0, "Big Data"),
    (0, "HBase"),
    (0, "Java"),
    (0, "Spark"),
    (0, "Storm"),
    (0, "Cassandra"),
    (1, "NoSQL"),
    (1, "MongoDB"),
    (1, "Cassandra"),
    (1, "HBase"),
    (1, "Postgres"),
    (2, "Python"),
    (2, "scikit-learn"),
    (2, "scipy"),
    (2, "numpy"),
    (2, "statsmodels"),
    (2, "pandas"),
    (3, "R"),
    (3, "Python"),
    (3, "statistics"),
    (3, "regression"),
    (3, "probability"),
    (4, "machine learning"),
    (4, "regression"),
    (4, "decision trees"),
    (4, "libsvm"),
    (5, "Python"),
    (5, "R"),
    (5, "Java"),
    (5, "C++"),
    (5, "Haskell"),
    (5, "programming languages"),
    (6, "statistics"),
    (6, "probability"),
    (6, "mathematics"),
    (6, "theory"),
    (7, "machine learning"),
    (7, "scikit-learn"),
    (7, "Mahout"),
    (7, "neural networks"),
    (8, "neural networks"),
    (8, "deep learning"),
    (8, "Big Data"),
    (8, "artificial intelligence"),
    (9, "Hadoop"),
    (9, "Java"),
    (9, "MapReduce"),
    (9, "Big Data"),
]

INTERESTS = INTERETS


def _friends_of(user: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return the user's friend list, or an empty list if none is set."""
    return user.get("friends") or []


def add_friendships(
    users: List[Dict], friendships: List[Tuple[int, int]]
) -> List[Dict]:
    """Attach a ``"friends"`` list to every user and fill it from the pairs.

    The dicts in ``users`` are modified in place: any existing ``"friends"``
    entry is replaced by a fresh list. Each ``(i, j)`` pair is used as a pair
    of positions into ``users`` (not looked up by the ``"id"`` field), and the
    friendship is recorded in both directions.

    Returns the same ``users`` list that was passed in.
    """
    for user in users:
        user["friends"] = []

    for i, j in friendships:
        users[i]["friends"].append(users[j])  # add j as a friend of i
        users[j]["friends"].append(users[i])  # add i as a friend of j

    return users


def number_of_friends(user: Dict) -> int:
    """Return how many entries are in the user's ``"friends"`` list."""
    return len(user["friends"])


def friend_counts(users: List[Dict]) -> List[Tuple[int, int]]:
    """Return ``(user_id, friend_count)`` pairs, most friends first.

    The sort is stable, so users with equal counts keep their input order.
    """
    number_of_friends_by_id = [
        (user["id"], number_of_friends(user)) for user in users
    ]
    return sorted(number_of_friends_by_id, key=lambda tup: tup[1], reverse=True)


def friend_of_friend_ids_bad(user: Dict[str, Any]) -> List[int]:
    """Return the ids of every friend of every friend, unfiltered.

    The result may contain duplicates, the user's own id and ids of people
    the user is already friends with.
    """
    return [foaf["id"] for friend in user["friends"] for foaf in friend["friends"]]


def find_friend_of_friends(user: Dict) -> List[int]:
    """Return the unique ids of the user's friends of friends.

    The user's own id and the ids of direct friends are excluded. A user
    without a ``"friends"`` entry yields an empty list.
    """
    friends = _friends_of(user)
    foaf_ids = {
        foaf.get("id") for friend in friends for foaf in _friends_of(friend)
    }
    excluded_ids = {friend.get("id") for friend in friends}
    excluded_ids.add(user.get("id"))
    return list(foaf_ids.difference(excluded_ids))


def not_the_same(user: Dict, other_user: Dict) -> bool:
    """Return True when the two users have different ``"id"`` values."""
    return user.get("id") != other_user.get("id")


def not_friends(user: Dict, other_user: Dict) -> bool:
    """Return True when ``other_user`` is not among ``user``'s friends.

    Friends are compared by ``"id"``. A user without a ``"friends"`` entry has
    no friends, so the result is True.
    """
    return all(not_the_same(friend, other_user) for friend in _friends_of(user))


def friends_of_friends_ids(user: Dict[str, Any]) -> Counter:
    """Count, per friend-of-friend id, how many of the user's friends know them.

    The user and the user's direct friends are left out of the count.
    """
    return Counter(
        foaf.get("id")
        for friend in _friends_of(user)
        for foaf in _friends_of(friend)
        if not_the_same(user, foaf) and not_friends(user, foaf)
    )


def data_scientist_who_like(
    target_interst: str, interests: Optional[List[Tuple[int, str]]] = None
) -> List[int]:
    """Return the ids of users that list exactly ``target_interst``.

    ``interests`` is a list of ``(user_id, interest)`` pairs; when omitted the
    module-level ``INTERETS`` data is searched. Every call scans the whole
    list, so prefer ``build_interest_to_user_index`` for repeated lookups.
    """
    if interests is None:
        interests = INTERETS
    return [user_id for user_id, interest in interests if target_interst == interest]


def build_interest_to_user_index(
    interests: List[Tuple[int, str]]
) -> Dict[str, List[int]]:
    """Group user ids by interest, keeping the input order within each group.

    The returned mapping is a ``defaultdict(list)``.
    """
    user_ids_by_interest: Dict[str, List[int]] = defaultdict(list)
    for user_id, interest in interests:
        user_ids_by_interest[interest].append(user_id)
    return user_ids_by_interest


def build_user_to_interest_index(
    interests: List[Tuple[int, str]]
) -> Dict[int, List[str]]:
    """Group interests by user id, keeping the input order within each group.

    The returned mapping is a ``defaultdict(list)``.
    """
    interests_by_user_id: Dict[int, List[str]] = defaultdict(list)
    for user_id, interest in interests:
        interests_by_user_id[user_id].append(interest)
    return interests_by_user_id


def most_common_interests_with(
    user: Dict[str, Any],
    user_index: Dict[int, List[str]],
    interest_index: Dict[str, List[int]],
) -> Counter:
    """Count how many interests every other user shares with ``user``.

    ``user_index`` maps user id to interests and ``interest_index`` maps
    interest to user ids. A user with no entry in ``user_index`` yields an
    empty counter.
    """
    user_id = user.get("id")
    # .get with a default keeps missing keys from raising and, for defaultdict
    # indexes, avoids inserting empty entries as a side effect of the lookup.
    return Counter(
        interested_user_id
        for interest in user_index.get(user_id, ())
        for interested_user_id in interest_index.get(interest, ())
        if user_id != interested_user_id
    )


def build_interest_counter(interests: List[Tuple[int, str]]) -> Counter:
    """Count how often each lowercased, whitespace-separated word occurs.

    Multi-word interests contribute one count per word, e.g. "Big Data"
    counts once for "big" and once for "data".
    """
    return Counter(
        word for _, interest in interests for word in interest.lower().split()
    )


def print_counter(counter: Counter) -> None:
    """Print every ``word: count`` entry of the counter on its own line."""
    for word, count in counter.items():
        print(f"{word}: {count}")


def main() -> None:
    """Print the friendship and interest report for the built-in data."""
    users = add_friendships(USERS, FRIENDSHIPS)
    total_connections = sum(number_of_friends(user) for user in users)
    num_users = len(users)
    avg_connections = total_connections / num_users
    counts = friend_counts(users)
    heros_suggested_friends = friend_of_friend_ids_bad(users[0])
    better_friend_suggestion = find_friend_of_friends(users[0])
    chis_friend_suggestions_with_counts = friends_of_friends_ids(users[3])

    print(f"Total user count: {num_users}")
    print(f"Total number of connections: {total_connections}")
    print(f"Average connections: {avg_connections}")
    print(counts)
    print(heros_suggested_friends)
    print(better_friend_suggestion)
    print(f"Chi's friend suggestions: {chis_friend_suggestions_with_counts}")
    print("-" * 100)

    user_idx = build_user_to_interest_index(INTERETS)
    interest_idx = build_interest_to_user_index(INTERETS)
    chis_common_interests = most_common_interests_with(users[3], user_idx, interest_idx)
    print("Chi's most users with greatest interest overlap:")
    print(chis_common_interests)
    print("-" * 100)

    interest_counter = build_interest_counter(INTERETS)
    print_counter(interest_counter)


if __name__ == "__main__":
    main()