From 8f24419504ca90d77b66fe57d17b47975135fa92 Mon Sep 17 00:00:00 2001
From: Michael Kennedy
Date: Tue, 13 Jun 2017 09:50:11 -0700
Subject: [PATCH] The starter code is ready to roll.

---
 .gitignore | 1 +
 src/08_perf/setup_steps.md | 16 ++
 .../starter_big_dealership/db_stats.py | 28 +++
 .../starter_big_dealership/load_data.py | 180 ++++++++++++++++++
 .../starter_big_dealership/nosql/car.py | 25 +++
 .../starter_big_dealership/nosql/engine.py | 10 +
 .../nosql/mongo_setup.py | 9 +
 .../starter_big_dealership/nosql/owner.py | 25 +++
 .../nosql/service_record.py | 9 +
 src/08_perf/starter_big_dealership/q_and_a.py | 103 ++++++++++
 .../starter_big_dealership/requirements.txt | 3 +
 .../services/car_service.py | 96 ++++++++++
 12 files changed, 505 insertions(+)
 create mode 100644 src/08_perf/setup_steps.md
 create mode 100644 src/08_perf/starter_big_dealership/db_stats.py
 create mode 100644 src/08_perf/starter_big_dealership/load_data.py
 create mode 100644 src/08_perf/starter_big_dealership/nosql/car.py
 create mode 100644 src/08_perf/starter_big_dealership/nosql/engine.py
 create mode 100644 src/08_perf/starter_big_dealership/nosql/mongo_setup.py
 create mode 100644 src/08_perf/starter_big_dealership/nosql/owner.py
 create mode 100644 src/08_perf/starter_big_dealership/nosql/service_record.py
 create mode 100644 src/08_perf/starter_big_dealership/q_and_a.py
 create mode 100644 src/08_perf/starter_big_dealership/requirements.txt
 create mode 100644 src/08_perf/starter_big_dealership/services/car_service.py

diff --git a/.gitignore b/.gitignore
index feb8e4b..c2c7e59 100644
--- a/.gitignore
+++ b/.gitignore
@@ -107,3 +107,4 @@ src/07_mongoengine/service_central/.idea/modules.xml
 src/07_mongoengine/service_central/.idea/service_central.iml
 src/07_mongoengine/service_central/.idea/inspectionProfiles/profiles_settings.xml
 screencaster.xml
+.idea
diff --git a/src/08_perf/setup_steps.md b/src/08_perf/setup_steps.md
new file mode 100644
index 0000000..b9ba9f0
--- /dev/null
+++ b/src/08_perf/setup_steps.md
@@ -0,0 +1,16 @@
+This section on performance is best performed with a large database of cars and owners.
+
+There is a load_data part of the app which will generate this database but it will take a long time (30 minutes or something like this).
+
+To make things faster, I have included a DB which can be imported in `REPO/data/dealership_db_250k.zip`
+
+To use this database, you simply need to unzip and then restore it with the following command:
+
+In the terminal / command line change into the extracted folder containing the `*.bson` and `*.json`, then type:
+
+`mongorestore --drop --db dealership ./`
+
+On Windows, use `.\` rather than `./`
+
+Now you should have a dealership db in MongoDB. Be sure to check the indexes. You may need to drop them (other than `_id` indexes).
+
diff --git a/src/08_perf/starter_big_dealership/db_stats.py b/src/08_perf/starter_big_dealership/db_stats.py
new file mode 100644
index 0000000..b85a99e
--- /dev/null
+++ b/src/08_perf/starter_big_dealership/db_stats.py
@@ -0,0 +1,28 @@
+from nosql import mongo_setup
+from nosql.car import Car
+from nosql.owner import Owner
+
+
+def main():
+    """Print car, owner, and service record counts and per-document averages."""
+    mongo_setup.init()
+
+    print("Computing stats, this WILL take awhile...", flush=True)
+
+    # Every document is loaded into memory so the embedded lists can be measured.
+    cars = list(Car.objects())
+    print("There are {:,} cars.".format(len(cars)))
+
+    owners = list(Owner.objects())
+    print("There are {:,} owners.".format(len(owners)))
+    owned_cars = sum(len(o.car_ids) for o in owners)
+    # max(..., 1) keeps an empty collection from raising ZeroDivisionError.
+    print("Each owner owns an average of {:.2f} cars.".format(owned_cars / max(len(owners), 1)))
+
+    service_histories = sum(len(c.service_history) for c in cars)
+    print("There are {:,} service histories.".format(service_histories))
+    print("Each car has an average of {:.2f} service records.".format(service_histories / max(len(cars), 1)))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/08_perf/starter_big_dealership/load_data.py b/src/08_perf/starter_big_dealership/load_data.py
new file mode 100644
index 0000000..a8138c9
--- /dev/null
+++ b/src/08_perf/starter_big_dealership/load_data.py
@@ -0,0 +1,180 @@
+import nosql.mongo_setup as mongo_setup
+import services.car_service as car_service
+from nosql.car import Car
+from nosql.engine import Engine
+from nosql.owner import Owner
+
+from datetime import datetime
+import random
+from faker import Faker
+
+from nosql.service_record import ServiceRecord
+
+
+def main():
+    """Drop the existing data, then generate owners, cars, and service records."""
+    # large data DB example
+    car_count = 250_000
+    owner_count = 100_000
+
+    # simple DB example
+    # car_count = 200
+    # owner_count = 100
+
+    mongo_setup.init()
+    clear_db()
+
+    t0 = datetime.now()
+
+    fake = create_faker_and_seed()
+    owners = create_owners(fake, count=owner_count)
+    print("Created {:,.0f} owners".format(len(owners)))
+    cars = create_cars(count=car_count)
+    print("Created {:,.0f} cars".format(len(cars)))
+    if cars and owners:
+        add_cars_to_owners(owners, cars)
+    create_service_records(cars, fake)
+
+    dt = datetime.now() - t0
+    print("Done in {} sec".format(dt.total_seconds()))
+
+
+models = [
+    'Ferrari 488 GTB',
+    'Ferrari 360 modena',
+    'F430',
+    '599 GTB Fiorano',
+    '458 Italia',
+    'LaFerrari',
+    'Testarossa',
+    'F12 Berlinetta',
+    '308 GTB/GTS',
+    'F355',
+    'California',
+    '575M Maranello',
+    'F50',
+    'F40',
+    'Enzo Ferrari',
+]
+
+service_operations = [
+    ('Oil change', 200),
+    ('New tires', 1000),
+    ('New engine', 15000),
+    ('Body repair', 4000),
+    ('New seat', 5000),
+    ('Tune up', 1500),
+    ('Air filter', 100),
+    ('Flat tire', 200),
+]
+
+
+def create_faker_and_seed():
+    """Return a Faker instance after seeding both Faker and random with 42."""
+    fake = Faker()
+    fake.seed(42)
+    random.seed(42)
+    return fake
+
+
+def clear_db():
+    """Drop the cars and owners collections."""
+    Car.drop_collection()
+    Owner.drop_collection()
+
+
+def create_owners(fake, count=100):
+    """Bulk insert `count` owners with fake names and return every owner in the DB."""
+    datetime_start = datetime(year=2000, month=1, day=1)
+    datetime_end = datetime(year=datetime.now().year, month=1, day=1)
+
+    owners = []
+    print("Building owners")
+    for _ in range(0, count):
+        owner = Owner()
+        owner.name = fake.name()
+        owner.created = fake.date_time_between_dates(datetime_start=datetime_start,
+                                                     datetime_end=datetime_end,
+                                                     tzinfo=None)
+        owners.append(owner)
+
+    print("Saving owners")
+    Owner.objects().insert(owners, load_bulk=True)
+
+    return list(Owner.objects())
+
+
+def create_cars(count=200):
+    """Top the cars collection up to `count` cars and return every car in the DB.
+
+    Returns an empty list without inserting when `count` cars already exist.
+    """
+    current_car_count = Car.objects().count()
+    if current_car_count >= count:
+        print("There are currently {:,} cars. Skipping create.".format(current_car_count))
+        return []
+
+    count = count - current_car_count
+
+    hp_factor = 660
+    mpg_factor = 21
+    liters_factor = 4
+
+    cars = []
+    print("Building cars...")
+    for _ in range(0, count):
+        model = random.choice(models)
+        make = 'Ferrari'
+        year = random.randint(1985, datetime.now().year)
+        mileage = random.randint(0, 150000)
+
+        mpg = int((mpg_factor + mpg_factor * random.random() / 4) * 10) / 10.0
+        horsepower = int(hp_factor + hp_factor * random.random() / 2)
+        liters = int((liters_factor + liters_factor * random.random() / 2) * 100) / 100.0
+
+        engine = Engine(horsepower=horsepower, liters=liters, mpg=mpg)
+        car = Car(model=model, make=make, year=year, engine=engine, mileage=mileage)
+        cars.append(car)
+
+    print("Saving cars...")
+    Car.objects().insert(cars)
+
+    return list(Car.objects())
+
+
+def add_cars_to_owners(owners: list, cars: list):
+    """Link each owner to up to 5 randomly chosen cars via car_service.add_owner."""
+    for o in owners:
+        counter = random.randint(0, 5)
+        for _ in range(0, counter):
+            car = random.choice(cars)
+            car_service.add_owner(o.id, car.id)
+
+
+def create_service_records(cars, fake):
+    """Append between 0 and 10 random service records to each car and save it."""
+    datetime_start = datetime(year=2000, month=1, day=1)
+    datetime_end = datetime(year=datetime.now().year, month=1, day=1)
+
+    for car in cars:
+        counter = random.randint(0, 10)
+        # every record on one car is rated either high (4-5) or low (1-3)
+        is_positive = random.randint(0, 1) == 1
+        for _ in range(0, counter):
+            s = random.choice(service_operations)
+            sr = ServiceRecord()
+            sr.description = s[0]
+            sr.date = fake.date_time_between_dates(datetime_start=datetime_start,
+                                                   datetime_end=datetime_end,
+                                                   tzinfo=None)
+            sr.price = int(s[1] + (random.random() - .5) * s[1] / 4)
+            if is_positive:
+                sr.customer_rating = random.randint(4, 5)
+            else:
+                sr.customer_rating = random.randint(1, 3)
+            car.service_history.append(sr)
+        car.save()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/08_perf/starter_big_dealership/nosql/car.py b/src/08_perf/starter_big_dealership/nosql/car.py
new file mode 100644
index 0000000..0def06f
--- /dev/null
+++ b/src/08_perf/starter_big_dealership/nosql/car.py
@@ -0,0 +1,25 @@
+import uuid
+import mongoengine
+
+from nosql.engine import Engine
+from nosql.service_record import ServiceRecord
+
+
+class Car(mongoengine.Document):
+    model = mongoengine.StringField(required=True)
+    make = mongoengine.StringField(required=True)
+    year = mongoengine.IntField(required=True)
+    mileage = mongoengine.IntField(default=0)
+    vi_number = mongoengine.StringField(default=lambda: str(uuid.uuid4()).replace("-", ''))
+
+    engine = mongoengine.EmbeddedDocumentField(Engine, required=True)
+    service_history = mongoengine.EmbeddedDocumentListField(ServiceRecord)
+
+    # no need to reference owners here, that is entirely contained in owner class
+
+    meta = {
+        'db_alias': 'core',
+        'collection': 'cars',
+        'indexes': [
+        ]
+    }
diff --git a/src/08_perf/starter_big_dealership/nosql/engine.py b/src/08_perf/starter_big_dealership/nosql/engine.py
new file mode 100644
index 0000000..512b163
--- /dev/null
+++ b/src/08_perf/starter_big_dealership/nosql/engine.py
@@ -0,0 +1,10 @@
+import uuid
+
+import mongoengine
+
+
+class Engine(mongoengine.EmbeddedDocument):
+    horsepower = mongoengine.IntField(required=True)
+    liters = mongoengine.FloatField(required=True)
+    mpg = mongoengine.FloatField(required=True)
+    serial_number = mongoengine.StringField(default=lambda: str(uuid.uuid4()))
diff --git a/src/08_perf/starter_big_dealership/nosql/mongo_setup.py b/src/08_perf/starter_big_dealership/nosql/mongo_setup.py
new file mode 100644
index 0000000..7558b02
--- /dev/null
+++ b/src/08_perf/starter_big_dealership/nosql/mongo_setup.py
@@ -0,0 +1,9 @@
+import mongoengine
+
+alias_core = 'core'
+
+
+def init():
+    db = 'dealership'
+    # Other connection options here (server, port, username, etc.)
+    mongoengine.register_connection(alias=alias_core, name=db)
diff --git a/src/08_perf/starter_big_dealership/nosql/owner.py b/src/08_perf/starter_big_dealership/nosql/owner.py
new file mode 100644
index 0000000..08a8a44
--- /dev/null
+++ b/src/08_perf/starter_big_dealership/nosql/owner.py
@@ -0,0 +1,25 @@
+from datetime import datetime
+
+import mongoengine
+
+
+class Owner(mongoengine.Document):
+    # show off required (not available in mongo or pymongo directly)
+    name = mongoengine.StringField(required=True)
+
+    # show off default
+    created = mongoengine.DateTimeField(default=datetime.now)
+
+    # allows us to use $set and $inc
+    number_of_visits = mongoengine.IntField(default=0)
+
+    # show off many-to-many modeling with one sided list field
+    # cars can have multiple owners and an owner can own multiple cars
+    car_ids = mongoengine.ListField(mongoengine.ObjectIdField())
+
+    meta = {
+        'db_alias': 'core',
+        'collection': 'owners',
+        'indexes': [
+        ]
+    }
diff --git a/src/08_perf/starter_big_dealership/nosql/service_record.py b/src/08_perf/starter_big_dealership/nosql/service_record.py
new file mode 100644
index 0000000..3a89d24
--- /dev/null
+++ b/src/08_perf/starter_big_dealership/nosql/service_record.py
@@ -0,0 +1,9 @@
+import datetime
+import mongoengine
+
+
+class ServiceRecord(mongoengine.EmbeddedDocument):
+    date = mongoengine.DateTimeField(default=datetime.datetime.now)
+    description = mongoengine.StringField()
+    price = mongoengine.FloatField(required=True)
+    customer_rating = mongoengine.IntField(required=True)  # 1 - 5 satisfaction level.
diff --git a/src/08_perf/starter_big_dealership/q_and_a.py b/src/08_perf/starter_big_dealership/q_and_a.py
new file mode 100644
index 0000000..23fcb8c
--- /dev/null
+++ b/src/08_perf/starter_big_dealership/q_and_a.py
@@ -0,0 +1,103 @@
+from nosql.car import Car
+from nosql.owner import Owner
+from datetime import datetime
+import nosql.mongo_setup as mongo_setup
+
+
+def timed(msg, func):
+    """Call func() and print msg followed by the elapsed time in milliseconds."""
+    t0 = datetime.now()
+
+    func()
+
+    dt = datetime.now() - t0
+    print("{} Time: {:,.3f} ms".format(msg, dt.total_seconds() * 1000.0), flush=True)
+
+
+mongo_setup.init()
+
+print("Time to ask some questions")
+
+timed(
+    'How many owners?',
+    lambda: Owner.objects().filter().count()
+)
+timed(
+    'How many cars?',
+    lambda: Car.objects().filter().count()
+)
+
+timed(
+    'Find the 10,000th owner?',
+    lambda: Owner.objects().order_by('name')[10000:10001][0]
+)
+
+owner = Owner.objects().order_by('name')[10000:10001][0]
+
+
+def find_cars_by_owner(owner_id):
+    """Return a list of the cars whose ids appear in the owner's car_ids."""
+    the_owner = Owner.objects(id=owner_id).first()
+    cars = Car.objects().filter(id__in=the_owner.car_ids)
+    return list(cars)
+
+
+timed(
+    'How many cars are owned by the 10,000th owner?',
+    lambda: find_cars_by_owner(owner.id)
+)
+
+
+def find_owners_by_car(car_id):
+    """Return a list of the owners whose car_ids contain car_id."""
+    owners = Owner.objects(car_ids=car_id)
+    return list(owners)
+
+
+car = Car.objects()[10000:10001][0]
+timed(
+    'How many owners own the 10,000th car?',
+    lambda: find_owners_by_car(car.id)
+)
+
+owner50k = Owner.objects()[50000:50001][0]
+timed(
+    'Find owner 50,000 by name?',
+    lambda: Owner.objects(name=owner50k.name).first()
+)
+
+timed(
+    'Cars with expensive service?',
+    lambda: Car.objects(service_history__price__gt=16800).count()
+)
+
+timed(
+    'Cars with expensive service and spark plugs?',
+    lambda: Car.objects(service_history__price__gt=16800, service_history__description='Spark plugs').count()
+)
+
+timed(
+    'Load cars with expensive service and spark plugs?',
+    lambda: list(Car.objects(service_history__price__gt=16800, service_history__description='Spark plugs'))
+)
+
+timed(
+    'Load car name and ids with expensive service and spark plugs?',
+    lambda: list(Car.objects(service_history__price__gt=16800, service_history__description='Spark plugs')
+                 .only('make', 'model', 'id'))
+)
+
+timed(
+    'Highly rated, high price service events?',
+    lambda: Car.objects(service_history__customer_rating=5, service_history__price__gt=16800).count()
+)
+
+timed(
+    'Low rated, low price service events?',
+    lambda: Car.objects(service_history__customer_rating=1, service_history__price__lt=50).count()
+)
+
+timed(
+    'How many high mileage cars?',
+    lambda: Car.objects(mileage__gt=140000).count()
+)
diff --git a/src/08_perf/starter_big_dealership/requirements.txt b/src/08_perf/starter_big_dealership/requirements.txt
new file mode 100644
index 0000000..56cbb75
--- /dev/null
+++ b/src/08_perf/starter_big_dealership/requirements.txt
@@ -0,0 +1,3 @@
+mongoengine
+faker
+python-dateutil # for DateTimeField parsing
diff --git a/src/08_perf/starter_big_dealership/services/car_service.py b/src/08_perf/starter_big_dealership/services/car_service.py
new file mode 100644
index 0000000..41e5a4e
--- /dev/null
+++ b/src/08_perf/starter_big_dealership/services/car_service.py
@@ -0,0 +1,96 @@
+import typing
+
+import bson
+import datetime
+
+from nosql.car import Car
+from nosql.engine import Engine
+from nosql.owner import Owner
+from nosql.service_record import ServiceRecord
+
+
+def create_owner(name: str) -> Owner:
+    """Save and return a new owner with the given name."""
+    owner = Owner(name=name)
+    owner.save()
+
+    return owner
+
+
+def create_car(model: str, make: str, year: int,
+               horsepower: int, liters: float,
+               mpg: float, mileage: int) -> Car:
+    """Save and return a new car with an embedded engine built from the engine stats."""
+    engine = Engine(horsepower=horsepower, liters=liters, mpg=mpg)
+    car = Car(model=model, make=make, year=year, engine=engine, mileage=mileage)
+    car.save()
+
+    return car
+
+
+def record_visit(customer):
+    """Increment number_of_visits on one owner whose name equals customer."""
+    Owner.objects(name=customer).update_one(inc__number_of_visits=1)
+
+
+def find_cars_by_make(make) -> Car:
+    """Return the first car of the given make (a single car, despite the plural name)."""
+    car = Car.objects(make=make).first()
+    return car
+
+
+def find_owner_by_name(name) -> Owner:
+    """Return the first owner with this name and print how long the query took."""
+    t0 = datetime.datetime.now()
+    owner = Owner.objects(name=name).first()
+    dt = datetime.datetime.now() - t0
+    print("Owner found in {} ms".format(dt.total_seconds() * 1000))
+
+    return owner
+
+
+def find_owner_by_id(owner_id) -> Owner:
+    """Return the first owner matching this id."""
+    owner = Owner.objects(id=owner_id).first()
+    return owner
+
+
+def find_cars_with_bad_service(limit=10) -> typing.List[Car]:
+    """Return up to `limit` cars that have a service record rated below 4."""
+    cars = Car.objects(service_history__customer_rating__lt=4)[:limit]
+    return list(cars)
+
+
+def percent_cars_with_bad_service():
+    """Return the percentage of cars with a service record rated 1 or lower; prints the query time."""
+    t0 = datetime.datetime.now()
+    bad = Car.objects().filter(service_history__customer_rating__lte=1).count()
+    dt = datetime.datetime.now() - t0
+    print("bad computed in {} ms, bad: {:,}".format(dt.total_seconds() * 1000, bad))
+
+    all_cars = Car.objects().count()
+
+    percent = 100 * bad / max(all_cars, 1)
+    return percent
+
+
+def find_car_by_id(car_id: bson.ObjectId) -> Car:
+    """Return the first car matching this id (one query; the duplicate lookup was removed)."""
+    car = Car.objects(id=car_id).first()
+    return car
+
+
+def add_service_record(car_id, description, price, customer_rating):
+    """Push a new service record onto the car; raise Exception if the update result is not 1."""
+    record = ServiceRecord(description=description, price=price, customer_rating=customer_rating)
+
+    res = Car.objects(id=car_id).update_one(push__service_history=record)
+    if res != 1:
+        raise Exception("No car with id {}".format(car_id))
+
+
+def add_owner(owner_id, car_id):
+    """Add car_id to the owner's car_ids set; raise Exception if the update result is not 1."""
+    res = Owner.objects(id=owner_id).update_one(add_to_set__car_ids=car_id)
+    if res != 1:
+        raise Exception("No owner with id {}".format(owner_id))