From 4b088507563089a5a8d12583fac7e3ebfd04662f Mon Sep 17 00:00:00 2001 From: androiddrew Date: Sun, 10 Feb 2019 13:16:41 -0500 Subject: [PATCH] Finishing out chapter 3 --- ch3/README.md | 13 +++++++++++++ ch3/multiline_chart.py | 16 ++++++++++++++++ ch3/scatter_plot.py | 19 +++++++++++++++++++ ch3/scatter_plot_and_scales.py | 22 ++++++++++++++++++++++ 4 files changed, 70 insertions(+) create mode 100644 ch3/README.md create mode 100644 ch3/multiline_chart.py create mode 100644 ch3/scatter_plot.py create mode 100644 ch3/scatter_plot_and_scales.py diff --git a/ch3/README.md b/ch3/README.md new file mode 100644 index 0000000..8e92485 --- /dev/null +++ b/ch3/README.md @@ -0,0 +1,13 @@ +# Chapter 3 + +## Takeaway + +This chapter gives a brief introduction to simple visualizations with matplotlib, which can do a lot more than is shown here. + +For further resources you should check out: + +- [Seaborn](https://seaborn.pydata.org/), a framework built on top of matplotlib that produces more complex and prettier visualizations. + +- [Bokeh](https://bokeh.pydata.org/en/latest/) is an interactive visualization library that targets modern web browsers for presentation. Its goal is to provide elegant, concise construction of versatile graphics, and to extend this capability with high-performance interactivity over very large or streaming datasets. + +- [ggplot](http://ggplot.yhathq.com/), a Python port of R's ggplot2 library. 
\ No newline at end of file diff --git a/ch3/multiline_chart.py b/ch3/multiline_chart.py new file mode 100644 index 0000000..993c8ea --- /dev/null +++ b/ch3/multiline_chart.py @@ -0,0 +1,16 @@ +from matplotlib import pyplot as plt + +variance = [1, 2, 4, 8, 16, 32, 64, 128, 256] +bias_squared = [256, 128, 64, 32, 16, 8, 4, 2, 1] +total_error = [x + y for x, y in zip(variance, bias_squared)] +xs = range(0, len(variance)) + + +plt.plot(xs, variance, "g-", label="variance") +plt.plot(xs, bias_squared, "r-", label="bias squared") +plt.plot(xs, total_error, "b:", label="total error") + +plt.legend(loc=9) +plt.xlabel("model complexity") +plt.title("The Bias-Variance Tradeoff") +plt.show() diff --git a/ch3/scatter_plot.py b/ch3/scatter_plot.py new file mode 100644 index 0000000..8c998a3 --- /dev/null +++ b/ch3/scatter_plot.py @@ -0,0 +1,19 @@ +from matplotlib import pyplot as plt + +friends = [70, 65, 72, 63, 71, 64, 60, 64, 67, ] +minutes = [175, 170, 205, 120, 220, 130, 105, 145, 190] +labels = [l for l in 'abcdefghi'] + +plt.scatter(friends, minutes) + +for label, friend_count, minute_count in zip(labels, friends, minutes): + plt.annotate(label, + xy=(friend_count, minute_count), + xytext=(5, -5), + textcoords='offset points') + +plt.title("Daily Minutes vs. Number of Friends") +plt.xlabel("X of friends") +plt.ylabel("Daily minutes spent on site") +plt.axis([58, 74, 80, 240]) +plt.show() diff --git a/ch3/scatter_plot_and_scales.py b/ch3/scatter_plot_and_scales.py new file mode 100644 index 0000000..6e5a7ad --- /dev/null +++ b/ch3/scatter_plot_and_scales.py @@ -0,0 +1,22 @@ +""" +This is to demonstrate why it may be necessary to adjust the scale of your visualization. + +This particular example works because we are scatter plotting two comparable variables. +We have to change the scale to make sure that we aren't getting a misleading picture of the +relationship. 
+""" +from matplotlib import pyplot as plt + + +test_grades1 = [99, 90, 85, 97, 80] +test_grades2 = [100, 85, 60, 90, 70] + +plt.scatter(test_grades1, test_grades2) +plt.title("Axes Aren't Comparable") +plt.xlabel("Test 1 grade") +plt.ylabel("Test 2 grade") +# Uncomment this line below to realign the axes +plt.axis([40, 120, 40, 120]) +# plt.axis('equal') +plt.show() +