Visualisation of one million Uber requests in New York.

Here’s how I made this.

First I grabbed the data from fivethirtyeight’s GitHub repository. There are five more months of data which could also be used.

import pandas as pd
# Load the September 2014 file. read_csv is a module-level pandas function that
# returns a new DataFrame; it is not a DataFrame method, so its result has to
# be assigned.
month = 'sep'
df = pd.read_csv('uber-trip-data/uber-raw-data-' + month + '14.csv')
# Index the rows by their 'Date/Time' value so they can be selected by time.
# The 'Date/Time' column itself stays in the frame.
df = df.set_index(pd.DatetimeIndex(df['Date/Time']))

I created 4,320 ten-minute buckets for September. There must be an easy way to do this natively with pandas, but I couldn’t figure it out.

import datetime
import time

# Split September 2014 into 4320 consecutive ten-minute buckets
# (30 days * 24 hours * 6 buckets per hour). time_slices[i] holds the rows of
# df whose index timestamp falls into bucket i.
time_slices = []
bucket_length = datetime.timedelta(minutes=10)
start = datetime.datetime(2014, 9, 1)
for _ in range(4320):
    end = start + bucket_length
    # Half-open interval [start, end) on the full timestamp: every row lands
    # in exactly one bucket, and the bucket that ends at midnight stays on the
    # day it started (selecting the day from `end` picked the following day).
    in_bucket = (df.index >= start) & (df.index < end)
    time_slices.append(df[in_bucket])
    start = end

I plot the points using Bokeh. This code excludes the day/night cycle because I’m not sure it works that well.

# most of this is from:
from bokeh.document import Document
from bokeh.embed import file_html
from bokeh.models.glyphs import Circle
from bokeh.plotting import figure, show
from bokeh.models import (
    GMapPlot, Range1d, ColumnDataSource, LinearAxis,
    PanTool, WheelZoomTool, BoxSelectTool,
    BoxSelectionOverlay, GMapOptions,
    NumeralTickFormatter, PrintfTickFormatter)
from bokeh.resources import INLINE

x_range = Range1d()
y_range = Range1d()

# NOTE: the original listing passed a custom JSON map style through
# styles="""...""", but the style text is missing from the listing, which left
# the string unterminated. The argument is omitted here; pass
# styles=<JSON string> again to restore the custom look.
map_options = GMapOptions(lat=40.7470, lng=-73.9860, map_type="roadmap", zoom=14)

# Write one standalone HTML page per time bucket: maps-<bucket index>.html.
for c, time_slice in enumerate(time_slices):
    plot = GMapPlot(
        x_range=x_range, y_range=y_range,
        map_options=map_options,
        title="New York", plot_width=2560, plot_height=1600)

    # One circle per row of this bucket, positioned by its Lat/Lon columns.
    lat = time_slice['Lat'].tolist()
    lon = time_slice['Lon'].tolist()
    color = ['red'] * len(lat)
    new_source = ColumnDataSource(
        data=dict(lat=lat, lon=lon, color=color))
    circle = Circle(x="lon", y="lat", size=10, fill_color="red", line_color="black")
    plot.add_glyph(new_source, circle)

    # The plot must be attached to the document that file_html renders.
    # NOTE(review): Document.add matches the Bokeh API in which file_html
    # takes a Document as its first argument; later releases renamed it to
    # add_root -- confirm against the installed version.
    doc = Document()
    doc.add(plot)

    filename = "maps-" + str(c) + ".html"
    with open(filename, "w") as f:
        f.write(file_html(doc, INLINE, "Google Maps Example"))
    print("Wrote %s" % filename)

At the moment Bokeh doesn’t appear to have a way to save plots, and I couldn’t get the look I wanted with other plotting packages, so I decided to just take 4320 screenshots of the HTML pages I wrote, using Selenium.

from selenium import webdriver
# Open every generated page in Firefox and save a screenshot of it.
# A single browser is started, reused for all pages and shut down at the end;
# starting one per page without quitting it leaves every instance running.
browser = webdriver.Firefox()
try:
    # NOTE(review): the range starts at 1, so maps-0.html is never captured --
    # confirm that this is intended.
    for i in range(1, 4320):
        browser.get('file:///Users/richard/Dropbox/uber_data/maps-' + str(i) + '.html')
        browser.save_screenshot('screenie' + str(i) + '.png')
finally:
    browser.quit()

Then I cropped the images and drew the timestamp on each frame.

from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw
import datetime
import time

def crop_timestamp_save(file, time):
    """Crop one screenshot, draw a timestamp on it and save the result.

    Args:
        file: File name of the screenshot inside ``./screenshots/``.
        time: Text to draw onto the cropped image.

    The cropped, labelled image is written to
    ``./cropped_screenshots/cropped_<file>``.
    """
    # The context manager closes the source file once the frame is written.
    with Image.open('./screenshots/' + file) as image:
        print(image.size)
        # Crop box is (left, upper, right, lower) in pixels.
        cropped_image = image.crop((100, 180, 2500, 1600))
        draw = ImageDraw.Draw(cropped_image)
        font = ImageFont.truetype("roboto/Roboto-Black.ttf", 36)
        # White text at (40, 50) from the top-left corner of the cropped frame.
        draw.text((40, 50), time, (255, 255, 255), font=font)
        cropped_image.save("./cropped_screenshots/cropped_" + file)

# Crop and label frames 1 to 4. The first frame is labelled with midnight on
# 1 Sep 2014 and each following frame with a time ten minutes later.
# NOTE(review): frame i therefore carries the label 10 * (i - 1) minutes after
# midnight -- confirm this lines up with the numbering of the screenshots.
frame_step = datetime.timedelta(minutes=10)
start = datetime.datetime(2014, 9, 1)
for i in range(1, 5):
    time_to_print = start.strftime("%a %b %-d %Y %H:%M")
    crop_timestamp_save("".join(["screenie", str(i), ".png"]), time_to_print)
    start = start + frame_step

Finally, I created a movie from all the screenshots using ffmpeg.

# Encode the numbered PNG frames (numbering starts at 1) into a libx264 video.
ffmpeg -start_number 1 -i screenshot%d.png -vcodec libx264 uber.avi
# Combine the video with the audio file, copying the streams without
# re-encoding; -shortest ends the output when the shorter input ends.
ffmpeg -i uber.avi -i audio.mp3 -codec copy -shortest uber2.avi

I’ll probably post a Jupyter Notebook to github when I’ve tidied it up a little.