import requests, cPickle, shutil, time
# Scrape every player record from the Fantasy Premier League API and cache the
# result on disk so the analysis can be re-run without hitting the network.
# NOTE(review): `all` shadows the builtin of the same name; the name is kept
# because code outside this cell may refer to it.
playerurl = "http://fantasy.premierleague.com/web/api/elements/%s/"
all = {}
with open("errors.log", "w") as errorout:
    for i in range(600):
        r = requests.get(playerurl % i)
        if r.status_code != 200:
            # skip non-existent players, but leave a trace of what was skipped
            errorout.write("%s: HTTP %s\n" % (i, r.status_code))
            continue
        all[i] = r.json()
# Same file name that is loaded back with cPickle.load() further down; binary
# mode keeps the pickle intact on every platform.
with open("players.data.pickle", "wb") as outfile:
    cPickle.dump(all, outfile)
# NOTE(review): `website` is not defined or imported in the visible part of
# this file -- presumably a presentation helper; confirm where it comes from.
website("http://ipython.org/notebook.html")
import cPickle
# Load the cached player records (a dict keyed by player id).
# NOTE: unpickling can execute arbitrary code -- only load a pickle file that
# you produced yourself.
with open("players.data.pickle", "rb") as infile:
    players = cPickle.load(infile)
players[1]
{u'added': u'2013-07-15T13:21:12 UTC+0000', u'code': 37096, u'current_fixture': u'Crystal Palace (H)', u'element_type_id': 1, u'event_cost': 42, u'event_explain': [[u'Minutes played', 0, 0]], u'event_points': 0, u'event_total': 0, u'first_name': u'Lukasz', u'fixture_history': {u'all': [[u'17 Aug 15:00', 1, u'AVL(H) 1-3', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0], [u'24 Aug 12:45', 2, u'FUL(A) 3-1', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1322, 45, 0], [u'01 Sep 16:00', 3, u'TOT(H) 1-0', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1350, 45, 0], [u'14 Sep 15:00', 4, u'SUN(A) 3-1', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1259, 44, 0], [u'22 Sep 13:30', 5, u'STK(H) 3-1', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1091, 44, 0], [u'28 Sep 17:30', 6, u'SWA(A) 2-1', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -852, 44, 0], [u'06 Oct 16:00', 7, u'WBA(A) 1-1', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -712, 44, 0], [u'19 Oct 15:00', 8, u'NOR(H) 4-1', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -388, 44, 0], [u'26 Oct 12:45', 9, u'CRY(A) 2-0', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -343, 43, 0], [u'02 Nov 17:30', 10, u'LIV(H) 2-0', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -218, 43, 0], [u'10 Nov 16:10', 11, u'MUN(A) 0-1', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -247, 43, 0], [u'23 Nov 15:00', 12, u'SOU(H) 2-0', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -221, 43, 0], [u'30 Nov 15:00', 13, u'CAR(A) 3-0', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -257, 43, 0], [u'04 Dec 19:45', 14, u'HUL(H) 2-0', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -283, 43, 0], [u'08 Dec 16:00', 15, u'EVE(H) 1-1', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -251, 43, 0], [u'14 Dec 12:45', 16, u'MCI(A) 3-6', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -213, 42, 0], [u'23 Dec 20:00', 17, u'CHE(H) 0-0', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -123, 42, 0], [u'26 Dec 15:00', 18, u'WHU(A) 3-1', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -105, 42, 0], [u'29 Dec 13:30', 19, u'NEW(A) 1-0', 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -103, 42, 0], [u'01 Jan 15:00', 20, u'CAR(H) 2-0', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -83, 42, 0], [u'13 Jan 20:00', 21, u'AVL(A) 2-1', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17, 42, 0], [u'18 Jan 15:00', 22, u'FUL(H) 2-0', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -32, 42, 0], [u'28 Jan 19:45', 23, u'SOU(A) 2-2', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -35, 42, 0], [u'02 Feb 16:00', 24, u'CRY(H) 2-0', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 42, 0]], u'summary': [[22, u'FUL (H)', 0], [23, u'SOU (A)', 0], [24, u'CRY (H)', 0]]}, u'fixtures': {u'all': [[u'08 Feb 12:45', u'Gameweek 25', u'Liverpool (A)'], [u'12 Feb 19:45', u'Gameweek 26', u'Man Utd (H)'], [u'22 Feb 15:00', u'Gameweek 27', u'Sunderland (H)'], [u'01 Mar 15:00', u'Gameweek 28', u'Stoke City (A)'], [u'08 Mar 15:00', u'Gameweek 29', u'Swansea (H)'], [u'16 Mar 16:00', u'Gameweek 30', u'Tottenham (A)'], [u'22 Mar 12:45', u'Gameweek 31', u'Chelsea (A)'], [u'29 Mar 17:30', u'Gameweek 32', u'Man City (H)'], [u'05 Apr 15:00', u'Gameweek 33', u'Everton (A)'], [u'12 Apr 15:00', u'Gameweek 34', u'West Ham (H)'], [u'19 Apr 15:00', u'Gameweek 35', u'Hull City (A)'], [u'26 Apr 15:00', u'Gameweek 36', u'Newcastle (H)'], [u'03 May 15:00', u'Gameweek 37', u'West Brom (H)'], [u'11 May 15:00', u'Gameweek 38', u'Norwich (A)']], u'summary': [[25, u'LIV (A)', u'08 Feb 12:45'], [26, u'MUN (H)', u'12 Feb 19:45'], [27, u'SUN (H)', u'22 Feb 15:00']]}, u'form': 0.0, u'id': 1, u'in_dreamteam': False, u'last_season_points': 0, u'max_cost': 45, u'min_cost': 42, u'news': u'', u'news_added': None, u'news_return': None, u'news_updated': None, u'next_fixture': u'Liverpool (A)', u'now_cost': 42, u'original_cost': 45, u'photo_mobile_url': u'http://cdn.ismfg.net/static/plfpl/img/shirts/photos/37096.jpg', u'points_per_game': 0.0, u'season_history': [[u'2007/08', 248, 0, 0, 0, 2, 0, 0, 0, 0, 0, 8, 0, 0, 0, 46, 15], [u'2008/09', 463, 0, 0, 0, 10, 0, 0, 0, 0, 0, 18, 0, 0, 0, 46, 20], 
[u'2009/10', 360, 0, 0, 0, 5, 0, 0, 0, 1, 0, 11, 0, 0, 0, 49, 15], [u'2010/11', 1260, 0, 1, 5, 14, 0, 0, 0, 0, 0, 33, 2, 0, 0, 45, 55], [u'2011/12', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 0], [u'2012/13', 360, 0, 0, 1, 3, 0, 0, 0, 0, 0, 7, 0, 57, 0, 42, 13]], u'second_name': u'Fabianski', u'selected': 17295, u'selected_by': u'0.5', u'shirt_image_url': u'http://cdn.ismfg.net/static/plfpl/img/shirts/shirt_1_1.png', u'shirt_mobile_image_url': u'http://cdn.ismfg.net/static/plfpl/img/shirts/mobile/shirt_1_1.png', u'squad_number': None, u'status': u'a', u'team_code': 37096, u'team_id': 1, u'team_name': u'Arsenal', u'total_points': 0, u'transfers_in': 4112, u'transfers_in_event': 31, u'transfers_out': 13666, u'transfers_out_event': 74, u'type_name': u'Goalkeeper', u'web_name': u'Fabianski'}
# IPython magic: draw matplotlib figures inline in the notebook.
%matplotlib inline
# NOTE(review): `matplotlib` and `figsize` are not imported in the visible
# part of this file -- presumably injected by the IPython/pylab environment;
# confirm.
matplotlib.rc('font', size=18)
figsize(12, 4)
import numpy as np
import re
import StringIO
#import prettyplotlib as ppl
#dir(ppl)
# Per position name, gather the value in column 19 of every fixture-history
# row whose column 3 is positive (rows with 0 in column 3 are left out).
points = {}
for p in players.itervalues():
    scored = [game[19] for game in p['fixture_history']['all'] if game[3] > 0]
    if scored:
        points.setdefault(p['type_name'], []).extend(scored)
def poshist(axis, position):
    """Draw the histogram of points for one position on the given axis.

    axis     -- object providing hist() and set_title() (a matplotlib axis)
    position -- key into the module-level `points` dict, e.g. "Forward"

    Returns the same axis so calls can be chained.
    """
    scores = points[position]
    axis.hist(scores)
    axis.set_title(position)
    return axis
# One histogram per position, side by side on a shared y axis.
fig, (ax0, ax1, ax2, ax3) = plt.subplots(ncols=4, sharey=True, figsize=(18,4))
panels = (
    (ax0, "Forward"),
    (ax1, "Midfielder"),
    (ax2, "Defender"),
    (ax3, "Goalkeeper"),
)
for axis, position in panels:
    poshist(axis, position)
fig.show()
# opponents maps a three-letter opponent code to [total points, appearances],
# summed over every player and every game in which the player got minutes.
opponents = {}
for player in players.itervalues():
    for game in player["fixture_history"]["all"]:
        minutes, pts = game[3], game[19]
        if minutes != 0:
            # the first three characters of the result string are the opponent
            tally = opponents.setdefault(game[2][:3], [0, 0])
            tally[0] += pts
            tally[1] += 1
from collections import OrderedDict

# Mean points per appearance against each opponent.
avgs = dict((opponent, score / float(n))
            for opponent, (score, n) in opponents.iteritems())

# Same data ordered from the lowest to the highest average, for plotting.
sorted_avgs = OrderedDict(sorted(avgs.items(), key=lambda pair: pair[1]))
labels = sorted_avgs.keys()
heights = sorted_avgs.values()

fig, ax = plt.subplots(figsize=(18,4))
x_pos = np.arange(0, len(labels))
ax.set_xticks(x_pos)
ax.set_xticklabels(labels, rotation=45)
ax.plot(x_pos, heights, linewidth=3)
fig.show()

# Unweighted mean of the per-opponent averages.
avg_opponent = sum(avgs.values())/float(len(avgs))
# Team name as spelled in the API's fixture/team fields -> three-letter code
# used in the fixture-history result strings. Kept in alphabetical order.
team_abbreviations = {
    'Arsenal': 'ARS',
    'Aston Villa': 'AVL',
    'Cardiff City': 'CAR',
    'Chelsea': 'CHE',
    'Crystal Palace': 'CRY',
    'Everton': 'EVE',
    'Fulham': 'FUL',
    'Hull City': 'HUL',
    'Liverpool': 'LIV',
    'Man City': 'MCI',
    'Man Utd': 'MUN',
    'Newcastle': 'NEW',
    'Norwich': 'NOR',
    'Southampton': 'SOU',
    'Stoke City': 'STK',
    'Sunderland': 'SUN',
    'Swansea': 'SWA',
    'Tottenham': 'TOT',
    'West Brom': 'WBA',
    'West Ham': 'WHU',
}
class Game(object):
    """One row of a player's fixture history.

    game_json is the raw row (a sequence); only three columns are read:
    column 2 (result string such as u'AVL(H) 1-3'), column 3 (minutes)
    and column 19 (points).
    """

    def __init__(self, game_json):
        fixture = game_json[2]
        self.minutes = game_json[3]
        self.points = game_json[19]
        # first three characters are the opponent code, the fifth is the venue
        self.opp, self.loc = fixture[:3], fixture[4]  # "A" for away, "H" for home

    def __repr__(self):
        summary = (self.opp, self.loc, self.points)
        return "Game vs. %s %s: %s pts" % summary
class Player(object):
    """A fantasy player built from one raw API record (a dict).

    Attributes set by the constructor:
        raw         -- the untouched record
        games       -- list of Game, one per row of fixture_history["all"]
        name        -- "<first_name> <second_name>" (unicode)
        cost        -- the record's "event_cost" value
                       (presumably tenths of a million pounds -- confirm)
        position    -- the record's "type_name", e.g. "Goalkeeper"
        pos         -- short position prefix ("gk", "d", "m" or "f")
        team        -- three-letter team code looked up in team_abbreviations
        idn         -- the record's "id"
        news, news_return -- copied from the record
        upcoming    -- list of (gameweek number, opponent code, venue letter)

    Raises KeyError if the record lacks one of those fields or names a team
    or position that is not in the lookup tables.
    """

    # Position name as delivered by the API -> short prefix for that position.
    _POS_ABBREVIATIONS = {
        "Goalkeeper": "gk",
        "Defender": "d",
        "Midfielder": "m",
        "Forward": "f",
    }

    def __init__(self, player_json):
        self.raw = player_json
        self.games = [Game(g) for g in player_json["fixture_history"]["all"]]
        self.name = u"{first_name} {second_name}".format(**player_json)
        self.cost = player_json["event_cost"]
        self.position = player_json["type_name"]
        self.team = team_abbreviations[player_json["team_name"]]
        self.idn = player_json["id"]
        self.news = player_json["news"]
        self.news_return = player_json["news_return"]
        self.pos = self.shortname(self.position)
        self.upcoming = self.get_upcoming_fixtures(player_json["fixtures"]["all"])

    def get_upcoming_fixtures(self, fixtures):
        """Convert raw fixture rows into (week, opponent code, venue) tuples.

        Each row is (date, u'Gameweek N', u'Team name (H|A)'). Rows whose
        opponent is "-" are dropped.
        """
        upcoming = []
        for _, gameweek, opponent in fixtures:
            week = int(gameweek.split()[-1])
            if opponent == "-":
                continue
            opp, loc = opponent.split('(')
            opp = team_abbreviations[opp.strip()]
            loc = loc[0]  # "H)" -> "H"
            upcoming.append((week, opp, loc))
        return upcoming

    def shortname(self, position):
        """Return the short prefix for a position name (KeyError if unknown)."""
        return self._POS_ABBREVIATIONS[position]

    def __repr__(self):
        # repr must be a byte string on Python 2, so non-ASCII characters in
        # the name are dropped rather than risking an encoding error.
        return "#%s %s %s £%s %s" % (self.idn, self.team, self.name.encode("ascii", "ignore"), self.cost, self.pos)

    def __unicode__(self):
        # The template has to be a unicode literal: self.name is unicode, and
        # mixing it into a byte string that contains a non-ASCII pound sign
        # raises UnicodeDecodeError on Python 2. \u00a3 is the pound sign.
        return u"#%s %s \u00a3%s %s" % (self.idn, self.name, self.cost, self.pos)
# One Player object per raw record.
player_objs = [Player(record) for record in players.itervalues()]


def find_player(needle):
    """Return every player whose full name contains `needle`, ignoring case.

    The result is a (possibly empty) list in player_objs order.
    """
    matches = []
    for candidate in player_objs:
        if needle.lower() in candidate.name.lower():
            matches.append(candidate)
    return matches
p = find_player('Van Persie')[0]
print p.name
print p.position
print p.cost
print p.idn
print p.upcoming[:3] # upcoming games
print p.games[:3] # games he's already played
print p
Robin van Persie Forward 136 264 [(25, 'FUL', u'H'), (26, 'ARS', u'A'), (27, 'CRY', u'A')] [Game vs. SWA A: 12 pts, Game vs. CHE H: 2 pts, Game vs. LIV A: 1 pts] #264 MUN Robin van Persie £136 f
homeaway = {"A": 0, "H": 0}
n = 0.
for player in player_objs:
#only consider full games to eliminate minute bias
for game in [p for p in player.games if p.minutes == 90]:
homeaway[game.loc] += game.points
n += 1
homeaway["A"] /= n
homeaway["H"] /= n
homefield = homeaway["H"] - homeaway["A"]
print homefield, homeaway
fig, ax = plt.subplots(figsize=(2,4))
x_pos = np.arange(0, len(homeaway.keys()))
ax.set_xticks(x_pos+.4)
ax.set_xticklabels(["Away", "Home"], rotation=45)
ax.bar(x_pos, homeaway.values())
fig.show()
0.32794340576 {'A': 1.6139464375947448, 'H': 1.9418898433552298}
def adjusted_score(game):
    """Return a game's points adjusted for venue and opponent.

    Away games gain half of `homefield`, home games lose it; the score is
    then shifted by how far the opponent's average (`avgs`) lies from
    `avg_opponent`. Raises KeyError if the opponent is not in `avgs`.
    """
    venue_shift = homefield/2 if game.loc == "A" else -homefield/2
    opponent_shift = avg_opponent - avgs[game.opp]
    return game.points + venue_shift + opponent_shift
def adjusted_average(player):
    """Return the mean adjusted_score() over all of a player's games.

    Every entry in player.games counts, including games with 0 minutes.
    A player with no games at all yields 0.0 instead of raising
    ZeroDivisionError.
    """
    games = player.games
    if not games:
        return 0.0
    return sum(adjusted_score(g) for g in games) / float(len(games))
def game_value(game):
    """Return the points adjustment for one upcoming fixture.

    `game` is indexed positionally: game[1] is the opponent code and game[2]
    the venue letter. Home fixtures gain half of `homefield`, all others
    lose it; the opponent's average relative to `avg_opponent` is added.
    """
    opp, loc = game[1], game[2]
    venue_shift = homefield/2 if loc == "H" else -homefield/2
    return venue_shift + (avgs[opp] - avg_opponent)
def expected_points(player, n=5):
    """Return the expected points per game over the player's next n fixtures.

    For each of player.upcoming[:n] the player's adjusted average is combined
    with that fixture's game_value(); the sum is divided by n. Note that the
    divisor is always n, even when fewer than n fixtures are available.
    """
    baseline = adjusted_average(player)
    total = sum((baseline + game_value(fixture) for fixture in player.upcoming[:n]), 0.)
    return total/n
print expected_points(find_player(u"Mutch")[0])
print expected_points(find_player(u"Suárez")[0])
print expected_points(find_player(u"Sanogo")[0])
3.41602554986 8.14701669057 -0.131252429959
# Attach the model to Player as a method (monkey-patch) and build the player
# objects again. Outside of a presentation the method would simply be written
# inside the Player class; it is bolted on here only because the model is
# introduced after the class.
Player.expected_points = expected_points
player_objs = [Player(record) for record in players.itervalues()]
player_objs[1].expected_points()
4.3687475700409664
Given the constraints:
Maximize expected team value
Given this simple optimization problem:
x1 >= 1, x2 >= 1, x1 + x2 >= 2; minimize x1 + x2, where x1 and x2 are integers
lp_solve takes a file that looks like:
min: x1 + x2; x1 >= 1; x2 >= 1; x1 + x2 >= 2; int x1, x2;
Which, when run, results in:
$ lp_solve /tmp/simple_example Value of objective function: 2.00000000 Actual values of the variables: x1 1 x2 1
max: 5.6 gk1 + 4.3 mf2 + …; /* maximize expected points */ 3.7 gk1 + 9.3 mf2 + … < 100; /* team must cost <100£ */ gk1 + gk12 + gk34 + … = 2; /* limit to 2 goalkeepers */ d3 + d4 + d23 + … = 5; /* limit to 5 defenders */ … /* repeat for all positions */ bin gk1, mf2, d3, f4, d5, …; /* all variables are binary */
def objective_function():
    """Return the lp_solve objective line: maximise total expected points.

    Each player contributes one term "<expected points> <pos><id>".
    """
    terms = []
    for p in player_objs:
        terms.append("{ev} {p.pos}{p.idn}".format(p=p, ev=p.expected_points()))
    return "max: " + " + ".join(terms) + ";\n"
def cost_constraint(max_price):
    """Return the lp_solve row limiting the summed player cost to max_price.

    max_price is in the same unit as Player.cost.
    """
    priced = ["{p.cost} {p.pos}{p.idn}".format(p=p) for p in player_objs]
    total_cost = " + ".join(priced)
    return "cost_constraint: " + total_cost + " <= %s;\n" % max_price
def position_constraints():
    """Return the lp_solve rows fixing how many players each position gets.

    One row per position, in the order goalkeeper, defender, midfielder,
    forward: exactly 2, 5, 5 and 3 players respectively.
    """
    # (position name, variable prefix, required number of players)
    squad_rules = (
        ("Goalkeeper", "gk", 2),
        ("Defender", "d", 5),
        ("Midfielder", "m", 5),
        ("Forward", "f", 3),
    )
    rows = []
    for position, prefix, limit in squad_rules:
        names = ["{0}{1.idn}".format(prefix, p)
                 for p in player_objs if p.position == position]
        rows.append("{0}_limit: {1} = {2};\n".format(prefix, " + ".join(names), limit))
    return "".join(rows)
# Assemble the lp_solve model in an in-memory buffer: objective first, then
# the budget row (1000 in Player.cost units), then the per-position rows.
buf = StringIO.StringIO()
buf.writelines([
    objective_function(),
    cost_constraint(1000),
    position_constraints(),
])
# I've skipped this, it's probably easier to skip the declaration of all the variable names?
# not very exciting
def all_player_variables():
    """Return the lp_solve line declaring every player variable as binary."""
    names = ["{p.pos}{p.idn}".format(p=p) for p in player_objs]
    return "bin %s;\n" % ", ".join(names)


buf.write(all_player_variables())
import subprocess, re
def get_player(idn):
    """Return the first player in player_objs whose idn equals `idn`.

    Raises ValueError when no player has that id.
    """
    candidates = (p for p in player_objs if p.idn == idn)
    try:
        return next(candidates)
    except StopIteration:
        raise ValueError("Unable to find player")
def return_team(lp):
    """Run lp_solve on a model and return the list of selected players.

    lp -- the complete model as text in lp_solve's LP format.

    Returns a list of player objects (looked up with get_player) for every
    variable that lp_solve reports with the value 1.

    Raises subprocess.CalledProcessError if lp_solve exits with a non-zero
    status, OSError if the lp_solve executable cannot be started, and
    ValueError (from get_player) if a reported id matches no player.
    """
    # Feed the model on stdin rather than interpolating it into a shell
    # command: no quoting problems, no argument-length limit, no shell.
    proc = subprocess.Popen(
        ["lp_solve"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    output, _ = proc.communicate(lp)
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, "lp_solve", output=output)

    variable_id = re.compile(r"^\w+?(\d+)")  # "gk297   1" -> "297"
    team_ids = []
    for line in output.split('\n'):
        # selected variables are the rows whose value column is exactly 1
        if not re.search(r" 1$", line):
            continue
        match = variable_id.search(line)
        if match is None:
            # a line ending in " 1" that is not a variable row
            continue
        team_ids.append(int(match.group(1)))
    return [get_player(idn) for idn in team_ids]


return_team(buf.getvalue())
[#2 ARS Wojciech Szczesny £60 gk, #8 ARS Per Mertesacker £66 d, #46 AVL Leandro Bacuna £44 m, #63 CAR Pete Whittingham £53 m, #69 CAR Jordan Mutch £46 m, #82 CHE John Terry £67 d, #130 EVE Seamus Coleman £66 d, #214 LIV Luis Surez £134 f, #232 MCI Gnegneri Yaya Tour £101 m, #297 NOR John Ruddy £49 gk, #326 SOU Jose Fonte £52 d, #328 SOU Luke Shaw £49 d, #333 SOU Adam Lallana £77 m, #342 SOU Rickie Lambert £70 f, #343 SOU Jay Rodriguez £64 f]
# IPython magic: execute the `talktools` script in this session.
# NOTE(review): talktools is not part of the visible file -- presumably
# presentation helpers; confirm.
%run talktools