forked from justmarkham/DAT3
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path16_recommenders_class.py
71 lines (62 loc) · 1.95 KB
/
16_recommenders_class.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python
from __future__ import division
from collections import Counter
# load data and keep around in convenient forms
with open('../data/user_brand.csv') as f:
    # Read the file while it is still open; each usable line becomes a
    # [user, brand] pair. Splitting on the first comma only keeps a brand
    # name that itself contains a comma in one piece, and lines without
    # any comma (e.g. blank lines) are skipped so the two-name unpacking
    # below cannot fail.
    # NOTE(review): assumes unquoted "user,brand" rows and no header
    # line -- confirm against the actual CSV.
    data = [[field.strip() for field in line.split(',', 1)]
            for line in f if ',' in line]

# map each user to the set of brands associated with them
brandsfor = dict()
for user, brand in data:
    brandsfor.setdefault(user, set()).add(brand)

# count frequencies that brands appear, for normalizing by
frequency = Counter(pair[1] for pair in data)
def jaccard(firsts, seconds, frequencies=None):
    """
    Return a frequency-weighted Jaccard index of two brand sets.

    Every brand contributes a weight of 1 / count, where count is the
    number of times the brand occurs according to `frequencies`, so
    common brands matter less than rare ones. The result is the total
    weight of the intersection divided by the total weight of the union.

    firsts, seconds -- sets of brand names.
    frequencies -- optional mapping of brand -> occurrence count. When
        omitted, the module-level `frequency` is used, which is the
        original behaviour. Brands missing from the mapping are treated
        as if they had a count of 100.

    Returns a float. When both sets are empty there is nothing to
    compare, so 0.0 is returned instead of dividing by zero.
    """
    union = firsts | seconds
    if not union:
        return 0.0
    if frequencies is None:
        frequencies = frequency

    def weight(brand):
        # 1.0 forces float division, so the result does not depend on
        # the file's `from __future__ import division`.
        return 1.0 / frequencies.get(brand, 100)

    return (sum(weight(brand) for brand in firsts & seconds) /
            sum(weight(brand) for brand in union))
def safe_brands(brands):
    """
    Normalise a convenient `brands` argument to a set.

    A single string (including `unicode` on Python 2) becomes a
    one-element set; a list or tuple becomes the set of its items.
    Anything else, in particular an existing set, is returned unchanged.
    """
    try:
        text_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        text_types = str  # Python 3 has a single text type
    if isinstance(brands, text_types):
        # Wrap rather than iterate: set("Target") would split the name
        # into individual characters.
        return set([brands])
    if isinstance(brands, (list, tuple)):
        return set(brands)
    return brands
def recommend_for_brands(brands):
    """
    Return top five recommended brands
    when given brands to recommend for.

    Every user in the module-level `brandsfor` is compared with `brands`
    using `jaccard`. Each brand such a user likes that is not already in
    `brands` is credited with that user's similarity score, and the
    brands with the highest totals are returned, best first. Fewer than
    five names (possibly none) are returned when there are not enough
    candidates.

    brands -- a set, list or single brand name (see `safe_brands`).
    """
    brands = safe_brands(brands)
    scores = Counter()
    for liked in brandsfor.values():
        # Every set stored in `brandsfor` holds at least one brand, so
        # the union inside `jaccard` is never empty here.
        similarity = jaccard(brands, liked)
        if similarity <= 0:
            # Users sharing nothing with `brands` say nothing useful.
            continue
        for brand in liked - brands:
            scores[brand] += similarity
    # Highest score first; ties are broken alphabetically so the result
    # is the same on every run.
    ranked = sorted(scores.items(), key=lambda item: (-item[1], item[0]))
    return [brand for brand, _ in ranked[:5]]
def recommend_for_user(user):
    """
    Get a user's brands and recommend based on them.

    user -- a key of the module-level `brandsfor` mapping. An unknown
        user is treated as liking no brands.

    Returns whatever `recommend_for_brands` returns for that user's
    brand set.
    """
    return recommend_for_brands(brandsfor.get(user, set()))
def for_brands(brands):
    """
    Return a pretty-print string of recommendations for brands alone.

    brands -- a set, list or single brand name (see `safe_brands`).

    The liked brands are listed alphabetically so the sentence does not
    depend on set iteration order; recommendations keep the order in
    which `recommend_for_brands` returned them. An empty list on either
    side is rendered as "nothing".
    """
    brands = safe_brands(brands)
    recs = recommend_for_brands(brands)
    return "For a user who likes {liked}, we recommend {recs}.".format(
        liked=", ".join(sorted(brands)) or "nothing",
        recs=", ".join(recs) or "nothing")
def for_user(user):
    """
    Return a pretty-print string of recommendations for a user.

    user -- a key of the module-level `brandsfor` mapping.

    The user's brands are listed alphabetically so the sentence does not
    depend on set iteration order; recommendations keep the order in
    which `recommend_for_user` returned them. An unknown user is shown
    as liking "nothing", and an empty recommendation list is rendered
    as "nothing" too.
    """
    recs = recommend_for_user(user)
    return "For user {user}, who likes {liked}, we recommend {recs}.".format(
        user=user,
        liked=", ".join(sorted(brandsfor.get(user, ["nothing"]))),
        recs=", ".join(recs) or "nothing")
if __name__ == "__main__":
    # Demo output for two brands and two user ids. Each call passes a
    # single parenthesised argument, which prints identically as a
    # Python 2 print statement and as the Python 3 print function.
    print("\n" + for_brands("Target") + "\n")
    print(for_brands("Banana Republic") + "\n")
    print(for_user("86184") + "\n")
    print(for_user("83126") + "\n")