-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathshapeways.py
44 lines (33 loc) · 1.06 KB
/
shapeways.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import lxml.html
from lxml.cssselect import CSSSelector
# get some html
import requests
def scrape_category(category, timeout=10):
    """
    Scrape the shapeways website for items in the specified category.

    Args:
        category: Path fragment appended to 'https://www.shapeways.com/'
            to form the URL of the category page.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the caller forever.

    Returns:
        A list of dicts, one per product entry found on the page, each
        with the keys 'img' (image source), 'url' (product link) and
        'name' (link text). 'img' and 'url' are None when the matching
        attribute is absent. Returns None when the request fails or the
        server answers with an error status.
    """
    try:
        response = requests.get('https://www.shapeways.com/' + category,
                                timeout=timeout)
    except requests.RequestException:
        # Only network/HTTP-level failures are treated as "no result";
        # programming errors and KeyboardInterrupt are allowed to surface.
        return None

    # A Response is falsy for 4xx/5xx statuses; spell that check out.
    if not response.ok:
        return None

    # Build the DOM tree.
    tree = lxml.html.fromstring(response.text)

    # Each match is the container holding one product's image and details.
    select_products = CSSSelector(
        'div.grid-view div.product-row div.clearfix')

    products = []
    for item in select_products(tree):
        image_links = item.cssselect('div.product-img a.product-url')
        name_links = item.cssselect(
            'div.product-details div.product-name '
            'a.product-url')
        if not image_links or not name_links:
            # Not a complete product entry; skip it rather than abort
            # the whole scrape with an IndexError.
            continue

        image_link = image_links[0]
        images = image_link.cssselect('img')
        if not images:
            continue

        products.append({'img': images[0].attrib.get('src'),
                         'url': image_link.attrib.get('href'),
                         'name': name_links[0].text})
    return products