-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_item_links.sh
37 lines (31 loc) · 1013 Bytes
/
get_item_links.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/bash
# Author: Dino Cajic
# Email: dinocajic@gmail.com
# Year: 2017
# Grabs the links from each web page that's specified in the links.txt file
# Initialize the variables.
#   output: file that collects the item links scraped from every page.
#   input:  file listing the pages to scrape, one URL per line.
output="output/part_links.txt"
input="output/links.txt"

# Start every run with an empty output file. A truncating redirection creates
# the file when it is missing and empties it when it already exists, so no
# separate existence check (or the non-POSIX `truncate` utility) is needed.
if ! : > "$output"; then
  # Without a writable output file nothing below can record its results.
  printf 'Cannot create or clear %s\n' "$output" >&2
  exit 1
fi
# Cycle through each link in the links.txt file.
# Downloads every listed page, keeps the lines where an `<a href` tag is
# directly followed by an `<img` tag, reduces each of those lines to the href
# value, and appends the result to part_links.txt.
#   - `read -r` keeps backslashes literal; IFS is deliberately left at its
#     default so whitespace around each line is still trimmed.
#   - `|| [[ -n "$line" ]]` also handles a last line with no trailing newline.
while read -r line || [[ -n "$line" ]]; do
  # Nothing to download for a blank line.
  [[ -n "$line" ]] || continue

  # Show which page is being fetched.
  printf '%s\n' "$line"

  # Quoting the URL stops the shell from word-splitting it or treating
  # characters such as ? and * as glob patterns.
  # The first sed expression drops everything up to and including `<a href="`;
  # the second drops everything from the closing `">` onwards.
  wget -O- "$line" \
    | grep -e '<a href[^<]*<img' \
    | sed -e 's/^.*<a href="//' -e 's/">.*//' \
    >> "$output"
done < "$input"
# Cycles through the part_links.txt file.
# Since each line has a link to a particular item, the link is passed to
# get_skus.sh to get the material numbers and skus from that page.
#
# The list is read on file descriptor 3 rather than stdin, so that anything the
# child script reads from stdin cannot swallow the remaining links.
# NOTE(review): get_skus.sh is not visible here; confirm it does not rely on
# receiving the link list on its stdin.
while read -r line <&3 || [[ -n "$line" ]]; do
  # Skip blank lines instead of invoking the script with an empty link.
  [[ -n "$line" ]] || continue

  # Quoted so the link reaches get_skus.sh as exactly one argument.
  bash ./get_skus.sh "$line"
done 3< "$output"