Skip to content

Commit

Permalink
feat(crawler): init pull bing daily wallpaper
Browse files Browse the repository at this point in the history
  • Loading branch information
litingyes committed Nov 24, 2024
1 parent f612d32 commit 5d8e174
Show file tree
Hide file tree
Showing 8 changed files with 656 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,6 @@ yarn-error.log*
# typescript
*.tsbuildinfo
next-env.d.ts

# python
__pycache__
25 changes: 25 additions & 0 deletions crawler/bing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from json import dumps
from os import path
from utils import ensure_path_exists, get_database_path, getToday
from requests import get

# Bing market codes to crawl; each one is sent as the `mkt` query parameter
# and also names the per-market output file (`<mkt>.json`).
MKTs = ["en-US", "zh-CN"]


def pull_daily_wallpaper():
    """Download Bing daily-wallpaper metadata for every market in ``MKTs``.

    For each market, the ``images`` array returned by the HPImageArchive
    endpoint is written as indented JSON to
    ``<database>/bing/daily-wallpaper/<date>/<mkt>.json``, where ``<date>``
    is the value returned by ``getToday()``.

    Raises:
        requests.RequestException: If a request times out, fails, or the
            server answers with an HTTP error status.
    """
    target_dir = path.join(get_database_path(), "bing", "daily-wallpaper", getToday())

    for mkt in MKTs:
        r = get(
            "https://www.bing.com/HPImageArchive.aspx",
            {"format": "js", "idx": 0, "n": 8, "mkt": mkt},
            # Without a timeout, requests may block indefinitely on a stalled
            # connection.
            timeout=30,
        )
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        r.raise_for_status()
        data = r.json()["images"]
        target_file = path.join(target_dir, mkt + ".json")
        ensure_path_exists(target_file)
        # Explicit encoding keeps the output independent of the platform's
        # default locale encoding.
        with open(target_file, "w", encoding="utf-8") as f:
            f.write(dumps(data, ensure_ascii=True, indent=2))


def pull_from_bing():
    """Run the Bing crawl tasks (currently only the daily wallpaper pull)."""
    pull_daily_wallpaper()
9 changes: 9 additions & 0 deletions crawler/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from bing import pull_from_bing


def main():
    """Script entry point: run the Bing crawler."""
    pull_from_bing()


# Only start crawling when this file is executed directly as a script.
if __name__ == "__main__":
    main()
25 changes: 25 additions & 0 deletions crawler/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from os import getcwd, path, makedirs
from datetime import datetime, timezone


# file
def get_database_path():
    """Return the ``database`` directory path under the current working directory."""
    working_dir = getcwd()
    return path.join(working_dir, "database")


def ensure_path_exists(_path, is_dir=False):
    """Create ``_path`` if it does not exist yet.

    Args:
        _path: File or directory path to create.
        is_dir: When true, create ``_path`` as a directory (including any
            missing parents). Otherwise create the missing parent
            directories and then an empty file at ``_path``.

    Returns:
        None. A path that already exists is left untouched.
    """
    if path.exists(_path):
        return

    if is_dir:
        # exist_ok tolerates the directory appearing between the existence
        # check above and this call.
        makedirs(_path, exist_ok=True)
        return

    dir_path = path.dirname(_path)
    # A bare file name has an empty dirname; makedirs("") would raise
    # FileNotFoundError, so only create a parent when there is one.
    if dir_path:
        ensure_path_exists(dir_path, True)
    with open(_path, "w") as file:
        file.write("")


# time
def getToday():
    """Return the current UTC date formatted as ``YYYY-MM-DD``."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.strftime("%Y-%m-%d")
138 changes: 138 additions & 0 deletions database/bing/daily-wallpaper/2024-11-24/en-US.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
[
{
"startdate": "20241123",
"fullstartdate": "202411230800",
"enddate": "20241124",
"url": "/th?id=OHR.FibonacciAloe_EN-US5137471725_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.FibonacciAloe_EN-US5137471725",
"copyright": "Spiral aloe, Kangaroo Island, Australia (\u00a9 Michael Melford/Alamy)",
"copyrightlink": "https://www.bing.com/search?q=Fibonacci+Day&form=hpcapt&filters=HpDate%3a%2220241123_0800%22",
"title": "Nature's secret code",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241123_FibonacciAloe%22&FORM=HPQUIZ",
"wp": true,
"hsh": "1f296b05a27925c2da92265f59d3a259",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241122",
"fullstartdate": "202411220800",
"enddate": "20241123",
"url": "/th?id=OHR.ZafraCastle_EN-US5032917939_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.ZafraCastle_EN-US5032917939",
"copyright": "Castle of Zafra, Guadalajara province, Spain (\u00a9 Eduard Gene/Getty Images)",
"copyrightlink": "https://www.bing.com/search?q=castle+of+zafra&form=hpcapt&filters=HpDate%3a%2220241122_0800%22",
"title": "Castle on a crag",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241122_ZafraCastle%22&FORM=HPQUIZ",
"wp": true,
"hsh": "3bf7d8fad5667cb2e2391bcdec53c8c1",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241121",
"fullstartdate": "202411210800",
"enddate": "20241122",
"url": "/th?id=OHR.LionCubs_EN-US4742616367_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.LionCubs_EN-US4742616367",
"copyright": "Lion cubs at sunrise, South Africa (\u00a9 Ruan Springorum/Getty Images)",
"copyrightlink": "https://www.bing.com/search?q=lion&form=hpcapt&filters=HpDate%3a%2220241121_0800%22",
"title": "Sunrise on the savanna",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241121_LionCubs%22&FORM=HPQUIZ",
"wp": true,
"hsh": "3ef9377bdaca3d4c9ef3a35784a75464",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241120",
"fullstartdate": "202411200800",
"enddate": "20241121",
"url": "/th?id=OHR.BeyondSaype_EN-US4398054405_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.BeyondSaype_EN-US4398054405",
"copyright": "Giant land art painting 'Beyond Crisis' by Saype, Leysin, Switzerland (\u00a9 VALENTIN FLAURAUD/EPA-EFE/Shutterstock)",
"copyrightlink": "https://www.bing.com/search?q=World+Children%27s+Day&form=hpcapt&filters=HpDate%3a%2220241120_0800%22",
"title": "United for World Children\u2019s Day",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241120_BeyondSaype%22&FORM=HPQUIZ",
"wp": false,
"hsh": "9e82f5690f170d4f7987498191a310b7",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241119",
"fullstartdate": "202411190800",
"enddate": "20241120",
"url": "/th?id=OHR.TasmansArch_EN-US4274981499_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.TasmansArch_EN-US4274981499",
"copyright": "Tasmans Arch, Tasmania, Australia (\u00a9 Gary Bell/Minden Pictures)",
"copyrightlink": "https://www.bing.com/search?q=Tasmania&form=hpcapt&filters=HpDate%3a%2220241119_0800%22",
"title": "An arch that rocks",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241119_TasmansArch%22&FORM=HPQUIZ",
"wp": true,
"hsh": "8c943c90dfa9d7139022b989fd564b0a",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241118",
"fullstartdate": "202411180800",
"enddate": "20241119",
"url": "/th?id=OHR.PorthcawlLighthouse_EN-US4147042402_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.PorthcawlLighthouse_EN-US4147042402",
"copyright": "Porthcawl Lighthouse, Wales (\u00a9 Leighton Collins/Alamy)",
"copyrightlink": "https://www.bing.com/search?q=Porthcawl+lighthouse+Wales&form=hpcapt&filters=HpDate%3a%2220241118_0800%22",
"title": "Guiding the way since 1860",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241118_PorthcawlLighthouse%22&FORM=HPQUIZ",
"wp": true,
"hsh": "6010ac5d1edcb7478a2646544f10d5ed",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241117",
"fullstartdate": "202411170800",
"enddate": "20241118",
"url": "/th?id=OHR.RedStag_EN-US3910525623_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.RedStag_EN-US3910525623",
"copyright": "Red deer stag in the Caledonian Forest, Glen Affric, Scottish Highlands (\u00a9 Terry Whittaker/Alamy)",
"copyrightlink": "https://www.bing.com/search?q=Red+deer&form=hpcapt&filters=HpDate%3a%2220241117_0800%22",
"title": "The hart of the Highlands",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241117_RedStag%22&FORM=HPQUIZ",
"wp": true,
"hsh": "6dbb3594458bc7b05053378af8076926",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241116",
"fullstartdate": "202411160800",
"enddate": "20241117",
"url": "/th?id=OHR.FrieslandNetherlands_EN-US3770890281_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.FrieslandNetherlands_EN-US3770890281",
"copyright": "Wadden Sea coast, near Moddergat, Friesland, Netherlands (\u00a9 Ron ter Burg/Minden Pictures)",
"copyrightlink": "https://www.bing.com/search?q=Wadden+Sea+coast&form=hpcapt&filters=HpDate%3a%2220241116_0800%22",
"title": "Mud, sea, and sky",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241116_FrieslandNetherlands%22&FORM=HPQUIZ",
"wp": true,
"hsh": "d645b82aa2891f1ffa45b9214bc7a1b3",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
}
]
138 changes: 138 additions & 0 deletions database/bing/daily-wallpaper/2024-11-24/zh-CN.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
[
{
"startdate": "20241123",
"fullstartdate": "202411231600",
"enddate": "20241124",
"url": "/th?id=OHR.SonomaCoast_ZH-CN9187330701_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.SonomaCoast_ZH-CN9187330701",
"copyright": "\u5dde\u7acb\u516c\u56ed\uff0c\u7d22\u8bfa\u739b\u6d77\u5cb8\uff0c\u7f8e\u56fd\u52a0\u5229\u798f\u5c3c\u4e9a\u5dde (\u00a9 Rachid Dahnoun/Tandem Stills + Motion)",
"copyrightlink": "https://www.bing.com/search?q=%E7%B4%A2%E8%AF%BA%E9%A9%AC%E6%B5%B7%E5%B2%B8%E5%B7%9E%E7%AB%8B%E5%85%AC%E5%9B%AD&form=hpcapt&mkt=zh-cn",
"title": "\u6f6e\u6c50\u4e0e\u66ae\u8272",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241123_SonomaCoast%22&FORM=HPQUIZ",
"wp": true,
"hsh": "31e1a244a11b1cd1fa734c5f1b41e232",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241122",
"fullstartdate": "202411221600",
"enddate": "20241123",
"url": "/th?id=OHR.FibonacciAloe_ZH-CN8974137481_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.FibonacciAloe_ZH-CN8974137481",
"copyright": "\u591a\u53f6\u82a6\u835f\uff0c\u888b\u9f20\u5c9b\uff0c\u6fb3\u5927\u5229\u4e9a (\u00a9 Michael Melford/Alamy)",
"copyrightlink": "https://www.bing.com/search?q=%E6%96%90%E6%B3%A2%E9%82%A3%E5%A5%91%E7%BA%AA%E5%BF%B5%E6%97%A5&form=hpcapt&mkt=zh-cn",
"title": "\u5927\u81ea\u7136\u7684\u5bc6\u7801",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241122_FibonacciAloe%22&FORM=HPQUIZ",
"wp": true,
"hsh": "d8118ce62b80700112ae617a007088bd",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241121",
"fullstartdate": "202411211600",
"enddate": "20241122",
"url": "/th?id=OHR.ZafraCastle_ZH-CN8791148758_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.ZafraCastle_ZH-CN8791148758",
"copyright": "\u8428\u592b\u62c9\u57ce\u5821\uff0c\u74dc\u8fbe\u62c9\u54c8\u62c9\u7701\uff0c\u897f\u73ed\u7259 (\u00a9 Eduard Gene/Getty Images)",
"copyrightlink": "https://www.bing.com/search?q=%E6%8B%89%E8%90%A8%E5%A4%AB%E6%8B%89%E5%9F%8E%E5%A0%A1&form=hpcapt&mkt=zh-cn",
"title": "\u60ac\u5d16\u4e0a\u7684\u57ce\u5821",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241121_ZafraCastle%22&FORM=HPQUIZ",
"wp": true,
"hsh": "021e80877afadc5853e284ffab1c5de0",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241120",
"fullstartdate": "202411201600",
"enddate": "20241121",
"url": "/th?id=OHR.LionCubs_ZH-CN8538754038_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.LionCubs_ZH-CN8538754038",
"copyright": "\u65e5\u51fa\u65f6\u7684\u4e24\u53ea\u5c0f\u72ee\u5b50\uff0c\u5357\u975e (\u00a9 Ruan Springorum/Getty Images)",
"copyrightlink": "https://www.bing.com/search?q=%E7%8B%AE%E5%AD%90&form=hpcapt&mkt=zh-cn",
"title": "\u5728\u80a9\u5934\u8fdc\u773a",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241120_LionCubs%22&FORM=HPQUIZ",
"wp": true,
"hsh": "1509355b9a72c603b78b79d7b51428a3",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241119",
"fullstartdate": "202411191600",
"enddate": "20241120",
"url": "/th?id=OHR.PontBordeaux_ZH-CN7656263575_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.PontBordeaux_ZH-CN7656263575",
"copyright": "\u76ae\u57c3\u5c14\u6865\uff0c\u6ce2\u5c14\u591a\uff0c\u6cd5\u56fd (\u00a9 SergiyN/Getty Images)",
"copyrightlink": "https://www.bing.com/search?q=%E6%B3%A2%E5%B0%94%E5%A4%9A%E7%9A%AE%E5%9F%83%E5%B0%94%E6%A1%A5&form=hpcapt&mkt=zh-cn",
"title": "\u52a0\u9f99\u6cb3\u4e0a\u7684\u5386\u53f2\u901a\u9053",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241119_PontBordeaux%22&FORM=HPQUIZ",
"wp": true,
"hsh": "fb05ec85c0517677d57c4375622225fc",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241118",
"fullstartdate": "202411181600",
"enddate": "20241119",
"url": "/th?id=OHR.TasmansArch_ZH-CN7062784426_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.TasmansArch_ZH-CN7062784426",
"copyright": "\u5854\u65af\u66fc\u62f1\u95e8\uff0c\u5854\u65af\u9a6c\u5c3c\u4e9a\u5dde\uff0c\u6fb3\u5927\u5229\u4e9a (\u00a9 Gary Bell/Minden Pictures)",
"copyrightlink": "https://www.bing.com/search?q=%E5%A1%94%E6%96%AF%E9%A9%AC%E5%B0%BC%E4%BA%9A%E6%B4%B2&form=hpcapt&mkt=zh-cn",
"title": "\u4ee4\u4eba\u9707\u64bc\u7684\u62f1\u95e8",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241118_TasmansArch%22&FORM=HPQUIZ",
"wp": true,
"hsh": "52088015cec496f939437f48d8e0b6b0",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241117",
"fullstartdate": "202411171600",
"enddate": "20241118",
"url": "/th?id=OHR.PorthcawlLighthouse_ZH-CN6655235820_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.PorthcawlLighthouse_ZH-CN6655235820",
"copyright": "\u6ce2\u65af\u8003\u5c14\u706f\u5854\uff0c\u5357\u5a01\u5c14\u58eb\uff0c\u82f1\u56fd (\u00a9 Leighton Collins/Alamy)",
"copyrightlink": "https://www.bing.com/search?q=%E5%A8%81%E5%B0%94%E5%A3%AB%E6%B3%A2%E6%96%AF%E8%80%83%E5%B0%94%E7%81%AF%E5%A1%94&form=hpcapt&mkt=zh-cn",
"title": "\u4e00\u76f4\u6307\u5f15\u7740\u65b9\u5411",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241117_PorthcawlLighthouse%22&FORM=HPQUIZ",
"wp": true,
"hsh": "ff201e576ba0df278f01b2f8b4ea3fbf",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
},
{
"startdate": "20241116",
"fullstartdate": "202411161600",
"enddate": "20241117",
"url": "/th?id=OHR.RedStag_ZH-CN6403546321_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp",
"urlbase": "/th?id=OHR.RedStag_ZH-CN6403546321",
"copyright": "\u52a0\u91cc\u4e1c\u6797\u4e2d\u7684\u9a6c\u9e7f\uff0c\u963f\u592b\u529b\u8c37\uff0c\u82cf\u683c\u5170\u9ad8\u5730 (\u00a9 Terry Whittaker/Alamy)",
"copyrightlink": "https://www.bing.com/search?q=%E9%A9%AC%E9%B9%BF&form=hpcapt&mkt=zh-cn",
"title": "\u82cf\u683c\u5170\u9ad8\u5730\u7684\u9a6c\u9e7f",
"quiz": "/search?q=Bing+homepage+quiz&filters=WQOskey:%22HPQuiz_20241116_RedStag%22&FORM=HPQUIZ",
"wp": true,
"hsh": "2dfe32ee45b2fe2ce39d0b084a0a7c9a",
"drk": 1,
"top": 1,
"bot": 1,
"hs": []
}
]
Loading

0 comments on commit 5d8e174

Please sign in to comment.