-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfetch.js
117 lines (102 loc) · 3.33 KB
/
fetch.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
const low = require('lowdb');
const _ = require('lodash');
const FileSync = require('lowdb/adapters/FileSync');
const axios = require('axios').default;
const { isRemoved } = require('./helpers');
// Back the lowdb instance with the JSON file 'db.json' through the synchronous file adapter.
const adapter = new FileSync('db.json');
const db = low(adapter);
// Set some defaults (required if your JSON file is empty)
// `durations` collects fetched video metadata; `notFound` collects ids with no API result.
db.defaults({ durations: [], notFound: [] }).write();
// Reload the state from the adapter after the defaults have been written.
db.read();
/**
 * Tells whether a watch-history entry has a truthy `titleUrl`.
 *
 * @param {object} example - Watch-history entry to inspect.
 * @returns {boolean} `true` when `example.titleUrl` is truthy, otherwise `false`.
 */
const titleUrlExists = (example) => Boolean(example.titleUrl);
/**
 * Returns a copy of a watch-history entry with the video id taken from its
 * `titleUrl` added as `id`.
 *
 * The id is read from the `v` query parameter when `titleUrl` parses as a URL,
 * so trailing parameters such as `&t=42s` no longer end up as the id. When
 * there is no `v` parameter, the text after the last `=` is used, as before.
 *
 * @param {object} example - Watch-history entry; must have a `titleUrl`.
 * @returns {object} Shallow copy of `example` with an extra `id` property.
 * @throws {TypeError} If `example.titleUrl` is missing or empty.
 */
const parseId = (example) => {
  const { titleUrl } = example;
  if (!titleUrl) {
    console.error('bad example', example);
    // Fail with an explicit message instead of an incidental
    // "cannot read properties of undefined" from the split below.
    throw new TypeError('parseId: entry has no titleUrl');
  }

  let id = null;
  try {
    id = new URL(titleUrl).searchParams.get('v');
  } catch (err) {
    // Not an absolute URL; fall through to the plain split below.
    id = null;
  }
  if (!id) {
    id = titleUrl.split('\u003d').pop();
  }

  return {
    ...example,
    id,
  };
};
/**
 * Looks up one batch of videos through the YouTube Data API `videos.list`
 * endpoint and stores the outcome in the lowdb database.
 *
 * Videos returned by the API are appended to the `durations` collection;
 * requested ids that are absent from the response are appended to `notFound`
 * as `{ id }` records.
 *
 * @param {Array<object>} videos - History entries that already carry an `id`
 *   (as produced by `parseId`).
 * @returns {Promise<void>} Resolves once the results have been written.
 * @throws {Error} If the `API_KEY` environment variable is not set, or if the
 *   HTTP request fails (the axios rejection is propagated).
 */
async function fetchBatch(videos) {
  if (videos.length === 0) {
    // Nothing to ask for; an empty `id` parameter would only produce an API error.
    return;
  }

  const apiKey = process.env.API_KEY;
  if (!apiKey) {
    throw new Error('API_KEY environment variable is not set');
  }

  // https://developers.google.com/youtube/v3/docs/videos/list
  const ids = videos.map((video) => video.id);
  const idsGetParam = encodeURIComponent(ids.join(','));
  const response = await axios.get(
    // TODO: explore api apart from contentDetail to get an idea of categories, etc
    `https://content.googleapis.com/youtube/v3/videos?id=${idsGetParam}&part=snippet,contentDetails,topicDetails&key=${apiKey}`,
  );
  const respItems = response.data.items || [];

  // Index the requested videos by id, keeping the first entry for each id.
  const videosById = new Map();
  for (const video of videos) {
    if (!videosById.has(video.id)) {
      videosById.set(video.id, video);
    }
  }

  // Merge each API item with its history entry, giving {id, title, titleUrl, duration...}
  const items = respItems.map((item) => {
    // `topicDetails` may be missing entirely, or present without `topicCategories`.
    const rawCategories = (item.topicDetails && item.topicDetails.topicCategories) || [];
    // Keep 'Entertainment' from 'https://en.wikipedia.org/wiki/Entertainment'.
    const topicCategories = rawCategories.map((category) => category.split('wiki/').pop());

    const { publishedAt, title, thumbnails, channelTitle, tags } = item.snippet;
    const snippet = {
      publishedAt,
      title,
      thumbnails: thumbnails ? thumbnails.standard : undefined,
      channelTitle,
      tags: tags || [],
    };

    return {
      ...videosById.get(item.id),
      ...item,
      topicDetails: { ...item.topicDetails, topicCategories },
      snippet,
    };
  });

  // Compare ids with ids: diffing the video objects against id strings would
  // report every requested video as missing.
  const foundIds = items.map((item) => item.id);
  const notFound = _.uniq(_.difference(ids, foundIds));
  if (notFound.length > 0) {
    // One write for the whole batch instead of one full db write per id.
    db.get('notFound')
      .push(...notFound.map((id) => ({ id })))
      .write();
  }

  const records = [];
  for (const item of items) {
    console.log('Printing item', item);
    records.push({
      id: item.id,
      duration: item.contentDetails.duration,
      time: item.time,
      // The video title is already present in the snippet object.
      titleUrl: item.titleUrl,
      snippet: item.snippet,
      category: item.topicDetails.topicCategories,
    });
  }
  if (records.length > 0) {
    db.get('durations')
      .push(...records)
      .write();
  }
}
// Load the exported watch history and work out which videos still need fetching.
const history = require('../watch-history.json');
// Drop the entries that `isRemoved` (from ./helpers) flags.
const live = history.filter((e) => !isRemoved(e));
// Entries without a titleUrl cannot yield an id, so skip them before parsing.
const allIds = live.filter((e) => titleUrlExists(e)).map((e) => parseId(e));
// parseId returns a fresh object per entry, so a reference-based `_.uniq` would
// never remove anything; dedupe on the parsed id instead.
const unique = _.uniqBy(allIds, 'id');
// Collect every id already stored in either collection once, rather than
// scanning both collections for each history entry.
const knownIds = new Set([
  ...db.get('durations').map('id').value(),
  ...db.get('notFound').map('id').value(),
]);
const onlyMissing = unique.filter(({ id }) => !knownIds.has(id));
console.log(history.length, live.length, unique.length, onlyMissing.length);
if (!onlyMissing.length) {
  console.log('all is fetched');
  process.exit(0);
}
/**
 * Fetches metadata for all given videos, one batch at a time.
 *
 * The list is split into groups of 50 and each group is handed to `fetchBatch`
 * in order, with a 100 ms pause after every batch.
 *
 * @param {Array<object>} videos - Entries to fetch; each is passed through to `fetchBatch`.
 * @returns {Promise<void>} Resolves after the last batch and its pause complete.
 */
async function fetchAll(videos) {
  const pause = (ms) => new Promise((done) => setTimeout(done, ms));
  const batches = _.chunk(videos, 50);

  for (let index = 0; index < batches.length; index += 1) {
    await fetchBatch(batches[index]);
    await pause(100);
  }
}
// Script entry point: fetch everything that is still missing, then report.
// Any failure is printed and the process exits with status 1.
(async () => {
  try {
    await fetchAll(onlyMissing);
    console.log('done');
  } catch (e) {
    console.error(e);
    process.exit(1);
  }
})();