-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathindex.js
103 lines (87 loc) · 2.43 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
const fetch = require("node-fetch"),
slugify = require("slugify"),
cheerio = require("cheerio"),
iconv = require("iconv-lite");
// Root URL of the Pensador site; every page request is built from it.
// Note that the value ends with a trailing slash.
const baseUrl = 'https://www.pensador.com/';
module.exports = async (options) => {
if (typeof options == 'undefined') {
var options = { max: 10 };
}
// Default value for search term, Term is not mandatory anymore
var searchTerm = 'frases_curtas';
// if term is set then is changed to a new value
if (options.term != 'undefined') {
searchTerm = slugify(`frases de ${options.term}`, {
replacement: '_',
remove: /[*+~.()'"!:@]/g,
lower: true,
});
}
let keepGoing = true;
let current = 1;
let phrases = [];
while (keepGoing) {
let contentPage = await fetchPage(searchTerm, current);
let result = await extract(contentPage);
phrases.push(...result.phrases);
if (options.max !== undefined && phrases.length > options.max) {
phrases = phrases.slice(0, options.max);
keepGoing = false;
}
if (result.next === false) {
keepGoing = false;
}
current = current + 1;
}
return { total: phrases.length, searchTerm, phrases };
async function fetchPage(searchTerm, current = 1) {
return new Promise((resolve, reject) => {
fetch(`${baseUrl}/${searchTerm}/${current}`)
.then(res => res.arrayBuffer())
.then(arrayBuffer =>
iconv.decode(Buffer.from(arrayBuffer), "utf-8").toString()
)
.then(body => resolve(body))
.catch(err => reject(err));
});
}
async function extract(htmlContent) {
return new Promise((resolve, reject) => {
try {
const phrases = [];
const $ = cheerio.load(htmlContent);
$(".thought-card").each(function(i, e) {
phrases.push({
author: $(this)
.find("a")
.first()
.text(),
text: $(this)
.find("p")
.first()
.text()
.replace(/\n/g, "")
});
});
let next = false;
$("#paginacao").each(function(i, e) {
if (
$(this)
.find(".nav")
.last()
.text()
.includes("xima")
) {
next = true;
}
});
resolve({ phrases, next });
} catch (err) {
reject(err);
}
});
}
};
/**
 * Throws the given value unchanged.
 *
 * Being a function call, it can be used where a `throw` statement is not
 * allowed, i.e. in expression position.
 *
 * @param {*} reason - Value to throw (an Error or anything else).
 * @throws {*} Always throws `reason`.
 */
function _throw(reason) {
  throw reason;
}