-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy path index.js
174 lines (140 loc) · 5.67 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import { writeFile, mkdir } from 'fs/promises'; // File System access via promises
import { existsSync } from 'fs'; // File System
import axios from 'axios'; // HTTP client
import { resolve } from 'path'; // Easier directory path handling
import { authenticate } from '@google-cloud/local-auth'; // Google Authentication Library
import csv from 'csvtojson' // Convert CSV to JSON
import moment from 'moment'; // Handle dates
import { parse } from 'json2csv' // Convert JSON to CSV
// Variables
const endpoint = 'https://searchconsole.googleapis.com/v1/urlInspection/index:inspect' // URL Inspection API endpoint
const folder = 'RESULTS' // Name of the folder (change to an appropriate name)
const file = './urls.csv' // File to add URLs
const chunkNum = 20 // Break URL list into chunks to prevent API errors
const test = { inspectionUrl: "https://jlhernando.com/blog/how-to-install-node-for-seo/", siteUrl: "https://jlhernando.com/" } // Testing object
// Create results folder.
// BUG FIX: mkdir() returns a promise that the original left floating, so any
// failure surfaced as an unhandled rejection. `recursive: true` makes the call
// idempotent, and the .catch reports the error and aborts instead of letting
// the script continue toward writeFile calls that would fail later.
existsSync(`./${folder}/`)
  ? console.log(`${folder} folder exists`)
  : mkdir(`${folder}`, { recursive: true }).catch((err) => {
      console.error(`Could not create ${folder} folder:`, err.message)
      process.exit(1)
    });
// Custom function to read URLs file: parses the CSV at the given path into an
// array of row objects (one object per CSV line, keyed by the header row).
const readUrls = async (filePath) => {
  const rows = await csv().fromFile(filePath)
  return rows
}
// Custom function to extract data from API.
// POSTs { inspectionUrl, siteUrl } to the URL Inspection endpoint with the
// user's bearer token and resolves with the parsed response payload.
const getData = async (inspectionUrl, siteUrl, authToken) => {
  // Headers required by the Search Console API plus the OAuth bearer token
  const requestHeaders = {
    'Content-Type': 'application/json',
    'Accept': 'application/json',
    'Authorization': `Bearer ${authToken}`,
  }
  // axios.post(url, data, config) is equivalent to the long-form
  // axios({ method: 'post', url, data, headers }) call
  const response = await axios.post(
    endpoint,
    { inspectionUrl, siteUrl },
    { headers: requestHeaders }
  )
  return response.data
}
// Custom function to get oAuth credentials from Google.
// Launches the local-auth browser consent flow using client-secret.json and
// resolves with the resulting OAuth2 credentials object.
const getCredentials = async () => {
  const authClient = await authenticate({
    keyfilePath: resolve('client-secret.json'),
    scopes: ['https://www.googleapis.com/auth/webmasters'],
  })
  return authClient.credentials
}
// Authenticated Request Function — top-level IIFE that drives the whole run:
// read URLs, authenticate, inspect in batches, write JSON/CSV results.
;(async () => {
  // Start timer (paired with console.timeEnd at the bottom)
  console.time()
  // Get URLs from file
  const urls = await readUrls(file)
  // Store data from API
  const data = []
  const errors = []
  // Start counter to inform user
  let counter = 1
  const totalChunks = Math.ceil(urls.length / chunkNum)
  // Obtain user credentials to use for the request (pop up authentication)
  console.log('Authenticating...'); // BUG FIX: was misspelled "Athenticating..."
  const credentials = await getCredentials().catch(err => {
    console.log('FAILED TO AUTHENTICATE USER. Check if your Google account has access to the requested url/property or if your credential-secret.json is correct')
    process.exit(1) // BUG FIX: exit with a non-zero code so callers/CI see the failure
  })
  console.log('Success authenticating user');
  while (urls.length) {
    // Inform user of number of batches remaining
    console.log(`###### Requesting batch ${counter} of ${totalChunks} ######`);
    // Take the next chunk of URLs (splice mutates `urls`, shrinking it each pass)
    const chunk = urls.splice(0, chunkNum)
    // Create batch of promises (array) — one API request per URL row
    const promises = chunk.map(({ url, property }) => getData(url, property, credentials.access_token));
    // Send all requests in parallel; allSettled never rejects, so one failed
    // request cannot abort the whole batch
    const rawBatchResults = await Promise.allSettled(promises);
    // Walk the results by index so each result stays paired with chunk[index].
    // BUG FIX: the original filtered into `fulfilled`/`rejected` arrays and then
    // indexed `chunk` with the FILTERED index, misattributing URLs whenever a
    // batch contained both successes and failures. It also guarded with
    // `if (rejected)` / `if (fulfilled)`, which is always truthy for arrays.
    rawBatchResults.forEach((result, index) => {
      if (result.status === 'rejected') {
        // Recover the failed URL from the axios error's request payload;
        // BUG FIX: fall back to the chunk entry for non-axios errors that
        // carry no `config` (the original crashed with a TypeError there).
        let failedUrl
        try {
          failedUrl = JSON.parse(result.reason.config.data).inspectionUrl
        } catch {
          failedUrl = chunk[index].url
        }
        errors.push(failedUrl)
        return
      }
      // Fulfilled: annotate the API response with its URL and store it
      const inspection = result.value
      // Log progress with results
      console.log(`Batch ${counter} -> ${chunk[index].url}: ${JSON.stringify(result.value.inspectionResult.indexStatusResult.coverageState)}`)
      inspection.url = chunk[index].url
      data.push(inspection)
    });
    counter++
  }
  // Write results to JSON
  if (data.length) {
    // BUG FIX: await the writes — the originals were floating promises whose
    // failures surfaced as unhandled rejections after the timer had stopped
    await writeFile(`./${folder}/coverage.json`, JSON.stringify(data, null, 2))
    // Transform JSON to ideal CSV (one flat row per inspected URL)
    const output = data.map(({ url, inspectionResult: { indexStatusResult } }) => {
      const cleanObj = {
        url,
        verdict: indexStatusResult.verdict,
        coverageState: indexStatusResult.coverageState,
        'Crawl allowed?': indexStatusResult.robotsTxtState,
        'Indexing allowed?': indexStatusResult.indexingState,
        // The API reports the Unix epoch when a URL has never been crawled
        'Last crawl': indexStatusResult.lastCrawlTime === '1970-01-01T00:00:00Z' ? 'Not crawled' : moment(indexStatusResult.lastCrawlTime).format('YYYY-MM-DD HH:mm:ss'),
        'Page fetch': indexStatusResult.pageFetchState,
        'User-declared canonical': indexStatusResult?.userCanonical ?? 'No User-declared canonical',
        'Google-selected canonical': indexStatusResult?.googleCanonical ?? 'Inspected URL'
      }
      // Spread variable-length arrays into numbered columns so every value
      // gets its own CSV header (sitemap-1, sitemap-2, ...)
      if (indexStatusResult.sitemap) {
        for (const [index, sitemap] of indexStatusResult.sitemap.entries()) {
          cleanObj[`sitemap-${index + 1}`] = sitemap
        }
      }
      if (indexStatusResult.referringUrls) {
        for (const [index, refUrl] of indexStatusResult.referringUrls.entries()) {
          cleanObj[`referringUrl-${index + 1}`] = refUrl
        }
      }
      return cleanObj
    })
    // Write transformed data to CSV
    await writeFile(`./${folder}/coverage.csv`, parse(output))
  }
  // Write URLs that have failed
  if (errors.length) {
    await writeFile(`./${folder}/errors.json`, JSON.stringify(errors, null, 2))
  }
  // Final message
  console.log(`Retrieved Indexing status for ${data.length} URLs & encountered ${errors.length} errors`);
  console.timeEnd()
})()