forked from owid/owid-grapher
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGrapherBaker.tsx
543 lines (493 loc) · 18.2 KB
/
GrapherBaker.tsx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
import React from "react"
import { GrapherPage } from "../site/GrapherPage.js"
import { DataPageV2 } from "../site/DataPageV2.js"
import { renderToHtmlPage } from "../baker/siteRenderers.js"
import {
excludeUndefined,
urlToSlug,
without,
deserializeJSONFromHTML,
uniq,
keyBy,
mergePartialGrapherConfigs,
compact,
partition,
} from "@ourworldindata/utils"
import fs from "fs-extra"
import * as lodash from "lodash"
import { bakeGraphersToPngs } from "./GrapherImageBaker.js"
import {
OPTIMIZE_SVG_EXPORTS,
BAKED_BASE_URL,
BAKED_GRAPHER_URL,
} from "../settings/serverSettings.js"
import * as db from "../db/db.js"
import { glob } from "glob"
import { isPathRedirectedToExplorer } from "../explorerAdminServer/ExplorerRedirects.js"
import {
getPostEnrichedBySlug,
getPostIdFromSlug,
getPostRelatedCharts,
getRelatedArticles,
getRelatedResearchAndWritingForVariable,
} from "../db/model/Post.js"
import {
JsonError,
GrapherInterface,
OwidVariableDataMetadataDimensions,
DimensionProperty,
OwidVariableWithSource,
OwidChartDimensionInterface,
EnrichedFaq,
FaqEntryData,
FaqDictionary,
ImageMetadata,
OwidGdocBaseInterface,
} from "@ourworldindata/types"
import ProgressBar from "progress"
import {
getVariableData,
getMergedGrapherConfigForVariable,
getVariableOfDatapageIfApplicable,
} from "../db/model/Variable.js"
import { getDatapageDataV2, getDatapageGdoc } from "./DatapageHelpers.js"
import { Image, getAllImages } from "../db/model/Image.js"
import { logErrorAndMaybeSendToBugsnag } from "../serverUtils/errorLog.js"
import { parseFaqs } from "../db/model/Gdoc/rawToEnriched.js"
import { getShortPageCitation } from "../site/gdocs/utils.js"
import { getSlugForTopicTag, getTagToSlugMap } from "./GrapherBakingUtils.js"
import { knexRaw } from "../db/db.js"
import { getRelatedChartsForVariable } from "../db/model/Chart.js"
import pMap from "p-map"
import { getGdocBaseObjectBySlug } from "../db/model/Gdoc/GdocFactory.js"
/**
 * Renders the data page for `grapher` when `getVariableOfDatapageIfApplicable`
 * yields a variable for it. Resolves to `undefined` otherwise so the caller
 * can fall back to a different page.
 *
 * @param grapher - chart config the data page is rendered for
 * @param isPreviewing - forwarded unchanged to `renderDataPageV2`
 * @param knex - transaction used for the image lookup and the render
 * @param imageMetadataDictionary - optional pre-fetched images keyed by filename
 */
const renderDatapageIfApplicable = async (
    grapher: GrapherInterface,
    isPreviewing: boolean,
    knex: db.KnexReadWriteTransaction,
    imageMetadataDictionary?: Record<string, Image>
) => {
    const datapageVariable = await getVariableOfDatapageIfApplicable(grapher)
    if (!datapageVariable) return undefined

    // The bulk bake hands in a dictionary it fetched once, so that not every
    // chart has to load every image. Callers that render a single page on the
    // fly (e.g. the mockSiteRouter) pass nothing, and this is the one place
    // shared by both paths where the lookup can be done on demand.
    let imagesByFilename = imageMetadataDictionary
    if (!imagesByFilename) {
        const allImages = await getAllImages(knex)
        imagesByFilename = keyBy(allImages, "filename")
    }

    const { id, metadata } = datapageVariable
    return await renderDataPageV2(
        {
            variableId: id,
            variableMetadata: metadata,
            isPreviewing,
            useIndicatorGrapherConfigs: false,
            pageGrapher: grapher,
            imageMetadataDictionary: imagesByFilename,
        },
        knex
    )
}
/**
 * Produces the HTML for a chart outside of preview mode: the data page when
 * one can be rendered for the chart, the plain grapher page otherwise.
 *
 * @param grapher - chart config to render
 * @param knex - transaction used for all lookups
 * @param imageMetadataDictionary - optional pre-fetched image metadata,
 * forwarded to the data page renderer
 * @returns the rendered HTML
 */
// TODO: this transaction is only RW because somewhere inside it we fetch images
export const renderDataPageOrGrapherPage = async (
    grapher: GrapherInterface,
    knex: db.KnexReadWriteTransaction,
    imageMetadataDictionary?: Record<string, Image>
): Promise<string> => {
    const isPreviewing = false
    const datapageHtml = await renderDatapageIfApplicable(
        grapher,
        isPreviewing,
        knex,
        imageMetadataDictionary
    )
    return datapageHtml ? datapageHtml : renderGrapherPage(grapher, knex)
}
// Result of looking up one FAQ reference (gdoc id + fragment id) while
// rendering a data page. Modelled as a union discriminated by `type` so that
// successes and failures can be split apart after the lookup.

// Failed lookup: the referenced fragment was not found.
type EnrichedFaqLookupError = {
type: "error"
// Human-readable description of what could not be found
error: string
}
// Successful lookup: carries the resolved FAQ.
type EnrichedFaqLookupSuccess = {
type: "success"
enrichedFaq: EnrichedFaq
}
type EnrichedFaqLookupResult = EnrichedFaqLookupError | EnrichedFaqLookupSuccess
/**
 * Renders the complete HTML document of a data page (`DataPageV2`) for one
 * indicator.
 *
 * Steps, in order: pick the grapher config to show, resolve the FAQ fragments
 * referenced by the variable's presentation metadata, build the datapage
 * data, attach the primary topic citation, the related charts and the related
 * research, and finally render everything through `renderToHtmlPage`.
 *
 * FAQ fragments that cannot be resolved and topic tags that cannot be mapped
 * to a slug are reported via `logErrorAndMaybeSendToBugsnag`; neither aborts
 * the render.
 *
 * @param variableId - id of the indicator the page is about
 * @param variableMetadata - metadata of that indicator; `presentation.faqs`
 * and `display` are read here
 * @param isPreviewing - forwarded to the FAQ gdoc lookup and to the page
 * component
 * @param useIndicatorGrapherConfigs - when true, the indicator-level grapher
 * config is merged with `pageGrapher`; when false `pageGrapher` is used as is
 * @param pageGrapher - chart config of the page. NOTE: when it is used as is
 * and has no dimensions, a y dimension for `variableId` is assigned onto this
 * very object
 * @param imageMetadataDictionary - image metadata keyed by filename; defaults
 * to `{}`
 * @param knex - transaction used for all database reads
 * @returns the rendered HTML page
 */
export async function renderDataPageV2(
    {
        variableId,
        variableMetadata,
        isPreviewing,
        useIndicatorGrapherConfigs,
        pageGrapher,
        imageMetadataDictionary = {},
    }: {
        variableId: number
        variableMetadata: OwidVariableWithSource
        isPreviewing: boolean
        useIndicatorGrapherConfigs: boolean
        pageGrapher?: GrapherInterface
        imageMetadataDictionary?: Record<string, ImageMetadata>
    },
    // TODO: this transaction is only RW because somewhere inside it we fetch images
    knex: db.KnexReadWriteTransaction
) {
    const grapherConfigForVariable = await getMergedGrapherConfigForVariable(
        variableId,
        knex
    )
    // Only merge the grapher config on the indicator if the caller tells us to do so -
    // this is true for preview pages for datapages on the indicator level but false
    // if we are on Grapher pages. Once we have a good way in the grapher admin for how
    // to use indicator level defaults, we should reconsider how this works here.
    const grapher = useIndicatorGrapherConfigs
        ? mergePartialGrapherConfigs(grapherConfigForVariable, pageGrapher)
        : pageGrapher ?? {}

    // Fetch each gdoc referenced by the FAQ entries exactly once.
    const faqDocs = compact(
        uniq(variableMetadata.presentation?.faqs?.map((faq) => faq.gdocId))
    )
    const gdocs = await Promise.all(
        faqDocs.map((gdocId) => getDatapageGdoc(knex, gdocId, isPreviewing))
    )
    const gdocIdToFragmentIdToBlock: Record<string, FaqDictionary> = {}
    gdocs.forEach((gdoc) => {
        if (!gdoc) return
        // `??` only replaces null/undefined, so `("faqs" in c && c.faqs) ?? []`
        // evaluates to `false` when the key is missing. Branch explicitly so
        // that the empty-array fallback is what actually gets parsed.
        const rawFaqs =
            "faqs" in gdoc.content ? gdoc.content.faqs ?? [] : []
        const faqs = parseFaqs(rawFaqs, gdoc.id)
        gdocIdToFragmentIdToBlock[gdoc.id] = faqs.faqs
    })

    // Resolve every (gdocId, fragmentId) reference to its FAQ block, keeping
    // track of the references that could not be found.
    const resolvedFaqsResults: EnrichedFaqLookupResult[] = variableMetadata
        .presentation?.faqs
        ? variableMetadata.presentation.faqs.map((faq) => {
              const enrichedFaq = gdocIdToFragmentIdToBlock[faq.gdocId]?.[
                  faq.fragmentId
              ] as EnrichedFaq | undefined
              if (!enrichedFaq)
                  return {
                      type: "error",
                      error: `Could not find fragment ${faq.fragmentId} in gdoc ${faq.gdocId}`,
                  }
              return {
                  type: "success",
                  enrichedFaq,
              }
          })
        : []
    const [resolvedFaqs, faqResolveErrors] = partition(
        resolvedFaqsResults,
        (result) => result.type === "success"
    ) as [EnrichedFaqLookupSuccess[], EnrichedFaqLookupError[]]

    for (const error of faqResolveErrors) {
        await logErrorAndMaybeSendToBugsnag(
            new JsonError(
                `Data page error in finding FAQs for variable ${variableId}: ${error.error}`
            )
        )
    }

    const faqEntries: FaqEntryData = {
        faqs: resolvedFaqs.flatMap((faq) => faq.enrichedFaq.content),
    }

    // If we are rendering this in the context of an indicator page preview or similar,
    // then the chart config might be entirely empty. Make sure that dimensions is
    // set to the variableId as a Y variable in these cases.
    if (
        !grapher.dimensions ||
        (grapher.dimensions as OwidChartDimensionInterface[]).length === 0
    ) {
        const dimensions: OwidChartDimensionInterface[] = [
            {
                variableId: variableId,
                property: DimensionProperty.y,
                display: variableMetadata.display,
            },
        ]
        grapher.dimensions = dimensions
    }

    const datapageData = await getDatapageDataV2(variableMetadata, grapher)

    // The first topic tag is treated as the primary topic. Its citation comes
    // from the gdoc with the tag's slug, or from the post with that slug when
    // no such gdoc exists.
    const firstTopicTag = datapageData.topicTagsLinks?.[0]
    if (firstTopicTag) {
        let slug = ""
        try {
            slug = await getSlugForTopicTag(knex, firstTopicTag)
        } catch {
            await logErrorAndMaybeSendToBugsnag(
                `Datapage with variableId "${variableId}" and title "${datapageData.title.title}" is using "${firstTopicTag}" as its primary tag, which we are unable to resolve to a tag in the grapher DB`
            )
        }
        // Without a slug there is nothing to look up. Skipping here avoids
        // querying posts with an empty slug after a failed tag resolution.
        if (slug) {
            const gdoc: OwidGdocBaseInterface | undefined =
                await getGdocBaseObjectBySlug(knex, slug, true)
            if (gdoc) {
                const citation = getShortPageCitation(
                    gdoc.content.authors,
                    gdoc.content.title ?? "",
                    gdoc.publishedAt
                )
                datapageData.primaryTopic = {
                    topicTag: firstTopicTag,
                    citation,
                }
            } else {
                const post = await getPostEnrichedBySlug(knex, slug)
                if (post) {
                    const citation = getShortPageCitation(
                        post.authors ?? [],
                        post.title,
                        post.published_at
                    )
                    datapageData.primaryTopic = {
                        topicTag: firstTopicTag,
                        citation,
                    }
                }
            }
        }
    }

    // Get the charts this variable is being used in (aka "related charts")
    // and exclude the current chart to avoid duplicates
    datapageData.allCharts = await getRelatedChartsForVariable(
        knex,
        variableId,
        grapher && "id" in grapher ? [grapher.id as number] : []
    )

    datapageData.relatedResearch =
        await getRelatedResearchAndWritingForVariable(knex, variableId)

    // Only ship metadata for the images the related research actually uses.
    const relatedResearchFilenames = datapageData.relatedResearch
        .map((r) => r.imageUrl)
        .filter((f): f is string => !!f)
    const imageMetadata = lodash.pick(
        imageMetadataDictionary,
        uniq(relatedResearchFilenames)
    )

    const tagToSlugMap = await getTagToSlugMap(knex)

    return renderToHtmlPage(
        <DataPageV2
            grapher={grapher}
            datapageData={datapageData}
            baseUrl={BAKED_BASE_URL}
            baseGrapherUrl={BAKED_GRAPHER_URL}
            isPreviewing={isPreviewing}
            imageMetadata={imageMetadata}
            faqEntries={faqEntries}
            tagToSlugMap={tagToSlugMap}
        />
    )
}
/**
 * Admin-preview counterpart of renderDataPageOrGrapherPage(): renders the
 * data page in preview mode when one applies to the chart, and the plain
 * grapher page otherwise. No pre-fetched image metadata is passed along.
 */
export const renderPreviewDataPageOrGrapherPage = async (
    grapher: GrapherInterface,
    // TODO: this transaction is only RW because somewhere inside it we fetch images
    knex: db.KnexReadWriteTransaction
) => {
    const previewHtml = await renderDatapageIfApplicable(grapher, true, knex)
    return previewHtml ? previewHtml : renderGrapherPage(grapher, knex)
}
/**
 * Renders the standalone grapher page for a chart.
 *
 * Related charts are looked up through the post whose slug is derived from
 * the chart's `originUrl`; related articles are looked up by chart id. Either
 * one stays `undefined` when its lookup key is missing.
 *
 * @param grapher - chart config to render
 * @param knex - transaction used for the lookups
 * @returns the rendered HTML page
 */
const renderGrapherPage = async (
    grapher: GrapherInterface,
    knex: db.KnexReadonlyTransaction
) => {
    const originSlug = urlToSlug(grapher.originUrl || "") as
        | string
        | undefined

    // TODO: update this to use gdocs posts
    let originPostId
    if (originSlug) {
        originPostId = await getPostIdFromSlug(knex, originSlug)
    }

    let relatedCharts
    if (originPostId) {
        relatedCharts = await getPostRelatedCharts(knex, originPostId)
    }

    let relatedArticles
    if (grapher.id) {
        relatedArticles = await getRelatedArticles(knex, grapher.id)
    }

    return renderToHtmlPage(
        <GrapherPage
            grapher={grapher}
            relatedCharts={relatedCharts}
            relatedArticles={relatedArticles}
            baseUrl={BAKED_BASE_URL}
            baseGrapherUrl={BAKED_GRAPHER_URL}
        />
    )
}
/**
 * Tells whether the page already baked at `htmlPath` embeds a config with the
 * given version.
 *
 * When it does, the caller can potentially skip baking the data and exports,
 * which is by far the slowest part.
 *
 * @param htmlPath - path of the previously baked html file
 * @param grapherVersion - version of the chart config about to be baked
 * @returns false when no file exists at `htmlPath`; otherwise whether the
 * version deserialized from the file strictly equals `grapherVersion`
 */
const chartIsSameVersion = async (
    htmlPath: string,
    grapherVersion: number | undefined
): Promise<boolean> => {
    // Nothing baked yet, so there is nothing to compare against.
    if (!fs.existsSync(htmlPath)) return false

    const bakedHtml = await fs.readFile(htmlPath, "utf8")
    const bakedConfig = deserializeJSONFromHTML(bakedHtml)
    return bakedConfig?.version === grapherVersion
}
/**
 * Bakes one chart into `bakedSiteDir`: always rewrites its html page, and
 * re-renders its image exports only when the chart version changed or an
 * export file is missing.
 *
 * @param bakedSiteDir - root directory of the baked site
 * @param imageMetadataDictionary - image metadata forwarded to page rendering
 * @param grapher - chart config to bake
 * @param knex - transaction used while rendering the page
 */
const bakeGrapherPageAndVariablesPngAndSVGIfChanged = async (
    bakedSiteDir: string,
    imageMetadataDictionary: Record<string, Image>,
    grapher: GrapherInterface,
    knex: db.KnexReadWriteTransaction
) => {
    const grapherDir = `${bakedSiteDir}/grapher`
    const pagePath = `${grapherDir}/${grapher.slug}.html`

    // Has to be read before the page is rewritten below: the comparison is
    // made against the html file that the write replaces.
    const versionUnchanged = await chartIsSameVersion(
        pagePath,
        grapher.version
    )

    // Always bake the html for every chart; it's cheap to do so
    const pageHtml = await renderDataPageOrGrapherPage(
        grapher,
        knex,
        imageMetadataDictionary
    )
    await fs.writeFile(pagePath, pageHtml)
    console.log(pagePath)

    // Charts without any variable have nothing to export.
    const variableIds = lodash.uniq(
        grapher.dimensions?.map((d) => d.variableId)
    )
    if (variableIds.length === 0) return

    const exportsDir = `${grapherDir}/exports`
    await fs.mkdirp(`${exportsDir}/`)

    const exportsAreCurrent =
        versionUnchanged &&
        fs.existsSync(`${exportsDir}/${grapher.slug}.svg`) &&
        fs.existsSync(`${exportsDir}/${grapher.slug}.png`)
    if (exportsAreCurrent) return

    // Load data + metadata of every variable in parallel and index it by
    // variable id for the image baker.
    const pendingVariableData: Promise<OwidVariableDataMetadataDimensions>[] =
        variableIds.map(getVariableData)
    const loadedVariableData = await Promise.all(pendingVariableData)
    const variableDataById = new Map(
        loadedVariableData.map((item) => [item.metadata.id, item])
    )
    await bakeGraphersToPngs(
        exportsDir,
        grapher,
        variableDataById,
        OPTIMIZE_SVG_EXPORTS
    )
}
/**
 * Removes baked chart files whose slug is no longer among `newSlugs`.
 *
 * For each such slug the html page and the png export are unlinked (the svg
 * export is not touched). Slugs that redirect to an explorer are kept. A
 * failing unlink is logged with `console.error` and does not stop the loop.
 *
 * @param bakedSiteDir - root directory of the baked site
 * @param newSlugs - slugs that must be kept
 */
const deleteOldGraphers = async (bakedSiteDir: string, newSlugs: string[]) => {
    const grapherDir = `${bakedSiteDir}/grapher/`

    // Slugs of everything currently baked, derived from the html file names
    const bakedSlugs = glob
        .sync(`${grapherDir}*.html`)
        .map((htmlPath) => htmlPath.replace(grapherDir, "").replace(".html", ""))

    // Delete any that are missing from the database, but
    // do not delete grapher slugs redirected to explorers
    const staleSlugs = without(bakedSlugs, ...newSlugs).filter(
        (slug) => !isPathRedirectedToExplorer(`/grapher/${slug}`)
    )

    for (const slug of staleSlugs) {
        console.log(`DELETING ${slug}`)
        const stalePaths = [
            `${grapherDir}${slug}.html`,
            `${grapherDir}exports/${slug}.png`,
        ]
        try {
            await Promise.all(stalePaths.map((stalePath) => fs.unlink(stalePath)))
            for (const stalePath of stalePaths) console.log(stalePath)
        } catch (err) {
            console.error(err)
        }
    }
}
/** Arguments for baking a single chart with `bakeSingleGrapherChart`. */
export interface BakeSingleGrapherChartArguments {
// Chart id; copied onto the parsed config as `grapher.id`
id: number
// Chart config as a JSON string; parsed with JSON.parse into a GrapherInterface
config: string
// Root directory of the baked site; chart output goes under `<bakedSiteDir>/grapher`
bakedSiteDir: string
// Chart slug; the bulk bake uses it to label progress-bar ticks
slug: string
// Image metadata forwarded to page rendering; the bulk bake keys it by filename
imageMetadataDictionary: Record<string, Image>
}
/**
 * Bakes the chart described by `args`, unless its `/grapher/<slug>` path is
 * redirected to an explorer.
 *
 * @param args - serialized chart config plus bake settings
 * @param knex - transaction used while baking
 * @returns `args` once the chart has been baked, or `undefined` when the
 * chart was skipped because of an explorer redirect
 */
export const bakeSingleGrapherChart = async (
    args: BakeSingleGrapherChartArguments,
    // TODO: this transaction is only RW because somewhere inside it we fetch images
    knex: db.KnexReadWriteTransaction
) => {
    const { id, config, bakedSiteDir, imageMetadataDictionary } = args

    // The id is supplied separately from the serialized config, so set it here.
    const grapher: GrapherInterface = JSON.parse(config)
    grapher.id = id

    // Avoid baking paths that have an Explorer redirect.
    // Redirects take precedence.
    const redirectsToExplorer = isPathRedirectedToExplorer(
        `/grapher/${grapher.slug}`
    )
    if (redirectsToExplorer) {
        console.log(`⏩ ${grapher.slug} redirects to explorer`)
        return
    }

    await bakeGrapherPageAndVariablesPngAndSVGIfChanged(
        bakedSiteDir,
        imageMetadataDictionary,
        grapher,
        knex
    )
    return args
}
/**
 * Bakes every published chart into `<bakedSiteDir>/grapher` and afterwards
 * deletes the baked files of charts that are no longer published.
 *
 * @param bakedSiteDir - root directory of the baked site
 * @param knex - transaction used to list the charts and prefetch the images;
 * each individual chart is baked inside a transaction of its own
 */
export const bakeAllChangedGrapherPagesVariablesPngSvgAndDeleteRemovedGraphers =
    // TODO: this transaction is only RW because somewhere inside it we fetch images
    async (bakedSiteDir: string, knex: db.KnexReadWriteTransaction) => {
        const publishedCharts: { id: number; config: string; slug: string }[] =
            await knexRaw(
                knex,
                `-- sql
SELECT
id, config, config->>'$.slug' as slug
FROM charts WHERE JSON_EXTRACT(config, "$.isPublished")=true
ORDER BY JSON_EXTRACT(config, "$.slug") ASC
`
            )

        await fs.mkdirp(bakedSiteDir + "/grapher")

        // Prefetch imageMetadata instead of each grapher page fetching
        // individually. imageMetadata is used by the google docs powering rich
        // text (including images) in data pages.
        const allImages = await getAllImages(knex)
        const imageMetadataDictionary = keyBy(allImages, "filename")

        const jobs: BakeSingleGrapherChartArguments[] = publishedCharts.map(
            ({ id, config, slug }) => ({
                id,
                config,
                bakedSiteDir,
                slug,
                imageMetadataDictionary,
            })
        )

        // One tick per chart, plus a final tick for the cleanup step.
        const progressBar = new ProgressBar(
            "bake grapher page [:bar] :current/:total :elapseds :rate/s :etas :name\n",
            {
                width: 20,
                total: publishedCharts.length + 1,
                renderThrottle: 0,
            }
        )

        // Up to 10 charts are baked concurrently. Every job opens its own
        // transaction so that the jobs can use separate database connections.
        // Read-write consistency is not a concern here, thankfully.
        const bakeJob = async (job: BakeSingleGrapherChartArguments) => {
            await db.knexReadWriteTransaction(
                async (trx) => bakeSingleGrapherChart(job, trx),
                db.TransactionCloseMode.KeepOpen
            )
            progressBar.tick({ name: `slug ${job.slug}` })
        }
        await pMap(jobs, bakeJob, { concurrency: 10 })

        const publishedSlugs = publishedCharts.map((chart) => chart.slug)
        await deleteOldGraphers(bakedSiteDir, excludeUndefined(publishedSlugs))
        progressBar.tick({ name: `✅ Deleted old graphers` })
    }