-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
75 lines (68 loc) · 3 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env node
const libxmljs = require('libxmljs')
const fs = require('fs')
const he = require('he')
const TurndownService = require('turndown')
const turndownService = new TurndownService()
/**
 * Converts a parsed XML transcript into Markdown and writes the result to the
 * file named by the second command-line argument (process.argv[3]).
 *
 * The first <debateSection> is located and the heading text becomes the
 * top-level title. Each <speech> child becomes a "### speaker:" section, and
 * each <narrative> child becomes a block quote. Any other child node is
 * ignored.
 *
 * @param {object} xmlData - Parsed XML document (as returned by libxmljs.parseXml).
 * @returns {void}
 * @throws {Error} If no <debateSection> or no <heading> element is found.
 */
const an2md = (xmlData) => {
  // Read from the argument (not a module-level variable) so the function
  // converts whichever document the caller passes in.
  const debateSection = xmlData.get('//debateSection')
  if (debateSection == null) {
    throw new Error('No <debateSection> element found in the input XML')
  }

  const headingNode = debateSection.get('//heading')
  if (headingNode == null) {
    throw new Error('No <heading> element found in the input XML')
  }
  const heading = headingNode.text().trim()

  // NOTE(review): both branches produce the same ':' as written. Presumably
  // the non-English branch was meant to use a different (full-width) colon —
  // confirm against the intended output before changing.
  const colon = CheckEn(heading) ? ':' : ':'

  let md = `# ${heading}\n\n`

  for (const child of debateSection.childNodes()) {
    switch (child.name()) {
      case 'speech': {
        // The `by` attribute carries the speaker id; drop its first '#'.
        const speaker = child.attr('by').value().replace('#', '')
        // The node at index 1 is serialised and converted to Markdown.
        const content = turndownService.turndown(child.child(1).toString())
        md += `### ${speaker}${colon}\n${content}\n\n`
        break
      }
      case 'narrative': {
        // Prefer the node at index 1; fall back to index 0 when it is absent.
        const second = child.child(1)
        const source = second !== null ? second : child.child(0)
        // Underscores are stripped from the converted Markdown before quoting.
        const quoted = turndownService.turndown(source.toString()).replace(/_/g, '')
        md += `> ${quoted}\n\n`
        break
      }
      default:
        // Other node types contribute nothing to the output.
        break
    }
  }

  fs.writeFileSync(process.argv[3], md)
}
/**
 * Reports whether a string is made up solely of ASCII letters, digits,
 * spaces and the punctuation characters $ @ ! % * ? & # ^ - _ . +
 *
 * An empty string does not match, because at least one character is required.
 *
 * @param {string} str - Text to examine.
 * @returns {boolean} True when every character is in the allowed set.
 */
const CheckEn = (str) => {
  const allowedCharsOnly = /^[a-zA-Z0-9$@$!%*?&#^\-_. +]+$/
  const matched = allowedCharsOnly.exec(str)
  return matched !== null
}
// CLI entry point: index.js <input.xml> <output.md>
// Validate both paths up front so a missing argument fails with a usage
// message instead of an opaque error from fs after work has already begun.
const [, , inputPath, outputPath] = process.argv
if (!inputPath || !outputPath) {
  console.error('Usage: index.js <input.xml> <output.md>')
  process.exit(1)
}

let xml = fs.readFileSync(inputPath, 'utf-8')
// NOTE(review): as written this replaces '&' with '&', which leaves the text
// unchanged. Presumably an escaping step was intended here — confirm the
// intended replacement before altering it.
xml = xml.replace(/&/g, '&')
const xmlDoc = libxmljs.parseXml(xml)
an2md(xmlDoc)