Skip to content
This repository has been archived by the owner on Jul 20, 2024. It is now read-only.

Commit

Permalink
data processing improved
Browse files Browse the repository at this point in the history
  • Loading branch information
JensKrumsieck committed Aug 5, 2020
1 parent e8de910 commit 9c86134
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 19 deletions.
12 changes: 8 additions & 4 deletions index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import axios, { AxiosResponse } from 'axios';
import axios from 'axios';
import { parse } from 'node-html-parser';
import * as fs from 'fs';

Expand Down Expand Up @@ -48,20 +48,24 @@ async function requestData(i: number): Promise<Element> {

element.Name = captionParts[0];
element.Symbol = captionParts[1].replace(/\d*/, "")
element.AtomicNumber = i;

var tbody = infobox.querySelector("tbody");
tbody.childNodes.forEach(s => {
var tr = (s as any) as HTMLElement;
if (tr.querySelector("th")?.hasAttribute("scope")) {

var td = tr.querySelector("td");
var th = tr.querySelector("th");
head = decode(th?.innerHTML)
body = decode(td?.innerHTML);
if (head.includes("Standard atomic weight")) head = "Standard atomic weight";

if (head.includes("atomic weight") || head.includes("Mass number")) head = "AtomicWeight";
if (fields.has(head)) {
head = fields.get(head) as string;
body = regex.get(head)?.exec(body)?.toString() as string;
(element as any)[head] = body;
body = body.match(regex.get(head) != null ? regex.get(head) as RegExp : /.+/g)?.values().next().value as string;

(element as any)[head] = (isNaN(+body) ? body : Number.parseFloat(body));
}
}
});
Expand Down
28 changes: 13 additions & 15 deletions mappings.ts
Original file line number Diff line number Diff line change
@@ -1,35 +1,33 @@
// NOTE(review): this listing appears to be a rendered diff of mappings.ts with
// the +/- markers stripped, so removed and added lines sit side by side.
// Read literally as one script, a later `set` with the same key replaces the
// value stored by the earlier call.
//used fields mapping
// Keys are the header strings that index.ts tests with `fields.has(head)`;
// values are the property names it then assigns on the element object.
var fields = new Map<string, string>();
fields.set("Mass number", "AtomicWeight");
fields.set("Standard atomic weight", "AtomicWeight");
fields.set("Atomic number (Z)", "AtomicNumber");
// index.ts rewrites any header containing "atomic weight" or "Mass number"
// to the literal "AtomicWeight" before the lookup, hence this identity entry.
fields.set("AtomicWeight", "AtomicWeight");
fields.set("Appearance", "Appearance");
// The next line ends with a comma, not a semicolon: it and the following call
// form a single comma expression. Both calls still run.
fields.set("Group", "Group"),
fields.set("Period", "Period");
// Same keys and values as the two calls above (semicolon-terminated form).
fields.set("Group", "Group");
fields.set("Period", "Period");
fields.set("Block", "Block");
fields.set("Element category", "Category");
fields.set("Electron configuration", "ElectronConfiguration");
fields.set("Electrons per shell", "ElectronsPerShell");
fields.set("Electronegativity", "Electronegativity");
fields.set("Atomic radius", "AtomicRadius");
fields.set("Covalent radius", "CovalentRadius");
// Two entries for the same key: the second one ("VdWRadius") is what remains.
fields.set("Van der Waals radius", "AtomicRadius");
fields.set("Van der Waals radius", "VdWRadius");
fields.set("CAS Number", "CAS");

// Extraction patterns keyed by the mapped property name (the values of
// `fields`); index.ts calls `regex.get(head)` after `head` has been mapped.
// All patterns carry the global flag.
var regex = new Map<string, RegExp>();
regex.set("AtomicWeight", /\d+[.]?\d*/g);
// `\d*` can match the empty string.
regex.set("AtomicNumber", /\d*/g);
// Two entries for "Appearance": `\w*` (may match empty), then `.+` which
// requires at least one character and is the one that remains.
regex.set("Appearance", /\w*/g);
regex.set("Appearance", /.+/g);
regex.set("Group", /\d/g);
regex.set("Period", /\d/g);
// Four `.*` entries followed by four `.+` entries for the same keys;
// the `.+` versions are the ones that remain.
regex.set("Block", /.*/g);
regex.set("Category", /.*/g);
regex.set("ElectronConfiguration", /.*/g);
regex.set("ElectronsPerShell", /.*/g);
regex.set("Block", /.+/g);
regex.set("Category", /.+/g);
regex.set("ElectronConfiguration", /.+/g);
regex.set("ElectronsPerShell", /.+/g);
// NOTE(review): the negative lookahead looks redundant: a position where
// `\d+` can start matching never begins with the text "Pauling scale:".
regex.set("Electronegativity", /(?!Pauling scale: ?)\d+[.]?\d*/g);
// `\d*` variants first ("AtomicRadius" is set twice with the same pattern),
// then `\d+` variants that require at least one digit and replace them.
regex.set("CovalentRadius", /\d*/g);
regex.set("AtomicRadius", /\d*/g);
regex.set("AtomicRadius", /\d*/g);
regex.set("CAS", /\d*-\d*-\d*/g);
regex.set("CovalentRadius", /\d+/g);
regex.set("AtomicRadius", /\d+/g);
regex.set("VdWRadius", /\d+/g);
regex.set("CAS", /\d+-\d+-\d+/g);

// Both maps are exported by name.
export { fields, regex };

0 comments on commit 9c86134

Please sign in to comment.