Skip to content

Commit

Permalink
include utf8 and utf16 bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
andrew-nowak committed Jun 20, 2024
1 parent bc5df9e commit 2a6c1d0
Showing 1 changed file with 35 additions and 3 deletions.
38 changes: 35 additions & 3 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,26 @@
/* Collapse adjacent cell borders and draw a heavier frame around the whole table. */
table {
border-collapse: collapse;
border: 2px solid black;
table-layout: auto;
}

/* Every header and data cell gets a thin border and a 4rem base width;
   the class rules below set their own widths for specific columns. */
th, td {
border: 1px solid black;
width: 4rem;
}

/* Data cells are rendered in a monospace font. */
td {
font-family: monospace;
}

/* Wide text columns (used on the Name and Extra headers): each takes about
   a third of the table width. The former fixed 16rem width was dead code,
   since the percentage declaration in the same rule always overrode it. */
.wide {
width: 32%;
}
/* UTF-16 column. NOTE(review): 9ch presumably fits two 4-digit hex code
   units separated by a space (a surrogate pair) - confirm. */
.u16 {
width: 9ch;
}

/* UTF-8 column. NOTE(review): 11ch presumably fits four 2-digit hex bytes
   separated by spaces (the longest UTF-8 sequence) - confirm. */
.u8 {
width: 11ch;
}
</style>
</head>
Expand All @@ -59,6 +70,8 @@ <h1>Unicode inspector</h1>
<th>Codepoint</th>
<th class="wide">Name</th>
<th class="wide">Extra</th>
<th class="u16">UTF-16</th>
<th class="u8">UTF-8</th>
</tr>
</thead>
<tbody id="the-table">
Expand All @@ -77,6 +90,20 @@ <h1>Unicode inspector</h1>
return [codepointN, codepoint, name, extra];
}));

/**
 * Lists the UTF-16 code units of a string as 4-digit lowercase hex strings.
 *
 * Iterates exactly `s.length` code units instead of over-running the string
 * and waiting for `charCodeAt` to return NaN, so surrogate pairs yield two
 * entries and the empty string yields an empty array.
 *
 * @param {string} s - Text to inspect.
 * @returns {string[]} One zero-padded hex string per UTF-16 code unit, in order.
 */
const calcU16 = (s) =>
  Array.from({ length: s.length }, (_, i) =>
    s.charCodeAt(i).toString(16).padStart(4, '0'));

// Single shared encoder instance; TextEncoder always produces UTF-8 bytes.
const textencoder = new TextEncoder();

/**
 * Left-pads a string with '0' characters up to a minimum length.
 *
 * @param {string} s - Text to pad (for example a hex number).
 * @param {number} n - Minimum length of the result.
 * @returns {string} `s` prefixed with zeros; returned as-is when it is
 *   already at least `n` characters long.
 */
const zeropad = (s, n) => {
  const missing = n - s.length;
  const padding = missing > 0 ? '0'.repeat(missing) : '';
  return padding + s;
};

const onSubmit = () => {
const tbody = document.getElementById('the-table');

Expand All @@ -96,7 +123,12 @@ <h1>Unicode inspector</h1>
const codepoint = char.codePointAt(0);
const match = unicodeData.find(u => u[0] === codepoint);

const rowItems = [char, ...match.slice(1, 4)];
const u16 = calcU16(char).join(' ');
const u8 = Array.from(textencoder.encode(char))
.map(b => zeropad(b.toString(16), 2))
.join(' ');

const rowItems = [char, ...match.slice(1, 4), u16, u8];

for (const item of rowItems) {
const td = document.createElement('td');
Expand Down

0 comments on commit 2a6c1d0

Please sign in to comment.