Skip to content

Commit

Permalink
feat: add dutch corpus (#136)
Browse files Browse the repository at this point in the history
Includes US/QWERTY layout (used in the Netherlands) and AZERTY layout (used in Belgium).
  • Loading branch information
martijn authored Feb 24, 2024
1 parent c294246 commit c0926d6
Show file tree
Hide file tree
Showing 15 changed files with 3,174 additions and 0 deletions.
3,002 changes: 3,002 additions & 0 deletions packages/keybr-content-words/lib/data/words-nl.json

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions packages/keybr-content-words/lib/load.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ export async function loadWordList(language: Language): Promise<WordList> {
"./data/words-it.json"
)
).default as unknown as WordList;
case Language.NL:
return (
await import(
/* webpackChunkName: "words-nl" */
"./data/words-nl.json"
)
).default as unknown as WordList;
case Language.PL:
return (
await import(
Expand Down
8 changes: 8 additions & 0 deletions packages/keybr-keyboard-generator/lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,14 @@ const files: readonly [input: string | LayoutConfig, output: string][] = [
"cldr-keyboards-43.0/keyboards/windows/it-t-k0-windows.xml",
"../keybr-keyboard/lib/data/layout/it_it-win.ts",
],
[
"cldr-keyboards-43.0/keyboards/windows/fr-t-k0-windows.xml",
"../keybr-keyboard/lib/data/layout/nl_be-win.ts",
],
[
"cldr-keyboards-43.0/keyboards/windows/en-t-k0-windows.xml",
"../keybr-keyboard/lib/data/layout/nl_nl-win.ts",
],
[
"cldr-keyboards-43.0/keyboards/windows/pl-t-k0-windows-extended.xml",
"../keybr-keyboard/lib/data/layout/pl_pl-win.ts",
Expand Down
2 changes: 2 additions & 0 deletions packages/keybr-keyboard/lib/data/layout.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ export { LAYOUT_FR_OPTIMOT_ERGO } from "./layout/fr_optimot_ergo.ts";
export { LAYOUT_HE_IL_WIN } from "./layout/he_il-win.ts";
export { LAYOUT_IT_IT_WIN } from "./layout/it_it-win.ts";
export { LAYOUT_JA_JP_JIS } from "./layout/ja_jp_jis.ts";
export { LAYOUT_NL_BE_WIN } from "./layout/nl_be-win.ts";
export { LAYOUT_NL_NL_WIN } from "./layout/nl_nl-win.ts";
export { LAYOUT_PL_PL_WIN } from "./layout/pl_pl-win.ts";
export { LAYOUT_PT_BR_WIN } from "./layout/pt_br-win.ts";
export { LAYOUT_PT_PT_WIN } from "./layout/pt_pt-win.ts";
Expand Down
55 changes: 55 additions & 0 deletions packages/keybr-keyboard/lib/data/layout/nl_be-win.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Generated file, do not edit.

import { type CodePointDict } from "../../types.ts";

/**
 * Code point table for the Dutch (Belgium) AZERTY layout.
 *
 * Each property is a physical key identifier (`KeyQ`, `Digit1`, ...) and each
 * value lists the code points that key produces. Judging by the data, the
 * entries are ordered as: unmodified key, then Shift, then (where present) a
 * third level, presumably AltGr -- TODO confirm against the generator.
 * Entries annotated `DEAD ...` hold combining code points (0x0300-0x0308),
 * presumably marking dead keys.
 *
 * NOTE(review): the generator config writes this file from the CLDR
 * `fr-t-k0-windows.xml` (French) definition. The Belgian AZERTY layout is not
 * identical to the French one (symbol and digit-row assignments differ), so
 * confirm that the French data is the intended source for a layout that is
 * presented as "Belgium".
 */
// prettier-ignore
export const LAYOUT_NL_BE_WIN: CodePointDict = {
Backquote: [/* ² */ 0x00b2],
Digit1: [/* & */ 0x0026, /* 1 */ 0x0031],
Digit2: [/* é */ 0x00e9, /* 2 */ 0x0032, /* DEAD TILDE */ 0x0303],
Digit3: [/* " */ 0x0022, /* 3 */ 0x0033, /* # */ 0x0023],
Digit4: [/* ' */ 0x0027, /* 4 */ 0x0034, /* { */ 0x007b],
Digit5: [/* ( */ 0x0028, /* 5 */ 0x0035, /* [ */ 0x005b],
Digit6: [/* - */ 0x002d, /* 6 */ 0x0036, /* | */ 0x007c],
Digit7: [/* è */ 0x00e8, /* 7 */ 0x0037, /* DEAD GRAVE ACCENT */ 0x0300],
Digit8: [/* _ */ 0x005f, /* 8 */ 0x0038, /* \ */ 0x005c],
Digit9: [/* ç */ 0x00e7, /* 9 */ 0x0039, /* ^ */ 0x005e],
Digit0: [/* à */ 0x00e0, /* 0 */ 0x0030, /* @ */ 0x0040],
Minus: [/* ) */ 0x0029, /* ° */ 0x00b0, /* ] */ 0x005d],
Equal: [/* = */ 0x003d, /* + */ 0x002b, /* } */ 0x007d],
KeyQ: [/* a */ 0x0061, /* A */ 0x0041],
KeyW: [/* z */ 0x007a, /* Z */ 0x005a],
KeyE: [/* e */ 0x0065, /* E */ 0x0045, /* € */ 0x20ac],
KeyR: [/* r */ 0x0072, /* R */ 0x0052],
KeyT: [/* t */ 0x0074, /* T */ 0x0054],
KeyY: [/* y */ 0x0079, /* Y */ 0x0059],
KeyU: [/* u */ 0x0075, /* U */ 0x0055],
KeyI: [/* i */ 0x0069, /* I */ 0x0049],
KeyO: [/* o */ 0x006f, /* O */ 0x004f],
KeyP: [/* p */ 0x0070, /* P */ 0x0050],
BracketLeft: [/* DEAD CIRCUMFLEX ACCENT */ 0x0302, /* DEAD DIAERESIS */ 0x0308],
BracketRight: [/* $ */ 0x0024, /* £ */ 0x00a3, /* ¤ */ 0x00a4],
Backslash: [/* * */ 0x002a, /* µ */ 0x00b5],
KeyA: [/* q */ 0x0071, /* Q */ 0x0051],
KeyS: [/* s */ 0x0073, /* S */ 0x0053],
KeyD: [/* d */ 0x0064, /* D */ 0x0044],
KeyF: [/* f */ 0x0066, /* F */ 0x0046],
KeyG: [/* g */ 0x0067, /* G */ 0x0047],
KeyH: [/* h */ 0x0068, /* H */ 0x0048],
KeyJ: [/* j */ 0x006a, /* J */ 0x004a],
KeyK: [/* k */ 0x006b, /* K */ 0x004b],
KeyL: [/* l */ 0x006c, /* L */ 0x004c],
Semicolon: [/* m */ 0x006d, /* M */ 0x004d],
Quote: [/* ù */ 0x00f9, /* % */ 0x0025],
IntlBackslash: [/* < */ 0x003c, /* > */ 0x003e],
KeyZ: [/* w */ 0x0077, /* W */ 0x0057],
KeyX: [/* x */ 0x0078, /* X */ 0x0058],
KeyC: [/* c */ 0x0063, /* C */ 0x0043],
KeyV: [/* v */ 0x0076, /* V */ 0x0056],
KeyB: [/* b */ 0x0062, /* B */ 0x0042],
KeyN: [/* n */ 0x006e, /* N */ 0x004e],
KeyM: [/* , */ 0x002c, /* ? */ 0x003f],
Comma: [/* ; */ 0x003b, /* . */ 0x002e],
Period: [/* : */ 0x003a, /* / */ 0x002f],
Slash: [/* ! */ 0x0021, /* § */ 0x00a7],
};
55 changes: 55 additions & 0 deletions packages/keybr-keyboard/lib/data/layout/nl_nl-win.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Generated file, do not edit.

import { type CodePointDict } from "../../types.ts";

/**
 * Code point table for the Dutch (Netherlands) QWERTY layout.
 *
 * Each property is a physical key identifier (`KeyQ`, `Digit1`, ...) and each
 * value lists the code points that key produces. Judging by the data, the
 * first entry is the unmodified key and the second the Shift level
 * (lowercase/uppercase letters, digit/symbol pairs).
 *
 * The generator config writes this file from the CLDR `en-t-k0-windows.xml`
 * definition, i.e. the table carries the US English assignments. Note that
 * `IntlBackslash` repeats the same two code points as `Backslash`.
 */
// prettier-ignore
export const LAYOUT_NL_NL_WIN: CodePointDict = {
Backquote: [/* ` */ 0x0060, /* ~ */ 0x007e],
Digit1: [/* 1 */ 0x0031, /* ! */ 0x0021],
Digit2: [/* 2 */ 0x0032, /* @ */ 0x0040],
Digit3: [/* 3 */ 0x0033, /* # */ 0x0023],
Digit4: [/* 4 */ 0x0034, /* $ */ 0x0024],
Digit5: [/* 5 */ 0x0035, /* % */ 0x0025],
Digit6: [/* 6 */ 0x0036, /* ^ */ 0x005e],
Digit7: [/* 7 */ 0x0037, /* & */ 0x0026],
Digit8: [/* 8 */ 0x0038, /* * */ 0x002a],
Digit9: [/* 9 */ 0x0039, /* ( */ 0x0028],
Digit0: [/* 0 */ 0x0030, /* ) */ 0x0029],
Minus: [/* - */ 0x002d, /* _ */ 0x005f],
Equal: [/* = */ 0x003d, /* + */ 0x002b],
KeyQ: [/* q */ 0x0071, /* Q */ 0x0051],
KeyW: [/* w */ 0x0077, /* W */ 0x0057],
KeyE: [/* e */ 0x0065, /* E */ 0x0045],
KeyR: [/* r */ 0x0072, /* R */ 0x0052],
KeyT: [/* t */ 0x0074, /* T */ 0x0054],
KeyY: [/* y */ 0x0079, /* Y */ 0x0059],
KeyU: [/* u */ 0x0075, /* U */ 0x0055],
KeyI: [/* i */ 0x0069, /* I */ 0x0049],
KeyO: [/* o */ 0x006f, /* O */ 0x004f],
KeyP: [/* p */ 0x0070, /* P */ 0x0050],
BracketLeft: [/* [ */ 0x005b, /* { */ 0x007b],
BracketRight: [/* ] */ 0x005d, /* } */ 0x007d],
Backslash: [/* \ */ 0x005c, /* | */ 0x007c],
KeyA: [/* a */ 0x0061, /* A */ 0x0041],
KeyS: [/* s */ 0x0073, /* S */ 0x0053],
KeyD: [/* d */ 0x0064, /* D */ 0x0044],
KeyF: [/* f */ 0x0066, /* F */ 0x0046],
KeyG: [/* g */ 0x0067, /* G */ 0x0047],
KeyH: [/* h */ 0x0068, /* H */ 0x0048],
KeyJ: [/* j */ 0x006a, /* J */ 0x004a],
KeyK: [/* k */ 0x006b, /* K */ 0x004b],
KeyL: [/* l */ 0x006c, /* L */ 0x004c],
Semicolon: [/* ; */ 0x003b, /* : */ 0x003a],
Quote: [/* ' */ 0x0027, /* " */ 0x0022],
IntlBackslash: [/* \ */ 0x005c, /* | */ 0x007c],
KeyZ: [/* z */ 0x007a, /* Z */ 0x005a],
KeyX: [/* x */ 0x0078, /* X */ 0x0058],
KeyC: [/* c */ 0x0063, /* C */ 0x0043],
KeyV: [/* v */ 0x0076, /* V */ 0x0056],
KeyB: [/* b */ 0x0062, /* B */ 0x0042],
KeyN: [/* n */ 0x006e, /* N */ 0x004e],
KeyM: [/* m */ 0x006d, /* M */ 0x004d],
Comma: [/* , */ 0x002c, /* < */ 0x003c],
Period: [/* . */ 0x002e, /* > */ 0x003e],
Slash: [/* / */ 0x002f, /* ? */ 0x003f],
};
4 changes: 4 additions & 0 deletions packages/keybr-keyboard/lib/load.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ import {
LAYOUT_HE_IL_WIN,
LAYOUT_IT_IT_WIN,
LAYOUT_JA_JP_JIS,
LAYOUT_NL_BE_WIN,
LAYOUT_NL_NL_WIN,
LAYOUT_PL_PL_WIN,
LAYOUT_PT_BR_WIN,
LAYOUT_PT_PT_WIN,
Expand Down Expand Up @@ -77,6 +79,8 @@ const layoutDict = new Map<Layout, CodePointDict>([
[Layout.FR_OPTIMOT_ERGO, LAYOUT_FR_OPTIMOT_ERGO],
[Layout.HE_IL, LAYOUT_HE_IL_WIN],
[Layout.IT_IT, LAYOUT_IT_IT_WIN],
[Layout.NL_BE, LAYOUT_NL_BE_WIN],
[Layout.NL_NL, LAYOUT_NL_NL_WIN],
[Layout.PL_PL, LAYOUT_PL_PL_WIN],
[Layout.PT_BR, LAYOUT_PT_BR_WIN],
[Layout.PT_PT, LAYOUT_PT_PT_WIN],
Expand Down
2 changes: 2 additions & 0 deletions packages/keybr-layout/lib/language.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export class Language implements EnumItem {
static readonly FR = new Language("fr", "latin", "ltr");
static readonly HE = new Language("he", "hebrew", "rtl");
static readonly IT = new Language("it", "latin", "ltr");
static readonly NL = new Language("nl", "latin", "ltr");
static readonly PL = new Language("pl", "latin", "ltr");
static readonly PT = new Language("pt", "latin", "ltr");
static readonly RU = new Language("ru", "cyrillic", "ltr");
Expand All @@ -27,6 +28,7 @@ export class Language implements EnumItem {
Language.FR,
Language.HE,
Language.IT,
Language.NL,
Language.PL,
Language.PT,
Language.RU,
Expand Down
32 changes: 32 additions & 0 deletions packages/keybr-layout/lib/layout.ts
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,36 @@ export class Layout implements XEnumItem {
Geometry.MATRIX,
),
);
/**
 * Dutch-language layout of the AZERTY family, named "Belgium".
 *
 * Registered under the id "nl-azerty" with xid 0x91, not emulated, and
 * offered for the 101/102-key standard geometries (plain and full) as well
 * as the matrix geometry.
 * NOTE(review): xid values presumably must be unique across all layouts --
 * verify 0x91 is unused elsewhere in this class.
 */
static readonly NL_BE = new Layout(
/* id= */ "nl-azerty",
/* xid= */ 0x91,
/* name= */ "Belgium",
/* family= */ LayoutFamily.AZERTY,
/* language= */ Language.NL,
/* emulate= */ false,
/* geometries= */ new Enum(
Geometry.STANDARD_101,
Geometry.STANDARD_101_FULL,
Geometry.STANDARD_102,
Geometry.STANDARD_102_FULL,
Geometry.MATRIX,
),
);
/**
 * Dutch-language layout of the QWERTY family, named "US".
 *
 * Registered under the id "nl" with xid 0x90, not emulated, and offered for
 * the 101/102-key standard geometries (plain and full) as well as the
 * matrix geometry.
 * NOTE(review): xid values presumably must be unique across all layouts --
 * verify 0x90 is unused elsewhere in this class.
 */
static readonly NL_NL = new Layout(
/* id= */ "nl",
/* xid= */ 0x90,
/* name= */ "US",
/* family= */ LayoutFamily.QWERTY,
/* language= */ Language.NL,
/* emulate= */ false,
/* geometries= */ new Enum(
Geometry.STANDARD_101,
Geometry.STANDARD_101_FULL,
Geometry.STANDARD_102,
Geometry.STANDARD_102_FULL,
Geometry.MATRIX,
),
);
static readonly ES_ES = new Layout(
/* id= */ "es",
/* xid= */ 0x50,
Expand Down Expand Up @@ -514,6 +544,8 @@ export class Layout implements XEnumItem {
Layout.FR_OPTIMOT_ERGO,
Layout.HE_IL,
Layout.IT_IT,
Layout.NL_BE,
Layout.NL_NL,
Layout.ES_ES,
Layout.PL_PL,
Layout.PT_BR,
Expand Down
3 changes: 3 additions & 0 deletions packages/keybr-phonetic-model-loader/lib/assets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import langEsData from "@keybr/phonetic-model/assets/lang-es.data";
import langFrData from "@keybr/phonetic-model/assets/lang-fr.data";
import langHeData from "@keybr/phonetic-model/assets/lang-he.data";
import langItData from "@keybr/phonetic-model/assets/lang-it.data";
import langNlData from "@keybr/phonetic-model/assets/lang-nl.data";
import langPlData from "@keybr/phonetic-model/assets/lang-pl.data";
import langPtData from "@keybr/phonetic-model/assets/lang-pt.data";
import langRuData from "@keybr/phonetic-model/assets/lang-ru.data";
Expand Down Expand Up @@ -35,6 +36,8 @@ export function modelAssetPath(language: Language): string {
return langHeData;
case Language.IT:
return langItData;
case Language.NL:
return langNlData;
case Language.PL:
return langPlData;
case Language.PT:
Expand Down
Binary file added packages/keybr-phonetic-model/assets/lang-nl.data
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
2 changes: 2 additions & 0 deletions packages/keybr-phonetic-model/lib/blacklist/blacklist.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import blacklistEn from "./blacklist-en.json";
import blacklistEs from "./blacklist-es.json";
import blacklistFr from "./blacklist-fr.json";
import blacklistIt from "./blacklist-it.json";
import blacklistNl from "./blacklist-nl.json";
import blacklistPl from "./blacklist-pl.json";
import blacklistPt from "./blacklist-pt.json";
import blacklistRu from "./blacklist-ru.json";
Expand All @@ -25,6 +26,7 @@ const blacklistByLanguage = ((items: [Language, string[]][]) =>
[Language.ES, blacklistEs],
[Language.FR, blacklistFr],
[Language.IT, blacklistIt],
[Language.NL, blacklistNl],
[Language.PL, blacklistPl],
[Language.PT, blacklistPt],
[Language.RU, blacklistRu],
Expand Down
Binary file not shown.
1 change: 1 addition & 0 deletions packages/keybr-phonetic-model/lib/generate/languages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export const languages: readonly Language[] = [
{ id: "fr", alphabet: "aàbcçdeéèfghijlmnopqrstuvxyz" },
{ id: "he", alphabet: "אבגדהוזחטיכךלמםנןסעפףצץקרשת" },
{ id: "it", alphabet: "abcdefghijklmnopqrstuvwxyz" },
{ id: "nl", alphabet: "abcdefghijklmnopqrstuvwxyz" },
{ id: "pl", alphabet: "aąbcćdeęfghijklłmnńoóprsśtuwyzźż" },
{ id: "pt", alphabet: "aáâãàbcçdeéêfghiíjklmnoóôõpqrstuúvwxyz" },
{ id: "ru", alphabet: "абвгдежзийклмнопрстуфхцчшщъыьэюя" },
Expand Down

0 comments on commit c0926d6

Please sign in to comment.