diff --git a/.gitignore b/.gitignore index cf7c2bd..26e7826 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,4 @@ __pycache__/ *.py[cdo] *.egg-info/ *.so -.swiftpm/ diff --git a/Package.swift b/Package.swift index 47fee47..fcc0f5f 100644 --- a/Package.swift +++ b/Package.swift @@ -1,4 +1,4 @@ -// swift-tools-version: 5.8 +// swift-tools-version:5.1 import PackageDescription @@ -6,24 +6,29 @@ let package = Package( name: "uklatn", products: [ .library( - name: "UkrainianLatin", - targets: ["UkrainianLatin"]), - .executable( - name: "uklatn", - targets: ["cli"]), + name: "UKLatn", + targets: ["UKLatn"]), ], targets: [ .target( - name: "UkrainianLatin", - path: "swift/Sources/UkrainianLatin"), + name: "UKLatn", + dependencies: ["_uklatn"], + path: "swift/Sources/UKLatn"), + .target( + name: "_uklatn", + path: "swift/Sources/_uklatn", + cSettings: [ + .headerSearchPath("../../../c/include"), + ], + linkerSettings: [ + .linkedLibrary("icuuc"), + .linkedLibrary("icui18n"), + ]), .testTarget( name: "UKLatnTests", - dependencies: ["UkrainianLatin"], - path: "swift/Tests/UkrainianLatinTests"), - .executableTarget( - name: "cli", - dependencies: ["UkrainianLatin"], - path: "swift/Sources/cli"), + dependencies: ["UKLatn"], + path: "swift/Tests/UKLatnTests"), ], swiftLanguageVersions: [.v5] ) + diff --git a/readme.md b/readme.md index 04bd55d..c2dc688 100644 --- a/readme.md +++ b/readme.md @@ -5,7 +5,7 @@ Ukrainian Cyrillic transliteration to Latin script. 
import _uklatn


/// Identifiers of the transliteration systems accepted by `encode(_:table:)`
/// and `decode(_:table:)`.
///
/// Each constant is the raw value of the matching `UklatnTable_*` constant
/// exported by the C library, widened to `Int`.
public struct UKLatnTable {
    /// DSTU 9112:2021 System A.
    public static let DSTU_9112_A = Int(_uklatn.UklatnTable_DSTU_9112_A.rawValue)
    /// DSTU 9112:2021 System B.
    public static let DSTU_9112_B = Int(_uklatn.UklatnTable_DSTU_9112_B.rawValue)
    /// KMU 55:2010.
    public static let KMU_55 = Int(_uklatn.UklatnTable_KMU_55.rawValue)
}


/// Error thrown when the underlying C library reports a failure.
public enum UKLatnError: Error {
    /// The C call returned the non-zero status `code`.
    case failed(code: Int)
}


/// Runs one C transliteration call against a freshly allocated output buffer
/// and converts the NUL-terminated result into a `String`.
///
/// - Parameters:
///   - utf8Count: number of UTF-8 bytes in the input text; used to size the buffer.
///   - body: performs the C call, writing into the supplied buffer and
///     returning the library's status code (0 means success).
/// - Returns: The text the C call wrote into the buffer.
/// - Throws: `UKLatnError.failed(code:)` when `body` returns a non-zero status.
private func transliterate(
    utf8Count: Int,
    _ body: (UnsafeMutableBufferPointer<CChar>) -> Int32
) throws -> String {
    // Three output bytes per input byte is the sizing the wrapper has always used.
    // NOTE(review): presumably the library's worst-case expansion — confirm against the C tables.
    // The extra byte guarantees room for the NUL terminator and keeps the buffer
    // non-empty when the input is the empty string.
    let buffer = UnsafeMutableBufferPointer<CChar>.allocate(capacity: utf8Count * 3 + 1)
    defer {
        buffer.deallocate()
    }
    // Zero-fill so the buffer is a valid (empty) C string even if the C call
    // succeeds without writing anything.
    buffer.initialize(repeating: 0)

    let status = body(buffer)
    guard status == 0 else {
        throw UKLatnError.failed(code: Int(status))
    }
    guard let base = buffer.baseAddress else {
        return ""
    }
    return String(cString: base)
}


/// Transliterates a string of Ukrainian Cyrillic to Latin script.
///
/// - Parameters:
///   - text: the text to transliterate.
///   - table: transliteration system, one of the `UKLatnTable` constants.
///     The default `0` is passed to the C library unchanged.
/// - Returns: The transliterated string.
/// - Throws: `UKLatnError.failed(code:)` when the C library returns a non-zero status.
public func encode(_ text: String, table: Int = 0) throws -> String {
    return try transliterate(utf8Count: text.utf8.count) { dst in
        // Clamping instead of trapping: a table id outside Int32 cannot be valid,
        // and the clamped value is left for the C library to reject. A clamped
        // size only ever under-reports the buffer, which is safe.
        _uklatn.uklatn_encode(text, Int32(clamping: table), dst.baseAddress, Int32(clamping: dst.count))
    }
}


/// Re-transliterates a string of Ukrainian Latin back to Cyrillic script.
///
/// - Parameters:
///   - text: the text to transliterate.
///   - table: transliteration system, one of the `UKLatnTable` constants.
///     The default `0` is passed to the C library unchanged.
/// - Returns: The transliterated string.
/// - Throws: `UKLatnError.failed(code:)` when the C library returns a non-zero status.
public func decode(_ text: String, table: Int = 0) throws -> String {
    return try transliterate(utf8Count: text.utf8.count) { dst in
        // Same clamping rationale as in `encode(_:table:)`.
        _uklatn.uklatn_decode(text, Int32(clamping: table), dst.baseAddress, Int32(clamping: dst.count))
    }
}
-public func decode(_ text: String, table: UKLatnTable = .DSTU_9112_A) throws -> String { - guard let transform = _UklatnTables[table]?.decode - else { - throw UKLatnError.invalidTable(table.rawValue) - } - return try transform(text) -} - - -private extension String { - - func replacing(_ rx: NSRegularExpression, with replacement: (Int,String) -> String) -> String { - var so = "" - rx.enumerateMatches(in: self, range: NSRange(startIndex ..< endIndex, in: self)) { (result: NSTextCheckingResult?, flags: NSRegularExpression.MatchingFlags, stop: UnsafeMutablePointer) in - if let result { - for i in 1.. String) - let encode: Transform? - let decode: Transform? -} - - -private let _Uklatn_uk_uk_Latn_DSTU_9112_A: () -> _UKLatnCodec.Transform = { - let _rx1 = try! NSRegularExpression(pattern: #"\b([Ьь])|([Ьь](?=[АаЕеУу])|[ЄЮЯ](?=\u0301?[а-щьюяєіїґ’])|(?<=[Б-ДЖЗК-НП-ТФ-Щб-джзк-нп-тф-щҐґ])[Йй])|([ЁЄІЇЎА-яёєіїўҐґ’])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) - let _maps1:[[String:String]] = [[:], - ["Ь":"Ĵ","ь":"ĵ"], - ["Ь":"J'","ь":"j'","Є":"Je","Ю":"Ju","Я":"Ja","Й":"'J","й":"'j"], - ["А":"A","а":"a","Б":"B","б":"b","В":"V","в":"v","Г":"Ğ","г":"ğ","Ґ":"G","ґ":"g","Д":"D","д":"d","Е":"E","е":"e","Є":"JE","є":"je","Ж":"Ž","ж":"ž","З":"Z","з":"z","И":"Y","и":"y","І":"I","і":"i","Ї":"Ï","ї":"ï","К":"K","к":"k","Л":"L","л":"l","М":"M","м":"m","Н":"N","н":"n","О":"O","о":"o","П":"P","п":"p","Р":"R","р":"r","С":"S","с":"s","Т":"T","т":"t","У":"U","у":"u","Ф":"F","ф":"f","Х":"X","х":"x","Ц":"C","ц":"c","Ч":"Č","ч":"č","Ш":"Š","ш":"š","Щ":"Ŝ","щ":"ŝ","Ю":"JU","ю":"ju","Я":"JA","я":"ja","Ь":"J","ь":"j","Й":"J","й":"j","’":"'","Ё":"Ö","ё":"ö","Ў":"Ŭ","ў":"ŭ","Ъ":"Ǒ","ъ":"ǒ","Ы":"Ȳ","ы":"ȳ","Э":"Ē","э":"ē"], - [:], - ] - func transform(_ text: String) throws -> String { - var text = text - text = text.precomposedStringWithCanonicalMapping // NFC - text = text.replacing(_rx1) { (i, match) in - _maps1[i][match] ?? 
match - } - text = text.precomposedStringWithCanonicalMapping // NFC - return text - } - return transform -} - -private let _Uklatn_uk_uk_Latn_DSTU_9112_B: () -> _UKLatnCodec.Transform = { - let _rx1 = try! NSRegularExpression(pattern: #"([Ьь](?=[АаЕеІіУу])|(?<=[Б-ДЖЗК-НП-ТФ-Щб-джзк-нп-тф-щҐґ])[Йй])|([ГЄЖЇХЩШЧЮЯЁЎЪЫЭ](?=\u0301?[а-яёєіїўґ’])|\b[Ьь])|([ЁЄІЇЎА-яёєіїўҐґ’])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) - let _maps1:[[String:String]] = [[:], - ["Ь":"J'","ь":"j'","Й":"'J","й":"'j"], - ["Г":"Gh","Є":"Je","Ж":"Zh","Ї":"Ji","Х":"Kh","Щ":"Shch","Ш":"Sh","Ч":"Ch","Ю":"Ju","Я":"Ja","Ё":"Jow","Ў":"Uh","Ъ":"Oh","Ы":"Yw","Э":"Ehw","Ь":"Hj","ь":"hj"], - ["А":"A","а":"a","Б":"B","б":"b","В":"V","в":"v","Г":"GH","г":"gh","Ґ":"G","ґ":"g","Д":"D","д":"d","Е":"E","е":"e","Є":"JE","є":"je","Ж":"ZH","ж":"zh","З":"Z","з":"z","И":"Y","и":"y","І":"I","і":"i","Ї":"JI","ї":"ji","Х":"KH","х":"kh","К":"K","к":"k","Л":"L","л":"l","М":"M","м":"m","Н":"N","н":"n","О":"O","о":"o","П":"P","п":"p","Р":"R","р":"r","Щ":"SHCH","щ":"shch","Ш":"SH","ш":"sh","С":"S","с":"s","Т":"T","т":"t","У":"U","у":"u","Ф":"F","ф":"f","Ч":"CH","ч":"ch","Ц":"C","ц":"c","Ю":"JU","ю":"ju","Я":"JA","я":"ja","Й":"J","й":"j","Ь":"J","ь":"j","’":"'","Ё":"JOW","ё":"jow","Ў":"UH","ў":"uh","Ъ":"OH","ъ":"oh","Ы":"YW","ы":"yw","Э":"EHW","э":"ehw"], - [:], - ] - func transform(_ text: String) throws -> String { - var text = text - text = text.precomposedStringWithCanonicalMapping // NFC - text = text.replacing(_rx1) { (i, match) in - _maps1[i][match] ?? match - } - text = text.precomposedStringWithCanonicalMapping // NFC - return text - } - return transform -} - -private let _Uklatn_uk_uk_Latn_KMU_55: () -> _UKLatnCodec.Transform = { - let _rx1 = try! NSRegularExpression(pattern: #"(?<=[ЁЄІЇЎА-яёєіїўҐґ])([’\u0027])(?=[ЁЄІЇЎА-яёєіїўҐґ])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) - let _maps1:[[String:String]] = [[:], - ["’":"","'":""], - [:], - ] - let _rx2 = try! 
NSRegularExpression(pattern: #"\b([ЄЇЮЯ])(?=\u0301?[а-яёєіїўґ’])|\b([ЙйЄЇЮЯєїюя])|([Зз]Г|[ЖХЦЩШЧЄЇЮЯ])(?=\u0301?[а-яёєіїўґ’])|([Зз][Гг]|[ЄІЇА-ЩЬЮ-щьюяєіїҐґ’])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) - let _maps2:[[String:String]] = [[:], - ["Є":"Ye","Ї":"Yi","Ю":"Yu","Я":"Ya"], - ["Й":"Y","й":"y","Є":"YE","є":"ye","Ї":"YI","ї":"yi","Ю":"YU","ю":"yu","Я":"YA","я":"ya"], - ["ЗГ":"ZGh","зГ":"zGh","Ж":"Zh","Х":"Kh","Ц":"Ts","Щ":"Shch","Ш":"Sh","Ч":"Ch","Є":"Ie","Ї":"I","Ю":"Iu","Я":"Ia"], - ["ЗГ":"ZGH","Зг":"Zgh","зГ":"zGH","зг":"zgh","А":"A","а":"a","Б":"B","б":"b","В":"V","в":"v","Г":"H","г":"h","Ґ":"G","ґ":"g","Д":"D","д":"d","Е":"E","е":"e","Є":"IE","є":"ie","Ж":"ZH","ж":"zh","З":"Z","з":"z","И":"Y","и":"y","І":"I","і":"i","Ї":"I","ї":"i","Х":"KH","х":"kh","К":"K","к":"k","Л":"L","л":"l","М":"M","м":"m","Н":"N","н":"n","О":"O","о":"o","П":"P","п":"p","Р":"R","р":"r","Щ":"SHCH","щ":"shch","Ш":"SH","ш":"sh","С":"S","с":"s","Т":"T","т":"t","У":"U","у":"u","Ф":"F","ф":"f","Ч":"CH","ч":"ch","Ц":"TS","ц":"ts","Ю":"IU","ю":"iu","Я":"IA","я":"ia","Й":"I","й":"i","Ь":"","ь":"","’":""], - [:], - ] - func transform(_ text: String) throws -> String { - var text = text - text = text.precomposedStringWithCanonicalMapping // NFC - text = text.replacing(_rx1) { (i, match) in - _maps1[i][match] ?? match - } - text = text.replacing(_rx2) { (i, match) in - _maps2[i][match] ?? match - } - text = text.precomposedStringWithCanonicalMapping // NFC - return text - } - return transform -} - -private let _Uklatn_uk_Latn_DSTU_9112_A_uk: () -> _UKLatnCodec.Transform = { - let _rx1 = try! NSRegularExpression(pattern: #"([ÁáÉéÍíÓóÚúÝýḮḯ])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) - let _maps1:[[String:String]] = [[:], - ["Á":"Á","á":"á","É":"É","é":"é","Í":"Í","í":"í","Ó":"Ó","ó":"ó","Ú":"Ú","ú":"ú","Ý":"Ý","ý":"ý","Ḯ":"Ḯ","ḯ":"ḯ"], - [:], - ] - let _rx2 = try! 
NSRegularExpression(pattern: #"(J[Ee]|j[Ee]|J[Uu]|j[Uu]|J[Aa]|j[Aa]|[A-GIK-PR-VXYZa-gik-pr-vxyzÏÖïöČčĒēĞğĴĵŜŝŠšŬŭŽžǑǒȲȳ])|(?<=[BbCcDdFfGgKkLlMmNnPpRrSsTtVvXxZzČčĞğŜŝŠšŽž])([Jj]\u0027(?=[AaEeUu])|[Jj])|(\u0027[Jj](?![AaEeIiUu])|\u0027(?=[Jj])|[Jj])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) - let _maps2:[[String:String]] = [[:], - ["A":"А","a":"а","B":"Б","b":"б","V":"В","v":"в","Ğ":"Г","ğ":"г","G":"Ґ","g":"ґ","D":"Д","d":"д","E":"Е","e":"е","JE":"Є","Je":"Є","jE":"є","je":"є","Ž":"Ж","ž":"ж","Z":"З","z":"з","Y":"И","y":"и","I":"І","i":"і","Ï":"Ї","ï":"ї","K":"К","k":"к","L":"Л","l":"л","M":"М","m":"м","N":"Н","n":"н","O":"О","o":"о","P":"П","p":"п","R":"Р","r":"р","S":"С","s":"с","T":"Т","t":"т","U":"У","u":"у","F":"Ф","f":"ф","X":"Х","x":"х","C":"Ц","c":"ц","Č":"Ч","č":"ч","Š":"Ш","š":"ш","Ŝ":"Щ","ŝ":"щ","JU":"Ю","Ju":"Ю","jU":"ю","ju":"ю","JA":"Я","Ja":"Я","jA":"я","ja":"я","Ĵ":"Ь","ĵ":"ь","Ö":"Ё","ö":"ё","Ŭ":"Ў","ŭ":"ў","Ǒ":"Ъ","ǒ":"ъ","Ȳ":"Ы","ȳ":"ы","Ē":"Э","ē":"э"], - ["J":"Ь","j":"ь","J'":"Ь","j'":"ь"], - ["'J":"Й","'j":"й","'":"’","J":"Й","j":"й"], - [:], - ] - func transform(_ text: String) throws -> String { - var text = text - text = text.precomposedStringWithCanonicalMapping // NFC - text = text.replacing(_rx1) { (i, match) in - _maps1[i][match] ?? match - } - text = text.replacing(_rx2) { (i, match) in - _maps2[i][match] ?? match - } - text = text.precomposedStringWithCanonicalMapping // NFC - return text - } - return transform -} - -private let _Uklatn_uk_Latn_DSTU_9112_B_uk: () -> _UKLatnCodec.Transform = { - let _rx1 = try! NSRegularExpression(pattern: #"([ÁáÉéÍíÓóÚúÝý])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) - let _maps1:[[String:String]] = [[:], - ["Á":"Á","á":"á","É":"É","é":"é","Í":"Í","í":"í","Ó":"Ó","ó":"ó","Ú":"Ú","ú":"ú","Ý":"Ý","ý":"ý"], - [:], - ] - let _rx2 = try! 
NSRegularExpression(pattern: #"([Jj][Oo][Ww]|[Ss][Hh][Cc][Hh]|[CcGgKkSsZzUuOo][Hh]|[Yy][Ww]|[Ee][Hh][Ww]|[Jj][EeIiUuAa]|[Hh][Jj]|[A-GIK-PR-VYZa-gik-pr-vyz])|(?<=[Ss][Hh][Cc][Hh])([Jj]\u0027(?=[AaEeIiUu])|[Jj])|(?<=[CcGgKkSsZz][Hh])([Jj]\u0027(?=[AaEeIiUu])|[Jj])|(?<=[BCDFGKLMNPRSTVZbcdfgklmnprstvzv])([Jj]\u0027(?=[AaEeIiUu])|[Jj])|(\u0027[Jj](?![AaEeIiUu])|\u0027(?=[Jj])|[Jj])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) - let _maps2:[[String:String]] = [[:], - ["A":"А","a":"а","B":"Б","b":"б","V":"В","v":"в","GH":"Г","Gh":"Г","gH":"г","gh":"г","G":"Ґ","g":"ґ","D":"Д","d":"д","E":"Е","e":"е","JE":"Є","Je":"Є","jE":"є","je":"є","ZH":"Ж","Zh":"Ж","zH":"ж","zh":"ж","Z":"З","z":"з","Y":"И","y":"и","I":"І","i":"і","JI":"Ї","Ji":"Ї","jI":"ї","ji":"ї","KH":"Х","Kh":"Х","kH":"х","kh":"х","K":"К","k":"к","L":"Л","l":"л","M":"М","m":"м","N":"Н","n":"н","O":"О","o":"о","P":"П","p":"п","R":"Р","r":"р","SHCH":"Щ","SHCh":"Щ","SHcH":"Щ","SHch":"Щ","ShCH":"Щ","ShCh":"Щ","ShcH":"Щ","Shch":"Щ","sHCH":"щ","sHCh":"щ","sHcH":"щ","sHch":"щ","shCH":"щ","shCh":"щ","shcH":"щ","shch":"щ","SH":"Ш","Sh":"Ш","sH":"ш","sh":"ш","S":"С","s":"с","T":"Т","t":"т","U":"У","u":"у","F":"Ф","f":"ф","CH":"Ч","Ch":"Ч","cH":"ч","ch":"ч","C":"Ц","c":"ц","JU":"Ю","Ju":"Ю","jU":"ю","ju":"ю","JA":"Я","Ja":"Я","jA":"я","ja":"я","HJ":"Ь","Hj":"Ь","hJ":"ь","hj":"ь","JOW":"Ё","JOw":"Ё","JoW":"Ё","Jow":"Ё","jOW":"ё","jOw":"ё","joW":"ё","jow":"ё","UH":"Ў","Uh":"Ў","uH":"ў","uh":"ў","OH":"Ъ","Oh":"Ъ","oH":"ъ","oh":"ъ","YW":"Ы","Yw":"Ы","yW":"ы","yw":"ы","EHW":"Э","EHw":"Э","EhW":"Э","Ehw":"Э","eHW":"э","eHw":"э","ehW":"э","ehw":"э"], - ["J":"Ь","j":"ь","J'":"Ь","j'":"ь"], - ["J":"Ь","j":"ь","J'":"Ь","j'":"ь"], - ["J":"Ь","j":"ь","J'":"Ь","j'":"ь"], - ["'J":"Й","'j":"й","'":"’","J":"Й","j":"й"], - [:], - ] - func transform(_ text: String) throws -> String { - var text = text - text = text.precomposedStringWithCanonicalMapping // NFC - text = text.replacing(_rx1) { (i, match) in - 
_maps1[i][match] ?? match - } - text = text.replacing(_rx2) { (i, match) in - _maps2[i][match] ?? match - } - text = text.precomposedStringWithCanonicalMapping // NFC - return text - } - return transform -} - -private let _UklatnTables: [UKLatnTable:_UKLatnCodec] = [ - .DSTU_9112_A: _UKLatnCodec(encode: _Uklatn_uk_uk_Latn_DSTU_9112_A(), decode: _Uklatn_uk_Latn_DSTU_9112_A_uk()), - .DSTU_9112_B: _UKLatnCodec(encode: _Uklatn_uk_uk_Latn_DSTU_9112_B(), decode: _Uklatn_uk_Latn_DSTU_9112_B_uk()), - .KMU_55: _UKLatnCodec(encode: _Uklatn_uk_uk_Latn_KMU_55(), decode: nil), -] diff --git a/swift/Sources/_uklatn/_uklatn.c b/swift/Sources/_uklatn/_uklatn.c new file mode 100644 index 0000000..3ba3461 --- /dev/null +++ b/swift/Sources/_uklatn/_uklatn.c @@ -0,0 +1,2 @@ +#include "include/_uklatn/_uklatn.h" +#include "../../../c/uklatn.c" diff --git a/swift/Sources/_uklatn/include/_uklatn/_uklatn.h b/swift/Sources/_uklatn/include/_uklatn/_uklatn.h new file mode 100644 index 0000000..9290d6a --- /dev/null +++ b/swift/Sources/_uklatn/include/_uklatn/_uklatn.h @@ -0,0 +1,3 @@ +#pragma once +#include "../../../../../c/include/uklatn.h" + diff --git a/swift/Sources/cli/uklatn.swift b/swift/Sources/cli/uklatn.swift deleted file mode 100644 index c912ade..0000000 --- a/swift/Sources/cli/uklatn.swift +++ /dev/null @@ -1,211 +0,0 @@ -import Foundation -import UkrainianLatin - - -@main -struct MyApp { - - static func main() { - var stderr = ErrorStream() - - do { - let args = try AppArgs.parse(CommandLine.arguments) - if args.printHelp { - AppArgs.printHelp() - } - else if let file = args.file { - try transformFile(file, direction: args.direction, table: args.table) - } - else if let text = args.text { - try transformText(text, direction: args.direction, table: args.table) - } - } - catch let error as AppArgs.ParseError { - AppArgs.printError(error, to: &stderr) - } - catch UKLatnError.invalidTable(let table) { - AppArgs.printError(.invalidTable("\(table)"), to: &stderr) - } - catch { - 
print(error, to: &stderr) - } - } - - private static func transformText(_ text: String, direction: AppArgs.TransformDirection, table: UKLatnTable) throws { - let value: String - switch direction { - case .cyr2lat: - value = try encode(text, table: table) - case .lat2cyr: - value = try decode(text, table: table) - } - print("\(value)") - } - - private static func transformFile(_ file: String, direction: AppArgs.TransformDirection, table: UKLatnTable) throws { - if file == "-" { - while let text = readLine() { - try transformText(text, direction: direction, table: table) - } - } - else { - var encoding: String.Encoding = .utf8 - let text = try String(contentsOfFile: file, usedEncoding: &encoding) - try transformText(text, direction: direction, table: table) - } - } -} - - -private struct AppArgs { - var executable: String = "" - var printHelp: Bool = false - var text: String? - var file: String? - var table: UKLatnTable = .DSTU_9112_A - var direction: TransformDirection = .cyr2lat - - static let _usage = "usage: uklatn [-h] [-t TABLE] [-c] [-l] [-f FILE] [text ...]" - - static let _help = _usage + - """ - - - arguments: - text text to transliterate - - options: - -h, --help show this help message and exit - -t, --table {DSTU_9112_A,DSTU_9112_B,KMU_55} - transliteration system (default: DSTU_9112_A) - -l, --lat, --latin convert to Latin script (default) - -c, --cyr, --cyrillic convert to Cyrillic script - -f, --file FILE read text from file - """ - - enum TransformDirection { - case cyr2lat - case lat2cyr - } - - enum ParseError: Error { - case unknownArgument(String) - case invalidTable(String) - case missingTableValue - case missingFileValue - case missingRequiredTextOrFile - } - - static func printUsage(to output: inout S) where S:TextOutputStream { - print("\(_usage)", to: &output) - } - - static func printHelp() { - print("\(_help)") - } - - static func printHelp(to output: inout S) where S:TextOutputStream { - print("\(_help)", to: &output) - } - - static func 
printError(_ error: ParseError, to output: inout S) where S:TextOutputStream { - let message: String - switch error { - case .unknownArgument(let arg): - message = "unrecognized arguments: \(arg)" - case .invalidTable(let table): - message = "invalid table: \(table)" - case .missingTableValue: - message = "argument -t/--table expected table name" - case .missingFileValue: - message = "argument -f/--file expected file name" - case .missingRequiredTextOrFile: - message = "missing required arguments: text or file" - } - printUsage(to: &output) - print("error: \(message)", to: &output) - } - - static func parse(_ argv: [String]) throws -> AppArgs { - var args = AppArgs() - args.executable = argv[0] - var state: Int = 0 - - for iarg in 1 ..< argv.count { - let arg = argv[iarg] - switch state { - - case 0: - if arg.first == "-" { - switch arg { - case "-h", "-help", "--help": - args.printHelp = true - return args - case "-c", "--cyr", "--cyrillic": - args.direction = .lat2cyr - case "-l", "--lat", "--latin": - args.direction = .cyr2lat - case "-t", "--table": - state = 1 - case "-f", "--file": - state = 2 - default: - throw ParseError.unknownArgument(arg) - } - } - else if !arg.isEmpty { - args.text = args.text.map({ $0 + " " + arg }) ?? arg - } - - case 1: - if let table = parseTable(arg) { - args.table = table - state = 0 - } - else { - throw ParseError.invalidTable(arg) - } - - case 2: - args.file = arg - state = 0 - - default: - fatalError() - } - } - - switch state { - case 1: - throw ParseError.missingTableValue - case 2: - throw ParseError.missingFileValue - default: - break - } - - if args.text == nil && args.file == nil { - throw ParseError.missingRequiredTextOrFile - } - - return args - } - - private static func parseTable(_ value: String) -> UKLatnTable? 
import Testing
@testable import UKLatn


/// Sample phrase used by every scheme check below.
private let sampleCyrillic = "Доброго вечора, ми з України!"


/// Default table: encoding yields DSTU 9112 System A output and decoding restores the input.
@Test func encode_DSTU_A() async throws {
    let latin = try encode(sampleCyrillic)
    #expect(latin == "Dobroğo večora, my z Ukraïny!")

    let roundTrip = try decode(latin)
    #expect(roundTrip == sampleCyrillic)
}


/// DSTU 9112 System B: encoding and decoding round-trip the sample phrase.
@Test func encode_DSTU_B() async throws {
    let scheme = UKLatnTable.DSTU_9112_B

    let latin = try encode(sampleCyrillic, table: scheme)
    #expect(latin == "Dobrogho vechora, my z Ukrajiny!")

    let roundTrip = try decode(latin, table: scheme)
    #expect(roundTrip == sampleCyrillic)
}


/// KMU 55: encoding works, decoding is expected to throw `UKLatnError`.
@Test func encode_KMU() async throws {
    let scheme = UKLatnTable.KMU_55

    let latin = try encode(sampleCyrillic, table: scheme)
    #expect(latin == "Dobroho vechora, my z Ukrainy!")

    #expect(throws: UKLatnError.self) {
        try decode(latin, table: scheme)
    }
}
*/ - -import Testing -@testable import UkrainianLatin - - -@Suite("DSTU_9112_A") struct Dstu9112ATests { - - @Test func c2lr_DSTU_9112_A() throws { - let data: [(String, String)] = [ - ( - "Україна, Хмельницький", - "Ukraïna, Xmeljnycjkyj" - ), - ( - "Щастям б’єш жук їх глицю в фон й ґедзь пріч.", - "Ŝastjam b'ješ žuk ïx ğlycju v fon j gedzj prič." - ), - ( - "ь Ь ль льє льї лью лья лье льі льу льа льйо льо", - "ĵ Ĵ lj ljje ljï ljju ljja lj'e lji lj'u lj'a ljjo ljo" - ), - ( - "Єл Їл Юл Ял", - "Jel Ïl Jul Jal" - ), - ( - "бь вь гь ґь дь жь зь кь ль мь нь пь рь сь ть фь хь ць чь шь щь", - "bj vj ğj gj dj žj zj kj lj mj nj pj rj sj tj fj xj cj čj šj ŝj" - ), - ( - "бя вя гя ґя дя жя зя кя ля мя ня пя ря ся тя фя хя ця чя шя щя", - "bja vja ğja gja dja žja zja kja lja mja nja pja rja sja tja fja xja cja čja šja ŝja" - ), - ( - "б’я в’я г’я ґ’я д’я ж’я з’я к’я л’я м’я н’я п’я р’я с’я т’я ф’я х’я ц’я ч’я ш’я щ’я", - "b'ja v'ja ğ'ja g'ja d'ja ž'ja z'ja k'ja l'ja m'ja n'ja p'ja r'ja s'ja t'ja f'ja x'ja c'ja č'ja š'ja ŝ'ja" - ), - ( - "бй бйо вй гй ґй дй жй зй кй лй мй нй пй рй сй тй фй хй цй чй шй щй", - "b'j b'jo v'j ğ'j g'j d'j ž'j z'j k'j l'j m'j n'j p'j r'j s'j t'j f'j x'j c'j č'j š'j ŝ'j" - ), - ( - "ня ньа н’я нь'н ньн", - "nja nj'a n'ja nj'n njn" - ), - ( - "рос дыня эзёдынъ. бр кроў.", - "ros dȳnja ēzödȳnǒ. br kroŭ." - ), - ( - "А́ а́ Е́ е́ Є́ є́ И́ и́ І́ і́ Ї́ ї́ О́ о́ У́ у́ Ю́ ю́ Я́ я́", - "Á á É é JÉ jé Ý ý Í í Ḯ ḯ Ó ó Ú ú JÚ jú JÁ já" - ), - ( - "Є́с сЄ́с є́с сє́с Ї́с сЇ́с ї́с сї́с Ю́с сЮ́с ю́с сю́с Я́с сЯ́с я́с ся́с", - "Jés sJés jés sjés Ḯs sḮs ḯs sḯs Jús sJús jús sjús Jás sJás jás sjás" - ), - ( - "' ім’я 'жук' \"жук\" ' '", - "' im'ja 'žuk' \"žuk\" ' '" - ), - ( - "Сонце світить майже білим світлом, однак через сильніше розсіювання і поглинання короткохвильової частини спектра атмосферою Землі пряме світло Сонця біля поверхні нашої планети набуває певного жовтого відтінку. 
Якщо небо ясне, то блакитний відтінок розсіяного світла складається з жовтуватим прямим сонячним світлом і загальне освітлення об’єктів на Землі стає білим.", - "Sonce svitytj majže bilym svitlom, odnak čerez syljniše rozsijuvannja i poğlynannja korotkoxvyljovoï častyny spektra atmosferoju Zemli prjame svitlo Soncja bilja poverxni našoï planety nabuvaje pevnoğo žovtoğo vidtinku. Jakŝo nebo jasne, to blakytnyj vidtinok rozsijanoğo svitla skladajetjsja z žovtuvatym prjamym sonjačnym svitlom i zağaljne osvitlennja ob'jektiv na Zemli staje bilym." - ), - ( - "дуб!дуб\"дуб#дуб$дуб%дуб&дуб'дуб(дуб)дуб*дуб+дуб,дуб-дуб.дуб/дуб:дуб;дуб<дуб=дуб>дуб?дуб@дуб[дуб\\дуб]дуб^дуб_дуб`дуб{дуб|дуб}дуб~дуб", - "dub!dub\"dub#dub$dub%dub&dub'dub(dub)dub*dub+dub,dub-dub.dub/dub:dub;dubdub?dub@dub[dub\\dub]dub^dub_dub`dub{dub|dub}dub~dub" - ), - ( - "бод бод\tбод\nбод\rбод", - "bod bod\tbod\nbod\rbod" - ) ] - - for (cyr,lat) in data { - let enc = try encode(cyr, table: UKLatnTable.DSTU_9112_A) - #expect(lat == enc) - let dec = try decode(lat, table: UKLatnTable.DSTU_9112_A) - #expect(cyr == dec) - } - } - - @Test func c2l_DSTU_9112_A() throws { - let data: [(String, String)] = [ - ( - "в’я в'я", - "v'ja v'ja" - ), - ( - "Ї ї Й й Ё ё Ў ў", - "Ï ï J j Ö ö Ŭ ŭ" - ) ] - - for (cyr,lat) in data { - let enc = try encode(cyr, table: UKLatnTable.DSTU_9112_A) - #expect(lat == enc) - } - } - - @Test func l2c_DSTU_9112_A() throws { - let data: [(String, String)] = [ - ( - "я є ю", - "jA jE jU" - ), - ( - "Ї ї Ь ь Ч ч Г г Щ щ Ш ш Ж ж", - "Ï ï Ĵ ĵ Č č Ğ ğ Ŝ ŝ Š š Ž ž" - ), - ( - "Ё ё Ў ў Ъ ъ Ы ы Э э", - "Ö ö Ŭ ŭ Ǒ ǒ Ȳ ȳ Ē ē" - ), - ( - "А́ а́ Е́ е́ Є́ Є́ є́ є́ И́ и́ І́ і́ Ї́ ї́ О́ о́ У́ у́ Ю́ Ю́ ю́ ю́ Я́ Я́ я́ я́", - "Á á É é JÉ Jé jÉ jé Ý ý Í í Ḯ ḯ Ó ó Ú ú JÚ Jú jÚ jú JÁ Já jÁ já" - ), - ( - "Є́с сЄ́с є́с сє́с Ї́с сЇ́с ї́с сї́с Ю́с сЮ́с ю́с сю́с Я́с сЯ́с я́с ся́с", - "Jés sJés jés sjés Ḯs sḮs ḯs sḯs Jús sJús jús sjús Jás sJás jás sjás" - ) ] - - for (cyr,lat) in data { - let dec = try decode(lat, 
table: UKLatnTable.DSTU_9112_A) - #expect(cyr == dec) - } - } -} - - -@Suite("DSTU_9112_B") struct Dstu9112BTests { - - @Test func c2lr_DSTU_9112_B() throws { - let data: [(String, String)] = [ - ( - "Україна, Хмельницький", - "Ukrajina, Khmeljnycjkyj" - ), - ( - "Щастям б’єш жук їх глицю в фон й ґедзь пріч.", - "Shchastjam b'jesh zhuk jikh ghlycju v fon j gedzj prich." - ), - ( - "ь Ь ль льє льї лью лья лье льі льу льа льйо льо", - "hj Hj lj ljje ljji ljju ljja lj'e lj'i lj'u lj'a ljjo ljo" - ), - ( - "Єл Їл Юл Ял", - "Jel Jil Jul Jal" - ), - ( - "бь вь гь ґь дь жь зь кь ль мь нь пь рь сь ть фь хь ць чь шь щь", - "bj vj ghj gj dj zhj zj kj lj mj nj pj rj sj tj fj khj cj chj shj shchj" - ), - ( - "бя вя гя ґя дя жя зя кя ля мя ня пя ря ся тя фя хя ця чя шя щя", - "bja vja ghja gja dja zhja zja kja lja mja nja pja rja sja tja fja khja cja chja shja shchja" - ), - ( - "б’я в’я г’я ґ’я д’я ж’я з’я к’я л’я м’я н’я п’я р’я с’я т’я ф’я х’я ц’я ч’я ш’я щ’я", - "b'ja v'ja gh'ja g'ja d'ja zh'ja z'ja k'ja l'ja m'ja n'ja p'ja r'ja s'ja t'ja f'ja kh'ja c'ja ch'ja sh'ja shch'ja" - ), - ( - "бй бйо вй гй ґй дй жй зй кй лй мй нй пй рй сй тй фй хй цй чй шй щй", - "b'j b'jo v'j gh'j g'j d'j zh'j z'j k'j l'j m'j n'j p'j r'j s'j t'j f'j kh'j c'j ch'j sh'j shch'j" - ), - ( - "ня ньа н’я нь'н ньн", - "nja nj'a n'ja nj'n njn" - ), - ( - "рос дыня эзёдынъ. бр кроў.", - "ros dywnja ehwzjowdywnoh. br krouh." - ), - ( - "А́ а́ Е́ е́ Є́ є́ И́ и́ І́ і́ Ї́ ї́ О́ о́ У́ у́ Ю́ ю́ Я́ я́", - "Á á É é JÉ jé Ý ý Í í JÍ jí Ó ó Ú ú JÚ jú JÁ já" - ), - ( - "Є́с сЄ́с є́с сє́с Ї́с сЇ́с ї́с сї́с Ю́с сЮ́с ю́с сю́с Я́с сЯ́с я́с ся́с", - "Jés sJés jés sjés Jís sJís jís sjís Jús sJús jús sjús Jás sJás jás sjás" - ), - ( - "' ім’я 'жук' \"жук\" ' '", - "' im'ja 'zhuk' \"zhuk\" ' '" - ), - ( - "Сонце світить майже білим світлом, однак через сильніше розсіювання і поглинання короткохвильової частини спектра атмосферою Землі пряме світло Сонця біля поверхні нашої планети набуває певного жовтого відтінку. 
Якщо небо ясне, то блакитний відтінок розсіяного світла складається з жовтуватим прямим сонячним світлом і загальне освітлення об’єктів на Землі стає білим.", - "Sonce svitytj majzhe bilym svitlom, odnak cherez syljnishe rozsijuvannja i poghlynannja korotkokhvyljovoji chastyny spektra atmosferoju Zemli prjame svitlo Soncja bilja poverkhni nashoji planety nabuvaje pevnogho zhovtogho vidtinku. Jakshcho nebo jasne, to blakytnyj vidtinok rozsijanogho svitla skladajetjsja z zhovtuvatym prjamym sonjachnym svitlom i zaghaljne osvitlennja ob'jektiv na Zemli staje bilym." - ), - ( - "дуб!дуб\"дуб#дуб$дуб%дуб&дуб'дуб(дуб)дуб*дуб+дуб,дуб-дуб.дуб/дуб:дуб;дуб<дуб=дуб>дуб?дуб@дуб[дуб\\дуб]дуб^дуб_дуб`дуб{дуб|дуб}дуб~дуб", - "dub!dub\"dub#dub$dub%dub&dub'dub(dub)dub*dub+dub,dub-dub.dub/dub:dub;dubdub?dub@dub[dub\\dub]dub^dub_dub`dub{dub|dub}dub~dub" - ), - ( - "бод бод\tбод\nбод\rбод", - "bod bod\tbod\nbod\rbod" - ) ] - - for (cyr,lat) in data { - let enc = try encode(cyr, table: UKLatnTable.DSTU_9112_B) - #expect(lat == enc) - let dec = try decode(lat, table: UKLatnTable.DSTU_9112_B) - #expect(cyr == dec) - } - } - - @Test func c2l_DSTU_9112_B() throws { - let data: [(String, String)] = [ - ( - "в’я в'я", - "v'ja v'ja" - ), - ( - "Ї ї Й й Ё ё Ў ў", - "JI ji J j JOW jow UH uh" - ) ] - - for (cyr,lat) in data { - let enc = try encode(cyr, table: UKLatnTable.DSTU_9112_B) - #expect(lat == enc) - } - } - - @Test func l2c_DSTU_9112_B() throws { - let data: [(String, String)] = [ - ( - "я ї є ю г ж х щ ш ч ь", - "jA jI jE jU gH zH kH sHcH sH cH hJ" - ), - ( - "А́ а́ Е́ е́ Є́ Є́ є́ є́ И́ и́ І́ і́ Ї́ Ї́ ї́ ї́ О́ о́ У́ у́ Ю́ Ю́ ю́ ю́ Я́ Я́ я́ я́", - "Á á É é JÉ Jé jÉ jé Ý ý Í í JÍ Jí jÍ jí Ó ó Ú ú JÚ Jú jÚ jú JÁ Já jÁ já" - ), - ( - "Є́с сЄ́с є́с сє́с Ї́с сЇ́с ї́с сї́с Ю́с сЮ́с ю́с сю́с Я́с сЯ́с я́с ся́с", - "Jés sJés jés sjés Jís sJís jís sjís Jús sJús jús sjús Jás sJás jás sjás" - ) ] - - for (cyr,lat) in data { - let dec = try decode(lat, table: UKLatnTable.DSTU_9112_B) - #expect(cyr 
== dec) - } - } -} - - -@Suite("KMU_55") struct Kmu55Tests { - - @Test func c2l_KMU_55() throws { - let data: [(String, String)] = [ - ( - "Україна, Хмельницький", - "Ukraina, Khmelnytskyi" - ), - ( - "Щастям б’єш жук їх глицю в фон й ґедзь пріч.", - "Shchastiam biesh zhuk yikh hlytsiu v fon y gedz prich." - ), - ( - "згин зГ зГин Згин Зг ЗГ ЗГИН", - "zghyn zGH zGhyn Zghyn Zgh ZGH ZGHYN" - ), - ( - "ь Ь ль льє льї лью лья лье льі льу льа льйо льо", - " l lie li liu lia le li lu la lio lo" - ), - ( - "Єл Їл Юл Ял", - "Yel Yil Yul Yal" - ), - ( - "бь вь гь ґь дь жь зь кь ль мь нь пь рь сь ть фь хь ць чь шь щь", - "b v h g d zh z k l m n p r s t f kh ts ch sh shch" - ), - ( - "бя вя гя ґя дя жя зя кя ля мя ня пя ря ся тя фя хя ця чя шя щя", - "bia via hia gia dia zhia zia kia lia mia nia pia ria sia tia fia khia tsia chia shia shchia" - ), - ( - "б’я в’я г’я ґ’я д’я ж’я з’я к’я л’я м’я н’я п’я р’я с’я т’я ф’я х’я ц’я ч’я ш’я щ’я", - "bia via hia gia dia zhia zia kia lia mia nia pia ria sia tia fia khia tsia chia shia shchia" - ), - ( - "бй бйо вй гй ґй дй жй зй кй лй мй нй пй рй сй тй фй хй цй чй шй щй", - "bi bio vi hi gi di zhi zi ki li mi ni pi ri si ti fi khi tsi chi shi shchi" - ), - ( - "А́ а́ Е́ е́ Є́ є́ И́ и́ І́ і́ Ї́ ї́ О́ о́ У́ у́ Ю́ ю́ Я́ я́", - "Á á É é YÉ yé Ý ý Í í YÍ yí Ó ó Ú ú YÚ yú YÁ yá" - ), - ( - "Є́с сЄ́с є́с сє́с Ї́с сЇ́с ї́с сї́с Ю́с сЮ́с ю́с сю́с Я́с сЯ́с я́с ся́с", - "Yés sIés yés siés Yís sÍs yís sís Yús sIús yús siús Yás sIás yás siás" - ), - ( - "' ім’я 'жук' \"жук\" ' '", - "' imia 'zhuk' \"zhuk\" ' '" - ), - ( - "Сонце світить майже білим світлом, однак через сильніше розсіювання і поглинання короткохвильової частини спектра атмосферою Землі пряме світло Сонця біля поверхні нашої планети набуває певного жовтого відтінку. 
Якщо небо ясне, то блакитний відтінок розсіяного світла складається з жовтуватим прямим сонячним світлом і загальне освітлення об’єктів на Землі стає білим.", - "Sontse svityt maizhe bilym svitlom, odnak cherez sylnishe rozsiiuvannia i pohlynannia korotkokhvylovoi chastyny spektra atmosferoiu Zemli priame svitlo Sontsia bilia poverkhni nashoi planety nabuvaie pevnoho zhovtoho vidtinku. Yakshcho nebo yasne, to blakytnyi vidtinok rozsiianoho svitla skladaietsia z zhovtuvatym priamym soniachnym svitlom i zahalne osvitlennia obiektiv na Zemli staie bilym." - ), - ( - "в’я в'я", - "via via" - ), - ( - "дуб!дуб\"дуб#дуб$дуб%дуб&дуб'дуб(дуб)дуб*дуб+дуб,дуб-дуб.дуб/дуб:дуб;дуб<дуб=дуб>дуб?дуб@дуб[дуб\\дуб]дуб^дуб_дуб`дуб{дуб|дуб}дуб~дуб", - "dub!dub\"dub#dub$dub%dub&dubdub(dub)dub*dub+dub,dub-dub.dub/dub:dub;dubdub?dub@dub[dub\\dub]dub^dub_dub`dub{dub|dub}dub~dub" - ), - ( - "бод бод\tбод\nбод\rбод", - "bod bod\tbod\nbod\rbod" - ) ] - - for (cyr,lat) in data { - let enc = try encode(cyr, table: UKLatnTable.KMU_55) - #expect(lat == enc) - } - } - - @Test func decode_KMU_55_throws() throws { - #expect(throws: UKLatnError.self) { - try decode("lat", table: UKLatnTable.KMU_55) - } - } -} diff --git a/swift/readme.md b/swift/readme.md index 2c78063..19da872 100644 --- a/swift/readme.md +++ b/swift/readme.md @@ -2,15 +2,10 @@ uklatn == Ukrainian Cyrillic transliteration to Latin script. -Supported transliteration schemes: -- [DSTU 9112:2021](https://uk.wikipedia.org/wiki/ДСТУ_9112:2021) -- [KMU 55:2010](https://zakon.rada.gov.ua/laws/show/55-2010-п) - - ```swift -import UkrainianLatin +import UKLatn let s = try! encode("Доброго вечора!") -let t = try! decode("Paljanycja") +let t = try! decode("Paljanycja") print(s, t) ``` @@ -19,36 +14,3 @@ Select a transliteration scheme: try encode("Борщ", table: UKLatnTable.DSTU_9112_A) ``` - -Notes --- -Input is assumed to be in Ukrainian (Cyrillic or Latin script), and will be processed in full.
-If your data has mixed languages, do preprocessing to extract Ukrainian chunks. - - -Command-line executable --- - -```sh -uklatn 'моє щастя' -``` - -Running executable from a package: -```sh -swift run uklatn 'моє щастя' -``` - -```txt -usage: uklatn [-h] [-t TABLE] [-c] [-l] [-f FILE] [text ...] - -arguments: - text text to transliterate - -options: - -h, --help show this help message and exit - -t, --table {DSTU_9112_A,DSTU_9112_B,KMU_55} - transliteration system (default: DSTU_9112_A) - -l, --lat, --latin convert to Latin script (default) - -c, --cyr, --cyrillic convert to Cyrillic script - -f, --file FILE read text from file -``` diff --git a/tools/Makefile b/tools/Makefile index c316ffc..f1d9227 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -2,11 +2,10 @@ .PHONY: c .PHONY: js .PHONY: py -.PHONY: swift PYTHON ?= python -all: c js py swift +all: c js py c: @echo "/* Generated by gentables.py, do not edit. */\n" > ../c/_tables.c @@ -26,9 +25,3 @@ py: @echo "# Generated by gentests.py, do not edit.\n" > ../python/tests/uklatn_tests.py @$(PYTHON) gentests.py py >> ../python/tests/uklatn_tests.py -swift: - @echo "/* Generated by gentables.py, do not edit. */\n" > ../swift/Sources/UkrainianLatin/UKLatn.swift - @$(PYTHON) gentables.py swift >> ../swift/Sources/UkrainianLatin/UKLatn.swift - @echo "/* Generated by gentests.py, do not edit. 
*/\n" > ../swift/Tests/UkrainianLatinTests/UKLatnTests.swift - @$(PYTHON) gentests.py swift >> ../swift/Tests/UkrainianLatinTests/UKLatnTests.swift - diff --git a/tools/gen/gen_swift.py b/tools/gen/gen_swift.py deleted file mode 100755 index 0dce8e0..0000000 --- a/tools/gen/gen_swift.py +++ /dev/null @@ -1,252 +0,0 @@ -import io -import json -import logging -import re -from pathlib import Path - - -logger = logging.getLogger(Path(__file__).stem) - - -def gen_tests(fns): - def _parse_tests(fn): - def parse_kind(s): - match s.lower().split(): - case ['cyr', '<>', 'lat']: return 'c2lr' - case ['lat', '<>', 'cyr']: return 'l2cr' - case ['cyr', '>', 'lat']: return 'c2l' - case ['lat', '>', 'cyr']: return 'l2c' - case _: - raise Exception(f'unknown test kind: {s!r}') - with fn.open() as fp: - data = json.load(fp) - return [[parse_kind(obj['test']), obj['cyr'], obj['lat']] for obj in data] - - def table_name(s): - return re.sub(r'test_', '', s, flags=re.I).replace('-', '_') - def class_name(s): - return re.sub(r'test|_', '', s.title(), flags=re.I) - def _j(s): - return json.dumps(s, ensure_ascii=False) - def all_c2l(data): - return all(k == 'c2l' for k,_,_ in data) - def _emit_tests(kind, data, table, file): - data = [(cyr,lat) for k,cyr,lat in data if k == kind] - if not data: return - print('', file=file) - print(f' @Test func {kind}_{table}() throws {{', file=file) - vs, ws = ' ' * 8, ' ' * 12 - dump = '[\n' + ',\n'.join(f'{vs}(\n{ws}{_j(cyr)},\n{ws}{_j(lat)}\n{vs})' for cyr,lat in data) + ' ]\n' - print(f' let data: [(String, String)] = {dump}', file=file) - print(' for (cyr,lat) in data {', file=file) - if kind[0] == 'c': - print(f' let enc = try encode(cyr, table: UKLatnTable.{table})', file=file) - print(f' #expect(lat == enc)', file=file) - else: - print(f' let dec = try decode(lat, table: UKLatnTable.{table})', file=file) - print(f' #expect(cyr == dec)', file=file) - if kind[-1] == 'r': - if kind[0] == 'c': - print(f' let dec = try decode(lat, table: 
UKLatnTable.{table})', file=file) - print(f' #expect(cyr == dec)', file=file) - else: - print(f' let enc = try encode(cyr, table: UKLatnTable.{table})', file=file) - print(f' #expect(lat == enc)', file=file) - print(' }', file=file) - print(' }', file=file) - def _emit_decode_throws(table, file): - print(f''' - @Test func decode_{table}_throws() throws {{ - #expect(throws: UKLatnError.self) {{ - try decode("lat", table: UKLatnTable.{table}) - }} - }}''', file=file) - - with io.StringIO() as so: - print('import Testing', file=so) - print('@testable import UkrainianLatin', file=so) - for fn in fns: - logger.info(f'processing {fn!s}') - name = fn.stem - table = table_name(name) - cname = class_name(name) - data = _parse_tests(fn) - print(f'\n\n@Suite({_j(table)}) struct {cname}Tests {{', file=so) - _emit_tests('c2lr', data, table, file=so) - _emit_tests('l2cr', data, table, file=so) - _emit_tests('c2l', data, table, file=so) - _emit_tests('l2c', data, table, file=so) - if all_c2l(data): - _emit_decode_throws(table, file=so) - print('}', file=so) - - return so.getvalue() - - -def gen_transforms(fns, default_table=None): - def table_name(s): - s, = re.findall(r'uk_Latn_(.*?)(?:-uk)?\s*$', s, flags=re.I) - return s.replace('-', '_') - def class_name(s): - return '_Uklatn_' + s.replace('-', '_') - def _j(s): - return json.dumps(s, ensure_ascii=False) - def _isdec(s): - return s.startswith('uk_Latn_') - def _load_rules(data): - return [s if isinstance(s, str) else [ - '|'.join(r['regex'] for r in s) + '|(.)', - [r['map'] for r in s] + [dict()] - ] for s in data] - - def _emit_tr(cname, rules, file): - rules = _load_rules(rules) - norms = dict(zip('NFC NFD NFKC NFKD'.split(), ''' - precomposedStringWithCanonicalMapping - decomposedStringWithCanonicalMapping - precomposedStringWithCompatibilityMapping - decomposedStringWithCompatibilityMapping - '''.split())) - print(f'private let {cname}: () -> _UKLatnCodec.Transform = {{', file=file) - for sid, section in enumerate(rules): 
- if not isinstance(section, str): - rx, maps = section - gn = len(maps) - print(f' let _rx{sid} = try! NSRegularExpression(pattern: #"{rx}"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries])', file=file) - print(f' let _maps{sid}:[[String:String]] = [[:], ', file=file) - for d in maps: - ds = '[' + ','.join(f'{_j(k)}:{_j(v)}' for k,v in d.items()) + ']' if d else '[:]' - print(f' {ds},', file=file) - print(f' ]', file=file) - print(' func transform(_ text: String) throws -> String {', file=file) - print(' var text = text', file=file) - for sid, section in enumerate(rules): - if isinstance(section, str): - norm = norms[section] - print(f' text = text.{norm} // {section}', file=file) - else: - rx, maps = section - print(f' text = text.replacing(_rx{sid}) {{ (i, match) in', file=file) - print(f' _maps{sid}[i][match] ?? match', file=file) - print(f' }}', file=file) - print(''' return text - } - return transform -} -''', file=file) - - context = dict() - tables = dict() - with io.StringIO() as so: - for fn in fns: - logger.info(f'processing {fn!s}') - with fn.open() as fp: - rules = json.load(fp) - name = fn.stem - table = table_name(name) - cname = class_name(name) - if table not in tables: - tables[table] = [None, None] - tables[table][_isdec(name)] = cname - _emit_tr(cname, rules, so) - classdefs_tables = so.getvalue() - - with io.StringIO() as so: - print('private let _UklatnTables: [UKLatnTable:_UKLatnCodec] = [', file=so) - for tid, (table, (enc, dec)) in enumerate(tables.items(), 1): - enc = f'{enc}()' if enc else 'nil' - dec = f'{dec}()' if dec else 'nil' - print(f' .{table}: _UKLatnCodec(encode: {enc}, decode: {dec}),', file=so) - print(']', end='', file=so) - tabledef = so.getvalue() - - context['global_tables'] = classdefs_tables + tabledef - context['default_table'] = default_table - context['tables_enum'] = '\n'.join(f' case {t} = {i}' for i,t in enumerate(tables, 1)) - - context['string_replacing'] = '''private extension String { - - func 
replacing(_ rx: NSRegularExpression, with replacement: (Int,String) -> String) -> String { - var so = "" - rx.enumerateMatches(in: self, range: NSRange(startIndex ..< endIndex, in: self)) { (result: NSTextCheckingResult?, flags: NSRegularExpression.MatchingFlags, stop: UnsafeMutablePointer) in - if let result { - for i in 1.. String {{ - guard let transform = _UklatnTables[table]?.encode - else {{ - throw UKLatnError.invalidTable(table.rawValue) - }} - return try transform(text) -}} - - -/// Re-transliterates a string of Ukrainian Latin to Cyrillic script. -/// -/// - Parameters: -/// - text: the text to transliterate -/// - table: transliteration system, one of: -/// - `DSTU_9112_A`: DSTU 9112:2021 System A -/// - `DSTU_9112_B`: DSTU 9112:2021 System B -/// - Returns: The transliterated string. -public func decode(_ text: String, table: UKLatnTable = .{default_table}) throws -> String {{ - guard let transform = _UklatnTables[table]?.decode - else {{ - throw UKLatnError.invalidTable(table.rawValue) - }} - return try transform(text) -}} - - -{string_replacing} - - -public enum UKLatnTable : Int {{ -{tables_enum} -}} - - -private struct _UKLatnCodec {{ - typealias Transform = ((String) throws -> String) - let encode: Transform? - let decode: Transform? 
-}} - - -{global_tables} -''' - text = template.format(**context) - return text - diff --git a/tools/gentables.py b/tools/gentables.py index 0eee18c..8823b6c 100755 --- a/tools/gentables.py +++ b/tools/gentables.py @@ -62,17 +62,6 @@ def gen_py(src): logger.info('PY generator end') -def gen_swift(src): - logger.info('Swift generator start') - from gen import gen_swift - - source = _basegen(args, 'src/regex', 'uk*.json', gen_swift.gen_transforms) - for text in source: - print(text, end='') - - logger.info('Swift generator end') - - if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description='Generate source code for the transform tables.') @@ -91,10 +80,6 @@ def gen_swift(src): parse_py.add_argument('source', nargs='*', help='source directory') parse_py.set_defaults(func=gen_py) - parse_swift = subpar.add_parser('swift', help='Swift code generator') - parse_swift.add_argument('source', nargs='*', help='source directory') - parse_swift.set_defaults(func=gen_swift) - args = parser.parse_args() level = logging.DEBUG if args.verbose else logging.INFO diff --git a/tools/gentests.py b/tools/gentests.py index 4963d7f..d153fed 100755 --- a/tools/gentests.py +++ b/tools/gentests.py @@ -59,17 +59,6 @@ def gen_py(args): logger.info('PY generator end') -def gen_swift(args): - logger.info('Swift generator start') - from gen import gen_swift - - source = _basegen(args, 'src/tests', 'test*.json', gen_swift.gen_tests) - for text in source: - print(text, end='') - - logger.info('Swift generator end') - - if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description='Generate test code for the transform tables.') @@ -88,10 +77,6 @@ def gen_swift(args): parse_py.add_argument('source', nargs='*', help='source directory') parse_py.set_defaults(func=gen_py) - parse_swift = subpar.add_parser('swift', help='Swfit code generator') - parse_swift.add_argument('source', nargs='*', help='source directory') - 
parse_swift.set_defaults(func=gen_swift) - args = parser.parse_args() level = logging.DEBUG if args.verbose else logging.INFO