From b7858d8fe2950c09316a5bf4aef0e4033d1adb47 Mon Sep 17 00:00:00 2001 From: Pavlo Ivashkov Date: Tue, 12 Nov 2024 21:37:53 +0200 Subject: [PATCH 1/6] add swift native package --- .github/workflows/test-builds.yml | 14 +- .gitignore | 1 + Package.swift | 33 +- readme.md | 13 +- swift/Sources/UKLatn/UKLatn.swift | 43 --- swift/Sources/UkrainianLatin/UKLatn.swift | 215 ++++++++++++ swift/Sources/_uklatn/_uklatn.c | 2 - .../Sources/_uklatn/include/_uklatn/_uklatn.h | 3 - swift/Sources/cli/uklatn.swift | 211 +++++++++++ swift/Tests/UKLatnTests/UKLatnTests.swift | 31 -- .../UkrainianLatinTests/UKLatnTests.swift | 329 ++++++++++++++++++ swift/readme.md | 42 ++- tools/Makefile | 9 +- tools/gen/gen_swift.py | 252 ++++++++++++++ tools/gentables.py | 15 + tools/gentests.py | 15 + 16 files changed, 1108 insertions(+), 120 deletions(-) delete mode 100644 swift/Sources/UKLatn/UKLatn.swift create mode 100644 swift/Sources/UkrainianLatin/UKLatn.swift delete mode 100644 swift/Sources/_uklatn/_uklatn.c delete mode 100644 swift/Sources/_uklatn/include/_uklatn/_uklatn.h create mode 100644 swift/Sources/cli/uklatn.swift delete mode 100644 swift/Tests/UKLatnTests/UKLatnTests.swift create mode 100644 swift/Tests/UkrainianLatinTests/UKLatnTests.swift create mode 100755 tools/gen/gen_swift.py diff --git a/.github/workflows/test-builds.yml b/.github/workflows/test-builds.yml index ea14b3b..e3cb8a7 100644 --- a/.github/workflows/test-builds.yml +++ b/.github/workflows/test-builds.yml @@ -81,22 +81,20 @@ jobs: build-swift: + name: Swift ${{matrix.platform}} ${{matrix.build_type}} strategy: matrix: - BUILD_TYPE: [debug, release] + build_type: [debug, release] + platform: [macos-latest, ubuntu-latest] - runs-on: ubuntu-latest + runs-on: ${{matrix.platform}} steps: - uses: actions/checkout@v4 - - name: Install ICU - run: | - sudo apt-get install -y libicu-dev ${{env.LIBICU}} - - name: Build - run: swift build -c ${{matrix.BUILD_TYPE}} + run: swift build -c ${{matrix.build_type}} - name: Test - 
run: swift test -c ${{matrix.BUILD_TYPE}} + run: swift test -c ${{matrix.build_type}} diff --git a/.gitignore b/.gitignore index fd883bf..dd92a00 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ __pycache__/ *.py[cdo] *.egg-info/ *.so +.swiftpm/ diff --git a/Package.swift b/Package.swift index fcc0f5f..47fee47 100644 --- a/Package.swift +++ b/Package.swift @@ -1,4 +1,4 @@ -// swift-tools-version:5.1 +// swift-tools-version: 5.8 import PackageDescription @@ -6,29 +6,24 @@ let package = Package( name: "uklatn", products: [ .library( - name: "UKLatn", - targets: ["UKLatn"]), + name: "UkrainianLatin", + targets: ["UkrainianLatin"]), + .executable( + name: "uklatn", + targets: ["cli"]), ], targets: [ .target( - name: "UKLatn", - dependencies: ["_uklatn"], - path: "swift/Sources/UKLatn"), - .target( - name: "_uklatn", - path: "swift/Sources/_uklatn", - cSettings: [ - .headerSearchPath("../../../c/include"), - ], - linkerSettings: [ - .linkedLibrary("icuuc"), - .linkedLibrary("icui18n"), - ]), + name: "UkrainianLatin", + path: "swift/Sources/UkrainianLatin"), .testTarget( name: "UKLatnTests", - dependencies: ["UKLatn"], - path: "swift/Tests/UKLatnTests"), + dependencies: ["UkrainianLatin"], + path: "swift/Tests/UkrainianLatinTests"), + .executableTarget( + name: "cli", + dependencies: ["UkrainianLatin"], + path: "swift/Sources/cli"), ], swiftLanguageVersions: [.v5] ) - diff --git a/readme.md b/readme.md index c2dc688..04bd55d 100644 --- a/readme.md +++ b/readme.md @@ -5,7 +5,7 @@ Ukrainian Cyrillic transliteration to Latin script. 
[![standwithukraine](docs/StandWithUkraine.svg)](https://ukrainewar.carrd.co/) [![](https://github.com/paiv/uklatn/actions/workflows/test-builds.yml/badge.svg)](https://github.com/paiv/uklatn/actions) -[JavaScript](#javascript-package) | [Python](#python-module) | [C](#c-library) | [Swift](#swift-package) +[JavaScript](#javascript-package) | [Python](#python-module) | [C](c/) | [Swift](#swift-package) Supported transliteration schemes: - [DSTU 9112:2021](https://uk.wikipedia.org/wiki/ДСТУ_9112:2021) @@ -46,11 +46,6 @@ uklatn.decode("Paljanycja") ``` -C library --- -- [uklatn C library](c/) - - Swift package -- - [uklatn Swift package](swift/) @@ -58,11 +53,7 @@ Swift package Add package dependency: ```sh swift package add-dependency 'https://github.com/paiv/uklatn.git' --from '1.0.0' -``` - -Use in target dependencies in `Package.swift`: -```swift -.product(name: "UKLatn", package: "uklatn") +swift package add-target-dependency --package uklatn UkrainianLatin YourTargetName ``` diff --git a/swift/Sources/UKLatn/UKLatn.swift b/swift/Sources/UKLatn/UKLatn.swift deleted file mode 100644 index a7a66c6..0000000 --- a/swift/Sources/UKLatn/UKLatn.swift +++ /dev/null @@ -1,43 +0,0 @@ -import _uklatn - - -public struct UKLatnTable { - public static let DSTU_9112_A = Int(_uklatn.UklatnTable_DSTU_9112_A.rawValue) - public static let DSTU_9112_B = Int(_uklatn.UklatnTable_DSTU_9112_B.rawValue) - public static let KMU_55 = Int(_uklatn.UklatnTable_KMU_55.rawValue) -} - - -public enum UKLatnError: Error { - case failed(code: Int) -} - - -public func encode(_ text: String, table: Int = 0) throws -> String { - let n = text.utf8.count - let dst = UnsafeMutableBufferPointer.allocate(capacity: n * 3) - defer { - dst.deallocate() - } - let err = _uklatn.uklatn_encode(text, Int32(table), dst.baseAddress, Int32(dst.count)) - if err != 0 { - throw UKLatnError.failed(code: Int(err)) - } - let res = String(cString: dst.baseAddress!) 
- return res -} - - -public func decode(_ text: String, table: Int = 0) throws -> String { - let n = text.utf8.count - let dst = UnsafeMutableBufferPointer.allocate(capacity: n * 3) - defer { - dst.deallocate() - } - let err = _uklatn.uklatn_decode(text, Int32(table), dst.baseAddress, Int32(dst.count)) - if err != 0 { - throw UKLatnError.failed(code: Int(err)) - } - let res = String(cString: dst.baseAddress!) - return res -} diff --git a/swift/Sources/UkrainianLatin/UKLatn.swift b/swift/Sources/UkrainianLatin/UKLatn.swift new file mode 100644 index 0000000..4d8bf7f --- /dev/null +++ b/swift/Sources/UkrainianLatin/UKLatn.swift @@ -0,0 +1,215 @@ +/* Generated by gentables.py, do not edit. */ + +/* uklatn - https://github.com/paiv/uklatn */ +import Foundation + + +public enum UKLatnError: Error { + case invalidTable(Int) +} + + +/// Transliterates a string of Ukrainian Cyrillic to Latin script. +/// +/// - Parameters: +/// - text: the text to transliterate +/// - table: transliteration system, one of: +/// - `DSTU_9112_A`: DSTU 9112:2021 System A +/// - `DSTU_9112_B`: DSTU 9112:2021 System B +/// - `KMU_55`: KMU 55:2010 +/// - Returns: The transliterated string. +public func encode(_ text: String, table: UKLatnTable = .DSTU_9112_A) throws -> String { + guard let transform = _UklatnTables[table]?.encode + else { + throw UKLatnError.invalidTable(table.rawValue) + } + return try transform(text) +} + + +/// Re-transliterates a string of Ukrainian Latin to Cyrillic script. +/// +/// - Parameters: +/// - text: the text to transliterate +/// - table: transliteration system, one of: +/// - `DSTU_9112_A`: DSTU 9112:2021 System A +/// - `DSTU_9112_B`: DSTU 9112:2021 System B +/// - Returns: The transliterated string. 
+public func decode(_ text: String, table: UKLatnTable = .DSTU_9112_A) throws -> String { + guard let transform = _UklatnTables[table]?.decode + else { + throw UKLatnError.invalidTable(table.rawValue) + } + return try transform(text) +} + + +private extension String { + + func replacing(_ rx: NSRegularExpression, with replacement: @escaping (Int,String) -> String) -> String { + var so = "" + rx.enumerateMatches(in: self, range: NSRange(startIndex ..< endIndex, in: self)) { (result: NSTextCheckingResult?, flags: NSRegularExpression.MatchingFlags, stop: UnsafeMutablePointer) in + if let result { + for i in 1.. String) + let encode: Transform? + let decode: Transform? +} + + +private let _Uklatn_uk_uk_Latn_DSTU_9112_A: () -> _UKLatnCodec.Transform = { + let _rx1 = try! NSRegularExpression(pattern: #"\b([Ьь])|([Ьь](?=[АаЕеУу])|[ЄЮЯ](?=\u0301?[а-щьюяєіїґ’])|(?<=[Б-ДЖЗК-НП-ТФ-Щб-джзк-нп-тф-щҐґ])[Йй])|([ЁЄІЇЎА-яёєіїўҐґ’])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) + let _maps1:[[String:String]] = [[:], + ["Ь":"Ĵ","ь":"ĵ"], + ["Ь":"J'","ь":"j'","Є":"Je","Ю":"Ju","Я":"Ja","Й":"'J","й":"'j"], + ["А":"A","а":"a","Б":"B","б":"b","В":"V","в":"v","Г":"Ğ","г":"ğ","Ґ":"G","ґ":"g","Д":"D","д":"d","Е":"E","е":"e","Є":"JE","є":"je","Ж":"Ž","ж":"ž","З":"Z","з":"z","И":"Y","и":"y","І":"I","і":"i","Ї":"Ï","ї":"ï","К":"K","к":"k","Л":"L","л":"l","М":"M","м":"m","Н":"N","н":"n","О":"O","о":"o","П":"P","п":"p","Р":"R","р":"r","С":"S","с":"s","Т":"T","т":"t","У":"U","у":"u","Ф":"F","ф":"f","Х":"X","х":"x","Ц":"C","ц":"c","Ч":"Č","ч":"č","Ш":"Š","ш":"š","Щ":"Ŝ","щ":"ŝ","Ю":"JU","ю":"ju","Я":"JA","я":"ja","Ь":"J","ь":"j","Й":"J","й":"j","’":"'","Ё":"Ö","ё":"ö","Ў":"Ŭ","ў":"ŭ","Ъ":"Ǒ","ъ":"ǒ","Ы":"Ȳ","ы":"ȳ","Э":"Ē","э":"ē"], + [:], + ] + func transform(_ text: String) throws -> String { + var text = text + text = text.precomposedStringWithCanonicalMapping // NFC + text = text.replacing(_rx1) { (i, match) in + _maps1[i][match] ?? 
match + } + text = text.precomposedStringWithCanonicalMapping // NFC + return text + } + return transform +} + +private let _Uklatn_uk_uk_Latn_DSTU_9112_B: () -> _UKLatnCodec.Transform = { + let _rx1 = try! NSRegularExpression(pattern: #"([Ьь](?=[АаЕеІіУу])|(?<=[Б-ДЖЗК-НП-ТФ-Щб-джзк-нп-тф-щҐґ])[Йй])|([ГЄЖЇХЩШЧЮЯЁЎЪЫЭ](?=\u0301?[а-яёєіїўґ’])|\b[Ьь])|([ЁЄІЇЎА-яёєіїўҐґ’])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) + let _maps1:[[String:String]] = [[:], + ["Ь":"J'","ь":"j'","Й":"'J","й":"'j"], + ["Г":"Gh","Є":"Je","Ж":"Zh","Ї":"Ji","Х":"Kh","Щ":"Shch","Ш":"Sh","Ч":"Ch","Ю":"Ju","Я":"Ja","Ё":"Jow","Ў":"Uh","Ъ":"Oh","Ы":"Yw","Э":"Ehw","Ь":"Hj","ь":"hj"], + ["А":"A","а":"a","Б":"B","б":"b","В":"V","в":"v","Г":"GH","г":"gh","Ґ":"G","ґ":"g","Д":"D","д":"d","Е":"E","е":"e","Є":"JE","є":"je","Ж":"ZH","ж":"zh","З":"Z","з":"z","И":"Y","и":"y","І":"I","і":"i","Ї":"JI","ї":"ji","Х":"KH","х":"kh","К":"K","к":"k","Л":"L","л":"l","М":"M","м":"m","Н":"N","н":"n","О":"O","о":"o","П":"P","п":"p","Р":"R","р":"r","Щ":"SHCH","щ":"shch","Ш":"SH","ш":"sh","С":"S","с":"s","Т":"T","т":"t","У":"U","у":"u","Ф":"F","ф":"f","Ч":"CH","ч":"ch","Ц":"C","ц":"c","Ю":"JU","ю":"ju","Я":"JA","я":"ja","Й":"J","й":"j","Ь":"J","ь":"j","’":"'","Ё":"JOW","ё":"jow","Ў":"UH","ў":"uh","Ъ":"OH","ъ":"oh","Ы":"YW","ы":"yw","Э":"EHW","э":"ehw"], + [:], + ] + func transform(_ text: String) throws -> String { + var text = text + text = text.precomposedStringWithCanonicalMapping // NFC + text = text.replacing(_rx1) { (i, match) in + _maps1[i][match] ?? match + } + text = text.precomposedStringWithCanonicalMapping // NFC + return text + } + return transform +} + +private let _Uklatn_uk_uk_Latn_KMU_55: () -> _UKLatnCodec.Transform = { + let _rx1 = try! NSRegularExpression(pattern: #"(?<=[ЁЄІЇЎА-яёєіїўҐґ])([’\u0027])(?=[ЁЄІЇЎА-яёєіїўҐґ])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) + let _maps1:[[String:String]] = [[:], + ["’":"","'":""], + [:], + ] + let _rx2 = try! 
NSRegularExpression(pattern: #"\b([ЄЇЮЯ])(?=\u0301?[а-яёєіїўґ’])|\b([ЙйЄЇЮЯєїюя])|([Зз]Г|[ЖХЦЩШЧЄЇЮЯ])(?=\u0301?[а-яёєіїўґ’])|([Зз][Гг]|[ЄІЇА-ЩЬЮ-щьюяєіїҐґ’])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) + let _maps2:[[String:String]] = [[:], + ["Є":"Ye","Ї":"Yi","Ю":"Yu","Я":"Ya"], + ["Й":"Y","й":"y","Є":"YE","є":"ye","Ї":"YI","ї":"yi","Ю":"YU","ю":"yu","Я":"YA","я":"ya"], + ["ЗГ":"ZGh","зГ":"zGh","Ж":"Zh","Х":"Kh","Ц":"Ts","Щ":"Shch","Ш":"Sh","Ч":"Ch","Є":"Ie","Ї":"I","Ю":"Iu","Я":"Ia"], + ["ЗГ":"ZGH","Зг":"Zgh","зГ":"zGH","зг":"zgh","А":"A","а":"a","Б":"B","б":"b","В":"V","в":"v","Г":"H","г":"h","Ґ":"G","ґ":"g","Д":"D","д":"d","Е":"E","е":"e","Є":"IE","є":"ie","Ж":"ZH","ж":"zh","З":"Z","з":"z","И":"Y","и":"y","І":"I","і":"i","Ї":"I","ї":"i","Х":"KH","х":"kh","К":"K","к":"k","Л":"L","л":"l","М":"M","м":"m","Н":"N","н":"n","О":"O","о":"o","П":"P","п":"p","Р":"R","р":"r","Щ":"SHCH","щ":"shch","Ш":"SH","ш":"sh","С":"S","с":"s","Т":"T","т":"t","У":"U","у":"u","Ф":"F","ф":"f","Ч":"CH","ч":"ch","Ц":"TS","ц":"ts","Ю":"IU","ю":"iu","Я":"IA","я":"ia","Й":"I","й":"i","Ь":"","ь":"","’":""], + [:], + ] + func transform(_ text: String) throws -> String { + var text = text + text = text.precomposedStringWithCanonicalMapping // NFC + text = text.replacing(_rx1) { (i, match) in + _maps1[i][match] ?? match + } + text = text.replacing(_rx2) { (i, match) in + _maps2[i][match] ?? match + } + text = text.precomposedStringWithCanonicalMapping // NFC + return text + } + return transform +} + +private let _Uklatn_uk_Latn_DSTU_9112_A_uk: () -> _UKLatnCodec.Transform = { + let _rx1 = try! NSRegularExpression(pattern: #"([ÁáÉéÍíÓóÚúÝýḮḯ])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) + let _maps1:[[String:String]] = [[:], + ["Á":"Á","á":"á","É":"É","é":"é","Í":"Í","í":"í","Ó":"Ó","ó":"ó","Ú":"Ú","ú":"ú","Ý":"Ý","ý":"ý","Ḯ":"Ḯ","ḯ":"ḯ"], + [:], + ] + let _rx2 = try! 
NSRegularExpression(pattern: #"(J[Ee]|j[Ee]|J[Uu]|j[Uu]|J[Aa]|j[Aa]|[A-GIK-PR-VXYZa-gik-pr-vxyzÏÖïöČčĒēĞğĴĵŜŝŠšŬŭŽžǑǒȲȳ])|(?<=[BbCcDdFfGgKkLlMmNnPpRrSsTtVvXxZzČčĞğŜŝŠšŽž])([Jj]\u0027(?=[AaEeUu])|[Jj])|(\u0027[Jj](?![AaEeIiUu])|\u0027(?=[Jj])|[Jj])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) + let _maps2:[[String:String]] = [[:], + ["A":"А","a":"а","B":"Б","b":"б","V":"В","v":"в","Ğ":"Г","ğ":"г","G":"Ґ","g":"ґ","D":"Д","d":"д","E":"Е","e":"е","JE":"Є","Je":"Є","jE":"є","je":"є","Ž":"Ж","ž":"ж","Z":"З","z":"з","Y":"И","y":"и","I":"І","i":"і","Ï":"Ї","ï":"ї","K":"К","k":"к","L":"Л","l":"л","M":"М","m":"м","N":"Н","n":"н","O":"О","o":"о","P":"П","p":"п","R":"Р","r":"р","S":"С","s":"с","T":"Т","t":"т","U":"У","u":"у","F":"Ф","f":"ф","X":"Х","x":"х","C":"Ц","c":"ц","Č":"Ч","č":"ч","Š":"Ш","š":"ш","Ŝ":"Щ","ŝ":"щ","JU":"Ю","Ju":"Ю","jU":"ю","ju":"ю","JA":"Я","Ja":"Я","jA":"я","ja":"я","Ĵ":"Ь","ĵ":"ь","Ö":"Ё","ö":"ё","Ŭ":"Ў","ŭ":"ў","Ǒ":"Ъ","ǒ":"ъ","Ȳ":"Ы","ȳ":"ы","Ē":"Э","ē":"э"], + ["J":"Ь","j":"ь","J'":"Ь","j'":"ь"], + ["'J":"Й","'j":"й","'":"’","J":"Й","j":"й"], + [:], + ] + func transform(_ text: String) throws -> String { + var text = text + text = text.precomposedStringWithCanonicalMapping // NFC + text = text.replacing(_rx1) { (i, match) in + _maps1[i][match] ?? match + } + text = text.replacing(_rx2) { (i, match) in + _maps2[i][match] ?? match + } + text = text.precomposedStringWithCanonicalMapping // NFC + return text + } + return transform +} + +private let _Uklatn_uk_Latn_DSTU_9112_B_uk: () -> _UKLatnCodec.Transform = { + let _rx1 = try! NSRegularExpression(pattern: #"([ÁáÉéÍíÓóÚúÝý])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) + let _maps1:[[String:String]] = [[:], + ["Á":"Á","á":"á","É":"É","é":"é","Í":"Í","í":"í","Ó":"Ó","ó":"ó","Ú":"Ú","ú":"ú","Ý":"Ý","ý":"ý"], + [:], + ] + let _rx2 = try! 
NSRegularExpression(pattern: #"([Jj][Oo][Ww]|[Ss][Hh][Cc][Hh]|[CcGgKkSsZzUuOo][Hh]|[Yy][Ww]|[Ee][Hh][Ww]|[Jj][EeIiUuAa]|[Hh][Jj]|[A-GIK-PR-VYZa-gik-pr-vyz])|(?<=[Ss][Hh][Cc][Hh])([Jj]\u0027(?=[AaEeIiUu])|[Jj])|(?<=[CcGgKkSsZz][Hh])([Jj]\u0027(?=[AaEeIiUu])|[Jj])|(?<=[BCDFGKLMNPRSTVZbcdfgklmnprstvzv])([Jj]\u0027(?=[AaEeIiUu])|[Jj])|(\u0027[Jj](?![AaEeIiUu])|\u0027(?=[Jj])|[Jj])|(.)"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries]) + let _maps2:[[String:String]] = [[:], + ["A":"А","a":"а","B":"Б","b":"б","V":"В","v":"в","GH":"Г","Gh":"Г","gH":"г","gh":"г","G":"Ґ","g":"ґ","D":"Д","d":"д","E":"Е","e":"е","JE":"Є","Je":"Є","jE":"є","je":"є","ZH":"Ж","Zh":"Ж","zH":"ж","zh":"ж","Z":"З","z":"з","Y":"И","y":"и","I":"І","i":"і","JI":"Ї","Ji":"Ї","jI":"ї","ji":"ї","KH":"Х","Kh":"Х","kH":"х","kh":"х","K":"К","k":"к","L":"Л","l":"л","M":"М","m":"м","N":"Н","n":"н","O":"О","o":"о","P":"П","p":"п","R":"Р","r":"р","SHCH":"Щ","SHCh":"Щ","SHcH":"Щ","SHch":"Щ","ShCH":"Щ","ShCh":"Щ","ShcH":"Щ","Shch":"Щ","sHCH":"щ","sHCh":"щ","sHcH":"щ","sHch":"щ","shCH":"щ","shCh":"щ","shcH":"щ","shch":"щ","SH":"Ш","Sh":"Ш","sH":"ш","sh":"ш","S":"С","s":"с","T":"Т","t":"т","U":"У","u":"у","F":"Ф","f":"ф","CH":"Ч","Ch":"Ч","cH":"ч","ch":"ч","C":"Ц","c":"ц","JU":"Ю","Ju":"Ю","jU":"ю","ju":"ю","JA":"Я","Ja":"Я","jA":"я","ja":"я","HJ":"Ь","Hj":"Ь","hJ":"ь","hj":"ь","JOW":"Ё","JOw":"Ё","JoW":"Ё","Jow":"Ё","jOW":"ё","jOw":"ё","joW":"ё","jow":"ё","UH":"Ў","Uh":"Ў","uH":"ў","uh":"ў","OH":"Ъ","Oh":"Ъ","oH":"ъ","oh":"ъ","YW":"Ы","Yw":"Ы","yW":"ы","yw":"ы","EHW":"Э","EHw":"Э","EhW":"Э","Ehw":"Э","eHW":"э","eHw":"э","ehW":"э","ehw":"э"], + ["J":"Ь","j":"ь","J'":"Ь","j'":"ь"], + ["J":"Ь","j":"ь","J'":"Ь","j'":"ь"], + ["J":"Ь","j":"ь","J'":"Ь","j'":"ь"], + ["'J":"Й","'j":"й","'":"’","J":"Й","j":"й"], + [:], + ] + func transform(_ text: String) throws -> String { + var text = text + text = text.precomposedStringWithCanonicalMapping // NFC + text = text.replacing(_rx1) { (i, match) in + 
_maps1[i][match] ?? match + } + text = text.replacing(_rx2) { (i, match) in + _maps2[i][match] ?? match + } + text = text.precomposedStringWithCanonicalMapping // NFC + return text + } + return transform +} + +private let _UklatnTables: [UKLatnTable:_UKLatnCodec] = [ + .DSTU_9112_A: _UKLatnCodec(encode: _Uklatn_uk_uk_Latn_DSTU_9112_A(), decode: _Uklatn_uk_Latn_DSTU_9112_A_uk()), + .DSTU_9112_B: _UKLatnCodec(encode: _Uklatn_uk_uk_Latn_DSTU_9112_B(), decode: _Uklatn_uk_Latn_DSTU_9112_B_uk()), + .KMU_55: _UKLatnCodec(encode: _Uklatn_uk_uk_Latn_KMU_55(), decode: nil), +] diff --git a/swift/Sources/_uklatn/_uklatn.c b/swift/Sources/_uklatn/_uklatn.c deleted file mode 100644 index 3ba3461..0000000 --- a/swift/Sources/_uklatn/_uklatn.c +++ /dev/null @@ -1,2 +0,0 @@ -#include "include/_uklatn/_uklatn.h" -#include "../../../c/uklatn.c" diff --git a/swift/Sources/_uklatn/include/_uklatn/_uklatn.h b/swift/Sources/_uklatn/include/_uklatn/_uklatn.h deleted file mode 100644 index 9290d6a..0000000 --- a/swift/Sources/_uklatn/include/_uklatn/_uklatn.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once -#include "../../../../../c/include/uklatn.h" - diff --git a/swift/Sources/cli/uklatn.swift b/swift/Sources/cli/uklatn.swift new file mode 100644 index 0000000..c912ade --- /dev/null +++ b/swift/Sources/cli/uklatn.swift @@ -0,0 +1,211 @@ +import Foundation +import UkrainianLatin + + +@main +struct MyApp { + + static func main() { + var stderr = ErrorStream() + + do { + let args = try AppArgs.parse(CommandLine.arguments) + if args.printHelp { + AppArgs.printHelp() + } + else if let file = args.file { + try transformFile(file, direction: args.direction, table: args.table) + } + else if let text = args.text { + try transformText(text, direction: args.direction, table: args.table) + } + } + catch let error as AppArgs.ParseError { + AppArgs.printError(error, to: &stderr) + } + catch UKLatnError.invalidTable(let table) { + AppArgs.printError(.invalidTable("\(table)"), to: &stderr) + } + catch { + 
print(error, to: &stderr) + } + } + + private static func transformText(_ text: String, direction: AppArgs.TransformDirection, table: UKLatnTable) throws { + let value: String + switch direction { + case .cyr2lat: + value = try encode(text, table: table) + case .lat2cyr: + value = try decode(text, table: table) + } + print("\(value)") + } + + private static func transformFile(_ file: String, direction: AppArgs.TransformDirection, table: UKLatnTable) throws { + if file == "-" { + while let text = readLine() { + try transformText(text, direction: direction, table: table) + } + } + else { + var encoding: String.Encoding = .utf8 + let text = try String(contentsOfFile: file, usedEncoding: &encoding) + try transformText(text, direction: direction, table: table) + } + } +} + + +private struct AppArgs { + var executable: String = "" + var printHelp: Bool = false + var text: String? + var file: String? + var table: UKLatnTable = .DSTU_9112_A + var direction: TransformDirection = .cyr2lat + + static let _usage = "usage: uklatn [-h] [-t TABLE] [-c] [-l] [-f FILE] [text ...]" + + static let _help = _usage + + """ + + + arguments: + text text to transliterate + + options: + -h, --help show this help message and exit + -t, --table {DSTU_9112_A,DSTU_9112_B,KMU_55} + transliteration system (default: DSTU_9112_A) + -l, --lat, --latin convert to Latin script (default) + -c, --cyr, --cyrillic convert to Cyrillic script + -f, --file FILE read text from file + """ + + enum TransformDirection { + case cyr2lat + case lat2cyr + } + + enum ParseError: Error { + case unknownArgument(String) + case invalidTable(String) + case missingTableValue + case missingFileValue + case missingRequiredTextOrFile + } + + static func printUsage(to output: inout S) where S:TextOutputStream { + print("\(_usage)", to: &output) + } + + static func printHelp() { + print("\(_help)") + } + + static func printHelp(to output: inout S) where S:TextOutputStream { + print("\(_help)", to: &output) + } + + static func 
printError(_ error: ParseError, to output: inout S) where S:TextOutputStream { + let message: String + switch error { + case .unknownArgument(let arg): + message = "unrecognized arguments: \(arg)" + case .invalidTable(let table): + message = "invalid table: \(table)" + case .missingTableValue: + message = "argument -t/--table expected table name" + case .missingFileValue: + message = "argument -f/--file expected file name" + case .missingRequiredTextOrFile: + message = "missing required arguments: text or file" + } + printUsage(to: &output) + print("error: \(message)", to: &output) + } + + static func parse(_ argv: [String]) throws -> AppArgs { + var args = AppArgs() + args.executable = argv[0] + var state: Int = 0 + + for iarg in 1 ..< argv.count { + let arg = argv[iarg] + switch state { + + case 0: + if arg.first == "-" { + switch arg { + case "-h", "-help", "--help": + args.printHelp = true + return args + case "-c", "--cyr", "--cyrillic": + args.direction = .lat2cyr + case "-l", "--lat", "--latin": + args.direction = .cyr2lat + case "-t", "--table": + state = 1 + case "-f", "--file": + state = 2 + default: + throw ParseError.unknownArgument(arg) + } + } + else if !arg.isEmpty { + args.text = args.text.map({ $0 + " " + arg }) ?? arg + } + + case 1: + if let table = parseTable(arg) { + args.table = table + state = 0 + } + else { + throw ParseError.invalidTable(arg) + } + + case 2: + args.file = arg + state = 0 + + default: + fatalError() + } + } + + switch state { + case 1: + throw ParseError.missingTableValue + case 2: + throw ParseError.missingFileValue + default: + break + } + + if args.text == nil && args.file == nil { + throw ParseError.missingRequiredTextOrFile + } + + return args + } + + private static func parseTable(_ value: String) -> UKLatnTable? 
{ + switch value { + case "DSTU_9112_A": return .DSTU_9112_A + case "DSTU_9112_B": return .DSTU_9112_B + case "KMU_55": return .KMU_55 + default: return nil + } + } +} + + +private struct ErrorStream: TextOutputStream { + func write(_ text: String) { + if let data = text.data(using: .utf8) { + FileHandle.standardError.write(data) + } + } +} diff --git a/swift/Tests/UKLatnTests/UKLatnTests.swift b/swift/Tests/UKLatnTests/UKLatnTests.swift deleted file mode 100644 index 3488803..0000000 --- a/swift/Tests/UKLatnTests/UKLatnTests.swift +++ /dev/null @@ -1,31 +0,0 @@ -import Testing -@testable import UKLatn - - -@Test func encode_DSTU_A() async throws { - let cyr = "Доброго вечора, ми з України!" - let lat = try encode(cyr) - #expect(lat == "Dobroğo večora, my z Ukraïny!") - let t = try decode(lat) - #expect(t == cyr) -} - - -@Test func encode_DSTU_B() async throws { - let cyr = "Доброго вечора, ми з України!" - let lat = try encode(cyr, table: UKLatnTable.DSTU_9112_B) - #expect(lat == "Dobrogho vechora, my z Ukrajiny!") - let t = try decode(lat, table: UKLatnTable.DSTU_9112_B) - #expect(t == cyr) -} - - -@Test func encode_KMU() async throws { - let cyr = "Доброго вечора, ми з України!" - let lat = try encode(cyr, table: UKLatnTable.KMU_55) - #expect(lat == "Dobroho vechora, my z Ukrainy!") - #expect(throws: UKLatnError.self) { - try decode(lat, table: UKLatnTable.KMU_55) - } -} - diff --git a/swift/Tests/UkrainianLatinTests/UKLatnTests.swift b/swift/Tests/UkrainianLatinTests/UKLatnTests.swift new file mode 100644 index 0000000..01b3d55 --- /dev/null +++ b/swift/Tests/UkrainianLatinTests/UKLatnTests.swift @@ -0,0 +1,329 @@ +/* Generated by gentests.py, do not edit. 
*/ + +import Testing +@testable import UkrainianLatin + + +@Suite("DSTU_9112_A") struct Dstu9112ATests { + + @Test func c2lr_DSTU_9112_A() throws { + let data: [(String, String)] = [ + ( + "Україна, Хмельницький", + "Ukraïna, Xmeljnycjkyj" + ), + ( + "Щастям б’єш жук їх глицю в фон й ґедзь пріч.", + "Ŝastjam b'ješ žuk ïx ğlycju v fon j gedzj prič." + ), + ( + "ь Ь ль льє льї лью лья лье льі льу льа льйо льо", + "ĵ Ĵ lj ljje ljï ljju ljja lj'e lji lj'u lj'a ljjo ljo" + ), + ( + "Єл Їл Юл Ял", + "Jel Ïl Jul Jal" + ), + ( + "бь вь гь ґь дь жь зь кь ль мь нь пь рь сь ть фь хь ць чь шь щь", + "bj vj ğj gj dj žj zj kj lj mj nj pj rj sj tj fj xj cj čj šj ŝj" + ), + ( + "бя вя гя ґя дя жя зя кя ля мя ня пя ря ся тя фя хя ця чя шя щя", + "bja vja ğja gja dja žja zja kja lja mja nja pja rja sja tja fja xja cja čja šja ŝja" + ), + ( + "б’я в’я г’я ґ’я д’я ж’я з’я к’я л’я м’я н’я п’я р’я с’я т’я ф’я х’я ц’я ч’я ш’я щ’я", + "b'ja v'ja ğ'ja g'ja d'ja ž'ja z'ja k'ja l'ja m'ja n'ja p'ja r'ja s'ja t'ja f'ja x'ja c'ja č'ja š'ja ŝ'ja" + ), + ( + "бй бйо вй гй ґй дй жй зй кй лй мй нй пй рй сй тй фй хй цй чй шй щй", + "b'j b'jo v'j ğ'j g'j d'j ž'j z'j k'j l'j m'j n'j p'j r'j s'j t'j f'j x'j c'j č'j š'j ŝ'j" + ), + ( + "ня ньа н’я нь'н ньн", + "nja nj'a n'ja nj'n njn" + ), + ( + "рос дыня эзёдынъ. бр кроў.", + "ros dȳnja ēzödȳnǒ. br kroŭ." + ), + ( + "А́ а́ Е́ е́ Є́ є́ И́ и́ І́ і́ Ї́ ї́ О́ о́ У́ у́ Ю́ ю́ Я́ я́", + "Á á É é JÉ jé Ý ý Í í Ḯ ḯ Ó ó Ú ú JÚ jú JÁ já" + ), + ( + "Є́с сЄ́с є́с сє́с Ї́с сЇ́с ї́с сї́с Ю́с сЮ́с ю́с сю́с Я́с сЯ́с я́с ся́с", + "Jés sJés jés sjés Ḯs sḮs ḯs sḯs Jús sJús jús sjús Jás sJás jás sjás" + ), + ( + "' ім’я 'жук' \"жук\" ' '", + "' im'ja 'žuk' \"žuk\" ' '" + ), + ( + "Сонце світить майже білим світлом, однак через сильніше розсіювання і поглинання короткохвильової частини спектра атмосферою Землі пряме світло Сонця біля поверхні нашої планети набуває певного жовтого відтінку. 
Якщо небо ясне, то блакитний відтінок розсіяного світла складається з жовтуватим прямим сонячним світлом і загальне освітлення об’єктів на Землі стає білим.", + "Sonce svitytj majže bilym svitlom, odnak čerez syljniše rozsijuvannja i poğlynannja korotkoxvyljovoï častyny spektra atmosferoju Zemli prjame svitlo Soncja bilja poverxni našoï planety nabuvaje pevnoğo žovtoğo vidtinku. Jakŝo nebo jasne, to blakytnyj vidtinok rozsijanoğo svitla skladajetjsja z žovtuvatym prjamym sonjačnym svitlom i zağaljne osvitlennja ob'jektiv na Zemli staje bilym." + ), + ( + "дуб!дуб\"дуб#дуб$дуб%дуб&дуб'дуб(дуб)дуб*дуб+дуб,дуб-дуб.дуб/дуб:дуб;дуб<дуб=дуб>дуб?дуб@дуб[дуб\\дуб]дуб^дуб_дуб`дуб{дуб|дуб}дуб~дуб", + "dub!dub\"dub#dub$dub%dub&dub'dub(dub)dub*dub+dub,dub-dub.dub/dub:dub;dubdub?dub@dub[dub\\dub]dub^dub_dub`dub{dub|dub}dub~dub" + ), + ( + "бод бод\tбод\nбод\rбод", + "bod bod\tbod\nbod\rbod" + ) ] + + for (cyr,lat) in data { + let enc = try encode(cyr, table: UKLatnTable.DSTU_9112_A) + #expect(lat == enc) + let dec = try decode(lat, table: UKLatnTable.DSTU_9112_A) + #expect(cyr == dec) + } + } + + @Test func c2l_DSTU_9112_A() throws { + let data: [(String, String)] = [ + ( + "в’я в'я", + "v'ja v'ja" + ), + ( + "Ї ї Й й Ё ё Ў ў", + "Ï ï J j Ö ö Ŭ ŭ" + ) ] + + for (cyr,lat) in data { + let enc = try encode(cyr, table: UKLatnTable.DSTU_9112_A) + #expect(lat == enc) + } + } + + @Test func l2c_DSTU_9112_A() throws { + let data: [(String, String)] = [ + ( + "я є ю", + "jA jE jU" + ), + ( + "Ї ї Ь ь Ч ч Г г Щ щ Ш ш Ж ж", + "Ï ï Ĵ ĵ Č č Ğ ğ Ŝ ŝ Š š Ž ž" + ), + ( + "Ё ё Ў ў Ъ ъ Ы ы Э э", + "Ö ö Ŭ ŭ Ǒ ǒ Ȳ ȳ Ē ē" + ), + ( + "А́ а́ Е́ е́ Є́ Є́ є́ є́ И́ и́ І́ і́ Ї́ ї́ О́ о́ У́ у́ Ю́ Ю́ ю́ ю́ Я́ Я́ я́ я́", + "Á á É é JÉ Jé jÉ jé Ý ý Í í Ḯ ḯ Ó ó Ú ú JÚ Jú jÚ jú JÁ Já jÁ já" + ), + ( + "Є́с сЄ́с є́с сє́с Ї́с сЇ́с ї́с сї́с Ю́с сЮ́с ю́с сю́с Я́с сЯ́с я́с ся́с", + "Jés sJés jés sjés Ḯs sḮs ḯs sḯs Jús sJús jús sjús Jás sJás jás sjás" + ) ] + + for (cyr,lat) in data { + let dec = try decode(lat, 
table: UKLatnTable.DSTU_9112_A) + #expect(cyr == dec) + } + } +} + + +@Suite("DSTU_9112_B") struct Dstu9112BTests { + + @Test func c2lr_DSTU_9112_B() throws { + let data: [(String, String)] = [ + ( + "Україна, Хмельницький", + "Ukrajina, Khmeljnycjkyj" + ), + ( + "Щастям б’єш жук їх глицю в фон й ґедзь пріч.", + "Shchastjam b'jesh zhuk jikh ghlycju v fon j gedzj prich." + ), + ( + "ь Ь ль льє льї лью лья лье льі льу льа льйо льо", + "hj Hj lj ljje ljji ljju ljja lj'e lj'i lj'u lj'a ljjo ljo" + ), + ( + "Єл Їл Юл Ял", + "Jel Jil Jul Jal" + ), + ( + "бь вь гь ґь дь жь зь кь ль мь нь пь рь сь ть фь хь ць чь шь щь", + "bj vj ghj gj dj zhj zj kj lj mj nj pj rj sj tj fj khj cj chj shj shchj" + ), + ( + "бя вя гя ґя дя жя зя кя ля мя ня пя ря ся тя фя хя ця чя шя щя", + "bja vja ghja gja dja zhja zja kja lja mja nja pja rja sja tja fja khja cja chja shja shchja" + ), + ( + "б’я в’я г’я ґ’я д’я ж’я з’я к’я л’я м’я н’я п’я р’я с’я т’я ф’я х’я ц’я ч’я ш’я щ’я", + "b'ja v'ja gh'ja g'ja d'ja zh'ja z'ja k'ja l'ja m'ja n'ja p'ja r'ja s'ja t'ja f'ja kh'ja c'ja ch'ja sh'ja shch'ja" + ), + ( + "бй бйо вй гй ґй дй жй зй кй лй мй нй пй рй сй тй фй хй цй чй шй щй", + "b'j b'jo v'j gh'j g'j d'j zh'j z'j k'j l'j m'j n'j p'j r'j s'j t'j f'j kh'j c'j ch'j sh'j shch'j" + ), + ( + "ня ньа н’я нь'н ньн", + "nja nj'a n'ja nj'n njn" + ), + ( + "рос дыня эзёдынъ. бр кроў.", + "ros dywnja ehwzjowdywnoh. br krouh." + ), + ( + "А́ а́ Е́ е́ Є́ є́ И́ и́ І́ і́ Ї́ ї́ О́ о́ У́ у́ Ю́ ю́ Я́ я́", + "Á á É é JÉ jé Ý ý Í í JÍ jí Ó ó Ú ú JÚ jú JÁ já" + ), + ( + "Є́с сЄ́с є́с сє́с Ї́с сЇ́с ї́с сї́с Ю́с сЮ́с ю́с сю́с Я́с сЯ́с я́с ся́с", + "Jés sJés jés sjés Jís sJís jís sjís Jús sJús jús sjús Jás sJás jás sjás" + ), + ( + "' ім’я 'жук' \"жук\" ' '", + "' im'ja 'zhuk' \"zhuk\" ' '" + ), + ( + "Сонце світить майже білим світлом, однак через сильніше розсіювання і поглинання короткохвильової частини спектра атмосферою Землі пряме світло Сонця біля поверхні нашої планети набуває певного жовтого відтінку. 
Якщо небо ясне, то блакитний відтінок розсіяного світла складається з жовтуватим прямим сонячним світлом і загальне освітлення об’єктів на Землі стає білим.", + "Sonce svitytj majzhe bilym svitlom, odnak cherez syljnishe rozsijuvannja i poghlynannja korotkokhvyljovoji chastyny spektra atmosferoju Zemli prjame svitlo Soncja bilja poverkhni nashoji planety nabuvaje pevnogho zhovtogho vidtinku. Jakshcho nebo jasne, to blakytnyj vidtinok rozsijanogho svitla skladajetjsja z zhovtuvatym prjamym sonjachnym svitlom i zaghaljne osvitlennja ob'jektiv na Zemli staje bilym." + ), + ( + "дуб!дуб\"дуб#дуб$дуб%дуб&дуб'дуб(дуб)дуб*дуб+дуб,дуб-дуб.дуб/дуб:дуб;дуб<дуб=дуб>дуб?дуб@дуб[дуб\\дуб]дуб^дуб_дуб`дуб{дуб|дуб}дуб~дуб", + "dub!dub\"dub#dub$dub%dub&dub'dub(dub)dub*dub+dub,dub-dub.dub/dub:dub;dubdub?dub@dub[dub\\dub]dub^dub_dub`dub{dub|dub}dub~dub" + ), + ( + "бод бод\tбод\nбод\rбод", + "bod bod\tbod\nbod\rbod" + ) ] + + for (cyr,lat) in data { + let enc = try encode(cyr, table: UKLatnTable.DSTU_9112_B) + #expect(lat == enc) + let dec = try decode(lat, table: UKLatnTable.DSTU_9112_B) + #expect(cyr == dec) + } + } + + @Test func c2l_DSTU_9112_B() throws { + let data: [(String, String)] = [ + ( + "в’я в'я", + "v'ja v'ja" + ), + ( + "Ї ї Й й Ё ё Ў ў", + "JI ji J j JOW jow UH uh" + ) ] + + for (cyr,lat) in data { + let enc = try encode(cyr, table: UKLatnTable.DSTU_9112_B) + #expect(lat == enc) + } + } + + @Test func l2c_DSTU_9112_B() throws { + let data: [(String, String)] = [ + ( + "я ї є ю г ж х щ ш ч ь", + "jA jI jE jU gH zH kH sHcH sH cH hJ" + ), + ( + "А́ а́ Е́ е́ Є́ Є́ є́ є́ И́ и́ І́ і́ Ї́ Ї́ ї́ ї́ О́ о́ У́ у́ Ю́ Ю́ ю́ ю́ Я́ Я́ я́ я́", + "Á á É é JÉ Jé jÉ jé Ý ý Í í JÍ Jí jÍ jí Ó ó Ú ú JÚ Jú jÚ jú JÁ Já jÁ já" + ), + ( + "Є́с сЄ́с є́с сє́с Ї́с сЇ́с ї́с сї́с Ю́с сЮ́с ю́с сю́с Я́с сЯ́с я́с ся́с", + "Jés sJés jés sjés Jís sJís jís sjís Jús sJús jús sjús Jás sJás jás sjás" + ) ] + + for (cyr,lat) in data { + let dec = try decode(lat, table: UKLatnTable.DSTU_9112_B) + #expect(cyr 
== dec) + } + } +} + + +@Suite("KMU_55") struct Kmu55Tests { + + @Test func c2l_KMU_55() throws { + let data: [(String, String)] = [ + ( + "Україна, Хмельницький", + "Ukraina, Khmelnytskyi" + ), + ( + "Щастям б’єш жук їх глицю в фон й ґедзь пріч.", + "Shchastiam biesh zhuk yikh hlytsiu v fon y gedz prich." + ), + ( + "згин зГ зГин Згин Зг ЗГ ЗГИН", + "zghyn zGH zGhyn Zghyn Zgh ZGH ZGHYN" + ), + ( + "ь Ь ль льє льї лью лья лье льі льу льа льйо льо", + " l lie li liu lia le li lu la lio lo" + ), + ( + "Єл Їл Юл Ял", + "Yel Yil Yul Yal" + ), + ( + "бь вь гь ґь дь жь зь кь ль мь нь пь рь сь ть фь хь ць чь шь щь", + "b v h g d zh z k l m n p r s t f kh ts ch sh shch" + ), + ( + "бя вя гя ґя дя жя зя кя ля мя ня пя ря ся тя фя хя ця чя шя щя", + "bia via hia gia dia zhia zia kia lia mia nia pia ria sia tia fia khia tsia chia shia shchia" + ), + ( + "б’я в’я г’я ґ’я д’я ж’я з’я к’я л’я м’я н’я п’я р’я с’я т’я ф’я х’я ц’я ч’я ш’я щ’я", + "bia via hia gia dia zhia zia kia lia mia nia pia ria sia tia fia khia tsia chia shia shchia" + ), + ( + "бй бйо вй гй ґй дй жй зй кй лй мй нй пй рй сй тй фй хй цй чй шй щй", + "bi bio vi hi gi di zhi zi ki li mi ni pi ri si ti fi khi tsi chi shi shchi" + ), + ( + "А́ а́ Е́ е́ Є́ є́ И́ и́ І́ і́ Ї́ ї́ О́ о́ У́ у́ Ю́ ю́ Я́ я́", + "Á á É é YÉ yé Ý ý Í í YÍ yí Ó ó Ú ú YÚ yú YÁ yá" + ), + ( + "Є́с сЄ́с є́с сє́с Ї́с сЇ́с ї́с сї́с Ю́с сЮ́с ю́с сю́с Я́с сЯ́с я́с ся́с", + "Yés sIés yés siés Yís sÍs yís sís Yús sIús yús siús Yás sIás yás siás" + ), + ( + "' ім’я 'жук' \"жук\" ' '", + "' imia 'zhuk' \"zhuk\" ' '" + ), + ( + "Сонце світить майже білим світлом, однак через сильніше розсіювання і поглинання короткохвильової частини спектра атмосферою Землі пряме світло Сонця біля поверхні нашої планети набуває певного жовтого відтінку. 
Якщо небо ясне, то блакитний відтінок розсіяного світла складається з жовтуватим прямим сонячним світлом і загальне освітлення об’єктів на Землі стає білим.", + "Sontse svityt maizhe bilym svitlom, odnak cherez sylnishe rozsiiuvannia i pohlynannia korotkokhvylovoi chastyny spektra atmosferoiu Zemli priame svitlo Sontsia bilia poverkhni nashoi planety nabuvaie pevnoho zhovtoho vidtinku. Yakshcho nebo yasne, to blakytnyi vidtinok rozsiianoho svitla skladaietsia z zhovtuvatym priamym soniachnym svitlom i zahalne osvitlennia obiektiv na Zemli staie bilym." + ), + ( + "в’я в'я", + "via via" + ), + ( + "дуб!дуб\"дуб#дуб$дуб%дуб&дуб'дуб(дуб)дуб*дуб+дуб,дуб-дуб.дуб/дуб:дуб;дуб<дуб=дуб>дуб?дуб@дуб[дуб\\дуб]дуб^дуб_дуб`дуб{дуб|дуб}дуб~дуб", + "dub!dub\"dub#dub$dub%dub&dubdub(dub)dub*dub+dub,dub-dub.dub/dub:dub;dub<dub=dub>dub?dub@dub[dub\\dub]dub^dub_dub`dub{dub|dub}dub~dub" + ), + ( + "бод бод\tбод\nбод\rбод", + "bod bod\tbod\nbod\rbod" + ) ] + + for (cyr,lat) in data { + let enc = try encode(cyr, table: UKLatnTable.KMU_55) + #expect(lat == enc) + } + } + + @Test func decode_KMU_55_throws() throws { + #expect(throws: UKLatnError.self) { + try decode("lat", table: UKLatnTable.KMU_55) + } + } +} diff --git a/swift/readme.md b/swift/readme.md index 19da872..2c78063 100644 --- a/swift/readme.md +++ b/swift/readme.md @@ -2,10 +2,15 @@ uklatn == Ukrainian Cyrillic transliteration to Latin script. +Supported transliteration schemes: +- [DSTU 9112:2021](https://uk.wikipedia.org/wiki/ДСТУ_9112:2021) +- [KMU 55:2010](https://zakon.rada.gov.ua/laws/show/55-2010-п) + + ```swift -import UKLatn +import UkrainianLatin let s = try! encode("Доброго вечора!") -let t = try! decocde("Paljanycja") +let t = try! decode("Paljanycja") print(s, t) ``` @@ -14,3 +19,36 @@ Select a transliteration scheme: try encode("Борщ", table: UKLatnTable.DSTU_9112_A) ``` + +Notes +-- +Input is assumed to be in Ukrainian (Cyrillic or Latin script), and will be processed in full.
+If your data contains mixed languages, preprocess it first to extract the Ukrainian chunks. + + +Command-line executable +-- + +```sh +uklatn 'моє щастя' +``` + +Running the executable from the package: +```sh +swift run uklatn 'моє щастя' +``` + +```txt +usage: uklatn [-h] [-t TABLE] [-c] [-l] [-f FILE] [text ...] + +arguments: + text text to transliterate + +options: + -h, --help show this help message and exit + -t, --table {DSTU_9112_A,DSTU_9112_B,KMU_55} + transliteration system (default: DSTU_9112_A) + -l, --lat, --latin convert to Latin script (default) + -c, --cyr, --cyrillic convert to Cyrillic script + -f, --file FILE read text from file +``` diff --git a/tools/Makefile b/tools/Makefile index f1d9227..c316ffc 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -2,10 +2,11 @@ .PHONY: c .PHONY: js .PHONY: py +.PHONY: swift PYTHON ?= python -all: c js py +all: c js py swift c: @echo "/* Generated by gentables.py, do not edit. */\n" > ../c/_tables.c @@ -25,3 +26,9 @@ py: @echo "# Generated by gentests.py, do not edit.\n" > ../python/tests/uklatn_tests.py @$(PYTHON) gentests.py py >> ../python/tests/uklatn_tests.py +swift: + @echo "/* Generated by gentables.py, do not edit. */\n" > ../swift/Sources/UkrainianLatin/UKLatn.swift + @$(PYTHON) gentables.py swift >> ../swift/Sources/UkrainianLatin/UKLatn.swift + @echo "/* Generated by gentests.py, do not edit.
*/\n" > ../swift/Tests/UkrainianLatinTests/UKLatnTests.swift + @$(PYTHON) gentests.py swift >> ../swift/Tests/UkrainianLatinTests/UKLatnTests.swift + diff --git a/tools/gen/gen_swift.py b/tools/gen/gen_swift.py new file mode 100755 index 0000000..ffee0ce --- /dev/null +++ b/tools/gen/gen_swift.py @@ -0,0 +1,252 @@ +import io +import json +import logging +import re +from pathlib import Path + + +logger = logging.getLogger(Path(__file__).stem) + + +def gen_tests(fns): + def _parse_tests(fn): + def parse_kind(s): + match s.lower().split(): + case ['cyr', '<>', 'lat']: return 'c2lr' + case ['lat', '<>', 'cyr']: return 'l2cr' + case ['cyr', '>', 'lat']: return 'c2l' + case ['lat', '>', 'cyr']: return 'l2c' + case _: + raise Exception(f'unknown test kind: {s!r}') + with fn.open() as fp: + data = json.load(fp) + return [[parse_kind(obj['test']), obj['cyr'], obj['lat']] for obj in data] + + def table_name(s): + return re.sub(r'test_', '', s, flags=re.I).replace('-', '_') + def class_name(s): + return re.sub(r'test|_', '', s.title(), flags=re.I) + def _j(s): + return json.dumps(s, ensure_ascii=False) + def all_c2l(data): + return all(k == 'c2l' for k,_,_ in data) + def _emit_tests(kind, data, table, file): + data = [(cyr,lat) for k,cyr,lat in data if k == kind] + if not data: return + print('', file=file) + print(f' @Test func {kind}_{table}() throws {{', file=file) + vs, ws = ' ' * 8, ' ' * 12 + dump = '[\n' + ',\n'.join(f'{vs}(\n{ws}{_j(cyr)},\n{ws}{_j(lat)}\n{vs})' for cyr,lat in data) + ' ]\n' + print(f' let data: [(String, String)] = {dump}', file=file) + print(' for (cyr,lat) in data {', file=file) + if kind[0] == 'c': + print(f' let enc = try encode(cyr, table: UKLatnTable.{table})', file=file) + print(f' #expect(lat == enc)', file=file) + else: + print(f' let dec = try decode(lat, table: UKLatnTable.{table})', file=file) + print(f' #expect(cyr == dec)', file=file) + if kind[-1] == 'r': + if kind[0] == 'c': + print(f' let dec = try decode(lat, table: 
UKLatnTable.{table})', file=file) + print(f' #expect(cyr == dec)', file=file) + else: + print(f' let enc = try encode(cyr, table: UKLatnTable.{table})', file=file) + print(f' #expect(lat == enc)', file=file) + print(' }', file=file) + print(' }', file=file) + def _emit_decode_throws(table, file): + print(f''' + @Test func decode_{table}_throws() throws {{ + #expect(throws: UKLatnError.self) {{ + try decode("lat", table: UKLatnTable.{table}) + }} + }}''', file=file) + + with io.StringIO() as so: + print('import Testing', file=so) + print('@testable import UkrainianLatin', file=so) + for fn in fns: + logger.info(f'processing {fn!s}') + name = fn.stem + table = table_name(name) + cname = class_name(name) + data = _parse_tests(fn) + print(f'\n\n@Suite({_j(table)}) struct {cname}Tests {{', file=so) + _emit_tests('c2lr', data, table, file=so) + _emit_tests('l2cr', data, table, file=so) + _emit_tests('c2l', data, table, file=so) + _emit_tests('l2c', data, table, file=so) + if all_c2l(data): + _emit_decode_throws(table, file=so) + print('}', file=so) + + return so.getvalue() + + +def gen_transforms(fns, default_table=None): + def table_name(s): + s, = re.findall(r'uk_Latn_(.*?)(?:-uk)?\s*$', s, flags=re.I) + return s.replace('-', '_') + def class_name(s): + return '_Uklatn_' + s.replace('-', '_') + def _j(s): + return json.dumps(s, ensure_ascii=False) + def _isdec(s): + return s.startswith('uk_Latn_') + def _load_rules(data): + return [s if isinstance(s, str) else [ + '|'.join(r['regex'] for r in s) + '|(.)', + [r['map'] for r in s] + [dict()] + ] for s in data] + + def _emit_tr(cname, rules, file): + rules = _load_rules(rules) + norms = dict(zip('NFC NFD NFKC NFKD'.split(), ''' + precomposedStringWithCanonicalMapping + decomposedStringWithCanonicalMapping + precomposedStringWithCompatibilityMapping + decomposedStringWithCompatibilityMapping + '''.split())) + print(f'private let {cname}: () -> _UKLatnCodec.Transform = {{', file=file) + for sid, section in enumerate(rules): 
+ if not isinstance(section, str): + rx, maps = section + gn = len(maps) + print(f' let _rx{sid} = try! NSRegularExpression(pattern: #"{rx}"#, options: [.dotMatchesLineSeparators, .useUnicodeWordBoundaries])', file=file) + print(f' let _maps{sid}:[[String:String]] = [[:], ', file=file) + for d in maps: + ds = '[' + ','.join(f'{_j(k)}:{_j(v)}' for k,v in d.items()) + ']' if d else '[:]' + print(f' {ds},', file=file) + print(f' ]', file=file) + print(' func transform(_ text: String) throws -> String {', file=file) + print(' var text = text', file=file) + for sid, section in enumerate(rules): + if isinstance(section, str): + norm = norms[section] + print(f' text = text.{norm} // {section}', file=file) + else: + rx, maps = section + print(f' text = text.replacing(_rx{sid}) {{ (i, match) in', file=file) + print(f' _maps{sid}[i][match] ?? match', file=file) + print(f' }}', file=file) + print(''' return text + } + return transform +} +''', file=file) + + context = dict() + tables = dict() + with io.StringIO() as so: + for fn in fns: + logger.info(f'processing {fn!s}') + with fn.open() as fp: + rules = json.load(fp) + name = fn.stem + table = table_name(name) + cname = class_name(name) + if table not in tables: + tables[table] = [None, None] + tables[table][_isdec(name)] = cname + _emit_tr(cname, rules, so) + classdefs_tables = so.getvalue() + + with io.StringIO() as so: + print('private let _UklatnTables: [UKLatnTable:_UKLatnCodec] = [', file=so) + for tid, (table, (enc, dec)) in enumerate(tables.items(), 1): + enc = f'{enc}()' if enc else 'nil' + dec = f'{dec}()' if dec else 'nil' + print(f' .{table}: _UKLatnCodec(encode: {enc}, decode: {dec}),', file=so) + print(']', end='', file=so) + tabledef = so.getvalue() + + context['global_tables'] = classdefs_tables + tabledef + context['default_table'] = default_table + context['tables_enum'] = '\n'.join(f' case {t} = {i}' for i,t in enumerate(tables, 1)) + + context['string_replacing'] = '''private extension String { + + func 
replacing(_ rx: NSRegularExpression, with replacement: @escaping (Int,String) -> String) -> String { + var so = "" + rx.enumerateMatches(in: self, range: NSRange(startIndex ..< endIndex, in: self)) { (result: NSTextCheckingResult?, flags: NSRegularExpression.MatchingFlags, stop: UnsafeMutablePointer) in + if let result { + for i in 1.. String {{ + guard let transform = _UklatnTables[table]?.encode + else {{ + throw UKLatnError.invalidTable(table.rawValue) + }} + return try transform(text) +}} + + +/// Re-transliterates a string of Ukrainian Latin to Cyrillic script. +/// +/// - Parameters: +/// - text: the text to transliterate +/// - table: transliteration system, one of: +/// - `DSTU_9112_A`: DSTU 9112:2021 System A +/// - `DSTU_9112_B`: DSTU 9112:2021 System B +/// - Returns: The transliterated string. +public func decode(_ text: String, table: UKLatnTable = .{default_table}) throws -> String {{ + guard let transform = _UklatnTables[table]?.decode + else {{ + throw UKLatnError.invalidTable(table.rawValue) + }} + return try transform(text) +}} + + +{string_replacing} + + +public enum UKLatnTable : Int {{ +{tables_enum} +}} + + +private struct _UKLatnCodec {{ + typealias Transform = ((String) throws -> String) + let encode: Transform? + let decode: Transform? 
+}} + + +{global_tables} +''' + text = template.format(**context) + return text + diff --git a/tools/gentables.py b/tools/gentables.py index 8823b6c..0eee18c 100755 --- a/tools/gentables.py +++ b/tools/gentables.py @@ -62,6 +62,17 @@ def gen_py(src): logger.info('PY generator end') +def gen_swift(src): + logger.info('Swift generator start') + from gen import gen_swift + + source = _basegen(args, 'src/regex', 'uk*.json', gen_swift.gen_transforms) + for text in source: + print(text, end='') + + logger.info('Swift generator end') + + if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description='Generate source code for the transform tables.') @@ -80,6 +91,10 @@ def gen_py(src): parse_py.add_argument('source', nargs='*', help='source directory') parse_py.set_defaults(func=gen_py) + parse_swift = subpar.add_parser('swift', help='Swift code generator') + parse_swift.add_argument('source', nargs='*', help='source directory') + parse_swift.set_defaults(func=gen_swift) + args = parser.parse_args() level = logging.DEBUG if args.verbose else logging.INFO diff --git a/tools/gentests.py b/tools/gentests.py index d153fed..4963d7f 100755 --- a/tools/gentests.py +++ b/tools/gentests.py @@ -59,6 +59,17 @@ def gen_py(args): logger.info('PY generator end') +def gen_swift(args): + logger.info('Swift generator start') + from gen import gen_swift + + source = _basegen(args, 'src/tests', 'test*.json', gen_swift.gen_tests) + for text in source: + print(text, end='') + + logger.info('Swift generator end') + + if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description='Generate test code for the transform tables.') @@ -77,6 +88,10 @@ def gen_py(args): parse_py.add_argument('source', nargs='*', help='source directory') parse_py.set_defaults(func=gen_py) + parse_swift = subpar.add_parser('swift', help='Swift code generator') + parse_swift.add_argument('source', nargs='*', help='source directory') +
parse_swift.set_defaults(func=gen_swift) + args = parser.parse_args() level = logging.DEBUG if args.verbose else logging.INFO From 950e6a152eea08a2949fffc8c6af992ade8f82f5 Mon Sep 17 00:00:00 2001 From: Pavlo Ivashkov Date: Tue, 12 Nov 2024 22:05:54 +0200 Subject: [PATCH 2/6] roll back to xctest --- swift/Sources/UkrainianLatin/UKLatn.swift | 2 +- .../UkrainianLatinTests/UKLatnTests.swift | 46 ++++++++++--------- tools/gen/gen_swift.py | 22 +++++---- 3 files changed, 37 insertions(+), 33 deletions(-) diff --git a/swift/Sources/UkrainianLatin/UKLatn.swift b/swift/Sources/UkrainianLatin/UKLatn.swift index 4d8bf7f..b5b6e72 100644 --- a/swift/Sources/UkrainianLatin/UKLatn.swift +++ b/swift/Sources/UkrainianLatin/UKLatn.swift @@ -4,7 +4,7 @@ import Foundation -public enum UKLatnError: Error { +public enum UKLatnError: Error, Equatable { case invalidTable(Int) } diff --git a/swift/Tests/UkrainianLatinTests/UKLatnTests.swift b/swift/Tests/UkrainianLatinTests/UKLatnTests.swift index 01b3d55..c10a5b5 100644 --- a/swift/Tests/UkrainianLatinTests/UKLatnTests.swift +++ b/swift/Tests/UkrainianLatinTests/UKLatnTests.swift @@ -1,12 +1,12 @@ /* Generated by gentests.py, do not edit. 
*/ -import Testing +import XCTest @testable import UkrainianLatin -@Suite("DSTU_9112_A") struct Dstu9112ATests { +class Dstu9112ATests: XCTestCase { - @Test func c2lr_DSTU_9112_A() throws { + func test_c2lr_DSTU_9112_A() throws { let data: [(String, String)] = [ ( "Україна, Хмельницький", @@ -75,13 +75,13 @@ import Testing for (cyr,lat) in data { let enc = try encode(cyr, table: UKLatnTable.DSTU_9112_A) - #expect(lat == enc) + XCTAssertEqual(lat, enc) let dec = try decode(lat, table: UKLatnTable.DSTU_9112_A) - #expect(cyr == dec) + XCTAssertEqual(cyr, dec) } } - @Test func c2l_DSTU_9112_A() throws { + func test_c2l_DSTU_9112_A() throws { let data: [(String, String)] = [ ( "в’я в'я", @@ -94,11 +94,11 @@ import Testing for (cyr,lat) in data { let enc = try encode(cyr, table: UKLatnTable.DSTU_9112_A) - #expect(lat == enc) + XCTAssertEqual(lat, enc) } } - @Test func l2c_DSTU_9112_A() throws { + func test_l2c_DSTU_9112_A() throws { let data: [(String, String)] = [ ( "я є ю", @@ -123,15 +123,15 @@ import Testing for (cyr,lat) in data { let dec = try decode(lat, table: UKLatnTable.DSTU_9112_A) - #expect(cyr == dec) + XCTAssertEqual(cyr, dec) } } } -@Suite("DSTU_9112_B") struct Dstu9112BTests { +class Dstu9112BTests: XCTestCase { - @Test func c2lr_DSTU_9112_B() throws { + func test_c2lr_DSTU_9112_B() throws { let data: [(String, String)] = [ ( "Україна, Хмельницький", @@ -200,13 +200,13 @@ import Testing for (cyr,lat) in data { let enc = try encode(cyr, table: UKLatnTable.DSTU_9112_B) - #expect(lat == enc) + XCTAssertEqual(lat, enc) let dec = try decode(lat, table: UKLatnTable.DSTU_9112_B) - #expect(cyr == dec) + XCTAssertEqual(cyr, dec) } } - @Test func c2l_DSTU_9112_B() throws { + func test_c2l_DSTU_9112_B() throws { let data: [(String, String)] = [ ( "в’я в'я", @@ -219,11 +219,11 @@ import Testing for (cyr,lat) in data { let enc = try encode(cyr, table: UKLatnTable.DSTU_9112_B) - #expect(lat == enc) + XCTAssertEqual(lat, enc) } } - @Test func l2c_DSTU_9112_B() throws { 
+ func test_l2c_DSTU_9112_B() throws { let data: [(String, String)] = [ ( "я ї є ю г ж х щ ш ч ь", @@ -240,15 +240,15 @@ import Testing for (cyr,lat) in data { let dec = try decode(lat, table: UKLatnTable.DSTU_9112_B) - #expect(cyr == dec) + XCTAssertEqual(cyr, dec) } } } -@Suite("KMU_55") struct Kmu55Tests { +class Kmu55Tests: XCTestCase { - @Test func c2l_KMU_55() throws { + func test_c2l_KMU_55() throws { let data: [(String, String)] = [ ( "Україна, Хмельницький", @@ -317,13 +317,15 @@ import Testing for (cyr,lat) in data { let enc = try encode(cyr, table: UKLatnTable.KMU_55) - #expect(lat == enc) + XCTAssertEqual(lat, enc) } } - @Test func decode_KMU_55_throws() throws { - #expect(throws: UKLatnError.self) { + func test_decode_KMU_55_throws() throws { + XCTAssertThrowsError({ try decode("lat", table: UKLatnTable.KMU_55) + }) { error in + XCTAssertEqual(error as? UKLatnError, UKLatnError.invalidTable(UKLatnTable.KMU_55.rawValue)) } } } diff --git a/tools/gen/gen_swift.py b/tools/gen/gen_swift.py index ffee0ce..1e57e74 100755 --- a/tools/gen/gen_swift.py +++ b/tools/gen/gen_swift.py @@ -34,36 +34,38 @@ def _emit_tests(kind, data, table, file): data = [(cyr,lat) for k,cyr,lat in data if k == kind] if not data: return print('', file=file) - print(f' @Test func {kind}_{table}() throws {{', file=file) + print(f' func test_{kind}_{table}() throws {{', file=file) vs, ws = ' ' * 8, ' ' * 12 dump = '[\n' + ',\n'.join(f'{vs}(\n{ws}{_j(cyr)},\n{ws}{_j(lat)}\n{vs})' for cyr,lat in data) + ' ]\n' print(f' let data: [(String, String)] = {dump}', file=file) print(' for (cyr,lat) in data {', file=file) if kind[0] == 'c': print(f' let enc = try encode(cyr, table: UKLatnTable.{table})', file=file) - print(f' #expect(lat == enc)', file=file) + print(f' XCTAssertEqual(lat, enc)', file=file) else: print(f' let dec = try decode(lat, table: UKLatnTable.{table})', file=file) - print(f' #expect(cyr == dec)', file=file) + print(f' XCTAssertEqual(cyr, dec)', file=file) if kind[-1] == 'r': 
if kind[0] == 'c': print(f' let dec = try decode(lat, table: UKLatnTable.{table})', file=file) - print(f' #expect(cyr == dec)', file=file) + print(f' XCTAssertEqual(cyr, dec)', file=file) else: print(f' let enc = try encode(cyr, table: UKLatnTable.{table})', file=file) - print(f' #expect(lat == enc)', file=file) + print(f' XCTAssertEqual(lat, enc)', file=file) print(' }', file=file) print(' }', file=file) def _emit_decode_throws(table, file): print(f''' - @Test func decode_{table}_throws() throws {{ - #expect(throws: UKLatnError.self) {{ + func test_decode_{table}_throws() throws {{ + XCTAssertThrowsError({{ try decode("lat", table: UKLatnTable.{table}) + }}) {{ error in + XCTAssertEqual(error as? UKLatnError, UKLatnError.invalidTable(UKLatnTable.{table}.rawValue)) }} }}''', file=file) with io.StringIO() as so: - print('import Testing', file=so) + print('import XCTest', file=so) print('@testable import UkrainianLatin', file=so) for fn in fns: logger.info(f'processing {fn!s}') @@ -71,7 +73,7 @@ def _emit_decode_throws(table, file): table = table_name(name) cname = class_name(name) data = _parse_tests(fn) - print(f'\n\n@Suite({_j(table)}) struct {cname}Tests {{', file=so) + print(f'\n\nclass {cname}Tests: XCTestCase {{', file=so) _emit_tests('c2lr', data, table, file=so) _emit_tests('l2cr', data, table, file=so) _emit_tests('c2l', data, table, file=so) @@ -190,7 +192,7 @@ def _emit_tr(cname, rules, file): import Foundation -public enum UKLatnError: Error {{ +public enum UKLatnError: Error, Equatable {{ case invalidTable(Int) }} From 3409be03958ed5cdbe9cf7affc86fa8c61c1689b Mon Sep 17 00:00:00 2001 From: Pavlo Ivashkov Date: Tue, 12 Nov 2024 22:18:21 +0200 Subject: [PATCH 3/6] test throws --- swift/Tests/UkrainianLatinTests/UKLatnTests.swift | 4 ++-- tools/gen/gen_swift.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/swift/Tests/UkrainianLatinTests/UKLatnTests.swift b/swift/Tests/UkrainianLatinTests/UKLatnTests.swift index c10a5b5..98cbece 
100644 --- a/swift/Tests/UkrainianLatinTests/UKLatnTests.swift +++ b/swift/Tests/UkrainianLatinTests/UKLatnTests.swift @@ -322,9 +322,9 @@ class Kmu55Tests: XCTestCase { } func test_decode_KMU_55_throws() throws { - XCTAssertThrowsError({ + XCTAssertThrowsError( try decode("lat", table: UKLatnTable.KMU_55) - }) { error in + ) { error in XCTAssertEqual(error as? UKLatnError, UKLatnError.invalidTable(UKLatnTable.KMU_55.rawValue)) } } diff --git a/tools/gen/gen_swift.py b/tools/gen/gen_swift.py index 1e57e74..5ec1862 100755 --- a/tools/gen/gen_swift.py +++ b/tools/gen/gen_swift.py @@ -57,9 +57,9 @@ def _emit_tests(kind, data, table, file): def _emit_decode_throws(table, file): print(f''' func test_decode_{table}_throws() throws {{ - XCTAssertThrowsError({{ + XCTAssertThrowsError( try decode("lat", table: UKLatnTable.{table}) - }}) {{ error in + ) {{ error in XCTAssertEqual(error as? UKLatnError, UKLatnError.invalidTable(UKLatnTable.{table}.rawValue)) }} }}''', file=file) From c956b974c74f210fd4adf74370a1682f7e0d195e Mon Sep 17 00:00:00 2001 From: Pavlo Ivashkov Date: Tue, 12 Nov 2024 22:33:57 +0200 Subject: [PATCH 4/6] workflows --- .github/workflows/test-builds.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test-builds.yml b/.github/workflows/test-builds.yml index e3cb8a7..0819a03 100644 --- a/.github/workflows/test-builds.yml +++ b/.github/workflows/test-builds.yml @@ -92,6 +92,9 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Swift version + run: swift -version + - name: Build run: swift build -c ${{matrix.build_type}} From 67a3a72315f85d1479bbf5bc2250535a7913d14b Mon Sep 17 00:00:00 2001 From: Pavlo Ivashkov Date: Wed, 13 Nov 2024 12:05:46 +0200 Subject: [PATCH 5/6] slice unicode scalars --- swift/Sources/UkrainianLatin/UKLatn.swift | 12 ++++++++++-- tools/gen/gen_swift.py | 14 +++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/swift/Sources/UkrainianLatin/UKLatn.swift 
b/swift/Sources/UkrainianLatin/UKLatn.swift index b5b6e72..91add1e 100644 --- a/swift/Sources/UkrainianLatin/UKLatn.swift +++ b/swift/Sources/UkrainianLatin/UKLatn.swift @@ -44,6 +44,14 @@ public func decode(_ text: String, table: UKLatnTable = .DSTU_9112_A) throws -> } +private extension Range where Bound == String.UnicodeScalarView.Index { + + init?(_ range: NSRange, in view: String.UnicodeScalarView) { + self = view.index(view.startIndex, offsetBy: range.location) ..< view.index(view.startIndex, offsetBy: range.location + range.length) + } +} + + private extension String { func replacing(_ rx: NSRegularExpression, with replacement: @escaping (Int,String) -> String) -> String { @@ -53,8 +61,8 @@ private extension String { for i in 1.. String) -> String { var so = "" @@ -175,8 +183,8 @@ def _emit_tr(cname, rules, file): for i in 1.. Date: Wed, 13 Nov 2024 12:06:00 +0200 Subject: [PATCH 6/6] cli exit code --- swift/Sources/cli/uklatn.swift | 1 + 1 file changed, 1 insertion(+) diff --git a/swift/Sources/cli/uklatn.swift b/swift/Sources/cli/uklatn.swift index c912ade..1149e7a 100644 --- a/swift/Sources/cli/uklatn.swift +++ b/swift/Sources/cli/uklatn.swift @@ -124,6 +124,7 @@ private struct AppArgs { } printUsage(to: &output) print("error: \(message)", to: &output) + exit(1) } static func parse(_ argv: [String]) throws -> AppArgs {