Skip to content

Commit

Permalink
Fix bug when parsing named character references that contain numbers (#4
Browse files Browse the repository at this point in the history
)
  • Loading branch information
youming-lin authored Sep 23, 2016
1 parent 3341544 commit 204a8cd
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 18 deletions.
31 changes: 17 additions & 14 deletions Sources/HTMLEntities/String+HTMLEntities.swift
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,12 @@ public extension String {
let unicodes = self.unicodeScalars

// result buffer
var str: String = ""
var str: String? = nil

// entity buffer
// the result buffer (str) is an optional string since there are issues on
// Linux when checking against empty string, i.e., "\u{200C}" == "" is true;
// "\u{200C}" is the character for the named character reference &zwnj;
var entity: String = ""

// current parse state
Expand Down Expand Up @@ -139,7 +142,7 @@ public extension String {
// entity can only be a number type
state = .Number
}
else if unicode.isAlpha {
else if unicode.isAlphaNumeric {
// entity can only be named character reference type
state = .Named

Expand Down Expand Up @@ -241,10 +244,12 @@ public extension String {
let unicodeScalar = UnicodeScalar(code) {
// reached end of entity
// move unbuffered unicodes over to the result buffer
str.append(String(unicodes[leftIndex..<ampersandIndex]))
str = str == nil ? "" : str

str?.append(String(unicodes[leftIndex..<ampersandIndex]))

// append unescaped character to result buffer
str.append(Character(unicodeScalar))
str?.append(Character(unicodeScalar))

// move left index since we have buffered everything
// up to and including the current entity
Expand All @@ -260,16 +265,14 @@ public extension String {
// move currentIndex to the position of the next unicode to be consumed
currentIndex = nextIndex
}

if str == "" {
// no unescapable entity found
// return string as it is
return self

if var str = str {
// append rest of string to result buffer
str.append(String(unicodes[leftIndex..<unicodes.endIndex]))

return str
}

// append rest of string to result buffer
str.append(String(unicodes[leftIndex..<unicodes.endIndex]))

return str

return self
}
}
8 changes: 4 additions & 4 deletions Sources/HTMLEntities/Utilities.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ func invert<K, V: Hashable>(_ dict: [K: V]) -> [V: K] {
}

extension UInt32 {
var isAlpha: Bool {
// ASCII values of [A-Z] and [a-z]
return 65...90 ~= self || 97...122 ~= self
var isAlphaNumeric: Bool {
// ASCII values of [0-9], [A-Z], and [a-z]
return self.isNumeral || 65...90 ~= self || 97...122 ~= self
}

var isAmpersand: Bool {
Expand Down Expand Up @@ -86,7 +86,7 @@ extension UInt32 {
case .Hex:
return self.isHexNumeral
case .Named:
return self.isAlpha
return self.isAlphaNumeric
default:
return false
}
Expand Down
13 changes: 13 additions & 0 deletions Tests/HTMLEntitiesTests/HTMLEntitiesTest.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@ let str3Unescaped = "Jako efektivnější se nám jeví pořádání tzv. Road S
let str3Escaped = "Jako efektivn&#x11B;j&#x161;&#xED; se n&#xE1;m jev&#xED; po&#x159;&#xE1;d&#xE1;n&#xED; tzv. Road Show prost&#x159;ednictv&#xED;m na&#x161;ich autorizovan&#xFD;ch dealer&#x16F; v &#x10C;ech&#xE1;ch a na Morav&#x11B;, kter&#xE9; prob&#x11B;hnou v pr&#x16F;b&#x11B;hu z&#xE1;&#x159;&#xED; a &#x159;&#xED;jna."

class HTMLEntitiesTests: XCTestCase {
/// Checks every entry of `html4NamedCharactersDecodeMap` in both directions:
/// escaping the decoded character must yield the reference, and unescaping
/// the reference must yield the character.
func testNamedCharacterReferences() {
    // Both maps should cover the same set of references, so their sizes must agree.
    XCTAssertEqual(html4NamedCharactersDecodeMap.count, html4NamedCharactersEncodeMap.count)

    for (reference, unicode) in html4NamedCharactersDecodeMap {
        // Report an invalid code point as a test failure that names the
        // offending reference, rather than crashing the whole test run
        // on a force unwrap.
        guard let scalar = UnicodeScalar(unicode) else {
            XCTFail("\(reference) maps to an invalid Unicode scalar value: \(unicode)")
            continue
        }

        let unescaped = String(scalar)
        let escaped = reference

        XCTAssertEqual(unescaped.htmlEscape(), escaped)
        XCTAssertEqual(escaped.htmlUnescape(), unescaped)
    }
}

func testEncode() {
XCTAssertEqual(str1Unescaped.htmlEscape(), str1Escaped)
XCTAssertEqual(str2Unescaped.htmlEscape(), str2Escaped)
Expand Down Expand Up @@ -105,6 +117,7 @@ class HTMLEntitiesTests: XCTestCase {

static var allTests : [(String, (HTMLEntitiesTests) -> () throws -> Void)] {
return [
("testNamedCharacterReferences", testNamedCharacterReferences),
("testEncode", testEncode),
("testDecode", testDecode),
("testInvertibility", testInvertibility),
Expand Down

0 comments on commit 204a8cd

Please sign in to comment.