Skip to content

Commit

Permalink
Fixed utf8 encoding detection.
Browse files Browse the repository at this point in the history
  • Loading branch information
andot committed Mar 21, 2020
1 parent 801897c commit 885bc40
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 71 deletions.
86 changes: 29 additions & 57 deletions hprose_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -627,68 +627,40 @@ static zend_always_inline zend_bool hprose_has_property(zend_class_entry *ce, zv

/* Shorthand for hprose_has_property(): forwards (ce, obj, prop) unchanged and
 * appends TSRMLS_CC to the argument list so call sites do not have to. */
#define has_property(ce, obj, prop) hprose_has_property((ce), (obj), (prop) TSRMLS_CC)

/**
 * Checks whether the first `len` bytes of `str` are structurally valid UTF-8.
 *
 * A sequence is accepted when its lead byte announces 0, 1, 2 or 3 trailing
 * bytes, all of those trailing bytes lie inside the buffer, and each one has
 * the `10xxxxxx` continuation pattern. Four-byte sequences are additionally
 * rejected when they would encode a code point above U+10FFFF.
 *
 * Overlong encodings and UTF-16 surrogate code points are NOT rejected; only
 * the byte structure is validated.
 *
 * @param str  buffer to inspect (need not be NUL-terminated)
 * @param len  number of bytes of `str` to inspect; a value <= 0 yields 1
 * @return 1 if the buffer is structurally valid UTF-8, 0 otherwise
 */
static zend_always_inline zend_bool is_utf8(char *str, int32_t len) {
    uint8_t *s = (uint8_t *)str;
    int32_t i = 0;
    while (i < len) {
        uint8_t c = s[i];
        int32_t trail;
        int32_t k;
        switch (c >> 4) {
            case 0:
            case 1:
            case 2:
            case 3:
            case 4:
            case 5:
            case 6:
            case 7:
                /* 0xxxxxxx: single-byte (ASCII) */
                trail = 0;
                break;
            case 12:
            case 13:
                /* 110xxxxx: one continuation byte follows */
                trail = 1;
                break;
            case 14:
                /* 1110xxxx: two continuation bytes follow */
                trail = 2;
                break;
            case 15:
                /* 1111xxxx: three continuation bytes follow */
                trail = 3;
                break;
            default:
                /* 10xxxxxx: continuation byte in lead position */
                return 0;
        }
        /* A sequence truncated by the end of the buffer is invalid; checking
         * here keeps every s[i + k] access below inside [0, len). */
        if (trail > len - i - 1) {
            return 0;
        }
        for (k = 1; k <= trail; ++k) {
            if ((s[i + k] >> 6) != 0x2) {
                return 0;
            }
        }
        if (trail == 3) {
            /* Low 4 bits of the lead byte plus the top 2 payload bits of the
             * second byte are the code point shifted right by 16; anything
             * above 0x10 is beyond U+10FFFF (this also rejects 0xF8..0xFF). */
            uint8_t b = s[i + 1];
            if ((((c & 0xf) << 2) | ((b >> 4) & 0x3)) > 0x10) {
                return 0;
            }
        }
        i += trail + 1;
    }
    return 1;
}

static zend_always_inline int32_t ustrlen(char *str, int32_t len) {
static zend_always_inline int32_t utf16_length(char *str, int32_t len) {
uint8_t *s = (uint8_t *)str;
int32_t l = len, p = 0;
while (p < len) {
uint8_t a = s[p];
if (a < 0x80) {
++p;
}
else if ((a & 0xE0) == 0xC0) {
p += 2;
--l;
}
else if ((a & 0xF0) == 0xE0) {
p += 3;
l -= 2;
}
else if ((a & 0xF8) == 0xF0) {
p += 4;
l -= 2;
uint8_t c = 0, a;
int32_t n = len, i;
for (i = 0; i < len; i++) {
a = s[i];
if (c == 0) {
if ((a & 0xe0) == 0xc0) {
c = 1;
n--;
}
else if ((a & 0xf0) == 0xe0) {
c = 2;
n -= 2;
}
else if ((a & 0xf8) == 0xf0) {
c = 3;
n -= 2;
}
else if ((a & 0x80) == 0x80) {
return -1;
}
}
else {
return -1;
if ((a & 0xc0) != 0x80) {
return -1;
}
c--;
}
}
return l;
if (c != 0) {
return -1;
}
return n;
}

#if PHP_MAJOR_VERSION < 7
Expand Down
5 changes: 3 additions & 2 deletions hprose_formatter.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,12 @@ static zend_always_inline void hprose_fast_serialize(hprose_bytes_io *stream, zv
case IS_STRING: {
char * s = Z_STRVAL_P(val);
int32_t l = Z_STRLEN_P(val);
int32_t ul = utf16_length(s, l);
if (l == 0) {
_hprose_writer_write_empty(stream);
}
else if (is_utf8(s, l)) {
if (l < 4 && ustrlen(s, l) == 1) {
else if (ul > 0) {
if (ul == 1) {
_hprose_writer_write_utf8char(stream, val);
}
else {
Expand Down
5 changes: 3 additions & 2 deletions hprose_writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,12 @@ void _hprose_writer_serialize(hprose_writer *_this, hprose_writer_refer *refer,
case IS_STRING: {
char * s = Z_STRVAL_P(val);
int32_t l = Z_STRLEN_P(val);
int32_t ul = utf16_length(s, l);
if (l == 0) {
_hprose_writer_write_empty(stream);
}
else if (is_utf8(s, l)) {
if (l < 4 && ustrlen(s, l) == 1) {
else if (ul > 0) {
if (ul == 1) {
_hprose_writer_write_utf8char(stream, val);
}
else {
Expand Down
13 changes: 9 additions & 4 deletions hprose_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,13 +270,18 @@ static zend_always_inline void _hprose_writer_write_utf8char(hprose_bytes_io *st
/* Writer-object form of _hprose_writer_write_utf8char(): passes the writer's
 * `stream` member together with `val`. */
#define hprose_writer_write_utf8char(_this, val) _hprose_writer_write_utf8char((_this)->stream, (val))

static zend_always_inline void _hprose_writer_write_string(hprose_writer_refer *refer, hprose_bytes_io *stream, zval *val) {
int32_t len = ustrlen(Z_STRVAL_P(val), Z_STRLEN_P(val));
int32_t len = utf16_length(Z_STRVAL_P(val), Z_STRLEN_P(val));
if (refer) {
hprose_writer_refer_set(refer, val);
}
hprose_bytes_io_putc(stream, HPROSE_TAG_STRING);
if (len) {
hprose_bytes_io_write_int(stream, len);
if (len >= 0) {
hprose_bytes_io_putc(stream, HPROSE_TAG_STRING);
if (len) {
hprose_bytes_io_write_int(stream, len);
}
} else {
hprose_bytes_io_putc(stream, HPROSE_TAG_BYTES);
hprose_bytes_io_write_int(stream, Z_STRLEN_P(val));
}
hprose_bytes_io_putc(stream, HPROSE_TAG_QUOTE);
hprose_bytes_io_write(stream, Z_STRVAL_P(val), Z_STRLEN_P(val));
Expand Down
24 changes: 19 additions & 5 deletions package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,18 @@ http://pear.php.net/dtd/package-2.0.xsd">
<email>mabingyao@gmail.com</email>
<active>yes</active>
</lead>
<date>2020-01-23</date>
<time>13:04:34</time>
<date>2020-03-21</date>
<time>22:17:20</time>
<version>
<release>1.7.0</release>
<api>1.7.0</api>
<release>1.8.0</release>
<api>1.8.0</api>
</version>
<stability>
<release>stable</release>
<api>stable</api>
</stability>
<license uri="http://mit-license.org/">MIT</license>
<notes>Add TagHeader for Hprose 3.0.
<notes>Fixed utf8 encoding detection.
</notes>
<contents>
<dir name="/">
Expand Down Expand Up @@ -111,6 +111,20 @@ http://pear.php.net/dtd/package-2.0.xsd">
<extsrcrelease>
</extsrcrelease>
<changelog>
<release>
<version>
<release>1.8.0</release>
<api>1.8.0</api>
</version>
<stability>
<release>stable</release>
<api>stable</api>
</stability>
<date>2020-03-21</date>
<license uri="http://mit-license.org/">MIT</license>
<notes>Fixed utf8 encoding detection.
</notes>
</release>
<release>
<version>
<release>1.7.0</release>
Expand Down
2 changes: 1 addition & 1 deletion php_hprose.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ zend_module_entry hprose_module_entry;

#define PHP_HPROSE_MODULE_NAME "hprose"
#define PHP_HPROSE_BUILD_DATE __DATE__ " " __TIME__
#define PHP_HPROSE_VERSION "1.7.0"
#define PHP_HPROSE_VERSION "1.8.0"
#define PHP_HPROSE_AUTHOR "Ma Bingyao"
#define PHP_HPROSE_HOMEPAGE "https://github.com/hprose/hprose-pecl"

Expand Down

0 comments on commit 885bc40

Please sign in to comment.