From 885bc401fbc613c990a669008449000161f7547a Mon Sep 17 00:00:00 2001 From: andot Date: Sat, 21 Mar 2020 22:36:39 +0800 Subject: [PATCH] Fixed utf8 encoding detection. --- hprose_common.h | 86 ++++++++++++++++------------------------------ hprose_formatter.c | 5 +-- hprose_writer.c | 5 +-- hprose_writer.h | 13 ++++--- package.xml | 24 ++++++++++--- php_hprose.h | 2 +- 6 files changed, 64 insertions(+), 71 deletions(-) diff --git a/hprose_common.h b/hprose_common.h index c7c1dc7..1b313f5 100644 --- a/hprose_common.h +++ b/hprose_common.h @@ -627,68 +627,40 @@ static zend_always_inline zend_bool hprose_has_property(zend_class_entry *ce, zv #define has_property(ce, obj, prop) hprose_has_property((ce), (obj), (prop) TSRMLS_CC) -static zend_always_inline zend_bool is_utf8(char *str, int32_t len) { - uint8_t * s = (uint8_t *)str; - int32_t i; - for (i = 0; i < len; ++i) { - uint8_t c = s[i]; - switch (c >> 4) { - case 0: - case 1: - case 2: - case 3: - case 4: - case 5: - case 6: - case 7: - break; - case 12: - case 13: - if ((s[++i] >> 6) != 0x2) return 0; - break; - case 14: - if ((s[++i] >> 6) != 0x2) return 0; - if ((s[++i] >> 6) != 0x2) return 0; - break; - case 15: { - uint8_t b = s[++i]; - if ((s[++i] >> 6) != 0x2) return 0; - if ((s[++i] >> 6) != 0x2) return 0; - if ((((c & 0xf) << 2) | ((b >> 4) & 0x3)) > 0x10) return 0; - break; - } - default: - return 0; - } - } - return 1; -} - -static zend_always_inline int32_t ustrlen(char *str, int32_t len) { +static zend_always_inline int32_t utf16_length(char *str, int32_t len) { uint8_t *s = (uint8_t *)str; - int32_t l = len, p = 0; - while (p < len) { - uint8_t a = s[p]; - if (a < 0x80) { - ++p; - } - else if ((a & 0xE0) == 0xC0) { - p += 2; - --l; - } - else if ((a & 0xF0) == 0xE0) { - p += 3; - l -= 2; - } - else if ((a & 0xF8) == 0xF0) { - p += 4; - l -= 2; + uint8_t c = 0, a; + int32_t n = len, i; + for (i = 0; i < len; i++) { + a = s[i]; + if (c == 0) { + if ((a & 0xe0) == 0xc0) { + c = 1; + n--; + } + else if ((a & 0xf0) == 0xe0) { + c = 2; + n -= 2; + } + else if ((a & 0xf8) == 0xf0) { + c = 3; + n -= 2; + } + else if ((a & 0x80) == 0x80) { + return -1; + } } else { - return -1; + if ((a & 0xc0) != 0x80) { + return -1; + } + c--; } } - return l; + if (c != 0) { + return -1; + } + return n; } #if PHP_MAJOR_VERSION < 7 diff --git a/hprose_formatter.c b/hprose_formatter.c index 8be4f69..95a2df5 100644 --- a/hprose_formatter.c +++ b/hprose_formatter.c @@ -89,11 +89,12 @@ static zend_always_inline void hprose_fast_serialize(hprose_bytes_io *stream, zv case IS_STRING: { char * s = Z_STRVAL_P(val); int32_t l = Z_STRLEN_P(val); + int32_t ul = utf16_length(s, l); if (l == 0) { _hprose_writer_write_empty(stream); } - else if (is_utf8(s, l)) { - if (l < 4 && ustrlen(s, l) == 1) { + else if (ul > 0) { + if (ul == 1) { _hprose_writer_write_utf8char(stream, val); } else { diff --git a/hprose_writer.c b/hprose_writer.c index fc7ac5c..6b5e7b1 100644 --- a/hprose_writer.c +++ b/hprose_writer.c @@ -87,11 +87,12 @@ void _hprose_writer_serialize(hprose_writer *_this, hprose_writer_refer *refer, case IS_STRING: { char * s = Z_STRVAL_P(val); int32_t l = Z_STRLEN_P(val); + int32_t ul = utf16_length(s, l); if (l == 0) { _hprose_writer_write_empty(stream); } - else if (is_utf8(s, l)) { - if (l < 4 && ustrlen(s, l) == 1) { + else if (ul > 0) { + if (ul == 1) { _hprose_writer_write_utf8char(stream, val); } else { diff --git a/hprose_writer.h b/hprose_writer.h index b01458c..27a5332 100644 --- a/hprose_writer.h +++ b/hprose_writer.h @@ -270,13 +270,18 @@ static zend_always_inline void _hprose_writer_write_utf8char(hprose_bytes_io *st #define hprose_writer_write_utf8char(_this, val) _hprose_writer_write_utf8char((_this)->stream, (val)) static zend_always_inline void _hprose_writer_write_string(hprose_writer_refer *refer, hprose_bytes_io *stream, zval *val) { - int32_t len = ustrlen(Z_STRVAL_P(val), Z_STRLEN_P(val)); + int32_t len = utf16_length(Z_STRVAL_P(val), Z_STRLEN_P(val)); if (refer) { hprose_writer_refer_set(refer, val); } - hprose_bytes_io_putc(stream, HPROSE_TAG_STRING); - if (len) { - hprose_bytes_io_write_int(stream, len); + if (len >= 0) { + hprose_bytes_io_putc(stream, HPROSE_TAG_STRING); + if (len) { + hprose_bytes_io_write_int(stream, len); + } + } else { + hprose_bytes_io_putc(stream, HPROSE_TAG_BYTES); + hprose_bytes_io_write_int(stream, Z_STRLEN_P(val)); } hprose_bytes_io_putc(stream, HPROSE_TAG_QUOTE); hprose_bytes_io_write(stream, Z_STRVAL_P(val), Z_STRLEN_P(val)); diff --git a/package.xml b/package.xml index 4ca187d..22e8f4c 100644 --- a/package.xml +++ b/package.xml @@ -46,18 +46,18 @@ http://pear.php.net/dtd/package-2.0.xsd"> mabingyao@gmail.com yes - 2020-01-23 - + 2020-03-21 + - 1.7.0 - 1.7.0 + 1.8.0 + 1.8.0 stable stable MIT - Add TagHeader for Hprose 3.0. + Fixed utf8 encoding detection. @@ -111,6 +111,20 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + + 1.8.0 + 1.8.0 + + + stable + stable + + 2020-03-21 + MIT + Fixed utf8 encoding detection. + + 1.7.0 diff --git a/php_hprose.h b/php_hprose.h index 5d6957d..94f95f6 100644 --- a/php_hprose.h +++ b/php_hprose.h @@ -30,7 +30,7 @@ zend_module_entry hprose_module_entry; #define PHP_HPROSE_MODULE_NAME "hprose" #define PHP_HPROSE_BUILD_DATE __DATE__ " " __TIME__ -#define PHP_HPROSE_VERSION "1.7.0" +#define PHP_HPROSE_VERSION "1.8.0" #define PHP_HPROSE_AUTHOR "Ma Bingyao" #define PHP_HPROSE_HOMEPAGE "https://github.com/hprose/hprose-pecl"