From 50daa39ee895f231e8e8b62b3fe78332c4603ad2 Mon Sep 17 00:00:00 2001 From: Anthony Calandra Date: Tue, 10 Jul 2018 09:00:19 -0400 Subject: Optimize JSON `Quote` operation. (#2420) JerryScript-DCO-1.0-Signed-off-by: Anthony Calandra anthony@anthony-calandra.com --- .../ecma/builtin-objects/ecma-builtin-json.c | 154 ++++++++++++--------- jerry-core/lit/lit-magic-strings.inc.h | 3 - jerry-core/lit/lit-magic-strings.ini | 1 - 3 files changed, 92 insertions(+), 66 deletions(-) (limited to 'jerry-core') diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-json.c b/jerry-core/ecma/builtin-objects/ecma-builtin-json.c index 4baaf646..84edd81c 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-json.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-json.c @@ -1213,107 +1213,137 @@ ecma_builtin_json_stringify (ecma_value_t this_arg, /**< 'this' argument */ static ecma_value_t ecma_builtin_json_quote (ecma_string_t *string_p) /**< string that should be quoted*/ { - /* 1. */ - ecma_string_t *product_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_DOUBLE_QUOTE_CHAR); - ECMA_STRING_TO_UTF8_STRING (string_p, string_buff, string_buff_size); - const lit_utf8_byte_t *str_p = string_buff; - const lit_utf8_byte_t *str_end_p = string_buff + string_buff_size; + const lit_utf8_byte_t *str_end_p = str_p + string_buff_size; + size_t n_bytes = 2; /* Start with 2 for surrounding quotes. */ while (str_p < str_end_p) { - ecma_char_t current_char = lit_utf8_read_next (&str_p); + lit_utf8_byte_t c = *str_p++; - /* 2.a, b */ - if (current_char == LIT_CHAR_BACKSLASH - || current_char == LIT_CHAR_DOUBLE_QUOTE - || current_char == LIT_CHAR_BS - || current_char == LIT_CHAR_FF - || current_char == LIT_CHAR_LF - || current_char == LIT_CHAR_CR - || current_char == LIT_CHAR_TAB) + if (c == LIT_CHAR_BACKSLASH || c == LIT_CHAR_DOUBLE_QUOTE) + { + n_bytes += 2; + } + else if (c >= LIT_CHAR_SP && c < LIT_UTF8_1_BYTE_CODE_POINT_MAX) + { + n_bytes++; + } + else if (c > LIT_UTF8_1_BYTE_CODE_POINT_MAX) { - lit_utf8_byte_t abbrev = (lit_utf8_byte_t) current_char; + lit_utf8_size_t sz = lit_get_unicode_char_size_by_utf8_first_byte (c); + n_bytes += sz; + str_p += sz - 1; + } + else + { + switch (c) + { + case LIT_CHAR_BS: + case LIT_CHAR_FF: + case LIT_CHAR_LF: + case LIT_CHAR_CR: + case LIT_CHAR_TAB: + { + n_bytes += 2; + break; + } + default: /* Hexadecimal. */ + { + n_bytes += 2 + 4; + break; + } + } + } + } + + lit_utf8_byte_t *buf_begin = jmem_heap_alloc_block (n_bytes); + JERRY_ASSERT (buf_begin != NULL); + lit_utf8_byte_t *buf = buf_begin; + str_p = string_buff; + + *buf++ = LIT_CHAR_DOUBLE_QUOTE; + + while (str_p < str_end_p) + { + lit_utf8_byte_t c = *str_p++; - switch (current_char) + if (c == LIT_CHAR_BACKSLASH || c == LIT_CHAR_DOUBLE_QUOTE) + { + *buf++ = LIT_CHAR_BACKSLASH; + *buf++ = c; + } + else if (c >= LIT_CHAR_SP && c < LIT_UTF8_1_BYTE_CODE_POINT_MAX) + { + *buf++ = c; + } + else if (c > LIT_UTF8_1_BYTE_CODE_POINT_MAX) + { + str_p--; + ecma_char_t current_char = lit_utf8_read_next (&str_p); + buf += lit_code_unit_to_utf8 (current_char, (lit_utf8_byte_t *) buf); + } + else + { + switch (c) { case LIT_CHAR_BS: { - abbrev = LIT_CHAR_LOWERCASE_B; + *buf++ = LIT_CHAR_BACKSLASH; + *buf++ = LIT_CHAR_LOWERCASE_B; break; } case LIT_CHAR_FF: { - abbrev = LIT_CHAR_LOWERCASE_F; + *buf++ = LIT_CHAR_BACKSLASH; + *buf++ = LIT_CHAR_LOWERCASE_F; break; } case LIT_CHAR_LF: { - abbrev = LIT_CHAR_LOWERCASE_N; + *buf++ = LIT_CHAR_BACKSLASH; + *buf++ = LIT_CHAR_LOWERCASE_N; break; } case LIT_CHAR_CR: { - abbrev = LIT_CHAR_LOWERCASE_R; + *buf++ = LIT_CHAR_BACKSLASH; + *buf++ = LIT_CHAR_LOWERCASE_R; break; } case LIT_CHAR_TAB: { - abbrev = LIT_CHAR_LOWERCASE_T; + *buf++ = LIT_CHAR_BACKSLASH; + *buf++ = LIT_CHAR_LOWERCASE_T; break; } - default: + default: /* Hexadecimal. */ { - JERRY_ASSERT (current_char == LIT_CHAR_BACKSLASH || current_char == LIT_CHAR_DOUBLE_QUOTE); - + JERRY_ASSERT (c < 0x9f); + *buf++ = LIT_CHAR_BACKSLASH; + *buf++ = LIT_CHAR_LOWERCASE_U; + *buf++ = LIT_CHAR_0; + *buf++ = LIT_CHAR_0; + *buf++ = (lit_utf8_byte_t) (LIT_CHAR_0 + (c >> 4)); /* Max range 0-9, hex digits unnecessary. */ + lit_utf8_byte_t c2 = (c & 0xf); + *buf++ = (lit_utf8_byte_t) (c2 + ((c2 <= 9) ? LIT_CHAR_0 : (LIT_CHAR_LOWERCASE_A - 10))); break; } } - - lit_utf8_byte_t chars[2] = { LIT_CHAR_BACKSLASH, abbrev }; - - product_str_p = ecma_append_chars_to_string (product_str_p, chars, 2, 2); } - /* 2.c */ - else if (current_char < LIT_CHAR_SP) - { - lit_utf8_byte_t chars[6] = { LIT_CHAR_BACKSLASH, LIT_CHAR_LOWERCASE_U, LIT_CHAR_0, LIT_CHAR_0 }; - - JERRY_ASSERT (current_char < 0x9f); - - chars[4] = (lit_utf8_byte_t) (LIT_CHAR_0 + (current_char >> 4)); - - int last_char = current_char & 0xf; - last_char += (last_char <= 9) ? LIT_CHAR_0 : (LIT_CHAR_LOWERCASE_A - 10); - - chars[5] = (lit_utf8_byte_t) last_char; - - product_str_p = ecma_append_chars_to_string (product_str_p, chars, 6, 6); - } - /* 2.d */ - else if (current_char < LIT_UTF8_1_BYTE_CODE_POINT_MAX) - { - /* Fast case for ascii characters. */ - lit_utf8_byte_t chars[1] = { (lit_utf8_byte_t) current_char }; + } - product_str_p = ecma_append_chars_to_string (product_str_p, chars, 1, 1); - } - else - { - ecma_string_t *current_char_str_p = ecma_new_ecma_string_from_code_unit (current_char); + *buf++ = LIT_CHAR_DOUBLE_QUOTE; - product_str_p = ecma_concat_ecma_strings (product_str_p, current_char_str_p); - ecma_deref_ecma_string (current_char_str_p); - } - } + /* Make sure we didn't run off the end or allocated more than we actually wanted. */ + JERRY_ASSERT ((size_t) (buf - buf_begin) == n_bytes); + ecma_string_t *product_str_p = ecma_new_ecma_string_from_utf8 ((const lit_utf8_byte_t *) buf_begin, + (lit_utf8_size_t) (buf - buf_begin)); + jmem_heap_free_block (buf_begin, n_bytes); ECMA_FINALIZE_UTF8_STRING (string_buff, string_buff_size); - /* 3. */ - product_str_p = ecma_append_magic_string_to_string (product_str_p, LIT_MAGIC_STRING_DOUBLE_QUOTE_CHAR); - - /* 4. */ return ecma_make_string_value (product_str_p); } /* ecma_builtin_json_quote */ diff --git a/jerry-core/lit/lit-magic-strings.inc.h b/jerry-core/lit/lit-magic-strings.inc.h index 950725b7..11835b17 100644 --- a/jerry-core/lit/lit-magic-strings.inc.h +++ b/jerry-core/lit/lit-magic-strings.inc.h @@ -19,9 +19,6 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING__EMPTY, "") LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SPACE_CHAR, " ") -#if !defined (CONFIG_DISABLE_JSON_BUILTIN) -LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DOUBLE_QUOTE_CHAR, "\"") -#endif LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_COMMA_CHAR, ",") #if !defined (CONFIG_DISABLE_REGEXP_BUILTIN) LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SLASH_CHAR, "/") diff --git a/jerry-core/lit/lit-magic-strings.ini b/jerry-core/lit/lit-magic-strings.ini index 3cd2161c..bdd810ea 100644 --- a/jerry-core/lit/lit-magic-strings.ini +++ b/jerry-core/lit/lit-magic-strings.ini @@ -24,7 +24,6 @@ LIT_MAGIC_STRING__EMPTY = "" LIT_MAGIC_STRING_SPACE_CHAR = " " -LIT_MAGIC_STRING_DOUBLE_QUOTE_CHAR = "\"" LIT_MAGIC_STRING_COMMA_CHAR = "," LIT_MAGIC_STRING_SLASH_CHAR = "/" LIT_MAGIC_STRING_COLON_CHAR = ":" -- cgit v1.2.3