Commit | Line | Data |
---|---|---|
d44e3c4f | 1 | /****************************************************************************** |
2 | * Copyright (c) 2000-2016 Ericsson Telecom AB | |
3 | * All rights reserved. This program and the accompanying materials | |
4 | * are made available under the terms of the Eclipse Public License v1.0 | |
5 | * which accompanies this distribution, and is available at | |
6 | * http://www.eclipse.org/legal/epl-v10.html | |
7 | * | |
8 | * Contributors: | |
9 | * Balasko, Jeno | |
10 | * Baranyi, Botond | |
11 | * | |
12 | ******************************************************************************/ | |
3abe9331 | 13 | |
970ed795 EL |
14 | #include <cstring> |
15 | ||
16 | #include "JSON_Tokenizer.hh" | |
17 | #include "memory.h" | |
18 | #include <cstdio> | |
19 | ||
// Pre-built run of tab characters used by put_depth() for pretty-printed
// indentation: the last 'depth' tabs of this array are printed.
static const char TABS[] =
  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t";
const size_t MAX_TABS = sizeof(TABS) - 1; // 80 (4 x 20 tabs); deepest indentation supported
26 | ||
27 | void JSON_Tokenizer::init(const char* p_buf, const size_t p_buf_len) | |
28 | { | |
29 | if (p_buf != 0 && p_buf_len != 0) { | |
30 | buf_ptr = mcopystrn(p_buf, p_buf_len); | |
31 | } else { | |
32 | buf_ptr = 0; | |
33 | } | |
34 | buf_len = p_buf_len; | |
35 | buf_pos = 0; | |
36 | depth = 0; | |
37 | previous_token = JSON_TOKEN_NONE; | |
38 | } | |
39 | ||
// Releases the internal buffer. buf_ptr may be NULL for an empty tokenizer;
// Free is presumed to tolerate NULL (standard for the memory.h allocators) -- TODO confirm.
JSON_Tokenizer::~JSON_Tokenizer()
{
  Free(buf_ptr);
}
44 | ||
45 | void JSON_Tokenizer::put_c(const char c) | |
46 | { | |
47 | buf_ptr = mputprintf(buf_ptr, "%c", c); | |
48 | ++buf_len; | |
49 | } | |
50 | ||
51 | void JSON_Tokenizer::put_s(const char* s) | |
52 | { | |
53 | buf_ptr = mputstr(buf_ptr, s); | |
54 | buf_len += strlen(s); | |
55 | } | |
56 | ||
57 | void JSON_Tokenizer::put_depth() | |
58 | { | |
59 | put_s(TABS + ((depth > MAX_TABS) ? 0 : MAX_TABS - depth)); | |
60 | } | |
61 | ||
62 | bool JSON_Tokenizer::skip_white_spaces() | |
63 | { | |
64 | while(buf_pos < buf_len) { | |
65 | switch(buf_ptr[buf_pos]) { | |
66 | case ' ': | |
67 | case '\r': | |
68 | case '\n': | |
69 | case '\t': | |
70 | case '\f': | |
71 | ++buf_pos; | |
72 | break; | |
73 | default: | |
74 | return true; | |
75 | } | |
76 | } | |
77 | return false; | |
78 | } | |
79 | ||
80 | bool JSON_Tokenizer::check_for_string() | |
81 | { | |
82 | if ('\"' == buf_ptr[buf_pos]) { | |
83 | ++buf_pos; | |
84 | } else { | |
85 | return false; | |
86 | } | |
87 | while (buf_pos < buf_len) { | |
88 | if ('\"' == buf_ptr[buf_pos]) { | |
89 | return true; | |
90 | } | |
91 | else if ('\\' == buf_ptr[buf_pos]) { | |
92 | // skip escaped character (so escaped quotes (\") are not mistaken for the ending quotes) | |
93 | ++buf_pos; | |
94 | } | |
95 | ++buf_pos; | |
96 | } | |
97 | return false; | |
98 | } | |
99 | ||
// Attempts to read a JSON number starting at buf_pos, advancing buf_pos
// past its characters. Returns true if a valid number was read (scanning
// stops at the first character that cannot belong to the number); returns
// false on a malformed number, leaving buf_pos where the scan stopped.
// Implemented as a small state machine over five flags; the digit flags
// (first_digit/zero) are reset at each section boundary ('.' and 'e'/'E')
// so they always describe the current section only.
bool JSON_Tokenizer::check_for_number()
{
  bool first_digit = false; // first non-zero digit reached
  bool zero = false; // first zero digit reached
  bool decimal_point = false; // decimal point (.) reached
  bool exponent_mark = false; // exponential mark (e or E) reached
  bool exponent_sign = false; // sign of the exponential (- or +) reached

  if ('-' == buf_ptr[buf_pos]) {
    // optional leading minus of the base number
    ++buf_pos;
  }

  while (buf_pos < buf_len) {
    switch(buf_ptr[buf_pos]) {
    case '.':
      // allowed only once, before the exponent, and only after a digit
      if (decimal_point || exponent_mark || (!first_digit && !zero)) {
        return false;
      }
      decimal_point = true;
      // reset: the digit flags now track the fraction's digits
      first_digit = false;
      zero = false;
      break;
    case 'e':
    case 'E':
      // allowed only once, and only after at least one digit
      if (exponent_mark || (!first_digit && !zero)) {
        return false;
      }
      exponent_mark = true;
      // reset: the digit flags now track the exponent's digits
      first_digit = false;
      zero = false;
      break;
    case '0':
      // rejected as the exponent's first digit, and as a digit following
      // a leading zero in the integer part
      if (!first_digit && (exponent_mark || (!decimal_point && zero))) {
        return false;
      }
      zero = true;
      break;
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
      // a non-zero digit must not follow a leading zero in the integer
      // part or in the exponent (fraction digits may follow a zero)
      if (!first_digit && zero && (!decimal_point || exponent_mark)) {
        return false;
      }
      first_digit = true;
      break;
    case '-':
    case '+':
      // allowed only once, directly after the exponent mark
      if (exponent_sign || !exponent_mark || zero || first_digit) {
        return false;
      }
      exponent_sign = true;
      break;
    default:
      // end of the number: valid only if its last section (integer,
      // fraction or exponent) contains at least one digit
      return first_digit || zero;
    }

    ++buf_pos;
  }
  return first_digit || zero;
}
166 | ||
167 | bool JSON_Tokenizer::check_for_separator() | |
168 | { | |
169 | if (buf_pos < buf_len) { | |
170 | switch(buf_ptr[buf_pos]) { | |
171 | case ',': | |
172 | ++buf_pos; | |
173 | // no break | |
174 | case ':': | |
175 | case '{': | |
176 | case '}': | |
177 | case '[': | |
178 | case ']': | |
179 | return true; | |
180 | default: | |
181 | return false; | |
182 | } | |
183 | } | |
184 | return true; | |
185 | } | |
186 | ||
187 | bool JSON_Tokenizer::check_for_literal(const char* p_literal) | |
188 | { | |
189 | size_t len = strlen(p_literal); | |
190 | size_t start_pos = buf_pos; | |
191 | ||
192 | if (buf_len - buf_pos >= len && | |
193 | 0 == strncmp(buf_ptr + buf_pos, p_literal, len)) { | |
194 | buf_pos += len; | |
195 | if (!skip_white_spaces() || check_for_separator()) { | |
196 | return true; | |
197 | } else { | |
198 | // must be followed by a separator (or only white spaces until the buffer ends) -> undo buffer action | |
199 | buf_pos = start_pos; | |
200 | } | |
201 | } | |
202 | return false; | |
203 | } | |
204 | ||
// Extracts the next JSON token from the buffer, starting at buf_pos.
// @param p_token [out] the token's type; JSON_TOKEN_NONE if the buffer held
//   only white space, JSON_TOKEN_ERROR on malformed input
// @param p_token_str [out, optional] for NAME, STRING and NUMBER tokens:
//   pointer INTO the tokenizer's own buffer (no copy is made, so it is only
//   valid while the tokenizer is alive and not NUL-terminated by itself)
// @param p_str_len [out, optional] length of *p_token_str
// @return number of buffer characters consumed (buf_pos is NOT restored on
//   error, so this also counts the characters scanned before the error)
int JSON_Tokenizer::get_next_token(json_token_t* p_token, char** p_token_str, size_t* p_str_len)
{
  size_t start_pos = buf_pos;
  *p_token = JSON_TOKEN_NONE;
  // both out-parameters must be given for the token text to be reported
  if (0 != p_token_str && 0 != p_str_len) {
    *p_token_str = 0;
    *p_str_len = 0;
  }

  if (skip_white_spaces()) {
    char c = buf_ptr[buf_pos];
    switch (c) {
    case '{':
    case '[':
      *p_token = ('{' == c) ? JSON_TOKEN_OBJECT_START : JSON_TOKEN_ARRAY_START;
      ++buf_pos;
      break;
    case '}':
    case ']':
      ++buf_pos;
      if (skip_white_spaces() && !check_for_separator()) {
        // must be followed by a separator (or only white spaces until the buffer ends)
        *p_token = JSON_TOKEN_ERROR;
      } else {
        *p_token = ('}' == c) ? JSON_TOKEN_OBJECT_END : JSON_TOKEN_ARRAY_END;
      }
      break;
    case '\"': {
      // string value or field name
      size_t string_start_pos = buf_pos;
      if(!check_for_string()) {
        // invalid string value
        *p_token = JSON_TOKEN_ERROR;
        break;
      }
      size_t string_end_pos = ++buf_pos; // step over the string's ending quotes
      // a colon after the string makes it a field name, otherwise it is a value
      if (skip_white_spaces() && ':' == buf_ptr[buf_pos]) {
        // name token - don't include the starting and ending quotes
        *p_token = JSON_TOKEN_NAME;
        if (0 != p_token_str && 0 != p_str_len) {
          *p_token_str = buf_ptr + string_start_pos + 1;
          *p_str_len = string_end_pos - string_start_pos - 2;
        }
        ++buf_pos; // consume the colon, it belongs to the name token
      } else if (check_for_separator()) {
        // value token - include the starting and ending quotes
        *p_token = JSON_TOKEN_STRING;
        if (0 != p_token_str && 0 != p_str_len) {
          *p_token_str = buf_ptr + string_start_pos;
          *p_str_len = string_end_pos - string_start_pos;
        }
      } else {
        // value token, but there is no separator after it -> error
        *p_token = JSON_TOKEN_ERROR;
        break;
      }
      break;
    } // case: string value or field name
    default:
      if (('0' <= buf_ptr[buf_pos] && '9' >= buf_ptr[buf_pos]) ||
          '-' == buf_ptr[buf_pos]) {
        // number value
        size_t number_start_pos = buf_pos;
        if (!check_for_number()) {
          // invalid number
          *p_token = JSON_TOKEN_ERROR;
          break;
        }
        size_t number_length = buf_pos - number_start_pos;
        if (skip_white_spaces() && !check_for_separator()) {
          // must be followed by a separator (or only white spaces until the buffer ends)
          *p_token = JSON_TOKEN_ERROR;
          break;
        }
        *p_token = JSON_TOKEN_NUMBER;
        if (0 != p_token_str && 0 != p_str_len) {
          *p_token_str = buf_ptr + number_start_pos;
          *p_str_len = number_length;
        }
        break;
      } // if (number value)
      else if (check_for_literal("true")) {
        *p_token = JSON_TOKEN_LITERAL_TRUE;
        break;
      }
      else if (check_for_literal("false")) {
        *p_token = JSON_TOKEN_LITERAL_FALSE;
        break;
      }
      else if (check_for_literal("null")) {
        *p_token = JSON_TOKEN_LITERAL_NULL;
        break;
      }
      else {
        // not a recognizable token
        *p_token = JSON_TOKEN_ERROR;
        break;
      }
    } // switch (current char)
  } // if (skip_white_spaces())

  return buf_pos - start_pos;
}
307 | ||
308 | void JSON_Tokenizer::put_separator() | |
309 | { | |
310 | if (JSON_TOKEN_NAME != previous_token && JSON_TOKEN_NONE != previous_token && | |
311 | JSON_TOKEN_ARRAY_START != previous_token && JSON_TOKEN_OBJECT_START != previous_token) { | |
312 | put_c(','); | |
313 | if (pretty) { | |
314 | put_c('\n'); | |
315 | put_depth(); | |
316 | } | |
317 | } | |
318 | } | |
319 | ||
// Appends the given token to the end of the buffer (JSON production).
// @param p_token the token to write; NONE/ERROR (or any unknown value)
//   writes nothing
// @param p_token_str only used for NAME, NUMBER and STRING tokens: names are
//   written with quotes and a colon added around them, numbers and strings
//   are written verbatim (a STRING is expected to already contain its quotes)
// @return number of characters appended to the buffer
// Tracks nesting depth and the previously written token so separators (and,
// in pretty mode, line breaks and indentation) are inserted automatically.
int JSON_Tokenizer::put_next_token(json_token_t p_token, const char* p_token_str)
{
  int start_len = buf_len;
  switch(p_token) {
  case JSON_TOKEN_OBJECT_START:
  case JSON_TOKEN_ARRAY_START: {
    put_separator();
    put_c( (JSON_TOKEN_OBJECT_START == p_token) ? '{' : '[' );
    if (pretty) {
      // open a new indentation level for the container's elements
      put_c('\n');
      ++depth;
      put_depth();
    }
    break;
  }
  case JSON_TOKEN_OBJECT_END:
  case JSON_TOKEN_ARRAY_END: {
    if (pretty) {
      if (JSON_TOKEN_OBJECT_START != previous_token && JSON_TOKEN_ARRAY_START != previous_token) {
        // container had elements: closing brace goes on its own line,
        // one indentation level up
        put_c('\n');
        --depth;
        put_depth();
      } else if (MAX_TABS >= depth) {
        // empty object or array -> remove the extra tab added at the start token
        // (the depth guard mirrors put_depth's cap: beyond MAX_TABS no tab
        // was actually written, so there is nothing to remove)
        --depth;
        --buf_len;
        buf_ptr[buf_len] = 0;
      }
    }
    put_c( (JSON_TOKEN_OBJECT_END == p_token) ? '}' : ']' );
    break;
  }
  case JSON_TOKEN_NUMBER:
  case JSON_TOKEN_STRING:
    put_separator();
    put_s(p_token_str);
    break;
  case JSON_TOKEN_LITERAL_TRUE:
    put_separator();
    put_s("true");
    break;
  case JSON_TOKEN_LITERAL_FALSE:
    put_separator();
    put_s("false");
    break;
  case JSON_TOKEN_LITERAL_NULL:
    put_separator();
    put_s("null");
    break;
  case JSON_TOKEN_NAME:
    put_separator();
    put_c('\"');
    put_s(p_token_str);
    if (pretty) {
      put_s("\" : ");
    } else {
      put_s("\":");
    }
    break;
  default:
    // unknown/NONE/ERROR token: nothing written, previous_token unchanged
    return 0;
  }

  previous_token = p_token;
  return buf_len - start_len;
}
386 | ||
3f84031e | 387 | void JSON_Tokenizer::put_raw_data(const char* p_data, size_t p_len) |
388 | { | |
389 | buf_ptr = mputstrn(buf_ptr, p_data, p_len); | |
390 | buf_len += p_len; | |
391 | } | |
3abe9331 | 392 | |
393 | char* convert_to_json_string(const char* str) | |
394 | { | |
395 | char* ret_val = mcopystrn("\"", 1); | |
396 | // control characters (like \n) cannot be placed in a JSON string, replace | |
397 | // them with JSON metacharacters | |
398 | // double quotes and backslashes need to be escaped, too | |
399 | size_t str_len = strlen(str); | |
400 | for (size_t i = 0; i < str_len; ++i) { | |
401 | switch (str[i]) { | |
402 | case '\n': | |
403 | ret_val = mputstrn(ret_val, "\\n", 2); | |
404 | break; | |
405 | case '\r': | |
406 | ret_val = mputstrn(ret_val, "\\r", 2); | |
407 | break; | |
408 | case '\t': | |
409 | ret_val = mputstrn(ret_val, "\\t", 2); | |
410 | break; | |
411 | case '\f': | |
412 | ret_val = mputstrn(ret_val, "\\f", 2); | |
413 | break; | |
414 | case '\b': | |
415 | ret_val = mputstrn(ret_val, "\\b", 2); | |
416 | break; | |
417 | case '\"': | |
418 | ret_val = mputstrn(ret_val, "\\\"", 2); | |
419 | break; | |
420 | case '\\': | |
421 | ret_val = mputstrn(ret_val, "\\\\", 2); | |
422 | break; | |
423 | default: | |
424 | if (str[i] < 32 && str[i] > 0) { | |
425 | // use the JSON \uHHHH notation for other control characters | |
426 | // (this is just for esthetic reasons, these wouldn't break the JSON | |
427 | // string format) | |
428 | ret_val = mputprintf(ret_val, "\\u00%d%c", str[i] / 16, | |
429 | (str[i] % 16 < 10) ? (str[i] % 16 + '0') : (str[i] % 16 - 10 + 'A')); | |
430 | } | |
431 | else { | |
432 | ret_val = mputc(ret_val, str[i]); | |
433 | } | |
434 | break; | |
435 | } | |
436 | } | |
437 | return mputstrn(ret_val, "\"", 1); | |
438 | } |