Titan Core Initial Contribution — common/JSON_Tokenizer.cc (repository: deliverable/titan.core.git)
1 #include <cstring>
2
3 #include "JSON_Tokenizer.hh"
4 #include "memory.h"
5 #include <cstdio>
6
7 static const char TABS[] =
8 "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
9 "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
10 "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
11 "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t";
12 const size_t MAX_TABS = sizeof(TABS) - 1; // 64
13
14 void JSON_Tokenizer::init(const char* p_buf, const size_t p_buf_len)
15 {
16 if (p_buf != 0 && p_buf_len != 0) {
17 buf_ptr = mcopystrn(p_buf, p_buf_len);
18 } else {
19 buf_ptr = 0;
20 }
21 buf_len = p_buf_len;
22 buf_pos = 0;
23 depth = 0;
24 previous_token = JSON_TOKEN_NONE;
25 }
26
27 JSON_Tokenizer::~JSON_Tokenizer()
28 {
29 Free(buf_ptr);
30 }
31
32 void JSON_Tokenizer::put_c(const char c)
33 {
34 buf_ptr = mputprintf(buf_ptr, "%c", c);
35 ++buf_len;
36 }
37
38 void JSON_Tokenizer::put_s(const char* s)
39 {
40 buf_ptr = mputstr(buf_ptr, s);
41 buf_len += strlen(s);
42 }
43
44 void JSON_Tokenizer::put_depth()
45 {
46 put_s(TABS + ((depth > MAX_TABS) ? 0 : MAX_TABS - depth));
47 }
48
49 bool JSON_Tokenizer::skip_white_spaces()
50 {
51 while(buf_pos < buf_len) {
52 switch(buf_ptr[buf_pos]) {
53 case ' ':
54 case '\r':
55 case '\n':
56 case '\t':
57 case '\f':
58 ++buf_pos;
59 break;
60 default:
61 return true;
62 }
63 }
64 return false;
65 }
66
67 bool JSON_Tokenizer::check_for_string()
68 {
69 if ('\"' == buf_ptr[buf_pos]) {
70 ++buf_pos;
71 } else {
72 return false;
73 }
74 while (buf_pos < buf_len) {
75 if ('\"' == buf_ptr[buf_pos]) {
76 return true;
77 }
78 else if ('\\' == buf_ptr[buf_pos]) {
79 // skip escaped character (so escaped quotes (\") are not mistaken for the ending quotes)
80 ++buf_pos;
81 }
82 ++buf_pos;
83 }
84 return false;
85 }
86
87 bool JSON_Tokenizer::check_for_number()
88 {
89 bool first_digit = false; // first non-zero digit reached
90 bool zero = false; // first zero digit reached
91 bool decimal_point = false; // decimal point (.) reached
92 bool exponent_mark = false; // exponential mark (e or E) reached
93 bool exponent_sign = false; // sign of the exponential (- or +) reached
94
95 if ('-' == buf_ptr[buf_pos]) {
96 ++buf_pos;
97 }
98
99 while (buf_pos < buf_len) {
100 switch(buf_ptr[buf_pos]) {
101 case '.':
102 if (decimal_point || exponent_mark || (!first_digit && !zero)) {
103 return false;
104 }
105 decimal_point = true;
106 first_digit = false;
107 zero = false;
108 break;
109 case 'e':
110 case 'E':
111 if (exponent_mark || (!first_digit && !zero)) {
112 return false;
113 }
114 exponent_mark = true;
115 first_digit = false;
116 zero = false;
117 break;
118 case '0':
119 if (!first_digit && (exponent_mark || (!decimal_point && zero))) {
120 return false;
121 }
122 zero = true;
123 break;
124 case '1':
125 case '2':
126 case '3':
127 case '4':
128 case '5':
129 case '6':
130 case '7':
131 case '8':
132 case '9':
133 if (!first_digit && zero && (!decimal_point || exponent_mark)) {
134 return false;
135 }
136 first_digit = true;
137 break;
138 case '-':
139 case '+':
140 if (exponent_sign || !exponent_mark || zero || first_digit) {
141 return false;
142 }
143 exponent_sign = true;
144 break;
145 default:
146 return first_digit || zero;
147 }
148
149 ++buf_pos;
150 }
151 return first_digit || zero;
152 }
153
154 bool JSON_Tokenizer::check_for_separator()
155 {
156 if (buf_pos < buf_len) {
157 switch(buf_ptr[buf_pos]) {
158 case ',':
159 ++buf_pos;
160 // no break
161 case ':':
162 case '{':
163 case '}':
164 case '[':
165 case ']':
166 return true;
167 default:
168 return false;
169 }
170 }
171 return true;
172 }
173
174 bool JSON_Tokenizer::check_for_literal(const char* p_literal)
175 {
176 size_t len = strlen(p_literal);
177 size_t start_pos = buf_pos;
178
179 if (buf_len - buf_pos >= len &&
180 0 == strncmp(buf_ptr + buf_pos, p_literal, len)) {
181 buf_pos += len;
182 if (!skip_white_spaces() || check_for_separator()) {
183 return true;
184 } else {
185 // must be followed by a separator (or only white spaces until the buffer ends) -> undo buffer action
186 buf_pos = start_pos;
187 }
188 }
189 return false;
190 }
191
192 int JSON_Tokenizer::get_next_token(json_token_t* p_token, char** p_token_str, size_t* p_str_len)
193 {
194 size_t start_pos = buf_pos;
195 *p_token = JSON_TOKEN_NONE;
196 if (0 != p_token_str && 0 != p_str_len) {
197 *p_token_str = 0;
198 *p_str_len = 0;
199 }
200
201 if (skip_white_spaces()) {
202 char c = buf_ptr[buf_pos];
203 switch (c) {
204 case '{':
205 case '[':
206 *p_token = ('{' == c) ? JSON_TOKEN_OBJECT_START : JSON_TOKEN_ARRAY_START;
207 ++buf_pos;
208 break;
209 case '}':
210 case ']':
211 ++buf_pos;
212 if (skip_white_spaces() && !check_for_separator()) {
213 // must be followed by a separator (or only white spaces until the buffer ends)
214 *p_token = JSON_TOKEN_ERROR;
215 } else {
216 *p_token = ('}' == c) ? JSON_TOKEN_OBJECT_END : JSON_TOKEN_ARRAY_END;
217 }
218 break;
219 case '\"': {
220 // string value or field name
221 size_t string_start_pos = buf_pos;
222 if(!check_for_string()) {
223 // invalid string value
224 *p_token = JSON_TOKEN_ERROR;
225 break;
226 }
227 size_t string_end_pos = ++buf_pos; // step over the string's ending quotes
228 if (skip_white_spaces() && ':' == buf_ptr[buf_pos]) {
229 // name token - don't include the starting and ending quotes
230 *p_token = JSON_TOKEN_NAME;
231 if (0 != p_token_str && 0 != p_str_len) {
232 *p_token_str = buf_ptr + string_start_pos + 1;
233 *p_str_len = string_end_pos - string_start_pos - 2;
234 }
235 ++buf_pos;
236 } else if (check_for_separator()) {
237 // value token - include the starting and ending quotes
238 *p_token = JSON_TOKEN_STRING;
239 if (0 != p_token_str && 0 != p_str_len) {
240 *p_token_str = buf_ptr + string_start_pos;
241 *p_str_len = string_end_pos - string_start_pos;
242 }
243 } else {
244 // value token, but there is no separator after it -> error
245 *p_token = JSON_TOKEN_ERROR;
246 break;
247 }
248 break;
249 } // case: string value or field name
250 default:
251 if (('0' <= buf_ptr[buf_pos] && '9' >= buf_ptr[buf_pos]) ||
252 '-' == buf_ptr[buf_pos]) {
253 // number value
254 size_t number_start_pos = buf_pos;
255 if (!check_for_number()) {
256 // invalid number
257 *p_token = JSON_TOKEN_ERROR;
258 break;
259 }
260 size_t number_length = buf_pos - number_start_pos;
261 if (skip_white_spaces() && !check_for_separator()) {
262 // must be followed by a separator (or only white spaces until the buffer ends)
263 *p_token = JSON_TOKEN_ERROR;
264 break;
265 }
266 *p_token = JSON_TOKEN_NUMBER;
267 if (0 != p_token_str && 0 != p_str_len) {
268 *p_token_str = buf_ptr + number_start_pos;
269 *p_str_len = number_length;
270 }
271 break;
272 } // if (number value)
273 else if (check_for_literal("true")) {
274 *p_token = JSON_TOKEN_LITERAL_TRUE;
275 break;
276 }
277 else if (check_for_literal("false")) {
278 *p_token = JSON_TOKEN_LITERAL_FALSE;
279 break;
280 }
281 else if (check_for_literal("null")) {
282 *p_token = JSON_TOKEN_LITERAL_NULL;
283 break;
284 }
285 else {
286 *p_token = JSON_TOKEN_ERROR;
287 break;
288 }
289 } // switch (current char)
290 } // if (skip_white_spaces())
291
292 return buf_pos - start_pos;
293 }
294
295 void JSON_Tokenizer::put_separator()
296 {
297 if (JSON_TOKEN_NAME != previous_token && JSON_TOKEN_NONE != previous_token &&
298 JSON_TOKEN_ARRAY_START != previous_token && JSON_TOKEN_OBJECT_START != previous_token) {
299 put_c(',');
300 if (pretty) {
301 put_c('\n');
302 put_depth();
303 }
304 }
305 }
306
307 int JSON_Tokenizer::put_next_token(json_token_t p_token, const char* p_token_str)
308 {
309 int start_len = buf_len;
310 switch(p_token) {
311 case JSON_TOKEN_OBJECT_START:
312 case JSON_TOKEN_ARRAY_START: {
313 put_separator();
314 put_c( (JSON_TOKEN_OBJECT_START == p_token) ? '{' : '[' );
315 if (pretty) {
316 put_c('\n');
317 ++depth;
318 put_depth();
319 }
320 break;
321 }
322 case JSON_TOKEN_OBJECT_END:
323 case JSON_TOKEN_ARRAY_END: {
324 if (pretty) {
325 if (JSON_TOKEN_OBJECT_START != previous_token && JSON_TOKEN_ARRAY_START != previous_token) {
326 put_c('\n');
327 --depth;
328 put_depth();
329 } else if (MAX_TABS >= depth) {
330 // empty object or array -> remove the extra tab added at the start token
331 --depth;
332 --buf_len;
333 buf_ptr[buf_len] = 0;
334 }
335 }
336 put_c( (JSON_TOKEN_OBJECT_END == p_token) ? '}' : ']' );
337 break;
338 }
339 case JSON_TOKEN_NUMBER:
340 case JSON_TOKEN_STRING:
341 put_separator();
342 put_s(p_token_str);
343 break;
344 case JSON_TOKEN_LITERAL_TRUE:
345 put_separator();
346 put_s("true");
347 break;
348 case JSON_TOKEN_LITERAL_FALSE:
349 put_separator();
350 put_s("false");
351 break;
352 case JSON_TOKEN_LITERAL_NULL:
353 put_separator();
354 put_s("null");
355 break;
356 case JSON_TOKEN_NAME:
357 put_separator();
358 put_c('\"');
359 put_s(p_token_str);
360 if (pretty) {
361 put_s("\" : ");
362 } else {
363 put_s("\":");
364 }
365 break;
366 default:
367 return 0;
368 }
369
370 previous_token = p_token;
371 return buf_len - start_len;
372 }
373
This page took 0.038811 seconds and 5 git commands to generate.