Merge "fixed self-ref check to no longer display errors for omit values (bug 498430)"
[deliverable/titan.core.git] / compiler2 / ustring.cc
1 /******************************************************************************
2 * Copyright (c) 2000-2016 Ericsson Telecom AB
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
7 *
8 * Contributors:
9 * Balasko, Jeno
10 * Baranyi, Botond
11 * Kovacs, Ferenc
12 * Raduly, Csaba
13 * Szabados, Kristof
14 * Szabo, Bence Janos
15 * Szabo, Janos Zoltan – initial implementation
16 * Zalanyi, Balazs Andor
17 *
18 ******************************************************************************/
19 #include <stdio.h>
20 #include <string.h>
21
22 #include "../common/memory.h"
23 #include "../common/Quadruple.hh"
24 #include "error.h"
25
26 #include "string.hh"
27 #include "ustring.hh"
28 #include "PredefFunc.hh"
29
30 #include "Int.hh"
31
/** Number of bytes to allocate for an ustring_struct that holds n characters.
 *  One universal_char is subtracted, presumably because ustring_struct
 *  already embeds storage for one character. */
#define MEMORY_SIZE(n) \
  (sizeof(ustring_struct) + ((n) - 1) * sizeof(universal_char))
35
36 void ustring::init_struct(size_t n_uchars)
37 {
38 if (n_uchars == 0) {
39 /** This will represent the empty strings so they won't need allocated
40 * memory, this delays the memory allocation until it is really needed. */
41 static ustring_struct empty_string = { 1, 0, { { '\0', '\0', '\0', '\0' } } };
42 val_ptr = &empty_string;
43 empty_string.ref_count++;
44 } else {
45 val_ptr = (ustring_struct*)Malloc(MEMORY_SIZE(n_uchars));
46 val_ptr->ref_count = 1;
47 val_ptr->n_uchars = n_uchars;
48 }
49 }
50
51 void ustring::enlarge_memory(size_t incr)
52 {
53 if (incr > max_string_len - val_ptr->n_uchars)
54 FATAL_ERROR("ustring::enlarge_memory(size_t): length overflow");
55 size_t new_length = val_ptr->n_uchars + incr;
56 if (val_ptr->ref_count == 1) {
57 val_ptr = (ustring_struct*)Realloc(val_ptr, MEMORY_SIZE(new_length));
58 val_ptr->n_uchars = new_length;
59 } else {
60 ustring_struct *old_ptr = val_ptr;
61 old_ptr->ref_count--;
62 init_struct(new_length);
63 memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr, old_ptr->n_uchars *
64 sizeof(universal_char));
65 }
66 }
67
68 void ustring::copy_value()
69 {
70 if (val_ptr->ref_count > 1) {
71 ustring_struct *old_ptr = val_ptr;
72 old_ptr->ref_count--;
73 init_struct(old_ptr->n_uchars);
74 memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr,
75 old_ptr->n_uchars * sizeof(universal_char));
76 }
77 }
78
79 void ustring::clean_up()
80 {
81 if (val_ptr->ref_count > 1) val_ptr->ref_count--;
82 else if (val_ptr->ref_count == 1) Free(val_ptr);
83 else FATAL_ERROR("ustring::clean_up()");
84 }
85
86 int ustring::compare(const ustring& s) const
87 {
88 if (val_ptr == s.val_ptr) return 0;
89 for (size_t i = 0; ; i++) {
90 if (i == val_ptr->n_uchars) {
91 if (i == s.val_ptr->n_uchars) return 0;
92 else return -1;
93 } else if (i == s.val_ptr->n_uchars) return 1;
94 else if (val_ptr->uchars_ptr[i].group > s.val_ptr->uchars_ptr[i].group)
95 return 1;
96 else if (val_ptr->uchars_ptr[i].group < s.val_ptr->uchars_ptr[i].group)
97 return -1;
98 else if (val_ptr->uchars_ptr[i].plane > s.val_ptr->uchars_ptr[i].plane)
99 return 1;
100 else if (val_ptr->uchars_ptr[i].plane < s.val_ptr->uchars_ptr[i].plane)
101 return -1;
102 else if (val_ptr->uchars_ptr[i].row > s.val_ptr->uchars_ptr[i].row)
103 return 1;
104 else if (val_ptr->uchars_ptr[i].row < s.val_ptr->uchars_ptr[i].row)
105 return -1;
106 else if (val_ptr->uchars_ptr[i].cell > s.val_ptr->uchars_ptr[i].cell)
107 return 1;
108 else if (val_ptr->uchars_ptr[i].cell < s.val_ptr->uchars_ptr[i].cell)
109 return -1;
110 }
111 return 0; // should never get here
112 }
113
114 ustring::ustring(unsigned char p_group, unsigned char p_plane,
115 unsigned char p_row, unsigned char p_cell)
116 {
117 init_struct(1);
118 val_ptr->uchars_ptr[0].group = p_group;
119 val_ptr->uchars_ptr[0].plane = p_plane;
120 val_ptr->uchars_ptr[0].row = p_row;
121 val_ptr->uchars_ptr[0].cell = p_cell;
122 }
123
124 ustring::ustring(size_t n, const universal_char *uc_ptr)
125 {
126 // Check for UTF8 encoding and decode it
127 // incase the editor encoded the TTCN-3 file with UTF-8
128 string octet_str;
129 bool isUTF8 = true;
130 for (size_t i = 0; i < n; ++i) {
131 if (uc_ptr[i].group != 0 || uc_ptr[i].plane != 0 || uc_ptr[i].row != 0) {
132 // Not UTF8
133 isUTF8 = false;
134 break;
135 }
136 octet_str += Common::hexdigit_to_char(uc_ptr[i].cell / 16);
137 octet_str += Common::hexdigit_to_char(uc_ptr[i].cell % 16);
138 }
139 if (isUTF8) {
140 string* ret = Common::get_stringencoding(octet_str);
141 if ("UTF-8" != *ret) {
142 isUTF8 = false;
143 }
144 delete ret;
145 }
146 if (isUTF8) {
147 ustring s = Common::decode_utf8(octet_str, CharCoding::UTF_8);
148 val_ptr = s.val_ptr;
149 val_ptr->ref_count++;
150 } else {
151 init_struct(n);
152 memcpy(val_ptr->uchars_ptr, uc_ptr, n * sizeof(universal_char));
153 }
154 }
155
156 ustring::ustring(const string& s)
157 {
158 // Check for UTF8 encoding and decode it
159 // incase the editor encoded the TTCN-3 file with UTF-8
160 string octet_str;
161 bool isUTF8 = true;
162 size_t len = s.size();
163 for (size_t i = 0; i < len; ++i) {
164 octet_str += Common::hexdigit_to_char((unsigned char)(s[i]) / 16);
165 octet_str += Common::hexdigit_to_char((unsigned char)(s[i]) % 16);
166 }
167 if (isUTF8) {
168 string* ret = Common::get_stringencoding(octet_str);
169 if ("UTF-8" != *ret) {
170 isUTF8 = false;
171 }
172 delete ret;
173 }
174 if (isUTF8) {
175 ustring s = Common::decode_utf8(octet_str, CharCoding::UTF_8);
176 val_ptr = s.val_ptr;
177 val_ptr->ref_count++;
178 } else {
179 init_struct(s.size());
180 const char *src = s.c_str();
181 for (size_t i = 0; i < val_ptr->n_uchars; i++) {
182 val_ptr->uchars_ptr[i].group = 0;
183 val_ptr->uchars_ptr[i].plane = 0;
184 val_ptr->uchars_ptr[i].row = 0;
185 val_ptr->uchars_ptr[i].cell = src[i];
186 }
187 }
188 }
189
190 ustring::ustring(const char** uid, const int n) {
191 //Init the size for characters
192 init_struct(n);
193 for (size_t i = 0; i < val_ptr->n_uchars; ++i) {
194 const char * uidchar = uid[i];
195 size_t offset = 1; //Always starts with u or U
196 offset = uidchar[1] == '+' ? offset + 1 : offset; //Optional '+'
197 string chunk = string(uidchar + offset);
198 //Convert hex to int and get value
199 Common::int_val_t * val = Common::hex2int(chunk);
200 Common::Int int_val = val->get_val();
201
202 //Fill in the quadruple
203 val_ptr->uchars_ptr[i].group = (int_val >> 24) & 0xFF;
204 val_ptr->uchars_ptr[i].plane = (int_val >> 16) & 0xFF;
205 val_ptr->uchars_ptr[i].row = (int_val >> 8) & 0xFF;
206 val_ptr->uchars_ptr[i].cell = int_val & 0xFF;
207
208 //Free pointer
209 Free(val);
210 }
211 }
212
213 void ustring::clear()
214 {
215 if (val_ptr->n_uchars > 0) {
216 clean_up();
217 init_struct(0);
218 }
219 }
220
221 ustring ustring::substr(size_t pos, size_t n) const
222 {
223 if (pos > val_ptr->n_uchars)
224 FATAL_ERROR("ustring::substr(size_t, size_t): position is outside of string");
225 if (pos == 0 && n >= val_ptr->n_uchars) return *this;
226 if (n > val_ptr->n_uchars - pos) n = val_ptr->n_uchars - pos;
227 return ustring(n, val_ptr->uchars_ptr + pos);
228 }
229
230 void ustring::replace(size_t pos, size_t n, const ustring& s)
231 {
232 if (pos > val_ptr->n_uchars)
233 FATAL_ERROR("ustring::replace(): start position is outside the string");
234 if (pos + n > val_ptr->n_uchars)
235 FATAL_ERROR("ustring::replace(): end position is outside the string");
236 size_t s_len = s.size();
237 /* The replacement string is greater than the maximum string length. The
238 replaced characters are taken into account. */
239 if (s_len > max_string_len - val_ptr->n_uchars + n)
240 FATAL_ERROR("ustring::replace(): length overflow");
241 size_t new_size = val_ptr->n_uchars - n + s_len;
242 if (new_size == 0) {
243 clean_up();
244 init_struct(0);
245 } else {
246 ustring_struct *old_ptr = val_ptr;
247 old_ptr->ref_count--;
248 init_struct(new_size);
249 memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr,
250 pos * sizeof(universal_char));
251 memcpy(val_ptr->uchars_ptr + pos, s.u_str(),
252 s_len * sizeof(universal_char));
253 memcpy(val_ptr->uchars_ptr + pos + s_len, old_ptr->uchars_ptr + pos + n,
254 (old_ptr->n_uchars - pos - n) * sizeof(universal_char));
255 if (old_ptr->ref_count == 0) Free(old_ptr);
256 }
257 }
258
259 string ustring::get_stringRepr() const
260 {
261 string ret_val;
262 enum { INIT, PCHAR, UCHAR } state = INIT;
263 for (size_t i = 0; i < val_ptr->n_uchars; i++) {
264 const universal_char& uchar = val_ptr->uchars_ptr[i];
265 if (uchar.group == 0 && uchar.plane == 0 && uchar.row == 0 &&
266 string::is_printable(uchar.cell)) {
267 // the actual character is printable
268 switch (state) {
269 case UCHAR: // concatenation sign if previous part was not printable
270 ret_val += " & ";
271 // no break
272 case INIT: // opening "
273 ret_val += '"';
274 // no break
275 case PCHAR: // the character itself
276 ret_val.append_stringRepr(uchar.cell);
277 break;
278 }
279 state = PCHAR;
280 } else {
281 // the actual character is not printable
282 switch (state) {
283 case PCHAR: // closing " if previous part was printable
284 ret_val += '"';
285 // no break
286 case UCHAR: // concatenation sign
287 ret_val += " & ";
288 // no break
289 case INIT: // the character itself in quadruple notation
290 ret_val += "char(";
291 ret_val += Common::Int2string(uchar.group);
292 ret_val += ", ";
293 ret_val += Common::Int2string(uchar.plane);
294 ret_val += ", ";
295 ret_val += Common::Int2string(uchar.row);
296 ret_val += ", ";
297 ret_val += Common::Int2string(uchar.cell);
298 ret_val += ')';
299 break;
300 }
301 state = UCHAR;
302 }
303 }
304 // final steps
305 switch (state) {
306 case INIT: // the string was empty
307 ret_val += "\"\"";
308 break;
309 case PCHAR: // last character was printable -> closing "
310 ret_val += '"';
311 break;
312 default:
313 break;
314 }
315 return ret_val;
316 }
317
318 string ustring::get_stringRepr_for_pattern() const {
319 string ret_val; // empty string
320 for (size_t i = 0; i < val_ptr->n_uchars; i++) {
321 const universal_char& uchar = val_ptr->uchars_ptr[i];
322 if (uchar.group == 0 && uchar.plane == 0 && uchar.row == 0 &&
323 string::is_printable(uchar.cell)) {
324 ret_val.append_stringRepr(uchar.cell);
325 } else {
326 ret_val += "\\q{";
327 ret_val += Common::Int2string(uchar.group);
328 ret_val += ",";
329 ret_val += Common::Int2string(uchar.plane);
330 ret_val += ",";
331 ret_val += Common::Int2string(uchar.row);
332 ret_val += ",";
333 ret_val += Common::Int2string(uchar.cell);
334 ret_val += "}";
335 }
336 }
337 return ret_val;
338 }
339
340 char* ustring::convert_to_regexp_form() const {
341 char* res = (char*)Malloc(val_ptr->n_uchars * 8 + 1);
342 char* ptr = res;
343 res[val_ptr->n_uchars * 8] = '\0';
344 Quad q;
345 for (size_t i = 0; i < val_ptr->n_uchars; i++, ptr += 8) {
346 const universal_char& uchar = val_ptr->uchars_ptr[i];
347 q.set(uchar.group, uchar.plane, uchar.row, uchar.cell);
348 Quad::get_hexrepr(q, ptr);
349 }
350 return res;
351 }
352
353 ustring& ustring::operator=(const ustring& s)
354 {
355 if(&s != this) {
356 clean_up();
357 val_ptr = s.val_ptr;
358 val_ptr->ref_count++;
359 }
360 return *this;
361 }
362
363 ustring::universal_char& ustring::operator[](size_t n)
364 {
365 if (n >= val_ptr->n_uchars)
366 FATAL_ERROR("ustring::operator[](size_t): position is outside the string");
367 copy_value();
368 return val_ptr->uchars_ptr[n];
369 }
370
371 const ustring::universal_char& ustring::operator[](size_t n) const
372 {
373 if (n >= val_ptr->n_uchars)
374 FATAL_ERROR("ustring::operator[](size_t) const: position is outside the string");
375 return val_ptr->uchars_ptr[n];
376 }
377
378 ustring ustring::operator+(const string& s2) const
379 {
380 size_t s2_size = s2.size();
381 if (s2_size > max_string_len - val_ptr->n_uchars)
382 FATAL_ERROR("ustring::operator+(const string&): length overflow");
383 if (s2_size > 0) {
384 ustring s(val_ptr->n_uchars + s2_size);
385 memcpy(s.val_ptr->uchars_ptr, val_ptr->uchars_ptr, val_ptr->n_uchars *
386 sizeof(universal_char));
387 const char *src = s2.c_str();
388 for (size_t i = 0; i < s2_size; i++) {
389 s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].group = 0;
390 s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].plane = 0;
391 s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].row = 0;
392 s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].cell = src[i];
393 }
394 return s;
395 } else return *this;
396 }
397
398 ustring ustring::operator+(const ustring& s2) const
399 {
400 if (s2.val_ptr->n_uchars > max_string_len - val_ptr->n_uchars)
401 FATAL_ERROR("ustring::operator+(const ustring&): length overflow");
402 if (val_ptr->n_uchars == 0) return s2;
403 else if (s2.val_ptr->n_uchars == 0) return *this;
404 else {
405 ustring s(val_ptr->n_uchars + s2.val_ptr->n_uchars);
406 memcpy(s.val_ptr->uchars_ptr, val_ptr->uchars_ptr, val_ptr->n_uchars *
407 sizeof(universal_char));
408 memcpy(s.val_ptr->uchars_ptr + val_ptr->n_uchars,
409 s2.val_ptr->uchars_ptr, s2.val_ptr->n_uchars * sizeof(universal_char));
410 return s;
411 }
412 }
413
414 ustring& ustring::operator+=(const string& s)
415 {
416 size_t s_size = s.size();
417 if (s_size > 0) {
418 size_t old_size = val_ptr->n_uchars;
419 enlarge_memory(s_size);
420 const char *src = s.c_str();
421 for (size_t i = 0; i < s_size; i++) {
422 val_ptr->uchars_ptr[old_size + i].group = 0;
423 val_ptr->uchars_ptr[old_size + i].plane = 0;
424 val_ptr->uchars_ptr[old_size + i].row = 0;
425 val_ptr->uchars_ptr[old_size + i].cell = src[i];
426 }
427 }
428 return *this;
429 }
430
431 ustring& ustring::operator+=(const ustring& s)
432 {
433 if (s.val_ptr->n_uchars > 0) {
434 if (val_ptr->n_uchars > 0) {
435 size_t old_size = val_ptr->n_uchars, s_size = s.val_ptr->n_uchars;
436 enlarge_memory(s_size);
437 memcpy(val_ptr->uchars_ptr + old_size, s.val_ptr->uchars_ptr,
438 s_size * sizeof(universal_char));
439 } else {
440 clean_up();
441 val_ptr = s.val_ptr;
442 val_ptr->ref_count++;
443 }
444 }
445 return *this;
446 }
447
448 bool ustring::operator==(const ustring& s2) const
449 {
450 if (val_ptr == s2.val_ptr) return true;
451 else if (val_ptr->n_uchars != s2.val_ptr->n_uchars) return false;
452 else return !memcmp(val_ptr->uchars_ptr, s2.val_ptr->uchars_ptr,
453 val_ptr->n_uchars * sizeof(universal_char));
454 }
455
456 bool operator==(const ustring::universal_char& uc1,
457 const ustring::universal_char& uc2)
458 {
459 return uc1.group == uc2.group && uc1.plane == uc2.plane &&
460 uc1.row == uc2.row && uc1.cell == uc2.cell;
461 }
462
463 bool operator<(const ustring::universal_char& uc1,
464 const ustring::universal_char& uc2)
465 {
466 if (uc1.group < uc2.group) return true;
467 else if (uc1.group > uc2.group) return false;
468 else if (uc1.plane < uc2.plane) return true;
469 else if (uc1.plane > uc2.plane) return false;
470 else if (uc1.row < uc2.row) return true;
471 else if (uc1.row > uc2.row) return false;
472 else return uc1.cell < uc2.cell;
473 }
474
475 string ustring_to_uft8(const ustring& ustr)
476 {
477 string ret_val;
478 for(size_t i = 0; i < ustr.size(); i++) {
479 unsigned char g = ustr[i].group;
480 unsigned char p = ustr[i].plane;
481 unsigned char r = ustr[i].row;
482 unsigned char c = ustr[i].cell;
483 if(g == 0x00 && p <= 0x1F) {
484 if(p == 0x00) {
485 if(r == 0x00 && c <= 0x7F) {
486 // 1 octet
487 ret_val += c;
488 } // r
489 // 2 octets
490 else if(r <= 0x07) {
491 ret_val += (0xC0 | r << 2 | c >> 6);
492 ret_val += (0x80 | (c & 0x3F));
493 } // r
494 // 3 octets
495 else {
496 ret_val += (0xE0 | r >> 4);
497 ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
498 ret_val += (0x80 | (c & 0x3F));
499 } // r
500 } // p
501 // 4 octets
502 else {
503 ret_val += (0xF0 | p >> 2);
504 ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
505 ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
506 ret_val += (0x80 | (c & 0x3F));
507 } // p
508 } //g
509 // 5 octets
510 else if(g <= 0x03) {
511 ret_val += (0xF8 | g);
512 ret_val += (0x80 | p >> 2);
513 ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
514 ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
515 ret_val += (0x80 | (c & 0x3F));
516 } // g
517 // 6 octets
518 else {
519 ret_val += (0xFC | g >> 6);
520 ret_val += (0x80 | (g & 0x3F));
521 ret_val += (0x80 | p >> 2);
522 ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
523 ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
524 ret_val += (0x80 | (c & 0x3F));
525 }
526 } // for i
527 return ret_val;
528 }
This page took 0.042227 seconds and 5 git commands to generate.