Merge pull request #29 from BotondBaranyi/master
[deliverable/titan.core.git] / compiler2 / ustring.cc
CommitLineData
d44e3c4f 1/******************************************************************************
2 * Copyright (c) 2000-2016 Ericsson Telecom AB
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
7 *
8 * Contributors:
9 * Balasko, Jeno
10 * Baranyi, Botond
11 * Kovacs, Ferenc
12 * Raduly, Csaba
13 * Szabados, Kristof
14 * Szabo, Janos Zoltan – initial implementation
15 * Zalanyi, Balazs Andor
16 *
17 ******************************************************************************/
970ed795
EL
18#include <stdio.h>
19#include <string.h>
20
21#include "../common/memory.h"
22#include "../common/Quadruple.hh"
23#include "error.h"
24
25#include "string.hh"
26#include "ustring.hh"
27#include "PredefFunc.hh"
28
29#include "Int.hh"
30
/** Number of bytes to allocate for an ustring_struct holding \a n characters.
 * The (n) - 1 term presumably accounts for one universal_char that is already
 * counted in sizeof(ustring_struct). init_struct() never evaluates the macro
 * for n == 0; it uses a shared static object for the empty string instead. */
#define MEMORY_SIZE(n) (sizeof(ustring_struct) + ((n) - 1) * sizeof(universal_char))
34
35void ustring::init_struct(size_t n_uchars)
36{
37 if (n_uchars == 0) {
38 /** This will represent the empty strings so they won't need allocated
39 * memory, this delays the memory allocation until it is really needed. */
40 static ustring_struct empty_string = { 1, 0, { { '\0', '\0', '\0', '\0' } } };
41 val_ptr = &empty_string;
42 empty_string.ref_count++;
43 } else {
44 val_ptr = (ustring_struct*)Malloc(MEMORY_SIZE(n_uchars));
45 val_ptr->ref_count = 1;
46 val_ptr->n_uchars = n_uchars;
47 }
48}
49
50void ustring::enlarge_memory(size_t incr)
51{
52 if (incr > max_string_len - val_ptr->n_uchars)
53 FATAL_ERROR("ustring::enlarge_memory(size_t): length overflow");
54 size_t new_length = val_ptr->n_uchars + incr;
55 if (val_ptr->ref_count == 1) {
56 val_ptr = (ustring_struct*)Realloc(val_ptr, MEMORY_SIZE(new_length));
57 val_ptr->n_uchars = new_length;
58 } else {
59 ustring_struct *old_ptr = val_ptr;
60 old_ptr->ref_count--;
61 init_struct(new_length);
62 memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr, old_ptr->n_uchars *
63 sizeof(universal_char));
64 }
65}
66
67void ustring::copy_value()
68{
69 if (val_ptr->ref_count > 1) {
70 ustring_struct *old_ptr = val_ptr;
71 old_ptr->ref_count--;
72 init_struct(old_ptr->n_uchars);
73 memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr,
74 old_ptr->n_uchars * sizeof(universal_char));
75 }
76}
77
78void ustring::clean_up()
79{
80 if (val_ptr->ref_count > 1) val_ptr->ref_count--;
81 else if (val_ptr->ref_count == 1) Free(val_ptr);
82 else FATAL_ERROR("ustring::clean_up()");
83}
84
85int ustring::compare(const ustring& s) const
86{
87 if (val_ptr == s.val_ptr) return 0;
88 for (size_t i = 0; ; i++) {
89 if (i == val_ptr->n_uchars) {
90 if (i == s.val_ptr->n_uchars) return 0;
91 else return -1;
92 } else if (i == s.val_ptr->n_uchars) return 1;
93 else if (val_ptr->uchars_ptr[i].group > s.val_ptr->uchars_ptr[i].group)
94 return 1;
95 else if (val_ptr->uchars_ptr[i].group < s.val_ptr->uchars_ptr[i].group)
96 return -1;
97 else if (val_ptr->uchars_ptr[i].plane > s.val_ptr->uchars_ptr[i].plane)
98 return 1;
99 else if (val_ptr->uchars_ptr[i].plane < s.val_ptr->uchars_ptr[i].plane)
100 return -1;
101 else if (val_ptr->uchars_ptr[i].row > s.val_ptr->uchars_ptr[i].row)
102 return 1;
103 else if (val_ptr->uchars_ptr[i].row < s.val_ptr->uchars_ptr[i].row)
104 return -1;
105 else if (val_ptr->uchars_ptr[i].cell > s.val_ptr->uchars_ptr[i].cell)
106 return 1;
107 else if (val_ptr->uchars_ptr[i].cell < s.val_ptr->uchars_ptr[i].cell)
108 return -1;
109 }
110 return 0; // should never get here
111}
112
113ustring::ustring(unsigned char p_group, unsigned char p_plane,
114 unsigned char p_row, unsigned char p_cell)
115{
116 init_struct(1);
117 val_ptr->uchars_ptr[0].group = p_group;
118 val_ptr->uchars_ptr[0].plane = p_plane;
119 val_ptr->uchars_ptr[0].row = p_row;
120 val_ptr->uchars_ptr[0].cell = p_cell;
121}
122
123ustring::ustring(size_t n, const universal_char *uc_ptr)
124{
125 // Check for UTF8 encoding and decode it
126 // incase the editor encoded the TTCN-3 file with UTF-8
127 string octet_str;
128 bool isUTF8 = true;
129 for (size_t i = 0; i < n; ++i) {
130 if (uc_ptr[i].group != 0 || uc_ptr[i].plane != 0 || uc_ptr[i].row != 0) {
131 // Not UTF8
132 isUTF8 = false;
133 break;
134 }
135 octet_str += Common::hexdigit_to_char(uc_ptr[i].cell / 16);
136 octet_str += Common::hexdigit_to_char(uc_ptr[i].cell % 16);
137 }
138 if (isUTF8) {
139 string* ret = Common::get_stringencoding(octet_str);
140 if ("UTF-8" != *ret) {
141 isUTF8 = false;
142 }
143 delete ret;
144 }
145 if (isUTF8) {
146 ustring s = Common::decode_utf8(octet_str, CharCoding::UTF_8);
147 val_ptr = s.val_ptr;
148 val_ptr->ref_count++;
149 } else {
150 init_struct(n);
151 memcpy(val_ptr->uchars_ptr, uc_ptr, n * sizeof(universal_char));
152 }
153}
154
155ustring::ustring(const string& s)
156{
157 // Check for UTF8 encoding and decode it
158 // incase the editor encoded the TTCN-3 file with UTF-8
159 string octet_str;
160 bool isUTF8 = true;
161 size_t len = s.size();
162 for (size_t i = 0; i < len; ++i) {
163 octet_str += Common::hexdigit_to_char((unsigned char)(s[i]) / 16);
164 octet_str += Common::hexdigit_to_char((unsigned char)(s[i]) % 16);
165 }
166 if (isUTF8) {
167 string* ret = Common::get_stringencoding(octet_str);
168 if ("UTF-8" != *ret) {
169 isUTF8 = false;
170 }
171 delete ret;
172 }
173 if (isUTF8) {
174 ustring s = Common::decode_utf8(octet_str, CharCoding::UTF_8);
175 val_ptr = s.val_ptr;
176 val_ptr->ref_count++;
177 } else {
178 init_struct(s.size());
179 const char *src = s.c_str();
180 for (size_t i = 0; i < val_ptr->n_uchars; i++) {
181 val_ptr->uchars_ptr[i].group = 0;
182 val_ptr->uchars_ptr[i].plane = 0;
183 val_ptr->uchars_ptr[i].row = 0;
184 val_ptr->uchars_ptr[i].cell = src[i];
185 }
186 }
187}
188
189void ustring::clear()
190{
191 if (val_ptr->n_uchars > 0) {
192 clean_up();
193 init_struct(0);
194 }
195}
196
197ustring ustring::substr(size_t pos, size_t n) const
198{
199 if (pos > val_ptr->n_uchars)
200 FATAL_ERROR("ustring::substr(size_t, size_t): position is outside of string");
201 if (pos == 0 && n >= val_ptr->n_uchars) return *this;
202 if (n > val_ptr->n_uchars - pos) n = val_ptr->n_uchars - pos;
203 return ustring(n, val_ptr->uchars_ptr + pos);
204}
205
206void ustring::replace(size_t pos, size_t n, const ustring& s)
207{
208 if (pos > val_ptr->n_uchars)
209 FATAL_ERROR("ustring::replace(): start position is outside the string");
210 if (pos + n > val_ptr->n_uchars)
211 FATAL_ERROR("ustring::replace(): end position is outside the string");
212 size_t s_len = s.size();
213 /* The replacement string is greater than the maximum string length. The
214 replaced characters are taken into account. */
215 if (s_len > max_string_len - val_ptr->n_uchars + n)
216 FATAL_ERROR("ustring::replace(): length overflow");
217 size_t new_size = val_ptr->n_uchars - n + s_len;
218 if (new_size == 0) {
219 clean_up();
220 init_struct(0);
221 } else {
222 ustring_struct *old_ptr = val_ptr;
223 old_ptr->ref_count--;
224 init_struct(new_size);
225 memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr,
226 pos * sizeof(universal_char));
227 memcpy(val_ptr->uchars_ptr + pos, s.u_str(),
228 s_len * sizeof(universal_char));
229 memcpy(val_ptr->uchars_ptr + pos + s_len, old_ptr->uchars_ptr + pos + n,
230 (old_ptr->n_uchars - pos - n) * sizeof(universal_char));
231 if (old_ptr->ref_count == 0) Free(old_ptr);
232 }
233}
234
235string ustring::get_stringRepr() const
236{
237 string ret_val;
238 enum { INIT, PCHAR, UCHAR } state = INIT;
239 for (size_t i = 0; i < val_ptr->n_uchars; i++) {
240 const universal_char& uchar = val_ptr->uchars_ptr[i];
241 if (uchar.group == 0 && uchar.plane == 0 && uchar.row == 0 &&
242 string::is_printable(uchar.cell)) {
243 // the actual character is printable
244 switch (state) {
245 case UCHAR: // concatenation sign if previous part was not printable
246 ret_val += " & ";
247 // no break
248 case INIT: // opening "
249 ret_val += '"';
250 // no break
251 case PCHAR: // the character itself
252 ret_val.append_stringRepr(uchar.cell);
253 break;
254 }
255 state = PCHAR;
256 } else {
257 // the actual character is not printable
258 switch (state) {
259 case PCHAR: // closing " if previous part was printable
260 ret_val += '"';
261 // no break
262 case UCHAR: // concatenation sign
263 ret_val += " & ";
264 // no break
265 case INIT: // the character itself in quadruple notation
266 ret_val += "char(";
267 ret_val += Common::Int2string(uchar.group);
268 ret_val += ", ";
269 ret_val += Common::Int2string(uchar.plane);
270 ret_val += ", ";
271 ret_val += Common::Int2string(uchar.row);
272 ret_val += ", ";
273 ret_val += Common::Int2string(uchar.cell);
274 ret_val += ')';
275 break;
276 }
277 state = UCHAR;
278 }
279 }
280 // final steps
281 switch (state) {
282 case INIT: // the string was empty
283 ret_val += "\"\"";
284 break;
285 case PCHAR: // last character was printable -> closing "
286 ret_val += '"';
287 break;
288 default:
289 break;
290 }
291 return ret_val;
292}
293
294string ustring::get_stringRepr_for_pattern() const {
295 string ret_val; // empty string
296 for (size_t i = 0; i < val_ptr->n_uchars; i++) {
297 const universal_char& uchar = val_ptr->uchars_ptr[i];
298 if (uchar.group == 0 && uchar.plane == 0 && uchar.row == 0 &&
299 string::is_printable(uchar.cell)) {
300 ret_val.append_stringRepr(uchar.cell);
301 } else {
302 ret_val += "\\q{";
303 ret_val += Common::Int2string(uchar.group);
304 ret_val += ",";
305 ret_val += Common::Int2string(uchar.plane);
306 ret_val += ",";
307 ret_val += Common::Int2string(uchar.row);
308 ret_val += ",";
309 ret_val += Common::Int2string(uchar.cell);
310 ret_val += "}";
311 }
312 }
313 return ret_val;
314}
315
316char* ustring::convert_to_regexp_form() const {
317 char* res = (char*)Malloc(val_ptr->n_uchars * 8 + 1);
318 char* ptr = res;
319 res[val_ptr->n_uchars * 8] = '\0';
320 Quad q;
321 for (size_t i = 0; i < val_ptr->n_uchars; i++, ptr += 8) {
322 const universal_char& uchar = val_ptr->uchars_ptr[i];
323 q.set(uchar.group, uchar.plane, uchar.row, uchar.cell);
324 Quad::get_hexrepr(q, ptr);
325 }
326 return res;
327}
328
329ustring& ustring::operator=(const ustring& s)
330{
331 if(&s != this) {
332 clean_up();
333 val_ptr = s.val_ptr;
334 val_ptr->ref_count++;
335 }
336 return *this;
337}
338
339ustring::universal_char& ustring::operator[](size_t n)
340{
341 if (n >= val_ptr->n_uchars)
342 FATAL_ERROR("ustring::operator[](size_t): position is outside the string");
343 copy_value();
344 return val_ptr->uchars_ptr[n];
345}
346
347const ustring::universal_char& ustring::operator[](size_t n) const
348{
349 if (n >= val_ptr->n_uchars)
350 FATAL_ERROR("ustring::operator[](size_t) const: position is outside the string");
351 return val_ptr->uchars_ptr[n];
352}
353
354ustring ustring::operator+(const string& s2) const
355{
356 size_t s2_size = s2.size();
357 if (s2_size > max_string_len - val_ptr->n_uchars)
358 FATAL_ERROR("ustring::operator+(const string&): length overflow");
359 if (s2_size > 0) {
360 ustring s(val_ptr->n_uchars + s2_size);
361 memcpy(s.val_ptr->uchars_ptr, val_ptr->uchars_ptr, val_ptr->n_uchars *
362 sizeof(universal_char));
363 const char *src = s2.c_str();
364 for (size_t i = 0; i < s2_size; i++) {
365 s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].group = 0;
366 s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].plane = 0;
367 s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].row = 0;
368 s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].cell = src[i];
369 }
370 return s;
371 } else return *this;
372}
373
374ustring ustring::operator+(const ustring& s2) const
375{
376 if (s2.val_ptr->n_uchars > max_string_len - val_ptr->n_uchars)
377 FATAL_ERROR("ustring::operator+(const ustring&): length overflow");
378 if (val_ptr->n_uchars == 0) return s2;
379 else if (s2.val_ptr->n_uchars == 0) return *this;
380 else {
381 ustring s(val_ptr->n_uchars + s2.val_ptr->n_uchars);
382 memcpy(s.val_ptr->uchars_ptr, val_ptr->uchars_ptr, val_ptr->n_uchars *
383 sizeof(universal_char));
384 memcpy(s.val_ptr->uchars_ptr + val_ptr->n_uchars,
385 s2.val_ptr->uchars_ptr, s2.val_ptr->n_uchars * sizeof(universal_char));
386 return s;
387 }
388}
389
390ustring& ustring::operator+=(const string& s)
391{
392 size_t s_size = s.size();
393 if (s_size > 0) {
394 size_t old_size = val_ptr->n_uchars;
395 enlarge_memory(s_size);
396 const char *src = s.c_str();
397 for (size_t i = 0; i < s_size; i++) {
398 val_ptr->uchars_ptr[old_size + i].group = 0;
399 val_ptr->uchars_ptr[old_size + i].plane = 0;
400 val_ptr->uchars_ptr[old_size + i].row = 0;
401 val_ptr->uchars_ptr[old_size + i].cell = src[i];
402 }
403 }
404 return *this;
405}
406
407ustring& ustring::operator+=(const ustring& s)
408{
409 if (s.val_ptr->n_uchars > 0) {
410 if (val_ptr->n_uchars > 0) {
411 size_t old_size = val_ptr->n_uchars, s_size = s.val_ptr->n_uchars;
412 enlarge_memory(s_size);
413 memcpy(val_ptr->uchars_ptr + old_size, s.val_ptr->uchars_ptr,
414 s_size * sizeof(universal_char));
415 } else {
416 clean_up();
417 val_ptr = s.val_ptr;
418 val_ptr->ref_count++;
419 }
420 }
421 return *this;
422}
423
424bool ustring::operator==(const ustring& s2) const
425{
426 if (val_ptr == s2.val_ptr) return true;
427 else if (val_ptr->n_uchars != s2.val_ptr->n_uchars) return false;
428 else return !memcmp(val_ptr->uchars_ptr, s2.val_ptr->uchars_ptr,
429 val_ptr->n_uchars * sizeof(universal_char));
430}
431
432bool operator==(const ustring::universal_char& uc1,
433 const ustring::universal_char& uc2)
434{
435 return uc1.group == uc2.group && uc1.plane == uc2.plane &&
436 uc1.row == uc2.row && uc1.cell == uc2.cell;
437}
438
439bool operator<(const ustring::universal_char& uc1,
440 const ustring::universal_char& uc2)
441{
442 if (uc1.group < uc2.group) return true;
443 else if (uc1.group > uc2.group) return false;
444 else if (uc1.plane < uc2.plane) return true;
445 else if (uc1.plane > uc2.plane) return false;
446 else if (uc1.row < uc2.row) return true;
447 else if (uc1.row > uc2.row) return false;
448 else return uc1.cell < uc2.cell;
449}
3abe9331 450
451string ustring_to_uft8(const ustring& ustr)
452{
453 string ret_val;
454 for(size_t i = 0; i < ustr.size(); i++) {
455 unsigned char g = ustr[i].group;
456 unsigned char p = ustr[i].plane;
457 unsigned char r = ustr[i].row;
458 unsigned char c = ustr[i].cell;
459 if(g == 0x00 && p <= 0x1F) {
460 if(p == 0x00) {
461 if(r == 0x00 && c <= 0x7F) {
462 // 1 octet
463 ret_val += c;
464 } // r
465 // 2 octets
466 else if(r <= 0x07) {
467 ret_val += (0xC0 | r << 2 | c >> 6);
468 ret_val += (0x80 | (c & 0x3F));
469 } // r
470 // 3 octets
471 else {
472 ret_val += (0xE0 | r >> 4);
473 ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
474 ret_val += (0x80 | (c & 0x3F));
475 } // r
476 } // p
477 // 4 octets
478 else {
479 ret_val += (0xF0 | p >> 2);
480 ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
481 ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
482 ret_val += (0x80 | (c & 0x3F));
483 } // p
484 } //g
485 // 5 octets
486 else if(g <= 0x03) {
487 ret_val += (0xF8 | g);
488 ret_val += (0x80 | p >> 2);
489 ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
490 ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
491 ret_val += (0x80 | (c & 0x3F));
492 } // g
493 // 6 octets
494 else {
495 ret_val += (0xFC | g >> 6);
496 ret_val += (0x80 | (g & 0x3F));
497 ret_val += (0x80 | p >> 2);
498 ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
499 ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
500 ret_val += (0x80 | (c & 0x3F));
501 }
502 } // for i
503 return ret_val;
504}
This page took 0.040657 seconds and 5 git commands to generate.