[deliverable/titan.core.git] / compiler2 / ustring.cc

/******************************************************************************
 * Copyright (c) 2000-2016 Ericsson Telecom AB
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *   Balasko, Jeno
 *   Baranyi, Botond
 *   Kovacs, Ferenc
 *   Raduly, Csaba
 *   Szabados, Kristof
 *   Szabo, Janos Zoltan – initial implementation
 *   Zalanyi, Balazs Andor
 *
 ******************************************************************************/
#include <stdio.h>
#include <string.h>

#include "../common/memory.h"
#include "../common/Quadruple.hh"
#include "error.h"

#include "string.hh"
#include "ustring.hh"
#include "PredefFunc.hh"

#include "Int.hh"

/** The amount of memory needed for an ustring containing n characters.
 *
 * One character's worth of storage is subtracted because, presumably,
 * ustring_struct already embeds a one-element universal_char array (see the
 * initializer of the shared empty string in init_struct()) -- TODO confirm
 * against the declaration in ustring.hh.
 *
 * NOTE(review): for n == 0 the term (n) - 1 wraps if n is unsigned; the
 * callers in this file that reach the macro through init_struct() never pass
 * 0, because init_struct() handles that case without allocating. */
#define MEMORY_SIZE(n) (sizeof(ustring_struct) + \
  ((n) - 1) * sizeof(universal_char))

void ustring::init_struct(size_t n_uchars)
{
  if (n_uchars == 0) {
    /** This will represent the empty strings so they won't need allocated
     * memory, this delays the memory allocation until it is really needed. */
    static ustring_struct empty_string = { 1, 0, { { '\0', '\0', '\0', '\0' } } };
    val_ptr = &empty_string;
    empty_string.ref_count++;
  } else {
    val_ptr = (ustring_struct*)Malloc(MEMORY_SIZE(n_uchars));
    val_ptr->ref_count = 1;
    val_ptr->n_uchars = n_uchars;
  }
}

void ustring::enlarge_memory(size_t incr)
{
  if (incr > max_string_len - val_ptr->n_uchars)
    FATAL_ERROR("ustring::enlarge_memory(size_t): length overflow");
  size_t new_length = val_ptr->n_uchars + incr;
  if (val_ptr->ref_count == 1) {
    val_ptr = (ustring_struct*)Realloc(val_ptr, MEMORY_SIZE(new_length));
    val_ptr->n_uchars = new_length;
  } else {
    ustring_struct *old_ptr = val_ptr;
    old_ptr->ref_count--;
    init_struct(new_length);
    memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr, old_ptr->n_uchars *
      sizeof(universal_char));
  }
}

void ustring::copy_value()
{
  if (val_ptr->ref_count > 1) {
    ustring_struct *old_ptr = val_ptr;
    old_ptr->ref_count--;
    init_struct(old_ptr->n_uchars);
    memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr,
      old_ptr->n_uchars * sizeof(universal_char));
  }
}

/** Drops this object's reference to the structure behind val_ptr.
 *
 * The last reference (counter equal to 1) frees the structure, a counter
 * above 1 is simply decremented, and any other counter value is reported
 * as a fatal error. val_ptr itself is left unchanged. */
void ustring::clean_up()
{
  if (val_ptr->ref_count == 1) Free(val_ptr);
  else if (val_ptr->ref_count > 1) --val_ptr->ref_count;
  else FATAL_ERROR("ustring::clean_up()");
}

int ustring::compare(const ustring& s) const
{
  if (val_ptr == s.val_ptr) return 0;
  for (size_t i = 0; ; i++) {
    if (i == val_ptr->n_uchars) {
      if (i == s.val_ptr->n_uchars) return 0;
      else return -1;
    } else if (i == s.val_ptr->n_uchars) return 1;
    else if (val_ptr->uchars_ptr[i].group > s.val_ptr->uchars_ptr[i].group)
      return 1;
    else if (val_ptr->uchars_ptr[i].group < s.val_ptr->uchars_ptr[i].group)
      return -1;
    else if (val_ptr->uchars_ptr[i].plane > s.val_ptr->uchars_ptr[i].plane)
      return 1;
    else if (val_ptr->uchars_ptr[i].plane < s.val_ptr->uchars_ptr[i].plane)
      return -1;
    else if (val_ptr->uchars_ptr[i].row > s.val_ptr->uchars_ptr[i].row)
      return 1;
    else if (val_ptr->uchars_ptr[i].row < s.val_ptr->uchars_ptr[i].row)
      return -1;
    else if (val_ptr->uchars_ptr[i].cell > s.val_ptr->uchars_ptr[i].cell)
      return 1;
    else if (val_ptr->uchars_ptr[i].cell < s.val_ptr->uchars_ptr[i].cell)
      return -1;
  }
  return 0; // should never get here
}

/** Constructs a one-character string from the four components of a
 * quadruple (group, plane, row, cell). */
ustring::ustring(unsigned char p_group, unsigned char p_plane,
  unsigned char p_row, unsigned char p_cell)
{
  init_struct(1);
  universal_char& only_char = val_ptr->uchars_ptr[0];
  only_char.group = p_group;
  only_char.plane = p_plane;
  only_char.row = p_row;
  only_char.cell = p_cell;
}

/** Constructs a string from \a n universal characters starting at \a uc_ptr.
 *
 * In case the editor saved the TTCN-3 file in UTF-8, the input may really be
 * a sequence of UTF-8 octets stored one per character. Therefore, if every
 * character has zero group, plane and row, the cells are collected as a hex
 * octet string and handed to Common::get_stringencoding(); when that reports
 * "UTF-8" the result of Common::decode_utf8() becomes the value. Otherwise
 * the characters are copied as they are. */
ustring::ustring(size_t n, const universal_char *uc_ptr)
{
  string hex_octets;
  bool decode_as_utf8 = true;
  for (size_t i = 0; decode_as_utf8 && i < n; ++i) {
    const universal_char& uc = uc_ptr[i];
    if (uc.group == 0 && uc.plane == 0 && uc.row == 0) {
      // two hex digits per octet, high nibble first
      hex_octets += Common::hexdigit_to_char(uc.cell / 16);
      hex_octets += Common::hexdigit_to_char(uc.cell % 16);
    } else {
      // a character outside the first row cannot be a raw octet
      decode_as_utf8 = false;
    }
  }
  if (decode_as_utf8) {
    string *encoding = Common::get_stringencoding(hex_octets);
    if ("UTF-8" != *encoding) decode_as_utf8 = false;
    delete encoding;
  }
  if (!decode_as_utf8) {
    init_struct(n);
    memcpy(val_ptr->uchars_ptr, uc_ptr, n * sizeof(universal_char));
    return;
  }
  // Share the structure of the decoded temporary; the extra reference taken
  // here outlives the temporary's destruction.
  ustring decoded = Common::decode_utf8(hex_octets, CharCoding::UTF_8);
  val_ptr = decoded.val_ptr;
  val_ptr->ref_count++;
}

/** Constructs a string from the single-byte string \a s.
 *
 * In case the editor saved the TTCN-3 file in UTF-8, the bytes of \a s are
 * first rendered as a hex octet string and passed to
 * Common::get_stringencoding(). When that reports "UTF-8" the result of
 * Common::decode_utf8() becomes the value; otherwise every byte of \a s is
 * stored in the cell of one character whose group, plane and row are zero. */
ustring::ustring(const string& s)
{
  string hex_octets;
  const size_t n_bytes = s.size();
  for (size_t i = 0; i < n_bytes; ++i) {
    // two hex digits per octet, high nibble first
    hex_octets += Common::hexdigit_to_char((unsigned char)(s[i]) / 16);
    hex_octets += Common::hexdigit_to_char((unsigned char)(s[i]) % 16);
  }
  bool decode_as_utf8 = true;
  string *encoding = Common::get_stringencoding(hex_octets);
  if ("UTF-8" != *encoding) decode_as_utf8 = false;
  delete encoding;
  if (!decode_as_utf8) {
    init_struct(s.size());
    const char *bytes = s.c_str();
    for (size_t i = 0; i < val_ptr->n_uchars; i++) {
      universal_char& target = val_ptr->uchars_ptr[i];
      target.group = 0;
      target.plane = 0;
      target.row = 0;
      target.cell = bytes[i];
    }
    return;
  }
  // Share the structure of the decoded temporary; the extra reference taken
  // here outlives the temporary's destruction.
  ustring decoded = Common::decode_utf8(hex_octets, CharCoding::UTF_8);
  val_ptr = decoded.val_ptr;
  val_ptr->ref_count++;
}

/** Turns this object into the empty string.
 * A string that is already empty is left untouched; otherwise the current
 * structure is released and the shared empty one is attached. */
void ustring::clear()
{
  if (val_ptr->n_uchars == 0) return;
  clean_up();
  init_struct(0);
}

/** Returns the part of the string that starts at index \a pos and is at most
 * \a n characters long (it is cut at the end of the string).
 *
 * A \a pos beyond the end of the string is a fatal error. Asking for the
 * whole string returns a copy of this object itself.
 *
 * NOTE(review): the result is built with the (n, pointer) constructor, which
 * in this file re-runs the UTF-8 detection on the selected characters --
 * confirm that this is intended for substrings. */
ustring ustring::substr(size_t pos, size_t n) const
{
  const size_t total = val_ptr->n_uchars;
  if (pos > total)
    FATAL_ERROR("ustring::substr(size_t, size_t): position is outside of string");
  if (pos == 0 && n >= total) return *this;
  const size_t available = total - pos;
  const size_t count = n > available ? available : n;
  return ustring(count, val_ptr->uchars_ptr + pos);
}

/** Replaces the \a n characters starting at index \a pos with the content
 * of \a s.
 *
 * Fatal errors are raised if \a pos is beyond the end of the string, if the
 * range [pos, pos + n) does not fit into the string, or if the resulting
 * length would exceed max_string_len.
 *
 * The result is always built in a newly attached structure; the previous
 * one is freed only when this object held its last reference. \a s may be
 * this very object. */
void ustring::replace(size_t pos, size_t n, const ustring& s)
{
  const size_t old_len = val_ptr->n_uchars;
  if (pos > old_len)
    FATAL_ERROR("ustring::replace(): start position is outside the string");
  /* Compare n with the room left after pos instead of evaluating pos + n:
     the sum can wrap around for a huge n and slip through the check. */
  if (n > old_len - pos)
    FATAL_ERROR("ustring::replace(): end position is outside the string");
  const size_t s_len = s.size();
  /* The replacement string is greater than the maximum string length.  The
     replaced characters are taken into account.  */
  if (s_len > max_string_len - old_len + n)
    FATAL_ERROR("ustring::replace(): length overflow");
  const size_t new_size = old_len - n + s_len;
  if (new_size == 0) {
    clean_up();
    init_struct(0);
  } else {
    ustring_struct *old_ptr = val_ptr;
    /* Fetch the replacement characters before val_ptr is re-pointed: if s is
       this very object, asking it afterwards would yield the new, still
       uninitialized buffer.  The old structure stays allocated until the
       end of this function, so the pointer remains valid. */
    const void *replacement = s.u_str();
    old_ptr->ref_count--;
    init_struct(new_size);
    // head: the characters in front of the replaced range
    memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr,
      pos * sizeof(universal_char));
    // middle: the replacement itself
    memcpy(val_ptr->uchars_ptr + pos, replacement,
      s_len * sizeof(universal_char));
    // tail: the characters behind the replaced range
    memcpy(val_ptr->uchars_ptr + pos + s_len, old_ptr->uchars_ptr + pos + n,
      (old_len - pos - n) * sizeof(universal_char));
    if (old_ptr->ref_count == 0) Free(old_ptr);
  }
}

/** Builds a textual representation of the string.
 *
 * Runs of printable characters (group, plane and row zero, and the cell is
 * accepted by string::is_printable()) are written between double quotes
 * using string::append_stringRepr(). Every other character is written in
 * quadruple notation as char(g, p, r, c). Consecutive parts are joined with
 * " & ". The empty string is represented as "". */
string ustring::get_stringRepr() const
{
  string ret_val;
  // what the previously emitted part was
  enum { NOTHING_YET, IN_QUOTES, AFTER_QUAD } prev = NOTHING_YET;
  const size_t n_chars = val_ptr->n_uchars;
  for (size_t i = 0; i < n_chars; i++) {
    const universal_char& uc = val_ptr->uchars_ptr[i];
    const bool printable = uc.group == 0 && uc.plane == 0 && uc.row == 0 &&
      string::is_printable(uc.cell);
    if (printable) {
      // concatenation sign if the previous part was a quadruple
      if (prev == AFTER_QUAD) ret_val += " & ";
      // opening " unless a quoted run is already open
      if (prev != IN_QUOTES) ret_val += '"';
      ret_val.append_stringRepr(uc.cell);
      prev = IN_QUOTES;
    } else {
      // closing " if a quoted run was open
      if (prev == IN_QUOTES) ret_val += '"';
      // concatenation sign unless this is the very first part
      if (prev != NOTHING_YET) ret_val += " & ";
      ret_val += "char(";
      ret_val += Common::Int2string(uc.group);
      ret_val += ", ";
      ret_val += Common::Int2string(uc.plane);
      ret_val += ", ";
      ret_val += Common::Int2string(uc.row);
      ret_val += ", ";
      ret_val += Common::Int2string(uc.cell);
      ret_val += ')';
      prev = AFTER_QUAD;
    }
  }
  // final steps
  if (prev == NOTHING_YET) {
    // the string was empty
    ret_val += "\"\"";
  } else if (prev == IN_QUOTES) {
    // the last character was printable -> closing "
    ret_val += '"';
  }
  return ret_val;
}

/** Builds the representation of the string for use inside a pattern.
 *
 * Printable characters (group, plane and row zero, and the cell is accepted
 * by string::is_printable()) are appended with string::append_stringRepr();
 * all others are written as \q{g,p,r,c}. No quotes are added. */
string ustring::get_stringRepr_for_pattern() const {
  string ret_val; // empty string
  const size_t n_chars = val_ptr->n_uchars;
  for (size_t i = 0; i < n_chars; i++) {
    const universal_char& uc = val_ptr->uchars_ptr[i];
    const bool in_first_row = uc.group == 0 && uc.plane == 0 && uc.row == 0;
    if (!in_first_row || !string::is_printable(uc.cell)) {
      ret_val += "\\q{";
      ret_val += Common::Int2string(uc.group);
      ret_val += ",";
      ret_val += Common::Int2string(uc.plane);
      ret_val += ",";
      ret_val += Common::Int2string(uc.row);
      ret_val += ",";
      ret_val += Common::Int2string(uc.cell);
      ret_val += "}";
      continue;
    }
    ret_val.append_stringRepr(uc.cell);
  }
  return ret_val;
}

char* ustring::convert_to_regexp_form() const {
  char* res = (char*)Malloc(val_ptr->n_uchars * 8 + 1);
  char* ptr = res;
  res[val_ptr->n_uchars * 8] = '\0';
  Quad q;
  for (size_t i = 0; i < val_ptr->n_uchars; i++, ptr += 8) {
    const universal_char& uchar = val_ptr->uchars_ptr[i];
    q.set(uchar.group, uchar.plane, uchar.row, uchar.cell);
    Quad::get_hexrepr(q, ptr);
  }
  return res;
}

/** Assignment: releases the current value and shares the structure of \a s
 * by taking a new reference to it. Self-assignment is a no-op. */
ustring& ustring::operator=(const ustring& s)
{
  if (this == &s) return *this;
  clean_up();
  val_ptr = s.val_ptr;
  ++val_ptr->ref_count;
  return *this;
}

/** Writable access to the character at index \a n.
 *
 * An index outside the string is a fatal error. Because the caller may
 * modify the character, the storage is made private with copy_value()
 * before the reference is handed out. */
ustring::universal_char& ustring::operator[](size_t n)
{
  const size_t length = val_ptr->n_uchars;
  if (!(n < length))
    FATAL_ERROR("ustring::operator[](size_t): position is outside the string");
  copy_value();
  universal_char *chars = val_ptr->uchars_ptr;
  return chars[n];
}

/** Read-only access to the character at index \a n.
 * An index outside the string is a fatal error. */
const ustring::universal_char& ustring::operator[](size_t n) const
{
  const size_t length = val_ptr->n_uchars;
  if (!(n < length))
    FATAL_ERROR("ustring::operator[](size_t) const: position is outside the string");
  const universal_char *chars = val_ptr->uchars_ptr;
  return chars[n];
}

/** Returns the concatenation of this string and the single-byte string
 * \a s2.
 *
 * Each byte of \a s2 is stored in the cell of a character whose group,
 * plane and row are zero. A combined length above max_string_len is a fatal
 * error. When \a s2 is empty a copy of this object is returned. */
ustring ustring::operator+(const string& s2) const
{
  const size_t s2_len = s2.size();
  const size_t own_len = val_ptr->n_uchars;
  if (s2_len > max_string_len - own_len)
    FATAL_ERROR("ustring::operator+(const string&): length overflow");
  if (s2_len == 0) return *this;
  ustring result(own_len + s2_len);
  memcpy(result.val_ptr->uchars_ptr, val_ptr->uchars_ptr,
    own_len * sizeof(universal_char));
  const char *bytes = s2.c_str();
  // first slot behind the copied part
  universal_char *tail = result.val_ptr->uchars_ptr + own_len;
  for (size_t i = 0; i < s2_len; i++) {
    tail[i].group = 0;
    tail[i].plane = 0;
    tail[i].row = 0;
    tail[i].cell = bytes[i];
  }
  return result;
}

/** Returns the concatenation of this string and \a s2.
 *
 * A combined length above max_string_len is a fatal error. If either
 * operand is empty, a copy of the other one is returned and no new
 * character storage is filled. */
ustring ustring::operator+(const ustring& s2) const
{
  const size_t own_len = val_ptr->n_uchars;
  const size_t s2_len = s2.val_ptr->n_uchars;
  if (s2_len > max_string_len - own_len)
    FATAL_ERROR("ustring::operator+(const ustring&): length overflow");
  if (own_len == 0) return s2;
  if (s2_len == 0) return *this;
  ustring result(own_len + s2_len);
  universal_char *dest = result.val_ptr->uchars_ptr;
  memcpy(dest, val_ptr->uchars_ptr, own_len * sizeof(universal_char));
  memcpy(dest + own_len, s2.val_ptr->uchars_ptr,
    s2_len * sizeof(universal_char));
  return result;
}

/** Appends the single-byte string \a s to this string.
 *
 * Each byte of \a s is stored in the cell of a character whose group, plane
 * and row are zero. Appending an empty string changes nothing. Length
 * overflow is detected inside enlarge_memory(). */
ustring& ustring::operator+=(const string& s)
{
  const size_t n_added = s.size();
  if (n_added == 0) return *this;
  // remember where the new characters start before the storage grows
  const size_t first_new = val_ptr->n_uchars;
  enlarge_memory(n_added);
  const char *bytes = s.c_str();
  universal_char *tail = val_ptr->uchars_ptr + first_new;
  for (size_t i = 0; i < n_added; i++) {
    tail[i].group = 0;
    tail[i].plane = 0;
    tail[i].row = 0;
    tail[i].cell = bytes[i];
  }
  return *this;
}

/** Appends \a s to this string.
 *
 * Appending an empty string changes nothing. If this string is empty, it
 * simply takes a reference to the structure of \a s instead of copying.
 * Otherwise the storage is grown with enlarge_memory() and the characters
 * of \a s are copied behind the existing ones. */
ustring& ustring::operator+=(const ustring& s)
{
  if (s.val_ptr->n_uchars == 0) return *this;
  if (val_ptr->n_uchars == 0) {
    // nothing of our own to keep: share the other string's structure
    clean_up();
    val_ptr = s.val_ptr;
    val_ptr->ref_count++;
    return *this;
  }
  // both lengths are taken before the storage is touched
  const size_t first_new = val_ptr->n_uchars;
  const size_t n_added = s.val_ptr->n_uchars;
  enlarge_memory(n_added);
  memcpy(val_ptr->uchars_ptr + first_new, s.val_ptr->uchars_ptr,
    n_added * sizeof(universal_char));
  return *this;
}

/** Equality of two strings.
 * Strings sharing one structure are equal without further inspection;
 * otherwise the lengths must match and the character arrays must be
 * bytewise identical. */
bool ustring::operator==(const ustring& s2) const
{
  if (val_ptr == s2.val_ptr) return true;
  const size_t length = val_ptr->n_uchars;
  if (length != s2.val_ptr->n_uchars) return false;
  return memcmp(val_ptr->uchars_ptr, s2.val_ptr->uchars_ptr,
    length * sizeof(universal_char)) == 0;
}

/** Two universal characters are equal if all four of their components
 * (group, plane, row, cell) are equal. */
bool operator==(const ustring::universal_char& uc1,
  const ustring::universal_char& uc2)
{
  if (uc1.group != uc2.group) return false;
  if (uc1.plane != uc2.plane) return false;
  if (uc1.row != uc2.row) return false;
  return uc1.cell == uc2.cell;
}

/** Ordering of universal characters: the components are compared in the
 * order group, plane, row, cell, and the first differing one decides. */
bool operator<(const ustring::universal_char& uc1,
  const ustring::universal_char& uc2)
{
  if (uc1.group != uc2.group) return uc1.group < uc2.group;
  if (uc1.plane != uc2.plane) return uc1.plane < uc2.plane;
  if (uc1.row != uc2.row) return uc1.row < uc2.row;
  return uc1.cell < uc2.cell;
}

/** Encodes \a ustr as a sequence of UTF-8 octets, returned in a string.
 *
 * The number of octets written for a character depends on its components:
 *  - group 0, plane 0, row 0, cell <= 0x7F:  1 octet
 *  - group 0, plane 0, row <= 0x07:          2 octets
 *  - group 0, plane 0, any other row:        3 octets
 *  - group 0, plane 0x01..0x1F:              4 octets
 *  - group <= 0x03 (otherwise):              5 octets
 *  - any other group:                        6 octets */
string ustring_to_uft8(const ustring& ustr)
{
  string ret_val;
  for(size_t i = 0; i < ustr.size(); i++) {
    const ustring::universal_char& uc = ustr[i];
    const unsigned char g = uc.group;
    const unsigned char p = uc.plane;
    const unsigned char r = uc.row;
    const unsigned char c = uc.cell;
    if(g == 0x00 && p == 0x00) {
      if(r == 0x00 && c <= 0x7F) {
        // 1 octet
        ret_val += c;
      }
      else if(r <= 0x07) {
        // 2 octets
        ret_val += (0xC0 | r << 2 | c >> 6);
        ret_val += (0x80 | (c & 0x3F));
      }
      else {
        // 3 octets
        ret_val += (0xE0 | r >> 4);
        ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
        ret_val += (0x80 | (c & 0x3F));
      }
    }
    else if(g == 0x00 && p <= 0x1F) {
      // 4 octets
      ret_val += (0xF0 | p >> 2);
      ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
      ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
      ret_val += (0x80 | (c & 0x3F));
    }
    else if(g <= 0x03) {
      // 5 octets
      ret_val += (0xF8 | g);
      ret_val += (0x80 | p >> 2);
      ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
      ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
      ret_val += (0x80 | (c & 0x3F));
    }
    else {
      // 6 octets
      ret_val += (0xFC | g >> 6);
      ret_val += (0x80 | (g & 0x3F));
      ret_val += (0x80 | p >> 2);
      ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
      ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
      ret_val += (0x80 | (c & 0x3F));
    }
  } // for i
  return ret_val;
}
Commit	Line	Data
d44e3c4f	1	/******************************************************************************
	2	* Copyright (c) 2000-2016 Ericsson Telecom AB
	3	* All rights reserved. This program and the accompanying materials
	4	* are made available under the terms of the Eclipse Public License v1.0
	5	* which accompanies this distribution, and is available at
	6	* http://www.eclipse.org/legal/epl-v10.html
	7	*
	8	* Contributors:
	9	* Balasko, Jeno
	10	* Baranyi, Botond
	11	* Kovacs, Ferenc
	12	* Raduly, Csaba
	13	* Szabados, Kristof
	14	* Szabo, Janos Zoltan – initial implementation
	15	* Zalanyi, Balazs Andor
	16	*
	17	******************************************************************************/
970ed795 EL	18	#include <stdio.h>
	19	#include <string.h>
	20
	21	#include "../common/memory.h"
	22	#include "../common/Quadruple.hh"
	23	#include "error.h"
	24
	25	#include "string.hh"
	26	#include "ustring.hh"
	27	#include "PredefFunc.hh"
	28
	29	#include "Int.hh"
	30
	31	/** The amount of memory needed for an ustring containing n characters. */
	32	#define MEMORY_SIZE(n) (sizeof(ustring_struct) + \
	33	((n) - 1) * sizeof(universal_char))
	34
	35	void ustring::init_struct(size_t n_uchars)
	36	{
	37	if (n_uchars == 0) {
	38	/** This will represent the empty strings so they won't need allocated
	39	* memory, this delays the memory allocation until it is really needed. */
	40	static ustring_struct empty_string = { 1, 0, { { '\0', '\0', '\0', '\0' } } };
	41	val_ptr = &empty_string;
	42	empty_string.ref_count++;
	43	} else {
	44	val_ptr = (ustring_struct*)Malloc(MEMORY_SIZE(n_uchars));
	45	val_ptr->ref_count = 1;
	46	val_ptr->n_uchars = n_uchars;
	47	}
	48	}
	49
	50	void ustring::enlarge_memory(size_t incr)
	51	{
	52	if (incr > max_string_len - val_ptr->n_uchars)
	53	FATAL_ERROR("ustring::enlarge_memory(size_t): length overflow");
	54	size_t new_length = val_ptr->n_uchars + incr;
	55	if (val_ptr->ref_count == 1) {
	56	val_ptr = (ustring_struct*)Realloc(val_ptr, MEMORY_SIZE(new_length));
	57	val_ptr->n_uchars = new_length;
	58	} else {
	59	ustring_struct *old_ptr = val_ptr;
	60	old_ptr->ref_count--;
	61	init_struct(new_length);
	62	memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr, old_ptr->n_uchars *
	63	sizeof(universal_char));
	64	}
	65	}
	66
	67	void ustring::copy_value()
	68	{
	69	if (val_ptr->ref_count > 1) {
	70	ustring_struct *old_ptr = val_ptr;
	71	old_ptr->ref_count--;
	72	init_struct(old_ptr->n_uchars);
	73	memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr,
	74	old_ptr->n_uchars * sizeof(universal_char));
	75	}
	76	}
	77
	78	void ustring::clean_up()
	79	{
	80	if (val_ptr->ref_count > 1) val_ptr->ref_count--;
	81	else if (val_ptr->ref_count == 1) Free(val_ptr);
82	else FATAL_ERROR("ustring::clean_up()");
83	}
84
85	int ustring::compare(const ustring& s) const
86	{
87	if (val_ptr == s.val_ptr) return 0;
88	for (size_t i = 0; ; i++) {
89	if (i == val_ptr->n_uchars) {
90	if (i == s.val_ptr->n_uchars) return 0;
91	else return -1;
92	} else if (i == s.val_ptr->n_uchars) return 1;
93	else if (val_ptr->uchars_ptr[i].group > s.val_ptr->uchars_ptr[i].group)
94	return 1;
95	else if (val_ptr->uchars_ptr[i].group < s.val_ptr->uchars_ptr[i].group)
96	return -1;
97	else if (val_ptr->uchars_ptr[i].plane > s.val_ptr->uchars_ptr[i].plane)
98	return 1;
99	else if (val_ptr->uchars_ptr[i].plane < s.val_ptr->uchars_ptr[i].plane)
100	return -1;
101	else if (val_ptr->uchars_ptr[i].row > s.val_ptr->uchars_ptr[i].row)
102	return 1;
103	else if (val_ptr->uchars_ptr[i].row < s.val_ptr->uchars_ptr[i].row)
104	return -1;
105	else if (val_ptr->uchars_ptr[i].cell > s.val_ptr->uchars_ptr[i].cell)
106	return 1;
107	else if (val_ptr->uchars_ptr[i].cell < s.val_ptr->uchars_ptr[i].cell)
108	return -1;
109	}
110	return 0; // should never get here
111	}
112
113	ustring::ustring(unsigned char p_group, unsigned char p_plane,
114	unsigned char p_row, unsigned char p_cell)
115	{
116	init_struct(1);
117	val_ptr->uchars_ptr[0].group = p_group;
118	val_ptr->uchars_ptr[0].plane = p_plane;
119	val_ptr->uchars_ptr[0].row = p_row;
120	val_ptr->uchars_ptr[0].cell = p_cell;
121	}
122
123	ustring::ustring(size_t n, const universal_char *uc_ptr)
124	{
125	// Check for UTF8 encoding and decode it
126	// incase the editor encoded the TTCN-3 file with UTF-8
127	string octet_str;
128	bool isUTF8 = true;
129	for (size_t i = 0; i < n; ++i) {
130	if (uc_ptr[i].group != 0 \|\| uc_ptr[i].plane != 0 \|\| uc_ptr[i].row != 0) {
131	// Not UTF8
132	isUTF8 = false;
133	break;
134	}
135	octet_str += Common::hexdigit_to_char(uc_ptr[i].cell / 16);
136	octet_str += Common::hexdigit_to_char(uc_ptr[i].cell % 16);
137	}
138	if (isUTF8) {
139	string* ret = Common::get_stringencoding(octet_str);
140	if ("UTF-8" != *ret) {
141	isUTF8 = false;
142	}
143	delete ret;
144	}
145	if (isUTF8) {
146	ustring s = Common::decode_utf8(octet_str, CharCoding::UTF_8);
147	val_ptr = s.val_ptr;
148	val_ptr->ref_count++;
149	} else {
150	init_struct(n);
151	memcpy(val_ptr->uchars_ptr, uc_ptr, n * sizeof(universal_char));
152	}
153	}
154
155	ustring::ustring(const string& s)
156	{
157	// Check for UTF8 encoding and decode it
158	// incase the editor encoded the TTCN-3 file with UTF-8
159	string octet_str;
160	bool isUTF8 = true;
161	size_t len = s.size();
162	for (size_t i = 0; i < len; ++i) {
163	octet_str += Common::hexdigit_to_char((unsigned char)(s[i]) / 16);
164	octet_str += Common::hexdigit_to_char((unsigned char)(s[i]) % 16);
165	}
166	if (isUTF8) {
167	string* ret = Common::get_stringencoding(octet_str);
168	if ("UTF-8" != *ret) {
169	isUTF8 = false;
170	}
171	delete ret;
172	}
173	if (isUTF8) {
174	ustring s = Common::decode_utf8(octet_str, CharCoding::UTF_8);
175	val_ptr = s.val_ptr;
176	val_ptr->ref_count++;
177	} else {
178	init_struct(s.size());
179	const char *src = s.c_str();
180	for (size_t i = 0; i < val_ptr->n_uchars; i++) {
181	val_ptr->uchars_ptr[i].group = 0;
182	val_ptr->uchars_ptr[i].plane = 0;
183	val_ptr->uchars_ptr[i].row = 0;
184	val_ptr->uchars_ptr[i].cell = src[i];
185	}
186	}
187	}
188
189	void ustring::clear()
190	{
191	if (val_ptr->n_uchars > 0) {
192	clean_up();
193	init_struct(0);
194	}
195	}
196
197	ustring ustring::substr(size_t pos, size_t n) const
198	{
199	if (pos > val_ptr->n_uchars)
200	FATAL_ERROR("ustring::substr(size_t, size_t): position is outside of string");
201	if (pos == 0 && n >= val_ptr->n_uchars) return *this;
202	if (n > val_ptr->n_uchars - pos) n = val_ptr->n_uchars - pos;
203	return ustring(n, val_ptr->uchars_ptr + pos);
204	}
205
206	void ustring::replace(size_t pos, size_t n, const ustring& s)
207	{
208	if (pos > val_ptr->n_uchars)
209	FATAL_ERROR("ustring::replace(): start position is outside the string");
210	if (pos + n > val_ptr->n_uchars)
211	FATAL_ERROR("ustring::replace(): end position is outside the string");
212	size_t s_len = s.size();
213	/* The replacement string is greater than the maximum string length. The
214	replaced characters are taken into account. */
215	if (s_len > max_string_len - val_ptr->n_uchars + n)
216	FATAL_ERROR("ustring::replace(): length overflow");
217	size_t new_size = val_ptr->n_uchars - n + s_len;
218	if (new_size == 0) {
219	clean_up();
220	init_struct(0);
221	} else {
222	ustring_struct *old_ptr = val_ptr;
223	old_ptr->ref_count--;
224	init_struct(new_size);
225	memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr,
226	pos * sizeof(universal_char));
227	memcpy(val_ptr->uchars_ptr + pos, s.u_str(),
228	s_len * sizeof(universal_char));
229	memcpy(val_ptr->uchars_ptr + pos + s_len, old_ptr->uchars_ptr + pos + n,
230	(old_ptr->n_uchars - pos - n) * sizeof(universal_char));
231	if (old_ptr->ref_count == 0) Free(old_ptr);
232	}
233	}
234
235	string ustring::get_stringRepr() const
236	{
237	string ret_val;
238	enum { INIT, PCHAR, UCHAR } state = INIT;
239	for (size_t i = 0; i < val_ptr->n_uchars; i++) {
240	const universal_char& uchar = val_ptr->uchars_ptr[i];
241	if (uchar.group == 0 && uchar.plane == 0 && uchar.row == 0 &&
242	string::is_printable(uchar.cell)) {
243	// the actual character is printable
244	switch (state) {
245	case UCHAR: // concatenation sign if previous part was not printable
246	ret_val += " & ";
247	// no break
248	case INIT: // opening "
249	ret_val += '"';
250	// no break
251	case PCHAR: // the character itself
252	ret_val.append_stringRepr(uchar.cell);
253	break;
254	}
255	state = PCHAR;
256	} else {
257	// the actual character is not printable
258	switch (state) {
259	case PCHAR: // closing " if previous part was printable
260	ret_val += '"';
261	// no break
262	case UCHAR: // concatenation sign
263	ret_val += " & ";
264	// no break
265	case INIT: // the character itself in quadruple notation
266	ret_val += "char(";
267	ret_val += Common::Int2string(uchar.group);
268	ret_val += ", ";
269	ret_val += Common::Int2string(uchar.plane);
270	ret_val += ", ";
271	ret_val += Common::Int2string(uchar.row);
272	ret_val += ", ";
273	ret_val += Common::Int2string(uchar.cell);
274	ret_val += ')';
275	break;
276	}
277	state = UCHAR;
278	}
279	}
280	// final steps
281	switch (state) {
282	case INIT: // the string was empty
283	ret_val += "\"\"";
284	break;
285	case PCHAR: // last character was printable -> closing "
286	ret_val += '"';
287	break;
288	default:
289	break;
290	}
291	return ret_val;
292	}
293
294	string ustring::get_stringRepr_for_pattern() const {
295	string ret_val; // empty string
296	for (size_t i = 0; i < val_ptr->n_uchars; i++) {
297	const universal_char& uchar = val_ptr->uchars_ptr[i];
298	if (uchar.group == 0 && uchar.plane == 0 && uchar.row == 0 &&
299	string::is_printable(uchar.cell)) {
300	ret_val.append_stringRepr(uchar.cell);
301	} else {
302	ret_val += "\\q{";
303	ret_val += Common::Int2string(uchar.group);
304	ret_val += ",";
305	ret_val += Common::Int2string(uchar.plane);
306	ret_val += ",";
307	ret_val += Common::Int2string(uchar.row);
308	ret_val += ",";
309	ret_val += Common::Int2string(uchar.cell);
310	ret_val += "}";
311	}
312	}
313	return ret_val;
314	}
315
316	char* ustring::convert_to_regexp_form() const {
317	char* res = (char)Malloc(val_ptr->n_uchars 8 + 1);
318	char* ptr = res;
319	res[val_ptr->n_uchars * 8] = '\0';
320	Quad q;
321	for (size_t i = 0; i < val_ptr->n_uchars; i++, ptr += 8) {
322	const universal_char& uchar = val_ptr->uchars_ptr[i];
323	q.set(uchar.group, uchar.plane, uchar.row, uchar.cell);
324	Quad::get_hexrepr(q, ptr);
325	}
326	return res;
327	}
328
329	ustring& ustring::operator=(const ustring& s)
330	{
331	if(&s != this) {
332	clean_up();
333	val_ptr = s.val_ptr;
334	val_ptr->ref_count++;
335	}
336	return *this;
337	}
338
339	ustring::universal_char& ustring::operator[](size_t n)
340	{
341	if (n >= val_ptr->n_uchars)
342	FATAL_ERROR("ustring::operator[](size_t): position is outside the string");
343	copy_value();
344	return val_ptr->uchars_ptr[n];
345	}
346
347	const ustring::universal_char& ustring::operator[](size_t n) const
348	{
349	if (n >= val_ptr->n_uchars)
350	FATAL_ERROR("ustring::operator[](size_t) const: position is outside the string");
351	return val_ptr->uchars_ptr[n];
352	}
353
354	ustring ustring::operator+(const string& s2) const
355	{
356	size_t s2_size = s2.size();
357	if (s2_size > max_string_len - val_ptr->n_uchars)
358	FATAL_ERROR("ustring::operator+(const string&): length overflow");
359	if (s2_size > 0) {
360	ustring s(val_ptr->n_uchars + s2_size);
361	memcpy(s.val_ptr->uchars_ptr, val_ptr->uchars_ptr, val_ptr->n_uchars *
362	sizeof(universal_char));
363	const char *src = s2.c_str();
364	for (size_t i = 0; i < s2_size; i++) {
365	s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].group = 0;
366	s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].plane = 0;
367	s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].row = 0;
368	s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].cell = src[i];
369	}
370	return s;
371	} else return *this;
372	}
373
374	ustring ustring::operator+(const ustring& s2) const
375	{
376	if (s2.val_ptr->n_uchars > max_string_len - val_ptr->n_uchars)
377	FATAL_ERROR("ustring::operator+(const ustring&): length overflow");
378	if (val_ptr->n_uchars == 0) return s2;
379	else if (s2.val_ptr->n_uchars == 0) return *this;
380	else {
381	ustring s(val_ptr->n_uchars + s2.val_ptr->n_uchars);
382	memcpy(s.val_ptr->uchars_ptr, val_ptr->uchars_ptr, val_ptr->n_uchars *
383	sizeof(universal_char));
384	memcpy(s.val_ptr->uchars_ptr + val_ptr->n_uchars,
385	s2.val_ptr->uchars_ptr, s2.val_ptr->n_uchars * sizeof(universal_char));
386	return s;
387	}
388	}
389
390	ustring& ustring::operator+=(const string& s)
391	{
392	size_t s_size = s.size();
393	if (s_size > 0) {
394	size_t old_size = val_ptr->n_uchars;
395	enlarge_memory(s_size);
396	const char *src = s.c_str();
397	for (size_t i = 0; i < s_size; i++) {
398	val_ptr->uchars_ptr[old_size + i].group = 0;
399	val_ptr->uchars_ptr[old_size + i].plane = 0;
400	val_ptr->uchars_ptr[old_size + i].row = 0;
401	val_ptr->uchars_ptr[old_size + i].cell = src[i];
402	}
403	}
404	return *this;
405	}
406
407	ustring& ustring::operator+=(const ustring& s)
408	{
409	if (s.val_ptr->n_uchars > 0) {
410	if (val_ptr->n_uchars > 0) {
411	size_t old_size = val_ptr->n_uchars, s_size = s.val_ptr->n_uchars;
412	enlarge_memory(s_size);
413	memcpy(val_ptr->uchars_ptr + old_size, s.val_ptr->uchars_ptr,
414	s_size * sizeof(universal_char));
415	} else {
416	clean_up();
417	val_ptr = s.val_ptr;
418	val_ptr->ref_count++;
419	}
420	}
421	return *this;
422	}
423
424	bool ustring::operator==(const ustring& s2) const
425	{
426	if (val_ptr == s2.val_ptr) return true;
427	else if (val_ptr->n_uchars != s2.val_ptr->n_uchars) return false;
428	else return !memcmp(val_ptr->uchars_ptr, s2.val_ptr->uchars_ptr,
429	val_ptr->n_uchars * sizeof(universal_char));
430	}
431
432	bool operator==(const ustring::universal_char& uc1,
433	const ustring::universal_char& uc2)
434	{
435	return uc1.group == uc2.group && uc1.plane == uc2.plane &&
436	uc1.row == uc2.row && uc1.cell == uc2.cell;
437	}
438
439	bool operator<(const ustring::universal_char& uc1,
440	const ustring::universal_char& uc2)
441	{
442	if (uc1.group < uc2.group) return true;
443	else if (uc1.group > uc2.group) return false;
444	else if (uc1.plane < uc2.plane) return true;
445	else if (uc1.plane > uc2.plane) return false;
446	else if (uc1.row < uc2.row) return true;
447	else if (uc1.row > uc2.row) return false;
448	else return uc1.cell < uc2.cell;
449	}
3abe9331	450
	451	string ustring_to_uft8(const ustring& ustr)
	452	{
	453	string ret_val;
	454	for(size_t i = 0; i < ustr.size(); i++) {
	455	unsigned char g = ustr[i].group;
	456	unsigned char p = ustr[i].plane;
	457	unsigned char r = ustr[i].row;
	458	unsigned char c = ustr[i].cell;
	459	if(g == 0x00 && p <= 0x1F) {
	460	if(p == 0x00) {
	461	if(r == 0x00 && c <= 0x7F) {
	462	// 1 octet
	463	ret_val += c;
	464	} // r
	465	// 2 octets
	466	else if(r <= 0x07) {
	467	ret_val += (0xC0 \| r << 2 \| c >> 6);
	468	ret_val += (0x80 \| (c & 0x3F));
	469	} // r
	470	// 3 octets
	471	else {
	472	ret_val += (0xE0 \| r >> 4);
	473	ret_val += (0x80 \| (r << 2 & 0x3C) \| c >> 6);
	474	ret_val += (0x80 \| (c & 0x3F));
	475	} // r
	476	} // p
	477	// 4 octets
	478	else {
	479	ret_val += (0xF0 \| p >> 2);
	480	ret_val += (0x80 \| (p << 4 & 0x30) \| r >> 4);
	481	ret_val += (0x80 \| (r << 2 & 0x3C) \| c >> 6);
	482	ret_val += (0x80 \| (c & 0x3F));
	483	} // p
	484	} //g
	485	// 5 octets
	486	else if(g <= 0x03) {
	487	ret_val += (0xF8 \| g);
	488	ret_val += (0x80 \| p >> 2);
	489	ret_val += (0x80 \| (p << 4 & 0x30) \| r >> 4);
	490	ret_val += (0x80 \| (r << 2 & 0x3C) \| c >> 6);
	491	ret_val += (0x80 \| (c & 0x3F));
	492	} // g
	493	// 6 octets
	494	else {
	495	ret_val += (0xFC \| g >> 6);
	496	ret_val += (0x80 \| (g & 0x3F));
	497	ret_val += (0x80 \| p >> 2);
	498	ret_val += (0x80 \| (p << 4 & 0x30) \| r >> 4);
	499	ret_val += (0x80 \| (r << 2 & 0x3C) \| c >> 6);
	500	ret_val += (0x80 \| (c & 0x3F));
	501	}
	502	} // for i
	503	return ret_val;
	504	}