note
	description: "[
					Platform specific encoding of Unicode strings. By default, UTF-8 on unix or UTF-16 on Windows.
		
					Mixed-encoding consideration
					============================
		
					Most operating systems have conventions for strings that are incompatible with Unicode.
					On UNIX, a string is just a null-terminated byte sequence, it does not follow any
					specific encoding. Usually the locale setting enables you to see the string the way
					you expect.
					On Windows, a string is a null-terminated sequence of UTF-16 code units. Windows
					does not guarantee that the sequence is actually a valid UTF-16 sequence.
		
					In other words, when there is an invalid UTF-8 encoding on UNIX, or an invalid UTF-16 encoding
					on Windows, the string is not directly representable as a Unicode string. To make it possible
					to create and store strings in a textually representable form, the query string will create
					an encoded representation that can later be passed to make to create a NATIVE_STRING
					equivalent to the original string. The encoding is described in UTF_CONVERTER's note clause
					and is a fourth variant of the recommended practice for replacement characters in Unicode
					(see http://www.unicode.org/review/pr-121.html).
		
	]"
	date: "$Date: 2020-05-19 14:32:38 +0000 (Tue, 19 May 2020) $"
	revision: "$Revision: 104260 $"

class 
	NATIVE_STRING

inherit
	NATIVE_STRING_HANDLER
		redefine
			is_equal
		end

create 
	make,
	make_empty,
	make_from_pointer,
	make_from_raw_string

feature {NONE} -- Initialization

	make (a_string: READABLE_STRING_GENERAL)
			-- Create Current with the content of a_string, interpreted as a
			-- sequence of Unicode characters. Storage is sized from
			-- a_string.count and then filled through set_string.
		require
			a_string_not_void: a_string /= Void
		local
			l_length: INTEGER_32
		do
			l_length := a_string.count
			make_empty (l_length)
			set_string (a_string)
		end

	make_empty (a_length: INTEGER_32)
			-- Create Current with no content and with room for a_length + 1
			-- code units (the extra unit leaves space for a terminating null).
			-- The memory area is not initialized.
		require
			a_length_positive: a_length >= 0
		local
			l_byte_size: INTEGER_32
		do
			l_byte_size := (a_length + 1) * unit_size
			create managed_data.make (l_byte_size)
			unit_count := 0
		end

	make_from_pointer (a_pointer: POINTER)
			-- Create Current from a_pointer, a null-terminated string in the
			-- platform specific encoding.
		require
			a_path_pointer_not_null: a_pointer /= default_pointer
		local
			l_byte_length: INTEGER_32
		do
				-- Length in bytes of the content, as measured by pointer_length_in_bytes.
			l_byte_length := pointer_length_in_bytes (a_pointer)
				-- One extra unit is requested on top of the measured length.
			create managed_data.make_from_pointer (a_pointer, l_byte_length + unit_size)
			unit_count := l_byte_length // unit_size
		end

	make_from_raw_string (a_raw_string: like raw_string)
			-- Create Current from the bytes of a_raw_string, copied one by one
			-- into managed_data and followed by a null terminating unit.
		require
			a_raw_string_not_void: a_raw_string /= Void
		local
			l_offset, l_byte_count: INTEGER_32
		do
			l_byte_count := a_raw_string.count
			create managed_data.make (l_byte_count + unit_size)
			from
				l_offset := 0
			until
				l_offset >= l_byte_count
			loop
					-- String indices are 1-based, memory offsets are 0-based.
				managed_data.put_character (a_raw_string.item (l_offset + 1), l_offset)
				l_offset := l_offset + 1
			end
				-- Write the terminator right after the copied bytes.
			if {PLATFORM}.is_windows then
				managed_data.put_natural_16 (0, l_offset)
			else
				managed_data.put_natural_8 (0, l_offset)
			end
			unit_count := (managed_data.count - unit_size) // unit_size
		ensure
			set: raw_string.same_string (a_raw_string)
		end
	
feature -- Access

	raw_string: STRING_8
			-- Sequence of bytes representing Current: the content of
			-- managed_data without its last unit_size bytes.
		local
			l_area_size: INTEGER_32
			l_view: C_STRING
		do
			l_area_size := managed_data.count
			create l_view.make_shared_from_pointer_and_count (managed_data.item, l_area_size)
			Result := l_view.substring_8 (1, l_area_size - unit_size)
		end

	string: STRING_32
			-- Representation of Current up to the first null character,
			-- decoded with the UTF-16 routine on Windows and the UTF-8
			-- routine on other platforms.
		local
			l_converter: UTF_CONVERTER
		do
			if not {PLATFORM}.is_windows then
				Result := l_converter.utf_8_0_pointer_to_escaped_string_32 (managed_data)
			else
				Result := l_converter.utf_16_0_pointer_to_escaped_string_32 (managed_data)
			end
		end

	substring (start_pos, end_pos: INTEGER_32): STRING_32
			-- Copy of substring containing all code units at indices
			-- between start_pos and end_pos (1-based, inclusive).
			-- Decoding uses the UTF-16 routine on Windows and the UTF-8
			-- routine on other platforms.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_position_not_too_big: end_pos <= (capacity // unit_size)
		local
			u: UTF_CONVERTER
		do
				-- Result has to be empty before the conversion: the converter
				-- routines below write the decoded substring into it. Both
				-- branches therefore start from the same freshly created string.
			create Result.make (end_pos - start_pos + 1)
				-- The converter takes 0-based code unit indices, hence the `- 1'.
			if {PLATFORM}.is_windows then
				u.utf_16_0_subpointer_into_escaped_string_32 (managed_data, start_pos - 1, end_pos - 1, False, Result)
			else
				u.utf_8_0_subpointer_into_escaped_string_32 (managed_data, start_pos - 1, end_pos - 1, False, Result)
			end
		ensure
			susbstring_not_void: Result /= Void
		end

	item: POINTER
			-- Pointer to the memory area held by managed_data.
			-- The postcondition states that it is never the null pointer.
		do
			Result := managed_data.item
		ensure
			item_not_null: Result /= default_pointer
		end

	managed_data: MANAGED_POINTER
			-- Memory area holding the data of Current.
			-- The creation procedures size it for the content plus one extra unit.
	
feature -- Status Report

	is_empty: BOOLEAN
			-- Is Current empty, i.e. does it use no code unit (unit_count = 0)?
		do
			Result := unit_count = 0
		end
	
feature -- Measurement

	capacity: INTEGER_32
			-- Number of bytes in Current: the size of the memory area as
			-- reported by managed_data.count. This can exceed bytes_count,
			-- since the creation procedures reserve an extra unit beyond
			-- the content.
		do
			Result := managed_data.count
		end

	bytes_count: INTEGER_32
			-- Number of bytes used by Current, not including the null
			-- terminating character: unit_count units of unit_size bytes each.
		do
			Result := unit_size * unit_count
		ensure
			bytes_count_non_negative: Result >= 0
		end

	unit_count: INTEGER_32
			-- Number of units used by Current not including the null terminating unit.
			-- Assigned by the creation procedures, set_substring and
			-- set_shared_from_pointer_and_count.

	unit_size: INTEGER_32
			-- Size in bytes of one storage unit: 2 on Windows, 1 on other platforms.
		do
			Result := 1
			if {PLATFORM}.is_windows then
				Result := 2
			end
		end
	
feature -- Comparison

	is_equal (other: like Current): BOOLEAN
			-- Is content of string identical to content of string other?
			-- Two strings are equal when they use the same number of code
			-- units and those units are byte-for-byte identical. Unused
			-- capacity and the terminating unit are not compared.
		do
			if other = Current then
				Result := True
			elseif unit_count = other.unit_count then
					-- Compare the bytes in use, not the addresses of the two
					-- memory areas: distinct areas can hold the same content.
				Result := item.memory_compare (other.item, bytes_count)
			end
		end
	
feature -- Element change

	set_string (a_string: READABLE_STRING_GENERAL)
			-- Replace content with the whole of a_string, treated as a
			-- sequence of Unicode characters.
		require
			a_string_not_void: a_string /= Void
		local
			l_last: INTEGER_32
		do
			l_last := a_string.count
			set_substring (a_string, 1, l_last)
		end

	set_substring (a_string: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER_32)
			-- Replace content with the characters of a_string from index
			-- start_pos to index end_pos, written at the start of managed_data
			-- with the UTF-16 routine on Windows and the UTF-8 routine elsewhere.
			-- unit_count is then derived from the byte index left in Upper_cell.
		require
			a_string_not_void: a_string /= Void
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= a_string.count
		local
			l_converter: UTF_CONVERTER
			l_upper: CELL [INTEGER_32]
		do
			l_upper := Upper_cell
			if not {PLATFORM}.is_windows then
				l_converter.escaped_utf_32_substring_into_utf_8_0_pointer (a_string, start_pos, end_pos, managed_data, 0, l_upper)
			else
				l_converter.escaped_utf_32_substring_into_utf_16_0_pointer (a_string, start_pos, end_pos, managed_data, 0, l_upper)
			end
			unit_count := l_upper.item // unit_size
		end

	set_shared_from_pointer (a_ptr: POINTER)
			-- Make Current share the memory at a_ptr, using the length in
			-- bytes computed by pointer_length_in_bytes.
		require
			a_ptr_not_null: a_ptr /= default_pointer
		local
			l_byte_length: INTEGER_32
		do
			l_byte_length := pointer_length_in_bytes (a_ptr)
			set_shared_from_pointer_and_count (a_ptr, l_byte_length)
		end

	set_shared_from_pointer_and_count (a_ptr: POINTER; a_length: INTEGER_32)
			-- Make Current share the a_length bytes at a_ptr. Space for an
			-- additional null terminating code unit is added to managed_data.
		require
			a_ptr_not_null: a_ptr /= default_pointer
			a_length_non_negative: a_length >= 0
			a_length_valid: (a_length \\ unit_size) = 0
		local
			l_area_size: INTEGER_32
		do
			unit_count := a_length // unit_size
			l_area_size := a_length + unit_size
			if managed_data.is_shared then
					-- Already sharing: simply re-target the existing object.
				managed_data.set_from_pointer (a_ptr, l_area_size)
			else
					-- Not sharing yet: replace it with a new sharing object.
				create managed_data.share_from_pointer (a_ptr, l_area_size)
			end
		end
	
feature {NONE} -- Implementation

	Platform: PLATFORM
			-- Access underlying platform info, used to satisfy invariant below.
			-- Once function: the PLATFORM object is created on first call and
			-- the same object is returned afterwards.
		once
			create Result
		end

	Upper_cell: CELL [INTEGER_32]
			-- Temporary storage for byte index of last insertions when using UTF_CONVERTER routines.
			-- Once function: the cell is created on first call with value 0.
			-- set_substring passes it to the converter and reads its item afterwards.
		once
			create Result.put (0)
		end
	
invariant
	little_endian_windows: {PLATFORM}.is_windows implies Platform.Is_little_endian
	even_count_on_windows: {PLATFORM}.is_windows implies managed_data.count \\ unit_size = 0

note
	copyright: "Copyright (c) 1984-2020, Eiffel Software and others"
	license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
	source: "[
		Eiffel Software
		5949 Hollister Ave., Goleta, CA 93117 USA
		Telephone 805-685-1006, Fax 805-685-6869
		Website http://www.eiffel.com
		Customer support http://support.eiffel.com
	]"

end -- class NATIVE_STRING

Generated by ISE EiffelStudio