note
	description: "[
		Helper to perform efficient search of a string in another one.
		Note: The algorithm used is the one described in Communications of the ACM,
			volume 33, number 8, August 1990, by Daniel M. Sunday. The fuzzy
			version was presented by Peter R. Sibbald in Communications of the
			ACM, volume 35, number 4, April 1992 (Technical Correspondance).
	]"
	library: "Free implementation of ELKS library"
	status: "See notice at end of class."
	legal: "See notice at end of class."
	date: "$Date: 2017-03-28 12:36:24 +0000 (Tue, 28 Mar 2017) $"
	revision: "$Revision: 100064 $"

frozen class 
	STRING_8_SEARCHER

create 
	make

feature {NONE} -- Initialization

	default_create
			-- Process instances of classes with no creation clause.
			-- (Default: do nothing.)
			-- (from ANY)
		do
		end

	make
			-- Initialize current
			-- (from STRING_SEARCHER)
		do
			create deltas.make_empty (Max_code_point_value + 1)
		end
	
feature -- Initialization

	initialize_deltas (a_pattern: READABLE_STRING_GENERAL)
			-- Initialize deltas with a_pattern.
			-- Optimized for the top Max_code_point_value characters only.
			-- (from STRING_SEARCHER)
		require -- from STRING_SEARCHER
			a_pattern_not_void: a_pattern /= Void
		do
			internal_initialize_deltas (a_pattern, a_pattern.count, deltas)
		end
	
feature -- Access

	generating_type: TYPE [detachable STRING_8_SEARCHER]
			-- Type of current object
			-- (type of which it is a direct instance)
			-- (from ANY)
		external
			"built_in"
		ensure -- from ANY
			generating_type_not_void: Result /= Void
		end

	generator: STRING_8
			-- Name of current object's generating class
			-- (base class of the type of which it is a direct instance)
			-- (from ANY)
		external
			"built_in"
		ensure -- from ANY
			generator_not_void: Result /= Void
			generator_not_empty: not Result.is_empty
		end

	Max_code_point_value: INTEGER_32 = 255
			-- Maximum character value for which we optimize the lookup of a pattern.
			-- If any item's code of the searched string is above that limit, search will
			-- not be as efficient.
			-- For STRING_8, it is limited to the number of characters in the extended ASCII character set.

	string_type: READABLE_STRING_8
			-- Type of strings Current manipulate to perform searches.
		do
			Result := ""
		ensure -- from STRING_SEARCHER
			for_typing_only: False
		end
	
feature -- Comparison

	frozen deep_equal (a: detachable ANY; b: like arg #1): BOOLEAN
			-- Are a and b either both void
			-- or attached to isomorphic object structures?
			-- (from ANY)
		do
			if a = Void then
				Result := b = Void
			else
				Result := b /= Void and then a.is_deep_equal (b)
			end
		ensure -- from ANY
			instance_free: class
			shallow_implies_deep: standard_equal (a, b) implies Result
			both_or_none_void: (a = Void) implies (Result = (b = Void))
			same_type: (Result and (a /= Void)) implies (b /= Void and then a.same_type (b))
			symmetric: Result implies deep_equal (b, a)
		end

	frozen equal (a: detachable ANY; b: like arg #1): BOOLEAN
			-- Are a and b either both void or attached
			-- to objects considered equal?
			-- (from ANY)
		do
			if a = Void then
				Result := b = Void
			else
				Result := b /= Void and then a.is_equal (b)
			end
		ensure -- from ANY
			instance_free: class
			definition: Result = (a = Void and b = Void) or else ((a /= Void and b /= Void) and then a.is_equal (b))
		end

	frozen is_deep_equal alias "≡≡≡" (other: STRING_8_SEARCHER): BOOLEAN
			-- Are Current and other attached to isomorphic object structures?
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		external
			"built_in"
		ensure -- from ANY
			shallow_implies_deep: standard_is_equal (other) implies Result
			same_type: Result implies same_type (other)
			symmetric: Result implies other.is_deep_equal (Current)
		end

	is_equal (other: STRING_8_SEARCHER): BOOLEAN
			-- Is other attached to an object considered
			-- equal to current object?
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		external
			"built_in"
		ensure -- from ANY
			symmetric: Result implies other ~ Current
			consistent: standard_is_equal (other) implies Result
		end

	frozen standard_equal (a: detachable ANY; b: like arg #1): BOOLEAN
			-- Are a and b either both void or attached to
			-- field-by-field identical objects of the same type?
			-- Always uses default object comparison criterion.
			-- (from ANY)
		do
			if a = Void then
				Result := b = Void
			else
				Result := b /= Void and then a.standard_is_equal (b)
			end
		ensure -- from ANY
			instance_free: class
			definition: Result = (a = Void and b = Void) or else ((a /= Void and b /= Void) and then a.standard_is_equal (b))
		end

	frozen standard_is_equal alias "" (other: STRING_8_SEARCHER): BOOLEAN
			-- Is other attached to an object of the same type
			-- as current object, and field-by-field identical to it?
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		external
			"built_in"
		ensure -- from ANY
			same_type: Result implies same_type (other)
			symmetric: Result implies other.standard_is_equal (Current)
		end
	
feature -- Status report

	conforms_to (other: ANY): BOOLEAN
			-- Does type of current object conform to type
			-- of other (as per Eiffel: The Language, chapter 13)?
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		external
			"built_in"
		end

	same_type (other: ANY): BOOLEAN
			-- Is type of current object identical to type of other?
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		external
			"built_in"
		ensure -- from ANY
			definition: Result = (conforms_to (other) and other.conforms_to (Current))
		end
	
feature -- Duplication

	frozen clone (other: detachable ANY): like other
		obsolete "Use `twin' instead. [2017-05-31]"
			-- Void if other is void; otherwise new object
			-- equal to other
			--
			-- For non-void other, clone calls copy;
			-- to change copying/cloning semantics, redefine copy.
			-- (from ANY)
		do
			if other /= Void then
				Result := other.twin
			end
		ensure -- from ANY
			instance_free: class
			equal: Result ~ other
		end

	copy (other: STRING_8_SEARCHER)
			-- Update current object using fields of object attached
			-- to other, so as to yield equal objects.
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
			type_identity: same_type (other)
		external
			"built_in"
		ensure -- from ANY
			is_equal: Current ~ other
		end

	frozen deep_clone (other: detachable ANY): like other
		obsolete "Use `deep_twin' instead. [2017-05-31]"
			-- Void if other is void: otherwise, new object structure
			-- recursively duplicated from the one attached to other
			-- (from ANY)
		do
			if other /= Void then
				Result := other.deep_twin
			end
		ensure -- from ANY
			instance_free: class
			deep_equal: deep_equal (other, Result)
		end

	frozen deep_copy (other: STRING_8_SEARCHER)
			-- Effect equivalent to that of:
			--		copy (other . deep_twin)
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		do
			copy (other.deep_twin)
		ensure -- from ANY
			deep_equal: deep_equal (Current, other)
		end

	frozen deep_twin: STRING_8_SEARCHER
			-- New object structure recursively duplicated from Current.
			-- (from ANY)
		external
			"built_in"
		ensure -- from ANY
			deep_twin_not_void: Result /= Void
			deep_equal: deep_equal (Current, Result)
		end

	frozen standard_clone (other: detachable ANY): like other
		obsolete "Use `standard_twin' instead. [2017-05-31]"
			-- Void if other is void; otherwise new object
			-- field-by-field identical to other.
			-- Always uses default copying semantics.
			-- (from ANY)
		do
			if other /= Void then
				Result := other.standard_twin
			end
		ensure -- from ANY
			instance_free: class
			equal: standard_equal (Result, other)
		end

	frozen standard_copy (other: STRING_8_SEARCHER)
			-- Copy every field of other onto corresponding field
			-- of current object.
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
			type_identity: same_type (other)
		external
			"built_in"
		ensure -- from ANY
			is_standard_equal: standard_is_equal (other)
		end

	frozen standard_twin: STRING_8_SEARCHER
			-- New object field-by-field identical to other.
			-- Always uses default copying semantics.
			-- (from ANY)
		external
			"built_in"
		ensure -- from ANY
			standard_twin_not_void: Result /= Void
			equal: standard_equal (Result, Current)
		end

	frozen twin: STRING_8_SEARCHER
			-- New object equal to Current
			-- twin calls copy; to change copying/twinning semantics, redefine copy.
			-- (from ANY)
		external
			"built_in"
		ensure -- from ANY
			twin_not_void: Result /= Void
			is_equal: Result ~ Current
		end
	
feature -- Basic operations

	frozen as_attached: attached STRING_8_SEARCHER
		obsolete "Remove calls to this feature. [2017-05-31]"
			-- Attached version of Current.
			-- (Can be used during transitional period to convert
			-- non-void-safe classes to void-safe ones.)
			-- (from ANY)
		do
			Result := Current
		end

	frozen default: detachable STRING_8_SEARCHER
			-- Default value of object's type
			-- (from ANY)
		do
		end

	frozen default_pointer: POINTER
			-- Default value of type POINTER
			-- (Avoid the need to write p.default for
			-- some p of type POINTER.)
			-- (from ANY)
		do
		ensure -- from ANY
			instance_free: class
		end

	default_rescue
			-- Process exception for routines with no Rescue clause.
			-- (Default: do nothing.)
			-- (from ANY)
		do
		end

	frozen do_nothing
			-- Execute a null action.
			-- (from ANY)
		do
		ensure -- from ANY
			instance_free: class
		end
	
feature {NONE} -- Implementation

	initialize_fuzzy_deltas (a_pattern: READABLE_STRING_GENERAL; fuzzy: INTEGER_32)
			-- Compile a_pattern by computing the delta shift tables from a pattern
			-- string. This has to be done before searching occurs. The first delta
			-- table is computed with the full pattern, the second one is computed with
			-- the rightmost character removed, and so on. A total of (fuzzy + 1) tables
			-- are computed and stored in deltas_array.
			-- (from STRING_SEARCHER)
		require -- from STRING_SEARCHER
			a_pattern_not_void: a_pattern /= Void
			fuzzy_positive: fuzzy > 0
		local
			l_deltas: like deltas
			l_deltas_array: SPECIAL [like deltas]
			i, l_fuzzy: INTEGER_32
			l_pattern_count: INTEGER_32
		do
			from
				l_pattern_count := a_pattern.count
				l_fuzzy := fuzzy + 1
				create l_deltas_array.make_empty (l_fuzzy)
			until
				i = l_fuzzy
			loop
				create l_deltas.make_empty (Max_code_point_value + 1);
				l_deltas_array.extend (l_deltas)
				internal_initialize_deltas (a_pattern, l_pattern_count - i, l_deltas)
				i := i + 1
			end
			deltas_array := l_deltas_array
		ensure -- from STRING_SEARCHER
			deltas_array_not_void: deltas_array /= Void
			deltas_array_count_set: attached {SPECIAL [like deltas]} deltas_array as delta and then delta.count = fuzzy + 1
		end

	internal_initialize_deltas (a_pattern: READABLE_STRING_GENERAL; a_pattern_count: INTEGER_32; a_deltas: like deltas)
			-- Initialize a_deltas with a_pattern.
			-- Optimized for the top Max_code_point_value characters only.
			-- (from STRING_SEARCHER)
		require -- from STRING_SEARCHER
			a_pattern_not_void: a_pattern /= Void
			a_pattern_count_not_negative: a_pattern_count >= 0
			a_pattern_count_valid: a_pattern_count <= a_pattern.count
			a_deltas_not_void: a_deltas /= Void
			a_deltas_valid: a_deltas.capacity = Max_code_point_value + 1
		local
			i, l_char_code: INTEGER_32
		do
			a_deltas.fill_with (a_pattern_count + 1, 0, Max_code_point_value)
			from
				i := 0
			until
				i = a_pattern_count
			loop
				l_char_code := a_pattern.code (i + 1).to_integer_32
				if l_char_code <= Max_code_point_value then
					a_deltas.put (a_pattern_count - i, l_char_code)
				end
				i := i + 1
			end
		end
	
feature {NONE} -- Implementation: Access

	deltas: SPECIAL [INTEGER_32]
			-- Record shifting deltas.
			-- (from STRING_SEARCHER)

	deltas_array: detachable SPECIAL [like deltas]
			-- Record shifting deltas for fuzzy search.
			-- (from STRING_SEARCHER)
	
feature -- Output

	Io: STD_FILES
			-- Handle to standard file setup
			-- (from ANY)
		once
			create Result;
			Result.set_output_default
		ensure -- from ANY
			instance_free: class
			io_not_void: Result /= Void
		end

	out: STRING_8
			-- New string containing terse printable representation
			-- of current object
			-- (from ANY)
		do
			Result := tagged_out
		ensure -- from ANY
			out_not_void: Result /= Void
		end

	print (o: detachable ANY)
			-- Write terse external representation of o
			-- on standard output.
			-- (from ANY)
		local
			s: READABLE_STRING_8
		do
			if attached o then
				s := o.out
				if attached {READABLE_STRING_32} s as s32 then
					Io.put_string_32 (s32)
				elseif attached {READABLE_STRING_8} s as s8 then
					Io.put_string (s8)
				else
					Io.put_string_32 (s.as_string_32)
				end
			end
		ensure -- from ANY
			instance_free: class
		end

	frozen tagged_out: STRING_8
			-- New string containing terse printable representation
			-- of current object
			-- (from ANY)
		external
			"built_in"
		ensure -- from ANY
			tagged_out_not_void: Result /= Void
		end
	
feature -- Platform

	Operating_environment: OPERATING_ENVIRONMENT
			-- Objects available from the operating system
			-- (from ANY)
		once
			create Result
		ensure -- from ANY
			instance_free: class
			operating_environment_not_void: Result /= Void
		end
	
feature {NONE} -- Retrieval

	frozen internal_correct_mismatch
			-- Called from runtime to perform a proper dynamic dispatch on correct_mismatch
			-- from MISMATCH_CORRECTOR.
			-- (from ANY)
		local
			l_msg: STRING_32
			l_exc: EXCEPTIONS
		do
			if attached {MISMATCH_CORRECTOR} Current as l_corrector then
				l_corrector.correct_mismatch
			else
				create l_msg.make_from_string ("Mismatch: ".as_string_32)
				create l_exc;
				l_msg.append (generating_type.name_32);
				l_exc.raise_retrieval_exception (l_msg)
			end
		end
	
feature -- Search

	fuzzy_index (a_string: like string_type; a_pattern: READABLE_STRING_GENERAL; start_pos, end_pos, fuzzy: INTEGER_32): INTEGER_32
			-- Position of first occurrence of a_pattern at or after start_pos in
			-- a_string with 0..fuzzy mismatches between a_string and a_pattern.
			-- 0 if there are no fuzzy matches.
		require -- from STRING_SEARCHER
			a_string_not_void: a_string /= Void
			a_pattern_not_void: a_pattern /= Void
			a_pattern_not_empty: not a_pattern.is_empty
			start_large_enough: start_pos >= 1
			end_pos_large_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= a_string.count
			fuzzy_non_negative: fuzzy >= 0
			acceptable_fuzzy: fuzzy <= a_pattern.count
		local
			i, j, l_min_offset, l_end_pos: INTEGER_32
			l_pattern_count, l_nb_mismatched: INTEGER_32
			l_matched: BOOLEAN
			l_char_code: INTEGER_32
			l_deltas_array: like deltas_array
			l_area: SPECIAL [CHARACTER_8]
			l_area_lower: INTEGER_32
		do
			if fuzzy = a_pattern.count then
				Result := start_pos
			else
				if fuzzy = 0 then
					Result := substring_index (a_string, a_pattern, start_pos, end_pos)
				else
					initialize_fuzzy_deltas (a_pattern, fuzzy)
					l_deltas_array := deltas_array
					if l_deltas_array /= Void then
						from
							l_pattern_count := a_pattern.count
							l_area := a_string.area
							l_area_lower := a_string.area_lower
							i := start_pos + l_area_lower
							l_end_pos := end_pos + 1 + l_area_lower
						until
							i + l_pattern_count > l_end_pos
						loop
							from
								j := 0
								l_nb_mismatched := 0
								l_matched := True
							until
								j = l_pattern_count
							loop
								if l_area.item (i + j - 1).natural_32_code /= a_pattern.code (j + 1) then
									l_nb_mismatched := l_nb_mismatched + 1
									if l_nb_mismatched > fuzzy then
										j := l_pattern_count - 1
										l_matched := False
									end
								end
								j := j + 1
							end
							if l_matched then
								Result := i - l_area_lower
								i := l_end_pos
							else
								if i + l_pattern_count <= end_pos then
									from
										j := 0
										l_min_offset := l_pattern_count + 1
									until
										j > fuzzy
									loop
										l_char_code := l_area.item (i + l_pattern_count - j - 1).code
										l_min_offset := l_min_offset.min (l_deltas_array.item (j).item (l_char_code))
										j := j + 1
									end
									i := i + l_min_offset
								else
									i := i + 1
								end
							end
						end
					end
					deltas_array := Void
				end
			end
		end

	substring_index (a_string: like string_type; a_pattern: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER_32): INTEGER_32
			-- Position of first occurrence of a_pattern at or after start_pos
			-- and before end_pos in a_string.
			-- 0 if there are no matches.
			-- (from STRING_SEARCHER)
		require -- from STRING_SEARCHER
			a_string_not_void: a_string /= Void
			a_pattern_not_void: a_pattern /= Void
			start_large_enough: start_pos >= 1
			end_pos_large_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= a_string.count
		do
			if a_string = a_pattern then
				if start_pos = 1 then
					Result := 1
				end
			else
				if a_pattern.count = 0 then
					Result := start_pos
				elseif a_pattern.count <= end_pos - start_pos + 1 then
					internal_initialize_deltas (a_pattern, a_pattern.count, deltas)
					Result := substring_index_with_deltas (a_string, a_pattern, start_pos, end_pos)
				end
			end
		end

	substring_index_list_with_deltas (a_string: like string_type; a_pattern: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER_32): detachable ARRAYED_LIST [INTEGER_32]
			-- Index positions of all occurrences of a_pattern at or after start_pos until end_pos in a_string.
			-- Result is Void if there are no matches.
			-- (from STRING_SEARCHER)
		require -- from STRING_SEARCHER
			a_string_not_void: a_string /= Void
			a_pattern_not_void: a_pattern /= Void
			a_pattern_not_empty: not a_pattern.is_empty
			start_large_enough: start_pos >= 1
			end_pos_large_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= a_string.count
		local
			l_pattern_count, l_index: INTEGER_32
		do
			l_index := substring_index_with_deltas (a_string, a_pattern, start_pos, end_pos)
			if l_index > 0 then
				from
					l_pattern_count := a_pattern.count
					create Result.make ((((end_pos - start_pos).max (3) // (l_index + l_pattern_count)) // 4).max (2))
				until
					l_index = 0
				loop
					Result.extend (l_index)
					l_index := substring_index_with_deltas (a_string, a_pattern, l_index + l_pattern_count, end_pos)
				end
			end
		ensure -- from STRING_SEARCHER
			matches: Result /= Void implies not Result.is_empty
		end

	substring_index_with_deltas (a_string: like string_type; a_pattern: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER_32): INTEGER_32
			-- Position of first occurrence of a_pattern at or after start_pos in a_string.
			-- 0 if there are no matches.
		require -- from STRING_SEARCHER
			a_string_not_void: a_string /= Void
			a_pattern_not_void: a_pattern /= Void
			a_pattern_not_empty: not a_pattern.is_empty
			start_large_enough: start_pos >= 1
			end_pos_large_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= a_string.count
		local
			i, j, l_end_pos: INTEGER_32
			l_pattern_count: INTEGER_32
			l_matched: BOOLEAN
			l_deltas: like deltas
			l_area: SPECIAL [CHARACTER_8]
			l_area_lower: INTEGER_32
		do
			if a_string = a_pattern then
				if start_pos = 1 then
					Result := 1
				end
			else
				l_pattern_count := a_pattern.count
				check
					l_pattern_count_positive: l_pattern_count > 0
				end
				from
					l_area := a_string.area
					l_area_lower := a_string.area_lower
					i := start_pos + l_area_lower
					l_deltas := deltas
					l_end_pos := end_pos + 1 + l_area_lower
				until
					i + l_pattern_count > l_end_pos
				loop
					from
						j := 0
						l_matched := True
					until
						j = l_pattern_count
					loop
						if l_area.item (i + j - 1).natural_32_code /= a_pattern.code (j + 1) then
							j := l_pattern_count - 1
							l_matched := False
						end
						j := j + 1
					end
					if l_matched then
						Result := i - l_area_lower
						i := l_end_pos
					else
						if i + l_pattern_count <= end_pos then
							i := i + l_deltas.item (l_area.item (i + l_pattern_count - 1).code)
						else
							i := i + 1
						end
					end
				end
			end
		end
	
invariant
		-- from STRING_SEARCHER
	deltas_not_void: deltas /= Void
	deltas_valid: deltas.capacity = Max_code_point_value + 1

		-- from ANY
	reflexive_equality: standard_is_equal (Current)
	reflexive_conformance: conforms_to (Current)

note
	copyright: "Copyright (c) 1984-2017, Eiffel Software and others"
	license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
	source: "[
		Eiffel Software
		5949 Hollister Ave., Goleta, CA 93117 USA
		Telephone 805-685-1006, Fax 805-685-6869
		Website http://www.eiffel.com
		Customer support http://support.eiffel.com
	]"

end -- class STRING_8_SEARCHER

Generated by ISE EiffelStudio