note description: "[ Helper to perform efficient search of a string in another one. Note: The algorithm used is the one described in Communications of the ACM, volume 33, number 8, August 1990, by Daniel M. Sunday. The fuzzy version was presented by Peter R. Sibbald in Communications of the ACM, volume 35, number 4, April 1992 (Technical Correspondance). ]" library: "Free implementation of ELKS library" status: "See notice at end of class." legal: "See notice at end of class." date: "$Date: 2017-03-28 12:36:24 +0000 (Tue, 28 Mar 2017) $" revision: "$Revision: 100064 $" frozen class STRING_8_SEARCHER create make feature {NONE} -- Initialization default_create -- Process instances of classes with no creation clause. -- (Default: do nothing.) -- (from ANY) do end make -- Initialize current -- (from STRING_SEARCHER) do create deltas.make_empty (Max_code_point_value + 1) end feature -- Initialization initialize_deltas (a_pattern: READABLE_STRING_GENERAL) -- Initialize deltas with a_pattern. -- Optimized for the top Max_code_point_value characters only. -- (from STRING_SEARCHER) require -- from STRING_SEARCHER a_pattern_not_void: a_pattern /= Void do internal_initialize_deltas (a_pattern, a_pattern.count, deltas) end feature -- Access generating_type: TYPE [detachable STRING_8_SEARCHER] -- Type of current object -- (type of which it is a direct instance) -- (from ANY) external "built_in" ensure -- from ANY generating_type_not_void: Result /= Void end generator: STRING_8 -- Name of current object's generating class -- (base class of the type of which it is a direct instance) -- (from ANY) external "built_in" ensure -- from ANY generator_not_void: Result /= Void generator_not_empty: not Result.is_empty end Max_code_point_value: INTEGER_32 = 255 -- Maximum character value for which we optimize the lookup of a pattern. -- If any item's code of the searched string is above that limit, search will -- not be as efficient. -- For STRING_8, it is limited to the number of characters in the extended ASCII character set. string_type: READABLE_STRING_8 -- Type of strings Current manipulate to perform searches. do Result := "" ensure -- from STRING_SEARCHER for_typing_only: False end feature -- Comparison frozen deep_equal (a: detachable ANY; b: like arg #1): BOOLEAN -- Are a and b either both void -- or attached to isomorphic object structures? -- (from ANY) do if a = Void then Result := b = Void else Result := b /= Void and then a.is_deep_equal (b) end ensure -- from ANY instance_free: class shallow_implies_deep: standard_equal (a, b) implies Result both_or_none_void: (a = Void) implies (Result = (b = Void)) same_type: (Result and (a /= Void)) implies (b /= Void and then a.same_type (b)) symmetric: Result implies deep_equal (b, a) end frozen equal (a: detachable ANY; b: like arg #1): BOOLEAN -- Are a and b either both void or attached -- to objects considered equal? -- (from ANY) do if a = Void then Result := b = Void else Result := b /= Void and then a.is_equal (b) end ensure -- from ANY instance_free: class definition: Result = (a = Void and b = Void) or else ((a /= Void and b /= Void) and then a.is_equal (b)) end frozen is_deep_equal alias "≡≡≡" (other: STRING_8_SEARCHER): BOOLEAN -- Are Current and other attached to isomorphic object structures? -- (from ANY) require -- from ANY other_not_void: other /= Void external "built_in" ensure -- from ANY shallow_implies_deep: standard_is_equal (other) implies Result same_type: Result implies same_type (other) symmetric: Result implies other.is_deep_equal (Current) end is_equal (other: STRING_8_SEARCHER): BOOLEAN -- Is other attached to an object considered -- equal to current object? -- (from ANY) require -- from ANY other_not_void: other /= Void external "built_in" ensure -- from ANY symmetric: Result implies other ~ Current consistent: standard_is_equal (other) implies Result end frozen standard_equal (a: detachable ANY; b: like arg #1): BOOLEAN -- Are a and b either both void or attached to -- field-by-field identical objects of the same type? -- Always uses default object comparison criterion. -- (from ANY) do if a = Void then Result := b = Void else Result := b /= Void and then a.standard_is_equal (b) end ensure -- from ANY instance_free: class definition: Result = (a = Void and b = Void) or else ((a /= Void and b /= Void) and then a.standard_is_equal (b)) end frozen standard_is_equal alias "≜" (other: STRING_8_SEARCHER): BOOLEAN -- Is other attached to an object of the same type -- as current object, and field-by-field identical to it? -- (from ANY) require -- from ANY other_not_void: other /= Void external "built_in" ensure -- from ANY same_type: Result implies same_type (other) symmetric: Result implies other.standard_is_equal (Current) end feature -- Status report conforms_to (other: ANY): BOOLEAN -- Does type of current object conform to type -- of other (as per Eiffel: The Language, chapter 13)? -- (from ANY) require -- from ANY other_not_void: other /= Void external "built_in" end same_type (other: ANY): BOOLEAN -- Is type of current object identical to type of other? -- (from ANY) require -- from ANY other_not_void: other /= Void external "built_in" ensure -- from ANY definition: Result = (conforms_to (other) and other.conforms_to (Current)) end feature -- Duplication frozen clone (other: detachable ANY): like other obsolete "Use `twin' instead. [2017-05-31]" -- Void if other is void; otherwise new object -- equal to other -- -- For non-void other, clone calls copy; -- to change copying/cloning semantics, redefine copy. -- (from ANY) do if other /= Void then Result := other.twin end ensure -- from ANY instance_free: class equal: Result ~ other end copy (other: STRING_8_SEARCHER) -- Update current object using fields of object attached -- to other, so as to yield equal objects. -- (from ANY) require -- from ANY other_not_void: other /= Void type_identity: same_type (other) external "built_in" ensure -- from ANY is_equal: Current ~ other end frozen deep_clone (other: detachable ANY): like other obsolete "Use `deep_twin' instead. [2017-05-31]" -- Void if other is void: otherwise, new object structure -- recursively duplicated from the one attached to other -- (from ANY) do if other /= Void then Result := other.deep_twin end ensure -- from ANY instance_free: class deep_equal: deep_equal (other, Result) end frozen deep_copy (other: STRING_8_SEARCHER) -- Effect equivalent to that of: -- copy (other . deep_twin) -- (from ANY) require -- from ANY other_not_void: other /= Void do copy (other.deep_twin) ensure -- from ANY deep_equal: deep_equal (Current, other) end frozen deep_twin: STRING_8_SEARCHER -- New object structure recursively duplicated from Current. -- (from ANY) external "built_in" ensure -- from ANY deep_twin_not_void: Result /= Void deep_equal: deep_equal (Current, Result) end frozen standard_clone (other: detachable ANY): like other obsolete "Use `standard_twin' instead. [2017-05-31]" -- Void if other is void; otherwise new object -- field-by-field identical to other. -- Always uses default copying semantics. -- (from ANY) do if other /= Void then Result := other.standard_twin end ensure -- from ANY instance_free: class equal: standard_equal (Result, other) end frozen standard_copy (other: STRING_8_SEARCHER) -- Copy every field of other onto corresponding field -- of current object. -- (from ANY) require -- from ANY other_not_void: other /= Void type_identity: same_type (other) external "built_in" ensure -- from ANY is_standard_equal: standard_is_equal (other) end frozen standard_twin: STRING_8_SEARCHER -- New object field-by-field identical to other. -- Always uses default copying semantics. -- (from ANY) external "built_in" ensure -- from ANY standard_twin_not_void: Result /= Void equal: standard_equal (Result, Current) end frozen twin: STRING_8_SEARCHER -- New object equal to Current -- twin calls copy; to change copying/twinning semantics, redefine copy. -- (from ANY) external "built_in" ensure -- from ANY twin_not_void: Result /= Void is_equal: Result ~ Current end feature -- Basic operations frozen as_attached: attached STRING_8_SEARCHER obsolete "Remove calls to this feature. [2017-05-31]" -- Attached version of Current. -- (Can be used during transitional period to convert -- non-void-safe classes to void-safe ones.) -- (from ANY) do Result := Current end frozen default: detachable STRING_8_SEARCHER -- Default value of object's type -- (from ANY) do end frozen default_pointer: POINTER -- Default value of type POINTER -- (Avoid the need to write p.default for -- some p of type POINTER.) -- (from ANY) do ensure -- from ANY instance_free: class end default_rescue -- Process exception for routines with no Rescue clause. -- (Default: do nothing.) -- (from ANY) do end frozen do_nothing -- Execute a null action. -- (from ANY) do ensure -- from ANY instance_free: class end feature {NONE} -- Implementation initialize_fuzzy_deltas (a_pattern: READABLE_STRING_GENERAL; fuzzy: INTEGER_32) -- Compile a_pattern by computing the delta shift tables from a pattern -- string. This has to be done before searching occurs. The first delta -- table is computed with the full pattern, the second one is computed with -- the rightmost character removed, and so on. A total of (fuzzy + 1) tables -- are computed and stored in deltas_array. -- (from STRING_SEARCHER) require -- from STRING_SEARCHER a_pattern_not_void: a_pattern /= Void fuzzy_positive: fuzzy > 0 local l_deltas: like deltas l_deltas_array: SPECIAL [like deltas] i, l_fuzzy: INTEGER_32 l_pattern_count: INTEGER_32 do from l_pattern_count := a_pattern.count l_fuzzy := fuzzy + 1 create l_deltas_array.make_empty (l_fuzzy) until i = l_fuzzy loop create l_deltas.make_empty (Max_code_point_value + 1); l_deltas_array.extend (l_deltas) internal_initialize_deltas (a_pattern, l_pattern_count - i, l_deltas) i := i + 1 end deltas_array := l_deltas_array ensure -- from STRING_SEARCHER deltas_array_not_void: deltas_array /= Void deltas_array_count_set: attached {SPECIAL [like deltas]} deltas_array as delta and then delta.count = fuzzy + 1 end internal_initialize_deltas (a_pattern: READABLE_STRING_GENERAL; a_pattern_count: INTEGER_32; a_deltas: like deltas) -- Initialize a_deltas with a_pattern. -- Optimized for the top Max_code_point_value characters only. -- (from STRING_SEARCHER) require -- from STRING_SEARCHER a_pattern_not_void: a_pattern /= Void a_pattern_count_not_negative: a_pattern_count >= 0 a_pattern_count_valid: a_pattern_count <= a_pattern.count a_deltas_not_void: a_deltas /= Void a_deltas_valid: a_deltas.capacity = Max_code_point_value + 1 local i, l_char_code: INTEGER_32 do a_deltas.fill_with (a_pattern_count + 1, 0, Max_code_point_value) from i := 0 until i = a_pattern_count loop l_char_code := a_pattern.code (i + 1).to_integer_32 if l_char_code <= Max_code_point_value then a_deltas.put (a_pattern_count - i, l_char_code) end i := i + 1 end end feature {NONE} -- Implementation: Access deltas: SPECIAL [INTEGER_32] -- Record shifting deltas. -- (from STRING_SEARCHER) deltas_array: detachable SPECIAL [like deltas] -- Record shifting deltas for fuzzy search. -- (from STRING_SEARCHER) feature -- Output Io: STD_FILES -- Handle to standard file setup -- (from ANY) once create Result; Result.set_output_default ensure -- from ANY instance_free: class io_not_void: Result /= Void end out: STRING_8 -- New string containing terse printable representation -- of current object -- (from ANY) do Result := tagged_out ensure -- from ANY out_not_void: Result /= Void end print (o: detachable ANY) -- Write terse external representation of o -- on standard output. -- (from ANY) local s: READABLE_STRING_8 do if attached o then s := o.out if attached {READABLE_STRING_32} s as s32 then Io.put_string_32 (s32) elseif attached {READABLE_STRING_8} s as s8 then Io.put_string (s8) else Io.put_string_32 (s.as_string_32) end end ensure -- from ANY instance_free: class end frozen tagged_out: STRING_8 -- New string containing terse printable representation -- of current object -- (from ANY) external "built_in" ensure -- from ANY tagged_out_not_void: Result /= Void end feature -- Platform Operating_environment: OPERATING_ENVIRONMENT -- Objects available from the operating system -- (from ANY) once create Result ensure -- from ANY instance_free: class operating_environment_not_void: Result /= Void end feature {NONE} -- Retrieval frozen internal_correct_mismatch -- Called from runtime to perform a proper dynamic dispatch on correct_mismatch -- from MISMATCH_CORRECTOR. -- (from ANY) local l_msg: STRING_32 l_exc: EXCEPTIONS do if attached {MISMATCH_CORRECTOR} Current as l_corrector then l_corrector.correct_mismatch else create l_msg.make_from_string ("Mismatch: ".as_string_32) create l_exc; l_msg.append (generating_type.name_32); l_exc.raise_retrieval_exception (l_msg) end end feature -- Search fuzzy_index (a_string: like string_type; a_pattern: READABLE_STRING_GENERAL; start_pos, end_pos, fuzzy: INTEGER_32): INTEGER_32 -- Position of first occurrence of a_pattern at or after start_pos in -- a_string with 0..fuzzy mismatches between a_string and a_pattern. -- 0 if there are no fuzzy matches. require -- from STRING_SEARCHER a_string_not_void: a_string /= Void a_pattern_not_void: a_pattern /= Void a_pattern_not_empty: not a_pattern.is_empty start_large_enough: start_pos >= 1 end_pos_large_enough: start_pos <= end_pos + 1 end_pos_small_enough: end_pos <= a_string.count fuzzy_non_negative: fuzzy >= 0 acceptable_fuzzy: fuzzy <= a_pattern.count local i, j, l_min_offset, l_end_pos: INTEGER_32 l_pattern_count, l_nb_mismatched: INTEGER_32 l_matched: BOOLEAN l_char_code: INTEGER_32 l_deltas_array: like deltas_array l_area: SPECIAL [CHARACTER_8] l_area_lower: INTEGER_32 do if fuzzy = a_pattern.count then Result := start_pos else if fuzzy = 0 then Result := substring_index (a_string, a_pattern, start_pos, end_pos) else initialize_fuzzy_deltas (a_pattern, fuzzy) l_deltas_array := deltas_array if l_deltas_array /= Void then from l_pattern_count := a_pattern.count l_area := a_string.area l_area_lower := a_string.area_lower i := start_pos + l_area_lower l_end_pos := end_pos + 1 + l_area_lower until i + l_pattern_count > l_end_pos loop from j := 0 l_nb_mismatched := 0 l_matched := True until j = l_pattern_count loop if l_area.item (i + j - 1).natural_32_code /= a_pattern.code (j + 1) then l_nb_mismatched := l_nb_mismatched + 1 if l_nb_mismatched > fuzzy then j := l_pattern_count - 1 l_matched := False end end j := j + 1 end if l_matched then Result := i - l_area_lower i := l_end_pos else if i + l_pattern_count <= end_pos then from j := 0 l_min_offset := l_pattern_count + 1 until j > fuzzy loop l_char_code := l_area.item (i + l_pattern_count - j - 1).code l_min_offset := l_min_offset.min (l_deltas_array.item (j).item (l_char_code)) j := j + 1 end i := i + l_min_offset else i := i + 1 end end end end deltas_array := Void end end end substring_index (a_string: like string_type; a_pattern: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER_32): INTEGER_32 -- Position of first occurrence of a_pattern at or after start_pos -- and before end_pos in a_string. -- 0 if there are no matches. -- (from STRING_SEARCHER) require -- from STRING_SEARCHER a_string_not_void: a_string /= Void a_pattern_not_void: a_pattern /= Void start_large_enough: start_pos >= 1 end_pos_large_enough: start_pos <= end_pos + 1 end_pos_small_enough: end_pos <= a_string.count do if a_string = a_pattern then if start_pos = 1 then Result := 1 end else if a_pattern.count = 0 then Result := start_pos elseif a_pattern.count <= end_pos - start_pos + 1 then internal_initialize_deltas (a_pattern, a_pattern.count, deltas) Result := substring_index_with_deltas (a_string, a_pattern, start_pos, end_pos) end end end substring_index_list_with_deltas (a_string: like string_type; a_pattern: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER_32): detachable ARRAYED_LIST [INTEGER_32] -- Index positions of all occurrences of a_pattern at or after start_pos until end_pos in a_string. -- Result is Void if there are no matches. -- (from STRING_SEARCHER) require -- from STRING_SEARCHER a_string_not_void: a_string /= Void a_pattern_not_void: a_pattern /= Void a_pattern_not_empty: not a_pattern.is_empty start_large_enough: start_pos >= 1 end_pos_large_enough: start_pos <= end_pos + 1 end_pos_small_enough: end_pos <= a_string.count local l_pattern_count, l_index: INTEGER_32 do l_index := substring_index_with_deltas (a_string, a_pattern, start_pos, end_pos) if l_index > 0 then from l_pattern_count := a_pattern.count create Result.make ((((end_pos - start_pos).max (3) // (l_index + l_pattern_count)) // 4).max (2)) until l_index = 0 loop Result.extend (l_index) l_index := substring_index_with_deltas (a_string, a_pattern, l_index + l_pattern_count, end_pos) end end ensure -- from STRING_SEARCHER matches: Result /= Void implies not Result.is_empty end substring_index_with_deltas (a_string: like string_type; a_pattern: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER_32): INTEGER_32 -- Position of first occurrence of a_pattern at or after start_pos in a_string. -- 0 if there are no matches. require -- from STRING_SEARCHER a_string_not_void: a_string /= Void a_pattern_not_void: a_pattern /= Void a_pattern_not_empty: not a_pattern.is_empty start_large_enough: start_pos >= 1 end_pos_large_enough: start_pos <= end_pos + 1 end_pos_small_enough: end_pos <= a_string.count local i, j, l_end_pos: INTEGER_32 l_pattern_count: INTEGER_32 l_matched: BOOLEAN l_deltas: like deltas l_area: SPECIAL [CHARACTER_8] l_area_lower: INTEGER_32 do if a_string = a_pattern then if start_pos = 1 then Result := 1 end else l_pattern_count := a_pattern.count check l_pattern_count_positive: l_pattern_count > 0 end from l_area := a_string.area l_area_lower := a_string.area_lower i := start_pos + l_area_lower l_deltas := deltas l_end_pos := end_pos + 1 + l_area_lower until i + l_pattern_count > l_end_pos loop from j := 0 l_matched := True until j = l_pattern_count loop if l_area.item (i + j - 1).natural_32_code /= a_pattern.code (j + 1) then j := l_pattern_count - 1 l_matched := False end j := j + 1 end if l_matched then Result := i - l_area_lower i := l_end_pos else if i + l_pattern_count <= end_pos then i := i + l_deltas.item (l_area.item (i + l_pattern_count - 1).code) else i := i + 1 end end end end end invariant -- from STRING_SEARCHER deltas_not_void: deltas /= Void deltas_valid: deltas.capacity = Max_code_point_value + 1 -- from ANY reflexive_equality: standard_is_equal (Current) reflexive_conformance: conforms_to (Current) note copyright: "Copyright (c) 1984-2017, Eiffel Software and others" license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)" source: "[ Eiffel Software 5949 Hollister Ave., Goleta, CA 93117 USA Telephone 805-685-1006, Fax 805-685-6869 Website http://www.eiffel.com Customer support http://support.eiffel.com ]" end -- class STRING_8_SEARCHER
Generated by ISE EiffelStudio