18#include <sdsl/suffix_trees.hpp>
53template <
typename index_t>
72 index_type::text_layout_mode,
73 typename index_type::sdsl_index_type>>;
146 template <
typename csa_t>
147 requires (std::same_as<csa_t, typename index_type::sdsl_index_type>
148 || std::same_as<csa_t, typename index_type::rev_sdsl_index_type>)
bool
156 assert((l_fwd <= r_fwd) && (r_fwd < csa.size()));
157 assert(r_fwd + 1 >= l_fwd);
158 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
160 size_type _l_fwd, _r_fwd, _l_bwd, _r_bwd;
163 if constexpr (!std::same_as<index_alphabet_type, sdsl::plain_byte_alphabet>)
165 cc = csa.char2comp[c];
166 if (cc == 0 && c > 0)
171 if (r_fwd + 1 - l_fwd == csa.size())
175 _r_fwd = csa.C[cc + 1] - 1;
181 auto const r_s_b = csa.wavelet_tree.lex_count(l_fwd, r_fwd + 1, c);
182 size_type const rank_l = std::get<0>(r_s_b);
183 size_type const s = std::get<1>(r_s_b), b = std::get<2>(r_s_b);
184 size_type const rank_r = r_fwd - l_fwd - s - b + rank_l;
185 _l_fwd = c_begin + rank_l;
186 _r_fwd = c_begin + rank_r;
191 if (_r_fwd >= _l_fwd)
197 assert(r_fwd + 1 >= l_fwd);
198 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
205 template <
typename csa_t>
206 requires (std::same_as<csa_t, typename index_type::sdsl_index_type>
207 || std::same_as<csa_t, typename index_type::rev_sdsl_index_type>)
bool
217 assert((l_parent <= r_parent) && (r_parent < csa.size()));
220 if constexpr (std::same_as<index_alphabet_type, sdsl::plain_byte_alphabet>)
223 c_begin = csa.C[csa.char2comp[c]];
225 auto const r_s_b = csa.wavelet_tree.lex_count(l_parent, r_parent + 1, c);
226 size_type const s = std::get<1>(r_s_b), b = std::get<2>(r_s_b), rank_l = std::get<0>(r_s_b),
227 rank_r = r_parent - l_parent - s - b + rank_l;
229 size_type const _l_fwd = c_begin + rank_l;
230 size_type const _r_fwd = c_begin + rank_r;
232 size_type const _r_bwd = r_bwd + 1 + rank_r - rank_l;
234 if (_r_fwd >= _l_fwd)
240 assert(r_fwd + 1 >= l_fwd);
241 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
265 fwd_rb(_index.size() - 1),
267 rev_rb(_index.size() - 1),
287 assert(
index !=
nullptr);
309 assert(
index !=
nullptr);
311 return !(*
this == rhs);
337 assert(
index !=
nullptr);
344 index->fwd_fm.index.comp2char[c],
389 assert(
index !=
nullptr);
396 index->rev_fm.index.comp2char[c],
431 template <
typename char_t>
432 requires std::convertible_to<char_t, index_alphabet_type>
bool
439 assert(
index !=
nullptr);
462 template <
typename char_type>
463 requires seqan3::detail::is_char_adaptation_v<char_type>
bool
482 template <
typename char_t>
483 requires std::convertible_to<char_t, index_alphabet_type>
bool
490 assert(
index !=
nullptr);
513 template <
typename char_type>
514 requires seqan3::detail::is_char_adaptation_v<char_type>
bool
536 template <std::ranges::range seq_t>
539 static_assert(std::ranges::forward_range<seq_t>,
"The query must model forward_range.");
541 "The alphabet of the sequence must be convertible to the alphabet of the index.");
543 assert(
index !=
nullptr);
546 auto last = std::ranges::end(
seq);
557 for (
auto it = first; it != last; ++len, ++it)
566 new_parent_lb = _fwd_lb;
567 new_parent_rb = _fwd_rb;
606 template <std::ranges::range seq_t>
609 static_assert(std::ranges::bidirectional_range<seq_t>,
"The query must model bidirectional_range.");
611 "The alphabet of the sequence must be convertible to the alphabet of the index.");
612 assert(
index !=
nullptr);
614 auto rev_seq = std::views::reverse(
seq);
616 auto last = std::ranges::end(rev_seq);
628 for (
auto it = first; it != last; ++len, ++it)
637 new_parent_lb = _rev_lb;
638 new_parent_rb = _rev_rb;
695 index->fwd_fm.index.comp2char[c],
753 index->rev_fm.index.comp2char[c],
810 assert(
index !=
nullptr);
838 assert(
index !=
nullptr);
873 template <std::ranges::range text_t>
877 static_assert(std::ranges::input_range<text_t>,
"The text must model input_range.");
878 static_assert(range_dimension_v<text_t> == 1,
"The input cannot be a text collection.");
880 "The alphabet types of the given text and index differ.");
881 assert(
index !=
nullptr);
888 template <std::ranges::range text_t>
892 static_assert(std::ranges::input_range<text_t>,
"The text collection must model input_range.");
893 static_assert(range_dimension_v<text_t> == 2,
"The input must be a text collection.");
895 "The alphabet types of the given text and index differ.");
896 assert(
index !=
nullptr);
903 size_type const rank =
index->fwd_fm.text_begin_rs.rank(location + 1);
908 size_type const start_location =
index->fwd_fm.text_begin_ss.select(rank);
910 size_type const query_begin = location - start_location;
948 assert(
index !=
nullptr);
963 assert(
index !=
nullptr);
970 size_type sequence_rank =
index->fwd_fm.text_begin_rs.rank(loc + 1);
971 size_type sequence_position = loc -
index->fwd_fm.text_begin_ss.select(sequence_rank);
992 assert(
index !=
nullptr);
995 | std::views::transform(
996 [*
this, _offset =
offset()](
auto sa_pos)
1006 assert(
index !=
nullptr);
1009 | std::views::transform(
1010 [*
this, _offset =
offset()](
auto sa_pos)
1012 auto loc = _offset -
index->fwd_fm.index[sa_pos];
1013 size_type sequence_rank =
index->fwd_fm.text_begin_rs.rank(loc + 1);
1014 size_type sequence_position = loc -
index->fwd_fm.text_begin_ss.select(sequence_rank);
1026 template <cereal_archive archive_t>
Core alphabet concept and free function/type trait wrappers.
Provides alphabet adaptations for standard char types.
The SeqAn Bidirectional FM Index Cursor.
Definition: bi_fm_index_cursor.hpp:55
size_type rev_rb
Right suffix array interval of the reverse cursor (for extend_left).
Definition: bi_fm_index_cursor.hpp:103
bool extend_right() noexcept
Tries to extend the query by the smallest possible character to the right such that the query is foun...
Definition: bi_fm_index_cursor.hpp:331
typename index_t::sdsl_index_type sdsl_index_type
Type of the SDSL index.
Definition: bi_fm_index_cursor.hpp:80
typename index_type::sdsl_char_type sdsl_char_type
Type of the representation of characters in the underlying SDSL index.
Definition: bi_fm_index_cursor.hpp:78
size_type count() const noexcept
Counts the number of occurrences of the searched query in the text.
Definition: bi_fm_index_cursor.hpp:927
bool extend_left(seq_t &&seq) noexcept
Tries to extend the query by seq to the left.
Definition: bi_fm_index_cursor.hpp:607
bool operator==(bi_fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition: bi_fm_index_cursor.hpp:285
bool extend_left(char_type const *cstring) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: bi_fm_index_cursor.hpp:515
bool fwd_cursor_last_used
Stores the information which cursor has been used last for extend_*([...]) to allow for assert() in.
Definition: bi_fm_index_cursor.hpp:135
auto path_label(text_t &&text) const noexcept
Returns the searched query.
Definition: bi_fm_index_cursor.hpp:874
bool extend_left() noexcept
Tries to extend the query by the smallest possible character to the left such that the query is found...
Definition: bi_fm_index_cursor.hpp:383
index_type const * index
Type of the underlying FM index.
Definition: bi_fm_index_cursor.hpp:91
auto path_label(text_t &&text) const noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: bi_fm_index_cursor.hpp:889
typename index_type::sdsl_sigma_type sdsl_sigma_type
Type of the alphabet size in the underlying SDSL index.
Definition: bi_fm_index_cursor.hpp:82
locate_result_type locate() const
Locates the occurrences of the searched query in the text.
Definition: bi_fm_index_cursor.hpp:945
sdsl_sigma_type sigma
Alphabet size of the index without delimiters.
Definition: bi_fm_index_cursor.hpp:107
bool cycle_back() noexcept
Tries to replace the rightmost character of the query by the next lexicographically larger character ...
Definition: bi_fm_index_cursor.hpp:682
bool cycle_front() noexcept
Tries to replace the leftmost character of the query by the next lexicographically larger character s...
Definition: bi_fm_index_cursor.hpp:741
auto lazy_locate() const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: bi_fm_index_cursor.hpp:1003
typename index_t::alphabet_type index_alphabet_type
Alphabet type of the index.
Definition: bi_fm_index_cursor.hpp:84
fwd_cursor to_fwd_cursor() const noexcept
Returns a unidirectional seqan3::fm_index_cursor on the original text. path_label() on the returned u...
Definition: bi_fm_index_cursor.hpp:836
size_type depth
Depth of the node in the suffix tree, i.e. length of the searched query.
Definition: bi_fm_index_cursor.hpp:128
size_type query_length() const noexcept
Returns the depth of the cursor node in the implicit suffix tree, i.e. the length of the sequence sea...
Definition: bi_fm_index_cursor.hpp:808
bool extend_right(char_type const *cstring) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: bi_fm_index_cursor.hpp:464
size_type rev_lb
Left suffix array interval of the reverse cursor (for extend_left).
Definition: bi_fm_index_cursor.hpp:101
size_type last_rank() noexcept
Outputs the rightmost respectively leftmost rank depending on whether extend_right() or extend_left()...
Definition: bi_fm_index_cursor.hpp:789
bool bidirectional_search_cycle(csa_t const &csa, sdsl_char_type const c, size_type const l_parent, size_type const r_parent, size_type &l_fwd, size_type &r_fwd, size_type &l_bwd, size_type &r_bwd) const noexcept
Optimized bidirectional search for cycle_back() and cycle_front() without alphabet mapping.
Definition: bi_fm_index_cursor.hpp:208
index_t index_type
Type of the index.
Definition: bi_fm_index_cursor.hpp:58
bool bidirectional_search(csa_t const &csa, sdsl_char_type const c, size_type &l_fwd, size_type &r_fwd, size_type &l_bwd, size_type &r_bwd) const noexcept
Optimized bidirectional search without alphabet mapping.
Definition: bi_fm_index_cursor.hpp:149
typename index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: bi_fm_index_cursor.hpp:64
size_type offset() const noexcept
Helper function to recompute text positions since the indexed text is reversed.
Definition: bi_fm_index_cursor.hpp:139
bool operator!=(bi_fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition: bi_fm_index_cursor.hpp:307
std::vector< std::pair< size_type, size_type > > locate() const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: bi_fm_index_cursor.hpp:960
size_type parent_rb
Left suffix array interval of the parent node.
Definition: bi_fm_index_cursor.hpp:122
size_type fwd_lb
Left suffix array interval of the forward cursor (for extend_right).
Definition: bi_fm_index_cursor.hpp:97
bi_fm_index_cursor() noexcept=default
Default constructor. Accessing member functions on a default constructed object is undefined behavior...
size_type parent_lb
Left suffix array interval of the parent node.
Definition: bi_fm_index_cursor.hpp:120
auto lazy_locate() const
Locates the occurrences of the searched query in the text on demand, i.e. a std::ranges::view is retu...
Definition: bi_fm_index_cursor.hpp:989
bool extend_right(seq_t &&seq) noexcept
Tries to extend the query by seq to the right.
Definition: bi_fm_index_cursor.hpp:537
bool extend_right(char_t const c) noexcept
Tries to extend the query by the character c to the right.
Definition: bi_fm_index_cursor.hpp:433
bool extend_left(char_t const c) noexcept
Tries to extend the query by the character c to the left.
Definition: bi_fm_index_cursor.hpp:484
size_type fwd_rb
Right suffix array interval of the forward cursor (for extend_right).
Definition: bi_fm_index_cursor.hpp:99
sdsl_char_type _last_char
Label of the last edge moved down. Needed for cycle_back() or cycle_front().
Definition: bi_fm_index_cursor.hpp:124
The SeqAn FM Index Cursor.
Definition: fm_index_cursor.hpp:87
The SeqAn FM Index.
Definition: fm_index.hpp:189
Provides various transformation traits used by the range module.
T emplace_back(T... args)
Provides the unidirectional seqan3::fm_index.
Provides the seqan3::fm_index_cursor for searching in the unidirectional seqan3::fm_index.
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: alphabet/concept.hpp:155
@ seq
The "sequence", usually a range of nucleotides or amino acids.
text_layout
The possible text layouts (single, collection) the seqan3::fm_index and seqan3::bi_fm_index can suppo...
Definition: search/fm_index/concept.hpp:91
@ single
The text is a single range.
Definition: search/fm_index/concept.hpp:93
@ collection
The text is a range of ranges.
Definition: search/fm_index/concept.hpp:95
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:178
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides seqan3::views::slice.
Provides alphabet adaptations for standard uint types.