19 #include <sdsl/suffix_trees.hpp>
50 template <semialphabet alphabet_t, text_layout text_layout_mode_, std::ranges::range text_t>
55 static_assert(std::ranges::bidirectional_range<text_t>,
"The text must model bidirectional_range.");
57 "The alphabet of the text collection must be convertible to the alphabet of the index.");
58 static_assert(range_dimension_v<text_t> == 1,
"The input cannot be a text collection.");
60 if (std::ranges::empty(text))
65 static_assert(std::ranges::bidirectional_range<text_t>,
66 "The text collection must model bidirectional_range.");
67 static_assert(std::ranges::bidirectional_range<std::ranges::range_reference_t<text_t>>,
68 "The elements of the text collection must model bidirectional_range.");
70 "The alphabet of the text collection must be convertible to the alphabet of the index.");
71 static_assert(range_dimension_v<text_t> == 2,
"The input must be a text collection.");
73 if (std::ranges::empty(text))
86 template <
typename index_t>
87 class fm_index_cursor;
89 template <
typename index_t>
90 class bi_fm_index_cursor;
96 detail::sdsl_index sdsl_index_type_>
97 class reverse_fm_index;
137 sdsl::csa_wt<sdsl::wt_blcd<sdsl::bit_vector,
138 sdsl::rank_support_v<>,
139 sdsl::select_support_scan<>,
140 sdsl::select_support_scan<0>>,
143 sdsl::sa_order_sa_sampling<>,
144 sdsl::isa_sampling<>,
145 sdsl::plain_byte_alphabet>;
240 template <std::ranges::range text_t>
246 detail::fm_index_validator::validate<alphabet_t, text_layout_mode_>(text);
248 constexpr
auto sigma = alphabet_size<alphabet_t>;
255 sdsl::int_vector<8> tmp_text(std::ranges::distance(text));
261 if constexpr (sigma == 256)
265 "character alphabets the last one/two values are reserved"
266 "(single sequence/collection).");
270 | std::views::reverse,
271 std::ranges::begin(tmp_text));
273 sdsl::construct_im(
index, tmp_text, 0);
282 template <std::ranges::range text_t>
288 detail::fm_index_validator::validate<alphabet_t, text_layout_mode_>(text);
292 for (
auto && t : text)
293 text_sizes.
push_back(std::ranges::distance(t));
295 size_t const number_of_texts{text_sizes.
size()};
300 if (number_of_texts == text_size)
303 constexpr
auto sigma = alphabet_size<alphabet_t>;
308 sdsl::sd_vector_builder builder(text_size, number_of_texts);
309 size_t prefix_sum{0};
311 for (
auto &&
size : text_sizes)
313 builder.set(prefix_sum);
314 prefix_sum +=
size + 1;
322 sdsl::int_vector<8> tmp_text(text_size - (number_of_texts > 1));
324 constexpr uint8_t delimiter = sigma >= 255 ? 255 : sigma + 1;
332 if constexpr (sigma >= 255)
336 " for full character alphabets the last one/"
337 "two values are reserved (single sequence/"
343 | views::join(delimiter),
344 std::ranges::begin(tmp_text));
349 if (number_of_texts == 1)
350 tmp_text.back() = delimiter;
352 std::ranges::reverse(tmp_text);
359 if (number_of_texts == 1)
361 std::ranges::reverse(tmp_text.begin(), tmp_text.end() - 1);
362 tmp_text.back() = delimiter;
366 std::ranges::reverse(tmp_text);
370 sdsl::construct_im(
index, tmp_text, 0);
388 template <
typename bi_fm_index_t>
391 template <
typename fm_index_t>
394 template <
typename fm_index_t>
443 template <std::ranges::b
idirectional_range text_t>
512 return !(*
this == rhs);
541 template <cereal_archive archive_t>
551 auto sigma = alphabet_size<alphabet_t>;
553 if (sigma != alphabet_size<alphabet_t>)
556 " but it is being read into an fm_index with an alphabet of size " +
565 (tmp ?
"text collection" :
"single text") +
566 " but it is being read into an fm_index expecting a " +
578 template <std::ranges::range text_t>
607 template <std::ranges::range text_t>
612 auto reverse_text = text | std::views::reverse;
617 auto reverse_text = text | views::deep{std::views::reverse} | std::views::reverse;
626 template <std::ranges::b
idirectional_range text_t>
Adaptations of algorithms from the Ranges TS.
The SeqAn Bidirectional FM Index Cursor.
Definition: bi_fm_index_cursor.hpp:58
An FM Index specialisation that handles reversing the given text.
Definition: fm_index.hpp:604
void construct_(text_t &&text)
Constructs the index given a range. The range cannot be an rvalue (i.e. a temporary object) and has t...
Definition: fm_index.hpp:608
reverse_fm_index(text_t &&text)
Constructor that immediately constructs the index given a range. The range cannot be empty.
Definition: fm_index.hpp:627
The SeqAn FM Index Cursor.
Definition: fm_index_cursor.hpp:90
The SeqAn FM Index.
Definition: fm_index.hpp:194
sdsl_index_type_ sdsl_index_type
The type of the underlying SDSL index.
Definition: fm_index.hpp:200
fm_index()=default
Defaulted.
static constexpr text_layout text_layout_mode
Indicates whether index is built over a collection.
Definition: fm_index.hpp:375
cursor_type cursor() const noexcept
Returns a seqan3::fm_index_cursor on the index that can be used for searching. Cursor is pointing to ...
Definition: fm_index.hpp:529
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition: fm_index.hpp:461
sdsl::rank_support_sd< 1 > text_begin_rs
Rank support for text_begin.
Definition: fm_index.hpp:219
sdsl_index_type index
Underlying index from the SDSL.
Definition: fm_index.hpp:212
bool operator!=(fm_index const &rhs) const noexcept
Compares two indices.
Definition: fm_index.hpp:510
typename sdsl_index_type::alphabet_type::sigma_type sdsl_sigma_type
The type of the alphabet size of the underlying SDSL index.
Definition: fm_index.hpp:206
bool empty() const noexcept
Checks whether the index is empty.
Definition: fm_index.hpp:477
bool operator==(fm_index const &rhs) const noexcept
Compares two indices.
Definition: fm_index.hpp:493
typename sdsl_index_type::alphabet_type::char_type sdsl_char_type
The type of the reduced alphabet type. (The reduced alphabet might be smaller than the original alpha...
Definition: fm_index.hpp:204
fm_index & operator=(fm_index rhs)
When copy/move assigning, also update internal data structures.
Definition: fm_index.hpp:420
~fm_index()=default
Defaulted.
alphabet_t alphabet_type
The type of the underlying character of the indexed text.
Definition: fm_index.hpp:381
fm_index(fm_index &&rhs)
When move constructing, also update internal data structures.
Definition: fm_index.hpp:411
void construct(text_t &&text)
Constructs the index given a range. The range cannot be an rvalue (i.e. a temporary object) and has t...
Definition: fm_index.hpp:244
typename sdsl_index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: fm_index.hpp:383
sdsl::sd_vector text_begin
Bitvector storing begin positions for collections.
Definition: fm_index.hpp:215
sdsl::select_support_sd< 1 > text_begin_ss
Select support for text_begin.
Definition: fm_index.hpp:217
fm_index(text_t &&text)
Constructor that immediately constructs the index given a range. The range cannot be empty.
Definition: fm_index.hpp:444
fm_index(fm_index const &rhs)
When copy constructing, also update internal data structures.
Definition: fm_index.hpp:403
Provides various transformation traits used by the range module.
Provides the internal representation of a node of the seqan3::fm_index_cursor.
This header includes C++17 filesystem support and imports it into namespace std::filesystem (independ...
Provides the seqan3::fm_index_cursor for searching in the unidirectional seqan3::fm_index.
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:730
typename range_innermost_value< t >::type range_innermost_value_t
Shortcut for seqan3::range_innermost_value (transformation_trait shortcut).
Definition: type_traits.hpp:240
text_layout
The possible text layouts (single, collection) the seqan3::fm_index and seqan3::bi_fm_index can suppo...
Definition: concept.hpp:82
sdsl_wt_index_type default_sdsl_index_type
The default FM Index Configuration.
Definition: fm_index.hpp:151
sdsl::csa_wt< sdsl::wt_blcd< sdsl::bit_vector, sdsl::rank_support_v<>, sdsl::select_support_scan<>, sdsl::select_support_scan< 0 > >, 16, 10 '000 '000, sdsl::sa_order_sa_sampling<>, sdsl::isa_sampling<>, sdsl::plain_byte_alphabet > sdsl_wt_index_type
The FM Index Configuration using a Wavelet Tree.
Definition: fm_index.hpp:145
fm_index(text_t &&) -> fm_index< range_innermost_value_t< text_t >, text_layout
Deduces the alphabet and dimensions of the text.
Definition: fm_index.hpp:579
@ single
The text is a single range.
Definition: concept.hpp:84
@ collection
The text is a range of ranges.
Definition: concept.hpp:86
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:434
auto const to_rank
A view that calls seqan3::to_rank() on each element in the input range.
Definition: to_rank.hpp:67
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:70
The basis for seqan3::alphabet, but requires only rank interface (not char).
Provides seqan3::views::join.
The internal SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
SeqAn specific customisations in the standard namespace.
Adaptations of concepts from the Ranges TS.
Provides the concept for seqan3::detail::sdsl_index.
Internal representation of the node of an FM index cursor.
Definition: fm_index_cursor.hpp:34
Class used to validate the requirements on the input text of the fm_index.
Definition: fm_index.hpp:35
static void validate(text_t &&text)
Validates the fm_index template parameters and text.
Definition: fm_index.hpp:51
Provides seqan3::views::to.
Provides seqan3::views::to_rank.