129 template <
typename stream_type,
130 typename seq_legal_alph_type,
131 typename stream_pos_type,
137 stream_pos_type & position_buffer,
140 qual_type & qualities);
142 template <
typename stream_type,
150 qual_type && qualities);
152 template <
typename stream_type,
153 typename seq_legal_alph_type,
154 typename ref_seqs_type,
155 typename ref_ids_type,
156 typename stream_pos_type,
159 typename ref_seq_type,
160 typename ref_id_type,
161 typename ref_offset_type,
167 typename tag_dict_type,
168 typename e_value_type,
169 typename bit_score_type>
172 ref_seqs_type & ref_seqs,
174 stream_pos_type & position_buffer,
178 ref_seq_type & SEQAN3_DOXYGEN_ONLY(
ref_seq),
181 cigar_type & cigar_vector,
185 tag_dict_type & tag_dict,
186 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
187 bit_score_type & SEQAN3_DOXYGEN_ONLY(
bit_score));
189 template <
typename stream_type,
190 typename header_type,
193 typename ref_seq_type,
194 typename ref_id_type,
197 typename tag_dict_type,
198 typename e_value_type,
199 typename bit_score_type>
202 header_type && header,
206 ref_seq_type && SEQAN3_DOXYGEN_ONLY(
ref_seq),
213 tag_dict_type && tag_dict,
214 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
215 bit_score_type && SEQAN3_DOXYGEN_ONLY(
bit_score));
240 template <
typename t>
243 return std::forward<t>(v);
246 template <arithmetic value_type>
253 template <
typename stream_it_t, std::ranges::forward_range field_type>
256 template <
typename stream_it_t>
259 template <
typename stream_it_t>
264template <
typename stream_type,
265 typename seq_legal_alph_type,
266 typename stream_pos_type,
272 stream_pos_type & position_buffer,
275 qual_type & qualities)
300 if constexpr (!detail::decays_to_ignore_v<seq_type>)
301 if (std::ranges::distance(
sequence) == 0)
302 throw parse_error{
"The sequence information must not be empty."};
303 if constexpr (!detail::decays_to_ignore_v<id_type>)
304 if (std::ranges::distance(
id) == 0)
305 throw parse_error{
"The id information must not be empty."};
312template <
typename stream_type,
320 qual_type && qualities)
345template <
typename stream_type,
346 typename seq_legal_alph_type,
347 typename ref_seqs_type,
348 typename ref_ids_type,
349 typename stream_pos_type,
352 typename ref_seq_type,
353 typename ref_id_type,
354 typename ref_offset_type,
360 typename tag_dict_type,
361 typename e_value_type,
362 typename bit_score_type>
366 ref_seqs_type & ref_seqs,
368 stream_pos_type & position_buffer,
372 ref_seq_type & SEQAN3_DOXYGEN_ONLY(
ref_seq),
375 cigar_type & cigar_vector,
379 tag_dict_type & tag_dict,
380 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
381 bit_score_type & SEQAN3_DOXYGEN_ONLY(
bit_score))
383 static_assert(detail::decays_to_ignore_v<ref_offset_type>
384 || detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
385 "The ref_offset must be a specialisation of std::optional.");
391 int32_t ref_offset_tmp{};
392 std::ranges::range_value_t<
decltype(header.
ref_ids())> ref_id_tmp{};
405 position_buffer = stream.tellg();
410 stream_it.cache_record_into(
'\n',
'\t',
raw_record);
414 if constexpr (!detail::decays_to_ignore_v<id_type>)
417 uint16_t flag_integral{};
427 if (ref_offset_tmp == -1)
429 else if (ref_offset_tmp > -1)
431 else if (ref_offset_tmp < -1)
432 throw format_error{
"No negative values are allowed for field::ref_offset."};
434 if constexpr (!detail::decays_to_ignore_v<mapq_type>)
439 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
444 if constexpr (!detail::decays_to_ignore_v<mate_type>)
446 std::ranges::range_value_t<
decltype(header.
ref_ids())> tmp_mate_ref_id{};
449 if (tmp_mate_ref_id ==
"=")
451 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
465 get<1>(
mate) = --tmp_pnext;
466 else if (tmp_pnext < 0)
467 throw format_error{
"No negative values are allowed at the mate mapping position."};
475 if constexpr (!detail::decays_to_ignore_v<seq_type>)
482 constexpr auto is_legal_alph = char_is_valid_for<seq_legal_alph_type>;
484 for (
size_t i = 0; i < seq_str.
size(); ++i)
486 if (!is_legal_alph(seq_str[i]))
488 + detail::type_name_as_string<seq_legal_alph_type>
501 size_t tag_begin_pos =
raw_record[10].find(
'\t');
504 (tag_begin_pos == std::string_view::npos) ?
raw_record[10] :
raw_record[10].substr(0, tag_begin_pos);
506 if constexpr (!detail::decays_to_ignore_v<qual_type>)
509 if constexpr (!detail::decays_to_ignore_v<seq_type> && !detail::decays_to_ignore_v<qual_type>)
511 if (std::ranges::distance(
seq) != 0 && std::ranges::distance(
qual) != 0
512 && std::ranges::distance(
seq) != std::ranges::distance(
qual))
515 std::ranges::distance(
seq),
516 ") and quality length (",
517 std::ranges::distance(
qual),
518 ") must be the same.")};
524 if constexpr (!detail::decays_to_ignore_v<tag_dict_type>)
526 while (tag_begin_pos != std::string_view::npos)
529 size_t const tag_end_pos =
raw_record[10].find(
'\t', tag_begin_pos);
532 char const * tag_end =
537 tag_begin_pos = tag_end_pos;
541 assert(stream_it == std::default_sentinel_t{} || *stream_it ==
'\n');
546template <
typename stream_type,
547 typename header_type,
550 typename ref_seq_type,
551 typename ref_id_type,
554 typename tag_dict_type,
555 typename e_value_type,
556 typename bit_score_type>
559 header_type && header,
563 ref_seq_type && SEQAN3_DOXYGEN_ONLY(
ref_seq),
570 tag_dict_type && tag_dict,
571 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
572 bit_score_type && SEQAN3_DOXYGEN_ONLY(
bit_score))
591 "The seq object must be a std::ranges::forward_range over "
592 "letters that model seqan3::alphabet.");
595 "The id object must be a std::ranges::forward_range over "
596 "letters that model seqan3::alphabet.");
598 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
600 static_assert((std::ranges::forward_range<ref_id_type> || std::integral<std::remove_reference_t<ref_id_type>>
601 || detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>,
std::optional>),
602 "The ref_id object must be a std::ranges::forward_range "
603 "over letters that model seqan3::alphabet.");
605 if constexpr (std::integral<std::remove_cvref_t<ref_id_type>>
606 || detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>,
std::optional>)
607 static_assert(!detail::decays_to_ignore_v<header_type>,
608 "If you give indices as reference id information the header must also be present.");
612 "The qual object must be a std::ranges::forward_range "
613 "over letters that model seqan3::alphabet.");
616 "The mate object must be a std::tuple of size 3 with "
617 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
618 "2) a std::integral or std::optional<std::integral>, and "
619 "3) a std::integral.");
622 ((std::ranges::forward_range<decltype(std::get<0>(
mate))>
624 || detail::is_type_specialisation_of_v<
626 std::optional>)&&(std::integral<std::remove_cvref_t<decltype(std::get<1>(
mate))>>
627 || detail::is_type_specialisation_of_v<
629 std::optional>)&&std::integral<std::remove_cvref_t<decltype(std::get<2>(
mate))>>),
630 "The mate object must be a std::tuple of size 3 with "
631 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
632 "2) a std::integral or std::optional<std::integral>, and "
633 "3) a std::integral.");
635 if constexpr (std::integral<std::remove_cvref_t<decltype(std::get<0>(
mate))>>
638 static_assert(!detail::decays_to_ignore_v<header_type>,
639 "If you give indices as mate reference id information the header must also be present.");
642 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
647 if constexpr (!detail::decays_to_ignore_v<header_type> && !detail::decays_to_ignore_v<ref_id_type>
648 && !std::integral<std::remove_reference_t<ref_id_type>>
649 && !detail::is_type_specialisation_of_v<std::remove_reference_t<ref_id_type>,
std::optional>)
656 if constexpr (std::ranges::contiguous_range<
decltype(
ref_id)> && std::ranges::sized_range<
decltype(
ref_id)>
657 && std::ranges::borrowed_range<
decltype(
ref_id)>)
666 "The ref_id type is not convertible to the reference id information stored in the "
667 "reference dictionary of the header object.");
675 "' was not in the list of references:",
681 throw format_error{
"The ref_offset object must be a std::integral >= 0."};
686 if constexpr (!detail::decays_to_ignore_v<header_type>)
700 constexpr char separator{
'\t'};
703 *stream_it = separator;
705 stream_it.write_number(
static_cast<uint16_t
>(
flag));
706 *stream_it = separator;
708 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
710 if constexpr (std::integral<std::remove_reference_t<ref_id_type>>)
714 else if constexpr (detail::is_type_specialisation_of_v<std::remove_reference_t<ref_id_type>,
std::optional>)
731 *stream_it = separator;
734 stream_it.write_number(
ref_offset.value_or(-1) + 1);
735 *stream_it = separator;
737 stream_it.write_number(
static_cast<unsigned>(
mapq));
738 *stream_it = separator;
740 if (!std::ranges::empty(cigar_vector))
742 for (
auto & c : cigar_vector)
743 stream_it.write_range(c.to_string());
750 *stream_it = separator;
752 if constexpr (std::integral<std::remove_reference_t<decltype(get<0>(
mate))>>)
756 else if constexpr (detail::is_type_specialisation_of_v<std::remove_reference_t<decltype(get<0>(
mate))>,
759 if (get<0>(
mate).has_value())
769 *stream_it = separator;
771 if constexpr (detail::is_type_specialisation_of_v<std::remove_cvref_t<decltype(get<1>(
mate))>,
std::optional>)
774 stream_it.write_number(get<1>(
mate).value_or(-1) + 1);
775 *stream_it = separator;
779 stream_it.write_number(get<1>(
mate));
780 *stream_it = separator;
783 stream_it.write_number(get<2>(
mate));
784 *stream_it = separator;
787 *stream_it = separator;
812template <arithmetic value_type>
821 while (start_pos != std::string_view::npos)
823 end_pos = str.
find(
',', start_pos);
824 auto end = (end_pos == std::string_view::npos) ? str.
end() : str.
begin() + end_pos;
826 tmp_vector.push_back(value);
828 start_pos = (end_pos == std::string_view::npos) ? end_pos : end_pos + 1;
830 variant = std::move(tmp_vector);
848 uint8_t dummy_byte{};
850 if (str.
size() % 2 != 0)
851 throw format_error{
"[CORRUPTED SAM FILE] Hexadecimal tag must have even number of digits."};
855 for (
auto hex_begin = str.
begin(), hex_end = str.
begin() + 2; hex_begin != str.
end(); hex_begin += 2, hex_end += 2)
859 if (res.ec == std::errc::invalid_argument)
861 +
"' could not be cast into type uint8_t."};
863 if (res.ec == std::errc::result_out_of_range)
865 +
"' into type uint8_t would cause an overflow."};
870 variant = std::move(tmp_vector);
895 assert(tag_str.
size() > 5);
897 uint16_t tag =
static_cast<uint16_t
>(tag_str[0]) << 8;
898 tag +=
static_cast<uint16_t
>(tag_str[1]);
900 char type_id = tag_str[3];
906 assert(tag_str.
size() == 6);
907 target[tag] = tag_str[5];
936 assert(tag_str.
size() > 6);
937 char array_value_type_id = tag_str[5];
939 switch (array_value_type_id)
964 +
"id of a SAM tag must be one of [cCsSiIf] but '" + array_value_type_id
971 "SAM tag ([TAG]:[TYPE_ID]:[VALUE]) must be one of [A,i,Z,H,B,f] but '")
972 + type_id +
"' was given."};
983template <
typename stream_it_t, std::ranges::forward_range field_type>
986 if (std::ranges::empty(field_value))
992 if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<field_type>>,
char>)
993 stream_it.write_range(field_value);
1005template <
typename stream_it_t>
1018template <
typename stream_it_t>
1022 auto const stream_variant_fn = [&stream_it](
auto && arg)
1026 if constexpr (std::ranges::input_range<T>)
1028 if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<T>>,
char>)
1030 stream_it.write_range(arg);
1032 else if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<T>>,
std::byte>)
1034 if (!std::ranges::empty(arg))
1038 for (
auto && elem : arg | std::views::drop(1))
1041 stream_it.write_number(std::to_integer<uint8_t>(elem));
1047 if (!std::ranges::empty(arg))
1051 for (
auto && elem : arg | std::views::drop(1))
1054 stream_it.write_number(elem);
1059 else if constexpr (std::same_as<std::remove_cvref_t<T>,
char>)
1065 stream_it.write_number(arg);
1069 for (
auto & [tag, variant] : tag_dict)
1071 *stream_it = separator;
1073 char const char0 = tag / 256;
1074 char const char1 = tag % 256;
Core alphabet concept and free function/type trait wrappers.
Functionally the same as std::istreambuf_iterator, but faster.
Definition: fast_istreambuf_iterator.hpp:40
Functionally the same as std::ostreambuf_iterator, but offers writing a range more efficiently.
Definition: fast_ostreambuf_iterator.hpp:40
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:343
Provides seqan3::detail::fast_ostreambuf_iterator.
auto const to_char
A view that calls seqan3::to_char() on each element in the input range.
Definition: to_char.hpp:63
constexpr auto assign_char_to
Assign a character to an alphabet object.
Definition: alphabet/concept.hpp:524
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: sam_flag.hpp:76
constexpr char sam_tag_type_char_extra[12]
Each type's SAM tag type extra char id. Index corresponds to the seqan3::detail::sam_tag_variant types...
Definition: sam_tag_dictionary.hpp:45
constexpr char sam_tag_type_char[12]
Each SAM tag type char identifier. Index corresponds to the seqan3::detail::sam_tag_variant types.
Definition: sam_tag_dictionary.hpp:42
constexpr std::vector< cigar > parse_cigar(std::string_view const cigar_str)
Parses a cigar string into a vector of operation-count pairs (e.g. (M, 3)).
Definition: io/sam_file/detail/cigar.hpp:90
@ none
None of the flags below are set.
constexpr auto take_until_and_consume
A view adaptor that returns elements from the underlying range until the functor evaluates to true (o...
Definition: take_until_view.hpp:588
constexpr auto istreambuf
A view factory that returns a view over the stream buffer of an input stream.
Definition: istreambuf_view.hpp:107
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ ref_seq
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ bit_score
The bit score (statistical significance indicator), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
std::string make_printable(char const c)
Returns a printable value for the given character c.
Definition: pretty_print.hpp:48
constexpr auto is_char
Checks whether a given letter is the same as the template non-type argument.
Definition: predicate.hpp:63
constexpr auto is_space
Checks whether c is a space character.
Definition: predicate.hpp:125
The generic alphabet concept that covers most data types used in ranges.
Checks whether from can be implicitly converted to to.
The generic concept for a (biological) sequence.
Whether a type behaves like a tuple.
Auxiliary functions for the SAM IO.
Provides seqan3::detail::istreambuf.
std::string to_string(value_type &&... values)
Streams all parameters via the seqan3::debug_stream and returns a concatenated string.
Definition: to_string.hpp:29
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides seqan3::sam_file_output_options.
Provides helper data structures for the seqan3::sam_file_output.
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
Provides seqan3::sequence_file_output_options.
Provides seqan3::views::slice.
Thrown if there is a parse error, such as reading an unexpected character from an input stream.
Definition: io/exception.hpp:48
The options type defines various option members that influence the behavior of all or some formats.
Definition: sam_file/output_options.hpp:26
bool add_carriage_return
The default plain text line-ending is "\n", but on Windows an additional carriage return is recommend...
Definition: sam_file/output_options.hpp:30
bool sam_require_header
Whether to require a header for SAM files.
Definition: sam_file/output_options.hpp:44
The options type defines various option members that influence the behaviour of all or some formats.
Definition: sequence_file/output_options.hpp:26
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
Provides seqan3::ranges::to.
Provides seqan3::views::to_char.
Provides traits to inspect some information of a type, for example its name.
Provides seqan3::tuple_like.