34 #include <seqan3/io/detail/record.hpp>
113 template <
typename t>
131 requires std::same_as<typename t::ref_sequences, ref_info_not_given> || requires ()
138 (!std::same_as<typename t::ref_sequences, ref_info_not_given> ||
140 requires std::ranges::forward_range<std::ranges::range_reference_t<typename t::ref_ids>>;
141 requires std::ranges::forward_range<typename t::ref_ids>;
182 template <
typename ref_sequences_t = ref_info_not_given,
typename ref_
ids_t = std::deque<std::
string>>
197 template <
typename _sequence_alphabet>
201 template <
typename _
id_alphabet>
208 template <
typename _quality_alphabet>
368 using dummy_ref_type = decltype(
views::repeat_n(
typename traits_type::sequence_alphabet{},
size_t{}) |
372 using ref_sequence_unsliced_type =
373 detail::lazy_conditional_t<std::ranges::range<typename traits_type::ref_sequences const>,
374 detail::lazy<std::ranges::range_reference_t,
375 typename traits_type::ref_sequences
const>,
379 using ref_sequence_sliced_type = decltype(std::declval<ref_sequence_unsliced_type>() |
views::slice(0, 0));
388 typename traits_type::sequence_alphabet>;
390 using id_type =
typename traits_type::template id_container<char>;
399 using ref_sequence_type = std::conditional_t<std::same_as<typename traits_type::ref_sequences, ref_info_not_given>,
401 ref_sequence_sliced_type>;
421 typename traits_type::quality_alphabet>;
427 using mate_type = std::tuple<ref_id_type, ref_offset_type, int32_t>;
433 using alignment_query_type = std::conditional_t<
436 decltype(std::declval<sequence_type &>() |
views::slice(0, 0))>,
442 using alignment_type = std::tuple<gap_decorator<ref_sequence_type>, alignment_query_type>;
494 static_assert([] () constexpr
496 for (
field f : selected_field_ids::as_array)
497 if (!field_ids::contains(f))
501 "You selected a field that is not valid for alignment files, please refer to the documentation "
502 "of sam_file_input::field_ids for the accepted values.");
524 using iterator = detail::in_file_iterator<sam_file_input>;
566 primary_stream{
new std::ifstream{}, stream_deleter_default}
568 init_by_filename(std::move(filename));
590 template <input_stream stream_t, sam_file_input_format file_format>
592 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type,
stream_char_type>
595 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
597 primary_stream{&stream, stream_deleter_noop}
599 init_by_format<file_format>();
603 template <input_stream stream_t, sam_file_input_format file_format>
605 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type,
stream_char_type>
608 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
610 primary_stream{
new stream_t{std::move(stream)}, stream_deleter_default}
612 init_by_format<file_format>();
639 typename traits_type::ref_ids & ref_ids,
640 typename traits_type::ref_sequences & ref_sequences,
642 primary_stream{
new std::ifstream{}, stream_deleter_default}
645 set_references(ref_ids, ref_sequences);
647 init_by_filename(std::move(filename));
675 template <input_stream stream_t, sam_file_input_format file_format>
677 typename traits_type::ref_ids & ref_ids,
678 typename traits_type::ref_sequences & ref_sequences,
679 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
681 primary_stream{&stream, stream_deleter_noop}
684 set_references(ref_ids, ref_sequences);
686 init_by_format<file_format>();
690 template <input_stream stream_t, sam_file_input_format file_format>
692 typename traits_type::ref_ids & ref_ids,
693 typename traits_type::ref_sequences & ref_sequences,
694 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
696 primary_stream{
new stream_t{std::move(stream)}, stream_deleter_default}
699 set_references(ref_ids, ref_sequences);
701 init_by_format<file_format>();
707 typename traits_type::ref_ids &&,
708 typename traits_type::ref_sequences &&,
711 template <input_stream stream_t, sam_file_input_format file_format>
713 typename traits_type::ref_ids &&,
714 typename traits_type::ref_sequences &&,
743 if (!first_record_was_read)
746 first_record_was_read =
true;
817 if (!first_record_was_read)
820 first_record_was_read =
true;
// Opens `filename` for reading on the primary stream, then derives the secondary stream and the
// format from it. Throws file_open_error if the primary stream is not good() after open().
830 void init_by_filename(std::filesystem::path filename)
// Give the stream buffer the pre-allocated `stream_buffer` storage; this happens before open().
832 primary_stream->rdbuf()->pubsetbuf(stream_buffer.data(), stream_buffer.size());
// primary_stream holds a basic_istream pointer; the filename constructors create it as a
// std::ifstream, so it is cast down here to reach open().
833 static_cast<std::basic_ifstream<char> *
>(primary_stream.get())->open(filename,
834 std::ios_base::in | std::ios::binary);
836 if (!primary_stream->good())
837 throw file_open_error{
"Could not open file " + filename.string() +
" for reading."};
// Both helpers receive the filename; presumably it is used to detect compression and to pick
// the format from the file extension -- TODO confirm against the helper definitions.
839 secondary_stream = detail::make_secondary_istream(*primary_stream, filename);
840 detail::set_format(format, filename);
// Makes `format_type` the active format and sets up the secondary stream from the primary stream
// (no filename is involved). Compilation fails if format_type is not contained in valid_formats.
844 template <
typename format_type>
845 void init_by_format()
847 static_assert(list_traits::contains<format_type, valid_formats>,
848 "You selected a format that is not in the valid_formats of this file.");
// Store a default-constructed exposer for the chosen format in the `format` member.
850 format = detail::sam_file_input_format_exposer<format_type>{};
851 secondary_stream = detail::make_secondary_istream(*primary_stream);
855 std::unique_ptr<header_type> header_ptr{
new header_type{}};
863 std::vector<char> stream_buffer{std::vector<char>(1'000'000)};
870 using stream_ptr_t = std::unique_ptr<std::basic_istream<stream_char_type>,
871 std::function<void(std::basic_istream<stream_char_type>*)>>;
873 static void stream_deleter_noop(std::basic_istream<stream_char_type> *) {}
875 static void stream_deleter_default(std::basic_istream<stream_char_type> * ptr) {
delete ptr; }
878 stream_ptr_t primary_stream{
nullptr, stream_deleter_noop};
880 stream_ptr_t secondary_stream{
nullptr, stream_deleter_noop};
883 bool first_record_was_read{
false};
888 using format_type =
typename detail::variant_from_tags<
valid_formats,
889 detail::sam_file_input_format_exposer>::type;
899 typename traits_type::ref_sequences
const * reference_sequences_ptr{
nullptr};
// Replaces the header with one constructed from `ref_ids`, remembers where `ref_sequences` lives,
// and fills the header's ref_id_info and ref_dict with one entry per reference.
// NOTE(review): only the address of `ref_sequences` is stored (raw pointer member), not a copy,
// so the referenced object has to outlive every later use of reference_sequences_ptr.
911 template <std::ranges::forward_range ref_sequences_t>
912 void set_references(
typename traits_type::ref_ids & ref_ids, ref_sequences_t && ref_sequences)
// Debug-only check that there is exactly one sequence per id.
914 assert(std::ranges::distance(ref_ids) == std::ranges::distance(ref_sequences));
916 header_ptr = std::unique_ptr<header_type>{std::make_unique<header_type>(ref_ids)};
917 reference_sequences_ptr = &ref_sequences;
// NOTE(review): std::ranges::distance(ref_ids) is re-evaluated in the loop condition on every
// iteration, and ref_sequences is indexed with operator[] although the template only constrains
// it to forward_range.
920 for (int32_t idx = 0; idx < std::ranges::distance(ref_ids); ++idx)
// Record the length of the idx-th reference sequence together with an empty string.
922 header_ptr->ref_id_info.emplace_back(std::ranges::distance(ref_sequences[idx]),
"");
// If a single id is a contiguous, sized, borrowed range, key the dictionary with a std::span
// built from the id's data pointer and size.
924 if constexpr (std::ranges::contiguous_range<std::ranges::range_reference_t<
925 typename traits_type::ref_ids>> &&
926 std::ranges::sized_range<std::ranges::range_reference_t<typename traits_type::ref_ids>> &&
927 std::ranges::borrowed_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>)
929 auto &&
id = header_ptr->ref_ids()[idx];
930 header_ptr->ref_dict[std::span{std::ranges::data(
id),
std::ranges::size(
id)}] = idx;
// Otherwise the id itself is used as the key (presumably the `else` branch; that line is not
// visible in this listing).
934 header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
// Resets record_buffer and lets the currently active format read one alignment record from the
// secondary stream into the buffer's fields.
941 void read_next_record()
// Start from an empty record and store the raw header pointer in its header_ptr field
// (get_or_ignore presumably yields a discardable target when the field is not selected -- TODO confirm).
944 record_buffer.clear();
945 detail::get_or_ignore<field::header_ptr>(record_buffer) = header_ptr.get();
// Compares against a default-constructed istreambuf_iterator, i.e. tests for end of input.
// The statements executed in that case are not visible in this listing.
948 if (std::istreambuf_iterator<stream_char_type>{*secondary_stream} ==
949 std::istreambuf_iterator<stream_char_type>{})
// Generic lambda taking the reference-sequence information; it visits the `format` variant and
// calls read_alignment_record on whichever format is active.
// NOTE(review): listing lines 960-962 (further arguments of the call) and the closing of both
// lambdas are absent from this excerpt.
955 auto call_read_func = [
this] (
auto & ref_seq_info)
957 std::visit([&] (
auto & f)
959 f.read_alignment_record(*secondary_stream,
963 detail::get_or_ignore<field::seq>(record_buffer),
964 detail::get_or_ignore<field::qual>(record_buffer),
965 detail::get_or_ignore<field::id>(record_buffer),
966 detail::get_or_ignore<field::offset>(record_buffer),
967 detail::get_or_ignore<field::ref_seq>(record_buffer),
968 detail::get_or_ignore<field::ref_id>(record_buffer),
969 detail::get_or_ignore<field::ref_offset>(record_buffer),
970 detail::get_or_ignore<field::alignment>(record_buffer),
971 detail::get_or_ignore<field::cigar>(record_buffer),
972 detail::get_or_ignore<field::flag>(record_buffer),
973 detail::get_or_ignore<field::mapq>(record_buffer),
974 detail::get_or_ignore<field::mate>(record_buffer),
975 detail::get_or_ignore<field::tags>(record_buffer),
976 detail::get_or_ignore<field::evalue>(record_buffer),
977 detail::get_or_ignore<field::bit_score>(record_buffer));
// std::visit requires a variant that holds a value.
981 assert(!format.valueless_by_exception());
// With reference sequences configured, pass the stored sequences; otherwise pass std::ignore
// (the `else` line between the two calls is not visible in this listing).
983 if constexpr (!std::same_as<typename traits_type::ref_sequences, ref_info_not_given>)
984 call_read_func(*reference_sequences_ptr);
986 call_read_func(std::ignore);
998 template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_
ids>
1005 template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_
ids>
1012 template <input_stream stream_type, sam_file_input_format file_format>
1019 template <input_stream stream_type, sam_file_input_format file_format>
1026 template <std::ranges::forward_range ref_ids_t,
1027 std::ranges::forward_range ref_sequences_t,
1031 std::remove_reference_t<ref_ids_t>>,
1036 template <std::ranges::forward_range ref_ids_t,
1037 std::ranges::forward_range ref_sequences_t>
1040 std::remove_reference_t<ref_ids_t>>,
1045 template <input_stream stream_type,
1046 std::ranges::forward_range ref_ids_t,
1047 std::ranges::forward_range ref_sequences_t,
1052 std::remove_reference_t<ref_ids_t>>,
1057 template <input_stream stream_type,
1058 std::ranges::forward_range ref_ids_t,
1059 std::ranges::forward_range ref_sequences_t,
1064 std::remove_reference_t<ref_ids_t>>,
1069 template <input_stream stream_type,
1070 std::ranges::forward_range ref_ids_t,
1071 std::ranges::forward_range ref_sequences_t,
1073 sam_file_input(stream_type && stream, ref_ids_t &, ref_sequences_t &, file_format
const &)
1075 std::remove_reference_t<ref_ids_t>>,
1080 template <input_stream stream_type,
1081 std::ranges::forward_range ref_ids_t,
1082 std::ranges::forward_range ref_sequences_t,
1084 sam_file_input(stream_type & stream, ref_ids_t &, ref_sequences_t &, file_format
const &)
1086 std::remove_reference_t<ref_ids_t>>,
Provides seqan3::aa27, container aliases and string literals.
Provides the seqan3::cigar alphabet.
Provides alphabet adaptations for standard char types.
A combined alphabet that can hold values of either of its alternatives.
Definition: alphabet_variant.hpp:131
The 15 letter DNA alphabet, containing all IUPAC symbols minus the gap.
Definition: dna15.hpp:51
The five letter DNA alphabet of A,C,G,T and the unknown character N.
Definition: dna5.hpp:51
A gap decorator allows the annotation of sequences with gap symbols while leaving the underlying sequence unmodified.
Definition: gap_decorator.hpp:83
Quality type for traditional Sanger and modern Illumina Phred scores.
Definition: phred42.hpp:47
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:334
Provides seqan3::dna15, container aliases and string literals.
Provides seqan3::dna5, container aliases and string literals.
This header includes C++17 filesystem support and imports it into namespace std::filesystem (independent of whether it is marked as "experimental").
Provides seqan3::gap_decorator.
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: sam_flag.hpp:74
field
An enumerator for the fields used in file formats.
Definition: record.hpp:63
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ alignment
The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
@ cigar
The cigar vector (std::vector<seqan3::cigar>) representing the alignment in SAM/BAM format.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ header_ptr
A pointer to the seqan3::sam_file_header object storing header information.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ id
The identifier, usually a string.
@ tags
The optional tags in the SAM format, stored in a dictionary.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr bool contains
Whether a type occurs in a type list or not.
Definition: traits.hpp:231
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:471
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:183
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:91
Provides the seqan3::detail::in_file_iterator class template.
The generic alphabet concept that covers most data types used in ranges.
Resolves to std::ranges::explicitly_convertible_to<type1, type2>(). This entity is not part of the SeqAn API.
A more refined container concept than seqan3::container.
Refines seqan3::alphabet and adds assignability.
A concept that indicates whether a writable alphabet represents quality scores.
Provides exceptions used in the I/O module.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides seqan3::phred42 quality scores.
Provides quality alphabet composites.
Adaptations of concepts from the Ranges TS.
Provides seqan3::views::repeat_n.
Provides seqan3::sam_record.
Provides helper data structures for the seqan3::sam_file_output.
Provides seqan3::views::slice.
A class template that holds a choice of seqan3::field.
Definition: record.hpp:128
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem.
Definition: exception.hpp:39
Type that contains multiple types.
Definition: type_list.hpp:29
Provides traits for seqan3::type_list.
Provides seqan3::tuple_like.