& message,
103e->SetMessage(message);
111 template<
typenameT>
128}
else if(
id.IsGeneral() ||
id.IsLocal()) {
162 boolfound_in_starts = exon_biostarts.
find(exon_anchor_pos)
163!= exon_biostarts.
end();
165 boolfound_in_stops = exon_biostops.
find(exon_anchor_pos)
166!= exon_biostops.
end();
168 return(offset_pos < 0 && found_in_starts)
169|| (offset_pos > 0 && found_in_stops)
170|| (offset_pos == 0 && (found_in_starts || found_in_stops));
176 return!fuzz.
IsLim() ? 0
192 const intstart_offset_sign =
203 const intstop_offset_sign =
216 const boolstart_ok =
222start_offset_sign * sign);
230stop_offset_sign * sign);
232 returnstart_ok && stop_ok;
251 for(
CFeat_CIci(bsh, sel); ci; ++ci) {
309 CInt_fuzz& fuzz = (minus_strand == is_start) ? loc.
SetInt().SetFuzz_to()
310: loc.
SetInt().SetFuzz_from();
311 if(!fuzz.
Which()) {
312fuzz.
Assign(offset_fuzz);
360 TSeqPos& bio_start_ref = minus_strand ? loc->
SetInt().SetTo()
361: loc->
SetInt().SetFrom();
370: loc->
SetInt().SetTo();
390p.
SetLoc().Assign(*loc);
418 "Expected genomic_id in the variation to be the same as in spliced-seg");
424 longclosest_start = ss.
GetExons().front()->GetGenomic_start();
427 longclosest_stop = ss.
GetExons().front()->GetGenomic_start();
435closest_start = start;
461 if(start != closest_start || stop != closest_stop) {
462int_loc->
SetInt().SetFrom(closest_start);
463int_loc->
SetInt().SetTo(closest_stop);
472 if(start != closest_start) {
473 long offset= (start - closest_start);
481 if(stop != closest_stop) {
482 long offset= (stop - closest_stop);
512 for(
int i= 0;
i< 2;
i++) {
518 if(target_row == -1) {
521 "The alignment has no row for seq-id " 536 boolsource_loc_is_projected =
548 "HGVS exon-boundary position not found in alignment of " 572 "Mismatches in mapping",
578 static const long thr= 5000;
581 boolfar_start = start_offset +
thr< 0
585 boolfar_stop = stop_offset >
thr 589 if(far_start || far_stop) {
591 "Source location overhangs the alignment by at least 5kb ",
596 if(check_placements) {
618se->SetGenomic_start(ci.GetRange().GetFrom());
619se->SetGenomic_end(ci.GetRange().GetTo());
620se->SetProduct_start().SetNucpos(product_pos);
621se->SetProduct_end().SetNucpos(product_pos + ci.GetRange().GetLength() - 1);
622product_pos += ci.GetRange().GetLength();
638 result->SetLoc().SetNull();
654 if(
result->GetLoc().IsNull()) {
712 for(
CFeat_CIci(bsh, sel); ci; ++ci) {
729aln = SerialClone<CSeq_align>(current_aln);
742 if(!
result->GetLoc().IsNull()) {
752 if(!
result->GetLoc().IsNull()) {
755 result->Assign(*mapped_placement);
761 result->SetLoc().Assign(*loc);
767 template<
typenameT>
778 if(c !=
'A'&& c !=
'C'&& c !=
'G'&& c !=
'T') {
788 boolhad_ambiguities =
false;
792had_ambiguities =
true;
804 returnhad_ambiguities;
809 boolinvalid_location =
false;
810 boolout_of_order =
false;
813invalid_location =
true;
818out_of_order =
true;
826out_of_order =
true;
827invalid_location =
true;
833 if(invalid_location) {
835out_of_order ?
"Invalid location - start and stop are out of order" 836:
"Invalid location",
850 "Bioseq is suppressed or withdrawn",
855 returninvalid_location;
876 "Cannot use Mapper-based method to remap intronic cases;" 877 "must remap via spliced-seg alignment instead.");
889 "Mismatches in mapping",
927 const boolequal_offsets = (
937 const boolmerge_single_range =
939&& mapped_loc->
IsPnt()
942 if(mapped_loc->
IsInt()
948mapped_loc->
SetInt().ResetFuzz_to();
966loc1->
SetInt().SetTo() += 500;
969 if(tmp_mapped_loc->
GetId()) {
980p2->
SetLoc(*mapped_loc);
989 if(mapped_len == 0) {
992}
else if(mapped_len < orig_len) {
995}
else if(!orig_is_compound && mapped_is_compound) {
1000exception->SetMessage(
"");
1024 "Source location overhangs the alignment by at least 5kb",
1039p.
SetSeq().SetLength(length);
1044 "Can't get sequence for an offset-based location",
1047}
else if(length > max_len) {
1049 "Sequence is longer than the cutoff threshold",
1059 "Ambiguous residues in reference",
1067 "Cannot fetch sequence at location",
1078 boolhad_exceptions =
false;
1091 if(!
v2.GetData().IsInstance() || (
v2.GetConsequenceParent() && &v != &
v2)) {
1097&& inst.
GetDelta().front()->IsSetSeq()
1098&& inst.
GetDelta().front()->GetSeq().IsLiteral()) {
1100 if(!asserted_literal
1104}
else if(!variant_literal
1107&& (!inst.
GetDelta().front()->IsSetMultiplier() && !inst.
GetDelta().front()->IsSetMultiplier_fuzz())
1115 if(variant_literal) {
1116 LOG_POST(
"Found variant-literal");
1119 LOG_POST(
"Did not find variant-literal");
1135 "Asserted sequence is inconsistent with reference",
1137had_exceptions =
true;
1142&& variant_literal->Equals(p.
GetSeq())) {
1144 "Reference sequence is the same as variant",
1146had_exceptions =
true;
1153v.
SetData().SetSet().SetVariations())
1156had_exceptions = had_exceptions ||
AttachSeq(
v2, max_len);
1159 return!had_exceptions;
1185&& ((
id.GetTextseq_Id()->
GetAccession() >=
"NC_000001"&&
id.GetTextseq_Id()->
GetAccession() <=
"NC_000024")
1186|| (
id.GetTextseq_Id()->
GetAccession() >=
"NC_000067"&&
id.GetTextseq_Id()->
GetAccession() <=
"NC_000087"))) {
1247 if(!loc.
GetId()) {
1278 first.SetInt().SetTo(start - 1);
1281 if(stop == max_pos) {
1284second.
SetInt().SetFrom(stop + 1);
1299 if(prot_str.size() != 1) {
1303 static const char* alphabet =
"ACGT";
1304 stringcodon =
"AAA";
1305 for(
size_ti0 = 0; i0 < 4; i0++) {
1306codon[0] = alphabet[i0];
1307 for(
size_ti1 = 0; i1 < 4; i1++) {
1308codon[1] = alphabet[i1];
1309 for(
size_ti2 = 0; i2 < 4; i2++) {
1310codon[2] = alphabet[i2];
1316 if(
prot== prot_str) {
1317 codons.push_back(codon);
1327 for(
size_t i= 0;
i<
min(
a.size(),
b.size());
i++) {
1328 if(
a[
i] ==
b[
i]) {
1336 const string& codon_from,
1337 const string& prot_to,
1338vector<string>& codons_to)
1340vector<string> candidates1;
1341 size_tmax_matches(0);
1344 boolhave_silent =
false;
1346 ITERATE(vector<string>, it1, candidates1)
1354have_silent =
true;
1358 if(matches >= max_matches) {
1359 if(matches > max_matches) {
1362codons_to.push_back(*it1);
1363max_matches = matches;
1368 if(codons_to.empty() && have_silent) {
1369codons_to.push_back(codon_from);
1375 stringcollapsed_seq;
1379 typedef constvector<string> TConstStrs;
1380 ITERATE(TConstStrs, it, seqs)
1382 const string& seq = *it;
1383 if(seq.size() > bits.size()) {
1384bits.resize(seq.size());
1387 for(
size_t i= 0;
i< seq.size();
i++) {
1389 intm = (nt ==
'T'? 1
1392: nt ==
'A'? 8 : 0);
1401 static const char* iupac_nuc_ambiguity_codes =
"NTGKCYSBAWRDMHVN";
1402collapsed_seq.resize(bits.size());
1403 for(
size_t i= 0;
i< collapsed_seq.size();
i++) {
1404collapsed_seq[
i] = iupac_nuc_ambiguity_codes[bits[
i]];
1406 returncollapsed_seq;
1414v.
SetData().SetSet().SetVariations().clear();
1420 if(
v2->GetData().IsInstance()
1421&&
v2->GetData().GetInstance().IsSetObservation()
1427v.
SetData().SetSet().SetVariations().push_back(
v2);
1440 if(!placements || placements->size() == 0) {
1467 if(!prot2precursor_mapper) {
1487 v2->SetPlacements().push_back(p);
1493 if(!nuc_loc->
IsInt()
1497|| !
delta->IsSetSeq()
1498|| !
delta->GetSeq().IsLiteral()
1499||
delta->GetSeq().GetLiteral().GetLength() != 1)
1505 v2->SetData().SetUnknown();
1506 v2->SetPlacements().push_back(p);
1513 stringoriginal_allele_codon;
1516 stringvariant_codon;
1520 delta->GetSeq().GetLiteral().GetSeq_data(),
1524vector<string> variant_codons;
1533original_allele_codon,
1542&& variant_codon != original_allele_codon) {
1543 while(variant_codon.length() > 0
1544&& original_allele_codon.length() > 0
1545&& variant_codon.at(0) == original_allele_codon.at(0)) {
1546variant_codon = variant_codon.substr(1);
1547original_allele_codon = original_allele_codon.substr(1);
1549nuc_loc->
SetInt().SetTo()--;
1551nuc_loc->
SetInt().SetFrom()++;
1555 while(variant_codon.length() > 0
1556&& original_allele_codon.length() > 0
1557&& variant_codon.at(variant_codon.length() - 1)
1558== original_allele_codon.at(original_allele_codon.length() - 1)) {
1559variant_codon.resize(variant_codon.length() - 1);
1560original_allele_codon.resize(original_allele_codon.length() - 1);
1564nuc_loc->
SetInt().SetFrom()++;
1566nuc_loc->
SetInt().SetTo()--;
1572delta2->SetSeq().SetLiteral().SetLength(variant_codon.length());
1573delta2->SetSeq().SetLiteral().SetSeq_data().SetIupacna().Set(variant_codon);
1583 v2->SetPlacements().push_back(p2);
1587 v2->SetData().SetUnknown();
1591inst2.
SetDelta().push_back(delta2);
1648 if(cached_literal) {
1669 literal->SetSeq_data().SetNcbieaa().Set().push_back(
1677 literal->SetLength(seq.size());
1679 literal->SetSeq_data().SetNcbieaa().Set(seq);
1681 literal->SetSeq_data().SetIupacna().Set(seq);
1699 if(
b.GetLength() == 0) {
1701}
else if(
a.GetLength() == 0) {
1706 if(
a.IsSetFuzz() ||
b.IsSetFuzz()) {
1710 if(
a.IsSetSeq_data() &&
b.IsSetSeq_data()) {
1712 a.GetSeq_data(), 0,
a.GetLength(),
1713 b.GetSeq_data(), 0,
b.GetLength());
1787v.
SetData().SetSet().SetVariations())
1798 if(!this_literal) {
1801 "Could not find literal for 'this' location in placements");
1805di->SetSeq().SetLiteral().Assign(*this_literal);
1813 if(inst.
GetDelta().size() == 0) {
1815di->SetSeq().SetLiteral().SetLength(0);
1816di->SetSeq().SetLiteral().SetSeq_data().SetIupacna().Set(
"");
1818}
else if(inst.
GetDelta().size() > 1) {
1826di.
SetSeq().SetLiteral().SetLength(0);
1827di.
SetSeq().SetLiteral().SetSeq_data().SetIupacna().Set(
"");
1836 if(!this_literal) {
1840di.
SetSeq().SetLiteral().Assign(*this_literal);
1851 if(!
literal.IsSetSeq_data() || !
literal.GetSeq_data().IsIupacna()) {
1854 stringstr_kernel =
literal.GetSeq_data().GetIupacna().Get();
1855 literal.SetSeq_data().SetIupacna().Set(
"");
1857 literal.SetSeq_data().SetIupacna().Set() += str_kernel;
1879 if(!this_literal) {
1881}
else if(this_literal->GetLength() == 0) {
1889this_literal->GetLength() - 1);
1902 boolignore_genomic)
1909v.
SetData().SetSet().SetVariations())
1923 if(!placements || placements->size() == 0) {
1964consequence->SetVariation(*prot_variation);
1971 static string Translate(
const string& nuc_str,
boolis_mito)
1976 code.SetId(is_mito ? 2 : 1);
1984 if(prot_str.size() * 3 < nuc_str.size()) {
1985prot_str.push_back(
'X');
1989 size_tstop_pos = prot_str.find(
'*');
1990 if(stop_pos !=
NPOS) {
1991prot_str.resize(stop_pos + 1);
1998 const string& prot_ref_str,
1999 const string& prot_delta_str)
2013 const string& prot_ref_str,
2014 const string& prot_delta_str)
2017 for(
size_t i= 0;
i< prot_ref_str.size() &&
i< prot_delta_str.size();
i++) {
2018 if(prot_ref_str[
i] == prot_delta_str[
i]) {
2020}
else if(prot_ref_str[
i] ==
'*') {
2022}
else if(prot_delta_str[
i] ==
'*') {
2033 const string& prot_ref_str,
2034 const string& prot_variant_str)
2038 boolstop_gain =
false;
2039 boolstop_loss =
false;
2040 for(
size_t i= 0;
i<
max(prot_ref_str.size(), prot_variant_str.size());
i++) {
2041 char r=
i>= prot_ref_str.size() ?
'-': prot_ref_str[
i];
2042 charv =
i>= prot_variant_str.size() ?
'-': prot_variant_str[
i];
2044 if(
r==
'*'&& v !=
'*') {
2048 if(
r!=
'*'&& v ==
'*') {
2059 if(nuc_delta_len == 0) {
2060 if(!stop_gain && !stop_loss) {
2064}
else if(nuc_delta_len % 3 == 0) {
2076vp.
SetLoc().FlipStrand();
2079vp.
SetSeq().SetSeq_data(),
2080&vp.
SetSeq().SetSeq_data(),
2088 if(
tmp->IsSetStart_offset()) {
2094 if(
tmp->IsSetStop_offset()) {
2100 if(
tmp->IsSetStart_offset_fuzz()) {
2106 if(
tmp->IsSetStop_offset_fuzz()) {
2151v.
SetData().SetSet().SetVariations())
2172di.
SetSeq().SetLoc().FlipStrand();
2175di.
SetSeq().GetLiteral().GetSeq_data(),
2176&di.
SetSeq().SetLiteral().SetSeq_data(),
2214sub_loc->
Assign(*range_loc);
2217 if(!suffix_loc->
Which()) {
2221sub_loc->
Assign(*range_loc);
2224 if(!prefix_loc->
Which()) {
2229 swap(prefix_loc, suffix_loc);
2241 if(inst.
GetDelta().size() != 1) {
2247 if(!
delta.IsSetSeq() || !
delta.GetSeq().IsLiteral()) {
2253 delta.SetSeq().SetLiteral(*tmp_literal2);
2277p->
SetLoc().SetWhole().Assign(
id);
2280v->SetData().SetUnknown();
2281v->SetPlacements().push_back(p);
2289 boolis_frameshifting,
2306prot_loc = nuc2prot_mapper->
Map(nuc_p.
GetLoc());
2307codons_loc = prot2nuc_mapper->
Map(*prot_loc);
2311 if(codons_loc->
IsNull()) {
2327v->SetData().SetUnknown();
2330prot_p->
SetLoc(*prot_loc);
2333 "Cannot infer consequence; projecting location only",
2335v->SetPlacements().push_back(prot_p);
2338codons_p->
SetLoc(*codons_loc);
2340v->SetPlacements().push_back(codons_p);
2347 if(is_frameshifting) {
2358 while(
i<
a.size() &&
i<
b.size() &&
a[
i] ==
b[
i]) {
2367 while(
i<
a.size() &&
i<
b.size() &&
a[
a.size() - 1 -
i] ==
b[
b.size() - 1 -
i]) {
2406v->SetData().SetInstance().Assign(nuc_inst);
2407v->ResetPlacements();
2412v->SetPlacements().push_back(p);
2428 const CDelta_item& nuc_delta = *v->GetData().GetInstance().GetDelta().front();
2461prot_loc = nuc2prot_mapper->
Map(p->
GetLoc());
2462codons_loc = prot2nuc_mapper->
Map(*prot_loc);
2474&& !codons_loc.
IsNull()
2475&& !codons_loc->
IsNull()
2479 intframeshift_phase = nuc_delta_len % 3;
2480 if(frameshift_phase < 0) {
2481frameshift_phase += 3;
2488frameshift_phase != 0,
2493 stringdownstream_cds_suffix_seq_str;
2509downstream_cds_loc = ext_cds_loc->
Intersect(
2517 if(
literal->GetLength() > 0) {
2518downstream_cds_suffix_seq_str =
literal->GetSeq_data().GetIupacna().Get();
2536 if(!v->GetPlacements().front()->GetSeq().IsSetSeq_data()) {
2540frameshift_phase != 0,
2546 stringnuc_ref_prefix = v->GetPlacements().front()->GetSeq().GetSeq_data().GetIupacna().Get();
2548 const CSeq_literal& nuc_var_literal = v->GetData().GetInstance().GetDelta().front()->GetSeq().GetLiteral();
2551 stringnuc_ref_str = nuc_ref_prefix + downstream_cds_suffix_seq_str;
2552 stringnuc_var_str = nuc_var_prefix + downstream_cds_suffix_seq_str;
2556 intnum_ref_codons = (nuc_ref_prefix.size() + 2) / 3;
2557 intnum_var_codons = (nuc_var_prefix.size() + 2) / 3;
2561<<
"nuc_var_str: "<< nuc_var_str <<
"\n";
2565<<
"prot_var_str: "<< prot_var_str <<
"\n";
2568 intcommon_prot_prefix_len(0);
2571 if(prot_ref_str == prot_var_str) {
2574prot_ref_str.resize(
min(
static_cast<int>(prot_ref_str.size()), num_ref_codons));
2575prot_var_str.resize(prot_ref_str.size());
2577 if(prot_ref_str.size() > 0 && *prot_ref_str.rbegin() ==
'*') {
2579frameshift_phase = 0;
2593 if(common_prot_prefix_len > 0
2594&& common_prot_prefix_len ==
static_cast<int>(prot_ref_str.size())) {
2595common_prot_prefix_len -= 1;
2599prot_ref_str = prot_ref_str.substr(common_prot_prefix_len);
2600prot_var_str = prot_var_str.substr(common_prot_prefix_len);
2602 if(
verbose)
NcbiCerr<<
"prot_ref_str: "<< prot_ref_str <<
":"<< prot_ref_str.size() <<
"\n" 2603<<
"prot_var_str: "<< prot_var_str <<
":"<< prot_var_str.size() <<
"\n";
2605 if(frameshift_phase == 0) {
2608 size_tmin_len =
min(prot_ref_str.size(), prot_var_str.size());
2609 size_tref_stop_pos = prot_ref_str.find(
'*');
2610 size_tvar_stop_pos = prot_var_str.find(
'*');
2611 size_tmin_stop_pos =
min(ref_stop_pos, var_stop_pos);
2614 booltruncate_at_stop = min_stop_pos < min_len
2615&& ref_stop_pos != var_stop_pos
2616&& nuc_delta_len == 0;
2618 if(truncate_at_stop) {
2619prot_ref_str.resize(min_stop_pos + 1);
2620prot_var_str.resize(min_stop_pos + 1);
2622prot_ref_str.resize(prot_ref_str.size() - suffix_len);
2623prot_var_str.resize(prot_var_str.size() - suffix_len);
2628prot_ref_str.resize(
min(
static_cast<size_t>(1), prot_ref_str.size()));
2629prot_var_str.resize(
min(
static_cast<size_t>(1), prot_var_str.size()));
2634 if(prot_ref_str.size() == 0) {
2637prot_loc->
SetInt().SetFrom() += common_prot_prefix_len - 1;
2638prot_loc->
SetInt().SetTo(prot_loc->
SetInt().SetFrom() + 1);
2641prot_loc->
SetInt().SetFrom() += common_prot_prefix_len;
2642prot_loc->
SetInt().SetTo(prot_loc->
SetInt().SetFrom() + prot_ref_str.size() - 1);
2646codons_loc = prot2nuc_mapper->
Map(*prot_loc);
2648 if(codons_loc->
IsNull()) {
2653frameshift_phase != 0,
2660 if(
verbose)
NcbiCerr<<
"prot_ref_str: "<< prot_ref_str <<
":"<< prot_ref_str.size() <<
"\n" 2661<<
"prot_var_str: "<< prot_var_str <<
":"<< prot_var_str.size() <<
"\n";
2666<<
"; variant codons: "<< num_var_codons
2667<<
"; common prefix: "<< common_prot_prefix_len <<
"\n";
2683prot_p->
SetSeq().SetLength(prot_ref_str.size());
2684prot_p->
SetSeq().SetSeq_data().SetNcbieaa().Set(prot_ref_str);
2686prot_p->
SetLoc(*prot_loc);
2690prot_v->SetPlacements().push_back(prot_p);
2694codons_p->
SetLoc(*codons_loc);
2698prot_v->SetPlacements().push_back(codons_p);
2703 if(frameshift_phase == 0 && prot_ref_str.size() == prot_var_str.size()) {
2707prot_v->SetVariant_prop().SetEffect(prop);
2714 copy(so_terms.begin(), so_terms.end(), back_inserter(prot_v->SetSo_terms()));
2718prot_v->SetData().SetInstance().SetType(
CalcInstTypeForAA(prot_ref_str, prot_var_str));
2722prot_v->SetData().SetInstance().SetDelta().push_back(di);
2724 if(prot_var_str.size() > 0) {
2732 if(
false&& common_prot_prefix_len == 0) {
2733di->SetSeq().Assign(v->GetData().GetInstance().GetDelta().front()->GetSeq());
2737<<
"inst-type: "<< prot_v->GetData().GetInstance().GetType()
2738<<
"; nuc_var_len: "<< nuc_var_str.size()
2739<<
"; nuc_var_str: "<< nuc_var_str
2740<<
"; prefix_len: "<< common_prot_prefix_len * 3
2741<<
"; var_codons:"<< prot_var_str.size() * 3 <<
"\n";
2745 stringadjusted_codons_str = nuc_var_str.substr(
2746min<int>(nuc_var_str.size(), common_prot_prefix_len * 3),
2747prot_var_str.size() * 3);
2749 if(adjusted_codons_str.size() > 0) {
2750di->SetSeq().SetLiteral().SetLength(adjusted_codons_str.size());
2751di->SetSeq().SetLiteral().SetSeq_data().SetIupacna().Set() = adjusted_codons_str;
2753di->SetSeq().SetThis();
2758 if(prot_ref_str.size() == 0) {
2762di->SetSeq().SetThis();
2767 if(frameshift_phase != 0) {
2770prot_v->SetVariant_prop().SetEffect(
2772| (prot_v->IsSetVariant_prop()
2773&& prot_v->GetVariant_prop().IsSetEffect()
2774? prot_v->GetVariant_prop().GetEffect() : 0));
2776prot_v->SetFrameshift().SetPhase(frameshift_phase);
2902 if(parent ==
NULL) {
2929 if(p1.size() != p2.size()) {
2932CVariation::TPlacements::const_iterator it1 = p1.begin();
2933CVariation::TPlacements::const_iterator it2 = p2.begin();
2935 for(; it1 != p1.end() && it2 != p2.end(); ++it1, ++it2) {
2953v.
SetData().SetSet().SetVariations())
2963v.
SetData().SetSet().SetVariations())
2966 if(!
v2.IsSetPlacements()) {
2970p1 = &
v2.SetPlacements();
2973 if(!
Equals(*p1,
v2.GetPlacements())) {
2989v.
SetData().SetSet().SetVariations())
2992 v2.ResetPlacements();
3006 constCVariation::TConsequence::value_type::TObjectType& cons = **it;
3007 if(cons.IsVariation()
3008&& cons.GetVariation().IsSetPlacements()) {
3015cons_v.
Reset(&cons.GetVariation());
3043 const CDbtag& dbtag = **it;
3044 if(dbtag.
GetDb() ==
"GeneID" 3052dbtag->
SetDb(
"GeneID");
3053dbtag->
SetTag().SetId(gene_id);
3099 boolis_completely_intronic =
false;
3109&& (is_start_offset || is_stop_offset);
3118is_completely_intronic = is_case1 || is_case2;
3127 for(
size_t i= 0;
i< 3;
i++) {
3140 intgene_id = it->first;
3143 if(loc_prop &
flags[
i]) {
3151 if(!is_completely_intronic) {
3172genomic_query_loc = mapper->
Map(query_loc);
3174genomic_query_loc.
Reset(&query_loc);
3207TIdRangeMap loc_map;
3212loc_map[ci.GetSeq_id_Handle()][ci.GetRange()] = term;
3219 if(!rna_loc && !cds_loc) {
3223 const CSeq_loc& main_loc = rna_loc ? *rna_loc : *cds_loc;
3243*ci.GetRangeAsSeq_loc());
3268 constSPropsMap::TRangeMap& rm = props_map.loc_map[ci.GetSeq_id_Handle()];
3269 for(SPropsMap::TRangeMap::const_iterator it2 = rm.begin(ci.GetRange()); it2.Valid(); ++it2) {
3270terms_set.
insert(it2->second);
3273 copy(terms_set.
begin(), terms_set.
end(), back_inserter(terms));
3294 for(
CFeat_CIci(bsh, sel); ci; ++ci) {
3303 returnlast_exon_pos ? last_exon_pos + 1
3304: last_polyA_pos ? last_polyA_pos + 1
3341 if(offset < 0 && offset >= -2) {
3362 if(!
v2.IsSetPlacements()) {
3367 if(!
v2.SetVariant_prop().IsSetGene_location()) {
3368 v2.SetVariant_prop().SetGene_location(0);
3376 if(
v2.GetConsequenceParent()) {
3423 intgene_id = gene_id_and_prop.first;
3426 if(m.find(gene_id) == m.end()) {
3429m[gene_id] |= properties;
3447m_loc2prop[ci.GetSeq_id_Handle()][ci.GetRange()].push_back(
TGeneIDAndProp(gene_id, prop));
3463m_rangemap[ci.GetSeq_id_Handle()][ci.GetRange()] = ci.GetRangeAsSeq_loc();
3476 if(it2 == m_rangemap.end()) {
3505 if(std::find(k.begin(), k.end(),
"RefSeqGene") != k.end()) {
3535 for(
CFeat_CIci(bsh, sel); ci; ++ci) {
3544 if(transcript_seq_ids.
find(product_id) == transcript_seq_ids.
end()) {
3553 const CDbtag& dbtag = **it;
3554 if(dbtag.
GetDb() ==
"GeneID" 3555|| dbtag.
GetDb() ==
"LocusID") {
3580feature::CFeatTree ft(ci);
3582 for(ci.
Rewind(); ci; ++ci) {
3590 returns_GetGeneID(mf, ft);
3608feature::CFeatTree ft(ci);
3610m_loc2prop[idh].size();
3625 for(ci.
Rewind(); ci; ++ci) {
3631 const intgene_id = s_GetGeneID(mf, ft);
3632 const boolis_focus_locus = focus_loci.
empty()
3633|| focus_loci.count(gene_id);
3635(is_focus_locus ? focus_gene_ranges
3636: non_focus_gene_ranges)
3637->SetMix().Set().push_back(
3649 boolfound_some_gene_ids =
false;
3651 for(ci.
Rewind(); ci; ++ci) {
3659 const intgene_id = s_GetGeneID(mf, ft);
3660 if(!focus_loci.
empty()
3661&& focus_loci.
find(gene_id) == focus_loci.
end()) {
3665 if(!parent_mf && gene_id) {
3673found_some_gene_ids =
true;
3695p.first->ResetStrand();
3696p.second->ResetStrand();
3717subtract_gene_ranges_from(*p.first);
3718subtract_gene_ranges_from(*p.second);
3724all_gene_neighborhoods->
SetMix().Set().push_back(p.first);
3725all_gene_neighborhoods->
SetMix().Set().push_back(p.second);
3747x_Add(*ci.GetRangeAsSeq_loc(),
3792 if(ft.GetChildren(mf).size() == 0) {
3812genes_and_neighborhoods_loc =
3814*genes_and_neighborhoods_loc,
3815*non_focus_gene_ranges,
3823*genes_and_neighborhoods_loc,
3827x_Add(*intergenic_loc,
3833&& !found_some_gene_ids) {
3843 intgene_id = s_GetGeneIdForProduct(bsh);
3845x_Add(*whole_range_loc, gene_id, 0);
3852feature::CFeatTree& ft)
3861 const CDbtag& dbtag = **it;
3862 if(dbtag.
GetDb() ==
"GeneID" 3863|| dbtag.
GetDb() ==
"LocusID") {
3876 const CDbtag& dbtag = **it;
3877 if(dbtag.
GetDb() ==
"GeneID" 3878|| dbtag.
GetDb() ==
"LocusID") {
3886 returnparent ? s_GetGeneID(parent, ft) : gene_id;
3899p.second->Assign(*p.first);
3900p.first->SetInt().SetTo(p.first->GetInt().GetFrom() + 2);
3901p.second->SetInt().SetFrom(p.second->GetInt().GetTo() - 2);
3904 swap(p.first, p.second);
3911p.second->SetNull();
3923sub_loc2->
Assign(*sub_loc1);
3932 swap(p.first, p.second);
3940 TSeqPosflank1_len(2000), flank2_len(500);
3942 swap(flank1_len, flank2_len);
3948p.second->Assign(*p.first);
3950 if(p.first->GetTotalRange().GetFrom() == 0) {
3953p.first->SetInt().SetTo(p.first->GetTotalRange().GetFrom() - 1);
3954p.first->SetInt().SetFrom(p.first->GetTotalRange().GetFrom() < flank1_len ? 0 : p.first->GetTotalRange().GetFrom() - flank1_len);
3957 if(p.second->GetTotalRange().GetTo() == max_pos) {
3958p.second->SetNull();
3960p.second->SetInt().SetFrom(p.second->GetTotalRange().GetTo() + 1);
3961p.second->SetInt().SetTo(p.second->GetTotalRange().GetTo() > max_pos ? max_pos : p.second->GetTotalRange().GetTo() + flank2_len);
3965 swap(p.first, p.second);
3978introns_loc_without_splice_sites->
Assign(*introns_loc_with_splice_sites);
3985seqint.
SetTo() -= 2;
3990p.first = introns_loc_without_splice_sites;
3992*introns_loc_without_splice_sites,
4013m_seq_data_map[idh].mapper.
Reset();
4017 for(
CFeat_CIci(bsh, sel); ci; ++ci) {
4031m_data[ci.GetSeq_id_Handle()][ci.GetRange()].push_back(s);
4046x_CacheSeqData(*all_rna_loc, idh);
4053 SSeqData& d = m_seq_data_map[idh2];
4082target_loc->
SetInt().SetId().SetLocal().SetStr(
"all_cds");
4083target_loc->
SetInt().SetFrom(0);
4093 literal->SetSeq_data().SetIupacna().Set(
"");
4101 if(m_seq_data_map.find(ci.GetSeq_id_Handle()) == m_seq_data_map.end()) {
4104 const SSeqData& d = m_seq_data_map.find(ci.GetSeq_id_Handle())->second;
4113 if((!mapped_loc->
IsInt() && !mapped_loc->
IsPnt())
4121 literal->SetSeq_data().SetIupacna().Set() += seq_chunk;
4137 if(m_data.find(idh) == m_data.end()) {
4143 if(it == m_data.end()) {
4157cdregions.push_back(*it);
4175 if(!v->IsSetId() && parent.
IsSetId()) {
4176v->SetId().Assign(parent.
GetId());
4210feat->
SetData().SetVariation(*vr);
4211feats.push_back(feat);
4231feat.
SetExts().push_back(uo);
4239out_feats.insert(out_feats.end(), feats.begin(), feats.end());
4247vr->SetId().Assign(v.
GetId());
4255vr->SetSample_id().Assign(*v.
GetSample_id().front());
4282vr->SetPhenotype().push_back(p);
4297 newCVariation_ref::TConsequence::value_type::TObjectType);
4298vr->SetConsequence().push_back(fr_cons);
4299fr_cons->SetFrameshift();
4309vr->SetData().SetComplex();
4316vr->SetData().SetUniparental_disomy();
4318vr->SetData().SetUnknown();
4335vr->SetConsequence();
4338 constCVariation::TConsequence::value_type::TObjectType& v_cons = **it;
4340 newCVariation_ref::TConsequence::value_type::TObjectType);
4341vr->SetConsequence().push_back(vr_cons);
4342vr_cons->SetUnknown();
4344 if(v_cons.IsSplicing()) {
4345vr_cons->SetSplicing();
4346}
else if(v_cons.IsNote()) {
4347vr_cons->SetNote(v_cons.GetNote());
4348}
else if(v_cons.IsVariation()) {
4350vr_cons->SetVariation(*cons_variation);
4351}
else if(v_cons.IsLoss_of_heterozygosity()) {
4352vr_cons->SetLoss_of_heterozygosity();
4353 if(v_cons.GetLoss_of_heterozygosity().IsSetReference()) {
4354vr_cons->SetLoss_of_heterozygosity().SetReference(
4355v_cons.GetLoss_of_heterozygosity().GetReference());
4357 if(v_cons.GetLoss_of_heterozygosity().IsSetTest()) {
4358vr_cons->SetLoss_of_heterozygosity().SetTest(
4359v_cons.GetLoss_of_heterozygosity().GetTest());
4366vr->SetSomatic_origin();
4369 constCVariation::TSomatic_origin::value_type::TObjectType& v_so = **it;
4372 newCVariation_ref::TSomatic_origin::value_type::TObjectType);
4374 if(v_so.IsSetSource()) {
4375vr_so->SetSource().Assign(v_so.GetSource());
4378 if(v_so.IsSetCondition()) {
4379vr_so->SetCondition();
4380 if(v_so.GetCondition().IsSetDescription()) {
4381vr_so->SetCondition().SetDescription(
4382v_so.GetCondition().GetDescription());
4384 if(v_so.GetCondition().IsSetObject_id()) {
4385vr_so->SetCondition().SetObject_id();
4386 ITERATE(CVariation::TSomatic_origin::value_type::TObjectType::TCondition::TObject_id,
4388v_so.GetCondition().GetObject_id())
4392vr_so->SetCondition().SetObject_id().push_back(dbtag);
4397vr->SetSomatic_origin().push_back(vr_so);
4411 delta->SetMultiplier(-1);
4414 delta->SetSeq().SetLiteral().SetFuzz().Assign(*fuzz);
4451v->SetPlacements().push_back(p);
4455v->SetPub().Assign(variation_feat.
GetCit());
4461v->SetExt().push_back(uo);
4469v->SetExt().push_back(uo);
4481v->SetId().Assign(vr.
GetId());
4516v->SetPhenotype().push_back(p);
4521v->SetMethod().SetMethod() = vr.
GetMethod();
4529v->SetData().SetComplex();
4535v->SetData().SetUniparental_disomy();
4537v->SetData().SetUnknown();
4554v->SetConsequence();
4557 constCVariation_ref::TConsequence::value_type::TObjectType& vr_cons = **it;
4559 if(vr_cons.IsFrameshift()) {
4564 if(vr_cons.GetFrameshift().IsSetPhase()) {
4565cons_variation.
SetFrameshift().SetPhase(vr_cons.GetFrameshift().GetPhase());
4567 if(vr_cons.GetFrameshift().IsSetX_length()) {
4568cons_variation.
SetFrameshift().SetX_length(vr_cons.GetFrameshift().GetX_length());
4575 if(vr_cons.IsUnknown()) {
4576v_cons->SetUnknown();
4577}
else if(vr_cons.IsSplicing()) {
4578v_cons->SetSplicing();
4579}
else if(vr_cons.IsNote()) {
4580v_cons->SetNote(vr_cons.GetNote());
4581}
else if(vr_cons.IsVariation()) {
4583v_cons->SetVariation(*cons_variation);
4584}
else if(vr_cons.IsLoss_of_heterozygosity()) {
4585v_cons->SetLoss_of_heterozygosity();
4586 if(vr_cons.GetLoss_of_heterozygosity().IsSetReference()) {
4587v_cons->SetLoss_of_heterozygosity().SetReference(vr_cons.GetLoss_of_heterozygosity().GetReference());
4589 if(vr_cons.GetLoss_of_heterozygosity().IsSetTest()) {
4590v_cons->SetLoss_of_heterozygosity().SetTest(vr_cons.GetLoss_of_heterozygosity().GetTest());
4594v->SetConsequence().push_back(v_cons);
4596 if(v->GetConsequence().empty()) {
4597v->ResetConsequence();
4602v->SetSomatic_origin();
4605 constCVariation_ref::TSomatic_origin::value_type::TObjectType& vr_so = **it;
4608 if(vr_so.IsSetSource()) {
4609v_so->SetSource().Assign(vr_so.GetSource());
4612 if(vr_so.IsSetCondition()) {
4613v_so->SetCondition();
4614 if(vr_so.GetCondition().IsSetDescription()) {
4615v_so->SetCondition().SetDescription(vr_so.GetCondition().GetDescription());
4617 if(vr_so.GetCondition().IsSetObject_id()) {
4618v_so->SetCondition().SetObject_id();
4619 ITERATE(CVariation_ref::TSomatic_origin::value_type::TObjectType::TCondition::TObject_id,
4621vr_so.GetCondition().GetObject_id())
4625v_so->SetCondition().SetObject_id().push_back(dbtag);
4630v->SetSomatic_origin().push_back(v_so);
4640 return delta.GetSeq().GetLiteral().GetLength()
4641* (
delta.IsSetMultiplier() ?
delta.GetMultiplier() : 1);
4646 return delta.GetSeq().GetLiteral().IsSetFuzz() ?
4647&
delta.GetSeq().GetLiteral().GetFuzz() :
NULL;
4655v.
SetData().SetSet().SetVariations())
4663 if(delta_first->IsSetAction()
4671v.
SetData().SetInstance().SetDelta().pop_front();
4674 if(delta_last != delta_first
4675&& delta_last->IsSetAction()
4683v.
SetData().SetInstance().SetDelta().pop_back();
4693 string* asserted_out,
4694 string* actual_out)
4706s_PropagateLocsInPlace(vr);
4709 boolhave_asserted_seq =
false;
4717 stringasserted_seq;
4719 if(
literal.GetSeq_data().IsIupacna()) {
4720asserted_seq =
literal.GetSeq_data().GetIupacna();
4721have_asserted_seq =
true;
4722}
else if(
literal.GetSeq_data().IsNcbieaa()) {
4723asserted_seq =
literal.GetSeq_data().GetNcbieaa();
4724have_asserted_seq =
true;
4729 stringprefix, suffix;
4730 stringstr_tmp =
NStr::Replace(asserted_seq,
"..",
"\t");
4735v.GetSeqData(v.begin(), v.end(), actual_seq);
4742*asserted_out = asserted_seq;
4745*actual_out = actual_seq;
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
bool SameOrientation(ENa_strand a, ENa_strand b)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
ESubtype GetSubtype(void) const
TSeqPos GetSeqStop(TDim row) const
CRef< CSeq_loc > CreateRowSeq_loc(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
TSeqPos GetSeqStart(TDim row) const
namespace ncbi::objects::
TSeqPos GetLength(void) const
Seq-loc iterator class â iterates all intervals from a seq-loc in the correct order.
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
static TSeqPos Keep(CSeq_data *in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static TSeqPos ReverseComplement(CSeq_data *in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static TSeqPos Append(CSeq_data *out_seq, const CSeq_data &in_seq1, TSeqPos uBeginIdx1, TSeqPos uLength1, const CSeq_data &in_seq2, TSeqPos uBeginIdx2, TSeqPos uLength2)
Template class for iteration on objects of class C (non-medifiable version)
Template class for iteration on objects of class C.
Set of related Variations.
Set of related Variations.
const TLocation & GetLocation(void) const
void SetLocation(TLocation &value)
bool IsSetLocation(void) const
NOTE: THESE ARE GOING AWAY SOON!!
const CVariation * GetParent() const
container_type::const_iterator const_iterator
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator find(const key_type &key) const
const_iterator end() const
void Get(const CSeq_loc &loc, TCdregions &cdregions)
vector< SCdregion > TCdregions
void x_Index(const CSeq_id_Handle &idh)
CRef< CSeq_literal > GetCachedLiteralAtLoc(const CSeq_loc &loc)
void x_CacheSeqData(const CSeq_loc &loc, const CSeq_id_Handle &idh)
static TLocsPair s_GetIntronsAndSpliceSiteLocs(const CSeq_loc &rna_loc)
pair< CRef< CSeq_loc >, CRef< CSeq_loc > > TLocsPair
static int s_GetGeneIdForProduct(CBioseq_Handle bsh)
void GetLocationProperties(const CSeq_loc &loc, TGeneIDAndPropVector &v)
pair< int, CVariantProperties::TGene_location > TGeneIDAndProp
static TLocsPair s_GetNeighborhoodLocs(const CSeq_loc &gene_loc, TSeqPos max_pos)
static TLocsPair s_GetUTRLocs(const CSeq_loc &cds_loc, const CSeq_loc &parent_loc)
static TLocsPair s_GetStartAndStopCodonsLocs(const CSeq_loc &cds_loc)
static int s_GetGeneID(const CMappedFeat &mf, feature::CFeatTree &ft)
void x_Add(const CSeq_loc &loc, int gene_id, CVariantProperties::TGene_location prop)
void x_Index(const CSeq_id_Handle &idh)
vector< TGeneIDAndProp > TGeneIDAndPropVector
CRef< CVariantPlacement > RemapToAnnotatedTarget(const CVariation &v, const CSeq_id &target)
Remap variation from product coordinates onto a nucleotide sequence on which this product is annotate...
static void s_AddInstOffsetsFromPlacementOffsets(CVariation_inst &vi, const CVariantPlacement &p)
static const CVariation::TPlacements * s_GetPlacements(const CVariation &v)
@ fAA2NA_truncate_common_prefix_and_suffix
void FindLocationProperties(const CSeq_align &transcript_aln, const CSeq_loc &query_loc, TSOTerms &terms)
Find location properties based on alignment.
CRef< CSeq_literal > GetLiteralAtLoc(const CSeq_loc &loc)
CRef< CVariation > x_AsVariation(const CVariation_ref &vr)
SFlankLocs CreateFlankLocs(const CSeq_loc &loc, TSeqPos len)
vector< ESOTerm > TSOTerms
CRef< CVariation > x_CreateUnknownVariation(const CSeq_id &id, CVariantPlacement::TMol mol)
bool CheckAmbiguitiesInLiterals(CVariation &v)
if variation.data contains a seq-literal with non-ACGT residues, attach VariationException to the fir...
static size_t s_CountMatches(const string &a, const string &b)
void x_AdjustDelinsToInterval(CVariation &v, const CSeq_loc &loc)
ETestStatus CheckExonBoundary(const CVariantPlacement &p, const CSeq_align &aln)
static void s_FactorOutPlacements(CVariation &v)
If at any level in variation-set all variations have all same placements, move them to the parent lev...
static CConstRef< CVariation > s_FindConsequenceForPlacement(const CVariation &v, const CVariantPlacement &p)
Find attached consequence variation in v that corresponds to p (has same seq-id).
CRef< CVariation > TranslateNAtoAA(const CVariation_inst &nuc_inst, const CVariantPlacement &p, const CSeq_feat &cds_feat)
Evaluate protein effect of a single-inst @ single-placement.
static string AsString(ESOTerm term)
static CRef< CSeq_literal > s_SpliceLiterals(const CSeq_literal &payload, const CSeq_literal &ref, TSeqPos pos)
insert seq-literal payload into ref before pos (pos=0 -> prepend; pos=ref.len -> append)
void x_SetVariantPropertiesForIntronic(CVariantPlacement &p, int offset, const CSeq_loc &loc, CBioseq_Handle &bsh)
bool AttachSeq(CVariantPlacement &p, TSeqPos max_len=kMaxAttachSeqLen)
If have offsets (intronic) or too long, return false; else set seq field on the placement and return ...
CRef< CVariation > AsVariation(const CSeq_feat &variation_ref)
@ fOpt_cache_exon_sequence
Use when there will be many calls to calculate protein consequence per sequence.
bool CheckPlacement(CVariantPlacement &p)
if placement is invalid SeqLocCheck fails, or offsets out of order, attach VariationException and ret...
static void s_AddIntronicOffsets(CVariantPlacement &p, const CSpliced_seg &ss, CScope *scope)
CVariantPlacement::TMol GetMolType(const CSeq_id &id)
void x_InferNAfromAA(CVariation &v, TAA2NAFlags flags)
static string s_CollapseAmbiguities(const vector< string > &seqs)
static void s_FindLocationProperties(CConstRef< CSeq_loc > rna_loc, CConstRef< CSeq_loc > cds_loc, const CSeq_loc &query_loc, TSOTerms &terms)
void ChangeToDelins(CVariation &v)
static void s_ResolveIntronicOffsets(CVariantPlacement &p)
static void s_UntranslateProt(const string &prot_str, vector< string > &codons)
void AsSOTerms(const CVariantProperties &p, TSOTerms &terms)
static void s_AttachGeneIDdbxref(CVariantPlacement &p, int gene_id)
static const CConstRef< CSeq_literal > s_FindFirstLiteral(const CVariation &v)
CRef< CSeq_literal > x_GetLiteralAtLoc(const CSeq_loc &loc)
CRef< CVariation > InferNAfromAA(const CVariation &prot_variation, TAA2NAFlags flags=fAA2NA_default)
void FlipStrand(CVariation &v) const
Other utility methods:
void SetPlacementProperties(CVariantPlacement &placement)
Methods to compute properties.
CVariantPropertiesIndex m_variant_properties_index
CRef< CVariation_ref > x_AsVariation_ref(const CVariation &v, const CVariantPlacement &p)
CRef< CVariantPlacement > Remap(const CVariantPlacement &p, const CSeq_align &aln, bool check_placements=true)
Methods to remap a VariantPlacement.
static bool s_IsInstStrandFlippable(const CVariation &v, const CVariation_inst &inst)
CCdregionIndex m_cdregion_index
static TSeqPos s_GetLength(const CVariantPlacement &p, CScope *scope)
ESOTerm
Supported SO-terms.
@ eSO_splice_acceptor_variant
@ eSO_nc_transcript_variant
@ eSO_initiator_codon_variant
@ eSO_coding_sequence_variant
@ eSO_2KB_upstream_variant
@ eSO_splice_donor_variant
@ eSO_3_prime_UTR_variant
@ eSO_5_prime_UTR_variant
@ eSO_500B_downstream_variant
@ eSO_terminator_codon_variant
TSeqPos GetEffectiveTranscriptLength(const CBioseq_Handle &bsh)
Length up to last position of the last exon (i.e.
void AttachProteinConsequences(CVariation &nuc_variation, const CSeq_id *=NULL, bool ignore_genomic=false)
Find the CDSes for the first placement; Compute prot consequence using TranslateNAtoAA for each and a...
CConstRef< CSeq_literal > x_FindOrCreateLiteral(const CVariation &v)
static void s_ConvertInstOffsetsToPlacementOffsets(CVariation &v, CVariantPlacement &p)
static const CConstRef< CSeq_literal > s_FindAssertedLiteral(const CVariation &v)
void AsVariation_feats(const CVariation &v, CSeq_annot::TData::TFtable &feats)
void SetVariantProperties(CVariation &v)
static CRef< CSeq_literal > s_CatLiterals(const CSeq_literal &a, const CSeq_literal &b)
join two seq-literals
int TAA2NAFlags
Methods to convert between nucleotide and protein.
void s_CalcPrecursorVariationCodon(const string &codon_from, const string &prot_to, vector< string > &codons_to)
CRef< CVariantPlacement > x_Remap(const CVariantPlacement &p, CSeq_loc_Mapper &mapper)
Note: this is strand-agnostic.
CRangeMap< CConstRef< CSeq_loc >, TSeqPos > TRangeMap
map< CSeq_id_Handle, TRangeMap > TIdRangeMap
SFastLocSubtract(const CSeq_loc &loc)
void operator()(CSeq_loc &container_loc) const
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
const TResidue codons[4][4]
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_USER_THROW(message)
Throw a quick-and-dirty runtime exception of type 'CException' with the given error message and error...
#define NCBI_USER_THROW_FMT(message)
Throw a "user exception" with message processed as output to ostream.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define NCBI_RETHROW_SAME(prev_exception, message)
Generic macro to re-throw the same exception.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
C * SerialClone(const C &src)
Create on heap a clone of the source object.
#define MSerial_AsnText
I/O stream manipulators.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
const string AsFastaString(void) const
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
CConstRef< CSeq_id > GetSeqId(void) const
EAccessionInfo
For IdentifyAccession (below)
CSeq_id::EAccessionInfo IdentifyAccession(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
@ eAcc_refseq_contig_ncbo
@ eAcc_refseq_mrna_predicted
@ eAcc_refseq_prot_predicted
@ eAcc_refseq_ncrna_predicted
@ eAcc_refseq_wgs_intermed
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
ENa_strand GetStrand(void) const
Get the location's strand.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
TRange GetTotalRange(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
bool IsTruncatedStop(ESeqLocExtremes ext) const
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
CRef< CSeq_loc > Subtract(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper, ILengthGetter *len_getter) const
Subtract seq-loc from this, merge/sort resulting ranges depending on flags.
bool IsTruncatedStart(ESeqLocExtremes ext) const
check if parts of the seq-loc are missing
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
void ResetStrand(void)
Reset the strand on this location.
CRef< CSeq_loc > Intersect(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper) const
Find the intersection with the seq-loc, merge/sort resulting ranges depending on flags.
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...
void SetNull(void)
Override all setters to incorporate cache invalidation.
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
@ eOrder_Biological
Iterate sub-locations in positional order.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CRef< CSeq_loc > Seq_loc_Subtract(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Subtract the second seq-loc from the first one.
CRef< CSeq_loc > Seq_loc_Merge(const CSeq_loc &loc, CSeq_loc::TOpFlags flags, CScope *scope)
Merge ranges in the seq-loc.
CRef< CSeq_loc > Seq_loc_Add(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Add two seq-locs.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
ESeqLocCheck SeqLocCheck(const CSeq_loc &loc, CScope *scope)
Checks that a CSeq_loc is all on one strand on one CBioseq.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eContains
First CSeq_loc contains second.
const CMolInfo * GetMolInfo(const CBioseq &bioseq)
Retrieve the MolInfo object for a given bioseq handle.
const CBioseq * GetNucleotideParent(const CBioseq &product, CScope *scope)
Get the encoding nucleotide sequence of a protein.
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
@ fIs5PrimePartial
= 0x4 Translate first codon even if not start codon (because sequence is 5' partial)
@ eGetId_ForceAcc
return only an accession based seq-id
@ eGetId_ForceGi
return only a gi-based seq-id
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeq_loc_Mapper_Base & SetMergeAll(void)
Merge any abutting or overlapping intervals.
@ eSeqMap_Up
map from segments to the top level bioseq
@ eProductToLocation
Map from the feature's product to location.
@ eLocationToProduct
Map from the feature's location to product.
const TDescr & GetDescr(void) const
TBioseqStateFlags GetState(void) const
Get state of the bioseq.
bool IsSetDbxref(void) const
const CSeqFeatData & GetData(void) const
TInst_Mol GetInst_Mol(void) const
bool IsSetProduct(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
const TDescr & GetDescr(void) const
TInst_Length GetInst_Length(void) const
void Reset(void)
Reset handle and make it not to point to any bioseq.
bool IsSetDescr(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
const CSeq_feat::TDbxref & GetDbxref(void) const
CRef< CSeq_loc > GetRangeSeq_loc(TSeqPos start, TSeqPos stop, ENa_strand strand=eNa_strand_unknown) const
Return CSeq_loc referencing the given range and strand on the bioseq If start == 0,...
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
TMol GetBioseqMolType(void) const
Get some values from core:
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
bool CanGetDescr(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
SAnnotSelector & SetResolveTSE(void)
SetResolveTSE() is equivalent to SetResolveMethod(eResolve_TSE).
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
const CSeq_loc & GetLocation(void) const
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
const CSeq_loc & GetProduct(void) const
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
bool IsProtein(void) const
const_iterator begin(void) const
bool IsNucleotide(void) const
const_iterator end(void) const
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool IsNull(void) const THROWS_NONE
Check if pointer is null — same effect as Empty().
position_type GetLength(void) const
const_iterator begin(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
CTempString literal(const char(&str)[Size])
Templatized initialization from a string literal.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
size_type size(void) const
Return the length of the represented array.
const TKeywords & GetKeywords(void) const
Get the Keywords member data.
TGenome GetGenome(void) const
Get the Genome member data.
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
bool IsSetDb(void) const
ids in other dbases Check if a value has been assigned to Db data member.
const TDb & GetDb(void) const
Get the Db member data.
vector< CRef< CDbtag > > TDb
bool IsLim(void) const
Check if variant Lim is selected.
const TTag & GetTag(void) const
Get the Tag member data.
void SetTag(TTag &value)
Assign a value to Tag data member.
bool IsId(void) const
Check if variant Id is selected.
const TDb & GetDb(void) const
Get the Db member data.
TLim GetLim(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
TLim & SetLim(void)
Select the variant.
bool IsRange(void) const
Check if variant Range is selected.
void SetDb(const TDb &value)
Assign a value to Db data member.
TId GetId(void) const
Get the variant data.
@ eLim_tl
space to left of position
@ eLim_tr
space to right of position
const TProtpos & GetProtpos(void) const
Get the variant data.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
void SetProduct_id(TProduct_id &value)
Assign a value to Product_id data member.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
TExons & SetExons(void)
Assign a value to Exons data member.
void SetProduct_strand(TProduct_strand value)
Assign a value to Product_strand data member.
TAmin GetAmin(void) const
Get the Amin member data.
void SetType(TType value)
Assign a value to Type data member.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
void SetProduct_type(TProduct_type value)
Assign a value to Product_type data member.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
void SetGenomic_id(TGenomic_id &value)
Assign a value to Genomic_id data member.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
void SetGenomic_strand(TGenomic_strand value)
Assign a value to Genomic_strand data member.
bool IsSpliced(void) const
Check if variant Spliced is selected.
bool IsNucpos(void) const
Check if variant Nucpos is selected.
TNucpos GetNucpos(void) const
Get the variant data.
const TSegs & GetSegs(void) const
Get the Segs member data.
@ eProduct_type_transcript
vector< CRef< CDbtag > > TDbxref
const TExts & GetExts(void) const
Get the Exts member data.
bool IsSetExt(void) const
user defined structure extension Check if a value has been assigned to Ext data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
list< CRef< CUser_object > > TExts
const TCit & GetCit(void) const
Get the Cit member data.
void SetCit(TCit &value)
Assign a value to Cit data member.
TExts & SetExts(void)
Assign a value to Exts data member.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
void SetData(TData &value)
Assign a value to Data data member.
const TProduct & GetProduct(void) const
Get the Product member data.
bool IsSetExts(void) const
set of extensions; will replace 'ext' field Check if a value has been assigned to Exts data member.
bool IsSetCit(void) const
citations for this feature Check if a value has been assigned to Cit data member.
bool IsVariation(void) const
Check if variant Variation is selected.
const TGene & GetGene(void) const
Get the variant data.
TExcept GetExcept(void) const
Get the Except member data.
const TExt & GetExt(void) const
Get the Ext member data.
const TVariation & GetVariation(void) const
Get the variant data.
bool IsRna(void) const
Check if variant Rna is selected.
void SetTo(TTo value)
Assign a value to To data member.
bool IsMix(void) const
Check if variant Mix is selected.
bool IsEmpty(void) const
Check if variant Empty is selected.
TFrom GetFrom(void) const
Get the From member data.
E_Choice Which(void) const
Which variant is currently selected.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
bool IsSetFuzz_to(void) const
Check if a value has been assigned to Fuzz_to data member.
bool IsWhole(void) const
Check if variant Whole is selected.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
bool IsNull(void) const
Check if variant Null is selected.
bool IsSetFuzz_from(void) const
Check if a value has been assigned to Fuzz_from data member.
bool IsPnt(void) const
Check if variant Pnt is selected.
const TIupacaa & GetIupacaa(void) const
Get the variant data.
void SetLength(TLength value)
Assign a value to Length data member.
bool IsGenbank(void) const
Check if variant Genbank is selected.
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
list< CRef< CSeqdesc > > Tdata
bool IsMolinfo(void) const
Check if variant Molinfo is selected.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
const TIupacna & GetIupacna(void) const
Get the variant data.
const TSource & GetSource(void) const
Get the variant data.
bool IsSource(void) const
Check if variant Source is selected.
const Tdata & Get(void) const
Get the member data.
TLength GetLength(void) const
Get the Length member data.
const TGenbank & GetGenbank(void) const
Get the variant data.
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
TBiomol GetBiomol(void) const
Get the Biomol member data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
list< CRef< CSeq_feat > > TFtable
bool IsSetSeq_data(void) const
may have the data Check if a value has been assigned to Seq_data data member.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
bool IsIupacna(void) const
Check if variant Iupacna is selected.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
E_Choice Which(void) const
Which variant is currently selected.
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ e_Iupacaa
IUPAC 1 letter amino acid code.
@ eBiomol_pre_RNA
precursor RNA of any sort really
@ eBiomol_cRNA
viral RNA genome copy intermediate
@ eBiomol_snoRNA
small nucleolar RNA
@ eBiomol_genomic_mRNA
reported a mix of genomic and cdna sequence
@ eBiomol_transcribed_RNA
transcribed RNA other than existing classes
@ eBiomol_other_genetic
other genetic material
void SetStop_offset_fuzz(TStop_offset_fuzz &value)
Assign a value to Stop_offset_fuzz data member.
void SetGene_location(TGene_location value)
Assign a value to Gene_location data member.
TMol GetMol(void) const
Get the Mol member data.
bool IsSetName(void) const
names and synonyms some variants have well-known canonical names and possible accepted synonyms Check...
bool IsSetStop_offset_fuzz(void) const
Check if a value has been assigned to Stop_offset_fuzz data member.
TMethod & SetMethod(void)
Assign a value to Method data member.
void SetFrame(TFrame value)
Assign a value to Frame data member.
void SetFrameshift(TFrameshift &value)
Assign a value to Frameshift data member.
TPlacement_method GetPlacement_method(void) const
Get the Placement_method member data.
TDbxrefs & SetDbxrefs(void)
Assign a value to Dbxrefs data member.
bool IsSetOther_ids(void) const
Check if a value has been assigned to Other_ids data member.
bool IsComplex(void) const
Check if variant Complex is selected.
list< CRef< CVariantPlacement > > TPlacements
bool IsSetPhenotype(void) const
phenotype Check if a value has been assigned to Phenotype data member.
bool IsSetFrameshift(void) const
Check if a value has been assigned to Frameshift data member.
void SetPlacement_method(TPlacement_method value)
Assign a value to Placement_method data member.
void SetStop_offset(TStop_offset value)
Assign a value to Stop_offset data member.
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
list< CRef< CVariation > > TVariations
const TStop_offset_fuzz & GetStop_offset_fuzz(void) const
Get the Stop_offset_fuzz member data.
bool IsSetPlacements(void) const
where this beast is seen note that this is a set of locations, and there are no restrictions to the c...
bool IsSetGene_location(void) const
Same semantics as VariantProperties.gene-location, except placement-specific Check if a value has bee...
void SetName(const TName &value)
Assign a value to Name data member.
const TName & GetName(void) const
Get the Name member data.
bool IsUnknown(void) const
Check if variant Unknown is selected.
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetPub(void) const
publication support; same type as in seq-feat Check if a value has been assigned to Pub data member.
TX_length GetX_length(void) const
Get the X_length member data.
void ResetStop_offset(void)
Reset Stop_offset data member.
bool IsInstance(void) const
Check if variant Instance is selected.
void ResetStart_offset_fuzz(void)
Reset Start_offset_fuzz data member.
const TSample_id & GetSample_id(void) const
Get the Sample_id member data.
const TExt & GetExt(void) const
Get the Ext member data.
list< CRef< CPhenotype > > TPhenotype
const TPhenotype & GetPhenotype(void) const
Get the Phenotype member data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSetSeq(void) const
for situations in which a raw location isn't sufficient Check if a value has been assigned to Seq dat...
bool IsSetId(void) const
ids (i.e., SNP rsid / ssid, dbVar nsv/nssv) expected values include 'dbSNP|rs12334',...
const TInstance & GetInstance(void) const
Get the variant data.
const TOther_ids & GetOther_ids(void) const
Get the Other_ids member data.
bool IsSetParent_id(void) const
Check if a value has been assigned to Parent_id data member.
const TDescription & GetDescription(void) const
Get the Description member data.
bool IsSetStart_offset_fuzz(void) const
Check if a value has been assigned to Start_offset_fuzz data member.
const TSomatic_origin & GetSomatic_origin(void) const
Get the Somatic_origin member data.
list< CRef< CObject_id > > TSample_id
TExceptions & SetExceptions(void)
Assign a value to Exceptions data member.
const TNote & GetNote(void) const
Get the variant data.
const TPub & GetPub(void) const
Get the Pub member data.
const TVariations & GetVariations(void) const
Get the Variations member data.
TStop_offset GetStop_offset(void) const
Get the Stop_offset member data.
void SetSeq(TSeq &value)
Assign a value to Seq data member.
const TFrameshift & GetFrameshift(void) const
Get the Frameshift member data.
const TName & GetName(void) const
Get the Name member data.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
void ResetStop_offset_fuzz(void)
Reset Stop_offset_fuzz data member.
bool IsNote(void) const
Check if variant Note is selected.
const TMethod & GetMethod(void) const
Get the Method member data.
const TPlacements & GetPlacements(void) const
Get the Placements member data.
bool IsSetVariant_prop(void) const
variant properties bit fields Check if a value has been assigned to Variant_prop data member.
bool IsSetSample_id(void) const
Check if a value has been assigned to Sample_id data member.
list< CRef< CUser_object > > TExt
TGene_location GetGene_location(void) const
Get the Gene_location member data.
void ResetStart_offset(void)
Reset Start_offset data member.
void SetType(TType value)
Assign a value to Type data member.
const TSynonyms & GetSynonyms(void) const
Get the Synonyms member data.
list< CRef< CDbtag > > TDbxrefs
const TSeq & GetSeq(void) const
Get the Seq member data.
TPlacements & SetPlacements(void)
Assign a value to Placements data member.
void SetStart_offset(TStart_offset value)
Assign a value to Start_offset data member.
void ResetPlacements(void)
Reset Placements data member.
void ResetSeq(void)
Reset Seq data member.
bool IsSetPhase(void) const
Check if a value has been assigned to Phase data member.
const TData & GetData(void) const
Get the Data member data.
void SetMol(TMol value)
Assign a value to Mol data member.
bool IsSetSynonyms(void) const
Check if a value has been assigned to Synonyms data member.
bool IsSetExt(void) const
Additional undescribed extensions Check if a value has been assigned to Ext data member.
bool IsSetMethod(void) const
sequencing / acquisition method Check if a value has been assigned to Method data member.
const TParent_id & GetParent_id(void) const
Get the Parent_id member data.
bool IsSetX_length(void) const
Check if a value has been assigned to X_length data member.
bool IsSetStop_offset(void) const
Check if a value has been assigned to Stop_offset data member.
list< CRef< CDbtag > > TOther_ids
const TStart_offset_fuzz & GetStart_offset_fuzz(void) const
Get the Start_offset_fuzz member data.
TVariations & SetVariations(void)
Assign a value to Variations data member.
bool IsSetSomatic_origin(void) const
Check if a value has been assigned to Somatic_origin data member.
const TId & GetId(void) const
Get the Id member data.
list< CRef< C_E_Somatic_origin > > TSomatic_origin
const TMethod & GetMethod(void) const
Get the Method member data.
const TVariant_prop & GetVariant_prop(void) const
Get the Variant_prop member data.
bool IsSetStart_offset(void) const
location refinements, describing offsets into introns from product coordinates.
const TLoc & GetLoc(void) const
Get the Loc member data.
bool IsSet(void) const
Check if variant Set is selected.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
const TConsequence & GetConsequence(void) const
Get the Consequence member data.
bool IsUniparental_disomy(void) const
Check if variant Uniparental_disomy is selected.
TType GetType(void) const
Get the Type member data.
void SetStart_offset_fuzz(TStart_offset_fuzz &value)
Assign a value to Start_offset_fuzz data member.
TStart_offset GetStart_offset(void) const
Get the Start_offset member data.
bool IsSetDescription(void) const
Tag for comment and descriptions. Check if a value has been assigned to Description data member.
TConsequence & SetConsequence(void)
Assign a value to Consequence data member.
void ResetGene_location(void)
Reset Gene_location data member.
bool IsSetPlacement_method(void) const
Check if a value has been assigned to Placement_method data member.
list< CRef< C_E_Consequence > > TConsequence
bool IsSetConsequence(void) const
Check if a value has been assigned to Consequence data member.
TPhase GetPhase(void) const
Get the Phase member data.
@ eMol_cdna
"c." coordinates in HGVS
@ eMol_mitochondrion
"mt." coordinates in HGVS
@ eMol_rna
"n." coordinates in HGVS
@ eMol_protein
"p." coordinates in HGVS
@ eMol_genomic
"g." coordinates in HGVS
@ eCode_ambiguous_sequence
@ eCode_inconsistent_consequence
consequence protein variation attached to precursor variation's consequence could not be derived from...
@ eCode_seqfetch_intronic
can't fetch sequence for an intronic (anchor+offset)-based location
@ eCode_ref_same_as_variant
reference sequence at the location is same as variant sequence in the variation
@ eCode_source_location_overhang
The source location overhangs the alignment by at least 5kb (VAR-1307)
@ eCode_hgvs_exon_boundary
anchor position in an intronic HGVS expression is not at an exon boundary
@ eCode_split_mapping
a source interval maps to multiple non-abutting intervals.
@ eCode_hgvs_exon_boundary_induced
Similar to (2), except induced by 5'/3'-terminal or an exon extension (VAR-1309)
@ eCode_inconsistent_asserted_allele
asserted allele is inconsistent with the reference
@ eCode_mismatches_in_mapping
the source sequence differs from sequence at mapped loc
@ eCode_seqfetch_too_long
can't fetch sequence because location is longer than specified threshold
@ eCode_partial_mapping
mapped location is shorter than the query
@ eCode_seqfetch_invalid
can't fetch sequence because location is invalid (e.g. extends past the end)
@ eCode_hgvs_parsing
invalid hgvs expression
@ eCode_no_mapping
could not remap
@ ePlacement_method_projected
@ eMethod_E_computational
TType GetType(void) const
Get the Type member data.
const TInstance & GetInstance(void) const
Get the variant data.
const TVariant_prop & GetVariant_prop(void) const
Get the Variant_prop member data.
TAction GetAction(void) const
Get the Action member data.
bool IsSetSomatic_origin(void) const
Check if a value has been assigned to Somatic_origin data member.
list< CRef< CVariation_ref > > TVariations
TObservation GetObservation(void) const
Get the Observation member data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSetDelta(void) const
Sequence that replaces the location, in biological order.
bool IsSetSample_id(void) const
Check if a value has been assigned to Sample_id data member.
bool IsSetVariant_prop(void) const
Variant properties bit fields. Check if a value has been assigned to Variant_prop data member.
list< CRef< CDbtag > > TOther_ids
TType GetType(void) const
Get the Type member data.
const TSample_id & GetSample_id(void) const
Get the Sample_id member data.
void SetType(TType value)
Assign a value to Type data member.
bool IsSetOther_ids(void) const
Check if a value has been assigned to Other_ids data member.
bool IsSetSeq(void) const
Check if a value has been assigned to Seq data member.
bool IsSetAction(void) const
Check if a value has been assigned to Action data member.
list< CRef< CPhenotype > > TPhenotype
const TNote & GetNote(void) const
Get the variant data.
const TLoc & GetLoc(void) const
Get the variant data.
TEffect GetEffect(void) const
Get the Effect member data.
void SetObservation(TObservation value)
Assign a value to Observation data member.
const TId & GetId(void) const
Get the Id member data.
const TDelta & GetDelta(void) const
Get the Delta member data.
list< CRef< C_E_Somatic_origin > > TSomatic_origin
void SetMultiplier_fuzz(TMultiplier_fuzz &value)
Assign a value to Multiplier_fuzz data member.
bool IsSetConsequence(void) const
Check if a value has been assigned to Consequence data member.
const TData & GetData(void) const
Get the Data member data.
const TPhenotype & GetPhenotype(void) const
Get the Phenotype member data.
bool IsSetPhenotype(void) const
Phenotype. Check if a value has been assigned to Phenotype data member.
const TSeq & GetSeq(void) const
Get the Seq member data.
const TDescription & GetDescription(void) const
Get the Description member data.
bool IsInstance(void) const
Check if variant Instance is selected.
const TName & GetName(void) const
Get the Name member data.
list< CRef< C_E_Consequence > > TConsequence
const TLiteral & GetLiteral(void) const
Get the variant data.
const TSomatic_origin & GetSomatic_origin(void) const
Get the Somatic_origin member data.
const TConsequence & GetConsequence(void) const
Get the Consequence member data.
void ResetAction(void)
Reset Action data member.
bool IsSetDescription(void) const
Tag for comment and descriptions. Check if a value has been assigned to Description data member.
bool IsSetSynonyms(void) const
Check if a value has been assigned to Synonyms data member.
bool IsUniparental_disomy(void) const
Check if variant Uniparental_disomy is selected.
bool IsSet(void) const
Check if variant Set is selected.
bool IsSetMethod(void) const
Check if a value has been assigned to Method data member.
bool IsSetParent_id(void) const
Check if a value has been assigned to Parent_id data member.
void SetName(const TName &value)
Assign a value to Name data member.
void SetType(TType value)
Assign a value to Type data member.
void SetSeq(TSeq &value)
Assign a value to Seq data member.
TMultiplier GetMultiplier(void) const
Get the Multiplier member data.
bool IsSetMultiplier_fuzz(void) const
Check if a value has been assigned to Multiplier_fuzz data member.
const TParent_id & GetParent_id(void) const
Get the Parent_id member data.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
list< CRef< CDelta_item > > TDelta
bool IsUnknown(void) const
Check if variant Unknown is selected.
TVariations & SetVariations(void)
Assign a value to Variations data member.
bool IsLiteral(void) const
Check if variant Literal is selected.
const TName & GetName(void) const
Get the Name member data.
bool IsSetId(void) const
ids (i.e., SNP rsid / ssid, dbVar nsv/nssv) expected values include 'dbSNP|rs12334',...
bool IsSetMultiplier(void) const
Multiplier allows representing a tandem, e.g.
void ResetMultiplier(void)
Reset Multiplier data member.
bool IsSetObservation(void) const
Check if a value has been assigned to Observation data member.
const TOther_ids & GetOther_ids(void) const
Get the Other_ids member data.
bool IsSetName(void) const
Names and synonyms: some variants have well-known canonical names and possible accepted synonyms. Check...
TDelta & SetDelta(void)
Assign a value to Delta data member.
const TMethod & GetMethod(void) const
Get the Method member data.
bool IsNote(void) const
Check if variant Note is selected.
bool IsThis(void) const
Check if variant This is selected.
TGene_location GetGene_location(void) const
Get the Gene_location member data.
bool IsLoc(void) const
Check if variant Loc is selected.
const TVariations & GetVariations(void) const
Get the Variations member data.
const TSynonyms & GetSynonyms(void) const
Get the Synonyms member data.
bool IsComplex(void) const
Check if variant Complex is selected.
@ eType_snv
delta=[morph of length 1] NOTE: this is snV not snP; the latter requires frequency-based validation t...
@ eType_inv
delta=[del, ins.seq= RevComp(variation-location)]
@ eType_mnp
delta=[morph of length >1]
@ eType_delins
delta=[del, ins]
@ eType_prot_nonsense
delta=[del]; variation-location is the tail of the protein being truncated
@ eType_prot_other
delta=any
@ eType_prot_silent
delta=[morph of length 1, same AA as at variation-location]
@ eType_prot_missense
delta=[morph of length 1]
@ eEffect_stop_gain
reference codon is not stop codon, but the snp variant allele changes the codon to a terminating codo...
@ eEffect_missense
one allele in the set changes protein peptide (0x4)
@ eEffect_nonsense
one allele in the set changes to STOP codon (TER). (0x2)
@ eEffect_stop_loss
reverse of STOP-GAIN: reference codon is a stop codon, but a snp variant allele changes the codon to ...
@ eEffect_synonymous
one allele in the set does not change the encoded amino acid (0x1)
@ eEffect_frameshift
one allele in the set changes all downstream amino acids (0x8)
@ eGene_location_in_start_codon
the variant is observed in a start codon (0x100)
@ eGene_location_acceptor
In acceptor splice-site (0x20)
@ eGene_location_near_gene_5
Within 2kb of the 5' end of a gene feature.
@ eGene_location_near_gene_3
Within 0.5kb of the 3' end of a gene feature.
@ eGene_location_utr_3
In 3' UTR (0x80)
@ eGene_location_in_gene
Sequence intervals covered by a gene ID but not having an aligned transcript (0x01)
@ eGene_location_utr_5
In 5' UTR (0x40)
@ eGene_location_intron
In Intron (0x08)
@ eGene_location_intergenic
variant located between genes (0x400)
@ eGene_location_donor
In donor splice-site (0x10)
@ eGene_location_in_stop_codon
the variant is observed in a stop codon (0x200)
@ eGene_location_conserved_noncoding
variant is located in a conserved non-coding region (0x800)
@ eAction_offset
go downstream by distance specified by multiplier (upstream if < 0), in genomic context.
@ eAction_morph
replace len(seq) positions starting with location.start with seq
@ eAction_del_at
excise sequence at location if multiplier is specified, delete len(location)*multiplier positions dow...
@ eAction_ins_before
insert seq before the location.start
@ eObservation_variant
inst represents the observed variant at a given position
@ eObservation_asserted
inst represents the asserted base at a position
unsigned int
A callback function used to compare two keys in a database.
double value_type
The numeric datatype used by the parser.
static void ChangeIdsInPlace(T &container, sequence::EGetIdType id_type, CScope &scope)
static set< int > GetFocusLocusIDs(const CBioseq_Handle &bsh)
static CVariantProperties::TEffect CalcEffectForProt(const string &prot_ref_str, const string &prot_delta_str)
static bool IsRightPartial(CBioseq_Handle bsh)
static bool HasProblematicExceptions(const CSeq_feat &cds_feat)
static CRef< CDelta_item > CreateDeltaForOffset(int offset, const CInt_fuzz *fuzz)
static bool Equals(const CVariation::TPlacements &p1, const CVariation::TPlacements &p2)
static void ApplyOffsetFuzz(CSeq_loc &loc, const CInt_fuzz &offset_fuzz, bool is_start)
static bool IsRefSeqGene(const CBioseq_Handle &bsh)
static CRef< CVariationException > CreateException(const string &message, CVariationException::ECode code=static_cast< CVariationException::ECode >(0))
static string Translate(const string &nuc_str, bool is_mito)
static int GetFuzzSign(const CInt_fuzz &fuzz, int loc_sign)
static void SwapLtGtFuzz(CInt_fuzz &fuzz)
CVariation_inst::EType CalcInstTypeForAA(const string &prot_ref_str, const string &prot_delta_str)
CRef< CVariation > InheritParentAttributes(const CVariation &child, const CVariation &parent)
static CRef< CVariation > CreateUnknownProtConsequenceVariation(const CVariantPlacement &nuc_p, const CSeq_feat &cds_feat, bool is_frameshifting, CScope &scope)
static CVariationUtil::TSOTerms CalcSOTermsForProt(TSignedSeqPos nuc_delta_len, const string &prot_ref_str, const string &prot_variant_str)
static CRef< CSeq_align > CreateSplicedSeqAlignFromFeat(const CSeq_feat &rna_feat)
static bool ValidExonTerminal(const set< TSeqPos > &exon_biostarts, const set< TSeqPos > &exon_biostops, TSeqPos exon_anchor_pos, int offset_pos)
static int GetSignedOffset(const CDelta_item &delta)
static bool ContainsIupacNaAmbiguities(const T &obj)
static bool Contains(const CSeq_loc &a, const CSeq_loc &b, CScope *scope)
static bool ValidExonTerminals(const set< TSeqPos > &exon_biostarts, const set< TSeqPos > &exon_biostops, const CVariantPlacement &p)
static size_t GetCommonSuffixLen(const string &a, const string &b)
static const CInt_fuzz * GetFuzz(const CDelta_item &delta)
static size_t GetCommonPrefixLen(const string &a, const string &b)
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
const CConstRef< CSeq_id > GetAccession(const CSeq_id_Handle &id_handle)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
CConstRef< CSeq_feat > cdregion_feat
CRef< CSeq_loc_Mapper > mapper
Calculate upstream (first) and downstream (second) flanks for loc.
CRef< CSeq_loc > upstream
CRef< CSeq_loc > downstream
CRef< CTestThread > thr[k_NumThreadsMax]
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4