kAssemblyGap_feature =
"assembly_gap";
96 static stringkGapType_qual =
"gap_type";
97 static stringkLinkageEvidence_qual =
"linkage_evidence";
102CSeq_descr::Tdata::iterator it = src.
SetDescr().Set().begin();
104 while(it != src.
SetDescr().Set().end())
106 switch((**it).Which())
111dest.
SetDescr().Set().push_back(*it);
122dest.
SetDescr().Set().push_back(*it);
132 const charmapids[] = {
158 structSSeqAnnotCompare
163 const char* m = mapids;
167 returnstrchr(m, c)-m;
175 returnmapwhich(left->
GetData().
Which()) < mapwhich(right->GetData().Which());
183 if(!(**annot_it).IsFtable())
continue;
187 const CSeq_feat& feature = **feature_it;
198 voidFindMaximumId(
const CSeq_entry& entry,
int&
id)
202FindMaximumId(entry.
GetAnnot(),
id);
212FindMaximumId(**set_it,
id);
234 if((**xref_it).IsSetData())
236 if((**xref_it).GetData().IsProt() &&
237(**xref_it).GetData().GetProt().IsSetName())
239protein_name = (**xref_it).GetData().GetProt().GetName().front();
262 id->SetLocal().SetStr(id_label);
275 if(!hid || !it->IsBetter(hid)) {
284 returnGetNewProteinId(seh.
GetScope(), id_base);
287 stringNewProteinName(
const CSeq_feat& feature,
boolmake_hypotethic)
293 if(protein_name.empty() && make_hypotethic)
295protein_name =
"hypothetical protein";
308 for(
autopId : pProtEntry->GetSeq().GetId()) {
331seqid->Assign(*bioseq.
GetId().begin()->GetPointerOrNull());
333best.push_back(orfs.front());
336 if((**it).GetTotalRange().GetLength() >
337best.front()->GetTotalRange().GetLength() )
353 for(
autoit: seq.
GetId()) {
365 for(
autoit: seq_ids) {
366 if(!BioseqHasId(bioseq, it))
368bioseq.
SetId().push_back(it);
376 for(
autoit: ids) {
377 if(it->IsGenbank() || best.
Empty())
385 for(
autoit = seq_ftable.begin(); it != seq_ftable.end(); ++it) {
386 autoprot_feat = *it;
392seq_ftable.erase(it);
408 ftable= &it->SetData().SetFtable();
416 ftable= &annot->SetData().SetFtable();
417bioseq.
SetAnnot().push_back(annot);
422 if(prot_feat.
Empty())
424 ftable->push_back(prot_feat);
426prot_feat =
ftable->front();
430 intGetGenomicCodeOfBioseq(
const CBioseq& bioseq)
433 if(closest_biosource.
Empty())
454 autoappend_nonduplicated_item = [](list<string>& current_list,
455 constlist<string>& other_list)
457unordered_set<string> current_set;
458 for(
const auto& item : current_list) {
459current_set.insert(item);
462 for(
const auto& item : other_list) {
463 if(current_set.find(item) == current_set.end()) {
464current_list.push_back(item);
470append_nonduplicated_item(current_ref.
SetName(),
479append_nonduplicated_item(current_ref.
SetEc(),
480other_ref.
GetEc());
484append_nonduplicated_item(current_ref.
SetActivity(),
489 for(
const auto& pDBtag : other_ref.
GetDb()) {
490current_ref.
SetDb().push_back(pDBtag);
511 boolnameFromRNAProduct{
false};
516nameFromRNAProduct =
true;
519prot_ref.
SetName().push_back(product_name);
523 if(pMrna.
Empty() || nameFromRNAProduct) {
530 if(extName.empty()) {
535 for(
auto& protName : prot_ref.
SetName()) {
541prot_ref.
SetName().push_back(extName);
553 boolwas_extended =
false;
563 if(protein.
Empty())
568protein_entry->
SetSeq(*protein);
584 if(protein->
GetId().empty())
586 const string* protein_ids =
nullptr;
588qual_to_remove =
"protein_id";
589protein_ids = &cd_feature.
GetNamedQual(qual_to_remove);
591 if(protein_ids->empty())
593qual_to_remove =
"orig_protein_id";
594protein_ids = &cd_feature.
GetNamedQual(qual_to_remove);
597 if(protein_ids->empty())
603 if(protein_ids->empty())
605protein_ids = &cd_feature.
GetNamedQual(
"product_id");
609 if(protein_ids->empty()) {
614MergeSeqIds(*protein, {
whole});
622MergeSeqIds(*protein, new_ids);
631 if(protein->
GetId().empty())
634 if(!bioseq.
GetId().empty()) {
637protein->
SetId().push_back(GetNewProteinId(*token.
scope, base_name));
640 for(
autoprot_id : protein->
GetId()) {
641prot_feat = MoveParentProt(seq_ftable, *prot_id);
646CreateOrSetFTable(*protein, prot_feat);
652prot_ref.
GetName().empty()) &&
654prot_ref.
SetName().push_back(
"hypothetical protein");
659prot_feat->
SetLocation().SetInt().SetId().Assign(*GetAccessionId(protein->
GetId()));
664cd_feature.
SetProduct().SetWhole().Assign(*GetAccessionId(protein->
GetId()));
669cd_feature.
SetXref().clear();
684 auto& ext = mrna->
SetData().SetRna().SetExt();
686(ext.IsName() && ext.SetName().empty()))
687ext.SetName() = prot_ref.
GetName().front();
703 returnprotein_entry;
717 return*left < *right;
739 const CBioseq* pNucSeq=
nullptr;
741 const auto& bioseqSet = nuc_prot.
GetSet();
742 for(
const auto& pSubEntry : bioseqSet.GetSeq_set()) {
743 const auto& bioseq = pSubEntry->GetSeq();
744 if(bioseq.
IsNa()) {
754inserter(proteinIds, proteinIds.
end()),
755[](
const CRef<CSeq_id>& pId) { return pId.GetPointer(); });
764 for(
autopAnnot : pNucSeq->
GetAnnot()) {
765 if(pAnnot->IsFtable()) {
766 for(
autopSeqFeat : pAnnot->GetData().GetFtable()) {
768!pSeqFeat->IsSetData() ||
769!pSeqFeat->GetData().IsCdregion()) {
773 if(!pSeqFeat->IsSetProduct() ||
774!pSeqFeat->GetProduct().GetId() ||
775proteinIds.
find(pSeqFeat->GetProduct().GetId())
776== proteinIds.
end()) {
798 switch(entry.
Which())
836 switch(entry.
Which())
866seq_ftable.sort(SSeqAnnotCompare());
867 autofeat_it = seq_ftable.begin();
868 while(feat_it != seq_ftable.end())
878 if(
data.IsCdregion())
880 if(!
data.GetCdregion().IsSetCode())
882 int code= GetGenomicCodeOfBioseq(*token.
bioseq);
886 data.SetCdregion().SetCode().SetId(
code);
888 if(!
data.GetCdregion().IsSetFrame())
914set_ftable.push_back(feature);
915feat_it = seq_ftable.erase(feat_it);
927 if(!entry.
IsSet() ||
932 autoentry_it = find_if(seq_set.begin(), seq_set.end(),
937pEntry->GetSeq().IsSetInst() &&
938pEntry->GetSeq().IsNa() &&
939pEntry->GetSeq().IsSetAnnot());
942 if(entry_it == seq_set.end()) {
946 auto& bioseq = token.
bioseq;
947bioseq.
Reset(&((*entry_it)->SetSeq()));
948 auto& annots = bioseq->
SetAnnot();
952find_if(annots.begin(), annots.end(),
955 if(annot_it == annots.end()) {
959 automain_ftable = *annot_it;
962 while(annot_it != annots.end()) {
963 autopAnnot = *annot_it;
965main_ftable->
SetData().SetFtable().splice(
966end(main_ftable->SetData().SetFtable()),
967pAnnot->
SetData().SetFtable());
968annot_it = annots.erase(annot_it);
975 autoseq_ftable = main_ftable->SetData().SetFtable();
993 if(seq_ftable.empty()) {
994bioseq->
SetAnnot().remove(main_ftable);
997main_ftable->SetData().SetFtable() = std::move(seq_ftable);
1005 if(!set_ftable.empty()) {
1026unique_ptr<CFastaReader> pReader(
newCFastaReader(0,
flags));
1027pReader->SetPostponedMods({
"gene",
"allele"});
1038 if(
result->IsSetDescr())
1040 if(
result->GetDescr().Get().empty())
1043 result->SetSeq().ResetDescr();
1045 result->SetSet().ResetDescr();
1052 set->SetSet().SetSeq_set().push_back(
result);
1066list<CConstRef<CBioseq>> proteins;
1067 if(possible_proteins.
IsSeq()) {
1068proteins.emplace_back(&(possible_proteins.
GetSeq()));
1074proteins.emplace_back(&(pSubEntry->GetSeq()));
1082 autoit = proteins.begin();
1083 while(it != proteins.end()) {
1085it = proteins.erase(it);
1113 if((**annot_it).IsFtable())
1117 if((**feat_it).CanGetData())
1119 switch((**feat_it).GetData().Which())
1137 if(entry.
IsSeq())
1144MoveSomeDescr(entry, bioseq);
1189 if(ival.
GetTo() < bioseqLength - 4) {
1192ival.
SetTo(bioseqLength - 1);
1199 boolchanged =
false;
1201 if( partial5 && partial3 ) {
1203}
else if( partial5 ) {
1205}
else if( partial3 ) {
1229 if((*annot_it)->IsFtable()) {
1241 if((*feat_it)->IsSetData() && (*feat_it)->GetData().IsProt() && !(*feat_it)->GetData().GetProt().IsSetProcessed()) {
1242prot_feat = *feat_it;
1248prot_feat->
SetData().SetProt();
1249 ftable->SetData().SetFtable().push_back(prot_feat);
1252prot_id->Assign(*(protein.
GetId().front()));
1253prot_feat->
SetLocation().SetInt().SetId(*prot_id);
1258 if(partial5 || partial3) {
1276 if(!eh.
IsSet()) {
1283 if(
set&&
set->IsSetSeq_set()) {
1286CBioseq_set::TDescr::Tdata::const_iterator it =
nuc->GetDescr().Get().begin();
1287 while(it !=
nuc->GetDescr().Get().end()) {
1288 if(!(*it)->IsMolinfo() && !(*it)->IsTitle()) {
1290 copy->Assign(**it);
1293it =
nuc->GetDescr().Get().begin();
1320 for(; annot_ci; ++annot_ci) {
1321 if((*annot_ci).IsFtable()) {
1337m_feh = aeh.AddFeat(*m_Feat);
1346 const string& idString,
1348objects::ILineErrorListener& logger)
1350 for(
const auto& modName : duplicateMods) {
1351 stringmessage =
"Multiple '"+ modName +
"' modifiers. Only the first will be used.";
1352logger.PutError(*unique_ptr<CLineError>(
1354 "",
"",
"", message)));
1361 CBioseq& protein,
boolpartial5,
boolpartial3)
1365 const auto& proteinIds = pOriginalProtIds.empty() ?
1369 for(
autopId : proteinIds) {
1370 const autoidString = pId->AsFastaString();
1372 const auto& modList = it->second.second;
1373lineNumber = it->second.first;
1375 for(
const auto&
mod: modList) {
1376 if(!
smp.AddMods(
mod.GetName(),
mod.GetValue())) {
1377duplicateMods.
insert(
mod.GetName());
1386 if(!
smp.GetAllMods().empty()) {
1387 smp.ApplyAllMods(protein);
1388 if(
nuc->IsSeq()) {
1389 smp.ApplyAllMods(
nuc->SetSeq(),
"", cds_loc);
1392 for(
autopEntry :
nuc->SetSet().SetSeq_set()) {
1393 if(pEntry->IsSeq() && pEntry->GetSeq().IsNa()) {
1394 smp.ApplyAllMods(pEntry->SetSeq(),
"", cds_loc);
1407 for(
autopId : protein.
GetId()) {
1408 if(seh.
IsSeq()) {
1413 else if(seh.
IsSet()) {
1415 if(bit->IsSynonym(*pId)) {
1431 if(nuc_count > 1) {
1442 constCSeq_loc& genomicLoc,
1449 autoalignment = prosplign.
FindAlignment(scope, proteinId, genomicLoc,
1461 if(!filter.
Match(*alignment)) {
1466 boolfound_start_codon =
false;
1467 boolfound_stop_codon =
false;
1468list<CRef<CSeq_loc>> exonLocs;
1470 if(alignment->IsSetSegs() && alignment->GetSegs().IsSpliced()) {
1472seq_id->
Assign(*(genomicLoc.GetId()));
1473 const auto& splicedSegs = alignment->GetSegs().GetSpliced();
1474 const boolisMinusStrand = (splicedSegs.IsSetGenomic_strand() &&
1477 for(
autopExon : splicedSegs.GetExons()) {
1478 autopExonLoc =
Ref(
newCSeq_loc(*seq_id,
1479pExon->GetGenomic_start(),
1480pExon->GetGenomic_end()));
1482 if(isMinusStrand) {
1484}
else if(pExon->IsSetGenomic_strand()) {
1485pExonLoc->SetStrand(pExon->GetGenomic_strand());
1487exonLocs.push_back(pExonLoc);
1490 for(
autopModifier : splicedSegs.GetModifiers()) {
1491 if(pModifier->IsStart_codon_found()) {
1492found_start_codon = pModifier->GetStart_codon_found();
1494 if(pModifier->IsStop_codon_found()) {
1495found_stop_codon = pModifier->GetStop_codon_found();
1500 if(exonLocs.empty()) {
1504 autopCDSLoc =
Ref(
newCSeq_loc());
1505 if(exonLocs.size() == 1) {
1506pCDSLoc->Assign(*(exonLocs.front()));
1509pCDSLoc->SetMix().Set() = exonLocs;
1512 if(!found_start_codon) {
1516 if(found_stop_codon) {
1518 auto& finalInterval = pCDSLoc->IsMix() ?
1519pCDSLoc->SetMix().Set().back()->SetInt() :
1532pCds->SetLocation(loc);
1534pCds->SetPartial(
true);
1536pCds->SetData().SetCdregion();
1537pCds->SetProduct().SetWhole(productId);
1554 boolid_match{
false};
1568bioseq_id->
Assign(*(bsh_match.GetSeqId()));
1569 CRef<CSeq_loc>match_loc(
newCSeq_loc(*bioseq_id, 0, bsh_match.GetBioseqLength() - 1));
1575pOriginalIds = move(protein_entry->
SetSeq().
SetId());
1578protein_entry->
SetSeq().
SetId().push_back(product_id);
1589 string error=
"Unable to find coding region location for protein sequence "+
label+
".";
1601protein_entry->
SetSeq(), partial5, partial3);
1603AddSeqEntry(bsh_match.GetParentEntry(), protein_entry);
1605 autonew_cds =
s_MakeCDSFeat(*cds_loc, (partial5 || partial3),
1607AddFeature(seh, new_cds);
1612 stringtitle = protein_name;
1613 if(!org_name.empty())
1629 for(CBioseq::TAnnot::iterator annot_it = bioseq.
SetAnnot().begin(); annot_it != bioseq.
SetAnnot().end(); )
1631 if((**annot_it).IsFtable() && (**annot_it).GetData().GetFtable().empty())
1633annot_it = bioseq.
SetAnnot().erase(annot_it);
1649 return(feat.
GetNamedQual(
"estimated_length") ==
"unknown");
1655 const string& sGT = feature_gap.
GetNamedQual(kGapType_qual);
1669gap_type = gap_type_info->
m_eType;
1676 const string& sLE_name = (**sLE_qual).GetQual();
1677 if(sLE_name != kLinkageEvidence_qual)
1683 if(it == linkage_evidence_to_value_map.
end())
1686 string(
"Unrecognized linkage evidence ") + (**sLE_qual).GetVal(),
1702 string(
"Linkage evidence must not be specified for ") + sGT,
1713 string(
"Linkage evidence must be specified for ") + sGT,
1726evidences.
insert(evidence);
1732 string(
"Unrecognized gap type ") + sGT,
1756 for(
CBioseq_CIbioseq_it(seh); bioseq_it; ++bioseq_it)
1760 for(
CFeat_CIfeature_it(*bioseq_it, annot_sel); feature_it; )
1762 if(feature_it->IsSetData() && feature_it->GetData().IsImp())
1764 const CImp_feat& imp = feature_it->GetData().GetImp();
1768 const CSeq_feat& feature_gap = feature_it->GetOriginalFeature();
1773 autopBioseq =
const_cast<CBioseq*
>(bioseq_it->GetCompleteBioseq().GetPointer());
1779 "Failed to convert feature gap into a gap",
1798 CBioseq& bioseq = (
CBioseq&)*bioseq_it->GetEditHandle().GetCompleteBioseq();
1817 for(
autopAnnot : bioseq.
SetAnnot()) {
1818 if(!pAnnot->IsSetData() ||
1824 auto&
ftable= pAnnot->SetData().SetFtable();
1825 autofit =
ftable.begin();
1826 while(fit !=
ftable.end()) {
1827 autopSeqFeat = *fit;
1828 if(pSeqFeat->IsSetData() &&
1829pSeqFeat->GetData().IsImp() &&
1830pSeqFeat->GetData().GetImp().IsSetKey() &&
1831pSeqFeat->GetData().GetImp().GetKey() == kAssemblyGap_feature) {
1834 if(
MakeGap(bioseq, *pSeqFeat)) {
1835fit =
ftable.erase(fit);
1839 "Failed to convert feature gap into a gap",
1862CSeqTranslator::ChangeDeltaProteinToRawProtein(Ref(&bioseq));
1872 switch(loc.Which()) {
1874 return&loc.GetWhole();
1876 return&(loc.GetInt().GetId());
1878 return&(loc.GetPnt().GetId());
1880 if(!loc.GetPacked_int().Get().empty()) {
1881 return&(loc.GetPacked_int().Get().front()->GetId());
1885 if(loc.GetPacked_pnt().IsSetId()) {
1886 return&(loc.GetPacked_pnt().GetId());
1899 using TFeatIt= list<CRef<CSeq_feat>>::const_iterator;
1909list<SRegionIterators>& its)
1912 for(
autoannot_it = annots.begin();
1913annot_it != annots.end();
1916 const auto& annot = **annot_it;
1917 if(annot.IsFtable()) {
1918 const auto&
ftable= annot.GetData().GetFtable();
1919list<SRegionIterators::TFeatIt> feat_its;
1920 for(
autofeat_it =
ftable.begin(); feat_it !=
ftable.end(); ++feat_it) {
1921 const auto& pFeat = *feat_it;
1922 if(pFeat->IsSetData() &&
1923pFeat->GetData().IsRegion()) {
1924feat_its.push_back(feat_it);
1927 if(!feat_its.empty()) {
1937 if(!seq_entry.
IsSet()) {
1941 auto& bioseq_set = seq_entry.
SetSet();
1943 if(!bioseq_set.IsSetClass() ||
1945 if(bioseq_set.IsSetSeq_set()) {
1946 for(
autopEntry : bioseq_set.SetSeq_set()) {
1955 _ASSERT(bioseq_set.IsSetSeq_set());
1961list<SRegionIterators> region_its;
1963 for(
autopSubEntry : bioseq_set.SetSeq_set()) {
1965 auto& seq = pSubEntry->SetSeq();
1976region_its.empty()) {
1981pScope->AddTopLevelSeqEntry(seq_entry);
1984 for(
autoits : region_its) {
1985 for(
autofeat_it : its.feat_its) {
1986 autopRegion = *feat_it;
1992pRegion->SetLocation(*pMappedLoc);
1996(*its.annot_it)->SetData().SetFtable().
erase(feat_it);
1999 if((*its.annot_it)->GetData().GetFtable().empty()) {
2000pNucSeq->
SetAnnot().erase(its.annot_it);
2008 for(
autopSubEntry : bioseq_set.SetSeq_set()) {
2009 auto& bioseq = pSubEntry->SetSeq();
2010 if(bioseq.
IsNa()) {
2015 for(
autopId : bioseq.
GetId()) {
2017 while(it != mapped_regions.
end() && (it->first->Compare(*pId) ==
CSeq_id::e_YES)) {
2023it = mapped_regions.
erase(it);
2028bioseq.
SetAnnot().push_back(pAnnot);
2031 if(mapped_regions.
empty()) {
2040 if(entry.
IsSeq()) {
2044 auto& bioseq_set = entry.
SetSet();
2045 if(!bioseq_set.IsSetSeq_set()) {
2049 boolany_change =
false;
2050 if(!bioseq_set.IsSetClass() ||
2052 for(
autopSubEntry : bioseq_set.SetSeq_set()) {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
User-defined methods of the data storage class.
void g_LogGeneralParsingError(EDiagSev sev, const string &idString, const string &msg, objects::ILineErrorListener &listener)
string GetIdHashOrValue(const string &base, int offset)
void transform(Container &c, UnaryFunction *op)
CAlignFilter exposes a query language for inspecting properties and scores placed on Seq-align object...
bool Match(const objects::CSeq_align &align)
Match a single alignment.
CSeqdesc & Set(bool skip_lookup=false)
int GetGenCode(int def=1) const
CConstRef< CSeqdesc > GetClosestDescriptor(CSeqdesc::E_Choice choice, int *level=NULL) const
TSeqPos GetLength(void) const
static bool ExtendToStopIfShortAndNotPartial(CSeq_feat &f, CBioseq_Handle bsh, bool check_for_stop=true)
Extends a coding region up to 50 nt.
static bool ParseCodeBreaks(CSeq_feat &feat, CScope &scope)
Parses all valid transl_except Gb-quals into code-breaks for cdregion, then removes the transl_except...
static bool ExtendStopPosition(CSeq_feat &f, const CSeq_feat *cdregion, size_t extension=0)
static CRef< CSeq_loc > GetProteinLocationFromNucleotideLocation(const CSeq_loc &nuc_loc, CScope &scope)
static bool MoveProteinSpecificFeats(CSeq_entry_Handle seh)
Moves protein-specific features from nucleotide sequences in the Seq-entry to the appropriate protein...
static bool LocationMayBeExtendedToMatch(const CSeq_loc &orig, const CSeq_loc &improved)
Checks whether it is possible to extend the original location up to improved one.
void xParseCdregions(objects::CSeq_entry &entry, TAsyncToken &)
objects::CFastaReader::TPostponedModMap m_PrtModMap
void AddProteins(const objects::CSeq_entry &possible_proteins, objects::CSeq_entry &entry)
void MoveRegionsToProteins(objects::CSeq_entry &entry)
void xMoveCdRegions(objects::CSeq_entry_Handle entry_h, objects::CSeq_annot::TData::TFtable &seq_ftable, objects::CSeq_annot::TData::TFtable &set_ftable, TAsyncToken &)
void FindOpenReadingFrame(objects::CSeq_entry &entry) const
bool xAddProteinToSeqEntry(const objects::CBioseq &protein, objects::CSeq_entry_Handle seh)
void ConvertNucSetToSet(CRef< objects::CSeq_entry > &entry) const
CFeatureTableReader(CTable2AsnContext &context)
CTable2AsnContext & m_context
void MakeGapsFromFeatures(objects::CSeq_entry_Handle seh) const
CRef< objects::CSeq_entry > ReadProtein(ILineReader &line_reader)
void ChangeDeltaProteinToRawProtein(objects::CSeq_entry &entry) const
void xConvertSeqIntoSeqSet(objects::CSeq_entry &entry, bool nuc_prod_set) const
static void RemoveEmptyFtable(objects::CBioseq &bioseq)
CRef< objects::CSeq_feat > x_AddProteinFeatureToProtein(CRef< objects::CSeq_entry > nuc, CConstRef< objects::CSeq_loc > cds_loc, const list< CRef< objects::CSeq_id >> &pOriginalProtIds, objects::CBioseq &protein, bool partial5, bool partial3)
CRef< objects::CDelta_seq > MakeGap(objects::CBioseq &bioseq, const objects::CSeq_feat &feature_gap) const
void xMergeCDSFeatures_impl(objects::CSeq_entry &, TAsyncToken &)
void MergeCDSFeatures(objects::CSeq_entry &, TAsyncToken &)
void MoveProteinSpecificFeats(objects::CSeq_entry &entry)
CRef< objects::CSeq_entry > m_replacement_protein
bool xCheckIfNeedConversion(const objects::CSeq_entry &entry) const
CRef< objects::CSeq_entry > xTranslateProtein(const objects::CBioseq &bioseq, objects::CSeq_feat &cd_feature, list< CRef< CSeq_feat >> &seq_ftable, TAsyncToken &)
CRef< CDelta_seq > CreateGap(CBioseq &bioseq, TSeqPos gap_start, TSeqPos gap_length)
@Imp_feat.hpp User-defined methods of the data storage class.
static CLineError * Create(EProblem eProblem, EDiagSev eSeverity, const std::string &strSeqId, unsigned int uLine, const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), const std::string &strErrorMessage=string(""), const TVecOfLines &vecOfOtherLines=TVecOfLines())
Use this because the constructor is protected.
vector< CRef< objects::CSeq_loc > > TLocVec
static CRef< objects::CSeq_annot > MakeCDSAnnot(const TLocVec &orfs, int genetic_code=1, objects::CSeq_id *id=NULL)
This version returns an annot full of CDS features.
static void FindOrfs(const string &seq, TLocVec &results, unsigned int min_length_bp=3, int genetic_code=1, const vector< string > &allowable_starts=vector< string >(), bool longest_orfs=true, size_t max_seq_gap=k_default_max_seq_gap)
Find ORFs in both orientations.
CProSplignOptions_Base & SetAltStarts(bool allow_alt_start)
Output filtering parameters.
@ ePassThrough
all zeroes - no filtering
@ eWithHoles
default filtering parameters
spliced protein to genomic alignment
CRef< objects::CSeq_align > FindAlignment(objects::CScope &scope, const objects::CSeq_id &protein, const objects::CSeq_loc &genomic, CProSplignOutputOptions output_options=CProSplignOutputOptions())
Aligns protein to a region on genomic sequence.
void GetLabel(string *label) const
bool IsFtable(void) const
@Seq_descr.hpp User-defined methods of the data storage class.
const TAnnot & GetAnnot(void) const
bool IsSetAnnot(void) const
void SetDescr(CSeq_descr &value)
list< CRef< CSeq_annot > > TAnnot
CSeq_entry * GetParentEntry(void) const
CSeq_feat_EditHandle.
namespace ncbi::objects::
const CProt_ref * GetProtXref(void) const
get protein (if present) from Seq-feat.xref list
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
void RemoveQualifier(const string &qual_name)
Remove all qualifiers with the given name; do nothing if no such qualifier exists.
bool AddSeqFeatXref(const CSeqFeatXref::TId &id)
@ eLinkEvid_UnspecifiedOnly
only the "unspecified" linkage-evidence is allowed
@ eLinkEvid_Forbidden
no linkage-evidence is allowed
@ eLinkEvid_Required
any linkage-evidence is allowed, and at least one is required
static const SGapTypeInfo * NameToGapTypeInfo(const CTempString &sName)
From a gap-type string, get the SGapTypeInfo, insensitive to case, etc.
static bool GetOrgName(string &name, const objects::CSeq_entry &entry)
objects::ILineErrorListener * m_logger
bool m_use_hypothetic_protein
SPrtAlnOptions prtAlnOptions
static bool IsDBLink(const objects::CSeqdesc &desc)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
@ eProblem_GeneralParsingError
Abstract base class for lightweight line-by-line reading.
container_type::const_iterator const_iterator
const_iterator end() const
const_iterator lower_bound(const key_type &key) const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
void SetMolinfoForProtein(CRef< objects::CSeq_entry > protein, bool partial5, bool partial3)
CRef< objects::CSeq_feat > AddEmptyProteinFeatureToProtein(CRef< objects::CSeq_entry > protein, bool partial5, bool partial3)
bool SetMolinfoCompleteness(objects::CMolInfo &mi, bool partial5, bool partial3)
Operators to edit gaps in sequences.
static void s_SetProtRef(const CSeq_feat &cds, CConstRef< CSeq_feat > pMrna, CProt_ref &prot_ref)
static CBioseq_Handle s_MatchProteinById(const CBioseq &protein, CSeq_entry_Handle seh)
static void s_AppendProtRefInfo(CProt_ref &current_ref, const CProt_ref &other_ref)
static void s_ReportDuplicateMods(const set< string > &duplicateMods, const string &idString, TSeqPos lineNumber, objects::ILineErrorListener &logger)
static bool s_MoveProteinSpecificFeats(CSeq_entry &entry)
static bool s_TranslateCds(const CSeq_feat &cds, CScope &scope)
static CRef< CSeq_loc > s_GetCDSLoc(CScope &scope, const CSeq_id &proteinId, const CSeq_loc &genomicLoc, TSeqPos bioseqLength, const CTable2AsnContext::SPrtAlnOptions &prtAlnOptions)
static bool s_HasUnprocessedCdregions(const CSeq_entry &nuc_prot)
static CRef< CSeq_feat > s_MakeCDSFeat(CSeq_loc &loc, bool isPartial, CSeq_id &productId)
static const CSeq_id * s_GetIdFromLocation(const CSeq_loc &loc)
static bool s_UnknownEstimatedLength(const CSeq_feat &feat)
static CBioseq_Handle s_GetSingleNucSeq(CSeq_entry_Handle seh)
static void s_GatherRegionIterators(list< CRef< CSeq_annot >> &annots, list< SRegionIterators > &its)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
@ eDiag_Error
Error message.
const string & GetMsg(void) const
Get message string.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
CNcbiIos & MSerial_VerifyNo(CNcbiIos &io)
#define ENUM_METHOD_NAME(EnumName)
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
#define MSerial_AsnText
I/O stream manipulators.
static string CanonicalizeString(const CTempString &sValue)
@ fAddMods
Parse defline mods and add to SeqEntry.
@ fNoUserObjs
Don't save raw deflines in User-objects.
@ fForceType
Force specified type regardless of accession.
@ fAssumeProt
Assume prots unless accns indicate otherwise.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)
Parse a string representing one or more Seq-ids, appending the results to IDS.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
CConstRef< CSeq_id > GetSeqId(void) const
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
@ e_YES
SeqIds compared, and are equivalent.
@ fParse_PartialOK
Warn rather than throwing an exception when a FASTA-style ID set contains unparsable portions,...
@ fParse_ValidLocal
Treat otherwise unidentified strings as raw accessions, provided that they pass rudimentary validatio...
@ eContent
Untagged human-readable accession or the like.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
TSeqPos GetStop(ESeqLocExtremes ext) const
bool CopyFeaturePartials(CSeq_feat &dst, const CSeq_feat &src)
CopyFeaturePartials A function to copy the start and end partialness from one feature to another.
bool AdjustProteinMolInfoToMatchCDS(CMolInfo &molinfo, const CSeq_feat &cds)
AdjustProteinMolInfoToMatchCDS A function to change an existing MolInfo to match a coding region.
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether given feature is pseudo, using gene associated with feature if necessary Checks to...
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
string GetProteinName(const CBioseq_Handle &seq)
Return protein name from corresponding Prot-ref feature.
static CCdregion::EFrame FindBestFrame(const CSeq_feat &cds, CScope &scope)
Find "best" frame for a coding region.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
void RemoveTopLevelSeqEntry(const CTSE_Handle &entry)
Revoke TSE previously added using AddTopLevelSeqEntry() or AddBioseq().
CSeq_entry_EditHandle GetSeq_entryEditHandle(const CSeq_entry &entry)
CBioseq_set_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
vector< CSeq_id_Handle > TId
TClass GetClass(void) const
CRef< CSeqdesc > RemoveSeqdesc(const CSeqdesc &v) const
CBioseq_set_Handle GetParentBioseq_set(void) const
Return a handle for the parent Bioseq-set, or null handle.
CBioseq_set_EditHandle GetParentBioseq_set(void) const
Get parent bioseq-set edit handle.
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
TSet ConvertSeqToSet(TClass set_class=CBioseq_set::eClass_not_set) const
Convert the entry from Bioseq to Bioseq-set.
CConstRef< CBioseq_set > GetCompleteBioseq_set(void) const
Return the complete bioseq-set object.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id) const
Get Bioseq handle from the TSE of this Seq-entry.
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
CSeq_entry_Handle GetParentEntry(void) const
Return a handle for the parent seq-entry of the bioseq.
CSeq_entry_EditHandle AttachEntry(CSeq_entry &entry, int index=-1) const
Attach an existing seq-entry.
bool IsSetClass(void) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeq_entry_EditHandle AttachEntry(CSeq_entry &entry, int index=-1) const
Attach an existing seq-entry.
CConstRef< TObject > GetCompleteObject(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
bool AddSeqdesc(CSeqdesc &v) const
const TId & GetId(void) const
int GetSeq_entry_Index(const CSeq_entry_Handle &handle) const
bool IsSynonym(const CSeq_id &id) const
Check if this id can be used to obtain this bioseq handle.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty -- not pointing to any object, which means having a null value.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty -- pointing to an object and has a non-null value.
TObjectType * GetPointerOrNull(void) const THROWS_NONE
Get pointer value.
TObjectType * GetPointerOrNull(void) THROWS_NONE
Get pointer value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty -- not pointing to any object, which means having a null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
virtual bool IsType(TTypeInfo type) const
static const char label[]
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
bool IsId(void) const
Check if variant Id is selected.
TId GetId(void) const
Get the variant data.
TActivity & SetActivity(void)
Assign a value to Activity data member.
bool IsSetDesc(void) const
description (instead of name) Check if a value has been assigned to Desc data member.
const TDb & GetDb(void) const
Get the Db member data.
const TActivity & GetActivity(void) const
Get the Activity member data.
TEc & SetEc(void)
Assign a value to Ec data member.
const TName & GetName(void) const
Get the Name member data.
bool IsSetDb(void) const
ids in other dbases Check if a value has been assigned to Db data member.
bool IsSetEc(void) const
E.C.
void SetDesc(const TDesc &value)
Assign a value to Desc data member.
TProcessed GetProcessed(void) const
Get the Processed member data.
void SetProcessed(TProcessed value)
Assign a value to Processed data member.
bool IsSetName(void) const
Protein name. Check if a value has been assigned to Name data member.
const TDesc & GetDesc(void) const
Get the Desc member data.
bool IsSetActivity(void) const
Activities. Check if a value has been assigned to Activity data member.
const TEc & GetEc(void) const
Get the Ec member data.
TDb & SetDb(void)
Assign a value to Db data member.
TName & SetName(void)
Assign a value to Name data member.
bool IsSetExt(void) const
Generic fields for ncRNA, tmRNA, miscRNA. Check if a value has been assigned to Ext data member.
const TName & GetName(void) const
Get the variant data.
const TExt & GetExt(void) const
Get the Ext member data.
bool IsName(void) const
Check if variant Name is selected.
@ e_not_set
No variant selected.
TXref & SetXref(void)
Assign a value to Xref data member.
const TKey & GetKey(void) const
Get the Key member data.
void ResetPartial(void)
Reset Partial data member.
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
E_Choice Which(void) const
Which variant is currently selected.
bool IsProt(void) const
Check if variant Prot is selected.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void SetPartial(TPartial value)
Assign a value to Partial data member.
void SetProduct(TProduct &value)
Assign a value to Product data member.
const TQual & GetQual(void) const
Get the Qual member data.
bool IsSetKey(void) const
Check if a value has been assigned to Key data member.
const TId & GetId(void) const
Get the Id member data.
const TLocal & GetLocal(void) const
Get the variant data.
bool IsSetXref(void) const
Cite other relevant features. Check if a value has been assigned to Xref data member.
const TLocation & GetLocation(void) const
Get the Location member data.
E_Choice
Choice variants.
bool IsLocal(void) const
Check if variant Local is selected.
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsSetExcept_text(void) const
Explain if except=TRUE. Check if a value has been assigned to Except_text data member.
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
const TProduct & GetProduct(void) const
Get the Product member data.
const TGene & GetGene(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
const TXref & GetXref(void) const
Get the Xref member data.
vector< CRef< CSeqFeatXref > > TXref
vector< CRef< CGb_qual > > TQual
const TRna & GetRna(void) const
Get the variant data.
bool IsSetProduct(void) const
Product of process. Check if a value has been assigned to Product data member.
bool IsSetLocation(void) const
Feature made from. Check if a value has been assigned to Location data member.
@ e_Het
cofactor, prosthetic grp, etc, bound to seq
@ e_not_set
No variant selected.
@ e_Region
named region (globin locus)
@ e_Seq
to annotate origin from another seq
@ e_Txinit
transcription initiation
@ e_Num
a numbering system
@ e_Pub
publication applies to this seq
@ e_User
user defined structure
@ e_Rsite
restriction site (for maps really)
@ e_Comment
just a comment
@ e_Non_std_residue
non-standard residue here in seq
void SetTo(TTo value)
Assign a value to To data member.
const TWhole & GetWhole(void) const
Get the variant data.
TFrom GetFrom(void) const
Get the From member data.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
TStrand GetStrand(void) const
Get the Strand member data.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
const TSeq & GetSeq(void) const
Get the variant data.
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
TSet & SetSet(void)
Select the variant.
TClass GetClass(void) const
Get the Class member data.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSet(void) const
Check if variant Set is selected.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
void SetClass(TClass value)
Assign a value to Class data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_nuc_prot
nuc acid and coded proteins
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_genbank
converted genbank
void SetCompleteness(TCompleteness value)
Assign a value to Completeness data member.
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
TId & SetId(void)
Assign a value to Id data member.
void ResetId(void)
Reset Id data member.
const TInst & GetInst(void) const
Get the Inst member data.
TTitle & SetTitle(void)
Select the variant.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
const TSource & GetSource(void) const
Get the variant data.
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
const TId & GetId(void) const
Get the Id member data.
void ResetAnnot(void)
Reset Annot data member.
bool IsSetInst(void) const
The sequence data. Check if a value has been assigned to Inst data member.
TLength GetLength(void) const
Get the Length member data.
list< CRef< CSeq_id > > TId
void SetInst(TInst &value)
Assign a value to Inst data member.
virtual void Reset(void)
Reset the whole object.
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
list< CRef< CSeq_feat > > TFtable
bool IsSetId(void) const
Equivalent identifiers. Check if a value has been assigned to Id data member.
void SetTech(TTech value)
Assign a value to Tech data member.
TMolinfo & SetMolinfo(void)
Select the variant.
@ eCompleteness_complete
complete biological entity
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ eTech_concept_trans
conceptual translation
@ e_User
user defined object
@ e_Update_date
date of last update
@ e_Pub
a reference to the publication
@ e_Molinfo
info on the molecule and techniques
@ e_Create_date
date entry first created/released
@ e_Title
a title for this sequence
@ e_Source
source of materials, includes Org-ref
@ eMol_na
just a nucleic acid
bm::gap_word_t gap_length(const bm::gap_word_t *buf) noexcept
Returns GAP block length.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
void VisitAllBioseqs(objects::CSeq_entry &entry, _M &&m)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Utility macros and typedefs for exploring NCBI objects from seq.asn.
Utility macros and typedefs for exploring NCBI objects from seqset.asn.
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Holds information about a given gap-type string.
CSeq_gap::EType m_eType
The underlying type that the string corresponds to.
ELinkEvid m_eLinkEvid
Indicates what linkage-evidences are compatible with this.
Compare objects pointed to by (smart) pointer.
bool operator()(const CSeq_id *const left, const CSeq_id *const right) const
list< CRef< CSeq_feat > >::const_iterator TFeatIt
list< CRef< CSeq_annot > >::iterator TAnnotIt
CRef< objects::CBioseq > bioseq
CRef< objects::CSeq_feat > ParentGene(const objects::CSeq_feat &cds)
CRef< objects::CSeq_feat > ParentMrna(const objects::CSeq_feat &cds)
static void s_ExtendIntervalToEnd(objects::CSeq_interval &ival, objects::CBioseq_Handle bsh)
bool AssignLocalIdIfEmpty(CSeq_feat &feature, int &id)
static CS_CONTEXT * context
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4