,
"CON",
"PAT",
"EST",
"GSS",
"HTC",
"HTG",
"STS",
"TSA",
114 "FUN",
"INV",
"MAM",
"ORG",
"PHG",
"PLN",
"PRI",
"PRO",
"ROD",
115 "SYN",
"UNA",
"VRL",
"VRT",
"PAT",
"EST",
"STS",
"UNC",
"GSS",
116 "HUM",
"HTG",
"HTC",
"CON",
"ENV",
"MUS",
"TGN",
"TSA",
124 "PLN",
"INV",
"MAM",
"UNA",
"PHG",
"PLN",
"PRI",
"BCT",
"ROD",
125 "SYN",
"UNA",
"VRL",
"VRT",
"PAT",
"EST",
"STS",
"UNA",
"GSS",
126 "PRI",
"HTG",
"HTC",
"CON",
"ENV",
"ROD",
"SYN",
"TSA",
174 "ENSEMBL-SCAFFOLDS",
177 "ENSEMBLGENOMES-GN",
178 "ENSEMBLGENOMES-TR",
181 "EVOLUTIONARYTRACE",
193 "GUIDETOPHARMACOLOGY",
235 "TOPDOWNPROTEOMICS",
241 "UNIPROT/SWISS-PROT",
243 "UNIPROTKB/SWISS-PROT",
302 if(update.
Empty()) {
318 if(seq_entries.front().Empty()) {
335 if(pp->
qamode&& ! seq_entries.empty())
366 result= seq_entries.front();
383 for(
const auto&
id: ids) {
384 if(id->IsStr() && id->GetStr() ==
str) {
396ids.push_back(obj_id);
423 boolvalid_biosample;
432 boolxip = xil && ! xil->empty();
448 for(eptr = bptr +
len; bptr < eptr; bptr = ptr) {
456name.assign(bptr, ptr);
478name =
"UniProtKB/Swiss-Prot";
480name =
"UniProtKB/TrEMBL";
490 if(ptr && ptr < p) {
491 id.assign(bptr, ptr);
502 if(
id.
empty()) {
507 if(name ==
"BioSample"&& !
id.
empty()) {
508many_biosample = (!
id.empty() && ! id1.empty());
511valid_biosample =
false;
512 if(many_biosample || ! valid_biosample) {
517q =
StringChr(
const_cast<char*
>(drline),
'\n');
523 if(! valid_biosample)
530 for(
const string&
val: dr_biosample) {
540dr_biosample.push_back(
id);
544 if(!
id.
empty() && ! id1.empty()) {
549q =
StringChr(
const_cast<char*
>(drline),
'\n');
559 for(
const string&
val: dr_ena) {
569dr_ena.push_back(
id);
578new_xref->SetDbname().SetName(name);
586new_xrefs.push_back(new_xref);
609 if(! new_xrefs.empty())
610embl.
SetXref().swap(new_xrefs);
617 switch(
id.Which()) {
619 return id.SetGenbank();
621 return id.SetEmbl();
625 return id.SetSwissprot();
627 return id.SetOther();
629 return id.SetDdbj();
639 return id.SetGpipe();
641 return id.SetNamed_annot_track();
690vector<string> taxLines;
692 for(
auto& line : taxLines) {
694 if(line.empty() || line.starts_with(
"XX"sv)) {
697 if(! sTaxname.empty()) {
704 if(sTaxname.empty()) {
711 autoopenP = sTaxname.find(
'(');
712 if(openP != string::npos) {
713 autosCommonName = sTaxname.substr(0, openP);
714 autocommonTerm = sCommonName.find_last_not_of(
" \t(");
715 if(commonTerm != string::npos) {
716sCommonName = sCommonName.substr(0, commonTerm + 1);
762 boolallow_crossdb_featloc;
766 if(! dbp || ! dbp->
mBuf.
ptr)
774 for(q = p; *q !=
'\0'; q++) {
777 else if(*q ==
'\n') {
779 if(q[1] ==
'C'&& q[2] ==
'O'&& q[3] ==
' ') {
785 for(q = p,
r= p; *q !=
'\0'; q++)
790 for(q = p; *q !=
'\0'; q++)
791 if((q[0] ==
','&& q[1] ==
',') || (q[0] ==
'('&& q[1] ==
',') ||
792(q[0] ==
','&& q[1] ==
')'))
873 for(
i= 0, q = p; *q !=
'\0'; q++) {
945 boolpat_ref =
false;
946 boolest_kwd =
false;
947 boolsts_kwd =
false;
948 boolgss_kwd =
false;
949 boolhtc_kwd =
false;
950 boolfli_kwd =
false;
951 boolwgs_kwd =
false;
952 booltpa_kwd =
false;
953 booltsa_kwd =
false;
954 booltls_kwd =
false;
955 boolenv_kwd =
false;
956 boolmga_kwd =
false;
990 while(*bptr ==
' '|| *bptr ==
';')
996bptr = (
char*)
"CON";
1001 while(*bptr ==
' '|| *bptr ==
';')
1004dataclass[3] =
'\0';
1008bptr = (
char*)
" ";
1009dataclass[0] =
'\0';
1020embl->SetKeywords() = keywords;
1033 for(
const string&
key: keywords) {
1034 fta_keywords_check(
key, &est_kwd, &sts_kwd, &gss_kwd, &htc_kwd, &fli_kwd, &wgs_kwd, &tpa_kwd, &env_kwd, &mga_kwd, &tsa_kwd, &tls_kwd);
1069 const char* p = gbdiv.c_str();
1086 if(!
HasHtg(embl->GetKeywords())) {
1125 if(ibp->
is_mga==
false) {
1130}
else if(ibp->
is_mga) {
1140}
else if(ibp->
is_tpa) {
1146 if(ibp->
is_tsa==
false) {
1151}
else if(ibp->
is_tsa) {
1156 if(ibp->
is_tls==
false) {
1161}
else if(ibp->
is_tls) {
1166 if(
i== 2 && ibp->
htg> 0 && env_kwd)
1167 FtaErrPost(
SEV_WARNING,
ERR_KEYWORD_HTGPlusENV,
"This HTG record also has the ENV keyword, which is an unusual combination. Confirmation that isolation and cloning steps actually occured might be appropriate.");
1168 else if((
i== 2 && wgs_kwd && tpa_kwd) ||
1169(
i== 2 && tsa_kwd && tpa_kwd)) {
1170}
else if(
i!= 2 || env_kwd ==
false||
1171(est_kwd ==
false&& gss_kwd ==
false&& wgs_kwd ==
false)) {
1172 FtaErrPost(
SEV_REJECT,
ERR_KEYWORD_ConflictingKeywords,
"This record contains more than one of the special keywords used to indicate that a sequence is an HTG, EST, GSS, STS, HTC, WGS, ENV, FLI_CDNA, TPA, CAGE, TSA or TLS sequence.");
1180wgs_kwd ==
false&& tpa_kwd ==
false&& env_kwd ==
false) {
1195 if(! est_kwd && kw.find(
"EST") != string::npos) {
1198 if(! sts_kwd && kw.find(
"STS") != string::npos) {
1201 if(! gss_kwd && kw.find(
"GSS") != string::npos) {
1210 check_div(ibp->
is_pat, pat_ref, est_kwd, sts_kwd, gss_kwd, if_cds, gbdiv, &tech, ibp->
bases, pp->
source, drop);
1219}
else if(gbdiv ==
"CON") {
1223 boolis_htc_div = (gbdiv ==
"HTC");
1224 boolhas_htc =
HasHtc(embl->GetKeywords());
1226 if(is_htc_div && ! has_htc) {
1230 if(! is_htc_div && has_htc) {
1249 if(*p ==
'm'|| *p ==
'r')
1253 else if(
StringEquN(p,
"transcribed ", 12))
1267 if(! gbdiv.empty()) {
1268 if(gbdiv ==
"EST") {
1271}
else if(gbdiv ==
"STS") {
1274}
else if(gbdiv ==
"GSS") {
1277}
else if(gbdiv ==
"HTC") {
1281}
else if((gbdiv ==
"SYN") && bio_src &&
1309 for(
const auto& subtype : bio_src->
GetSubtype()) {
1323 if(! std_creation_date || ! std_update_date) {
1327embl->SetCreation_date().SetStd(*std_creation_date);
1328embl->SetUpdate_date().SetStd(*std_update_date);
1330ibp->
wgssec[0] =
'\0';
1339 boolfound =
false;
1340 for(
const string& acc : embl->SetExtra_acc()) {
1342(acc[0] ==
'C'|| acc[0] ==
'U')) {
1374ibp->
wgssec[0] =
'\0';
1380 if(! gbdiv.empty()) {
1383 const auto& subtype = bio_src->
GetSubtype();
1385find_if(begin(subtype), end(subtype), [](
autopSubSource) {
1391}
else if(! bio_src ||
1397 if(! gbb->IsSetExtra_accessions() && ! gbb->IsSetKeywords() && ! gbb->IsSetDiv())
1436 for(
i= 0, q = bptr; *q !=
'\0'; q++) {
1448 for(p =
r+ 1; *p ==
' '|| *p ==
';';)
1457 else if(ibp->
is_wgs) {
1464}
else if(ibp->
is_tsa)
1472 GetFlatBiomol(mol_info->SetBiomol(), mol_info->GetTech(), bptr, pp, entry, org_ref);
1474mol_info->ResetBiomol();
1486 if(!
tag|| lst.empty())
1493 for(
const string& item : lst) {
1494field->
SetData().SetStrs().push_back(item);
1505 if(dr_ena.empty() && dr_biosample.empty())
1510 for(
auto& descr : descrs) {
1511 if(! descr->IsUser() || ! descr->GetUser().IsSetType())
1514 const CObject_id& obj_id = descr->GetUser().GetType();
1516 if(obj_id.
IsStr() && obj_id.
GetStr() ==
"DBLink") {
1517user_obj_ptr = &descr->SetUser();
1524 if(! dr_biosample.empty())
1528 if(! dr_ena.empty()) {
1532 if(field_bs.
Empty() && field_ena.
Empty())
1539user_obj->
SetType().SetStr(
"DBLink");
1545user_obj_ptr->
SetData().push_back(field_bs);
1547user_obj_ptr->
SetData().push_back(field_ena);
1552descrs.push_back(descr);
1559dbuop->
Assign(*user_obj_ptr);
1570 for(
const auto& xref : embl_block.
GetXref()) {
1571 if(! xref->IsSetDbname() || ! xref->GetDbname().IsName() ||
1572! xref->GetDbname().GetName().starts_with(
"IMGT/"sv))
1576 for(
const auto&
id: xref->GetId()) {
1577 if(id->IsStr() && ! id->GetStr().empty()) {
1587 tag->SetDb(xref->GetDbname().GetName());
1589 string& id_str =
tag->SetTag().SetStr();
1591 boolneed_delimiter =
false;
1592 for(
const auto&
id: xref->GetId()) {
1593 if(id->IsStr() && ! id->GetStr().empty()) {
1597need_delimiter =
true;
1599id_str +=
id->GetStr();
1603xrefs.push_back(
tag);
1611imp.
SetKey(
"misc_feature");
1616 if(annot.empty() || ! (*annot.begin())->IsFtable()) {
1618new_annot->
SetData().SetFtable().push_back(feat);
1620annot.push_back(new_annot);
1623old_annot.
SetData().SetFtable().push_front(feat);
1629 returnline.starts_with(
"TPA:"sv) ||
1630line.starts_with(
"TPA_exp:"sv) ||
1631line.starts_with(
"TPA_inf:"sv) ||
1632line.starts_with(
"TPA_asm:"sv) ||
1633line.starts_with(
"TPA_reasm:"sv) ||
1634line.starts_with(
"TPA_specdb:"sv);
1645 boolis_htg =
false;
1661 for(
size_tpos = 0; pos <
str.size();) {
1662pos =
str.find(
";;", pos);
1663 if(pos == string::npos)
1667 for(
size_t i= pos;
i<
str.size() &&
str[
i] ==
';'; ++
i)
1672 while(!
str.empty()) {
1673 charc =
str.back();
1674 if(c ==
' '|| c ==
';')
1681 str.starts_with(
"TPA:"sv)) {
1687 if(ibp->
is_tsa==
false&&
str.starts_with(
"TSA:"sv)) {
1693 if(ibp->
is_tls==
false&&
str.starts_with(
"TLS:"sv)) {
1699 if(
str.starts_with(
"TPA:"sv)) {
1702str1 =
"TPA_asm:"sv;
1704str1 =
"TPA_specdb:"sv;
1706str1 =
"TPA_inf:"sv;
1708str1 =
"TPA_exp:"sv;
1711 str.replace(0, 4, str1);
1716bioseq.
SetDescr().Set().push_back(descr);
1732(title.empty() || ! title.starts_with(
"TSA:"sv))) {
1738 if(ibp->
is_tls&& (title.empty() || ! title.starts_with(
"TLS:"))) {
1747 for(
auto& ref_blk : chain) {
1754descr->
SetPub(*pubdesc);
1755bioseq.
SetDescr().Set().push_back(descr);
1759 for(
auto& ref_blk : chain) {
1766descr->
SetPub(*pubdesc);
1767bioseq.
SetDescr().Set().push_back(descr);
1776 for(
auto& descr : bioseq.
SetDescr().Set()) {
1777 if(descr->IsSource()) {
1778bio_src = &(descr->SetSource());
1780org_ref = &bio_src->
SetOrg();
1799ibp->
is_contig&& ! mol_info->IsSetTech()) {
1802mol_info->ResetTech();
1804mol_info->SetTech(tech);
1807 if(mol_info->IsSetBiomol() || mol_info->IsSetTech()) {
1810bioseq.
SetDescr().Set().push_back(descr);
1820 if(! dr_ena.empty() || ! dr_biosample.empty())
1823 if(embl_block.
Empty()) {
1833 if(embl_block->GetExtra_acc().empty())
1834embl_block->ResetExtra_acc();
1844 boolhasEmblBlock =
false;
1847descr->
SetEmbl(*embl_block);
1848bioseq.
SetDescr().Set().push_back(descr);
1849hasEmblBlock =
true;
1857 "TPA:{} record lacks the mandatory comment line. Entry dropped.",
1858(ibp->
inferential==
false) ?
"experimental":
"inferential");
1878 if(hasEmblBlock && embl_block->IsSetDiv() && embl_block->GetDiv() < 15) {
1885 for(
auto& pAnnot : bioseq.
SetAnnot()) {
1886 if(pAnnot->IsFtable()) {
1887 for(
auto& pFeat : pAnnot->SetData().SetFtable()) {
1888 if(pFeat->IsSetData() && pFeat->SetData().IsBiosrc()) {
1889 auto& biosrc = pFeat->SetData().SetBiosrc();
1890 if(biosrc.IsSetOrg() &&
1891(! biosrc.GetOrg().IsSetDb() ||
1900}
else if(gbb && gbb->IsSetDiv()) {
1908bioseq.
SetDescr().Set().push_back(descr);
1926 for(
auto& user_obj : user_objs) {
1929bioseq.
SetDescr().Set().push_back(descr);
1935 for(q =
str, p = q; *p !=
'\0';) {
1936 if(*p ==
';'&& (p[1] ==
' '|| p[1] ==
'~'))
1938 if(*p ==
'~'|| *p ==
' ') {
1940 for(p++; *p ==
' '|| *p ==
'~';)
1948 if(
str[0] != 0) {
1951bioseq.
SetDescr().Set().push_back(descr);
1966 if(std_creation_date.
NotEmpty()) {
1969bioseq.
SetDescr().Set().push_back(descr);
1972 if(std_update_date.
NotEmpty()) {
1975bioseq.
SetDescr().Set().push_back(descr);
1978 stringcrdate_str, update_str;
1979std_creation_date->
GetDate(&crdate_str,
"%2M-%2D-%4Y");
1980std_update_date->
GetDate(&crdate_str,
"%2M-%2D-%4Y");
1992 for(
const auto& os_blk : chain) {
1998 if(org_ref.
Empty())
2002bio_src->
SetOrg(*org_ref);
2004 string& taxname_str = org_ref->
SetTaxname();
2007 while(taxname_str[off_pos] !=
' '&& off_pos < taxname_str.size())
2009 while(taxname_str[off_pos] ==
' '&& off_pos < taxname_str.size())
2013taxname_str = taxname_str.substr(off_pos);
2014 if(taxname_str ==
"Unknown.") {
2015taxname_str = taxname_str.substr(0, taxname_str.size() - 1);
2018 for(
const auto& subdbp : os_blk.GetSubBlocks()) {
2023 if(subdbp.mType !=
ParFlat_OC|| ! subdbp.mBuf.ptr ||
2028 while(! lineage.empty()) {
2029 autoit = lineage.find(
"\nOC ");
2030 if(it == string::npos)
2032lineage.erase(it, 5);
2034 while(! lineage.empty()) {
2035 charc = lineage.back();
2036 if(c !=
' '&& c !=
'\t'&& c !=
'\n'&& c !=
'.'&& c !=
';')
2040 if(! lineage.empty() && ! org_ref->
IsSetOrgname()) {
2047bioseq.
SetDescr().Set().push_front(descr);
2070 while(*p ==
' '|| *p ==
';')
2083 for(
i= 0, p = entry.
mBuf.
ptr; *p !=
'\0'&&
i< 4; p++)
2084 if(*p ==
';'&& p[1] ==
' ')
2094 for(p++; *p ==
' ';)
2114 boolseq_long =
false;
2119 if(! ibp->
drop) {
2125 autoebp = pEntry->GetEntryData();
2126 char* ptr = pEntry->mBuf.ptr;
2128 char* eptr = ptr + pEntry->mBuf.len;
2168ebp->seq_entry->SetSeq(*bioseq);
2213 if(! ibp->
gaps.empty())
2215 else if(ibp->
htg== 4 || ibp->
htg== 1 || ibp->
htg== 2 ||
2218}
else if(! ibp->
gaps.empty())
2242pEntry->mpQscore.clear();
2248 id->SetPatent(*ibp->
psip);
2249bioseq->
SetId().push_back(
id);
2262seq_entries.push_back(ebp->seq_entry);
2263ebp->seq_entry.
Reset();
2266 if(ibp->
htg== 4 || ibp->
htg== 1 || ibp->
htg== 2) {
2276 else if(! ibp->
drop) {
2282 if(! ibp->
drop) {
2320 boolpat_ref =
false;
2321 boolest_kwd =
false;
2322 boolsts_kwd =
false;
2323 boolgss_kwd =
false;
2324 boolhtc_kwd =
false;
2325 boolfli_kwd =
false;
2326 boolwgs_kwd =
false;
2327 booltpa_kwd =
false;
2328 boolenv_kwd =
false;
2329 boolmga_kwd =
false;
2330 booltsa_kwd =
false;
2331 booltls_kwd =
false;
2346embl->SetKeywords().swap(ibp->
keywords);
2351 for(
const string&
key: embl->GetKeywords()) {
2352 fta_keywords_check(
key, &est_kwd, &sts_kwd, &gss_kwd, &htc_kwd, &fli_kwd, &wgs_kwd, &tpa_kwd, &env_kwd, &mga_kwd, &tsa_kwd, &tls_kwd);
2362dataclass[0] =
'\0';
2395 const char* p = gbdiv.c_str();
2412 if(!
HasHtg(embl->GetKeywords())) {
2451 if(ibp->
is_mga==
false) {
2456}
else if(ibp->
is_mga) {
2467}
else if(ibp->
is_tpa) {
2473 if(ibp->
is_tsa==
false) {
2478}
else if(ibp->
is_tsa) {
2484 if(ibp->
is_tls==
false) {
2489}
else if(ibp->
is_tls) {
2495 if(
i== 2 && ibp->
htg> 0 && env_kwd)
2496 FtaErrPost(
SEV_WARNING,
ERR_KEYWORD_HTGPlusENV,
"This HTG record also has the ENV keyword, which is an unusual combination. Confirmation that isolation and cloning steps actually occured might be appropriate.");
2497 else if(
i!= 2 || env_kwd ==
false||
2498(est_kwd ==
false&& gss_kwd ==
false&& wgs_kwd ==
false)) {
2499 FtaErrPost(
SEV_REJECT,
ERR_KEYWORD_ConflictingKeywords,
"This record contains more than one of the special keywords used to indicate that a sequence is an HTG, EST, GSS, STS, HTC, WGS, ENV, FLI_CDNA, TPA, CAGE, TSA or TLS sequence.");
2507wgs_kwd ==
false&& tpa_kwd ==
false&& env_kwd ==
false) {
2519 if(! est_kwd && kw->find(
"EST") != string::npos) {
2522 if(! sts_kwd && kw->find(
"STS") != string::npos) {
2525 if(! gss_kwd && kw->find(
"GSS") != string::npos) {
2533 check_div(ibp->
is_pat, pat_ref, est_kwd, sts_kwd, gss_kwd, if_cds, gbdiv, &tech, ibp->
bases, pp->
source, drop);
2542}
else if(gbdiv ==
"CON") {
2546 boolis_htc_div = (gbdiv ==
"HTC");
2547 boolhas_htc =
HasHtc(embl->GetKeywords());
2549 if(is_htc_div && ! has_htc) {
2553 if(! is_htc_div && has_htc) {
2562 if(*
r==
'm'|| *
r==
'r')
2583 if(! gbdiv.empty()) {
2584 if(gbdiv ==
"EST") {
2587}
else if(gbdiv ==
"STS") {
2590}
else if(gbdiv ==
"GSS") {
2593}
else if(gbdiv ==
"HTC") {
2597}
else if((gbdiv ==
"SYN") && bio_src &&
2624ibp->
wgssec[0] =
'\0';
2631embl->SetCreation_date().SetStd(*std_creation_date);
2636embl->SetUpdate_date().SetStd(*std_update_date);
2640 if(std_update_date.
Empty() && std_creation_date.
NotEmpty())
2641embl->SetUpdate_date().SetStd(*std_creation_date);
2647 boolfound =
false;
2648 for(
const string& acc : embl->SetExtra_acc()) {
2650(acc[0] ==
'C'|| acc[0] ==
'U')) {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void AssemblyGapsToDelta(CBioseq &bioseq, TGapFeatsList &gf, bool *drop)
bool no_reference(const CBioseq &bioseq)
void SeqToDelta(CBioseq &bioseq, Int2 tech)
CMolInfo::TTech fta_check_con_for_wgs(CBioseq &bioseq)
bool fta_check_htg_kwds(TKeywordList &kwds, IndexblkPtr ibp, CMolInfo &mol_info)
void fta_set_molinfo_completeness(CBioseq &bioseq, const Indexblk *ibp)
void fta_add_hist(ParserPtr pp, CBioseq &bioseq, CGB_block::TExtra_accessions &extra_accs, Parser::ESource source, CSeq_id::E_Choice acctype, bool pricon, const char *acc)
bool fta_parse_tpa_tsa_block(CBioseq &bioseq, char *offset, char *acnum, Int2 vernum, size_t len, Int2 col_data, bool tpa)
Int4 fta_fix_seq_loc_id(TSeqLocList &locs, ParserPtr pp, string_view location, string_view name, bool iscon)
string GetQSFromFile(FILE *fd, const Indexblk *ibp)
CRef< CSeq_loc > fta_get_seqloc_int_whole(const CSeq_id &seq_id, size_t len)
bool check_cds(const DataBlk &entry, Parser::EFormat format)
bool fta_if_valid_sra(string_view id, bool dblink)
void fta_create_far_fetch_policy_user_object(CBioseq &bsp, Int4 num)
void fta_tsa_tls_comment_dblink_check(const CBioseq &bioseq, bool is_tsa)
void fta_remove_cleanup_user_object(CSeq_entry &seq_entry)
bool fta_dblink_has_sra(const CRef< CUser_object > &uop)
void GapsToDelta(CBioseq &bioseq, TGapFeatsList &gf, bool *drop)
void fta_get_project_user_object(TSeqdescList &descrs, const char *offset, Parser::EFormat format, bool *drop, Parser::ESource source)
void err_install(const Indexblk *ibp, bool accver)
bool fta_if_valid_biosample(string_view id, bool dblink)
void fta_parse_structured_comment(char *str, bool &bad, TUserObjVector &objs)
void StripSerialNumbers(TEntryList &seq_entries)
void AddNIDSeqId(CBioseq &bioseq, const DataBlk &entry, Int2 type, Int2 coldata, Parser::ESource source)
void fta_fix_orgref_div(const CBioseq::TAnnot &annots, COrg_ref *org_ref, CGB_block &gbb)
void DefVsHTGKeywords(CMolInfo::TTech tech, const DataBlk &entry, Int2 what, Int2 ori, bool cancelled)
unsigned char *const GetDNAConvTable()
void fta_sort_seqfeat_cit(TEntryList &seq_entries)
void PackEntries(TEntryList &seq_entries)
void fta_set_strandedness(TEntryList &seq_entries)
void CheckHTGDivision(const char *div, CMolInfo::TTech tech)
bool fta_orgref_has_taxid(const COrg_ref::TDb &dbtags)
bool XMLCheckCDS(const char *entry, const TXmlIndexList &xil)
char * GetDescrComment(char *offset, size_t len, Uint2 col_data, bool is_htg, bool is_pat)
char * GetEmblBlock(TDataBlkList &chain, char *ptr, short *retkw, Parser::EFormat format, char *eptr)
void EntryCheckDivCode(TEntryList &seq_entries, ParserPtr pp)
void GetEmblSubBlock(size_t bases, Parser::ESource source, const DataBlk &entry)
void XMLDefVsHTGKeywords(CMolInfo::TTech tech, const char *entry, const TXmlIndexList &xil, bool cancelled)
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
bool fta_EntryCheckGBBlock(TEntryList &seq_entries)
void fta_sort_descr(TEntryList &seq_entries)
void GetExtraAccession(IndexblkPtr ibp, bool allow_uwsec, Parser::ESource source, TAccessionList &accessions)
bool check_div(bool pat_acc, bool pat_ref, bool est_kwd, bool sts_kwd, bool gss_kwd, bool if_cds, string &div, CMolInfo::TTech *tech, size_t bases, Parser::ESource source, bool &drop)
CRef< CBioseq > CreateEntryBioseq(ParserPtr pp)
list< string > TStringList
void ProcessCitations(TEntryList &seq_entries)
CDate::ECompare Compare(const CDate_std &date) const
Indicate how *this relates to another date.
void GetDate(string *label, const string &format) const
Append a custom string representation of the date to the label.
@ eCompare_after
*this comes second.
void PostTotals() override
CRef< objects::CSeq_entry > xGetEntry() override
@Imp_feat.hpp User-defined methods of the data storage class.
const list< string > KeywordList() const
namespace ncbi::objects::
static bool IsNa(EMol mol)
EntryBlk * GetEntryData() const
struct DataBlk::@1166 mBuf
void fta_build_ena_user_object(list< CRef< CSeqdesc >> &descrs, TStringList &dr_ena, TStringList &dr_biosample, CRef< CUser_object > &dbuop)
static bool CheckEmblContigEverywhere(const IndexblkPtr ibp, Parser::ESource source)
static const char * ParFlat_DRname_array[]
static CRef< CSeq_entry > OutputEmblAsn(bool seq_long, ParserPtr pp, TEntryList &seq_entries)
static const char * ParFlat_Embl_DIV_array[]
static void GetEmblDescr(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq)
const char * GetEmblDiv(Uint1 num)
static CRef< CUser_field > fta_create_user_field(const char *tag, TStringList &lst)
static void fta_create_imgt_misc_feat(CBioseq &bioseq, CEMBL_block &embl_block, IndexblkPtr ibp)
static bool s_DuplicatesBiosource(const CBioSource &biosource, const string &gbdiv)
static CTextseq_id & SetTextIdRef(CSeq_id &id)
static void EmblGetDivisionNewID(IndexblkPtr ibp, const DataBlk &entry)
static const char * ParFlat_DBname_array[]
static bool s_GetEmblInst(ParserPtr pp, const DataBlk &entry, unsigned char *const dnaconv)
bool GetEmblInstContig(const DataBlk &entry, CBioseq &bioseq, ParserPtr pp)
static void EmblGetDivision(IndexblkPtr ibp, const DataBlk &entry)
static CRef< CMolInfo > GetEmblMolInfo(ParserPtr pp, const DataBlk &entry, const COrg_ref *org_ref)
static void GetEmblBlockXref(const DataBlk &entry, const TXmlIndexList *xil, const char *chentry, TStringList &dr_ena, TStringList &dr_biosample, bool *drop, CEMBL_block &embl)
static CRef< CGB_block > GetEmblGBBlock(ParserPtr pp, const DataBlk &entry, const string &gbdiv, CBioSource *bio_src)
static const char * ParFlat_Embl_dataclass_array[]
static bool s_HasTPAPrefix(string_view line)
static CRef< CEMBL_block > GetDescrEmblBlock(ParserPtr pp, const DataBlk &entry, CMolInfo &mol_info, string &gbdiv, const CBioSource *bio_src, TStringList &dr_ena, TStringList &dr_biosample)
static void FakeEmblBioSources(const DataBlk &entry, CBioseq &bioseq)
static CRef< COrg_ref > GetEmblOrgRef(const DataBlk &dbp)
static void SetXrefObjId(CEMBL_xref &xref, const string &str)
static const char * ParFlat_GBDIV_array[]
static void GetReleaseInfo(const DataBlk &entry)
CRef< CEMBL_block > XMLGetEMBLBlock(ParserPtr pp, const char *entry, CMolInfo &mol_info, string &gbdiv, CBioSource *bio_src, TStringList &dr_ena, TStringList &dr_biosample)
static void GetEmblDate(Parser::ESource source, const DataBlk &entry, CRef< CDate_std > &crdate, CRef< CDate_std > &update)
#define ParFlat_COL_DATA_EMBL
DataBlk * LoadEntry(ParserPtr pp, size_t offset, size_t len)
void FinalCleanup(TEntryList &seq_entries)
#define ERR_DRXREF_DuplicatedSRA
#define ERR_SEQUENCE_BadData
#define ERR_TPA_TpaSpansMissing
#define ERR_ENTRY_LongSequence
#define ERR_FORMAT_MissingContigFeature
#define ERR_KEYWORD_ShouldNotBeTPA
#define ERR_DIVISION_BadTSADivcode
#define ERR_FORMAT_MissingSequenceData
#define ERR_DIVISION_InvalidHTCKeyword
#define ERR_DRXREF_InvalidSRA
#define ERR_KEYWORD_IllegalForCON
#define ERR_DIVISION_MissingHTGKeywords
#define ERR_QSCORE_FailedToParse
#define ERR_ENTRY_LongHTGSSequence
#define ERR_KEYWORD_MissingTSA
#define ERR_DIVISION_BadTPADivcode
#define ERR_DRXREF_InvalidBioSample
#define ERR_LOCUS_WrongTopology
#define ERR_TPA_TpaCommentMissing
#define ERR_REFERENCE_No_references
#define ERR_KEYWORD_ShouldNotBeTLS
#define ERR_ENTRY_GBBlock_not_Empty
#define ERR_KEYWORD_HTGPlusENV
#define ERR_DEFINITION_MissingTPA
#define ERR_ENTRY_Skipped
#define ERR_DEFINITION_MissingTLS
#define ERR_KEYWORD_ESTSubstring
#define ERR_KEYWORD_ConflictingKeywords
#define ERR_DIVISION_ConDivLacksContig
#define ERR_LOCATION_ContigHasNull
#define ERR_KEYWORD_ENV_NoMatchingQualifier
#define ERR_KEYWORD_ShouldNotBeTSA
#define ERR_KEYWORD_STSSubstring
#define ERR_DIVISION_UnknownDivCode
#define ERR_KEYWORD_MissingTLS
#define ERR_DEFINITION_ShouldNotBeTSA
#define ERR_DIVISION_MissingHTCKeyword
#define ERR_DIVISION_MappedtoCON
#define ERR_FORMAT_ContigWithSequenceData
#define ERR_DRXREF_UnknownDBname
#define ERR_DRXREF_DuplicatedBioSamples
#define ERR_KEYWORD_NoGeneExpressionKeywords
#define ERR_DEFINITION_MissingTSA
#define ERR_KEYWORD_GSSSubstring
#define ERR_DEFINITION_ShouldNotBeTPA
#define ERR_FORMAT_MissingEnd
#define ERR_KEYWORD_MissingTPA
#define ERR_ENTRY_ParsingComplete
#define ERR_ORGANISM_NoOrganism
#define ERR_DATE_IllegalDate
#define ERR_DIVISION_HTCWrongMolType
#define ERR_KEYWORD_ShouldNotBeCAGE
#define ERR_DEFINITION_ShouldNotBeTLS
#define ERR_TSA_UnexpectedPrimaryAccession
list< CRef< objects::CSeq_entry > > TEntryList
bool QscoreToSeqAnnot(const string &qscore, CBioseq &bioseq, char *acc, Int2 ver, bool check_minmax, bool allow_na)
unique_ptr< string > XMLFindTagValue(const char *entry, const TXmlIndexList &xil, Int4 tag)
unique_ptr< string > XMLConcatSubTags(const char *entry, const TXmlIndexList &xil, Int4 tag, Char sep)
#define INSDSEQ_DATABASE_REFERENCE
#define INSDSEQ_CREATE_DATE
void XMLGetKeywords(const char *entry, const TXmlIndexList &xil, TKeywordList &keywords)
#define INSDSEQ_UPDATE_DATE
std::list< std::string > TKeywordList
forward_list< XmlIndex > TXmlIndexList
std::vector< CRef< objects::CUser_object > > TUserObjVector
bool fta_StartsWith(const char *s1, string_view s2)
bool StringEquN(const char *s1, const char *s2, size_t n)
bool StringEqu(const char *s1, const char *s2)
void StringCpy(char *d, const char *s)
void StringNCpy(char *d, const char *s, size_t n)
size_t StringLen(const char *s)
void FtaDeletePrefix(int prefix)
#define FtaErrPost(sev, level,...)
void fta_find_pub_explore(ParserPtr pp, TEntryList &seq_entries)
static const char * str(char *buf, int n)
void DealWithGenes(CRef< CSeq_entry > &pEntry, ParserPtr pp)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
void ResetDataAndHistory(void)
Clear all information in the scope except added data loaders.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty — pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty — not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
char Char
Alias for char.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
@ fAllowTrailingSymbols
Ignore trailing non-numerics characters.
@ eNocase
Case insensitive compare.
list< CRef< CObject_id > > TId
const TXref & GetXref(void) const
Get the Xref member data.
TXref & SetXref(void)
Assign a value to Xref data member.
TId & SetId(void)
Assign a value to Id data member.
bool IsSetXref(void) const
Check if a value has been assigned to Xref data member.
list< CRef< CEMBL_xref > > TXref
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
TGenome GetGenome(void) const
Get the Genome member data.
TOrigin GetOrigin(void) const
Get the Origin member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsSetOrigin(void) const
Check if a value has been assigned to Origin data member.
void SetOrg(TOrg &value)
Assign a value to Org data member.
@ eSubtype_environmental_sample
@ eOrigin_synthetic
purely synthetic
bool IsStr(void) const
Check if variant Str is selected.
void SetYear(TYear value)
Assign a value to Year data member.
void SetMonth(TMonth value)
Assign a value to Month data member.
TStd & SetStd(void)
Select the variant.
void SetDay(TDay value)
Assign a value to Day data member.
TData & SetData(void)
Assign a value to Data data member.
void SetNum(TNum value)
Assign a value to Num data member.
const TStr & GetStr(void) const
Get the variant data.
void SetLabel(TLabel &value)
Assign a value to Label data member.
TStr & SetStr(void)
Select the variant.
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
TYear GetYear(void) const
Get the Year member data.
TMonth GetMonth(void) const
Get the Month member data.
TDay GetDay(void) const
Get the Day member data.
bool IsSetDb(void) const
IDs in taxonomic or culture databases. Check if a value has been assigned to Db data member.
const TDiv & GetDiv(void) const
Get the Div member data.
void SetCommon(const TCommon &value)
Assign a value to Common data member.
const TDb & GetDb(void) const
Get the Db member data.
bool IsSetDiv(void) const
GenBank division code. Check if a value has been assigned to Div data member.
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
void SetOrgname(TOrgname &value)
Assign a value to Orgname data member.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
@ eSeq_code_type_iupacna
IUPAC 1 letter nuc acid code.
vector< CRef< CDbtag > > TDbxref
TDbxref & SetDbxref(void)
Assign a value to Dbxref data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
void SetData(TData &value)
Assign a value to Data data member.
void SetKey(const TKey &value)
Assign a value to Key data member.
bool IsMix(void) const
Check if variant Mix is selected.
const TMix & GetMix(void) const
Get the variant data.
@ e_Other
for historical reasons, 'other' = 'refseq'
@ e_Gpipe
Internal NCBI genome pipeline processing ID.
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_Named_annot_track
Internal named annotation tracking ID.
@ e_Tpg
Third Party Annot/Seq Genbank.
virtual void Reset(void)
Reset the whole object.
TRepr GetRepr(void) const
Get the Repr member data.
void SetData(TData &value)
Assign a value to Data data member.
TId & SetId(void)
Assign a value to Id data member.
const TInst & GetInst(void) const
Get the Inst member data.
TTitle & SetTitle(void)
Select the variant.
TPub & SetPub(void)
Select the variant.
TTopology GetTopology(void) const
Get the Topology member data.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
TGenbank & SetGenbank(void)
Select the variant.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
const TId & GetId(void) const
Get the Id member data.
TTech GetTech(void) const
Get the Tech member data.
TComment & SetComment(void)
Select the variant.
void SetInst(TInst &value)
Assign a value to Inst data member.
void ResetTech(void)
Reset Tech data member.
TSource & SetSource(void)
Select the variant.
void SetTopology(TTopology value)
Assign a value to Topology data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
TUser & SetUser(void)
Select the variant.
TEmbl & SetEmbl(void)
Select the variant.
void SetRepr(TRepr value)
Assign a value to Repr data member.
EStrand
strandedness in living organism
list< CRef< CSeq_annot > > TAnnot
void SetStrand(TStrand value)
Assign a value to Strand data member.
void SetTech(TTech value)
Assign a value to Tech data member.
TMolinfo & SetMolinfo(void)
Select the variant.
TCreate_date & SetCreate_date(void)
Select the variant.
TUpdate_date & SetUpdate_date(void)
Select the variant.
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_raw
continuous sequence
@ eTech_htgs_2
ordered High Throughput sequence contig
@ eTech_htc
high throughput cDNA
@ eTech_targeted
targeted locus sets/studies
@ eTech_sts
Sequence Tagged Site.
@ eTech_htgs_3
finished High Throughput sequence
@ eTech_htgs_1
unordered High Throughput sequence contig
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_wgs
whole genome shotgun sequencing
@ eTech_survey
one-pass genomic sequence
@ eTech_htgs_0
single genomic reads for coordination
@ eTech_fli_cdna
full length insert cDNA
@ eTech_est
Expressed Sequence Tag.
int fta_if_wgs_acc(string_view accession)
int CheckSTRAND(const string &str)
CRef< CDate_std > GetUpdateDate(string_view str, Parser::ESource source)
void GetFlatBiomol(CMolInfo::TBiomol &biomol, CMolInfo::TTech tech, char *molstr, ParserPtr pp, const DataBlk &entry, const COrg_ref *org_ref)
void LoadFeat(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq)
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::KEY key
const CharType(& source)[N]
std::list< SeqLoc > TSeqLocList
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
CRef< CPubdesc > DescrRefs(ParserPtr pp, DataBlk &dbp, Uint2 col_data)
CRef< objects::CSeq_entry > seq_entry
CRef< objects::CPatent_seq_id > psip
char *(* ff_get_qscore_pp)(const char *accession, Int2 v, Parser *pp)
vector< IndexblkPtr > entrylist
bool allow_crossdb_featloc
CKeywordParser & KeywordParser()
char *(* ff_get_qscore)(const char *accession, Int2 v)
bool GetGenomeInfo(CBioSource &bsp, string_view bptr)
void MaybeCutGbblockSource(TEntryList &seq_entries)
bool HasHtg(const TKeywordList &keywords)
char * SrchTheChar(string_view sv, Char letter)
bool HasHtc(const TKeywordList &keywords)
bool fta_tls_keywords_check(const TKeywordList &kwds, Parser::ESource source)
void PointToNextToken(char *&ptr)
void RemoveHtgPhase(TKeywordList &keywords)
string GetBlkDataReplaceNewLine(string_view instr, Uint2 indent)
void fta_remove_tsa_keywords(TKeywordList &kwds, Parser::ESource source)
void fta_remove_tpa_keywords(TKeywordList &kwds)
bool SrchNodeType(const DataBlk &entry, Int4 type, size_t *plen, char **pptr)
TDataBlkList & TrackNodes(const DataBlk &entry)
void fta_remove_keywords(CMolInfo::TTech tech, TKeywordList &kwds)
void fta_remove_tls_keywords(TKeywordList &kwds, Parser::ESource source)
char * SrchTheStr(string_view sv, string_view leadstr)
Int2 fta_StringMatch(const Char **array, string_view text)
DataBlk * TrackNodeType(const DataBlk &entry, Int2 type)
void fta_keywords_check(string_view str, bool *estk, bool *stsk, bool *gssk, bool *htck, bool *flik, bool *wgsk, bool *tpak, bool *envk, bool *mgak, bool *tsak, bool *tlsk)
bool IsCancelled(const TKeywordList &keywords)
bool fta_tsa_keywords_check(const TKeywordList &kwds, Parser::ESource source)
void fta_remove_env_keywords(TKeywordList &kwds)
bool fta_tpa_keywords_check(const TKeywordList &kwds)
void CleanTailNonAlphaChar(string &str)
CRef< CSeq_loc > xgbparseint_ver(string_view raw_intervals, bool &keep_rawPt, int &numErrors, const TSeqIdList &seq_ids, bool accver)
void XGappedSeqLocsToDeltaSeqs(const TSeqLocList &locs, TDeltaList &deltas)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4