,
93 "BIOPHYSICOCHEMICAL PROPERTIES:",
95 "CATALYTIC ACTIVITY:",
99 "DEVELOPMENTAL STAGE:",
101 "DISRUPTION PHENOTYPE:",
103 "ENZYME REGULATION:",
107 "MASS SPECTROMETRY:",
114 "SEQUENCE CAUTION:",
116 "SUBCELLULAR LOCATION:",
118 "TISSUE SPECIFICITY:",
170{
"MOD_RES",
ParFlatSPSites, 10,
"5-glutamyl glycerylphosphorylethanolamine"},
220{
"MOD_RES",
ParFlatSPSites, 13,
"2'-methylsulfonyltryptophan"},
223{
"MOD_RES",
ParFlatSPSites, 13,
"Asymmetric dimethylarginine"},
227{
"MOD_RES",
ParFlatSPSites, 13,
"Glutamate methyl ester (Gln)"},
228{
"MOD_RES",
ParFlatSPSites, 13,
"Glutamate methyl ester (Glu)"},
246{
"MOD_RES",
ParFlatSPSites, 13,
"N6,N6,N6-trimethyl-5-hydroxylysine"},
251{
"MOD_RES",
ParFlatSPSites, 13,
"N6-poly(methylaminopropyl)lysine"},
262{
"MOD_RES",
ParFlatSPSites, 13,
"Omega-N-methylated arginine"},
266{
"MOD_RES",
ParFlatSPSites, 13,
"Symmetric dimethylarginine"},
273{
"MOD_RES",
ParFlatSPSites, 14,
"(3R,4R)-3,4-dihydroxyproline"},
274{
"MOD_RES",
ParFlatSPSites, 14,
"(3R,4R)-4,5-dihydroxyisoleucine"},
275{
"MOD_RES",
ParFlatSPSites, 14,
"(3R,4S)-3,4-dihydroxyproline"},
276{
"MOD_RES",
ParFlatSPSites, 14,
"(3R,4S)-4-hydroxyisoleucine"},
279{
"MOD_RES",
ParFlatSPSites, 14,
"(3S,4R)-3,4-dihydroxyisoleucine"},
283{
"MOD_RES",
ParFlatSPSites, 14,
"3',4'-dihydroxyphenylalanine"},
296{
"MOD_RES",
ParFlatSPSites, 14,
"5-hydroxy-3-methylproline (Ile)"},
304{
"MOD_RES",
ParFlatSPSites, 14,
"N6-(3,6-diaminohexanoyl)-5-hydroxylysine"},
310{
"MOD_RES",
ParFlatSPSites, 17,
"Pyrrolidone carboxylic acid"},
311{
"MOD_RES",
ParFlatSPSites, 17,
"Pyrrolidone carboxylic acid (Glu)"},
313{
"MOD_RES",
ParFlatSPSites, 18,
"GAMMA-CARBOXYGLUTAMIC ACID"},
332{
"MOD_RES",
ParFlatSPSites, 19,
"Blocked carboxyl end (Arg)"},
333{
"MOD_RES",
ParFlatSPSites, 19,
"Blocked carboxyl end (His)"},
359{
"MOTIF",
ParFlatSPRegions, -1,
"Short sequence motif of biological interest"},
367{
nullptr, 0, 0,
nullptr}
372 #define ParFlatSPSitesModB 9 373 #define ParFlatSPSitesModE 174 375 #define COPYRIGHT "This Swiss-Prot entry is copyright." 376 #define COPYRIGHT1 "Copyrighted by the UniProt Consortium," 378 #define SPDE_RECNAME 000001 379 #define SPDE_ALTNAME 000002 380 #define SPDE_SUBNAME 000004 381 #define SPDE_FLAGS 000010 382 #define SPDE_INCLUDES 000020 383 #define SPDE_CONTAINS 000040 384 #define SPDE_FULL 000100 385 #define SPDE_SHORT 000200 386 #define SPDE_EC 000400 387 #define SPDE_ALLERGEN 001000 388 #define SPDE_BIOTECH 002000 389 #define SPDE_CD_ANTIGEN 004000 390 #define SPDE_INN 010000 401 const char*
end=
nullptr;
438forward_list<string>
syn;
479 "STRAIN",
"SUBSTRAIN",
"TYPE",
"SUBTYPE",
"VAR.",
"SEROTYPE",
480 "SEROGROUP",
"SEROVAR",
"CULTIVAR",
"PATHOVAR",
"CHEMOVAR",
"BIOVAR",
481 "BIOTYPE",
"GROUP",
"SUBGROUP",
"ISOLATE",
"ACRONYM",
"DOSAGE",
482 "NAT_HOST",
"SUBSP.",
nullptr 486 "2DBASE-ECOLI",
"AARHUS/GHENT-2DPAGE",
"AGD",
487 "ANU-2DPAGE",
"BURULIST",
"CARBBANK",
488 "CMR",
"CORNEA-2DPAGE",
"DICTYDB",
489 "DOMO",
"ECO2DBASE",
"GCRDB",
490 "GENEVESTIGATOR",
"GENEW",
"GENOMEREVIEWS",
491 "GERMONLINE",
"HIV",
"HSC-2DPAGE",
492 "HSSP",
"IPI",
"LINKHUB",
493 "LISTILIST",
"MAIZE-2DPAGE",
"MENDEL",
494 "MGD",
"MYPULIST",
"NMPDR",
495 "PATHWAY_INTERACTION_DB",
"PHCI-2DPAGE",
"PHOSSITE",
496 "PPTASEDB",
"PROTCLUSTDB",
"PHOTOLIST",
497 "PMMA-2DPAGE",
"RAT-HEART-2DPAGE",
"RZPD-PROTEXP",
498 "SAGALIST",
"SIENA-2DPAGE",
"STYGENE",
499 "SUBTILIST",
"TIGR",
"TRANSFAC",
500 "WORMPEP",
"YEPD",
"YPD",
505 "ALLERGOME",
"ARACHNOSERVER",
"ARAPORT",
506 "ARRAYEXPRESS",
"BEEBASE",
"BGD",
507 "BGEE",
"BINDINGDB",
"BIOCYC",
508 "BIOGRID",
"BIOMUTA",
"BRENDA",
509 "CAZY",
"CCDS",
"CDD",
510 "CGD",
"CHEMBL",
"CHITARS",
511 "CLEANEX",
"COLLECTF",
"COMPLUYEAST-2DPAGE",
512 "CONOSERVER",
"CTD",
"CYGD",
513 "DBSNP",
"DEPOD",
"DICTYBASE",
514 "DIP",
"DISGENET",
"DISPROT",
515 "DMDM",
"DNASU",
"DOSAC-COBS-2DPAGE",
516 "DRUGBANK",
"ECHOBASE",
"ECOGENE",
517 "EGGNOG",
"EMBL",
"ENSEMBL",
518 "ENSEMBLBACTERIA",
"ENSEMBLFUNGI",
"ENSEMBLMETAZOA",
519 "ENSEMBLPLANTS",
"ENSEMBLPROTISTS",
"EPD",
520 "ESTHER",
"EUHCVDB",
"EUPATHDB",
521 "EUROPEPMC",
"EVOLUTIONARYTRACE",
"EXPRESSIONATLAS",
522 "FLYBASE",
"GENE3D",
"GENECARDS",
523 "GENEDB",
"GENEDB_SPOMBE",
"GENEFARM",
524 "GENEID",
"GENEREVIEWS",
"GENETREE",
525 "GENEVISIBLE",
"GENEWIKI",
"GENOLIST",
526 "GENOMERNAI",
"GK",
"GLYCOSUITEDB",
527 "GRAINGENES",
"GO",
"GRAMENE",
528 "GUIDETOPHARMACOLOGY",
"H-INVDB",
"HAMAP",
529 "HGNC",
"HOGENOM",
"HOVERGEN",
530 "HPA",
"IMGT/GENE-DB",
"IMGT/HLA",
531 "IMGT/LIGM",
"IMGT_GENE-DB",
"INPARANOID",
532 "INTACT",
"INTERPRO",
"IPD-KIR",
533 "IPTMNET",
"KEGG",
"KO",
534 "LEGIOLIST",
"LEPROMA",
"MAIZEDB",
535 "MAIZEGDB",
"MALACARDS",
"MAXQB",
536 "MEROPS",
"MGI",
"MIM",
537 "MINT",
"MIRBASE",
"MOONPROT",
538 "MYCOCLAP",
"NEXTBIO",
"NEXTPROT",
539 "OGP",
"OMA",
"OPENTARGETS",
540 "ORPHANET",
"ORTHODB",
"PANTHER",
541 "PATRIC",
"PAXDB",
"PDB",
542 "PDBSUM",
"PEPTIDEATLAS",
"PEROXIBASE",
543 "PFAM",
"PHARMGKB",
"PHOSPHOSITE",
544 "PHOSPHOSITEPLUS",
"PHYLOMEDB",
"PIR",
545 "PIRSF",
"PMAP-CUTDB",
"POMBASE",
546 "PR",
"PR2",
"PRIDE",
547 "PRINTS",
"PRO",
"PRODOM",
548 "PROMEX",
"PROSITE",
"PROTEINMODELPORTAL",
549 "PROTEOMES",
"PSEUDOCAP",
"REACTOME",
550 "REBASE",
"REFSEQ",
"REPRODUCTION-2DPAGE",
551 "RGD",
"RZPD",
"SABIO-RK",
552 "SFLD",
"SGD",
"SIGNALINK",
553 "SIGNALLINK",
"SIGNOR",
"SMART",
554 "SMR",
"STRING",
"SUPFAM",
555 "SWISS-2DPAGE",
"SWISSLIPIDS",
"SWISSPALM",
556 "TAIR",
"TCDB",
"TIGRFAMS",
557 "TOPDOWNPROTEOMICS",
"TREEFAM",
"TUBERCULIST",
558 "UCD-2DPAGE",
"UCSC",
"UNICARBKB",
559 "UNIGENE",
"UNILIB",
"UNIPATHWAY",
560 "UNITE",
"VBASE2",
"VECTORBASE",
561 "VEGA-TR",
"VEGA-GN",
"VGNC",
562 "WBPARASITE",
"WORLD-2DPAGE",
"WORMBASE",
563 "XENBASE",
"ZFIN",
nullptr 567 "CHLOROPLAST",
"CYANELLE",
"MITOCHONDRION",
"PLASMID",
"NUCLEOMORPH",
568 "HYDROGENOSOME",
"APICOPLAST",
"CHROMATOPHORE",
569 "ORGANELLAR CHROMATOPHORE",
nullptr 573 "Evidence at protein level",
574 "Evidence at transcript level",
575 "Inferred from homology",
596 if(delim && *delim !=
'\0'&& ! dest.empty())
616 tag->SetTag().SetStr(
str);
667pdb_seq_id->SetChain(chain);
671sid->
SetPdb(*pdb_seq_id);
686 if(! mol || ! chain)
690 for(bad =
false, got =
false, q = chain; *q !=
'\0'; q = p) {
691 while(*q ==
' '|| *q ==
',')
693 for(p = q; *p !=
'\0'&& *p !=
' '&& *p !=
',';)
703 for(
r= q; *
r!=
'\0';
r++) {
706 if(
r[1] !=
'/'&&
r[1] !=
'\0') {
707 while(*
r!=
'/'&& *
r!=
'\0')
745pdb_seq_id->SetRel(*date);
749sid->
SetPdb(*pdb_seq_id);
758 if(choice < 1 || choice > 4)
765}
else if(choice == 4) {
776 for(
const auto& dbp : dbl)
778 for(
const auto& subdbp : dbp.GetSubBlocks())
782 for(p += 8; *p ==
' ';)
803 if(gmod == 7 || gmod == 8)
822 while(*eptr ==
' '&& eptr >
offset)
827 while(*eptr ==
' '&& eptr >
offset)
849 auto& os_blk = *dbp;
858 for(
auto& ref_blk : chain) {
899 if(str_.find(
"(GENE NAME") != string::npos) {
925 for(p = ptr + shift; *p ==
' ';)
928 if(*p == symb || *p ==
'\0') {
933 while(*p ==
'.'|| *p ==
'-'|| *p ==
'n'||
isdigit(*p) != 0)
936 while(*p ==
' '|| *p ==
')')
947 if(ptr[8] ==
'\0') {
951 if(ptr[8] !=
' ') {
956 for(q = ptr + 8;;) {
959 if(! q || (p && q > p))
965 else if(q[9] ==
'\0')
967 else if(q[9] ==
's'|| q[9] ==
'S') {
970 else if(q[10] ==
'\0')
983 if(ptr[8] ==
'\0') {
1012 for(p = ptr + 8; *p !=
'\0'&& *p !=
')';)
1014 while(*p ==
' '|| *p ==
')')
1023 if(! s.empty() && s.back() ==
'.') {
1025 while(! s.empty() && s.back() ==
' ')
1042 for(
size_t i= 0;
i< dbp.
mBuf.
len;
i++)
1043 if(dbp.
mBuf.
ptr[
i] ==
'\n')
1047 for(q = dbp.
mBuf.
ptr; *q !=
'\0';) {
1052 for(q += 5; *q !=
'\n'&& *q !=
'\0'; q++)
1060 while(*p ==
'.'|| *p ==
' '|| *p ==
'\t') {
1078 if(! line || line[0] ==
'\0')
1080 for(p = line; *p ==
' '|| *p ==
'\t'|| *p ==
'.'|| *p ==
',';)
1096 for(
r= p - 1; *
r==
' '|| *
r==
'\t';
r--) {
1104 autotssp = res->
syn.before_begin();
1106 for(p++; *p ==
' '|| *p ==
'\t';)
1109 for(
i= 1; *p !=
'\0'; p++) {
1112 else if(*p ==
')')
1118tssp = res->
syn.insert_after(tssp, q);
1123 for(
r= p - 1; *
r==
' '|| *
r==
'\t';
r--) {
1129tssp = res->
syn.insert_after(tssp, q);
1149 size_t len= taxname.size();
1153 const Char* p = taxname.c_str() +
len- 3;
1154 if((p[0] ==
' '|| p[0] ==
'\t') && (p[1] ==
's'|| p[1] ==
'S') &&
1155(p[2] ==
'p'|| p[2] ==
'P') && p[3] ==
'\0') {
1176 if(sosp->
name&& sosp->
name[0] !=
'\0')
1179 for(
autosynsp = sosp->
syn.begin(); synsp != sosp->
syn.end(); ++synsp) {
1180 auto& syn = *synsp;
1184 char* p = syn.data();
1189 i= (*q ==
'C'|| *q ==
'c') ? 5 : 7;
1194 if((q == p || q[0] ==
' '|| q[0] ==
'\t') &&
1195(q[
i] ==
' '|| q[
i] ==
'\t'|| q[
i] ==
'\0')) {
1208 if((
StringEquNI(p,
"PV.", 3) && (p[3] ==
' '|| p[3] ==
'\t'|| p[3] ==
'\0')) ||
1221 for(q = p; *p !=
'\0'&& *p !=
' '&& *p !=
'\t';)
1224org_ref->
SetSyn().push_back(q);
1228string_view modifier(syn.data(), p);
1229 for(q = p + 1; *q ==
' '|| *q ==
'\t';)
1236org_ref->
SetSyn().push_back(q);
1247 for(
b=
org_mods, num = 2; *
b;
b++, num++) {
1257 if(*p ==
' '&& (p[
i] ==
' '|| p[
i] ==
'\t'|| p[
i] ==
'\0')) {
1267org_ref->
SetSyn().push_back(syn);
1272 if(! taxname.empty())
1294sosp->
syn.clear();
1307 for(; dbp != dbp_end; ++dbp)
1313 const auto& subblocks = dbp->GetSubBlocks();
1314 autosubdbp = subblocks.cbegin();
1315 for(; subdbp != subblocks.cend(); ++subdbp)
1318 if(subdbp == subblocks.cend())
1322 autotvhp = vhl.before_begin();
1324line =
StringNew(subdbp->mBuf.len + 1);
1325ch = subdbp->mBuf.ptr[subdbp->mBuf.len - 1];
1326subdbp->mBuf.ptr[subdbp->mBuf.len - 1] =
'\0';
1330subdbp->mBuf.ptr[subdbp->mBuf.len - 1] = ch;
1332 if(!
StringEquNI(line,
"\nOH NCBI_TaxID=", 17)) {
1352 for(p += 17, q = p; *q ==
' ';)
1356 if((!
r||
r> p) && p) {
1357tvhp = vhl.emplace_after(tvhp);
1358 for(p--; *p ==
';'|| *p ==
' ';)
1361 for(
r= q; *
r>=
'0'&& *
r<=
'9';)
1369 for(p++; *p ==
' '|| *p ==
';';)
1376 while((*
r==
' '|| *
r==
'.'|| *
r==
'\0') &&
r> p)
1378 if(*
r!=
'\0'&& *
r!=
'.'&& *
r!=
' ')
1412 for(; dbp != dbp_end; ++dbp) {
1416 for(
const auto& subdbp : dbp->GetSubBlocks()) {
1420line =
StringSave(string_view(subdbp.mBuf.ptr, subdbp.mBuf.len - 1));
1424 if(!
StringEquNI(line,
"OX NCBI_TaxID=", 16)) {
1434 for(q = p; *q ==
' ';)
1440 for(p = line + 16; *p ==
' ';)
1446 for(q = p; *q >=
'0'&& *q <=
'9';)
1448 if(*q ==
' '|| *q ==
'\0')
1450 if(taxid <=
ZERO_TAX_ID|| (*q !=
' '&& *q !=
'\0')) {
1477 for(
autodbp = entry; dbp != end; ++dbp) {
1481 for(
const auto& subdbp : dbp->GetSubBlocks()) {
1490 if(line_OS && line_OS[0] !=
'\0') {
1492 if(sosp && sosp->
name&& sosp->
name[0] !=
'\0') {
1500 if(org_ref.
NotEmpty() && line_OC && line_OC[0] !=
'\0') {
1511 char*
offset=
nullptr;
1512 char* eptr =
nullptr;
1518 for(
const auto& os_blk : chain) {
1522 for(
const auto& subdbp : os_blk.GetSubBlocks()) {
1527eptr =
offset+ subdbp.mBuf.len;
1539 while(*
str==
' ')
1542 for(ptr =
str; *ptr !=
'\n'&& *ptr !=
' ';)
1546plasms.push_back(
string(
str, ptr));
1566 if(! p || *p ==
'\0')
1570 if(*p ==
'\0'|| *p ==
'\n')
1572 if((*p ==
';'|| *p ==
'.') && (p[1] ==
' '|| p[1] ==
'\n'))
1576 if(*p ==
'\0'|| *p ==
'\n')
1583 while(*p ==
' '|| *p ==
';'|| *p ==
'.')
1596 char* end =
nullptr;
1600 if(!
str|| *
str==
'\0')
1603 if(
str[0] ==
'-') {
1608lID = strtoll(
str+ 1, &end, 10);
1609 if((lID == 0 &&
str+ 1 == end) || (lID == LLONG_MAX && errno == ERANGE)) {
1616 if(*
str==
'G') {
1619}
else if(*
str==
'E'|| *
str==
'D') {
1621 tag->SetDb(
"PID");
1622 tag->SetTag().SetStr(
str);
1641 for(string_view it : L)
1645 autodot =
str.find(
'.');
1646 if(dot != string_view::npos) {
1647string_view acc2 =
str.substr(0, dot);
1648 for(string_view it : L) {
1649 autod = it.find(
'.');
1650 if(d != string_view::npos) {
1651string_view acc1 = it.substr(0, d);
1659 autotail = L.before_begin();
1660 while(
next(tail) != L.end())
1662L.emplace_after(tail,
str);
1670 for(CSP_block::TSeqref::iterator cur_ref = refs.begin(); cur_ref != refs.end(); ++cur_ref) {
1671 if((*cur_ref)->Which() !=
CSeq_id::e_Pdb|| (*cur_ref)->GetPdb().IsSetRel())
1676 const CPDB_seq_id& cur_id = (*cur_ref)->GetPdb();
1677CSP_block::TSeqref::iterator next_ref = cur_ref;
1679 for(++next_ref; next_ref != refs.end();) {
1681(*next_ref)->GetPdb().IsSetRel())
1684 const CPDB_seq_id& next_id = (*next_ref)->GetPdb();
1692 if(! got && cur_id.
GetChain() == 32) {
1706next_ref = refs.erase(next_ref);
1719 if(embl_acc_list.empty() ||
next(embl_acc_list.cbegin()) == embl_acc_list.cend())
1722 for(
autoit = embl_acc_list.cbegin(); it != embl_acc_list.cend(); ++it) {
1723string_view pid = it->pid;
1724 autodot = pid.find(
'.');
1725 if(dot != string_view::npos) {
1726 for(
autop = pid.begin() + dot + 1; p != pid.end(); ++p) {
1727 if(*p >=
'0'&& *p <=
'9')
1733 for(
autoit2 =
next(it); it2 != embl_acc_list.cend(); ++it2) {
1734 if(it->choice != it2->choice && pid == string_view(it2->pid)) {
1811forward_list<string> acc_list,
1819 const char* token1;
1830 boolcheck_embl_prot;
1851 autoembl_tail = embl_acc_list.before_begin();
1852check_embl_prot =
false;
1853 for(ptr =
str;;) {
1868 if(! token1 || ! token2 || ! token3 ||
1899 if(
id.NotEmpty())
1903ntype =
GetNucAccOwner(p ? string_view(token2, p) : string_view(token2));
1906}
else if(
AddToList(acc_list, token2)) {
1909 if(
id.NotEmpty())
1916 if(token3[0] >=
'A'&& token3[0] <=
'Z'&&
1917token3[1] >=
'A'&& token3[1] <=
'Z') {
1921 for(q = p + 1; *q >=
'0'&& *q <=
'9';)
1923 if(q == p + 1 || *q !=
'\0')
1934embl_tail = embl_acc_list.emplace_after(embl_tail, ntype, token2, token3);
1938check_embl_prot =
true;
1950 if(
id.NotEmpty())
1959 if(
AddToList(ens_tran_list, token2)) {
1961 if(
tag.NotEmpty())
1965 if(!
AddToList(ens_prot_list, token3)) {
1969 if(
tag.NotEmpty())
1973 if(token4 &&
AddToList(ens_gene_list, token4)) {
1975 if(
tag.NotEmpty())
1980 if(token2[0] >=
'A'&& token2[0] <=
'Z'&&
1981token2[1] >=
'A'&& token2[1] <=
'Z') {
1985 for(q = p + 1; *q >=
'0'&& *q <=
'9';)
1987 if(q == p + 1 || *q !=
'\0')
2005 if(
id.NotEmpty())
2009token1 =
"Reactome";
2013token1 =
"PomBase";
2019 if(
tag.NotEmpty()) {
2022 for(
const auto& cur_tag : spb.
SetDbref()) {
2023 if(
tag->Match(*cur_tag)) {
2034 if(! embl_acc_list.empty()) {
2035 if(check_embl_prot)
2037embl_acc_list.clear();
2042ens_tran_list.clear();
2043ens_prot_list.clear();
2044ens_gene_list.clear();
2047 if(pdbold && pdbnew) {
2052 if(pdbnew && spb.
SetSeqref().size() > 1)
2084list<CTempString> dtlines;
2092 for(string_view line : dtlines) {
2103static_assert(
"sequence version"sv.size() == 16);
2108 autop = line.begin() + pos;
2109 autoe = line.end();
2110 while(p < e && *p ==
' ')
2113 while(p < e && *p >=
'0'&& *p <=
'9')
2115 if(p + 1 == e && *p ==
'.') {
2116*ver_num =
fta_atoi(string_view(q, p));
2124 for(string_view line : dtlines) {
2145}
else if(
first> 1) {
2148}
else if(second == 0) {
2151}
else if(second > 1) {
2154}
else if(third == 0) {
2157}
else if(third > 1) {
2160}
else if(std_crdate.
Empty()) {
2163}
else if(std_sequpd.
Empty()) {
2166}
else if(std_annotupd.
Empty()) {
2169}
else if(ver_num && *ver_num < 1) {
2175crdate.
SetStd(*std_crdate);
2176 sequpd.SetStd(*std_sequpd);
2177annotupd.
SetStd(*std_annotupd);
2219ibp->
wgssec[0] =
'\0';
2222 if(spb->SetExtra_acc().empty())
2223spb->ResetExtra_acc();
2229 i=
GetSPDate(pp->
source, entry, spb->SetCreated(), spb->SetSequpd(), spb->SetAnnotupd(), &ver_num);
2231 i=
GetSPDate(pp->
source, entry, spb->SetCreated(), spb->SetSequpd(), spb->SetAnnotupd(),
nullptr);
2234 if(spb->SetPlasnm().empty())
2243 for(
auto& cur_id : bioseq.
SetId()) {
2244 if(! cur_id->IsSwissprot())
2252 id.SetRelease(
"reviewed");
2254 id.SetRelease(
"reviewed");
2261descr->
SetSp(*spb);
2262bioseq.
SetDescr().Set().push_back(descr);
2281 for(p = line; *p ==
' ';)
2290 while(*p !=
'\0') {
2296 if(p > line && *(p - 1) !=
'-')
2298 for(++p; *p ==
' ';)
2301 for(p += 3; *p ==
' ';)
2308 for(--q; q > com && *q ==
' ';)
2321descrs.push_back(descr);
2358 for(q = p; q >
offset&& *q !=
'\n';)
2386 if(
count== 0 && cla != 2)
2399 for(p =
tmp; p > bptr && *p !=
'\n';)
2420 for(
const auto& cur_ref : spb.
GetSeqref()) {
2421 if(! cur_ref->IsPir())
2425text_id->Assign(cur_ref->GetPir());
2428rep_id->
SetPir(*text_id);
2430rep_ids.push_back(rep_id);
2433 if(rep_ids.empty())
2448 if(! orpname && ! ohname)
2450 if(! orpname || ! ohname)
2453 for(p = orpname, q = ohname; *p !=
'\0'&& *q !=
'\0'; p++, q++) {
2455 if(chp >=
'a'&& chp <=
'z')
2458 if(chq >=
'a'&& chq <=
'z')
2471 if(*q ==
'('|| *q ==
'\0')
2481 boolfragment =
false;
2492 if(! title.empty()) {
2495descr.
Set().push_back(desc_new);
2508 for(
const string& cur_acc : spb->GetExtra_acc()) {
2513text_id->SetAccession(cur_acc);
2517rep_ids.push_back(rep_id);
2520 if(! rep_ids.empty()) {
2526 if(spb->CanGetCreated()) {
2530descr.
Set().push_back(create_date_descr);
2533 boolhas_update_date = spb->CanGetAnnotupd() || spb->CanGetSequpd();
2536 if(has_update_date) {
2537 if(spb->CanGetAnnotupd() && spb->CanGetSequpd()) {
2539}
else if(spb->CanGetAnnotupd())
2540upd_date.
Assign(spb->GetAnnotupd());
2542upd_date.
Assign(spb->GetSequpd());
2547descr.
Set().push_back(upd_date_descr);
2550 if(spb->CanGetCreated() && has_update_date &&
2552 stringupd_date_str, create_date_str;
2554upd_date.
GetDate(&upd_date_str);
2555spb->GetCreated().GetDate(&create_date_str);
2565 for(
autodbp = chain.cbegin(); dbp != chain.cend(); ++dbp) {
2574 if(org_ref.
Empty())
2581bio_src->
SetOrg(*org_ref);
2587 if(bio_src.
Empty()) {
2593bio_src->
SetOrg(*org_ref);
2601 if(bio_src.
Empty())
2605 if(! vhl.empty()) {
2608 for(; ! vhl.empty(); vhl.pop_front()) {
2609 const auto& vh = vhl.front();
2613 mod->SetSubname(vh.name);
2622 if(org_ref_cur.
Empty()) {
2628vector<Char> org_taxname;
2630 const string& cur_taxname = org_ref_cur->
GetTaxname();
2631org_taxname.assign(cur_taxname.begin(), cur_taxname.end());
2634org_taxname.push_back(0);
2639 "OH-line HostName \"{}\" does not match NCBI organism name \"{}\" obtained by lookup of NCBI TaxID \"{}\".",
2651descr.
Set().push_back(bio_src_desc);
2663descr.
Set().push_back(mol_info_descr);
2667 for(
auto& ref_blk : chain) {
2674pub_desc_descr->
SetPub(*pub_desc);
2676descr.
Set().push_back(pub_desc_descr);
2708 if(fi1.
key!= fi2.
key||
2710fi1.
to!= fi2.
to||
2720 if(spfil.empty() ||
next(spfil.begin()) == spfil.end())
2723 for(
autospfip = spfil.begin(); spfip != spfil.end() &&
next(spfip) != spfil.end(); ++spfip) {
2724 autofipprev = spfip;
2725 for(
autofip =
next(fipprev); fip != spfil.end();) {
2731fip = spfil.erase_after(fipprev);
2749 if(! p || p == temp ||
2750(*(p - 1) !=
' '&& *(p - 1) !=
'\n') || (p[2] !=
' '&& p[2] !=
'\n')) {
2756 for(p--; p > temp && (*p ==
' '|| *p ==
'\n');)
2758 if(*p < 'A' || *p >
'Z') {
2765 while(p > temp && (*p ==
'\n'|| (*p >=
'A'&& *p <=
'Z')))
2772 while(*p >=
'A'&& *p <=
'Z'&& p < end)
2776 for(q = p; *p ==
'\n'; p++)
2781 while(*p ==
' '|| *p ==
'\n')
2783 for(p += 2; *p ==
' '|| *p ==
'\n';)
2786 if(*p < 'A' || *p >
'Z') {
2792 for(q = p; *q ==
'\n'|| (*q >=
'A'&& *q <=
'Z');)
2794 if(q > p && *(q - 1) ==
'\n') {
2795 for(q--; *q ==
'\n'&& q > p;)
2803 while(*p >=
'A'&& *p <=
'Z'&& p < end)
2807 for(q = p; *p ==
'\n'; p++)
2812 for(p = temp; *p !=
'\0'; p++)
2833 const char* defdelim;
2860 autocurrent = spfil.before_begin();
2862 while(bptr < eptr && (endline =
SrchTheChar(string_view(bptr, eptr),
'\n'))) {
2865 for(p = bptr,
i= 0; *p !=
' '&& *p !=
'\n'&&
i< 8;
i++)
2867temp.
key.assign(bptr, p);
2870 if(temp.
key==
"VAR_SEQ")
2875 for(bptr += 8; *bptr ==
' '&& bptr <= endline;)
2880 if(((*bptr >=
'a'&& *bptr <=
'z') || (*bptr >=
'A'&& *bptr <=
'Z')) &&
2882 for(bptr += 7; *bptr >=
'0'&& *bptr <=
'9'&& bptr <= endline;)
2884 for(; *bptr ==
':'&& bptr <= endline;)
2888 for(ptr1 = bptr; *ptr1 ==
'?'|| *ptr1 ==
'>'|| *ptr1 ==
'<'||
2889(*ptr1 >=
'0'&& *ptr1 <=
'9');)
2892 if(bptr < ptr1 && ptr1 <= endline) {
2893temp.
from.assign(bptr, ptr1);
2900 if(! p || (q && q < p))
2912temp.
from.assign(
"-1");
2913fromstart =
nullptr;
2917new_format =
false;
2919 for(; (*bptr ==
' '|| *bptr ==
'.') && bptr <= endline; bptr++)
2922 for(ptr1 = bptr; *ptr1 ==
'?'|| *ptr1 ==
'>'|| *ptr1 ==
'<'||
2923(*ptr1 >=
'0'&& *ptr1 <=
'9');)
2926p = (
char*)temp.
from.c_str();
2927 if(*p ==
'<'|| *p ==
'>')
2930 for(q = ptr1; *q ==
' ';)
2932extra_text =
false;
2933 if(bptr < ptr1 && ptr1 <= endline) {
2934 if(*q !=
'\n'&& new_format && (*p ==
'?'||
fta_atoi(p) != -1))
2936temp.
to.assign(bptr, ptr1);
2937}
else if(fromstart) {
2938 if(*q !=
'\n'&& (*p ==
'?'||
fta_atoi(p) != -1))
2940temp.
to.assign(fromstart, fromend);
2942 if(*q !=
'\n'&& (*p ==
'?'||
fta_atoi(p) != -1))
2944temp.
to.assign(
"-1");
2947q = (
char*)temp.
to.c_str();
2948 if(*q ==
'<'|| *q ==
'>')
2950 if(extra_text || (*p !=
'?'&& *q !=
'?'&& (
fta_atoi(p) >
fta_atoi(q)))) {
2954 if(! p || (q && q < p))
2963temp.
from.assign(
"-1");
2966 for(bptr = ptr1; *bptr ==
' '&& bptr <= endline;)
2972 if(*--
str==
'-'&&
str> bptr)
2973 if(*--
str!=
' ')
2975 if(bptr <= endline)
2976temp.
descrip.assign(bptr, endline);
2978 for(bptr = endline; *bptr ==
' '|| *bptr ==
'\n';)
2983 while(bptr < eptr && (*bptr ==
' '))
2985 while(*bptr ==
' ')
2991}
else if(
StringEquN(bptr,
"/evidence=\"", 11)) {
3004 if(*bptr ==
'/') {
3005 for(p = bptr + 1; (*p >=
'a'&& *p <=
'z') || (*p >=
'A'&& *p <=
'Z') || (*p >=
'0'&& *p <=
'9') || *p ==
'_';)
3007 if(*p ==
'='&& p[1] ==
'\"') {
3017endline =
SrchTheChar(string_view(bptr, eptr),
'\n');
3019 if(p >= bptr && *p ==
'\"')
3026 if(p && p - 1 >= bptr && *(p - 1) ==
'.')
3031 if(p && p - 1 >= bptr && *(p - 1) ==
'.')
3043 if(*--
str==
'-'&&
str> bptr)
3044 if(*--
str!=
' ')
3046 for(bptr = endline; *bptr ==
' '|| *bptr ==
'\n';)
3057 if(*defdelim ==
'\n')
3060p = (
char*)temp.
from.c_str();
3061 if(*p ==
'<'|| *p ==
'>')
3063 if(*p !=
'?'&&
fta_atoi(p) < 0) {
3067q = (
char*)temp.
to.c_str();
3068 if(*q ==
'<'|| *q ==
'>')
3076current = spfil.insert_after(current, temp);
3105 boolfuzzfrom =
false;
3106 boolfuzzto =
false;
3107 boolnofrom =
false;
3109 boolpntfuzz =
false;
3113 if(spfip.
from.empty() || spfip.
to.empty())
3120ptr = spfip.
from.c_str();
3124 while(*ptr !=
'\0'&&
isdigit(*ptr) == 0)
3133 if((initmet ==
false&& from != 0) ||
3134(initmet && signal && from == 1))
3137ptr = spfip.
to.c_str();
3140 while(*ptr !=
'\0'&&
isdigit(*ptr) == 0)
3149 if(initmet ==
false&& to != 0)
3172}
else if(from != to && ! pntfuzz) {
3175interval.
SetTo(to);
3196}
else if(fuzzfrom) {
3283 if((pos != 0 && retstr[pos - 1] !=
' '&& retstr[pos - 1] !=
'.') ||
3284(retstr[pos +
len] !=
'\0'&& retstr[pos +
len] !=
' '&&
3285retstr[pos +
len] !=
'.'&& retstr[pos +
len] !=
';'))
3327fbp->
key=
"VAR_SEQ";
3332fbp->
key=
"SE_CYS";
3335fbp->
key=
"MOD_RES";
3352feat->
SetData().SetImp().SetDescr(
"uncertain amino acids");
3375 if(! pp->
debug) {
3377 if(! descrip.empty())
3388 if(! descrip.empty())
3411 for(
auto& temp : spfil) {
3416temp.key =
"VAR_SEQ";
3421temp.key =
"SE_CYS";
3422temp.descrip.clear();
3424temp.key =
"MOD_RES";
3458feat->
SetData().SetImp().SetDescr(
"uncertain amino acids");
3483 if(! temp.descrip.empty())
3486feats.push_back(feat);
3495 autop =
str.cbegin();
3496 autoq =
str.cend();
3497 while(p < q && (*p ==
' '|| *p ==
'\t'))
3501 while(p < q && (*q ==
' '|| *q ==
'\t'))
3508 while(pp < qq && *pp ==
'(')
3510 while(pp < qq && *(qq - 1) ==
')')
3514 for(
auto r= pp;
r< qq;
r++) {
3517 else if(*
r==
')') {
3519 count= left - right;
3522 for(; count < 0 && pp > p; pp--)
3525 for(
auto r= qq;
r> pp;) {
3529 else if(*
r==
')')
3532 for(;
count< 0 && qq < q; qq++)
3536 if(qq <
str.end())
3537 str.erase(qq,
str.end());
3538 if(pp >
str.begin())
3539 str.erase(
str.begin(), pp);
3556 for(
charc : gname)
3557 if(! (
isalnum(c) || c ==
'_'|| c ==
'-'|| c ==
'.'||
3558c ==
'\''|| c ==
'`'|| c ==
'/'|| c ==
'('|| c ==
')')) {
3579 for(
autop =
str.begin(), e =
str.end(); p < e;) {
3580 while(p < e && *p ==
' ')
3583 while(p < e && *p !=
' ')
3586string_view tok(q, p);
3589 if(tok ==
"AND"sv || tok ==
"OR"sv)
3598gene.
SetSyn().push_back(gname);
3643 for(
char& c :
str)
3653feats.push_back(feat);
3678 for(p =
str; p && *p !=
'\0'; p = q) {
3679 while(*p ==
' '|| *p ==
',')
3691gene.
SetSyn().push_back(p);
3701(! name && ! syns && ! ltags && ! orfs))
3721feats.push_back(feat);
3738 if(! pp || pp->
entrylist.empty() || bptr.empty())
3751 for(p =
str; p && *p !=
'\0'; p = q) {
3752 while(*p ==
' '|| *p ==
';')
3754 for(
r= p;;
r= q + 1) {
3756 if(! q || q[1] ==
' '|| q[1] ==
'\n'|| q[1] ==
'\0')
3779}
else if(
StringEquNI(p,
"OrderedLocusNames=", 18)) {
3802 if(! name && ! syns && ! ltags && ! orfs)
3805 if(! name && syns) {
3824 if(! name && ! syns && ! ltags && ! orfs)
3908 for(
count= 0, q =
buf;; q = p) {
3920 if(*p ==
'.'|| *p ==
'\0') {
3925 while(*p >=
'0'&& *p <=
'9')
3927 if(*q ==
'n'&& (*p ==
'.'|| *p ==
'\0')) {
3940 if(
count!= 4 || *p !=
'\0') {
3950 for(; sfp != sfl.cend(); ++sfp) {
3954 if(sfp->tag !=
tag)
3957 prot.SetName().push_back(
string(sfp->start, sfp->end));
3968 for(
autotsfp = sfl.cbegin(); tsfp != sfl.cend(); ++tsfp) {
3975 for(
autotsfp = sfl.cbegin(); tsfp != sfl.cend(); ++tsfp) {
3978 for(++tsfp; tsfp != sfl.cend(); ++tsfp) {
3985 if(tsfp == sfl.cend())
3992}
else if(rcount == 0 && ! is_trembl) {
3997 if(scount > 0 && ! is_trembl) {
4002 if(fcount == 0 && rcount > 0) {
4022 for(
const auto&
id: ids) {
4023 if(! id->IsSwissprot())
4026 if(id->GetSwissprot().IsSetRelease() &&
4031sfl.emplace_front(0,
nullptr);
4032 autotsfp = sfl.begin();
4035 for(p =
str; *p !=
'\0';) {
4039 while(*p !=
'\0'&& *p !=
' ')
4041string_view sv(q, p);
4047 if(cilp->
num== 0)
4050 if(tsfp->tag != 0) {
4051 if(q == tsfp->start)
4055 for(
r= q - 1; *
r==
' '|| *
r==
';';)
4066 for(
r= q + cilp->
str.size(); *
r==
' ';)
4068tsfp = sfl.emplace_after(tsfp, cilp->
num,
r);
4078 for(
autotsfp = sfl.cbegin(); tsfp != sfl.cend(); ++tsfp)
4081 for(
autotsfp = sfl.cbegin(); tsfp != sfl.cend(); ++tsfp)
4085 for(
autotsfp = sfl.cbegin(); tsfp != sfl.cend(); ++tsfp)
4088 for(
autotsfp = sfl.cbegin(); tsfp != sfl.cend(); ++tsfp)
4092 for(
autotsfp = sfl.cbegin(); tsfp != sfl.cend(); ++tsfp)
4095 for(
autotsfp = sfl.cbegin(); tsfp != sfl.cend(); ++tsfp)
4114 for(q =
buf+ 2; *q ==
' ';)
4118 for(p++; *p ==
' ';)
4133qual->
SetQual(
"UniProtKB_evidence");
4135feat.
SetQual().push_back(qual);
4186 while(! str_.empty()) {
4187 charc = str_.back();
4188 if(c ==
'.'|| c ==
';'|| c ==
',')
4220str1.assign(
str, ptr);
4223 while(*ptr ==
' ')
4227 for(bptr = ptr; *ptr !=
'\0'&& *ptr !=
' '&& *ptr != symb;)
4230 stringecnum(bptr, ptr);
4233 if(! ecnum.empty())
4234 prot.SetEc().push_back(ecnum);
4240 while(*ptr !=
'\0'&& (*ptr ==
' '|| *ptr == symb))
4255str1.assign(
str, ptr);
4263 if(!
prot.IsSetName())
4264 prot.SetName().push_back(
str);
4281feats.push_back(feat);
4310SPSegLocList::iterator curspslp;
4317 for(
const auto& spfip : spfil) {
4318 if(spfip.key !=
"NON_CONS")
4321 if(spsll.empty()) {
4322spsll.emplace_front(0);
4323curspslp = spsll.begin();
4326 const char* p = spfip.from.c_str();
4327 if(*p ==
'<'|| *p ==
'>'|| *p ==
'?')
4330curspslp->len = from - curspslp->from;
4331curspslp = spsll.emplace_after(curspslp, from);
4334 for(
auto& descr : bioseq.
SetDescr().Set()) {
4335 if(! descr->IsMolinfo())
4340 else if(spfbp->
noleft)
4346 if(! spsll.empty())
4347curspslp->len = bioseq.
GetLength() - curspslp->from;
4365 autospfip = spfil.cbegin();
4366 autotemp = spfil.cend();
4368 for(; spfip != spfil.cend(); ++spfip) {
4369 if(spfip->key !=
"INIT_MET")
4376 const char* p = spfip->from.c_str();
4377 if(*p ==
'<'|| *p ==
'>'|| *p ==
'?')
4380p = spfip->to.c_str();
4381 if(*p ==
'<'|| *p ==
'>'|| *p ==
'?')
4385 if((from != 0 || to != 0) && (from != 1 || to != 1))
4393 if(spfip != spfil.cend()) {
4398 if(! temp->descrip.empty()) {
4406 string& sequence =
data.SetIupacaa().Set();
4413sequence.insert(sequence.begin(),
'M');
4414bioseq.
SetInst().SetLength(
static_cast<TSeqPos>(sequence.size()));
4415}
else if(sequence.empty() || sequence[0] !=
'M')
4443 for(
auto& descr : bioseq.
SetDescr().Set()) {
4444 if(! descr->IsMolinfo())
4447mol_info = &(descr->SetMolinfo());
4452 for(
const auto& temp : spfil) {
4453 if(temp.key ==
"NON_CONS") {
4458 if(temp.key !=
"NON_TER")
4492 const string& sequence =
data.GetIupacaa().Get();
4494 for(string::const_iterator
value= sequence.begin();
value!= sequence.end(); ++
value) {
4495 if(*
value!=
'X') {
4519 for(
autospslp = spsll.begin(); spslp != spsll.end(); ++spslp) {
4521 if(! deltas.
Set().empty()) {
4522 delta->SetLiteral().SetLength(0);
4523 delta->SetLiteral().SetFuzz().SetLim();
4529 delta->SetLiteral().SetLength(spslp->len);
4532 stringdata_str = bioseq_data.substr(spslp->from, spslp->len);
4534 delta->SetLiteral().SetSeq_data().SetIupacaa().Set(data_str);
4538 if(deltas.
Set().size() > 1) {
4540bioseq.
SetInst().ResetSeq_data();
4542bioseq.
SetInst().SetExt().Reset();
4566 if(! spfil.empty()) {
4578 if(! spsll.empty())
4581 if(! feats.empty()) {
4583annot->
SetData().SetFtable().swap(feats);
4584bioseq.
SetAnnot().push_back(annot);
4601eptr = ptr + entry.
mBuf.
len;
4642 if(! ibp->
drop) {
4651 if(! ibp->
drop) {
4652pResult = entry->GetEntryData()->seq_entry;
4657 if(! ibp->
drop) {
4671 "Parsing completed, {} entr{} parsed",
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void err_install(const Indexblk *ibp, bool accver)
string tata_save(string_view t)
void StripECO(string &str)
void StripSerialNumbers(TEntryList &seq_entries)
void GetSequenceOfKeywords(const DataBlk &entry, int type, Uint2 col_data, TKeywordList &keywords)
char * GetEmblBlock(TDataBlkList &chain, char *ptr, short *retkw, Parser::EFormat format, char *eptr)
void BuildSubBlock(DataBlk &dbp, Int2 subtype, string_view subkw)
CRef< CSeq_id > MakeAccSeqId(const char *acc, Uint1 seqtype, bool accver, Int2 vernum)
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
CRef< CSeq_id > MakeLocusSeqId(const char *locus, CSeq_id::E_Choice seqtype)
unsigned char *const GetProtConvTable()
void GetExtraAccession(IndexblkPtr ibp, bool allow_uwsec, Parser::ESource source, TAccessionList &accessions)
void GetLenSubNode(DataBlk &dbp)
void ShrinkSpaces(char *line)
CRef< CBioseq > CreateEntryBioseq(ParserPtr pp)
void fta_sort_biosource(objects::CBioSource &bio)
TSeqPos GetLength(void) const
void GetDate(string *label, bool year_only=false) const
Append a standardized string representation of the date to the label.
@ eCompare_after
*this comes second.
@Gb_qual.hpp User-defined methods of the data storage class.
@OrgMod.hpp User-defined methods of the data storage class.
@Seq_descr.hpp User-defined methods of the data storage class.
namespace ncbi::objects::
CRef< objects::CSeq_entry > xGetEntry() override
void PostTotals() override
EntryBlk * GetEntryData() const
struct DataBlk::@1166 mBuf
DataBlk * LoadEntry(ParserPtr pp, size_t offset, size_t len)
#define ERR_GENENAME_IllegalGeneName
#define ERR_SOURCE_HostNameVsTaxIDMissMatch
#define ERR_FEATURE_PartialNoNonTerNonCons
#define ERR_FORMAT_UnexpectedData
#define ERR_FEATURE_ExpectEmptyComment
#define ERR_FORMAT_NoProteinNameCategory
#define ERR_FORMAT_MultipleRecName
#define ERR_SOURCE_InvalidNcbiTaxID
#define ERR_SOURCE_UnknownOHType
#define ERR_GENENAME_DELineGeneName
#define ERR_SOURCE_NcbiTaxIDLookupFailure
#define ERR_FORMAT_InvalidPDBCrossRef
#define ERR_FORMAT_ECNumberNotPresent
#define ERR_FORMAT_MixedPDBXrefs
#define ERR_ENTRY_Skipped
#define ERR_FEATURE_UnEqualEndPoint
#define ERR_SOURCE_OrgNameVsTaxIDMissMatch
#define ERR_FORMAT_MissingCopyright
#define ERR_SOURCE_MissingPlasmidName
#define ERR_FEATURE_Invalid_INIT_MET
#define ERR_FEATURE_InvalidQualifier
#define ERR_FEATURE_BadLocation
#define ERR_REFERENCE_IllegalDate
#define ERR_FORMAT_MissingFullRecName
#define ERR_FORMAT_SwissProtHasSubName
#define ERR_FEATURE_UnknownFeatKey
#define ERR_SOURCE_UnknownOXType
#define ERR_DRXREF_UnknownDBname
#define ERR_SOURCE_NoNcbiTaxIDLookup
#define ERR_FEATURE_ObsoleteFeature
#define ERR_FEATURE_Dropped
#define ERR_ENTRY_ParsingComplete
#define ERR_FEATURE_MissingInitMet
#define ERR_SOURCE_IncorrectOHLine
#define ERR_FORMAT_MissingGeneName
#define ERR_LOCATION_FailedCheck
#define ERR_FORMAT_InvalidECNumber
#define ERR_QUALIFIER_InvalidEvidence
#define ERR_DATE_IllegalDate
#define ERR_FORMAT_UnknownGeneField
#define ERR_FEATURE_NotSeqEndPoint
#define ERR_FEATURE_NoFragment
#define ERR_SPROT_DRLineCrossDBProtein
#define ERR_DATACLASS_UnKnownClass
#define ERR_FORMAT_ExcessGeneFields
#define ERR_FORMAT_MissingRecName
#define ERR_FEATURE_DuplicateRemoved
list< CRef< objects::CSeq_entry > > TEntryList
DataBlk::TList TDataBlkList
TDataBlkList::const_iterator DataBlkCIter
int fta_atoi(string_view sv)
bool fta_StartsWith(const char *s1, string_view s2)
bool StringEquNI(const char *s1, const char *s2, size_t n)
bool StringEquN(const char *s1, const char *s2, size_t n)
bool StringEqu(const char *s1, const char *s2)
void StringCpy(char *d, const char *s)
void StringNCpy(char *d, const char *s, size_t n)
size_t StringLen(const char *s)
void StringCat(char *d, const char *s)
char * StringRChr(char *s, const char c)
char * StringNew(size_t sz)
void FtaInstallPrefix(int prefix, string_view name, string_view location)
void FtaDeletePrefix(int prefix)
#define FtaErrPost(sev, level,...)
void fta_find_pub_explore(ParserPtr pp, TEntryList &seq_entries)
CRef< COrg_ref > fta_fix_orgref_byid(ParserPtr pp, TTaxId taxid, bool *drop, bool isoh)
void fta_fix_orgref(ParserPtr pp, COrg_ref &org_ref, bool *drop, char *organelle)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
static const char location[]
#define GI_FROM(T, value)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define TAX_ID_TO(T, tax_id)
SStrictId_Tax::TId TTaxId
Taxon id type.
#define TAX_ID_FROM(T, value)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
void ResetDataAndHistory(void)
Clear all information in the scope except added data loaders.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty -- pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty -- not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
char Char
Alias for char.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static string Sanitize(CTempString str, TSS_Flags flags=fSS_print)
Sanitize a string, allowing only specified classes of characters.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static const size_type npos
@ fSplit_Truncate_End
Truncate trailing delimiters.
@ eTrunc_End
Truncate trailing whitespace only.
@ eNocase
Case insensitive compare.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
const TOrg & GetOrg(void) const
Get the Org member data.
void SetGenome(TGenome value)
Assign a value to Genome data member.
void SetOrg(TOrg &value)
Assign a value to Org data member.
EGenome
biological context
TSyn & SetSyn(void)
Assign a value to Syn data member.
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data member.
void SetLocus(const TLocus &value)
Assign a value to Locus data member.
void SetLocus_tag(const TLocus_tag &value)
Assign a value to Locus_tag data member.
void SetMin(TMin value)
Assign a value to Min data member.
TRange & SetRange(void)
Select the variant.
TStd & SetStd(void)
Select the variant.
void SetMax(TMax value)
Assign a value to Max data member.
TLim & SetLim(void)
Select the variant.
E_Choice Which(void) const
Which variant is currently selected.
TStr & SetStr(void)
Select the variant.
@ e_not_set
No variant selected.
void SetCommon(const TCommon &value)
Assign a value to Common data member.
bool IsSetCommon(void) const
common name Check if a value has been assigned to Common data member.
void ResetSyn(void)
Reset Syn data member.
TSyn & SetSyn(void)
Assign a value to Syn data member.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
const TSyn & GetSyn(void) const
Get the Syn member data.
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
bool IsSetTaxname(void) const
preferred formal name Check if a value has been assigned to Taxname data member.
TMod & SetMod(void)
Assign a value to Mod data member.
void SetOrgname(TOrgname &value)
Assign a value to Orgname data member.
bool IsSetSyn(void) const
synonyms for taxname or common Check if a value has been assigned to Syn data member.
@ eSubtype_nat_host
natural host of this specimen
void SetQual(const TQual &value)
Assign a value to Qual data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
const TRegion & GetRegion(void) const
Get the variant data.
void SetComment(const TComment &value)
Assign a value to Comment data member.
void SetPartial(TPartial value)
Assign a value to Partial data member.
const TLocation & GetLocation(void) const
Get the Location member data.
virtual void Reset(void)
Reset the whole object.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
void SetVal(const TVal &value)
Assign a value to Val data member.
TQual & SetQual(void)
Assign a value to Qual data member.
void SetTo(TTo value)
Assign a value to To data member.
TGeneral & SetGeneral(void)
Select the variant.
TChain GetChain(void) const
Get the Chain member data.
void SetPoint(TPoint value)
Assign a value to Point data member.
void SetId(TId &value)
Assign a value to Id data member.
void SetA(TA &value)
Assign a value to A data member.
void SetId(TId &value)
Assign a value to Id data member.
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
TPir & SetPir(void)
Select the variant.
void SetFrom(TFrom value)
Assign a value to From data member.
TGi & SetGi(void)
Select the variant.
const TMol & GetMol(void) const
Get the Mol member data.
E_Choice
Choice variants.
void SetFuzz_to(TFuzz_to &value)
Assign a value to Fuzz_to data member.
void SetFuzz_from(TFuzz_from &value)
Assign a value to Fuzz_from data member.
void SetB(TB &value)
Assign a value to B data member.
TSwissprot & SetSwissprot(void)
Select the variant.
void SetVersion(TVersion value)
Assign a value to Version data member.
TPdb & SetPdb(void)
Select the variant.
@ e_Other
for historical reasons, 'other' = 'refseq'
@ e_not_set
No variant selected.
TSeq & SetSeq(void)
Select the variant.
const TIupacaa & GetIupacaa(void) const
Get the variant data.
void SetCompleteness(TCompleteness value)
Assign a value to Completeness data member.
void SetData(TData &value)
Assign a value to Data data member.
list< CRef< CSeqdesc > > Tdata
TId & SetId(void)
Assign a value to Id data member.
bool IsSetSeq_data(void) const
the sequence Check if a value has been assigned to Seq_data data member.
const TInst & GetInst(void) const
Get the Inst member data.
TTitle & SetTitle(void)
Select the variant.
TPub & SetPub(void)
Select the variant.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TId & GetId(void) const
Get the Id member data.
TSp & SetSp(void)
Select the variant.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_id > > TId
void SetReplaces(TReplaces &value)
Assign a value to Replaces data member.
list< CRef< CSeq_id > > TIds
TComment & SetComment(void)
Select the variant.
void SetInst(TInst &value)
Assign a value to Inst data member.
TSource & SetSource(void)
Select the variant.
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
list< CRef< CSeq_feat > > TFtable
Tdata & Set(void)
Assign a value to data member.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
TMolinfo & SetMolinfo(void)
Select the variant.
TCreate_date & SetCreate_date(void)
Select the variant.
TUpdate_date & SetUpdate_date(void)
Select the variant.
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_raw
continuous sequence
@ eCompleteness_complete
complete biological entity
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_partial
partial but no details given
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ e_Iupacaa
IUPAC 1 letter amino acid code.
void ResetDbref(void)
Reset Dbref data member.
list< CRef< CSeq_id > > TSeqref
void ResetSeqref(void)
Reset Seqref data member.
TDbref & SetDbref(void)
Assign a value to Dbref data member.
const TSeqref & GetSeqref(void) const
Get the Seqref member data.
TSeqref & SetSeqref(void)
Assign a value to Seqref data member.
@ eClass_standard
conforms to all SWISSPROT checks
@ eClass_prelim
only seq and biblio checked
char * dbname(DBPROCESS *dbproc)
Get name of current database.
CSeq_id::E_Choice GetProtAccOwner(string_view acc)
bool IsSPROTAccession(const char *acc)
CSeq_id::E_Choice GetNucAccOwner(string_view acc)
CRef< CDate_std > GetUpdateDate(string_view str, Parser::ESource source)
bool GetSeqLocation(CSeq_feat &feat, string_view location, const CSeq_id &seqid, bool *hard_err, ParserPtr pp, string_view name)
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
#define Seq_descr_GIBB_mod_plasmid
CRef< CPubdesc > DescrRefs(ParserPtr pp, DataBlk &dbp, Uint2 col_data)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static void SpPrepareEntry(ParserPtr pp, const DataBlk &entry, unsigned char *protconv)
forward_list< EmblAcc > TEmblAccList
static void GetSPDescrComment(const DataBlk &entry, CSeq_descr::Tdata &descrs, char *acc, Uint1 cla)
static CBioSource::EGenome GetSPGenomeFrom_OS_OG(const TDataBlkList &dbl)
static void CkInitMetSP(ParserPtr pp, const SPFeatInputList &spfil, CSeq_entry &seq_entry, SPFeatBlnPtr spfbp)
static bool AddToList(forward_list< string > &L, string_view str)
static void CheckSPDupPDBXrefs(CSP_block::TSeqref &refs)
static void fix_taxname_dot(COrg_ref &org_ref)
static void GetSprotSubBlock(ParserPtr pp, const DataBlk &entry)
static void fta_check_embl_drxref_dups(const TEmblAccList &embl_acc_list)
static TTaxId GetTaxIdFrom_OX(DataBlkCIter dbp, DataBlkCIter dbp_end)
static bool fta_spfeats_same(SPFeatInput &fi1, SPFeatInput &fi2)
static void SPGetOneGeneRefNew(ParserPtr pp, CSeq_annot::C_Data::TFtable &feats, size_t seqlen, char *name, char *syns, char *ltags, char *orfs)
forward_list< SPSegLoc > SPSegLocList
static void SPParseDefinition(char *str, const CBioseq::TId &ids, IndexblkPtr ibp, CProt_ref &prot)
static void GetOneGeneRef(ParserPtr pp, CSeq_annot::C_Data::TFtable &feats, string_view bptr, size_t seqlen)
static CRef< CSP_block > GetDescrSPBlock(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq)
static SPFeatInputList ParseSPFeat(const DataBlk &entry, size_t seqlen)
static Int4 GetSeqLen(const DataBlk &entry)
const char * ParFlat_SPComTopics[]
static void SPCollectProtNames(const SPDEFieldList &sfl, SPDEFieldList::const_iterator sfp, CProt_ref &prot, Int4 tag)
static bool IfOHTaxIdMatchOHName(const char *orpname, const char *ohname)
static string GetSPDescrTitle(string_view sv, bool *fragment)
static char * GetDRToken(char **ptr)
static void SPPostProcVarSeq(string &varseq)
static void GetSPAnnot(ParserPtr pp, const DataBlk &entry, unsigned char *protconv)
static SPSegLocList GetSPSegLocInfo(CBioseq &bioseq, const SPFeatInputList &spfil, SPFeatBlnPtr spfbp)
static void MakePDBSeqId(CSP_block_Base::TSeqref &refs, const char *mol, const char *rel, char *chain, bool *drop, Parser::ESource source)
#define ParFlatSPSitesModE
static void GetDRlineDataSP(const DataBlk &entry, CSP_block &spb, bool *drop, Parser::ESource source)
static void SeqToDeltaSP(CBioseq &bioseq, const SPSegLocList &spsll)
static void GetIntFuzzPtr(Uint1 choice, Int4 a, Int4 b, CInt_fuzz &fuzz)
forward_list< SPDEField > SPDEFieldList
static CRef< CSeq_loc > GetSeqLocIntSP(size_t seqlen, char *acnum, bool accver, Int2 vernum)
static void SPGetPEValue(const DataBlk &entry, CSeq_feat &feat)
static void SPValidateEcnum(string &ecnum)
Int2 SpFeatKeyNameValid(const Char *keystr)
static ViralHostList GetViralHostsFrom_OH(DataBlkCIter dbp, DataBlkCIter dbp_end)
static CRef< CDbtag > MakeStrDbtag(const char *dbname, const char *str)
static void MakeChainPDBSeqId(CSP_block_Base::TSeqref &refs, const char *mol, char *chain)
static void fta_create_pdb_seq_id(CSP_block_Base::TSeqref &refs, const char *mol, Uint1 chain)
static void GetSPInst(ParserPtr pp, const DataBlk &entry, unsigned char *protconv)
static void SPValidateDefinition(const SPDEFieldList &sfl, bool *drop, bool is_trembl)
static void SPGetGeneRefsNew(ParserPtr pp, CSeq_annot::C_Data::TFtable &feats, string_view bptr, size_t seqlen)
static void SPFeatGeneral(ParserPtr pp, SPFeatInputList &spfil, bool initmet, CSeq_annot::C_Data::TFtable &feats)
static CRef< CSeq_loc > GetSPSeqLoc(ParserPtr pp, const SPFeatInput &spfip, bool bond, bool initmet, bool signal)
static void SetOfSpeciesFree(SetOfSpeciesPtr sosp)
static void CkGeneNameSP(string &gname)
static bool GetSPDate(Parser::ESource source, const DataBlk &entry, CDate &crdate, CDate &sequpd, CDate &annotupd, short *ver_num)
static char * GetLineOSorOC(const DataBlk &dbp, const char *pattern)
static CRef< COrg_ref > fill_orgref(SetOfSpeciesPtr sosp)
static void SPParseGeneRefTag(char *str, CGene_ref &gene, bool set_locus_tag)
const CharIntLen spde_tags[]
static void ParseGeneNameSP(string_view str, CSeq_feat &feat)
forward_list< ViralHost > ViralHostList
forward_list< SPFeatInput > SPFeatInputList
static CRef< CDate > MakeDatePtr(const char *str, Parser::ESource source)
#define ParFlatSPSitesModB
static void CkNonTerSP(ParserPtr pp, const SPFeatInputList &spfil, CSeq_entry &seq_entry, SPFeatBlnPtr spfbp)
const char * SP_organelle[]
static void get_plasmid(const DataBlk &entry, CSP_block::TPlasnm &plasms)
static SetOfSpeciesPtr GetSetOfSpecies(char *line)
static CRef< CSeq_id > AddPIDToSeqId(char *str, char *acc)
static void SPAppendPIRToHist(CBioseq &bioseq, const CSP_block &spb)
static void SPFreeGenRefTokens(char *name, char *syns, char *ltags, char *orfs)
static void SPFeatProtRef(ParserPtr pp, CSeq_annot::C_Data::TFtable &feats, const DataBlk &entry, SPFeatBlnPtr spfbp)
static void SPFeatGeneRef(ParserPtr pp, CSeq_annot::C_Data::TFtable &feats, const DataBlk &entry)
CRef< CSeq_feat > SpProcFeatBlk(ParserPtr pp, FeatBlkPtr fbp, const CSeq_id &seqid)
static Int2 GetSPSitesMod(string &retstr)
static void ParseSpComment(CSeq_descr::Tdata &descrs, char *line)
static void GetSprotDescr(CBioseq &bioseq, ParserPtr pp, const DataBlk &entry)
static CRef< COrg_ref > GetOrganismFrom_OS_OC(DataBlkCIter entry, DataBlkCIter end)
const char * obsolete_dbs[]
static void fta_remove_dup_spfeats(SPFeatInputList &spfil)
static void SpAddToIndexBlk(const DataBlk &entry, IndexblkPtr pIndex)
SPFeatType ParFlat_SPFeat[]
static void StringCombine(string &dest, const string &to_add, const Char *delim)
static void DelParenthesis(string &str)
#define ParFlat_COL_DATA_SP
EmblAcc(CSeq_id::E_Choice c, string_view a, string_view p)
CRef< objects::CSeq_entry > seq_entry
optional< string > location
vector< IndexblkPtr > entrylist
SPDEField(Int4 t, const char *s)
forward_list< string > syn
string CpTheQualValue(const TQualVector &qlist, const Char *qual)
bool SeqLocHaveFuzz(const CSeq_loc &loc)
char * SrchTheChar(string_view sv, Char letter)
void PointToNextToken(char *&ptr)
string GetBlkDataReplaceNewLine(string_view instr, Uint2 indent)
Int2 StringMatchIcase(const Char **array, string_view text)
CRef< CDate_std > get_full_date(string_view date_view, bool is_ref, Parser::ESource source)
bool SrchNodeType(const DataBlk &entry, Int4 type, size_t *plen, char **pptr)
TDataBlkList & TrackNodes(const DataBlk &entry)
Int2 fta_StringMatch(const Char **array, string_view text)
void fta_StringCpy(char *dst, const char *src)
DataBlk * TrackNodeType(const DataBlk &entry, Int2 type)
Int2 MatchArrayIString(const Char **array, string_view text)
Char * StringIStr(const Char *where, const Char *what)
void CleanTailNonAlphaChar(string &str)
static wxAcceleratorEntry entries[3]
int XDateCheck(const CDate_std &date)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4