{
"arginine",
'R'},
129{
"asparagine",
'N'},
130{
"aspartic acid",
'D'},
131{
"aspartate",
'D'},
132{
"cysteine",
'C'},
133{
"glutamine",
'Q'},
134{
"glutamic acid",
'E'},
135{
"glutamate",
'E'},
137{
"histidine",
'H'},
138{
"isoleucine",
'I'},
141{
"methionine",
'M'},
142{
"phenylalanine",
'F'},
144{
"selenocysteine",
'U'},
146{
"threonine",
'T'},
147{
"tryptophan",
'W'},
148{
"tyrosine",
'Y'},
161{
"Ala",
'A', 0, { 52, 53, 54, 55, -1, -1, -1, -1 } },
162{
"Arg",
'R', 2, { 28, 29, 30, 31, -1, -1, -1, -1 } },
163{
"Arg",
'R', 5, { 28, 29, 30, 31, -1, -1, -1, -1 } },
164{
"Arg",
'R', 9, { 28, 29, 30, 31, -1, -1, -1, -1 } },
165{
"Arg",
'R', 13, { 28, 29, 30, 31, -1, -1, -1, -1 } },
166{
"Arg",
'R', 14, { 28, 29, 30, 31, -1, -1, -1, -1 } },
167{
"Arg",
'R', 0, { 28, 29, 30, 31, 46, 47, -1, -1 } },
168{
"Asn",
'N', 9, { 40, 41, 42, -1, -1, -1, -1, -1 } },
169{
"Asn",
'N', 14, { 40, 41, 42, -1, -1, -1, -1, -1 } },
170{
"Asn",
'N', 0, { 40, 41, -1, -1, -1, -1, -1, -1 } },
171{
"Asp",
'D', 0, { 56, 57, -1, -1, -1, -1, -1, -1 } },
172{
"Asx",
'B', 9, { 40, 41, 42, 56, 57, -1, -1, -1 } },
173{
"Asx",
'B', 14, { 40, 41, 42, 56, 57, -1, -1, -1 } },
174{
"Asx",
'B', 0, { 40, 41, 56, 57, -1, -1, -1, -1 } },
175{
"Cys",
'C', 10, { 12, 13, 14, -1, -1, -1, -1, -1 } },
176{
"Cys",
'C', 0, { 12, 13, -1, -1, -1, -1, -1, -1 } },
177{
"Gln",
'Q', 6, { 10, 11, 26, 27, -1, -1, -1, -1 } },
178{
"Gln",
'Q', 15, { 11, 26, 27, -1, -1, -1, -1, -1 } },
179{
"Gln",
'Q', 0, { 26, 27, -1, -1, -1, -1, -1, -1 } },
180{
"Glu",
'E', 0, { 58, 59, -1, -1, -1, -1, -1, -1 } },
181{
"Glx",
'Z', 6, { 10, 11, 26, 27, 58, 59, -1, -1 } },
182{
"Glx",
'Z', 0, { 11, 26, 27, 58, 59, -1, -1, -1 } },
183{
"Glx",
'Z', 0, { 26, 27, 58, 59, -1, -1, -1, -1 } },
184{
"Gly",
'G', 13, { 46, 47, 60, 61, 62, 63, -1, -1 } },
185{
"Gly",
'G', 0, { 60, 61, 62, 63, -1, -1, -1, -1 } },
186{
"His",
'H', 0, { 24, 25, -1, -1, -1, -1, -1, -1 } },
187{
"Ile",
'I', 2, { 32, 33, -1, -1, -1, -1, -1, -1 } },
188{
"Ile",
'I', 3, { 32, 33, -1, -1, -1, -1, -1, -1 } },
189{
"Ile",
'I', 5, { 32, 33, -1, -1, -1, -1, -1, -1 } },
190{
"Ile",
'I', 13, { 32, 33, -1, -1, -1, -1, -1, -1 } },
191{
"Ile",
'I', 0, { 32, 33, 34, -1, -1, -1, -1, -1 } },
192{
"Leu",
'L', 3, { 2, 3, -1, -1, -1, -1, -1, -1 } },
193{
"Leu",
'L', 12, { 2, 3, 16, 17, 18, -1, -1, -1 } },
194{
"Leu",
'L', 0, { 2, 3, 16, 17, 18, 19, -1, -1 } },
195{
"Lys",
'K', 9, { 43, -1, -1, -1, -1, -1, -1, -1 } },
196{
"Lys",
'K', 14, { 43, -1, -1, -1, -1, -1, -1, -1 } },
197{
"Lys",
'K', 0, { 42, 43, -1, -1, -1, -1, -1, -1 } },
198{
"Met",
'M', 2, { 34, 35, -1, -1, -1, -1, -1, -1 } },
199{
"Met",
'M', 3, { 34, 35, -1, -1, -1, -1, -1, -1 } },
200{
"Met",
'M', 5, { 34, 35, -1, -1, -1, -1, -1, -1 } },
201{
"Met",
'M', 13, { 34, 35, -1, -1, -1, -1, -1, -1 } },
202{
"Met",
'M', 0, { 35, -1, -1, -1, -1, -1, -1, -1 } },
203{
"fMet",
'M', 2, { 34, 35, -1, -1, -1, -1, -1, -1 } },
204{
"fMet",
'M', 3, { 34, 35, -1, -1, -1, -1, -1, -1 } },
205{
"fMet",
'M', 5, { 34, 35, -1, -1, -1, -1, -1, -1 } },
206{
"fMet",
'M', 13, { 34, 35, -1, -1, -1, -1, -1, -1 } },
207{
"fMet",
'M', 0, { 35, -1, -1, -1, -1, -1, -1, -1 } },
208{
"Phe",
'F', 0, { 0, 1, -1, -1, -1, -1, -1, -1 } },
209{
"Pro",
'P', 0, { 20, 21, 22, 23, -1, -1, -1, -1 } },
210{
"Sec",
'U', 0, { -1, -1, -1, -1, -1, -1, -1, -1 } },
211{
"Ser",
'S', 5, { 4, 5, 6, 7, 44, 45, 46, 47 } },
212{
"Ser",
'S', 9, { 4, 5, 6, 7, 44, 45, 46, 47 } },
213{
"Ser",
'S', 12, { 4, 5, 6, 7, 19, 44, 45, -1 } },
214{
"Ser",
'S', 14, { 4, 5, 6, 7, 44, 45, 46, 47 } },
215{
"Ser",
'S', 0, { 4, 5, 6, 7, 44, 45, -1, -1 } },
216{
"Thr",
'T', 3, { 16, 17, 18, 19, 36, 37, 38, 39 } },
217{
"Thr",
'T', 0, { 36, 37, 38, 39, -1, -1, -1, -1 } },
218{
"Trp",
'W', 1, { 15, -1, -1, -1, -1, -1, -1, -1 } },
219{
"Trp",
'W', 6, { 15, -1, -1, -1, -1, -1, -1, -1 } },
220{
"Trp",
'W', 10, { 15, -1, -1, -1, -1, -1, -1, -1 } },
221{
"Trp",
'W', 11, { 15, -1, -1, -1, -1, -1, -1, -1 } },
222{
"Trp",
'W', 12, { 15, -1, -1, -1, -1, -1, -1, -1 } },
223{
"Trp",
'W', 15, { 15, -1, -1, -1, -1, -1, -1, -1 } },
224{
"Trp",
'W', 0, { 14, 15, -1, -1, -1, -1, -1, -1 } },
225{
"Tyr",
'Y', 14, { 8, 9, 10, -1, -1, -1, -1, -1 } },
226{
"Tyr",
'Y', 0, { 8, 9, -1, -1, -1, -1, -1, -1 } },
227{
"Val",
'V', 0, { 48, 49, 50, 51, -1, -1, -1, -1 } },
228{
"TERM",
'*', 1, { 10, 11, 14, -1, -1, -1, -1, -1 } },
229{
"TERM",
'*', 2, { 10, 11, 46, 47, -1, -1, -1, -1 } },
230{
"TERM",
'*', 6, { 14, -1, -1, -1, -1, -1, -1, -1 } },
231{
"TERM",
'*', 11, { 10, 11, 14, -1, -1, -1, -1, -1 } },
232{
"TERM",
'*', 12, { 10, 11, 14, -1, -1, -1, -1, -1 } },
233{
"TERM",
'*', 14, { 11, -1, -1, -1, -1, -1, -1, -1 } },
234{
"TERM",
'*', 15, { 10, 14, -1, -1, -1, -1, -1, -1 } },
235{
"TERM",
'*', 0, { 10, 11, -1, -1, -1, -1, -1, -1 } },
236{
"OTHER",
'X', 0, { -1, -1, -1, -1, -1, -1, -1, -1 } },
237{
nullptr,
'\0', 0, { -1, -1, -1, -1, -1, -1, -1, -1 } }
256 "expressed sequence tag",
257 "partial cDNA sequence",
258 "transcribed sequence fragment",
260 "putatively transcribed partial sequence",
305 "ENSEMBLGENOMES-GN",
306 "ENSEMBLGENOMES-TR",
317 "ACEVIEW/WORMGENES",
387 "UNIPROT/SWISS-PROT",
389 "UNIPROTKB/SWISS-PROT",
430 "artificial_location",
435 "environmental_sample",
446 "ribosomal_slippage",
467 "autocatalytically_spliced_intron",
468 "hammerhead_ribozyme",
509 "DNase_I_hypersensitive_site",
511 "enhancer_blocking_element",
513 "imprinting_control_region",
515 "locus_control_region",
516 "matrix_attachment_region",
520 "polyA_signal_sequence",
522 "recoding_stimulatory_region",
523 "replication_regulatory_region",
524 "ribosome_binding_site",
529 "transcriptional_cis_regulatory_region",
538 "insertion sequence",
539 "non-LTR retrotransposon",
579 returnfbp->
key.empty() ? sv : fbp->
key;
593 returnstd::get<FeatBlk*>(
mData);
602 for(
auto& dbp : dbl) {
619 for(
charc :
value)
642 #ifdef BIOSEQ_FIND_METHOD 644bsp = BioseqFind(sip);
646 return(bsp->length);
655 if(
id.IsGenbank() ||
id.IsEmbl() ||
id.IsDdbj() ||
id.IsTpg() ||
656 id.IsTpe() ||
id.IsTpd())
657text_id =
id.GetTextseq_Id();
662 for(use_indx = 0; use_indx < pp->
indx; use_indx++) {
664 if(text_id_acc == e->acnum &&
665(pp->
accver==
false|| e->vernum == text_id_ver))
669 if(use_indx >= pp->
indx) {
674 if(
len!=
static_cast<size_t>(-1))
675 return static_cast<Int4>(
len);
680 if(pp->
accver==
false|| text_id_ver < 0)
681 msg=
format(
"Location points to outside entry {}", text_id_acc);
683 msg=
format(
"Location points to outside entry {}.{}", text_id_acc, text_id_ver);
688 if(! pp->
buf->empty()) {
689 string msg=
format(
"Feature location references an interval on another record : {}", *pp->
buf);
723qual.
GetQual() !=
"db_xref")
738line = line.substr(4);
740 size_tcolon = line.find(
':');
741 if(colon == string::npos) {
743 "Badly formatted /db_xref qualifier: \"{}\". Qualifier dropped.",
val);
747 stringtail = line.substr(colon + 1);
748line = line.substr(0, colon);
752 "/db_xref type \"{}\" is obsolete.", line);
764 buf=
"UniProt/Swiss-Prot";
766 buf=
"UniProt/TrEMBL";
773 string buf(
"UniProtKB");
774 buf+= line.substr(7);
779 const Char* strid =
nullptr;
782 const Char* p = tail.c_str();
784 for(strid = p; *p >=
'0'&& *p <=
'9';)
786 if(*p ==
'\0'&& *strid !=
'0') {
793 for(strid = p; *p >=
'0'&& *p <=
'9';)
797 "/db_xref qualifier \"{}\" is supposed to be a string, but its value consists of digits only.",
val);
798 if(*strid !=
'0') {
809 "/db_xref qual should have numeric value greater than 0: \"{}\". Qualifier dropped.",
val);
814 for(; *
r>=
'0'&& *
r<=
'9';)
818 "/db_xref qualifier \"{}\" is supposed to be a numeric identifier, but its value includes alphabetic characters. Qualifier dropped.",
val);
821 if(*
r!=
'\0'|| q != p)
828 if(*p !=
'e'&& *p !=
'g'&& *p !=
'd') {
830 "Badly formatted /db_xref qual \"PID\": \"{}\". Qualifier dropped.",
val);
834 const Char* q = p + 1;
839 for(
r= q; *
r>=
'0'&& *
r<=
'9';)
841 if(*q ==
'\0'|| *
r!=
'\0') {
843 "/db_xref qual \"PID\" should contain numeric value greater than 0: \"{}\". Qualifier dropped.",
val);
849 "Unknown data base name /db_xref = \"{}\". Qualifier dropped.",
val);
859 tag->SetTag().SetStr(strid);
861 tag->SetTag().SetId(intid);
892 for(CSeq_feat::TQual::iterator qual = feat.
SetQual().begin(); qual != feat.
SetQual().end();) {
893 if(! (*qual)->IsSetQual() || (*qual)->GetQual() !=
"db_xref") {
904db_refs.push_back(dbtag);
909qual = feat.
SetQual().erase(qual);
955 if(cur_loc->
IsInt())
984 returnmake_unique<string>(
str);
986 for(ptr =
str; *ptr !=
' '&& *ptr !=
'\0';)
995 while(*eptr ==
' '|| *eptr ==
')')
999 returnmake_unique<string>(ptr, eptr);
1071 const CSeq_id* cur_id =
nullptr;
1073 switch(cur_loc->
Which()) {
1077cur_id = cur_loc->
GetId();
1083cur_id = cur_loc->
GetId();
1089cur_id = cur_loc->
GetId();
1097cur_id = cur_loc->
GetId();
1102cur_id = cur_loc->
GetId();
1112 if(! accession || ! cur_id)
1129 else if(strand != cur_loc->
GetStrand())
1147 while((q =
str.find(
"aa")) != string::npos) {
1173dbp = temp_xml_chain.begin();
1174dbp_end = temp_xml_chain.end();
1177dbp = chain.begin();
1178dbp_end = chain.end();
1183 for(; dbp != dbp_end; ++dbp) {
1184 auto& ref_blk = *dbp;
1189 if(pubdesc.
Empty())
1193feat->
SetData().SetPub(*pubdesc);
1195unique_ptr<string> ploc;
1201 auto i= q->find(
'(');
1202 if(
i!= string::npos)
1206 if(ploc->find(
';') != string::npos) {
1207 strings =
"join(";
1214 for(p = ref_blk.mBuf.ptr + col_data; *p !=
'\0'&& *p !=
'(';)
1216ploc =
CheckLocStr(
string(p, ref_blk.mBuf.ptr + ref_blk.mBuf.len - p).c_str());
1218 for(
const auto& subdbp : ref_blk.GetSubBlocks()) {
1222 for(p = subdbp.mBuf.ptr; *p !=
'\0'&&
isdigit(*p) == 0;)
1225 strings =
"join(";
1228ploc = make_unique<string>(s);
1230ploc = make_unique<string>(p);
1234 if(! ploc || ploc->empty()) {
1258feats.push_back(feat);
1263 "Mixed strands in SeqLoc: {}",
location);
1265feats.push_back(feat);
1295dbp = temp_xml_chain.begin();
1296dbp_end = temp_xml_chain.end();
1299dbp = chain.begin();
1300dbp_end = chain.end();
1306 for(; dbp != dbp_end; ++dbp) {
1307 auto& ref_blk = *dbp;
1312 if(pubdesc.
Empty() || ! pubdesc->IsSetPub())
1319imp_feat.
SetKey(
"Site-ref");
1320imp_feat.
SetLoc(
"sites");
1327pub->
SetEquiv(pubdesc->SetPub());
1329feat->
SetCit().SetPub().push_back(pub);
1331 if(pubdesc->IsSetComment())
1338feats.push_back(feat);
1353 return "unknown location";
1375 for(q = p + 4; *q ==
' ';)
1378 for(pars = 0, p = q; *p !=
'\0'; p++) {
1379 if(*p ==
','&& pars == 0)
1383 else if(*p ==
')') {
1393 const char* loc_str = loc_.c_str();
1399 if(ret.
Empty()) {
1402 "Invalid position element for an /anticodon qualifier : \"{}\" : qualifier dropped : feature location \"{}\".", loc_str, loc.empty() ?
"unknown"s : loc);
1411 "tRNA feature at \"{}\" has anticodon with location spanning four bases: \"{}\". Cannot generate corresponding codon value from the DNA sequence.", loc.empty() ?
"unknown"s : loc, loc_str);
1414 "tRNA feature at \"{}\" has anticodon of an unusual size: \"{}\". Cannot generate corresponding codon value from the DNA sequence.", loc.empty() ?
"unknown"s : loc, loc_str);
1423 if(xrange != anticodon_range) {
1426 "Anticodon location \"{}\" does not fall within tRNA feature at \"{}\".", loc_str, loc.empty() ?
"unknown"s : loc);
1454 len= comment.size();
1456 if(
len> 15 &&
len< 20) {
1457 if(
StringEquNI(comment.c_str() +
len- 15,
"S ribosomal RNA", 15)) {
1461}
else if(
len> 6 &&
len< 20) {
1469 if(qval_str.empty())
1473 for(p = qval; p; p += 13) {
1480 for(p = qval; p; p = qval +
len) {
1488 len= p - qval + 13;
1498s.append(
" ribosomal RNA");
1505 for(p = qval, q = p; q; q = p + 13) {
1518 if(p && p > qval && p[15] ==
'\0') {
1520 if(*p >=
'0'&& *p <=
'9')
1528 if(p == qval || (p[9] !=
' '&& p[9] !=
'\0')) {
1536 len= p - qval + 14;
1567rna_ref.
SetExt().SetName(qval);
1577 if(acp->
intaa== ch)
1580 return(acp->
intaa);
1591 for(tap =
taa; tap->
name; tap++)
1601 return(acp->
intaa);
1637 if(product.length() < 7)
1640 booldigits =
false;
1642 for(p = prod; *p !=
'\0'; p++) {
1643 if(*p >=
'a'&& *p <=
'z')
1645 else if((*p < 'A' || *p >
'Z') && *p !=
'('&& *p !=
')') {
1646 if(*p >=
'0'&& *p <=
'9')
1664 for(p = end; *p !=
'\0'; p++)
1665 if(*p ==
'('|| *p ==
')')
1669 if(start == prod && *end ==
'\0') {
1680 for(p = end; *p ==
' '|| *p ==
')'|| *p ==
'(';)
1687 while(*p >=
'A'&& *p <=
'Z')
1694 while(*p ==
' '|| *p ==
')'|| *p ==
'(')
1696 for(q = p; *p >=
'A'&& *p <=
'Z';)
1701 if(q[1] ==
'\0') {
1702 while(*p ==
' '|| *p ==
')'|| *p ==
'(')
1704 for(q = p; *p >=
'A'&& *p <=
'Z';)
1714 while(*p ==
' '|| *p ==
'('|| *p ==
')')
1720 for(p = start - 1; *p ==
' '|| *p ==
')'|| *p ==
'('; p--)
1724 if(p > prod && p[1] ==
')') {
1725 for(p--; *p !=
'('; p--)
1729 for(p--; *p ==
' '|| *p ==
'('|| *p ==
'('; p--)
1735 for(q = p++; *q >=
'A'&& *q <=
'Z'; q--)
1738 if(*q < 'A' || *q >
'Z')
1767 if(!
first&& ! second && ! third && ! fourth &&
remove&& ! digits)
1778comment +=
"; fMet";
1794 if(comment.empty())
1798 for(p = comm; *p !=
'\0'; p++) {
1799 if(*p >=
'a'&& *p <=
'z')
1801 else if(*p < 'A' || *p >
'Z')
1806 if(
StringEquN(comm,
"CODON RECOGNIZED ", 17)) {
1809 if(q &&
StringEqu(q + 1,
"PUTATIVE"))
1818 if(
StringEquN(comm,
"PUTATIVE ", 9) && comm[10] ==
' '&&
1819comm[14] ==
' '&&
StringEquN(&comm[15],
"TRNA", 4)) {
1827 for(q = comm, p = q; p;) {
1855optional<string> qval;
1876feat.
SetData().SetRna(*rna_ref);
1890rna_gen->SetClass(*p);
1896rna_qual->
SetQual(
"tag_peptide");
1900rna_quals->
Set().push_back(rna_qual);
1903rna_gen->SetQuals(*rna_quals);
1912 if(p && ! p->empty()) {
1929 const Char* c_q =
nullptr;
1930 for(;; c_p += 5, c_q = c_p) {
1936 const Char* c_r =
nullptr;
1937 for(c_p = feat.
GetComment().c_str();; c_p += 4, c_r = c_p) {
1944c_p = (c_q > c_r) ? c_q : c_r;
1951 while(*c_p ==
' '|| *c_p ==
'\t'|| *c_p ==
','|| *c_p ==
';')
1954 if(*c_p ==
'\0') {
1962 if(qval->length() > 511) {
1964qval->back() =
'>';
1968 if(rna_gen.
Empty())
1971rna_gen->SetProduct(*qval);
1973rna_ref->
SetExt().SetName(*qval);
1983rna_ref->
SetExt().SetGen(*rna_gen);
2004trnaa->SetAnticodon(*anticodon);
2005rna_ref->
SetExt().SetTRNA(*trnaa);
2013 if(! qval2.empty()) {
2036 if(trnaa.
Empty()) {
2037 if(trnap.
Empty()) {
2039rna_ref->
SetExt().SetTRNA(*trnac);
2045rna_ref->
SetExt().SetTRNA(*trnap);
2049rna_ref->
SetExt().SetTRNA(*trnac);
2053trnap->SetCodon().assign(trnac->GetCodon().begin(), trnac->GetCodon().end());
2072trnac->SetAnticodon(trnaa->SetAnticodon());
2073trnaa->ResetAnticodon();
2076trnac->SetCodon().assign(trnaa->GetCodon().begin(), trnaa->GetCodon().end());
2079rna_ref->
SetExt().SetTRNA(*trnac);
2116feat.
SetData().SetImp(*imp_feat);
2123 for(COrg_ref::TDb::iterator db = bio.
SetOrg().SetDb().begin(); db != bio.
SetOrg().SetDb().end(); ++db) {
2124 if(! (*db)->CanGetDb())
2127COrg_ref::TDb::iterator tdb = db;
2128 for(++tdb; tdb != bio.
SetOrg().SetDb().end(); ++tdb) {
2129 if(! (*tdb)->IsSetDb())
2132 if((*db)->GetDb() < (*tdb)->GetDb())
2135 if((*db)->GetDb() == (*tdb)->GetDb()) {
2137 const CObject_id& tdb_id = (*tdb)->GetTag();
2146 if(! db_id.
IsStr() && ! tdb_id.
IsStr() &&
2157 for(COrgName::TMod::iterator
mod= rmod.begin();
mod!= rmod.end(); ++
mod) {
2158COrgName::TMod::iterator tmod =
mod;
2159 for(++tmod; tmod != rmod.end(); ++tmod) {
2160 if((*mod)->GetSubtype() < (*tmod)->GetSubtype())
2163 if((*mod)->GetSubtype() == (*tmod)->GetSubtype() &&
2164(*mod)->GetSubname() <= (*tmod)->GetSubname())
2177 for(CBioSource::TSubtype::iterator sub = rsub.begin(); sub != rsub.end(); ++sub) {
2178CBioSource::TSubtype::iterator tsub = sub;
2179 for(++tsub; tsub != rsub.end(); ++tsub) {
2180 if((*sub)->GetSubtype() < (*tsub)->GetSubtype())
2183 if((*sub)->GetSubtype() == (*tsub)->GetSubtype() &&
2184(*sub)->GetName() <= (*tsub)->GetName())
2196 boolhas_comma =
val.find(
',') != string::npos;
2199std::replace(
val.begin(),
val.end(),
',',
';');
2210 if(! fbp || fbp->
quals.empty())
2213TQualVector::iterator
first= fbp->
quals.end();
2216 for(TQualVector::iterator qual = fbp->
quals.begin(); qual != fbp->
quals.end();) {
2217 if((*qual)->GetQual() !=
"rpt_unit") {
2223 "Obsolete /rpt_unit qualifier found on feature \"{}\" at location \"{}\".",
key_or(fbp,
"Unknown"),
location_or(fbp,
"unknown"));
2225 if((*qual)->GetVal().empty()) {
2226qual = fbp->
quals.erase(qual);
2231 len+= (*qual)->GetVal().size();
2251 const string&
val= (*first)->GetVal();
2252 if(*
val.begin() ==
'('&& *
val.rbegin() ==
')') {
2261p.append((*first)->GetVal());
2263 for(TQualVector::iterator qual =
first; qual != fbp->
quals.end();) {
2264 if((*qual)->GetQual() !=
"rpt_unit") {
2270p.append((*qual)->GetVal());
2271qual = fbp->
quals.erase(qual);
2274(*first)->SetVal(p);
2287 if(! fbp || fbp->
quals.empty())
2297 for(TQualVector::iterator qual = fbp->
quals.begin(); qual != fbp->
quals.end();) {
2298 const string& qual_str = (*qual)->IsSetQual() ? (*qual)->GetQual() :
"";
2299 const string& val_str = (*qual)->IsSetVal() ? (*qual)->GetVal() :
"";
2300 if(qual_str ==
"experiment") {
2301 if(val_str ==
"experimental evidence, no additional details recorded") {
2303qual = fbp->
quals.erase(qual);
2311 if(qual_str ==
"inference") {
2312 if(val_str ==
"non-experimental evidence, no additional details recorded") {
2314qual = fbp->
quals.erase(qual);
2322 if(qual_str !=
"evidence") {
2333 "Illegal value \"{}\" for /evidence qualifier on the \"{}\" feature at \"{}\". Qualifier dropped.", val_str.empty() ?
"Unknown"s : val_str,
key_or(fbp,
"Unknown"),
location_or(fbp,
"unknown location"));
2336qual = fbp->
quals.erase(qual);
2339 if(evi_exp + evi_not > 0 && exp_good + exp_bad + inf_good + inf_bad > 0) {
2341 "Old /evidence and new /experiment or /inference qualifiers both exist on the \"{}\" feature at \"{}\". This is currently unsupported.",
key_or(fbp,
"Unknown"),
location_or(fbp,
"unknown location"));
2345 if(evi_exp + exp_good > 0 && evi_not + inf_good > 0) {
2347 "The special \"no additional details recorded\" values for both /experiment and /inference exist on the \"{}\" feature at \"{}\". This is currently unsupported.",
key_or(fbp,
"Unknown"),
location_or(fbp,
"unknown location"));
2351 if((exp_good > 0 && exp_bad > 0) || (inf_good > 0 && inf_bad > 0)) {
2353 "The special \"no additional details recorded\" value for /experiment or /inference exists in conjunction with other /experiment or /inference qualifiers on the \"{}\" feature at \"{}\". This is currently unsupported.",
key_or(fbp,
"Unknown"),
location_or(fbp,
"unknown location"));
2357 if(exp_good + evi_exp > 0)
2359 else if(inf_good + evi_not > 0)
2384 boollocmap =
false;
2392pp->
buf= fbp->
key+
" : "+ loc;
2398 if(pp->
debug==
false) {
2400 "{}|{}| range check detects problems", fbp->
key, loc);
2405 "{}|{}| range check detects problems", fbp->
key, loc);
2409 if(! fbp->
quals.empty()) {
2414 if(loc.find(
"order") != string_view::npos)
2417 if(! fbp->
quals.empty()) {
2422 if(! fbp->
quals.empty())
2425 if(! fbp->
quals.empty())
2428 if(! fbp->
quals.empty()) {
2430 if(fbp->
key== *
b)
2438exc_text +=
", trans-splicing";
2453 if(! fbp->
quals.empty()) {
2456 if(! comment->empty()) {
2464 if(fbp->
key.find(
"source") == string::npos)
2467 for(
const auto& cur : fbp->
quals) {
2468 const string& qual_str = cur->GetQual();
2469 if(qual_str ==
"pseudogene")
2473 if(qual_str ==
"translation"&& (! cur->IsSetVal() || cur->GetVal().empty()))
2476 if(! qual_str.empty())
2477feat->
SetQual().push_back(cur);
2499 for(TQualVector::iterator q = fbp->
quals.begin(); q != fbp->
quals.end(); ++q) {
2500 if((*q)->GetQual() ==
"gene"||
2501(! qamode && (*q)->GetQual() ==
"product"))
2504TQualVector::iterator tq = q;
2505 for(++tq; tq != fbp->
quals.end(); ++tq) {
2506 const string& q_qual = (*q)->GetQual();
2507 const string& tq_qual = (*tq)->GetQual();
2509 if(! tq_qual.empty()) {
2510 if(q_qual ==
"gene")
2519 const stringq_val = (*q)->GetVal();
2520 const stringtq_val = (*tq)->GetVal();
2525 if(! tq_val.empty()) {
2526 if(q_val[0] >=
'0'&& q_val[0] <=
'9'&&
2527tq_val[0] >=
'0'&& tq_val[0] <=
'9') {
2530}
else if(q_val <= tq_val)
2544 boolfound =
false;
2546 for(
const auto& gbqp1 : qual1) {
2548 for(
const auto& gbqp2 : qual2) {
2549 const Char* qual_a = gbqp1->IsSetQual() ? gbqp1->GetQual().c_str() :
nullptr;
2550 const Char* qual_b = gbqp2->IsSetQual() ? gbqp2->GetQual().c_str() :
nullptr;
2552 const Char* val_a = gbqp1->IsSetVal() ? gbqp1->GetVal().c_str() :
nullptr;
2553 const Char* val_b = gbqp2->IsSetVal() ? gbqp2->GetVal().c_str() :
nullptr;
2573 if(! fbp1 && ! fbp2)
2575 if(! fbp1 || ! fbp2 ||
2594 if(!
val|| *
val==
'\0')
2597 for(p =
val; *p >=
'0'&& *p <=
'9';)
2600 if(p ==
val|| p[0] !=
'.'|| p[1] !=
'.')
2604 for(p += 2, q = p; *q >=
'0'&& *q <=
'9';)
2606 if(q == p || *q !=
'\0')
2610 if(i1 == 0 || i1 > i2 || i2 > (
Int4)length)
2618 if(! fbp || fbp->
quals.empty())
2621 for(TQualVector::iterator cur = fbp->
quals.begin(); cur != fbp->
quals.end();) {
2622 if(! (*cur)->IsSetQual() || ! (*cur)->IsSetVal()) {
2627 const string& qual_str = (*cur)->GetQual();
2628 const string& val_str = (*cur)->GetVal();
2636 if(_loc.size() > 20) {
2641 "/rpt_unit_range qualifier \"{}\" on feature \"{}\" at location \"{}\" is not a valid basepair range. Qualifier dropped.", val_str.empty() ?
"(EMPTY)"s : val_str,
key_or(fbp,
"Unknown"), _loc);
2643cur = fbp->
quals.erase(cur);
2650 autodbp = dbl.begin();
2651 if(dbp == dbl.end() ||
next(dbp) == dbl.end())
2654 for(; dbp != dbl.end(); ++dbp) {
2655 if(! dbp->hasData())
2658 const FeatBlk* fbp1 = dbp->GetFeatData();
2660 autotdbpprev = dbp;
2661 for(
autotdbp =
next(dbp); tdbp != dbl.end();) {
2662 if(! tdbp->hasData()) {
2663tdbp = dbl.erase_after(tdbpprev);
2667 const FeatBlk* fbp2 = tdbp->GetFeatData();
2680 if(_loc.size() > 20) {
2685 "Duplicated feature \"{}\" at location \"{}\" removed.",
key_or(fbp2,
"???"), _loc);
2688tdbp = dbl.erase_after(tdbpprev);
2711 for(
const auto& dbp : dbl) {
2712 const FeatBlk* fbp = dbp.GetFeatData();
2721 "Multiple /locus_tag values for \"{}\" feature at \"{}\".",
key_or(fbp,
"Unknown"),
location_or(fbp,
"unknown location"));
2731isLocusTag(
"locus_tag");
2733 for(
const auto& dbp : dbl) {
2734 const FeatBlk* fbp = dbp.GetFeatData();
2737 size_tolt = std::count_if(fbp->
quals.begin(), fbp->
quals.end(), isOldLocusTag);
2738 size_t lt= std::count_if(fbp->
quals.begin(), fbp->
quals.end(), isLocusTag);
2745 "Feature \"{}\" at \"{}\" has an /old_locus_tag qualifier but lacks a /locus_tag qualifier. Entry dropped.",
key_or(fbp,
"Unknown"),
location_or(fbp,
"unknown location"));
2748 for(
const auto& gbqp1 : fbp->
quals) {
2749 if(! gbqp1->IsSetQual() || ! gbqp1->IsSetVal() || ! isLocusTag(gbqp1))
2752 const string& gbqp1_val = gbqp1->GetVal();
2753 if(gbqp1_val.empty())
2756 for(
const auto& gbqp2 : fbp->
quals) {
2757 if(! gbqp2->IsSetQual() || ! gbqp2->IsSetVal())
2760 const string& gbqp2_val = gbqp2->GetVal();
2765 "Feature \"{}\" at \"{}\" has an /old_locus_tag qualifier with a value that is identical to that of a /locus_tag qualifier: \"{}\". Entry dropped.",
key_or(fbp,
"Unknown"),
location_or(fbp,
"unknown location"), gbqp1_val);
2774 for(TQualVector::const_iterator gbqp1 = fbp->
quals.begin(); gbqp1 != fbp->
quals.end(); ++gbqp1) {
2775 if(! (*gbqp1)->IsSetVal())
2777 const string& gbqp1_val = (*gbqp1)->GetVal();
2778 if(isOldLocusTag(*gbqp1) || gbqp1_val.empty())
2781TQualVector::const_iterator gbqp2 = gbqp1;
2782 for(++gbqp2; gbqp2 != fbp->
quals.end(); ++gbqp2) {
2783 const string& gbqp2_val = (*gbqp2)->GetVal();
2784 if(isOldLocusTag(*gbqp2) || gbqp2_val.empty())
2789 "Feature \"{}\" at \"{}\" has redundant /old_locus_tag qualifiers. Dropping all but the first.",
key_or(fbp,
"Unknown"),
location_or(fbp,
"unknown location"));
2794 if(gbqp2 != fbp->
quals.end())
2803 for(
auto& dbp : dbl) {
2808 boolgot_pseudo =
false;
2809 boolgot_pseudogene =
false;
2811 for(TQualVector::iterator cur = fbp->
quals.begin(); cur != fbp->
quals.end();) {
2812 const string& qual_str = (*cur)->GetQual();
2813 const string& val_str = (*cur)->IsSetVal() ? (*cur)->GetVal() :
"";
2815 if(qual_str !=
"pseudogene") {
2816 if(! got_pseudo && qual_str ==
"pseudo")
2822 if(got_pseudogene) {
2824 "Dropping a /pseudogene qualifier because multiple /pseudogene qualifiers are present : <{}> : Feature key <{}> : Feature location <{}>.", val_str.empty() ?
"[empty]"s : val_str, fbp->
key, *fbp->
location);
2826cur = fbp->
quals.erase(cur);
2830got_pseudogene =
true;
2832 if(val_str.empty()) {
2834 "Dropping a /pseudogene qualifier because its value is empty : Feature key <{}> : Feature location <{}>.", fbp->
key, *fbp->
location);
2836cur = fbp->
quals.erase(cur);
2846 "Dropping a /pseudogene qualifier because its value is invalid : <{}> : Feature key <{}> : Feature location <{}>.", val_str, fbp->
key, *fbp->
location);
2848cur = fbp->
quals.erase(cur);
2851 if(! got_pseudogene || ! got_pseudo)
2855 "A legacy /pseudo qualifier and a /pseudogene qualifier are present on the same feature : Dropping /pseudo : Feature key <{}> : Feature location <{}>.", fbp->
key, *fbp->
location);
2863 for(
auto& dbp : dbl) {
2864 FeatBlk* fbp = dbp.GetFeatData();
2871 for(TQualVector::iterator cur = fbp->
quals.begin(); cur != fbp->
quals.end();) {
2872 const string& qual_str = (*cur)->GetQual();
2873 const string dummy;
2874 const string& val_str = (*cur)->IsSetVal() ? (*cur)->SetVal() :
dummy;
2876 if(qual_str ==
"compare") {
2877 boolbadcom =
true;
2878 if(! val_str.empty()) {
2879 const char* q =
StringChr(val_str.c_str(),
'.');
2880 if(q && q[1] !=
'\0') {
2882 for(p = q + 1; *p >=
'0'&& *p <=
'9';)
2893 "/compare qualifier value is not a legal Accession.Version : feature \"{}\" at \"{}\" : value \"{}\" : qualifier has been dropped.", fbp->
key, *fbp->
location, val_str.empty() ?
"[empty]"s : val_str);
2894cur = fbp->
quals.erase(cur);
2898}
else if(qual_str ==
"citation")
2904 if(com_count > 0 || cit_count > 0 ||
2905(fbp->
key!=
"old_sequence"&& fbp->
key!=
"conflict"))
2909 "Feature \"{}\" at \"{}\" lacks required /citation and/or /compare qualifier : feature has been dropped.", fbp->
key, *fbp->
location);
2926 autodbp = dbl.begin();
2927 for(; dbp != dbl.end(); ++dbp) {
2928fbp = dbp->GetFeatData();
2932 for(p =
location, q = p; *p !=
'\0'; p++)
2933 if(*p !=
' '&& *p !=
'\t'&& *p !=
'\n')
2942 for(p =
location+ 1; *p !=
'\0'; p++) {
2945 for(
r=
nullptr, q = p - 1;; q--) {
2947 if(*q !=
'_'&& (*q < '0' || *q >
'9') &&
2948(*q < 'a' || *q >
'z') && (*q < 'A' || *q >
'Z'))
2960 if(*q !=
'_'&& (*q < '0' || *q >
'9') &&
2961(*q < 'a' || *q >
'z') && (*q < 'A' || *q >
'Z')) {
2970(q[1] ==
'z'|| q[1] ==
'Z') && ibp->
is_tpa==
false)
2984 if(dbp == dbl.end())
2994 "Feature \"{}\" at \"{}\" on a TSA record cannot point to a non-TSA record.", fbp->
key,
location?
location:
"empty_location");
2995}
else if(ibp->
is_tls) {
2997 "Feature \"{}\" at \"{}\" on a TLS record cannot point to a non-TLS record.", fbp->
key,
location?
location:
"empty_location");
3000 "Feature \"{}\" at \"{}\" on a TPA record cannot point to a non-TPA record.", fbp->
key,
location?
location:
"empty_location");
3009 usingFTAOperonList = list<FTAOperon*>;
3010FTAOperonList operonList;
3011FTAOperonList residentList;
3012 boolsuccess =
true;
3014 if(feats.empty()) {
3018 for(
const auto& pFeat : feats) {
3019 if(! pFeat->GetData().IsImp())
3022 const auto& featLocation = pFeat->GetLocation();
3023 const CImp_feat& featImp = pFeat->GetData().GetImp();
3027 for(
const auto& pQual : pFeat->GetQual()) {
3028 const auto& qual = *pQual;
3029 if(! qual.IsSetQual() || qual.GetQual() !=
"operon"||
3030! qual.IsSetVal() || qual.GetVal().empty()) {
3040operonList.push_back(pLatest);
3042residentList.push_back(pLatest);
3045 for(
const auto& operon : operonList) {
3046 if(pLatest == operon) {
3049 if(pLatest->
mOperon!= operon->mOperon) {
3053 "The operon features at \"{}\" and \"{}\" utilize the same /operon qualifier : \"{}\".", operon->LocationStr(), pLatest->
LocationStr(), pLatest->
mOperon);
3058 if(opQualCount > 1) {
3060 "Feature \"{}\" at \"{}\" has more than one operon qualifier.", pLatest->
mFeatname, pLatest->
LocationStr());
3064 if(opQualCount == 0 && featImp.
IsSetKey() && featImp.
GetKey() ==
"operon") {
3071 for(
const auto& resident : residentList) {
3072 boolmatched =
false;
3073 for(
const auto& operon : operonList) {
3074 if(resident->mOperon != operon->mOperon) {
3082 "Feature \"{}\" at \"{}\" with /operon qualifier \"{}\" does not fall within the span of the operon feature at \"{}\".", resident->mFeatname, resident->LocationStr(), resident->mOperon, operon->LocationStr());
3088 "/operon qualifier \"{}\" on feature \"{}\" at \"{}\" has a value that does not match any of the /operon qualifiers on operon features.", resident->mOperon, resident->mFeatname, resident->LocationStr());
3092 for(
auto& resident : residentList) {
3095 for(
auto& operon : operonList) {
3104 if(! fbp || fbp->
quals.empty())
3107 for(TQualVector::iterator cur = fbp->
quals.begin(); cur != fbp->
quals.end(); ++cur) {
3108 const char* cur_qual = (*cur)->IsSetQual() ? (*cur)->GetQual().c_str() :
nullptr;
3109 const char* cur_val = (*cur)->IsSetVal() ? (*cur)->GetVal().c_str() :
nullptr;
3111TQualVector::iterator
next= cur;
3113 const char* next_qual = (*next)->IsSetQual() ? (*next)->GetQual().c_str() :
nullptr;
3114 const char* next_val = (*next)->IsSetVal() ? (*next)->GetVal().c_str() :
nullptr;
3122 if(_loc.size() > 20) {
3128 "Duplicated qualifier \"{}\" in feature \"{}\" at location \"{}\" removed.", cur_qual ? cur_qual :
"???",
key_or(fbp,
"???"), _loc);
3142list<string> linkage_evidence_names;
3145 const char* gap_type;
3148 Int4estimated_length;
3171 if(is_htg >= 0 && is_htg <= 2)
3175 else if(
key==
"HTGS_PHASE0")
3177 else if(
key==
"HTGS_PHASE1")
3179 else if(
key==
"HTGS_PHASE2")
3181 else if(
key==
"HTGS_PHASE3")
3187finished_gap =
false;
3188ibp->
gaps.clear();
3189 for(; dbp != dbp_end; ++dbp) {
3192 if(dbp->mType !=
type)
3195linkage_evidence_names.clear();
3196asn_linkage_evidence.clear();
3198 for(
const auto& tdbp : dbp->GetSubBlocks()) {
3201 const FeatBlk* fbp = tdbp.GetFeatData();
3202 if(! fbp || fbp->
key.empty())
3204 if(fbp->
key==
"gap") {
3205prev_gap = curr_gap;
3207}
else if(fbp->
key==
"assembly_gap") {
3208prev_gap = curr_gap;
3215gap_type =
nullptr;
3216linkage_evidence_names.clear();
3218asn_linkage_evidence.clear();
3219estimated_length = -1;
3221 for(
const auto& cur : fbp->
quals) {
3222 if(! cur->IsSetQual() || ! cur->IsSetVal())
3225 const string& cur_qual = cur->GetQual();
3226 const string& cur_val = cur->GetVal();
3228 if(cur_qual.empty() || cur_val.empty())
3231 if(cur_qual ==
"estimated_length") {
3232 if(cur_val ==
"unknown")
3233estimated_length = -100;
3235 if(string::npos == cur_val.find_first_not_of(
"0123456789"))
3238}
else if(cur_qual ==
"gap_type")
3239gap_type = cur_val.c_str();
3240 else if(cur_qual ==
"linkage_evidence") {
3241linkage_evidence_names.push_back(cur_val);
3248 boolhas_lt =
false;
3249 if(q.starts_with(
'<')) {
3253 size_tp = q.find_first_not_of(
"0123456789");
3255 if(p == string_view::npos) {
3259 if(q.starts_with(
'.')) {
3261 if(has_lt && from != 1)
3263 else if(q.starts_with(
'.')) {
3265 boolhas_gt =
false;
3266 if(q.starts_with(
'>')) {
3270 if(string_view::npos == q.find_first_not_of(
"0123456789"))
3272 if(has_gt && to != (
int)ibp->
bases)
3279 if(from == 0 || to == 0 || from > to) {
3280 if(curr_gap == 1) {
3282 "Invalid gap feature location : \"{}\" : all gap features must have a simple X..Y location on the plus strand.",
location_or(fbp,
"unknown"));
3285 "Invalid assembly_gap location : \"{}\".",
location_or(fbp,
"unknown"));
3293 if(gap_type && is_htg > -1 &&
3294!
StringEqu(gap_type,
"within scaffold") &&
3295!
StringEqu(gap_type,
"repeat within scaffold")) {
3297 "assembly_gap has /gap_type of \"{}\", but clone-based HTG records are only expected to have \"within scaffold\" or \"repeat within scaffold\" gaps. assembly_gap feature located at \"{}..{}\".", gap_type, from, to);
3300 if(is_htg == 0 || is_htg == 1) {
3301 for(
const string& evidence : linkage_evidence_names) {
3304 "assembly gap has /linkage_evidence of \"{}\", but unoriented and unordered Phase0/Phase1 HTG records are expected to have \"unspecified\" evidence. assembly_gap feature located at \"{}..{}\".", evidence, from, to);
3307}
else if(is_htg == 2 || is_htg == 3) {
3308 for(
const string& evidence : linkage_evidence_names) {
3312 "assembly gap has /linkage_evidence of \"unspecified\", but ordered and oriented HTG records are expected to have some level of linkage for their gaps. assembly_gap feature located at \"{}..{}\".", from, to);
3316 if(is_htg == 3 && ! finished_gap) {
3318 "Finished Phase-3 HTG records are not expected to have any gaps. First assembly_gap feature encountered at \"{}..{}\".", from, to);
3319finished_gap =
true;
3324 "assembly_gap feature at \"{}..{}\" lacks the required /gap_type qualifier.", from, to);
3334 "assembly_gap feature at \"{}..{}\" has an invalid gap type : \"{}\".", from, to, gap_type);
3338asn_gap_type = snp->
num;
3340 if(linkage_evidence_names.empty() &&
3341(
StringEqu(gap_type,
"within scaffold") ||
3342 StringEqu(gap_type,
"repeat within scaffold"))) {
3344 "assembly_gap feature at \"{}..{}\" with gap type \"{}\" lacks a /linkage_evidence qualifier.", from, to, gap_type);
3348 if(! linkage_evidence_names.empty()) {
3349 if(!
StringEqu(gap_type,
"unknown") &&
3350!
StringEqu(gap_type,
"within scaffold") &&
3351!
StringEqu(gap_type,
"repeat within scaffold")) {
3353 "The /linkage_evidence qualifier is not legal for the assembly_gap feature at \"{}..{}\" with /gap_type \"{}\".", from, to, gap_type);
3358 for(
const string& evidence : linkage_evidence_names) {
3360 if(evidence == snp->
str)
3364 "assembly_gap feature at \"{}..{}\" has an invalid linkage evidence : \"{}\".", from, to, evidence);
3370new_evidence->SetType(snp->
num);
3371asn_linkage_evidence.push_back(new_evidence);
3376 if(prev_gap + curr_gap == 3) {
3379 msg=
format(
"Legacy gap feature at \"{}..{}\" co-exists with a new AGP 2.0 assembly_gap feature at \"{}..{}\".", from, to, gfp->from, gfp->to);
3381 msg=
format(
"Legacy gap feature at \"{}..{}\" co-exists with a new AGP 2.0 assembly_gap feature at \"{}..{}\".", gfp->from, gfp->to, from, to);
3387 if(estimated_length == -1)
3390 "The gap feature at \"{}..{}\" lacks the required /estimated_length qualifier.", from, to);
3392}
else if(estimated_length == 0) {
3394 "Gap feature at \"{}..{}\" has an illegal /estimated_length qualifier : \"{}\" : should be \"unknown\" or an integer.", from, to,
"");
3397}
else if(estimated_length == -100) {
3398 if(is_htg >= 0 && to - from != 99) {
3400 "Gap feature at \"{}..{}\" has /estimated_length \"unknown\" but the gap size is not 100 bases.", from, to);
3402}
else if(estimated_length != to - from + 1) {
3411 "Gap feature at \"{}..{}\" has a size that does not match the /estimated_length : {}.", from, to, estimated_length);
3414 for(gfp = ibp->
gaps.begin(); gfp != ibp->
gaps.end(); ++gfp) {
3415 if((gfp->from >= from && gfp->from <= to) ||
3416(gfp->to >= from && gfp->to <= to) ||
3417(gfp->from <= from && gfp->to >= to)) {
3419 "Gap features at \"{}..{}\" and \"{}..{}\" overlap.", from, to, gfp->from, gfp->to);
3421}
else if(to + 1 == gfp->from || from - 1 == gfp->to) {
3430 "Gap features at \"{}..{}\" and \"{}..{}\" are contiguous, and should probably be represented by a single gap that spans both.", from, to, gfp->from, gfp->to);
3436 autotgfp = ibp->
gaps.before_begin();
3438 auto constnxt =
next(tgfp);
3439 if(nxt == ibp->
gaps.end() || nxt->from >= from) {
3444gfp = ibp->
gaps.emplace_after(tgfp);
3448gfp->estimated_length = estimated_length;
3450gfp->assembly_gap =
true;
3452gfp->gap_type = gap_type;
3453gfp->asn_gap_type = asn_gap_type;
3455 if(! asn_linkage_evidence.empty()) {
3456gfp->asn_linkage_evidence.swap(asn_linkage_evidence);
3457asn_linkage_evidence.clear();
3461linkage_evidence_names.clear();
3462asn_linkage_evidence.clear();
3466 if(ibp->
gaps.empty())
3470ibp->
gaps.clear();
3477 if(! entry || xil.empty())
3480 for(
const auto& xip : xil) {
3481 if(xip.subtags.empty())
3485 for(
const auto& xipqual : xip.subtags) {
3497quals.push_back(qual);
3506 if(! entry || xil.empty())
3509 autoxip = xil.cbegin();
3510 for(; xip != xil.cend(); ++xip)
3514 if(xip == xil.cend() || xip->subtags.empty())
3518 autodbp = dbl.before_begin();
3520 const auto& subtags = xip->subtags;
3521 for(
const auto& xip : subtags) {
3522 if(xip.subtags.empty())
3526 for(
const auto& xipfeat : xip.subtags) {
3534dbp = dbl.emplace_after(dbp, 0);
3535dbp->SetFeatData(fbp);
3539p.mData = std::move(dbl);
3557 for(
auto& cur : quals) {
3558 if(! cur->IsSetQual() || ! cur->IsSetVal())
3561 const string& cur_qual = cur->GetQual();
3562 const string& cur_val = cur->GetVal();
3564 if(cur_qual !=
"note"|| cur_val.empty())
3569 buf.reserve(cur_val.size());
3571 for(
const char* cp = cur_val.c_str(); *cp !=
'\0'; ++cp) {
3572 buf.push_back(*cp);
3573 if(*cp ==
';'&& (cp[1] ==
' '|| cp[1] ==
';')) {
3574 for(++cp; *cp ==
' '|| *cp ==
';';)
3577 buf.push_back(
' ');
3584 size+= cur->GetVal().size();
3585 for(
charc : cur->GetVal())
3594note.reserve(
size- 1);
3596 for(TQualVector::iterator cur = quals.begin(); cur != quals.end();) {
3597 if(! (*cur)->IsSetQual() || ! (*cur)->IsSetVal()) {
3602 const string& cur_qual = (*cur)->GetQual();
3603 const string& cur_val = (*cur)->GetVal();
3605 if(cur_qual !=
"note") {
3610 if(! cur_val.empty()) {
3613 if(! note.empty()) {
3614note.push_back(
';');
3615note.push_back(
'~');
3618 for(
charc : cur_val) {
3620note.push_back(
'~');
3625cur = quals.erase(cur);
3629qual_new->
SetQual(
"note");
3630qual_new->
SetVal(note);
3632quals.push_back(qual_new);
3639 for(
charc :
val) {
3642 if(
isalpha(c) || c ==
'_')
3661 for(
char& c :
str)
3662 if(c >=
'A'&& c <=
'Z')
3669 if(val_str.size() < 2)
3674 for(
size_t i= 1;
i< val_str.size(); ++
i) {
3675 if(val_str[
i- 1] ==
','&& val_str[
i] !=
' ')
3679val_str.reserve(val_str.size() + v.size());
3681 while(! v.empty()) {
3682 size_t i= v.back();
3685val_str.insert(
i, 1,
' ');
3691 const string&
str,
3692vector<string>& lines)
3716 stringbstr(bptr, eptr);
3719vector<string> qualLines;
3722 stringqualKey, qualVal;
3723 stringfeatKey(fbp->
key);
3724 stringfeatLocation(*fbp->
location);
3726 while(! qualParser.
Done()) {
3732pQual->
SetVal(qualVal);
3733fbp->
quals.push_back(pQual);
3745 auto n=
str.find(
':');
3749 "/satellite qualifier \"{}\" does not begin with a valid satellite type.",
str);
3751}
else if(
n!= string_view::npos &&
n+ 1 >=
str.size()) {
3753 "/satellite qualifier \"{}\" does not include a class or identifier after the satellite type.",
str);
3764 boolfound =
false;
3767 for(TQualVector::iterator qual = fbp->
quals.begin(); qual != fbp->
quals.end(); ++qual)
3769 if((*qual)->IsSetQual() && (*qual)->GetQual() ==
"mobile_element_type"&&
3770(*qual)->IsSetVal() && !(*qual)->GetVal().empty()) {
3771p_val = (
char*) (*qual)->GetVal().c_str();
3772 for(p = p_val; *p ==
'\"';)
3784optional<string> loc_str = fbp->
location;
3787 "Mandatory qualifier /mobile_element_type is absent or has no value : Feature \"mobile_element\" : Location \"{}\". Feature dropped.", loc_str.has_value() ? loc_str.value() :
"unknown");
3790 "Mandatory qualifier /mobile_element_type is absent or has no value : Feature \"mobile_element\" : Location \"{}\". Entry dropped.", loc_str.has_value() ? loc_str.value() :
"unknown");
3803optional<string> loc_str = fbp->
location;
3806 "The value \"{}\" of qualifier /mobile_element_type is invalid for the feature \"mobile_element\" at \"{}\". Feature dropped.", p_val, loc_str.has_value() ? loc_str.value() :
"unknown");
3809 "The value \"{}\" of qualifier /mobile_element_type is invalid for the feature \"mobile_element\" at \"{}\". Entry dropped.", p_val, loc_str.has_value() ? loc_str.value() :
"unknown");
3856loc =
"1.."+ to_string(ibp->
bases);
3858 for(
auto& dbp : dbl) {
3862dbp.SetFeatData(fbp);
3864bptr = dbp.mBuf.ptr;
3865eptr = bptr + dbp.mBuf.len;
3867 for(p = bptr; *p !=
'\n';)
3871 if(*ptr1 ==
' ') {
3874 for(ptr1 = bptr; *ptr1 ==
' ';)
3877 for(ptr2 = ptr1; *ptr2 !=
' '&& *ptr2 !=
'\n';)
3882fbp->
key=
"misc_feature";
3886 for(ptr1 = ptr2; *ptr1 ==
' ';)
3888 if(*ptr1 ==
'\n') {
3889 if(ibp->
is_mga==
false) {
3903 for(ptr2 = ptr1; *ptr2 !=
'/'&& ptr2 < eptr;)
3905 stringtmp_loc(ptr1, ptr2);
3909 for(
charc : tmp_loc)
3910 if(c !=
' '&& c !=
'\n')
3913fbp->
location= std::move(tmp_loc);
3920 if(fbp->
key==
"allele"|| fbp->
key==
"mutation") {
3922 "Obsolete feature \"{}\" found. Replaced with \"variation\".", fbp->
key);
3923fbp->
key=
"variation";
3939 if(fbp->
key!=
"assembly_gap") {
3940 for(
const auto& cur : fbp->
quals) {
3941 const string& cur_qual = cur->GetQual();
3942 if(cur_qual ==
"gap_type"||
3943cur_qual ==
"assembly_evidence") {
3945 "Qualifier /{} is invalid for the feature \"{}\" at \"{}\".", cur_qual, fbp->
key,
location_or(fbp,
"Unknown"));
3951 if(fbp->
key!=
"source") {
3952 for(
const auto& cur : fbp->
quals) {
3953 const string& cur_qual = cur->GetQual();
3954 if(cur_qual ==
"submitter_seqid") {
3956 "Qualifier /{} is invalid for the feature \"{}\" at \"{}\".", cur_qual, fbp->
key,
location_or(fbp,
"Unknown"));
3965 if(fbp->
key==
"mobile_element"&&
3990 if(fbp->
key!=
"mobile_element") {
3993 if((fbp->
key!=
"old_sequence"&& fbp->
key!=
"conflict") ||
3994(
str!=
"citation")) {
3996 "lacks required /{} qualifier : feature has been dropped.",
str);
4003}
else if(fbp->
key==
"misc_feature"&& fbp->
quals.empty()) {
4012 for(
auto& cur : fbp->
quals) {
4013 if(! cur->IsSetQual() || ! cur->IsSetVal())
4016 const string& qual_str = cur->GetQual();
4017 stringval_str = cur->GetVal();
4021 if(val_str.empty() && qual_str !=
"replace") {
4024 if(qual_str ==
"replace")
4026cur->SetVal(val_str);
4029 if(qual_str ==
"satellite")
4043 if(! fbp || fbp->
quals.empty())
4046 for(TQualVector::iterator cur = fbp->
quals.begin(); cur != fbp->
quals.end();) {
4047 const string& qual_str = (*cur)->GetQual();
4049 if((*cur)->IsSetVal()) {
4050 stringval_str = (*cur)->GetVal();
4052 if(qual_str ==
"translation") {
4054}
else if(qual_str ==
"rpt_unit") {
4056}
else if(qual_str ==
"cons_splice") {
4058}
else if(qual_str ==
"note") {
4060 for(
size_tp = 0;;) {
4061p = val_str.find(
'/', p);
4062 if(p == string::npos)
4065 if(!
CheckLegalQual(string_view(val_str).substr(p),
' ',
nullptr))
4068 string_loc(val_str.substr(0, 34));
4069 if(_loc.size() > 30) {
4075 "/note qualifier value appears to contain other qualifiers : qualifier has been dropped : [{}].", _loc);
4079 "/note qualifier value appears to contain other qualifiers : [{}].", _loc);
4083cur = fbp->
quals.erase(cur);
4089 for(
charc : val_str) {
4090 if(c ==
'\"'|| c ==
' '|| c ==
'\t')
4097 if(qual_str ==
"replace")
4098(*cur)->SetVal(
"");
4102(*cur)->SetVal(val_str);
4106 if(qual_str == *
b)
4110 if(! (*cur)->IsSetVal()) {
4111 if(qual_str ==
"old_locus_tag") {
4113 "Feature \"{}\" at \"{}\" has an /old_locus_tag qualifier with no value. Qualifier has been dropped.",
key_or(fbp,
"Unknown"),
location_or(fbp,
"Empty"));
4116 "Qualifier /{} ignored because it lacks a data value. Feature \"{}\", location \"{}\".", qual_str,
key_or(fbp,
"Unknown"),
location_or(fbp,
"Empty"));
4118cur = fbp->
quals.erase(cur);
4121}
else if((*cur)->IsSetVal()) {
4123 "Qualifier /{} should not have data value. Qualifier value has been ignored. Feature \"{}\", location \"{}\".", qual_str,
key_or(fbp,
"Unknown"),
location_or(fbp,
"Empty"));
4127 if((*cur)->IsSetVal() && qual_str ==
"note") {
4128 string val= (*cur)->GetVal();
4129std::replace(
val.begin(),
val.end(),
'\"',
'\'');
4130(*cur)->SetVal(
val);
4147 for(
auto& dbp : dbl) {
4148 if(! dbp.hasData())
4150fbp = dbp.GetFeatData();
4154 if(fbp->
key==
"-") {
4156fbp->
key=
"misc_feature";
4159 if(fbp->
key==
"allele"|| fbp->
key==
"mutation") {
4161 "Obsolete feature \"{}\" found. Replaced with \"variation\".", fbp->
key);
4162fbp->
key=
"variation";
4181 if(fbp->
key==
"NON_STD")
4182fbp->
key=
"MOD_RES";
4185 if(keyindx < 0 && ! deb) {
4194 if(! fbp->
quals.empty()) {
4199 if(fbp->
key==
"mobile_element"&&
4213}
else if(fbp->
spindex< 0) {
4225 if(fbp->
key!=
"mobile_element") {
4228 if((fbp->
key!=
"old_sequence"&& fbp->
key!=
"conflict") ||
4229(
str!=
"citation")) {
4231 "lacks required /{} qualifier : feature has been dropped.",
str);
4238}
else if(fbp->
key==
"misc_feature"&& fbp->
quals.empty()) {
4247 for(
auto& cur : fbp->
quals) {
4248 if(! cur->IsSetQual() || ! cur->IsSetVal())
4251 const string& qual_str = cur->GetQual();
4252 stringval_str = cur->GetVal();
4256 if(val_str.empty() && qual_str !=
"replace") {
4259 if(qual_str ==
"replace")
4261cur->SetVal(val_str);
4275 for(
const auto& qual : feat.
GetQual()) {
4276 if(! qual->IsSetQual() || qual->GetQual().empty() ||
4277qual->GetQual() !=
"ncRNA_class")
4282 if(! qual->IsSetVal() || qual->GetVal().empty()) {
4285 "Feature \"ncRNA\" at location \"{}\" has an empty /ncRNA_class qualifier.", loc.empty() ?
"unknown"s : loc);
4293 "Feature \"ncRNA\" at location \"{}\" has an invalid /ncRNA_class qualifier: \"{}\".", loc.empty() ?
"unknown"s : loc, qual->GetVal());
4307 "Feature \"ncRNA\" at location \"{}\" {} /ncRNA_class qualifier.", loc.empty() ?
"unknown"s : loc, (
count== 0) ?
"lacks the mandatory":
"has more than one");
4315 for(
autoqual = feat.
SetQual().begin(); qual != feat.
SetQual().end(); ++qual) {
4316 if(! (*qual)->IsSetQual() || (*qual)->GetQual() !=
"artificial_location")
4319 if((*qual)->IsSetVal()) {
4320 const Char* p_val = (*qual)->GetVal().c_str();
4321 for(; *p_val ==
'\"';)
4324 if(*p_val ==
'\0')
4325(*qual)->ResetVal();
4328 string val= (*qual)->IsSetVal() ? (*qual)->GetVal() :
"";
4330 if(
val==
"heterogenous population sequenced"||
4331 val==
"low-quality sequence region") {
4338except_text +=
", ";
4339except_text +=
val;
4345 msg=
format(
"Encountered empty /artificial_location qualifier : Feature \"{}\" : Location \"{}\". Qualifier dropped.",
key.empty() ?
"unknown"s :
key, loc_str.empty() ?
"unknown"s : loc_str);
4347 msg=
format(
"Value \"{}\" is not legal for the /artificial_location qualifier : Feature \"{}\" : Location \"{}\". Qualifier dropped.",
val,
key.empty() ?
"unknown"s :
key, loc_str.empty() ?
"unknown"s : loc_str);
4377 if(fbp1->
key.empty() && ! fbp2->
key.empty())
4379 if(! fbp1->
key.empty() && fbp2->
key.empty())
4381 if(! fbp1->
key.empty() && ! fbp2->
key.empty()) {
4399 returnfbp1->
num< fbp2->
num;
4405 if(! fbp || fbp->
key.empty() || ! rclass)
4408fbp->
key=
"regulatory";
4411qual->
SetQual(
"regulatory_class");
4413fbp->
quals.push_back(qual);
4425 for(
auto& dbp : dbl) {
4426fbp = dbp.GetFeatData();
4427 if(! fbp || fbp->
key.empty())
4430 if(fbp->
key==
"attenuator")
4432 else if(fbp->
key==
"CAAT_signal")
4434 else if(fbp->
key==
"enhancer")
4436 else if(fbp->
key==
"GC_signal")
4438 else if(fbp->
key==
"-35_signal")
4440 else if(fbp->
key==
"-10_signal")
4442 else if(fbp->
key==
"polyA_signal")
4444 else if(fbp->
key==
"promoter")
4446 else if(fbp->
key==
"RBS")
4448 else if(fbp->
key==
"TATA_signal")
4450 else if(fbp->
key==
"terminator")
4452 else if(fbp->
key!=
"regulatory")
4456other_class =
false;
4459 for(
const auto& cur : fbp->
quals) {
4460 if(! cur->IsSetQual() || ! cur->IsSetVal())
4463 const string& qual_str = cur->GetQual();
4465 if(qual_str !=
"regulatory_class") {
4466 if(qual_str ==
"note")
4472 if(! cur->IsSetVal() || cur->GetVal().empty()) {
4479 "Empty /regulatory_class qualifier value in regulatory feature at location {}.", s);
4484 const string& val_str = cur->GetVal();
4487 if(val_str == *
b)
4491 if(val_str ==
"other")
4492other_class =
true;
4502 "Invalid /regulatory_class qualifier value {} provided in regulatory feature at location {}.", val_str, s);
4513 "The regulatory feature is missing mandatory /regulatory_class qualifier at location {}.", s);
4515}
else if(
count> 1) {
4525 if(other_class && ! got_note) {
4539 const string& submitter_seqid,
4544 if(seqtype == 0 || seqtype == 1 || seqtype == 7)
4546 else if(seqtype == 4 || seqtype == 5 || seqtype == 8 || seqtype == 9)
4554 tag.SetTag().SetStr(submitter_seqid);
4556bioseq.
SetId().push_back(gen_id);
4575 if(seqtype == 0 || seqtype == 3 || seqtype == 4 || seqtype == 6 ||
4576seqtype == 10 || seqtype == 12) {
4582 if(seqtype == 1 || seqtype == 5 || seqtype == 7 || seqtype == 8 ||
4583seqtype == 9 || seqtype == 11) {
4585 if(prefix[4] >=
'0'&& prefix[4] <=
'9')
4595 for(
autotbp = ibp->
secaccs.begin(); tbp != ibp->
secaccs.end(); ++tbp) {
4596 if((*tbp)[0] ==
'-')
4602 i= (prefix[4] >=
'0'&& prefix[4] <=
'9') ? 6 : 8;
4610 if(
ok&& prefix) {
4612 if(seqtype == 0 || seqtype == 1 || seqtype == 4 || seqtype == 5 ||
4613seqtype == 7 || seqtype == 8 || seqtype == 9 || seqtype == 10 ||
4615 if(prefix[4] >=
'0'&& prefix[4] <=
'9')
4632CDelta_ext::Tdata::iterator
delta;
4635 const CSeq_id*
id=
nullptr;
4637 if(! (*delta)->IsLoc())
4640 const CSeq_loc& locs = (*delta)->GetLoc();
4645 if(! loc->
IsInt())
4655 const CTextseq_id* text_id =
id->GetTextseq_Id();
4664 i= (prefix[4] >=
'0'&& prefix[4] <=
'9') ? 6 : 8;
4673 if(
delta== deltas.end() && prefix) {
4675 if(seqtype == 0 || seqtype == 1 || seqtype == 4 || seqtype == 5 ||
4676seqtype == 7 || seqtype == 8 || seqtype == 9 || seqtype == 10 ||
4678 if(prefix[4] >=
'0'&& prefix[4] <=
'9')
4699 "Submitter sequence identifiers for non-project-based TSA records are not supported. /submitter_seqid \"{}\" has been dropped.", ibp->
submitter_seqid);
4704 "Only WGS/TLS/TSA related records (contigs and scaffolds) are allowed to have /submitter_seqid qualifier. This \"{}\" is not one of them. Entry dropped.", ibp->
acnum);
4713 auto& descrList = descrs.
Set();
4714 autoit = descrList.begin();
4715 while(it != descrList.end()) {
4716 if((*it)->IsSource()) {
4717it = descrList.erase(it);
4784dab = temp_xml_chain.begin();
4785dab_end = temp_xml_chain.end();
4788dab = chain.begin();
4789dab_end = chain.end();
4791 while(dab != dab_end && dab->mType !=
type)
4795 for(
autodbp = dab; dbp != dab_end; ++dbp) {
4797 if(dblk.mType !=
type)
4803 TDataBlkList& dbl = std::get<TDataBlkList>(dblk.mData);
4814 for(
auto& tdbp : dbl) {
4819 for(
auto& tdbp : dbl)
4828 if(
i> 1 && ibp->
is_mga) {
4837 if(! ibp->
drop) {
4843p.erase(
remove(p.begin(), p.end(),
' '), p.end());
4844p.erase(
remove(p.begin(), p.end(),
'\t'), p.end());
4850 if(seq_feats.empty()) {
4852 for(; dab != dab_end; ++dab) {
4853 if(dab->hasData()) {
4854 TDataBlkList& dbl = std::get<TDataBlkList>(dab->mData);
4856dab->mData = monostate();
4860temp_xml_chain.clear();
4875descr_src->
SetSource(seq_feats.front()->SetData().SetBiosrc());
4876bioseq.
SetDescr().Set().push_back(descr_src);
4880 const CBioSource& bio = seq_feats.front()->GetData().GetBiosrc();
4883 if(! taxname.empty())
4885 "BioSource descriptor and Source dropped because \"{}\" is not present in the NCBI taxonomy database.", taxname);
4888 "BioSource descriptor and Source dropped because provided organism is not present in the NCBI taxonomy database.");
4891seq_feats.pop_front();
4893 for(; dab != dab_end; ++dab) {
4894 if(dab->mType !=
type) {
4898 TDataBlkList& dbl = std::get<TDataBlkList>(dab->mData);
4899 for(
auto& dbp : dbl) {
4900 if(dbp.mDrop ==
true)
4904 if(fbp->
key==
"source"||
4905fbp->
key==
"assembly_gap"||
4906(fbp->
key==
"gap"&&
4916 if(feat.
Empty()) {
4917 if(fbp->
key==
"CDS") {
4919 "CDS feature has unparsable location. Entry dropped. Location = [{}].", *fbp->
location);
4930 "Location pointing outside the entry [{}]", *fbp->
location);
4934 if(imp_feat.
GetKey() ==
"intron"||
4935imp_feat.
GetKey() ==
"exon") {
4951seq_feats.push_back(feat);
4960 "Mixed strands in SeqLoc of /trans_splicing feature: {}", *fbp->
location);
4963 "Mixed strands in SeqLoc: {}", *fbp->
location);
4967seq_feats.push_back(feat);
4973temp_xml_chain.clear();
4983 for(
auto& feat : seq_feats) {
4984 if(! feat->GetData().IsImp())
4987 const CImp_feat& imp_feat = feat->GetData().GetImp();
4990imp_feat.
GetKey().find(
"RNA") != string::npos) {
5007 SeqFeatPub(pp, entry, seq_feats, *seq_id, col_data, ibp);
5008 if(seq_feats.empty() && ibp->
drop) {
5015 ImpFeatPub(pp, entry, seq_feats, *seq_id, col_data, ibp);
5018 if(seq_feats.empty())
5022annot->
SetData().SetFtable().swap(seq_feats);
5024bioseq.
SetAnnot().push_back(annot);
5030 char* p =
nullptr;
5034 if(! p || (tRNA && tRNA < p))
5036 if(! p || (rRNA && rRNA < p))
5038 if(! p || (snRNA && snRNA < p))
5040 if(! p || (scRNA && scRNA < p))
5042 if(! p || (uRNA && uRNA < p))
5044 if(! p || (snoRNA && snoRNA < p))
5053 if(p == snRNA || p == uRNA)
5075 char* mRNA =
nullptr;
5076 char* tRNA =
nullptr;
5077 char* rRNA =
nullptr;
5078 char* snRNA =
nullptr;
5079 char* scRNA =
nullptr;
5080 char* uRNA =
nullptr;
5081 char* snoRNA =
nullptr;
5107 if(! ibp->
moltype.empty()) {
5118 while(*
r!=
';'&& *
r!=
'\n'&& *
r!=
'\0')
5121 while(*
r!=
';'&& *
r!=
' '&& *
r!=
'\t'&& *
r!=
'\n'&&
5124 if(
r- molstr > 10)
5130q = (
char*)
"???";
5132q = (
char*)
"???";
5135 if(ibp->
moltype==
"genomic DNA") {
5145}
else if(ibp->
moltype==
"genomic RNA") {
5154}
else if(ibp->
moltype==
"mRNA") {
5163}
else if(ibp->
moltype==
"tRNA") {
5172}
else if(ibp->
moltype==
"rRNA") {
5181}
else if(ibp->
moltype==
"snoRNA") {
5190}
else if(ibp->
moltype==
"snRNA") {
5199}
else if(ibp->
moltype==
"scRNA") {
5208}
else if(ibp->
moltype==
"pre-RNA") {
5217}
else if(ibp->
moltype==
"pre-mRNA") {
5226}
else if(ibp->
moltype==
"other RNA") {
5238}
else if(ibp->
moltype==
"other DNA") {
5250}
else if(ibp->
moltype==
"unassigned RNA") {
5262}
else if(ibp->
moltype==
"unassigned DNA") {
5274}
else if(ibp->
moltype==
"viral cRNA") {
5285}
else if(ibp->
moltype==
"transcribed RNA") {
5300 "Invalid /mol_type value \"{}\" provided in source features. Entry dropped.", ibp->
moltype);
5310 "Molecule type \"{}\" from the ID line disagrees with \"{}\" from the /mol_type qualifier.", q, ibp->
moltype);
5317 "Molecule type \"{}\" from the ID/LOCUS line disagrees with \"{}\" from the /mol_type qualifier.", q, ibp->
moltype);
5324ibp->
moltype!=
"genomic DNA")
5353 "Molecule type \"{}\" from the /mol_type qualifier disagrees with this record's sequence type: \"{}\".", ibp->
moltype, p);
5384 if(genomic < 0 || genomic > 20) {
5394 while(*q !=
';'&& *q !=
'\n'&& *q !=
'\0')
5397 while(*q !=
';'&& *q !=
' '&& *q !=
'\t'&& *q !=
'\n'&&
5400 if(q - molstr > 10)
5417 "Molecule type \"{}\" from LOCUS/ID line is not legal value for records from source \"{}\". Sequence rejected.", molstr ? molstr :
"???", p);
5425 else if(genomic > 1 && genomic < 6)
5434 else if(genomic == 2)
5436 else if(genomic == 3)
5438 else if(genomic == 4)
5444}
else if(genomic == 5)
5450}
else if(genomic == 7)
5452 else if(genomic == 8)
5454 else if(genomic == 9)
5456 else if(genomic == 10 || genomic == 12)
5458 else if(genomic == 11)
5460 else if(genomic == 13)
5462 else if(genomic == 14)
5464 else if(genomic == 15)
5466 else if(genomic == 16)
5468 else if(genomic == 17)
5474}
else if(genomic == 18)
5480}
else if(genomic == 19 || genomic == 20)
5487 const Char* div =
nullptr;
5534 for(p = tRNA + 4; *p ==
' '|| *p ==
'\t';)
5563 for(
const auto& subdbp : subblocks) {
5566 if(! subdbp.mBuf.ptr)
5583 FtaErrPost(
SEV_ERROR,
ERR_SOURCE_UnclassifiedViralRna,
"Cannot determine viral molecule type (genomic vs a specific type of RNA) based on definition line, CDS content, or taxonomic lineage. So this sequence has been classified as genomic by default (perhaps in error).");
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Int4 fta_fix_seq_loc_id(TSeqLocList &locs, ParserPtr pp, string_view location, string_view name, bool iscon)
CRef< CSeq_loc > fta_get_seqloc_int_whole(const CSeq_id &seq_id, size_t len)
bool fta_strings_same(const char *s1, const char *s2)
bool fta_number_is_huge(const Char *s)
CRef< CPatent_seq_id > MakeUsptoPatSeqId(const char *acc)
void GetSequenceOfKeywords(const DataBlk &entry, int type, Uint2 col_data, TKeywordList &keywords)
CRef< CSeq_id > MakeAccSeqId(const char *acc, Uint1 seqtype, bool accver, Int2 vernum)
void ShrinkSpaces(char *line)
size_t CheckOutsideEntry(ParserPtr pp, const char *acc, Int2 vernum)
@Gb_qual.hpp User-defined methods of the data storage class.
@Imp_feat.hpp User-defined methods of the data storage class.
list< CRef< CLinkage_evidence > > TLinkage_evidence
virtual bool GetNextQualifier(string &qualKey, string &qualVal)
@RNA_ref.hpp User-defined methods of the data storage class.
EQualifier
List of available qualifiers for feature keys.
const TQualifiers & GetMandatoryQualifiers(void) const
Get the list of all mandatory qualifiers for the feature.
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
static EQualifier GetQualifierType(CTempString qual)
convert qual string to enumerated value
static CTempString GetQualifierAsString(EQualifier qual)
Convert a qualifier from an enumerated value to a string representation or empty if not found.
static ESubtype SubtypeNameToValue(CTempString sName)
Turn a string into its ESubtype which is NOT necessarily related to the identifier of the enum.
@Seq_descr.hpp User-defined methods of the data storage class.
namespace ncbi::objects::
Seq-loc iterator class â iterates all intervals from a seq-loc in the correct order.
const CSeq_loc * GetFirstLoc(ENullSegType null_seg=eNullSegAllow) const
std::variant< monostate, TList, EntryBlk *, FeatBlk *, const TXmlIndexList * > mData
EntryBlk * GetEntryData() const
FeatBlk * GetFeatData() const
struct DataBlk::@1166 mBuf
void SetFeatData(FeatBlk *)
bool operator()(const CRef< CGb_qual > &qual)
PredIsGivenQual(const string &qual)
constexpr const_iterator begin() const noexcept
#define ParFlat_COL_DATA_EMBL
#define ERR_FEATURE_RedundantOldLocusTag
#define ERR_FEATURE_BadAnticodonLoc
#define ERR_QUALIFIER_InvalidLinkageEvidence
#define ERR_QUALIFIER_MissingGapType
#define ERR_FEATURE_MissingOperonQual
#define ERR_LOCUS_NonViralRNAMoltype
#define ERR_FEATURE_MoreThanOneCAGEFeat
#define ERR_FEATURE_MatchingOldNewLocusTag
#define ERR_FEATURE_OldLocusTagWithoutNew
#define ERR_LOCATION_RefersToExternalRecord
#define ERR_FEATURE_LocationParsing
#define ERR_QUALIFIER_InvalidArtificialLoc
#define ERR_FEATURE_GapSizeEstLengthMissMatch
#define ERR_QUALIFIER_InvalidPseudoGeneValue
#define ERR_QUALIFIER_MultRptUnitComma
#define ERR_FEATURE_RequiredQualifierMissing
#define ERR_SOURCE_DescriptorDropped
#define ERR_QUALIFIER_IllegalCompareQualifier
#define ERR_QUALIFIER_MissingRegulatoryClass
#define ERR_FEATURE_MultipleOperonQuals
#define ERR_QUALIFIER_DbxrefUnknownDBName
#define ERR_SOURCE_SubmitterSeqidIgnored
#define ERR_QUALIFIER_ObsoleteRptUnit
#define ERR_SOURCE_InvalidMolType
#define ERR_QUALIFIER_LinkageShouldNotBeUnspecified
#define ERR_FEATURE_OverlappingGaps
#define ERR_QUALIFIER_MultiplePseudoGeneQuals
#define ERR_QUALIFIER_MultipleRegulatoryClass
#define ERR_FEATURE_UnknownGapNot100
#define ERR_QUALIFIER_LinkageShouldBeUnspecified
#define ERR_FEATURE_ContiguousGaps
#define ERR_FEATURE_EmptyOldLocusTag
#define ERR_QUALIFIER_InvalidRegulatoryClass
#define ERR_QUALIFIER_InvalidGapTypeForLinkageEvidence
#define ERR_FEATURE_InvalidGapLocation
#define ERR_REFERENCE_UnparsableLocation
#define ERR_FEATURE_AssemblyGapAndLegacyGap
#define ERR_FEATURE_InvalidAnticodonPos
#define ERR_QUALIFIER_ShouldNotHaveValue
#define ERR_QUALIFIER_Conflict
#define ERR_QUALIFIER_InvalidRptUnitRange
#define ERR_FEATURE_InvalidSatelliteType
#define ERR_QUALIFIER_InvalidGapType
#define ERR_QUALIFIER_NoNoteForOtherRegulatory
#define ERR_SOURCE_GenomicViralRnaAssumed
#define ERR_FEATURE_InvalidQualifier
#define ERR_LOCATION_AccessionNotTLS
#define ERR_FEATURE_FourBaseAntiCodon
#define ERR_SOURCE_MolTypeSeqTypeConflict
#define ERR_FEATURE_OperonLocationMisMatch
#define ERR_FEATURE_MultipleLocusTags
#define ERR_SOURCE_LineageImpliesGenomicViralRna
#define ERR_LOCATION_NCBIRefersToExternalRecord
#define ERR_LOCATION_TransSpliceMixedStrand
#define ERR_FEATURE_InvalidQualifierValue
#define ERR_FEATURE_UnknownFeatKey
#define ERR_QUALIFIER_DuplicateRemoved
#define ERR_FEATURE_ncRNA_class
#define ERR_LOCATION_AccessionNotTPA
#define ERR_FEATURE_IllegalEstimatedLength
#define ERR_QUALIFIER_DbxrefShouldBeNumeric
#define ERR_FEATURE_InvalidOperonQual
#define ERR_FEATURE_FinishedHTGHasAssemblyGap
#define ERR_FEATURE_ObsoleteFeature
#define ERR_FEATURE_OperonQualsNotUnique
#define ERR_FEATURE_Dropped
#define ERR_DEFINITION_DifferingRnaTokens
#define ERR_FORMAT_InvalidMolType
#define ERR_QUALIFIER_MissingLinkageEvidence
#define ERR_QUALIFIER_DbxrefIncorrect
#define ERR_SOURCE_SubmitterSeqidNotAllowed
#define ERR_QUALIFIER_OldPseudoWithPseudoGene
#define ERR_FEATURE_StrangeAntiCodonSize
#define ERR_QUALIFIER_EmbeddedQual
#define ERR_FEATURE_InvalidAssemblyGapLocation
#define ERR_LOCATION_AccessionNotTSA
#define ERR_LOCATION_FailedCheck
#define ERR_FEATURE_NoSatelliteClassOrIdentifier
#define ERR_QUALIFIER_InvalidEvidence
#define ERR_SOURCE_UnclassifiedViralRna
#define ERR_QUALIFIER_UnexpectedGapTypeForHTG
#define ERR_SOURCE_SubmitterSeqidDropped
#define ERR_SOURCE_MolTypesDisagree
#define ERR_QUALIFIER_DbxrefWrongType
#define ERR_FEATURE_FeatureKeyReplaced
#define ERR_QUALIFIER_EmptyQual
#define ERR_FEATURE_ObsoleteDbXref
#define ERR_FEATURE_DuplicateRemoved
#define ERR_LOCATION_MixedStrand
void ParseSourceFeat(ParserPtr pp, DataBlkCIter dbp, DataBlkCIter dbp_end, const CSeq_id &seqid, Int2 type, const CBioseq &bioseq, const string &source, TSeqFeatList &seq_feats)
unique_ptr< string > XMLFindTagValue(const char *entry, const TXmlIndexList &xil, Int4 tag)
#define INSDQUALIFIER_NAME
#define INSDSEQ_FEATURE_TABLE
TDataBlkList XMLBuildRefDataBlk(char *entry, const TXmlIndexList &xil, int type)
#define INSDFEATURE_LOCATION
unique_ptr< string > XMLGetTagValue(const char *entry, const XmlIndex &xip)
#define INSDQUALIFIER_VALUE
#define INSDREFERENCE_POSITION
#define INSDFEATURE_QUALS
void XMLGetKeywords(const char *entry, const TXmlIndexList &xil, TKeywordList &keywords)
#define INSDREFERENCE_REFERENCE
DataBlk::TList TDataBlkList
TDataBlkList::iterator DataBlkIter
std::list< CRef< objects::CSeq_id > > TSeqIdList
std::list< CRef< objects::CSeq_feat > > TSeqFeatList
forward_list< GapFeats >::iterator GapFeatsPtr
TDataBlkList::const_iterator DataBlkCIter
forward_list< XmlIndex > TXmlIndexList
int fta_atoi(string_view sv)
int StringCmp(const char *s1, const char *s2)
bool fta_StartsWith(const char *s1, string_view s2)
bool StringEquNI(const char *s1, const char *s2, size_t n)
bool StringEquN(const char *s1, const char *s2, size_t n)
bool StringEqu(const char *s1, const char *s2)
void StringCpy(char *d, const char *s)
size_t StringLen(const char *s)
void StringCat(char *d, const char *s)
void FtaInstallPrefix(int prefix, string_view name, string_view location)
void FtaDeletePrefix(int prefix)
void Nlm_ErrSetContext(const char *module, const char *fname, int line)
void Nlm_ErrPostStr(ErrSev sev, int lev1, int lev2, string_view str)
#define FtaErrPost(sev, level,...)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
static const char location[]
unsigned int TSeqPos
Type for sequence locations and lengths.
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
@ e_YES
SeqIds compared, but are different.
ENa_strand GetStrand(void) const
Get the location's strand.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
TRange GetTotalRange(void) const
CConstRef< CSeq_loc > GetRangeAsSeq_loc(void) const
Get seq-loc for the current iterator position.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
const CSeq_id & GetSeq_id(void) const
Get seq_id of the current location.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
bool CanGetInst(void) const
TSeqPos GetBioseqLength(void) const
bool CanGetInst_Length(void) const
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
char Char
Alias for char.
uint32_t Uint4
4-byte (32-bit) unsigned integer
TThisType IntersectionWith(const TThisType &r) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ eNocase
Case insensitive compare.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool CanGetOrg(void) const
Check if it is safe to call GetOrg method.
list< CRef< CSubSource > > TSubtype
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
const TOrg & GetOrg(void) const
Get the Org member data.
void SetOrg(TOrg &value)
Assign a value to Org data member.
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
TId GetId(void) const
Get the variant data.
const TLineage & GetLineage(void) const
Get the Lineage member data.
const TDiv & GetDiv(void) const
Get the Div member data.
TMgcode GetMgcode(void) const
Get the Mgcode member data.
TGcode GetGcode(void) const
Get the Gcode member data.
bool IsSetLineage(void) const
Lineage with semicolon separators. Check if a value has been assigned to Lineage data member.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
bool IsSetMgcode(void) const
Mitochondrial genetic code. Check if a value has been assigned to Mgcode data member.
const TDb & GetDb(void) const
Get the Db member data.
bool IsSetDiv(void) const
GenBank division code. Check if a value has been assigned to Div data member.
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
list< CRef< COrgMod > > TMod
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
bool IsSetGcode(void) const
Genetic code (see CdRegion). Check if a value has been assigned to Gcode data member.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
TEquiv & SetEquiv(void)
Select the variant.
void SetQual(const TQual &value)
Assign a value to Qual data member.
void SetVal(const TVal &value)
Assign a value to Val data member.
TType GetType(void) const
Get the Type member data.
const TAa & GetAa(void) const
Get the Aa member data.
const TCodon & GetCodon(void) const
Get the Codon member data.
bool IsSetAa(void) const
Check if a value has been assigned to Aa data member.
bool IsTRNA(void) const
Check if variant TRNA is selected.
bool IsSetAnticodon(void) const
Location of anticodon. Check if a value has been assigned to Anticodon data member.
bool IsNcbieaa(void) const
Check if variant Ncbieaa is selected.
EType
type of RNA feature
void SetExt(TExt &value)
Assign a value to Ext data member.
bool IsSetExt(void) const
Generic fields for ncRNA, tmRNA, miscRNA. Check if a value has been assigned to Ext data member.
TNcbieaa GetNcbieaa(void) const
Get the variant data.
bool IsSetCodon(void) const
Codon(s) as in Genetic-code. Check if a value has been assigned to Codon data member.
Tdata & Set(void)
Assign a value to data member.
void SetType(TType value)
Assign a value to Type data member.
void ResetExt(void)
Reset Ext data member.
const TExt & GetExt(void) const
Get the Ext member data.
const TTRNA & GetTRNA(void) const
Get the variant data.
@ eType_snoRNA
will become ncRNA, with RNA-gen.class = snoRNA
@ eType_ncRNA
non-coding RNA; subsumes snRNA, scRNA, snoRNA
const TVal & GetVal(void) const
Get the Val member data.
void SetQual(const TQual &value)
Assign a value to Qual data member.
const TKey & GetKey(void) const
Get the Key member data.
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
vector< CRef< CDbtag > > TDbxref
TDbxref & SetDbxref(void)
Assign a value to Dbxref data member.
void SetLoc(const TLoc &value)
Assign a value to Loc data member.
bool IsSetQual(void) const
Qualifiers. Check if a value has been assigned to Qual data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsImp(void) const
Check if variant Imp is selected.
void SetComment(const TComment &value)
Assign a value to Comment data member.
void SetCit(TCit &value)
Assign a value to Cit data member.
void SetPartial(TPartial value)
Assign a value to Partial data member.
const TQual & GetQual(void) const
Get the Qual member data.
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
bool IsSetKey(void) const
Check if a value has been assigned to Key data member.
const TLocation & GetLocation(void) const
Get the Location member data.
void SetExcept(TExcept value)
Assign a value to Except data member.
const TData & GetData(void) const
Get the Data member data.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsSetExcept_text(void) const
Explain if except=TRUE. Check if a value has been assigned to Except_text data member.
void SetData(TData &value)
Assign a value to Data data member.
void ResetComment(void)
Reset Comment data member.
void SetExp_ev(TExp_ev value)
Assign a value to Exp_ev data member.
bool IsSetQual(void) const
Check if a value has been assigned to Qual data member.
const TComment & GetComment(void) const
Get the Comment member data.
void SetVal(const TVal &value)
Assign a value to Val data member.
void SetPseudo(TPseudo value)
Assign a value to Pseudo data member.
TPartial GetPartial(void) const
Get the Partial member data.
void SetExcept_text(const TExcept_text &value)
Assign a value to Except_text data member.
void ResetLocation(void)
Reset Location data member.
const TQual & GetQual(void) const
Get the Qual member data.
void ResetDbxref(void)
Reset Dbxref data member.
TQual & SetQual(void)
Assign a value to Qual data member.
bool IsSetVal(void) const
Check if a value has been assigned to Val data member.
void ResetQual(void)
Reset Qual data member.
const TImp & GetImp(void) const
Get the variant data.
void SetKey(const TKey &value)
Assign a value to Key data member.
@ eExp_ev_experimental
any reasonable experimental check
@ eExp_ev_not_experimental
similarity, pattern, etc
bool IsGenbank(void) const
Check if variant Genbank is selected.
TGeneral & SetGeneral(void)
Select the variant.
TPatent & SetPatent(void)
Select the variant.
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
bool IsMix(void) const
Check if variant Mix is selected.
bool IsTpg(void) const
Check if variant Tpg is selected.
bool IsEmpty(void) const
Check if variant Empty is selected.
const TPnt & GetPnt(void) const
Get the variant data.
bool IsTpd(void) const
Check if variant Tpd is selected.
TPoint GetPoint(void) const
Get the Point member data.
bool IsOther(void) const
Check if variant Other is selected.
TFrom GetFrom(void) const
Get the From member data.
bool IsEquiv(void) const
Check if variant Equiv is selected.
bool IsPrf(void) const
Check if variant Prf is selected.
bool CanGetA(void) const
Check if it is safe to call GetA method.
bool IsEmbl(void) const
Check if variant Embl is selected.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSwissprot(void) const
Check if variant Swissprot is selected.
const Tdata & Get(void) const
Get the member data.
const TPacked_pnt & GetPacked_pnt(void) const
Get the variant data.
TVersion GetVersion(void) const
Get the Version member data.
bool CanGetAccession(void) const
Check if it is safe to call GetAccession method.
const TPoints & GetPoints(void) const
Get the Points member data.
bool IsGpipe(void) const
Check if variant Gpipe is selected.
TTo GetTo(void) const
Get the To member data.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
bool IsTpe(void) const
Check if variant Tpe is selected.
const TMix & GetMix(void) const
Get the variant data.
bool IsPir(void) const
Check if variant Pir is selected.
const TAccession & GetAccession(void) const
Get the Accession member data.
const TBond & GetBond(void) const
Get the variant data.
bool IsDdbj(void) const
Check if variant Ddbj is selected.
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_not_set
No variant selected.
@ e_Tpg
Third Party Annot/Seq Genbank.
@ e_Empty
to NULL one Seq-id in a collection
void SetData(TData &value)
Assign a value to Data data member.
TId & SetId(void)
Assign a value to Id data member.
const TInst & GetInst(void) const
Get the Inst member data.
const TSource & GetSource(void) const
Get the variant data.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TId & GetId(void) const
Get the Id member data.
bool IsSetExt(void) const
Extensions for special types. Check if a value has been assigned to Ext data member.
bool IsDelta(void) const
Check if variant Delta is selected.
void SetInst(TInst &value)
Assign a value to Inst data member.
TSource & SetSource(void)
Select the variant.
const TExt & GetExt(void) const
Get the Ext member data.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
const TDelta & GetDelta(void) const
Get the variant data.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CDelta_seq > > Tdata
@ eTech_htgs_2
ordered High Throughput sequence contig
@ eTech_htc
high throughput cDNA
@ eTech_targeted
targeted locus sets/studies
@ eTech_sts
Sequence Tagged Site.
@ eTech_htgs_3
finished High Throughput sequence
@ eTech_htgs_1
unordered High Throughput sequence contig
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_wgs
whole genome shotgun sequencing
@ eTech_survey
one-pass genomic sequence
@ eTech_htgs_0
single genomic reads for coordination
@ eTech_fli_cdna
full length insert cDNA
@ eTech_est
Expressed Sequence Tag.
@ eMol_na
just a nucleic acid
@ eType_proximity_ligation
char * dbname(DBPROCESS *dbproc)
Get name of current database.
int fta_if_wgs_acc(string_view accession)
Int2 CheckNA(const char *str)
CSeq_id::E_Choice GetNucAccOwner(string_view acc)
Int2 CheckNADDBJ(const char *str)
The blob sat and sat key. Both must be positive integers. Non-empty string. The interpretation of the blob id depends on a processor. Cassandra processor expects the following format
static void fta_parse_rrna_feat(CSeq_feat &feat, CRNA_ref &rna_ref)
#define Seq_descr_GIBB_mol_tRNA
static bool fta_qual_a_in_b(const TQualVector &qual1, const TQualVector &qual2)
static const char * DbxrefTagStr[]
static bool fta_check_ncrna(const CSeq_feat &feat)
void GetFlatBiomol(CMolInfo::TBiomol &biomol, CMolInfo::TTech tech, char *molstr, ParserPtr pp, const DataBlk &entry, const COrg_ref *org_ref)
const char * MobileElementQualValues[]
static const char * EmptyQuals[]
static bool fta_feats_same(const FeatBlk *fbp1, const FeatBlk *fbp2)
static bool fta_check_evidence(CSeq_feat &feat, FeatBlkPtr fbp)
static void fta_convert_to_regulatory(FeatBlkPtr fbp, const char *rclass)
void fta_sort_biosource(CBioSource &bio)
static void CollectGapFeats(const DataBlk &entry, DataBlkCIter dbp, DataBlkCIter dbp_end, ParserPtr pp, Int2 type)
const char * ParFlat_ESTmod[]
static TDataBlkList XMLLoadFeatBlk(char *entry, const TXmlIndexList &xil)
#define Seq_descr_GIBB_mol_rRNA
#define Seq_descr_GIBB_mol_trRNA
const AaCodons aacodons[]
static void ParseQualifiers(FeatBlkPtr fbp, const char *bptr, const char *eptr, Parser::EFormat format)
unique_ptr< string > CheckLocStr(const Char *str)
static CRef< CTrna_ext > fta_get_trna_from_product(CSeq_feat &feat, const string &product, unsigned char *remove)
static void MergeNoteQual(TQualVector &quals)
static void XMLGetQuals(char *entry, const TXmlIndexList &xil, TQualVector &quals)
string location_to_string_or_unknown(const CSeq_loc &loc)
static bool PackSeqPntCheckCpp(const CSeq_loc &loc)
#define Seq_descr_GIBB_mol_other_genetic
static void fta_check_compare_qual(TDataBlkList &dbl, bool is_tpa)
StrNum LinkageEvidenceValues[]
static const char * EMBLDbxrefTagStr[]
static void fta_sort_quals(FeatBlkPtr fbp, bool qamode)
static void s_RemoveSourceDescriptors(CSeq_descr &descrs)
void xSplitLines(const string &str, vector< string > &lines)
static bool fta_check_rpt_unit_span(const char *val, size_t length)
int ParseFeatureBlock(IndexblkPtr ibp, bool deb, TDataBlkList &dbl, Parser::ESource source, Parser::EFormat format)
static void GetRnaRef(CSeq_feat &feat, CBioseq &bioseq, Parser::ESource source, bool accver)
static void FreeFeatBlk(TDataBlkList &dbl, Parser::EFormat format)
static Uint1 fta_get_aa_from_string(char *str)
static bool SortFeaturesByOrder(const DataBlk &sp1, const DataBlk &sp2)
#define Seq_descr_GIBB_mol_snRNA
#define Seq_descr_GIBB_mol_other
static void fta_get_gcode_from_biosource(const CBioSource &bio_src, IndexblkPtr ibp)
#define Seq_descr_GIBB_mol_genomic
static void FilterDb_xref(CSeq_feat &feat, Parser::ESource source)
static void fta_parse_rpt_units(FeatBlkPtr fbp)
Int2 SpFeatKeyNameValid(const Char *keystr)
static int get_aa_from_trna(const CTrna_ext &trna)
static bool SeqIntCheckCpp(const CSeq_loc &loc)
static const char * DbxrefObsolete[]
const char * ncRNA_class_values[]
static void fta_check_pseudogene_qual(TDataBlkList &dbl)
static void fta_check_satellite(string_view str, bool *drop)
static Int4 flat2asn_range_func(void *pp_ptr, const CSeq_id &id)
static bool fta_perform_operon_checks(TSeqFeatList &feats, IndexblkPtr ibp)
static void SeqFeatPub(ParserPtr pp, const DataBlk &entry, TSeqFeatList &feats, const CSeq_id &seqid, Int4 col_data, IndexblkPtr ibp)
static bool CheckLegalQual(string_view val, Char ch, string *qual)
static bool SeqPntCheckCpp(const CSeq_loc &loc)
static string_view key_or(const FeatBlk *fbp, string_view sv)
static void fta_create_wgs_seqid(CBioseq &bioseq, IndexblkPtr ibp, Parser::ESource source)
static void ConvertQualifierValue(CRef< CGb_qual > &qual)
static void fta_fake_gbparse_err_handler(string_view, string_view)
void LoadFeat(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq)
static void fta_check_replace_regulatory(TDataBlkList &dbl, bool *drop)
const char * RegulatoryClassValues[]
static void ImpFeatPub(ParserPtr pp, const DataBlk &entry, TSeqFeatList &feats, CSeq_id &seq_id, Int4 col_data, IndexblkPtr ibp)
#define Seq_descr_GIBB_mol_unknown
static CRef< CSeq_feat > ProcFeatBlk(ParserPtr pp, FeatBlkPtr fbp, const CSeq_id &seqid)
static CRef< CDbtag > DbxrefQualToDbtag(const CGb_qual &qual, Parser::ESource source)
static void fta_convert_to_lower_case(string &str)
#define Seq_descr_GIBB_mol_snoRNA
static bool fta_check_mobile_element(FeatBlkPtr fbp, Parser::ESource source, Parser::EFormat format)
static const char * DbxrefTagAny[]
static bool CheckForeignLoc(const CSeq_loc &loc, const CSeq_id &sid)
static int get_first_codon_from_trna(const CTrna_ext &trna)
#define Seq_descr_GIBB_mol_scRNA
static void GetImpFeat(CSeq_feat &feat, FeatBlkPtr fbp, bool locmap)
const char * PseudoGeneValues[]
static void fta_create_wgs_dbtag(CBioseq &bioseq, const string &submitter_seqid, char *prefix, Int4 seqtype)
static int XMLParseFeatureBlock(IndexblkPtr ibp, bool deb, TDataBlkList &dbl, Parser::ESource source)
static void fta_check_artificial_location(CSeq_feat &feat, const string &key)
#define Seq_descr_GIBB_mol_preRNA
static CRef< CTrna_ext > fta_get_trna_from_comment(const string &comment, unsigned char *remove)
static const char * trna_tags[]
static void fta_check_non_tpa_tsa_tls_locations(TDataBlkList &dbl, IndexblkPtr ibp)
static bool SortFeaturesByLoc(const DataBlk &sp1, const DataBlk &sp2)
static void fta_check_multiple_locus_tag(TDataBlkList &dbl, bool *drop)
CRef< CSeq_feat > SpProcFeatBlk(ParserPtr pp, FeatBlkPtr fbp, const CSeq_id &seqid)
const char * SatelliteValues[]
static Uint1 FTASeqLocCheck(const CSeq_loc &locs, char *accession)
static void fta_remove_dup_quals(FeatBlkPtr fbp)
static void XMLCheckQualifiers(FeatBlkPtr fbp, Parser::ESource source)
static CRef< CSeq_loc > GetTrnaAnticodon(const CSeq_feat &feat, char *qval, const TSeqIdList &seqids, bool accver)
bool GetSeqLocation(CSeq_feat &feat, string_view location, const CSeq_id &seqid, bool *hard_err, ParserPtr pp, string_view name)
static const char * DbxrefTagInt[]
static void fta_strip_aa(string &str)
static CMolInfo::EBiomol GetBiomolFromToks(char *mRNA, char *tRNA, char *rRNA, char *snRNA, char *scRNA, char *uRNA, char *snoRNA)
const char * TransSplicingFeats[]
static void fta_check_old_locus_tags(TDataBlkList &dbl, bool *drop)
static string_view location_or(const FeatBlk *fbp, string_view sv)
static const char * ParFlat_RNA_array[]
#define Seq_descr_GIBB_mol_cRNA
static void DelCharBtwData(string &value)
static Uint1 fta_get_aa_from_symbol(Char ch)
static void fta_check_rpt_unit_range(FeatBlkPtr fbp, size_t length)
#define Seq_descr_GIBB_mol_mRNA
static void fta_remove_dup_feats(TDataBlkList &dbl)
static void fta_process_cons_splice(string &val_str)
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
std::list< SeqLoc > TSeqLocList
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
CRef< CPubdesc > DescrRefs(ParserPtr pp, DataBlk &dbp, Uint2 col_data)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
CRef< objects::CSeq_entry > seq_entry
string LocationStr() const
optional< string > location
vector< IndexblkPtr > entrylist
bool DeleteQual(TQualVector &qlist, const Char *qual)
string location_to_string(const CSeq_loc &loc)
string CpTheQualValue(const TQualVector &qlist, const Char *qual)
bool SeqLocHaveFuzz(const CSeq_loc &loc)
optional< string > GetTheQualValue(TQualVector &qlist, const Char *qual)
Uint1 GetQualValueAa(string_view qval, bool checkseq)
bool SrchNodeType(const DataBlk &entry, Int4 type, size_t *plen, char **pptr)
TDataBlkList & TrackNodes(const DataBlk &entry)
char * SrchTheStr(string_view sv, string_view leadstr)
void fta_StringCpy(char *dst, const char *src)
Int2 MatchArrayString(const char **array, string_view text)
DataBlk * TrackNodeType(const DataBlk &entry, Int2 type)
Int2 MatchArrayIString(const Char **array, string_view text)
Char * StringIStr(const Char *where, const Char *what)
int XGBFeatKeyQualValid(CSeqFeatData::ESubtype subtype, TQualVector &quals, bool error_msgs, bool perform_corrections)
std::vector< CRef< objects::CGb_qual > > TQualVector
#define GB_FEAT_ERR_REPAIRABLE
void xinstall_gbparse_error_handler(X_gbparse_errfunc new_func)
void xinstall_gbparse_range_func(void *data, X_gbparse_rangefunc new_func)
CRef< CSeq_loc > xgbparseint_ver(string_view raw_intervals, bool &keep_rawPt, int &numErrors, const TSeqIdList &seq_ids, bool accver)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4