m_CommentInternalIndent(0),
91m_NeedPeriod(need_period)
103(
const string& comment,
107m_CommentInternalIndent(0),
112 if(!
ctx.Config().IsFormatGBSeq() && !
ctx.Config().IsFormatINSDSeq()) {
124m_CommentInternalIndent(0),
139m_CommentInternalIndent(0),
156m_CommentInternalIndent(0),
182 if( ends_with_ellipsis ) {
197 const string& next_comment_first_string = next_comment.
m_Comment.front();
198 boolnext_comment_starts_with_empty_line =
false;
199 ITERATE(
string, next_com_line_it, next_comment_first_string ) {
200 const charch = *next_com_line_it;
202next_comment_starts_with_empty_line =
true;
209 if( ! next_comment_starts_with_empty_line ) {
216 string& last_str_of_comment =
m_Comment.back();
217 if( last_str_of_comment.empty() ) {
221string::size_type pos = (last_str_of_comment.length() - 1);
222 if( last_str_of_comment[pos] ==
'\n') {
227 for( ; pos < last_str_of_comment.length(); --pos ) {
228 const charch = last_str_of_comment[pos];
232last_str_of_comment.erase(pos);
254 static const stringkNsAreGaps =
"The strings of n's in this record represent "\
255 "gaps between contigs, and the length of each string corresponds "\
256 "to the length of the gap.";
277seglist.push_back(
TAln(&aln) );
296 static const stringtpa_string =
297 "THIRD PARTY DATABASE: This TPA record uses data from DDBJ/EMBL/GenBank ";
299 if( !
ctx.IsTPA() ||
ctx.IsRefSeq() ) {
313vector<string> histaccns;
324 if(other_id->
IsGi()) {
333 if(other_id->
IsGi()) {
344 if( !tid.empty() ) {
348 if( histaccns.empty() ) {
352 sort( histaccns.begin(), histaccns.end() );
353histaccns.erase( unique( histaccns.begin(), histaccns.end() ), histaccns.end() );
356 text<< tpa_string << ((histaccns.size() > 1) ?
"entries ":
"entry ");
358 size_t size= histaccns.size();
361 for(
size_t i= 0;
i<
size; ) {
362 text<< histaccns[
i];
365 text<< ((
i==
last) ?
" and ":
", ");
373vector<string> accessions;
381 if( !(*ufi)->CanGetData() || !(*ufi)->GetData().IsStr() ||
382!(*ufi)->CanGetLabel() ) {
386 if( oid.
IsStr() &&
388 stringacc = (*ufi)->GetData().
GetStr();
389 if( !acc.empty() ) {
395 if( accessions.empty() ) {
400 text<< tpa_string << ((accessions.size() > 1) ?
"entries ":
"entry ");
402 size_t size= accessions.size();
405 for(
size_t i= 0;
i<
size; ) {
406 text<< accessions[
i];
409 text<< ((
i==
last) ?
" and ":
", ");
424 const string*uvc =
nullptr, *bic =
nullptr, *smc =
nullptr;
426 if( uo.
HasField(
"UniVecComment") ) {
432 if( uo.
HasField(
"AdditionalComment") ) {
438 if( uo.
HasField(
"SmartComment") && dump_mode ) {
448 text<< pfx <<
"Vector Explanation: "<< *uvc;
452 text<< pfx <<
"Bankit Comment: "<< *bic;
456 text<< pfx <<
"Bankit Comment: "<< *smc;
470vector<string> assembly_pieces;
472 if( uo.
HasField(
"Assembly") ) {
481 if( !(*fit)->GetData().IsFields() ) {
495(*fit)->GetData().GetFields())
504 if(
label==
"accession") {
506}
else if(
label==
"name") {
510 if(
label==
"gi") {
515}
else if(
label==
"from") {
517}
else if(
label==
"to") {
523 if( ! accession.empty() ) {
538 ctx.Config().GetHTMLFormatter().FormatGeneralId(oss, accession);
543 if( from > 0 && to > 0 ) {
544oss <<
" (range: "<< from <<
"-"<< to <<
")";
547 stringnew_piece = oss.str();
548assembly_pieces.push_back( new_piece );
549}
else if( ! name.empty() ) {
550assembly_pieces.push_back( name );
555 if( ! assembly_pieces.empty() ) {
557oss <<
" The reference sequence was derived from ";
559 size_tassembly_size = assembly_pieces.size();
560 for(
size_tii = 0; ii < assembly_size; ++ii ) {
562oss << ((ii < assembly_size - 1) ?
", ":
" and ");
564oss << assembly_pieces[ii];
620 boolis_html =
ctx.Config().DoHTML();
635 if( uo.
HasField(
"Collaborator") ) {
643 if( uo.
HasField(
"GenomicSource") ) {
650 stringidentical_to_start;
651 stringidentical_to_end;
655 enumEIdenticalToPriority {
656eIdenticalToPriority_Nothing = 1,
657eIdenticalToPriority_Gi,
658eIdenticalToPriority_Name,
659eIdenticalToPriority_Accn
661 intidentical_to_priority = eIdenticalToPriority_Nothing;
663 if(uo.
HasField(
"IdenticalTo")) {
666 if( !(*it)->GetData().IsFields() ) {
677 if(sub.
GetLabel().
GetStr() ==
"accession"&& identical_to_priority <= eIdenticalToPriority_Accn ) {
679identical_to_priority = eIdenticalToPriority_Accn;
681 if(sub.
GetLabel().
GetStr() ==
"name"&& identical_to_priority <= eIdenticalToPriority_Name ) {
683identical_to_priority = eIdenticalToPriority_Name;
685 if(sub.
GetLabel().
GetStr() ==
"gi"&& identical_to_priority <= eIdenticalToPriority_Gi ) {
686identical_to =
"gi:"+
688identical_to_priority = eIdenticalToPriority_Gi;
700oss << status_str <<
' ' 705oss <<
" This record is predicted by genome sequence analysis and is " 706<<
"not yet supported by experimental evidence.";
710 if( !build_num.empty() ) {
711oss <<
" Features on this sequence have been produced for build " 712<< build_num <<
" of the NCBI's genome annotation" 717oss <<
"documentation";
723oss <<
" NCBI contigs are derived from assembled genomic sequence data.~" 725<<
" Documentation of NCBI's Annotation Process ";
730 if(collaborator.empty()) {
731oss <<
" This record has not yet been subject to final NCBI review.";
733oss <<
" This record is based on preliminary " 734 "annotation provided by "<< collaborator <<
'.';
738oss <<
" This record has not been reviewed and the function is unknown.";
741oss <<
" This record has undergone validation or preliminary review.";
744oss <<
" This record has been curated by " 745<< (collaborator.empty() ?
"NCBI staff": collaborator) <<
'.';
748oss <<
" This record is predicted by automated computational analysis.";
751oss <<
" This record is provided to represent a collection of " 752<<
"whole genome shotgun sequences.";
755oss <<
" This record is provided to represent a collection of " 756<<
"transcriptome shotgun assembly sequences.";
764!collaborator.empty() ) {
765oss <<
" This record has been curated by "<< collaborator <<
'.';
768 if( !
source.empty() ) {
769oss <<
" This record is derived from an annotated genomic sequence (" 773 if( !identical_to.empty() ) {
774oss <<
" The reference sequence is identical to ";
775 const booladd_link = (is_html && identical_to_priority != eIdenticalToPriority_Name);
777 ctx.Config().GetHTMLFormatter().FormatGeneralId(oss, identical_to);
783 if( ! identical_to_start.empty() && ! identical_to_end.empty() ) {
784oss <<
" (range: "<< identical_to_start <<
"-"<<
785identical_to_end <<
")";
797 const static stringkRefSeqGeneLink =
"<a href=\"https://www.ncbi.nlm.nih.gov/refseq/rsg/\">RefSeqGene</a>";
798 const static stringkRefSeqGene =
"RefSeqGene";
802desc_it; ++desc_it) {
807 if(
f&&
f->GetData().IsStr()) {
808 const string& status1 =
f->GetData().GetStr();
809 if(status1 ==
"Reference Standard") {
810oss <<
"~This sequence is a reference standard in the " 811<< (is_html ? kRefSeqGeneLink : kRefSeqGene)
831ostringstream result_oss;
833 const static stringkRefSeqCat =
"RefSeq Category";
836result_oss << kRefSeqCat <<
": ";
838 if( pCategoryField &&
841 const string& sCategory = pCategoryField->
GetData().
GetStr();
842result_oss << sCategory <<
'\n';
844result_oss <<
"(?UNKNOWN?)"<<
'\n';
851 if( pDetailsField ) {
855 const static char* arrFieldNames[] = {
856 "CALC",
"CCA",
"CLI",
"COM",
"FGS",
"MOD",
"PHY",
"PRT",
"QfO",
"TYS",
"UPR" 860 const CTempStringsFieldName( arrFieldNames[field_idx] );
862field_name += sFieldName;
865mapFieldNameToRef.
find(field_name);
866 if( find_iter == mapFieldNameToRef.
end() ) {
877 if( sFieldName.
length() < kRefSeqCat.length() ) {
879(kRefSeqCat.length() - sFieldName.
length()),
' ');
882result_oss << sFieldName <<
": " 883<< find_iter->second->GetData().GetStr() <<
'\n';
887 returnresult_oss.str();
894 static const stringdefault_str =
"?";
896 if(!
ctx.IsWGSMaster()) {
900 const string& wgsaccn =
ctx.GetWGSMasterAccn();
901 const string& wgsname =
ctx.GetWGSMasterName();
907 const string* taxname = &default_str;
916 const string*
first= &default_str, *
last= &default_str;
921 if(uo.
HasField(
"WGS_accession_first")) {
928 if(uo.
HasField(
"WGS_accession_last")) {
946 text<<
"The "<< *taxname
947<<
" whole genome shotgun (WGS) project has the project accession " 948<< wgsaccn <<
". This version of the project ("<<
version 949<<
") has the accession number "<< wgsname <<
",";
951 text<<
" and consists of sequences "<< *
first<<
"-"<< *
last<<
".";
953 text<<
" and consists of sequence "<< *
first<<
".";
961 static const stringdefault_str =
"?";
963 if(!
ctx.IsTSAMaster()) {
967 const string& tsaaccn =
ctx.GetTSAMasterAccn();
968 const string& tsaname =
ctx.GetTSAMasterName();
974 const string* taxname = &default_str;
983 const string*
first= &default_str, *
last= &default_str;
990 if(uo.
HasField(
"Accession_first")) {
996}
else if(uo.
HasField(
"TSA_accession_first")) {
1003 if(uo.
HasField(
"Accession_last")) {
1009}
else if(uo.
HasField(
"TSA_accession_last")) {
1022ostringstream
text;
1023 text<<
"The "<< *taxname
1024<<
" transcriptome shotgun assembly (TSA) project has the project accession " 1025<< tsaaccn <<
". This version of the project ("<<
version 1026<<
") has the accession number "<< tsaname <<
",";
1028 text<<
" and consists of sequences "<< *
first<<
"-"<< *
last<<
".";
1030 text<<
" and consists of sequence "<< *
first<<
".";
1038 static const stringdefault_str =
"?";
1040 if(!
ctx.IsTLSMaster()) {
1044 const string& tlsaccn =
ctx.GetTLSMasterAccn();
1045 const string& tlsname =
ctx.GetTLSMasterName();
1051 const string* taxname = &default_str;
1060 const string*
first= &default_str, *
last= &default_str;
1066 if(uo.
HasField(
"TLS_accession_first")) {
1073 if(uo.
HasField(
"TLS_accession_last")) {
1086ostringstream
text;
1087 text<<
"The "<< *taxname
1088<<
" targeted locus study (TLS) project has the project accession " 1089<< tlsaccn <<
". This version of the project ("<<
version 1090<<
") has the accession number "<< tlsname <<
",";
1092 text<<
" and consists of sequences "<< *
first<<
"-"<< *
last<<
".";
1094 text<<
" and consists of sequence "<< *
first<<
".";
1104 boolis_prot =
ctx.IsProt();
1108 return "COMPLETENESS: full length";
1111 return "COMPLETENESS: not full length";
1114 return(is_prot ?
"COMPLETENESS: incomplete on the amino end":
1115 "COMPLETENESS: incomplete on the 5' end");
1118 return(is_prot ?
"COMPLETENESS: incomplete on the carboxy end":
1119 "COMPLETENESS: incomplete on the 3' end");
1122 return "COMPLETENESS: incomplete on both ends";
1125 return(is_prot ?
"COMPLETENESS: complete on the amino end":
1126 "COMPLETENESS: complete on the 5' end");
1129 return(is_prot ?
"COMPLETENESS: complete on the carboxy end":
1130 "COMPLETENESS: complete on the 3' end");
1133 return "COMPLETENESS: unknown";
1143 if(
ctx.IsDelta()) {
1147ostringstream
text;
1149 text<<
"* NOTE: This is a partial genome representation.";
1151 text<<
" It currently~* consists of "<< (summary.
num_gaps+ 1) <<
" contigs. The true order of the pieces~" 1152<<
"* is not known and their order in this sequence record is~" 1153<<
"* arbitrary. Gaps between the contigs are represented as~" 1154<<
"* runs of N, but the exact sizes of the gaps are unknown.";
1158 stringcomment =
text.str();
1169 if(
ctx.IsDelta()) {
1175ostringstream
text;
1179 text<<
"* NOTE: This record contains "<< (summary.
num_gaps+ 1) <<
" individual~" 1180<<
"* sequencing reads that have not been assembled into~" 1181<<
"* contigs. Runs of N are used to separate the reads~" 1182<<
"* and the order in which they appear is completely~" 1183<<
"* arbitrary. Low-pass sequence sampling is useful for~" 1184<<
"* identifying clones that may be gene-rich and allows~" 1185<<
"* overlap relationships among clones to be deduced.~" 1186<<
"* However, it should not be assumed that this clone~" 1187<<
"* will be sequenced to completion. In the event that~" 1188<<
"* the record is updated, the accession number will~" 1189<<
"* be preserved.";
1194 text<<
"* NOTE: This is a \"working draft\" sequence.";
1196 text<<
" It currently~" 1197<<
"* consists of "<< (summary.
num_gaps+ 1) <<
" contigs. The true order of the pieces~" 1198<<
"* is not known and their order in this sequence record is~" 1199<<
"* arbitrary. Gaps between the contigs are represented as~" 1200<<
"* runs of N, but the exact sizes of the gaps are unknown.";
1202 text<<
"~* This record will be updated with the finished sequence~" 1203<<
"* as soon as it is available and the accession number will~" 1204<<
"* be preserved." 1208 text<<
"* NOTE: This is a \"working draft\" sequence.";
1210 text<<
" It currently~* consists of "<< (summary.
num_gaps+ 1)
1211<<
" contigs. Gaps between the contigs~" 1212<<
"* are represented as runs of N. The order of the pieces~" 1213<<
"* is believed to be correct as given, however the sizes~" 1214<<
"* of the gaps between them are based on estimates that have~" 1215<<
"* provided by the submitter.";
1217 text<<
"~* This sequence will be replaced~" 1218<<
"* by the finished sequence as soon as it is available and~" 1219<<
"* the accession number will be preserved." 1226 stringcomment =
text.str();
1235 const boolbHtml =
ctx.Config().DoHTML();
1239ostringstream
text;
1242 ctx.Config().GetHTMLFormatter().FormatModelEvidence(me_name, me);
1244 text<<
"MODEL "<< *refseq <<
": "<<
"This record is predicted by " 1245<<
"automated computational analysis. This record is derived from " 1246<<
"a genomic sequence ("<< me_name <<
")";
1250 text<<
" and transcript sequence";
1259 ctx.Config().GetHTMLFormatter().FormatTranscript(tr_name, *
str);
1260 text<< prefix << tr_name;
1262 if(num_assm ==
count+ 1) {
1271 if( !me.
method.empty() ) {
1272 text<<
" annotated using gene prediction method: "<< me.
method;
1276 text<<
", supported by ";
1278 text<<
"mRNA and EST ";
1279}
else if( me.
mrnaEv) {
1285 text<<
"evidence";
1288 const char*documentation_str = ( bHtml ?
1289 "<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/\">Documentation</a>":
1292 text<<
".~Also see:~" 1293<<
" "<< documentation_str <<
" of NCBI's Annotation Process ";
1299(
string& chromosome,
1300 string& assembly_date,
1301 string& ncbi_annotation,
1307 if(uo.
HasField(
"AssemblyDate")) {
1315 if(uo.
HasField(
"NcbiAnnotation")) {
1324 const string* name =
nullptr;
1329name = &(*st)->GetName();
1344assembly_date =
"?";
1347ncbi_annotation =
"?";
1355 const static stringkEncodeProjLink =
"https://www.nhgri.nih.gov/10005107";
1357 const boolbHtml =
ctx.Config().DoHTML();
1359 if(!
ctx.IsEncode()) {
1364 str<<
"REFSEQ: This record was provided by the ";
1366 str<<
"<a href=\""<< kEncodeProjLink <<
"\">";
1372 str<<
" project.";
1374 stringchromosome, assembly_date, ncbi_annotation;
1376 str<<
" It is defined by coordinates on the sequence of chromosome " 1377<< chromosome <<
" from the "<< assembly_date
1378<<
" assembly of the human genome (NCBI build "<< ncbi_annotation
1387 const boolbHtml =
ctx.Config().DoHTML();
1389 const string& sAuthorizedAccess =
ctx.GetAuthorizedAccess();
1390 if( sAuthorizedAccess.empty() ) {
1396 str<<
"These data are available through the dbGaP authorized access system. ";
1399<<
"https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?adddataset=" 1400<< sAuthorizedAccess <<
"&page=login\">";
1401 str<<
"Request access";
1403 str<<
" to Study ";
1405<<
"https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=" 1406<< sAuthorizedAccess <<
"\">";
1407 str<< sAuthorizedAccess;
1410 str<<
"Request access to Study ";
1411 str<< sAuthorizedAccess;
1420 const boolbHtml =
ctx.Config().DoHTML();
1423 if( ! pOpticalMapPoints ||
1429 const string& sFiletrackURL =
ctx.GetFiletrackURL();
1431 const boolbIsCircular =
FIELD_EQUALS(
ctx.GetHandle(), Inst_Topology,
1441 _ASSERT( ! vecOfPoints.empty() );
1444 if( bHtml && ! sFiletrackURL.empty() ) {
1445 str<<
"<a href=\""<< sFiletrackURL <<
"\">";
1448 if( bHtml && ! sFiletrackURL.empty() ) {
1453 size_tuNumFrags = pOpticalMapPoints->
GetPoints().size();
1454 if( ! bIsCircular )
1459 if(uNumFrags > 1 && vecOfPoints[uNumFrags-1] < uBioseqLength - 1) {
1464 str<<
" piece"<< ( (uNumFrags > 1) ?
"s":
"") <<
":";
1468 TSeqPosthisEndPos = vecOfPoints[0] + 1;
1471 if( ! bIsCircular ) {
1473 str, prevEndPos, thisEndPos, uBioseqLength,
1476prevEndPos = thisEndPos + 1;
1479 for(
size_tidx = 1; idx < vecOfPoints.size(); ++idx ) {
1480thisEndPos = vecOfPoints[idx] + 1;
1482 str, prevEndPos, thisEndPos, uBioseqLength,
1484prevEndPos = thisEndPos + 1;
1489thisEndPos = ( bIsCircular ? vecOfPoints[0] + 1 : uBioseqLength );
1490 if( bIsCircular || prevEndPos < uBioseqLength - 1 ) {
1492 str, prevEndPos, thisEndPos, uBioseqLength,
1503 const boolbHtml =
ctx.Config().DoHTML();
1505 constvector< string > & sBasemodURLs =
ctx.GetBasemodURLs();
1506 intnumBases = (
int) sBasemodURLs.size();
1510 if( numBases < 1 ) {
1514 if( numBases == 1 ) {
1515 str<<
"This genome has a ";
1519 if( ! url.empty() ) {
1521 str<<
"<a href=\""<< url <<
"\">"<<
"base modification file"<<
"</a>";
1525 str<<
"base modification file";
1527 str<<
" available.";
1529 str<<
"There are ";
1531 str<<
" base modification files";
1538 if( ! url.empty() ) {
1541 str<< pfx <<
"<a href=\""<< url <<
"\">"<< j <<
"</a>";
1542 if( numBases == 2 ) {
1544}
else if( j == numBases - 1 ) {
1554 str<<
" available for this genome.";
1562 if( !
ctx.IsRSUniqueProt() ) {
1571 str<<
"REFSEQ: This record represents a single, non-redundant, protein " 1572<<
"sequence which may be annotated on many different RefSeq " 1573<<
"genomes from the same, or different, species.";
1608 const char* provider,
const char* pipeline,
const char* status,
boolhas_name,
const char* organism,
1609 const char*
source,
const char* category,
const char* accession )
1616 if( label_str ==
"GOLD Stamp ID"&&
NStr::StartsWith(data_str,
"Gi") ) {
1617 result<<
"<a href=\"http://genomesonline.org/cgi-bin/GOLD/bin/GOLDCards.cgi?goldstamp="<< data_str
1618<<
"\">"<< data_str <<
"</a>";
1621 if( label_str ==
"Annotation Software Version") {
1622 result<<
"<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/release_notes/#version" 1624<<
"\">"<< data_str <<
"</a>";
1626}
else if(
NStr::Equal(label_str,
"Annotation Name") &&
1632 if(
NStr::Find(data_str,
"Updated Annotation Release") !=
NPOS) {
1633 NStr::Replace( data_str,
" Updated Annotation Release ",
"/", fst );
1635 NStr::Replace( data_str,
" Annotation Release ",
"/", fst );
1639 result<<
"<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/" 1641<<
"\">"<< data_str <<
"</a>";
1643 result<<
"<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/" 1646<<
"\">"<< data_str <<
"</a>";
1652 NStr::Replace( data_str,
" Annotation Release ",
"/", fst );
1654 result<<
"<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/" 1656<<
"\">"<< data_str <<
"</a>";
1659 stringaccn = data_str;
1664 result<<
"<a href=\"https://www.ebi.ac.uk/interpro/entry/pfam/" 1666<<
"\">"<< data_str <<
"</a>";
1671 NStr::Replace( data_str,
"Domain architecture ID ",
"", fst );
1673 result<<
"<a href=\"https://www.ncbi.nlm.nih.gov/Structure/sparcle/archview.html?archid=" 1675<<
"\">"<< data_str <<
"</a>";
1677}
else if(
NStr::Equal(label_str,
"Evidence Category") &&
1678 NStr::Equal(data_str,
"Antimicrobial Resistance Allele") &&
1679 NStr::Equal(
source,
"Bacterial Antimicrobial Resistance Reference Gene Database") ) {
1680 result<<
"<a href=\"https://www.ncbi.nlm.nih.gov/bioproject/" 1682<<
"\">"<< data_str <<
"</a>";
1684}
else if(
NStr::Equal(label_str,
"Evidence Accession") &&
1685 NStr::Equal(
source,
"Bacterial Antimicrobial Resistance Reference Gene Database") ) {
1686 result<<
"<a href=\"https://www.ncbi.nlm.nih.gov/nuccore/" 1688<<
"\">"<< data_str <<
"</a>";
1691 result<<
"<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_prok/evidence/" 1693<<
"\">"<< data_str <<
"</a>";
1696 result<<
"<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_prok/evidence/" 1698<<
"\">"<< data_str <<
"</a>";
1712list<string> &out_lines,
1713 int&out_prefix_len,
1714 const boolis_first,
1715 const boolis_html )
1717 static const intkFieldLenThreshold = 45;
1720 const char* prefix =
"##Metadata-START##";
1721 const char* suffix =
"##Metadata-END##";
1722 const char* provider =
"";
1723 const char* pipeline =
"";
1724 const char* status =
"";
1725 const char*
source=
"";
1726 const char* category =
"";
1727 const char* organism =
"";
1729 boolhas_name =
false;
1731 boolfieldOverThreshold =
false;
1735string::size_type longest_label_len = 1;
1737 if( (*it_for_len)->GetLabel().IsStr() &&
1738(*it_for_len)->GetData().IsStr() && ! (*it_for_len)->GetData().GetStr().empty() ) {
1739 const string&
label= (*it_for_len)->GetLabel().GetStr();
1741 if(
label==
"StructuredCommentPrefix") {
1742prefix = (*it_for_len)->GetData().GetStr().c_str();
1743}
else if(
label==
"StructuredCommentSuffix") {
1744suffix = (*it_for_len)->GetData().GetStr().c_str();
1746 if(
label==
"Annotation Provider") {
1747provider = (*it_for_len)->GetData().GetStr().c_str();
1748}
else if(
label==
"Annotation Pipeline") {
1749pipeline = (*it_for_len)->GetData().GetStr().c_str();
1750}
else if(
label==
"Annotation Status") {
1751status = (*it_for_len)->GetData().GetStr().c_str();
1752}
else if(
label==
"Annotation Name") {
1754}
else if(
label==
"URL Organism") {
1755organism = (*it_for_len)->GetData().GetStr().c_str();
1756}
else if(
NStr::EqualNocase(prefix,
"##Evidence-For-Name-Assignment-START##")) {
1757 if(
label==
"Evidence Source") {
1758 source= (*it_for_len)->GetData().GetStr().c_str();
1760 if(
label==
"Evidence Category") {
1761category = (*it_for_len)->GetData().GetStr().c_str();
1763 if(
label==
"Evidence Accession") {
1764 stringaccn = (*it_for_len)->GetData().GetStr();
1769 conststring::size_type label_len =
label.length();
1770 if( (label_len > longest_label_len) && (label_len <= kFieldLenThreshold) ) {
1771longest_label_len = label_len;
1773 if( label_len > kFieldLenThreshold ) {
1774fieldOverThreshold =
true;
1779out_prefix_len = (longest_label_len + 4);
1786out_lines.push_back( prefix );
1787out_lines.back().append(
"\n");
1792 if( ! (*it)->GetLabel().IsStr() || (*it)->GetLabel().GetStr().empty() ) {
1797 if( ! (*it)->GetData().IsStr() || (*it)->GetData().GetStr().empty() ) {
1802 if( (*it)->GetLabel().GetStr() ==
"StructuredCommentPrefix"||
1803(*it)->GetLabel().GetStr() ==
"StructuredCommentSuffix"||
1804(*it)->GetLabel().GetStr() ==
"Annotation Freeze"||
1805(*it)->GetLabel().GetStr() ==
"URL Organism") {
1810out_lines.push_back( (*it)->GetLabel().GetStr() );
1811 string&next_line = out_lines.back();
1817 if( ! fieldOverThreshold ) {
1818next_line.resize(
max( next_line.size(), longest_label_len),
' ');
1820next_line.append(
" :: ");
1822provider, pipeline, status, has_name, organism,
source, category, accession.c_str() ) );
1823next_line.append(
"\n");
1828out_lines.push_back( suffix );
1829out_lines.back().append(
"\n");
1837 stringprefix,
str, suffix;
1838 switch( desc.
Which() ) {
1857 if( oid.
IsStr() ) {
1858prefix =
"Map location: ";
1862prefix =
"Map location: (Database ";
1872prefix =
"Region: ";
1893 if(
type.IsStr() &&
type.GetStr() ==
"StructuredComment") {
1907 if(
str.empty() ||
str==
".") {
1929 if(
type.IsStr() &&
type.GetStr() ==
"StructuredComment") {
1956 if(!
ctx.Config().IsFormatGBSeq() && !
ctx.Config().IsFormatINSDSeq()) {
1963(
const string& prefix,
1964 const string&
str,
1965 const string& suffix,
1970 stringcomment = prefix;
1974 if(!
ctx.Config().IsFormatGBSeq() && !
ctx.Config().IsFormatINSDSeq()) {
1982 size_tpos = comment.find_last_not_of(
" \n\t\r.~");
1983 if(pos != comment.length() - 1) {
1984 size_tperiod = comment.find_last_of(
'.');
1985 booladd_period = period > pos;
2015<< setw(7) << (prevEndPos)
2017<< setw(7) << (thisEndPos)
2018<<
": fragment of ";
2020 boolbLengthIsOkay =
true;
2022(thisEndPos <= prevEndPos) )
2024bLengthIsOkay =
false;
2026(thisEndPos >= prevEndPos) )
2028bLengthIsOkay =
false;
2031 if( ! bLengthIsOkay ) {
2032 str<<
"(ERROR: CANNOT CALCULATE LENGTH)";
2033}
else if( (thisEndPos > uBioseqLength) ||
2034(prevEndPos > uBioseqLength) )
2036 str<<
"(ERROR: FRAGMENT IS OUTSIDE BIOSEQ BOUNDS)";
2039 str<< (thisEndPos - prevEndPos + 1);
2041 str<< (uBioseqLength + thisEndPos - prevEndPos + 1);
2044 str<<
" bp in length";
2055 const string& build_num) :
2066 if( uo.
HasField(
"NcbiAnnotation") ) {
2074 if( uo.
HasField(
"NcbiVersion") ) {
2078build_num +=
" version ";
2084}
else if( uo.
HasField(
"Annotation") ) {
2088 static const stringprefix =
"NCBI build ";
2114 const boolbHtml =
ctx.Config().DoHTML();
2118ostringstream
text;
2120 text<<
"GENOME ANNOTATION "<< *refseq <<
": ";
2122 text<<
"Features on this sequence have been produced for build " 2128 text<<
"documentation";
2134 text<<
"NCBI contigs are derived from assembled genomic sequence data." 2136<<
" Documentation of NCBI's Annotation Process ";
2141desc_it; ++desc_it) {
2154 strings =
text.str();
2173(
const string& prefix,
2174 const string& suffix,
2184hist.
GetDate().
GetDate(&date,
"%{%3N%|???%} %{%D%|??%}, %{%4Y%|????%}");
2189 if( (*id)->IsGi() ) {
2190gis.push_back((*id)->GetGi());
2194ostringstream
text;
2196 text<< prefix << ((gis.size() > 1) ?
" or before ":
" ") << date
2199 if( gis.empty() ) {
2221 text<<
'.'<<
'\n';
2232 if(
ctx.IsWGSMaster() ||
ctx.IsTSAMaster() ) {
2235 "this project was updated. The new version is",
2241 "this sequence was replaced by",
2249 "this sequence version replaced",
2294 if(! desc.
IsUser())
continue;
2298 if(! oi.
IsStr())
continue;
2323 if( orig_id.length() < 1000 ) {
2324 msg<<
"LocalID: "<< orig_id;
2326 msg<<
"LocalID string too large";
2337 msg<<
"LocalID string too large";
2369 msg<<
"FileID string too large";
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void GetDate(string *label, bool year_only=false) const
Append a standardized string representation of the date to the label.
CBioseqContext * GetContext(void)
void x_SetObject(const CSerialObject &obj)
const CSerialObject * GetObject(void) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
namespace ncbi::objects::
Base class for all serializable objects.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
@ fFieldMapFlags_ExcludeThis
= 0x1 (excludes this CUser_field's name and mapping to self from results)
void GetFieldsMap(CUser_field::TMapFieldNameToRef &out_mapFieldNameToRef, TFieldMapFlags fFieldMapFlags=0, const SFieldNameChain &parent_name=SFieldNameChain()) const
Recursively get the map of field names like the input for GetFieldRef to the user-field.
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
virtual void FormatComment(const CCommentItem &comment, IFlatTextOStream &text_os)=0
container_type::const_iterator const_iterator
const_iterator end() const
const_iterator find(const key_type &key) const
Include a standard set of the NCBI C++ Toolkit most basic headers.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static const char * str(char *buf, int n)
Utility macros and typedefs for exploring NCBI objects from general.asn.
#define FOR_EACH_USERFIELD_ON_USEROBJECT(Itr, Var)
FOR_EACH_USERFIELD_ON_USEROBJECT / EDIT_EACH_USERFIELD_ON_USEROBJECT.
#define ITERATE_0_IDX(idx, up_to)
idx loops from 0 (inclusive) to up_to (exclusive)
unsigned int TSeqPos
Type for sequence locations and lengths.
constexpr size_t ArraySize(const Element(&)[Size])
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
CConstRef< CSeq_id > GetSeqId(void) const
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
string GetAccessionForGi(TGi gi, CScope &scope, EAccessionVersion use_version=eWithAccessionVersion, EGetIdType flags=0)
Retrieve the accession for a given GI.
@ eWithAccessionVersion
accession.version (when possible)
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
const TInst_Hist & GetInst_Hist(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
bool IsSetInst_Hist(void) const
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
NCBI_NS_STD::string::size_type SIZE_TYPE
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
size_type length(void) const
Return the length of the represented array.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static string & ToUpper(string &str)
Convert string to upper case — string& version.
static const char label[]
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
list< CRef< CSubSource > > TSubtype
const TOrg & GetOrg(void) const
Get the Org member data.
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetDb(void) const
Name of database or system. Check if a value has been assigned to Db data member.
bool CanGetType(void) const
Check if it is safe to call GetType method.
bool IsSetType(void) const
Type of object within class. Check if a value has been assigned to Type data member.
const TTag & GetTag(void) const
Get the Tag member data.
bool IsId(void) const
Check if variant Id is selected.
const TData & GetData(void) const
Get the Data member data.
bool CanGetDb(void) const
Check if it is safe to call GetDb method.
bool CanGetData(void) const
Check if it is safe to call GetData method.
bool CanGetTag(void) const
Check if it is safe to call GetTag method.
bool IsSetTag(void) const
Appropriate tag. Check if a value has been assigned to Tag data member.
const TFields & GetFields(void) const
Get the variant data.
const TDb & GetDb(void) const
Get the Db member data.
vector< CRef< CUser_field > > TFields
E_Choice Which(void) const
Which variant is currently selected.
bool IsFields(void) const
Check if variant Fields is selected.
bool IsInt(void) const
Check if variant Int is selected.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
TInt GetInt(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
bool CanGetLabel(void) const
Check if it is safe to call GetLabel method.
vector< CRef< CUser_field > > TData
TId GetId(void) const
Get the variant data.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
bool IsSetTaxname(void) const
Preferred formal name. Check if a value has been assigned to Taxname data member.
bool CanGetSegs(void) const
Check if it is safe to call GetSegs method.
bool IsDisc(void) const
Check if variant Disc is selected.
const TDisc & GetDisc(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
bool IsDenseg(void) const
Check if variant Denseg is selected.
const TData & GetData(void) const
Get the Data member data.
const TComment & GetComment(void) const
Get the Comment member data.
bool IsComment(void) const
Check if variant Comment is selected.
bool CanGetComment(void) const
Check if it is safe to call GetComment method.
bool IsGeneral(void) const
Check if variant General is selected.
vector< TSeqPos > TPoints
const TPoints & GetPoints(void) const
Get the Points member data.
const TGeneral & GetGeneral(void) const
Get the variant data.
bool IsGi(void) const
Check if variant Gi is selected.
const TUser & GetUser(void) const
Get the variant data.
bool IsSetAssembly(void) const
How was this assembled? Check if a value has been assigned to Assembly data member.
const TMaploc & GetMaploc(void) const
Get the variant data.
const TAssembly & GetAssembly(void) const
Get the Assembly member data.
list< CRef< CSeq_id > > TIds
const TIds & GetIds(void) const
Get the Ids member data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
bool CanGetCompleteness(void) const
Check if it is safe to call GetCompleteness method.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetDate(void) const
Check if a value has been assigned to Date data member.
const TReplaces & GetReplaces(void) const
Get the Replaces member data.
const TDate & GetDate(void) const
Get the Date member data.
const TReplaced_by & GetReplaced_by(void) const
Get the Replaced_by member data.
const TComment & GetComment(void) const
Get the variant data.
const TName & GetName(void) const
Get the variant data.
const TRegion & GetRegion(void) const
Get the variant data.
bool IsUser(void) const
Check if variant User is selected.
@ eCompleteness_has_left
5' or NH3 end present
@ eCompleteness_complete
complete biological entity
@ eCompleteness_has_right
3' or COOH end present
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_partial
partial but no details given
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ eTech_htgs_2
ordered High Throughput sequence contig
@ eTech_htgs_1
unordered High Throughput sequence contig
@ eTech_htgs_0
single genomic reads for coordination
@ e_User
user defined object
@ e_Comment
a more extensive comment
@ e_Region
overall region (globin locus)
@ e_Maploc
map location of this sequence
@ e_Name
a name for this sequence
@ e_Source
source of materials, includes Org-ref
unsigned int
A callback function used to compare two keys in a database.
static void text(MDB_val *v)
constexpr auto sort(_Init &&init)
const string version
version string
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
bool TrimSpacesAndJunkFromEnds(string &str, bool allow_ellipsis=false)
bool IsValidAccession(const string &accn, EAccValFlag flag=eValidateAcc)
void ExpandTildes(string &s, ETildeStyle style)
void GetDeltaSeqSummary(const CBioseq_Handle &seq, SDeltaSeqSummary &summary)
void AddPeriod(string &str)
void NcbiId(CNcbiOstream &os, const T &id, bool html=false)
const string & GetTechString(int tech)
void ConvertQuotes(string &str)
Utility macros and typedefs for exploring NCBI objects from seq.asn.
#define FOR_EACH_SEQDESC_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQDESC_ON_BIOSEQ / EDIT_EACH_SEQDESC_ON_BIOSEQ.
Generic utility macros and templates for exploring NCBI objects.
#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)
FIELD_IS_SET_AND_IS base macro.
#define FOR_EACH_STRING_IN_LIST(Itr, Var)
FOR_EACH_STRING_IN_LIST / EDIT_EACH_STRING_IN_LIST.
#define FOR_EACH_STRING_IN_VECTOR(Itr, Var)
FOR_EACH_STRING_IN_VECTOR / EDIT_EACH_STRING_IN_VECTOR.
#define RAW_FIELD_IS_EMPTY_OR_UNSET(Var, Fld)
RAW_FIELD_IS_EMPTY_OR_UNSET macro.
#define GET_FIELD_OR_DEFAULT(Var, Fld, Dflt)
GET_FIELD_OR_DEFAULT base macro.
#define FIELD_EQUALS(Var, Fld, Value)
FIELD_EQUALS base macro.
#define GET_FIELD(Var, Fld)
GET_FIELD base macro.
static SLJIT_INLINE sljit_ins st(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
For functions that don't use delims, we instead use a chain of names.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4