,
75 "expressed sequence tag",
76 "EST (expressed sequence tag)",
77 "EST (expressed sequence tags)",
78 "EST(expressed sequence tag)",
79 "transcribed sequence fragment",
85 "GSS (genome survey sequence)",
92 "STS(sequence tagged site)",
93 "STS (sequence tagged site)",
95 "sequence tagged site",
116 "CAGE (Cap Analysis Gene Expression)",
122 "CAGE (Cap Analysis Gene Expression)",
135 "Transcriptome Shotgun Assembly",
144 "Targeted Locus Study",
153 "THIRD PARTY ANNOTATION",
159 "TPA:SPECIALIST_DB",
165 "THIRD PARTY ANNOTATION",
176 "Metagenome Assembled Genome",
184 Int4sign = (m < 0) ? -1 : 1;
187 for(m *= sign; m > 9; m /= 10)
188res += m % 10 +
'0';
195std::reverse(res.begin(), res.end());
207 for(
const string& acc : extra_accs) {
211 size_tdash = acc.find(
'-');
212 if(dash == string::npos) {
217 string first(acc.begin(), acc.begin() + dash),
218 last(acc.begin() + dash + 1, acc.end());
219 size_tacclen =
first.size();
222 for(; (*p >=
'A'&& *p <=
'Z') || *p ==
'_';)
225 size_tpreflen = p -
first.c_str();
227 stringprefix =
first.substr(0, preflen);
232 for(q = p; *p >=
'0'&& *p <=
'9';)
236 for(p =
last.c_str() + preflen; *p ==
'0';)
238 for(q = p; *p >=
'0'&& *p <=
'9';)
242ret.push_back(
first);
247 for(num1++; num1 <= num2; num1++) {
248 stringnew_acc = prefix;
249 stringnum_str =
FTAitoa(num1);
250 size_tj = acclen - preflen - num_str.size();
252 for(
size_t i= 0;
i< j;
i++)
256ret.push_back(new_acc);
265 return(
'A'<= c && c <=
'Z') || c ==
'_';
270 return(
'A'<= c && c <=
'Z');
274 return(
'0'<= c && c <=
'9');
279 auto& tokens = tsbp.
list;
282 if((
int)skip >= tsbp.
num)
285 autotbp = tokens.begin();
289 boolbad =
false, msg_issued =
false;
290 for(; tbp != tokens.end(); ++tbp) {
291 const string& token = *tbp;
292string_view tok_view = token;
295 size_tdash = token.find(
'-');
296 if(dash == string::npos)
298 if(dash == 0 || tok_view.size() != (dash + 1 + dash)) {
303string_view
first(tok_view.substr(0, dash));
304string_view
last(tok_view.substr(dash + 1));
311 if(first_it ==
first.end() || !
IsDigit(*first_it)) {
316 if(last_it ==
last.end() || !
IsDigit(*last_it)) {
321 size_tpreflen = first_it -
first.begin();
322 size_tpreflen2 = last_it -
last.begin();
323string_view first_prefix =
first.substr(0, preflen);
324string_view last_prefix =
last.substr(0, preflen2);
325 if(first_prefix != last_prefix) {
332string_view first_digits =
first.substr(preflen);
333string_view last_digits =
last.substr(preflen);
334 if(! all_of(first_digits.begin(), first_digits.end(),
IsDigit) ||
335! all_of(last_digits.begin(), last_digits.end(),
IsDigit)) {
352tbp = tokens.insert_after(tbp,
"-");
353tbp = tokens.insert_after(tbp,
tmp);
376 autotail = tokens.
list.before_begin();
380 autoptr =
str.begin();
384 while(ptr !=
str.end() && *ptr !=
'\r'&& *ptr !=
'\n') {
386 while(ptr !=
str.end() && *ptr !=
delimiter&& *ptr !=
'\r'&& *ptr !=
'\n'&&
387*ptr !=
'\t'&& *ptr !=
' ')
390tail = tokens.
list.insert_after(tail,
string(bptr, ptr));
393 while(ptr !=
str.end() && (*ptr ==
delimiter|| *ptr ==
'\t'|| *ptr ==
' '))
514 if(! where || *where ==
'\0'|| ! what || *what ==
'\0')
518 for(; *where !=
'\0'; where++) {
519 for(q = what, p = where; *q !=
'\0'&& *p !=
'\0'; q++, p++) {
523 if(*q >=
'A'&& *q <=
'Z') {
526}
else if(*q >=
'a'&& *q <=
'z') {
532 if(*p ==
'\0'|| *q ==
'\0')
535 if(q && *q ==
'\0')
536 return const_cast<char*
>(where);
564vector<string> lines;
567 for(
const auto& line : lines) {
568 if(line.empty() || line.starts_with(
"XX") || line.size() <=
indent) {
571replaced += line.substr(
indent);
572 auto last= line.size() - 1;
573 if(line[
last] !=
'-') {
575}
else if(line[
last- 1] ==
' ') {
587 if(!
str.empty()) {
588 for(
size_tret =
str.size(); ret > 0;) {
589 charc =
str[--ret];
590 if(c !=
' '&& c !=
'\n'&& c !=
'\\'&& c !=
','&&
591c !=
';'&& c !=
'~'&& c !=
'.'&& c !=
':') {
639bptr = retptr = *ptr;
640 if(! retptr || *retptr ==
'\0')
643 while(*retptr !=
'\0'&& *retptr !=
' ')
646 string str(bptr, retptr);
648 while(*retptr !=
'\0'&& *retptr ==
' ')
668 if(
i!= string_view::npos)
669 return const_cast<char*
>(sv.data() +
i);
685 auto i= sv.find(leadstr);
686 if(
i!= string_view::npos)
687 return const_cast<char*
>(sv.data() +
i);
707new_text_id->SetVersion(text_id->
GetVersion());
709 SetTextId(
id.Which(), *new_id, *new_text_id);
714ibp->
ids.push_back(new_id);
718ibp->
ids.push_back(std::move(pId));
726 if(from_chars(sv.data(), sv.data()+sv.size(),
val).ec == errc{}) {
742 if(date_view.length()<3) {
747 if(
isdigit(date_view.front())) {
749date_view = date_view.substr(3);
753 static constvector<string>
months{
754 "JAN",
"FEB",
"MAR",
"APR",
"MAY",
"JUN",
"JUL",
"AUG",
"SEP",
"OCT",
"NOV",
"DEC" 756string_view maybe_month = date_view.substr(0,3);
757 autoit = find(
months.begin(),
months.end(), maybe_month);
759 if(it ==
months.end()) {
760string_view
msg= date_view.substr(0,10);
765 intmonth =
int(it -
months.begin()) + 1;
767date_view = date_view.substr(4);
769 if(! parsed_year.has_value()) {
772 autoyear = *parsed_year;
776 if(1900 <= year && year <= cur_year) {
778}
else if(0 <= year && year <= 99 &&
'0'<= date_view[1] && date_view[1] <=
'9') {
780(year < 70) ? (year += 2000) : (year += 1900);
793date->SetMonth(month);
812 SIZE_TYPEkeywordCount = keywordList.size();
814 for(
unsigned i= 0;
i< keywordCount; ++
i) {
815 if(
str.starts_with(keywordList[
i])) {
826 for(
charc :
str) {
827 if(c >=
'0'&& c <=
'9')
835 for(
const auto& keyword : keywordList) {
836 if(
str.starts_with(keyword))
841 if(
msg.size() > 50)
843 auto n=
msg.find(
'\n');
844 if(
n!= string_view::npos)
879 returnstring_view(
tmp->mBuf.ptr,
tmp->mBuf.len);
899 for(
auto& temp : chain)
900 if(temp.mType ==
type)
923 boolkwd_tpa =
false;
924 boolkwd_party =
false;
925 boolkwd_inf =
false;
926 boolkwd_exp =
false;
927 boolkwd_asm =
false;
928 boolkwd_spedb =
false;
939 for(
const string&
key: kwds) {
943 const char* p =
key.c_str();
947 else if(
i== 1 ||
i== 2)
953 else if(
i== 5 ||
i== 6)
961}
else if(p[3] !=
'\0'&& p[4] !=
'\0') {
965 if(
i> 2 &&
i< 8 && j < 4) {
972 if(kwd_tpa && ! kwd_party) {
975}
else if(! kwd_tpa && kwd_party) {
979 if(! kwd_tpa && (kwd_inf || kwd_exp)) {
982}
else if(kwd_tpa && kwd_inf ==
false&& kwd_exp ==
false&&
983kwd_asm ==
false&& kwd_spedb ==
false) {
988 for(
i= 0;
i< j;
i++) {
1003 boolkwd_tsa =
false;
1004 boolkwd_assembly =
false;
1011 for(
const string&
key: kwds) {
1018kwd_assembly =
true;
1021kwd_assembly =
true;
1024 if(kwd_tsa && ! kwd_assembly) {
1027}
else if(! kwd_tsa && kwd_assembly) {
1037 boolkwd_tls =
false;
1038 boolkwd_study =
false;
1045 for(
const string&
key: kwds) {
1058 if(kwd_tls && ! kwd_study) {
1061}
else if(! kwd_tls && kwd_study) {
1094 void fta_keywords_check(string_view
str,
bool* estk,
bool* stsk,
bool* gssk,
bool* htck,
bool* flik,
bool* wgsk,
bool* tpak,
bool* envk,
bool* mgak,
bool* tsak,
bool* tlsk)
1153 for(TKeywordList::iterator
key= kwds.begin();
key!= kwds.end();) {
1167 for(TKeywordList::iterator
key= kwds.begin();
key!= kwds.end();) {
1181 for(TKeywordList::iterator
key= kwds.begin();
key!= kwds.end();) {
1196 for(TKeywordList::iterator
key= kwds.begin();
key!= kwds.end();) {
1211 for(TKeywordList::iterator
key= kwds.begin();
key!= kwds.end();) {
1225 for(TKeywordList::iterator
key= kwds.begin();
key!= kwds.end();) {
1235 constlist<string> keywordList,
1244 if(keywordList.empty()) {
1247 for(
autokeyword : keywordList) {
1249keyword, &entry->
EST, &entry->
STS, &entry->
GSS, &entry->
HTC,
nullptr,
nullptr, (tpa_check ? &entry->
is_tpa:
nullptr),
nullptr,
nullptr,
nullptr,
nullptr);
1275 if(kwds.empty() || kwds.front().empty() ||
len< 1)
1280 for(
const auto& kwd : kwds) {
1283 char* line =
buf.data();
1284 for(p = line; *p !=
'\0'; p++)
1285 if(*p ==
'\n'|| *p ==
'\t')
1287 for(p = line; *p ==
' '|| *p ==
'.'|| *p ==
';';)
1292 for(q = p; *q !=
'\0';)
1294 for(q--; *q ==
' '|| *q ==
'.'|| *q ==
';'; q--)
1296 for(q = p, p = line; *q !=
'\0';) {
1297 if(*q !=
' '&& *q !=
';') {
1302 for(q++; *q ==
' ';)
1309 while(*q ==
' '|| *q ==
';')
1315 for(p = line;; p = q + 1) {
1320 fta_keywords_check(p, &entry->
EST, &entry->
STS, &entry->
GSS, &entry->
HTC,
nullptr,
nullptr, (tpa_check ? &entry->
is_tpa:
nullptr),
nullptr,
nullptr,
nullptr,
nullptr);
1323specialist_db =
true;
1327inferential =
true;
1329experimental =
true;
1339TKeywordList::const_iterator key_it = kwds.end();
1343 for(TKeywordList::const_iterator
key= kwds.begin();
key!= kwds.end(); ++
key) {
1361 for(is_sage =
false, is_cage =
false; key_it != kwds.end(); ++key_it) {
1362 const char* p = key_it->c_str();
1387 for(q = dst, p = src; *p !=
'\0';)
1395 boolwasSet =
true;
1451 for(
const string&
key: keywords) {
1462 for(
const string&
key: keywords) {
1463 if(
key==
"HTG"||
key==
"HTGS_PHASE0"||
1464 key==
"HTGS_PHASE1"||
key==
"HTGS_PHASE2"||
1465 key==
"HTGS_PHASE3") {
1476 for(TKeywordList::iterator
key= keywords.begin();
key!= keywords.end();) {
1477 const char* p =
key->c_str();
1479(p[10] ==
'0'|| p[10] ==
'1'|| p[10] ==
'2'||
1482 key= keywords.erase(
key);
1491 for(
const string&
key: keywords) {
EntryBlk * GetEntryData() const
struct DataBlk::@1166 mBuf
The NCBI C++ standard methods for dealing with std::string.
#define ERR_REFERENCE_IllegalDate
#define ERR_DATE_IllegalDate
DataBlk::TList TDataBlkList
std::list< std::string > TKeywordList
int fta_atoi(string_view sv)
size_t StringLen(const char *s)
#define FtaErrPost(sev, level,...)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static const char * str(char *buf, int n)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
char Char
Alias for char.
uint16_t Uint2
2-byte (16-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
NCBI_NS_STD::string::size_type SIZE_TYPE
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
@ fConvErr_NoThrow
Do not throw an exception on error.
int Year(void) const
Get year.
list< string > TExtra_accessions
TNamed_annot_track & SetNamed_annot_track(void)
Select the variant.
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
TEmbl & SetEmbl(void)
Select the variant.
TOther & SetOther(void)
Select the variant.
const TName & GetName(void) const
Get the Name member data.
TTpe & SetTpe(void)
Select the variant.
TTpg & SetTpg(void)
Select the variant.
TPir & SetPir(void)
Select the variant.
TTpd & SetTpd(void)
Select the variant.
TVersion GetVersion(void) const
Get the Version member data.
TGpipe & SetGpipe(void)
Select the variant.
TDdbj & SetDdbj(void)
Select the variant.
TPrf & SetPrf(void)
Select the variant.
TGenbank & SetGenbank(void)
Select the variant.
TSwissprot & SetSwissprot(void)
Select the variant.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
void SetChain_id(const TChain_id &value)
Assign a value to Chain_id data member.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
const TAccession & GetAccession(void) const
Get the Accession member data.
TPdb & SetPdb(void)
Select the variant.
@ e_Other
for historical reasons, 'other' = 'refseq'
@ e_Gpipe
Internal NCBI genome pipeline processing ID.
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_Named_annot_track
Internal named annotation tracking ID.
@ e_Tpg
Third Party Annot/Seq Genbank.
bool IsSetTechexp(void) const
explanation if tech not enough
const TTechexp & GetTechexp(void) const
Get the Techexp member data.
void SetTechexp(const TTechexp &value)
Assign a value to Techexp data member.
@ eTech_htc
high throughput cDNA
@ eTech_sts
Sequence Tagged Site.
@ eTech_wgs
whole genome shotgun sequencing
@ eTech_survey
one-pass genomic sequence
@ eTech_fli_cdna
full length insert cDNA
@ eTech_est
Expressed Sequence Tag.
unsigned int
A callback function used to compare two keys in a database.
#define ERR_KEYWORD_MissingTPAKeywords
#define ERR_ACCESSION_Invalid2ndAccRange
#define ERR_ACCESSION_2ndAccPrefixMismatch
#define ERR_KEYWORD_InvalidTPATier
#define ERR_KEYWORD_UnexpectedTPA
#define ERR_KEYWORD_MissingTSAKeywords
#define ERR_KEYWORD_MissingTPATier
#define ERR_KEYWORD_ConflictingTPATiers
#define ERR_KEYWORD_MissingTLSKeywords
#define ERR_ENTRY_InvalidLineType
#define ERR_KEYWORD_MissingMGAKeywords
#define ERR_KEYWORD_ConflictingMGAKeywords
static void text(MDB_val *v)
const struct ncbi::grid::netcache::search::fields::KEY key
const CharType(& source)[N]
Defines: CTimeFormat - storage class for time format.
static const char delimiter[]
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
list< SectionPtr > mSections
string_view GetNodeData(const DataBlk &entry, int nodeType)
static const char * ParFlat_TLS_kw_array[]
bool HasHtg(const TKeywordList &keywords)
static const char * ParFlat_STS_kw_array[]
char * SrchTheChar(string_view sv, Char letter)
bool HasHtc(const TKeywordList &keywords)
void check_est_sts_gss_tpa_kwds(const TKeywordList &kwds, size_t len, IndexblkPtr entry, bool tpa_check, bool &specialist_db, bool &inferential, bool &experimental, bool &assembly)
static const char * ParFlat_MGA_kw_array[]
static const char * ParFlat_MAG_kw_array[]
bool fta_tls_keywords_check(const TKeywordList &kwds, Parser::ESource source)
void PointToNextToken(char *&ptr)
void RemoveHtgPhase(TKeywordList &keywords)
static bool sIsPrefixChar(char c)
static const char * ParFlat_TPA_kw_array_to_remove[]
bool SetTextId(Uint1 seqtype, CSeq_id &seqId, CTextseq_id &textId)
string GetBlkDataReplaceNewLine(string_view instr, Uint2 indent)
bool CheckLineType(string_view str, Int4 type, const vector< string > &keywordList, bool after_origin)
Int2 StringMatchIcase(const Char **array, string_view text)
void fta_remove_tsa_keywords(TKeywordList &kwds, Parser::ESource source)
Int2 MatchArrayISubString(const Char **array, string_view text)
CRef< CDate_std > get_full_date(string_view date_view, bool is_ref, Parser::ESource source)
void fta_remove_tpa_keywords(TKeywordList &kwds)
static optional< int > s_GetNextInt(string_view sv)
Int2 MatchArraySubString(const Char **array, string_view text)
static size_t SeekLastAlphaChar(string_view str)
bool SrchNodeType(const DataBlk &entry, Int4 type, size_t *plen, char **pptr)
static const char * ParFlat_FLI_kw_array[]
static const char * ParFlat_ENV_kw_array[]
static const char * ParFlat_TPA_kw_array[]
const Section * xTrackNodeType(const Entry &entry, int type)
static const char * ParFlat_TSA_kw_array[]
TDataBlkList & TrackNodes(const DataBlk &entry)
void fta_remove_keywords(CMolInfo::TTech tech, TKeywordList &kwds)
static const char * ParFlat_MGA_more_kw_array[]
void fta_remove_tls_keywords(TKeywordList &kwds, Parser::ESource source)
char * SrchTheStr(string_view sv, string_view leadstr)
Int2 fta_StringMatch(const Char **array, string_view text)
bool IsLeadPrefixChar(char c)
static const char * ParFlat_WGS_kw_array[]
void fta_StringCpy(char *dst, const char *src)
Int2 MatchArrayString(const char **array, string_view text)
static const char * ParFlat_HTC_kw_array[]
DataBlk * TrackNodeType(const DataBlk &entry, Int2 type)
bool fta_is_tls_keyword(string_view str)
string GetTheCurrentToken(char **ptr)
void fta_remove_mag_keywords(TKeywordList &kwds)
TokenStatBlk TokenString(string_view str, Char delimiter)
bool ParseAccessionRange(TokenStatBlk &tsbp, unsigned skip)
Int2 MatchArrayIString(const Char **array, string_view text)
void fta_keywords_check(string_view str, bool *estk, bool *stsk, bool *gssk, bool *htck, bool *flik, bool *wgsk, bool *tpak, bool *envk, bool *mgak, bool *tsak, bool *tlsk)
bool IsCancelled(const TKeywordList &keywords)
static const char * ParFlat_GSS_kw_array[]
static string FTAitoa(Int4 m)
int SrchKeyword(string_view str, const vector< string > &keywordList)
bool fta_is_tpa_keyword(string_view str)
Char * StringIStr(const Char *where, const Char *what)
bool fta_tsa_keywords_check(const TKeywordList &kwds, Parser::ESource source)
void CpSeqId(InfoBioseqPtr ibp, const CSeq_id &id)
static const char * ParFlat_EST_kw_array[]
void fta_remove_env_keywords(TKeywordList &kwds)
bool fta_is_tsa_keyword(string_view str)
bool fta_tpa_keywords_check(const TKeywordList &kwds)
bool fta_check_mga_keywords(CMolInfo &mol_info, const TKeywordList &kwds)
void xCheckEstStsGssTpaKeywords(const list< string > keywordList, bool tpa_check, IndexblkPtr entry)
void UnwrapAccessionRange(const CGB_block::TExtra_accessions &extra_accs, CGB_block::TExtra_accessions &hist)
void CleanTailNonAlphaChar(string &str)
static Uint4 letter(char c)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4