#include <ncbi_pch.hpp>
#include <corelib/ncbitime.hpp>
#include "ftacpp.hpp"
#include <corelib/ncbistr.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/object_manager.hpp>
#include <objects/seq/MolInfo.hpp>
#include <objects/seqloc/PDB_seq_id.hpp>
#include <corelib/tempstr.hpp>
#include "index.h"
#include "ftaerr.hpp"
#include "indx_def.h"
#include "utilfun.h"
#include <algorithm>
#include <charconv>
#include <optional>
Go to the source code of this file.
Go to the SVN repository for this file.
USING_SCOPE (objects) CScope & GetScope () static string FTAitoa (Int4 m) void UnwrapAccessionRange (const CGB_block::TExtra_accessions &extra_accs, CGB_block::TExtra_accessions &hist) static bool sIsPrefixChar (char c) bool IsLeadPrefixChar (char c) bool IsDigit (char c) bool ParseAccessionRange (TokenStatBlk &tsbp, unsigned skip) TokenStatBlk TokenString (string_view str, Char delimiter) Int2 fta_StringMatch (const Char **array, string_view text) Int2 StringMatchIcase (const Char **array, string_view text) Int2 MatchArrayString (const char **array, string_view text) Int2 MatchArrayIString (const Char **array, string_view text) Int2 MatchArraySubString (const Char **array, string_view text) Char * StringIStr (const Char *where, const Char *what) Int2 MatchArrayISubString (const Char **array, string_view text) string GetBlkDataReplaceNewLine (string_view instr, Uint2 indent) static size_t SeekLastAlphaChar (string_view str) void CleanTailNonAlphaChar (string &str) void PointToNextToken (char *&ptr) string GetTheCurrentToken (char **ptr) char * SrchTheChar (string_view sv, Char letter) char * SrchTheStr (string_view sv, string_view leadstr) void CpSeqId (InfoBioseqPtr ibp, const CSeq_id &id) static optional< int > s_GetNextInt (string_view sv) CRef< CDate_std > get_full_date (string_view date_view, bool is_ref, Parser::ESource source) int SrchKeyword (string_view str, const vector< string > &keywordList) bool CheckLineType (string_view str, Int4 type, const vector< string > &keywordList, bool after_origin) bool SrchNodeType (const DataBlk &entry, Int4 type, size_t *plen, char **pptr) string_view GetNodeData (const DataBlk &entry, int nodeType) TDataBlkList & TrackNodes (const DataBlk &entry) DataBlk * TrackNodeType (const DataBlk &entry, Int2 type) const Section * xTrackNodeType (const Entry &entry, int type) bool fta_tpa_keywords_check (const TKeywordList &kwds) bool fta_tsa_keywords_check (const TKeywordList &kwds, Parser::ESource source) bool 
fta_tls_keywords_check (const TKeywordList &kwds, Parser::ESource source) bool fta_is_tpa_keyword (string_view str) bool fta_is_tsa_keyword (string_view str) bool fta_is_tls_keyword (string_view str) void fta_keywords_check (string_view str, bool *estk, bool *stsk, bool *gssk, bool *htck, bool *flik, bool *wgsk, bool *tpak, bool *envk, bool *mgak, bool *tsak, bool *tlsk) void fta_remove_keywords (CMolInfo::TTech tech, TKeywordList &kwds) void fta_remove_tpa_keywords (TKeywordList &kwds) void fta_remove_tsa_keywords (TKeywordList &kwds, Parser::ESource source) void fta_remove_tls_keywords (TKeywordList &kwds, Parser::ESource source) void fta_remove_env_keywords (TKeywordList &kwds) void fta_remove_mag_keywords (TKeywordList &kwds) void xCheckEstStsGssTpaKeywords (const list< string > keywordList, bool tpa_check, IndexblkPtr entry) void check_est_sts_gss_tpa_kwds (const TKeywordList &kwds, size_t len, IndexblkPtr entry, bool tpa_check, bool &specialist_db, bool &inferential, bool &experimental, bool &assembly) bool fta_check_mga_keywords (CMolInfo &mol_info, const TKeywordList &kwds) void fta_StringCpy (char *dst, const char *src) bool SetTextId (Uint1 seqtype, CSeq_id &seqId, CTextseq_id &textId) bool IsCancelled (const TKeywordList &keywords) bool HasHtg (const TKeywordList &keywords) void RemoveHtgPhase (TKeywordList &keywords) bool HasHtc (const TKeywordList &keywords) ◆ THIS_FILE #define THIS_FILE "utilfun.cpp"Definition at line 59 of file utilfun.cpp.
◆ check_est_sts_gss_tpa_kwds() Definition at line 1270 of file utilfun.cpp.
References buf, NStr::EqualNocase(), Indexblk::EST, fta_keywords_check(), Indexblk::GSS, Indexblk::HTC, Indexblk::is_tpa, len, StringChr, and Indexblk::STS.
Referenced by GenBankIndex(), and XMLKeywordsCheck().
◆ CheckLineType() ◆ CleanTailNonAlphaChar() void CleanTailNonAlphaChar ( string & str ) ◆ CpSeqId()Definition at line 693 of file utilfun.cpp.
References CSeq_id::Assign(), CTextseq_id_Base::GetAccession(), CTextseq_id_Base::GetName(), CTextseq_id_Base::GetVersion(), InfoBioseq::ids, CTextseq_id_Base::IsSetAccession(), CTextseq_id_Base::IsSetName(), CTextseq_id_Base::IsSetVersion(), InfoBioseq::mAccNum, InfoBioseq::mLocus, Ref(), and SetTextId().
Referenced by FindCd().
◆ fta_check_mga_keywords() Definition at line 1334 of file utilfun.cpp.
References NStr::EqualNocase(), ERR_KEYWORD_ConflictingMGAKeywords, ERR_KEYWORD_MissingMGAKeywords, FtaErrPost, CMolInfo_Base::GetTechexp(), CMolInfo_Base::IsSetTechexp(), ncbi::grid::netcache::search::fields::key, MatchArrayIString(), ParFlat_MGA_more_kw_array, CMolInfo_Base::SetTechexp(), and SEV_REJECT.
Referenced by GetGBBlock(), and XMLGetGBBlock().
◆ fta_is_tls_keyword() bool fta_is_tls_keyword ( string_view str ) ◆ fta_is_tpa_keyword() bool fta_is_tpa_keyword ( string_view str ) ◆ fta_is_tsa_keyword() bool fta_is_tsa_keyword ( string_view str ) ◆ fta_keywords_check() void fta_keywords_check ( string_view str, bool * estk, bool * stsk, bool * gssk, bool * htck, bool * flik, bool * wgsk, bool * tpak, bool * envk, bool * mgak, bool * tsak, bool * tlsk )Definition at line 1094 of file utilfun.cpp.
References MatchArrayString(), ParFlat_ENV_kw_array, ParFlat_EST_kw_array, ParFlat_FLI_kw_array, ParFlat_GSS_kw_array, ParFlat_HTC_kw_array, ParFlat_MGA_kw_array, ParFlat_STS_kw_array, ParFlat_TLS_kw_array, ParFlat_TPA_kw_array, ParFlat_TSA_kw_array, ParFlat_WGS_kw_array, and str().
Referenced by check_est_sts_gss_tpa_kwds(), GetDescrEmblBlock(), GetGBBlock(), xCheckEstStsGssTpaKeywords(), XMLGetEMBLBlock(), and XMLGetGBBlock().
◆ fta_remove_env_keywords() ◆ fta_remove_keywords() Definition at line 1131 of file utilfun.cpp.
References b, CMolInfo_Base::eTech_est, CMolInfo_Base::eTech_fli_cdna, CMolInfo_Base::eTech_htc, CMolInfo_Base::eTech_sts, CMolInfo_Base::eTech_survey, CMolInfo_Base::eTech_wgs, ncbi::grid::netcache::search::fields::key, MatchArrayString(), ParFlat_EST_kw_array, ParFlat_FLI_kw_array, ParFlat_GSS_kw_array, ParFlat_HTC_kw_array, ParFlat_STS_kw_array, and ParFlat_WGS_kw_array.
Referenced by GetDescrEmblBlock(), GetGBBlock(), XMLGetEMBLBlock(), and XMLGetGBBlock().
◆ fta_remove_mag_keywords() ◆ fta_remove_tls_keywords() ◆ fta_remove_tpa_keywords() ◆ fta_remove_tsa_keywords() ◆ fta_StringCpy() void fta_StringCpy ( char * dst, const char * src )Definition at line 1382 of file utilfun.cpp.
Referenced by CheckDelGbblockSourceFromDescrs(), DefVsHTGKeywords(), ExtractErratum(), fta_get_part_sup(), fta_parse_rrna_feat(), fta_parse_structured_comment(), GetDescrComment(), GetSPDescrComment(), GetSPDescrTitle(), ind_subdbp(), LoadEntry(), LoadEntryGenbank(), ShrinkSpaces(), SPPostProcVarSeq(), SPValidateEcnum(), stripStr(), XMLGetDescr(), and XMLGetDescrComment().
◆ fta_StringMatch() Definition at line 409 of file utilfun.cpp.
References NStr::EqualCase(), i, StringLen(), and text().
Referenced by CheckDIV(), CheckNA(), CheckNADDBJ(), CkSPComTopics(), EmblGetDivisionNewID(), GetDescrEmblBlock(), GetEmblBlockXref(), IsTPAAccPrefix(), IsTSAAccPrefix(), IsWGSAccPrefix(), ParseSpComment(), and XMLGetEMBLBlock().
◆ fta_tls_keywords_check() Definition at line 1035 of file utilfun.cpp.
References Parser::EMBL, NStr::EqualNocase(), ERR_KEYWORD_MissingTLSKeywords, FtaErrPost, i, ncbi::grid::netcache::search::fields::key, MatchArrayIString(), ParFlat_TLS_kw_array, SEV_REJECT, and rapidjson::source.
Referenced by GetDescrEmblBlock(), GetGBBlock(), and XMLGetGBBlock().
◆ fta_tpa_keywords_check() Definition at line 919 of file utilfun.cpp.
References b, buf, NStr::EqualNocase(), ERR_KEYWORD_ConflictingTPATiers, ERR_KEYWORD_InvalidTPATier, ERR_KEYWORD_MissingTPAKeywords, ERR_KEYWORD_MissingTPATier, ERR_KEYWORD_UnexpectedTPA, FtaErrPost, i, ncbi::grid::netcache::search::fields::key, len, MatchArrayIString(), ParFlat_TPA_kw_array, SEV_ERROR, SEV_REJECT, and SEV_WARNING.
Referenced by GetDescrEmblBlock(), GetGBBlock(), and XMLGetGBBlock().
◆ fta_tsa_keywords_check() Definition at line 1001 of file utilfun.cpp.
References Parser::EMBL, NStr::EqualNocase(), ERR_KEYWORD_MissingTSAKeywords, FtaErrPost, i, ncbi::grid::netcache::search::fields::key, MatchArrayIString(), ParFlat_TSA_kw_array, SEV_REJECT, and rapidjson::source.
Referenced by GetDescrEmblBlock(), GetGBBlock(), and XMLGetGBBlock().
◆ FTAitoa() ◆ get_full_date() Definition at line 740 of file utilfun.cpp.
References ERR_DATE_IllegalDate, ERR_REFERENCE_IllegalDate, FtaErrPost, int, isdigit(), months, msg(), Ref(), s_GetNextInt(), SEV_ERROR, SEV_WARNING, rapidjson::source, Parser::SPROT, and CTime::Year().
Referenced by get_pat(), get_sub(), get_sub_gsdb(), GetCitPatent(), GetUpdateDate(), and MakeDatePtr().
◆ GetBlkDataReplaceNewLine() string GetBlkDataReplaceNewLine ( string_view instr, Uint2 indent ) Definition at line 562 of file utilfun.cpp.
References indent(), last(), NStr::Split(), and NStr::TruncateSpacesInPlace().
Referenced by GetDescrEmblBlock(), GetEmblDescr(), GetGBBlock(), GetGenBankDescr(), GetGenBankLineage(), GetSequenceOfKeywords(), GetSPDescrTitle(), SPFeatGeneRef(), SPFeatProtRef(), and SprotRefString().
◆ GetNodeData() ◆ GetScope() ◆ GetTheCurrentToken() string GetTheCurrentToken ( char ** ptr ) ◆ HasHtc() ◆ HasHtg() ◆ IsCancelled() ◆ IsDigit() ◆ IsLeadPrefixChar() bool IsLeadPrefixChar ( char c ) inline ◆ MatchArrayIString()Definition at line 472 of file utilfun.cpp.
References NStr::EqualNocase(), i, and text().
Referenced by DbxrefQualToDbtag(), fta_check_mga_keywords(), fta_is_tls_keyword(), fta_is_tpa_keyword(), fta_is_tsa_keyword(), fta_remove_env_keywords(), fta_remove_mag_keywords(), fta_remove_tls_keywords(), fta_remove_tpa_keywords(), fta_remove_tsa_keywords(), fta_tls_keywords_check(), fta_tpa_keywords_check(), fta_tsa_keywords_check(), and SPGetPEValue().
◆ MatchArrayISubString() Int2 MatchArrayISubString ( const Char ** array, string_view text ) ◆ MatchArrayString() Int2 MatchArrayString ( const char ** array, string_view text )Definition at line 456 of file utilfun.cpp.
References NStr::Equal(), i, and text().
Referenced by CheckCollectionDate(), fta_check_mobile_element(), fta_check_ncrna(), fta_check_pseudogene_qual(), fta_check_satellite(), fta_keywords_check(), fta_remove_keywords(), GetFeatNameAndLoc(), GetRnaRef(), and UpdateRawBioSource().
◆ MatchArraySubString() Int2 MatchArraySubString ( const Char ** array, string_view text ) ◆ ParseAccessionRange()Definition at line 277 of file utilfun.cpp.
References ERR_ACCESSION_2ndAccPrefixMismatch, ERR_ACCESSION_Invalid2ndAccRange, NStr::fConvErr_NoThrow, first(), FtaErrPost, IsDigit(), IsLeadPrefixChar(), last(), TokenStatBlk::list, TokenStatBlk::num, SEV_REJECT, sIsPrefixChar(), NStr::StringToInt(), and tmp.
Referenced by GetAccession().
◆ PointToNextToken() void PointToNextToken ( char *& ptr ) ◆ RemoveHtgPhase() ◆ s_GetNextInt() static optional<int> s_GetNextInt ( string_view sv ) static ◆ SeekLastAlphaChar() static size_t SeekLastAlphaChar ( string_view str ) static ◆ SetTextId()Definition at line 1393 of file utilfun.cpp.
References CSeq_id_Base::e_Ddbj, CSeq_id_Base::e_Embl, CSeq_id_Base::e_Genbank, CSeq_id_Base::e_Gpipe, CSeq_id_Base::e_Named_annot_track, CSeq_id_Base::e_Other, CSeq_id_Base::e_Pdb, CSeq_id_Base::e_Pir, CSeq_id_Base::e_Prf, CSeq_id_Base::e_Swissprot, CSeq_id_Base::e_Tpd, CSeq_id_Base::e_Tpe, CSeq_id_Base::e_Tpg, CPDB_seq_id_Base::SetChain_id(), CSeq_id_Base::SetDdbj(), CSeq_id_Base::SetEmbl(), CSeq_id_Base::SetGenbank(), CSeq_id_Base::SetGpipe(), CSeq_id_Base::SetNamed_annot_track(), CSeq_id_Base::SetOther(), CSeq_id_Base::SetPdb(), CSeq_id_Base::SetPir(), CSeq_id_Base::SetPrf(), CSeq_id_Base::SetSwissprot(), CSeq_id_Base::SetTpd(), CSeq_id_Base::SetTpe(), and CSeq_id_Base::SetTpg().
Referenced by CpSeqId(), CreateEntryBioseq(), fta_fix_seq_id(), fta_parse_tpa_tsa_block(), GetProtRefSeqId(), MakeAccSeqId(), and MakeLocusSeqId().
◆ sIsPrefixChar() ◆ SrchKeyword() int SrchKeyword ( string_view str, const vector< string > & keywordList ) ◆ SrchNodeType()Definition at line 859 of file utilfun.cpp.
References DataBlk::len, DataBlk::mBuf, DataBlk::ptr, and TrackNodeType().
Referenced by fta_get_user_object(), GetDescrEmblBlock(), GetDRlineDataSP(), GetEmblBlockXref(), GetEmblDate(), GetEmblDescr(), GetFlatBiomol(), GetGBBlock(), GetGenBankDescr(), GetReleaseInfo(), GetSeqData(), GetSPDate(), GetSPDescrComment(), GetSprotDescr(), ParseSPFeat(), SpAddToIndexBlk(), SPFeatGeneRef(), SPFeatProtRef(), and SPGetPEValue().
◆ SrchTheChar() char* SrchTheChar ( string_view sv, Char letter ) Definition at line 665 of file utilfun.cpp.
Referenced by BuildFeatureBlock(), FakeGenBankBioSources(), fta_parse_tpa_tsa_block(), GetDescrComment(), GetEmblBlockXref(), GetEmblDate(), GetGBBlock(), GetReleaseInfo(), ParseSPFeat(), and TrimEmblFeatBlk().
◆ SrchTheStr() char* SrchTheStr ( string_view sv, string_view leadstr ) ◆ StringIStr()Definition at line 509 of file utilfun.cpp.
Referenced by CheckForUnusualFullLengthOrgs(), fill_orgref(), fta_get_trna_from_product(), fta_if_special_org(), fta_parse_rrna_feat(), fta_remark_is_er(), fta_strip_pub_comment(), get_embl_pmid(), get_embl_str_pub_id(), get_muid(), get_plasmid(), GetCitBook(), GetCitBookOld(), GetFlatBiomol(), GetRnaRef(), GetSPDescrTitle(), GetTaxnameNameFromDescrs(), GetViralHostsFrom_OH(), QualsToSeqID(), SPFeatGeneRef(), and SPGetVerNum().
◆ StringMatchIcase() Definition at line 430 of file utilfun.cpp.
References NStr::EqualNocase(), i, StringLen(), and text().
Referenced by CheckDelGbblockSourceFromDescrs(), CkLocusLinePos(), get_plasmid(), GetGenomeInfo(), GetSPGenomeFrom_OS_OG(), UpdateRawBioSource(), XMLCheckSTRAND(), and XMLCheckTPG().
◆ TokenString() ◆ TrackNodes() Definition at line 890 of file utilfun.cpp.
References EntryBlk::chain, and DataBlk::GetEntryData().
Referenced by check_cds(), FakeEmblBioSources(), get_plasmid(), GetEmblDescr(), GetEmblSubBlock(), GetGenBankDescr(), GetGenBankSubBlock(), GetSprotDescr(), GetSprotSubBlock(), ImpFeatPub(), LoadFeat(), SeqFeatPub(), and TrackNodeType().
◆ TrackNodeType() Definition at line 896 of file utilfun.cpp.
References TrackNodes().
Referenced by AddNIDSeqId(), DefVsHTGKeywords(), GetEmblDescr(), GetEmblInstContig(), GetFlatBiomol(), GetGenBankDescr(), GetGenBankInstContig(), GetGenBankSubBlock(), GetNodeData(), GetSprotSubBlock(), SrchNodeSubType(), and SrchNodeType().
◆ UnwrapAccessionRange() ◆ USING_SCOPE() ◆ xCheckEstStsGssTpaKeywords()Definition at line 1234 of file utilfun.cpp.
References Indexblk::assembly, NStr::EqualNocase(), Indexblk::EST, Indexblk::experimental, fta_keywords_check(), Indexblk::GSS, Indexblk::HTC, Indexblk::inferential, Indexblk::is_tpa, Indexblk::specialist_db, and Indexblk::STS.
Referenced by EmblIndex().
◆ xTrackNodeType() ◆ ParFlat_ENV_kw_array const char* ParFlat_ENV_kw_array[] static ◆ ParFlat_EST_kw_array const char* ParFlat_EST_kw_array[] static Initial value:= {
"EST",
"EST PROTO((expressed sequence tag)",
"expressed sequence tag",
"EST (expressed sequence tag)",
"EST (expressed sequence tags)",
"EST(expressed sequence tag)",
"transcribed sequence fragment",
nullptr
}
Definition at line 72 of file utilfun.cpp.
Referenced by fta_keywords_check(), and fta_remove_keywords().
◆ ParFlat_FLI_kw_array const char* ParFlat_FLI_kw_array[] static ◆ ParFlat_GSS_kw_array const char* ParFlat_GSS_kw_array[] static ◆ ParFlat_HTC_kw_array const char* ParFlat_HTC_kw_array[] static ◆ ParFlat_MAG_kw_array const char* ParFlat_MAG_kw_array[] static ◆ ParFlat_MGA_kw_array const char* ParFlat_MGA_kw_array[] static ◆ ParFlat_MGA_more_kw_array const char* ParFlat_MGA_more_kw_array[] static Initial value:= {
"CAGE (Cap Analysis Gene Expression)",
"5'-SAGE",
"5'-end tag",
"unspecified tag",
"small RNA",
nullptr
}
Definition at line 121 of file utilfun.cpp.
Referenced by fta_check_mga_keywords().
◆ ParFlat_STS_kw_array const char* ParFlat_STS_kw_array[] static ◆ ParFlat_TLS_kw_array const char* ParFlat_TLS_kw_array[] static ◆ ParFlat_TPA_kw_array const char* ParFlat_TPA_kw_array[] static Initial value:= {
"TPA",
"THIRD PARTY ANNOTATION",
"THIRD PARTY DATA",
"TPA:INFERENTIAL",
"TPA:EXPERIMENTAL",
"TPA:REASSEMBLY",
"TPA:ASSEMBLY",
"TPA:SPECIALIST_DB",
nullptr
}
Definition at line 151 of file utilfun.cpp.
Referenced by fta_is_tpa_keyword(), fta_keywords_check(), and fta_tpa_keywords_check().
◆ ParFlat_TPA_kw_array_to_remove const char* ParFlat_TPA_kw_array_to_remove[] static ◆ ParFlat_TSA_kw_array const char* ParFlat_TSA_kw_array[] static ◆ ParFlat_WGS_kw_array const char* ParFlat_WGS_kw_array[] static
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4