=
"taxonomy";
56 static const string kNucDb=
"nucleotide";
57 static const string kAssmDb=
"assembly";
65{ return NStr::NumericToString(ENTREZ_ID_TO(SStrictId_Entrez::TId, id)); });
70 const string& release_type)
76uids_from.push_back(gi);
84 LOG_POST(
Error<<
"Failed to get assembly entrez ids for gi: "<< gi <<
". Error: "<< e.
GetMsg());
88 if(uids_to.empty()) {
104 const string& release_type)
107 string msg=
"Invalid tax-id: "+ tax_id;
113 stringterm = tax_id +
"[taxid]";
117 string msg=
"Can't find the associated assembly for tax-id: "+ tax_id;
131 const string& release_type,
148 if( !uids.empty() ) {
173 string msg=
"Can't find any assembly related to \""+ term +
"\"";
194 LOG_POST(
Error<<
"Failed to search for tax id: "<< tax_id <<
". Error: "<< e.
GetMsg());
197 return(
count?
true:
false);
205 stringterm = EntrezIdsToString(uids);
212 LOG_POST(
Error<<
"Get error when trying to search assembly for tax ids: "<< term <<
". Error: "<< e.
GetMsg());
224ecli.
Search(db, term, uids);
226 LOG_POST(
Error<<
"Get error when trying to search records on db: " 227<< db <<
" with search term: " 228<< term <<
". Error: "<< e.
GetMsg());
242 for(itDocSum = doc_sums.
begin(); itDocSum != doc_sums.
end(); ++itDocSum) {
249assm->SetRelease_type(
"refseq");
251assm->SetRelease_type(
"genbank");
254 stringn_name = sub_iter->get_name();
255 stringn_value = sub_iter->get_content();
256 if(n_value.empty())
continue;
259assm->SetOrganism(n_value);
261assm->SetClass(n_value);
263assm->SetDescr(n_value);
265assm->SetRelease_date(n_value);
267assm->SetOther_names().push_back(
"UCSC name: "+ n_value);
269assm->SetOther_names().push_back(
"Ensembl name: "+ n_value);
272assms.push_back(assm);
280 if(assm->GetRelease_type() ==
"genbank") {
286 if(gbid_node != itDocSum->
end() && !
string(gbid_node->
get_content()).empty()) {
288 if(syn_node != itDocSum->
end()) {
290 if(gb_assm_node != syn_node->
end()) {
292gb_assm->SetName(assm->GetName());
293gb_assm->SetAccession(gb_assm_node->
get_content());
294gb_assm->SetRelease_type(
"genbank");
295 if(assm->IsSetOrganism()) {
296gb_assm->SetOrganism(assm->GetOrganism());
298 if(assm->IsSetClass()) {
299gb_assm->SetClass(assm->GetClass());
301 if(assm->IsSetDescr()) {
302gb_assm->SetDescr(assm->GetDescr());
304 if(assm->IsSetRelease_date()) {
305gb_assm->SetRelease_date(assm->GetRelease_date());
307 if(assm->IsSetOther_names()) {
308gb_assm->SetOther_names() = assm->GetOther_names();
311assms.push_back(gb_assm);
317 LOG_POST(
Error<<
"Failed to get summary for the following assemblies: "<< EntrezIdsToString(uids) <<
". Error: "<< e.
GetMsg());
323 const string& release_type)
326 if( !release_type.empty()) {
327CDL_AssmInfo::TAssemblies::iterator iter = assms.begin();
328 while(iter != assms.end()) {
329 if((*iter)->CanGetRelease_type() &&
330(*iter)->GetRelease_type() != release_type) {
331iter = assms.erase(iter);
342 if(assm2->CanGetRelease_date() && assm1->CanGetRelease_date())
343 returnassm2->GetRelease_date() < assm1->GetRelease_date();
345 if(assm2->CanGetRelease_date() && !assm1->CanGetRelease_date())
354 if(assms.size() < 2)
return;
366a_map[
"Homo sapiens"] =
"GCF_000001405";
367a_map[
"Mus musculus"] =
"GCF_000001635";
368a_map[
"Bos taurus"] =
"GCF_000003205";
369a_map[
"Rattus norvegicus"] =
"GCF_000001895";
371CDL_AssmInfo::TAssemblies::iterator iter = assms.
begin();
373 if((*iter)->CanGetOrganism()) {
374TAssmMap::const_iterator a_iter = a_map.find((*iter)->GetOrganism());
375 if(a_iter != a_map.end()) {
376 while(iter != assms.end() &&
377(*iter)->GetAccession().find(a_iter->second) == string::npos) {
381 if(iter != assms.end() && iter != assms.begin()) {
384assms.push_front(assm);
393 if( !assms.empty() ) {
404equiv_accs.
clear();
406 if(equiv_assemblies && equiv_assemblies->CanGetAssemblies()) {
408equiv_accs.
insert((*it)->GetAccession());
420 autoit = equiv_accs.
find(acc2);
421 returnit != equiv_accs.
end();
422}
catch(exception& e) {
434 if(assm_acc == term)
436 if(gc_assm.
GetName() == term)
439 CRegexpregexGenAcc(
"GC(A|F)_\\d{9}(.\\d+)");
441 if(regexGenAcc.
IsMatch(term)) {
445 boolequiv_acc_fetched =
false;
447 strings_term = term;
450 string query(
"\"");
452 query+=
"\"[All+Names]";
454 size_ttotal_uids = 0;
459 if(itAccession == it->
end())
462 if(assm_acc == acc) {
466 if(equiv_acc_fetched ==
false) {
469equiv_acc_fetched =
true;
471 if(equiv_accs.count(acc) > 0) {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static bool s_IsAsmAccEquiv(CGenomicCollectionsService &gc, const string &acc1, const string &acc2)
static bool s_AssmCompare(const CRef< CDL_Assembly > &assm1, const CRef< CDL_Assembly > &assm2)
static const string kTaxDb
static const string kAssmDb
static const string kNucDb
static void s_GetAssmEquiv(CGenomicCollectionsService &gc, const string &acc, set< string > &equiv_accs)
static CRef< objects::CDL_AssmInfo > x_CreateAssmInfo(objects::CDL_AssmInfo::TAssemblies &assms)
static CRef< objects::CDL_AssmInfo > GetAssms_Term(const string &term, const string &release_type=NcbiEmptyString, ICanceled *cancel=0)
static void x_GetAssmInfo(const CSeqUtils::TEntrezIds &uids, objects::CDL_AssmInfo::TAssemblies &assms)
get detailed assembly information for a list of gencoll ids.
static CRef< objects::CDL_AssmInfo > GetAssms_Gi(TGi gi, const string &release_type=NcbiEmptyString)
static bool x_ValidateTaxId(const string &tax_id)
Validate the given tax-id.
static CRef< objects::CDL_AssmInfo > GetAssms_TaxId(const string &tax_id, const string &release_type=NcbiEmptyString)
static void x_FilterAssemblies(objects::CDL_AssmInfo::TAssemblies &assms, const string &release_type)
filter the assemblies by release type and category=full-assembly.
static void x_SortAssms(objects::CDL_AssmInfo::TAssemblies &assms)
sort assemblies based on release date.
static bool IsSameAssembly(const objects::CGC_Assembly &gc_assm, const string &term)
Check if assembly name (e.g. hg19, grch37) matches CGC_Assembly.
static void x_FilterTaxIds(CSeqUtils::TEntrezIds &uids)
Filter out the tax ids that have no associated assembly.
static void x_SearchIds(const string &term, const string &db, CSeqUtils::TEntrezIds &uids)
search all related links in the given db with a search term.
static void Query(const string &db_name, const string &terms, size_t &total_uids, xml::document &docsums, size_t max_return=0)
void SetMaxReturn(int ret_max)
Uint8 Search(const string &db, const string &term, vector< objects::CSeq_id_Handle > &uids, const string &xml_path=kEmptyStr)
void Summary(const string &db, const vector< objects::CSeq_id_Handle > &uids, xml::document &docsums, const string &version="")
Uint8 Count(const string &db, const string &term)
string GetAccession() const
Retrieve the accession for this assembly.
string GetName() const
Retrieve the name of this assembly.
static CRef< objects::CGenomicCollectionsService > GetGenCollService(int timeout_sec=-1)
CRef< CGCClient_EquivalentAssemblies > GetEquivalentAssemblies(const string &acc, int equivalency)
Interface for testing cancellation request in a long lasting operation.
const_iterator begin() const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
The xml::document class is used to hold the XML tree and various bits of information about it.
const node & get_root_node(void) const
Get a reference to the root node of this document.
The xml::node::const_iterator provides a way to access children nodes similar to a standard C++ container.
The xml::node_set::const_iterator class is used to iterate over nodes in a node set.
The xml::node_set class is used to store xpath query result set.
iterator begin()
Get an iterator that points to the beginning of the xpath query result node set.
iterator end()
Get an iterator that points one past the last node in the xpath query result node set.
The xml::node class is used to hold information about one XML node.
iterator end(void)
Get an iterator that points one past the last child for this node.
iterator find(const char *name, const ns *nspace=NULL)
Find the first child node that has the given name and namespace.
iterator begin(void)
Get an iterator that points to the beginning of this node's children.
node_set run_xpath_query(const xpath_expression &expr)
Run the given XPath query.
const char * get_content(void) const
Get the content for this text node.
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
static void ELinkQuery(const string &db_from, const string &db_to, const TEntrezIds &uids_from, TEntrezIds &uids_to, const string &cmd="neighbor", const string &xpath="//Link/Id/text()")
Queries elink.fcgi with a vector of uids/seq-ids (seq-ids preferred for future compatibility) and ret...
vector< TEntrezId > TEntrezIds
void Reset(void)
Reset reference object.
uint64_t Uint8
8-byte (64-bit) unsigned integer
bool IsMatch(CTempString str, TMatch flags=fMatch_default)
Check for the existence of a substring that matches the specified pattern.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static string TransformJoin(TIterator from, TIterator to, const CTempString &delim, FTransform fnTransform)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ eNocase
Case insensitive compare.
TAssemblies & SetAssemblies(void)
Select the variant.
list< CRef< CDL_Assembly > > TAssemblies
list< CRef< C_E_Assemblies > > TAssemblies
@ eEquivalency_all_types_same_coordinates
All assemblies with same major release, both Gb and Rs.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
C++ wrappers for the Perl-compatible regular expression (PCRE) library.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4