A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/blastdb__dataextract_8cpp_source.html below:

NCBI C++ ToolKit: src/objtools/blast/blastdb_format/blastdb_dataextract.cpp Source File

49 #define NOT_AVAILABLE "N/A" 65

}

else if

(

id

.IsGi()) {

69

}

else if

(

id

.IsPig()) {

72  string

acc(

id

.GetStringId());

81

target_seq_id = &(*seq_id);

88  "Entry not found in BLAST database"

);

94  "Entry found in BLAST database has invalid length"

);

107  "start pos > length of sequence"

);

123  if

(e.

GetMsg

().find(

"oid headers do not contain target gi"

)) {

125  "Entry not found in BLAST database"

);

153  if

((*itr)->IsGi()) {

154  m_Gi

= (*itr)->GetGi();

183  if

((*itr)->IsSetLinks()) {

184  if

(seqid->

IsGi

()) {

199

retval.erase(retval.size()-1, 1);

220

(*itr)->IsSetMemberships()) {

222

(*itr)->GetMemberships()) {

320

gi2title[gi] = (*bd)->GetTitle();

362

retval = retval.erase(0, 4);

379  if

((*itr)->IsTitle()) {

380  return

(*itr)->GetTitle();

393  if

(taxids.

empty

()) {

398  if

(retval.empty()) {

425  const TTaxId

kTaxID = *taxid_iter;

429  if

(retval.empty()) {

436  if

(retval.empty()) {

461  const TTaxId

kTaxID = *taxid_iter;

465  if

(retval.empty()) {

472  if

(retval.empty()) {

553 #if ((defined(NCBI_COMPILER_WORKSHOP) && (NCBI_COMPILER_VERSION <= 550)) || \ 554  defined(NCBI_COMPILER_MIPSPRO)) 563  out

<< range->first <<

"-"

<< range->second <<

SEPARATOR

;

604 #define CTRL_A "\001" 608  static const string

kTarget(

" >gi|"

);

623  return

(*desc)->GetTitle();

633  static const string

kStandardSeparator(

" >"

);

639  for

(

auto

token : tokens) {

645  const string

kPossibleId(token, 0, pos !=

NPOS

? pos : token.length());

652  if

(!seqids.empty()) {

657

retval += token.substr(pos, token.length() - pos);

659

retval += kStandardSeparator + token;

666

stringstream

out

(

""

);

697  if

(!masked_ranges.

empty

()) {

703

fasta.

SetMask

(kMaskType, masks);

710

lcl_tmp = lcl_tmp.erase(0, 4);

722  CRef<CSeq_id> id = FindBestChoice(m_Bioseq->GetId(), CSeq_id::Score); 723  out << GetBareId(*id); 725  string title = s_GetTitle(*m_Bioseq.GetNonNullPointer()); 726  out << ' ' << s_ConfigureDeflineTitle(title, m_UseCtrlA); 729  CScope scope(*CObjectManager::GetInstance()); 730  fasta.WriteSequence(scope.AddBioseq(*m_Bioseq), range); 733  catch (const CObjmgrUtilException& e) { 734  if (e.GetErrCode() == CObjmgrUtilException::eBadLocation) { 735  NCBI_THROW(CInvalidDataException, eInvalidRange, 736  "Invalid sequence range"); 742 TTaxId CBlastDBExtractor::x_ExtractTaxId() 746  if (m_Gi != ZERO_GI) { 747  if (m_Gi2TaxidMap.first != m_Oid) 749  m_Gi2TaxidMap.first = m_Oid; 750  m_BlastDb.GetTaxIDs(m_Oid, m_Gi2TaxidMap.second); 752  return m_Gi2TaxidMap.second[m_Gi]; 754  // for database without Gi: 755  vector<TTaxId> taxid; 756  m_BlastDb.GetTaxIDs(m_Oid, taxid); 757  return taxid.size() ? taxid[0] : ZERO_TAX_ID; 760 void CBlastDBExtractor::x_ExtractLeafTaxIds(set<TTaxId>& taxids) 764  if (m_Gi != ZERO_GI) { 765  if (m_Gi2TaxidSetMap.first != m_Oid) 767  m_Gi2TaxidSetMap.first = m_Oid; 768  m_BlastDb.GetLeafTaxIDs(m_Oid, m_Gi2TaxidSetMap.second); 771  const set<TTaxId>& taxid_set = m_Gi2TaxidSetMap.second[m_Gi]; 772  taxids.insert(taxid_set.begin(), taxid_set.end()); 775  // for database without Gi: 776  vector<TTaxId> taxid; 777  m_BlastDb.GetLeafTaxIDs(m_Oid, taxid); 779  taxids.insert(taxid.begin(), taxid.end()); 783 CBlastDBExtractor::x_ExtractMaskingData(CSeqDB::TSequenceRanges &ranges, 788  m_BlastDb.GetMaskData(m_Oid, algo_id, ranges); 792 void CBlastDBExtractor::SetConfig(TSeqRange range, objects::ENa_strand strand, 795  m_OrigSeqRange = range; 797  m_FiltAlgoId = filt_algo_id; 800 void CBlastDeflineUtil::ExtractDataFromBlastDeflineSet(const CBlast_def_line_set & dl_set, 801  vector<string> & results, 802  BlastDeflineFields fields, 806  CSeq_id target_seq_id (target_id, CSeq_id::fParse_PartialOK | CSeq_id::fParse_Default); 807  Int8 num_id = 
NStr::StringToNumeric<Int8>(target_id, NStr::fConvErr_NoThrow); 808  bool can_be_gi = errno ? false: true; 809  ITERATE(CBlast_def_line_set::Tdata, itr, dl_set.Get()) { 810  ITERATE(CBlast_def_line::TSeqid, id, (*itr)->GetSeqid()) { 811  if ((*id)->Match(target_seq_id) || (can_be_gi && (*id)->IsGi() && ((*id)->GetGi() == GI_FROM(TIntId, num_id)))) { 812  CBlastDeflineUtil::ExtractDataFromBlastDefline( **itr, results, fields, use_long_id); 818  NCBI_THROW(CException, eInvalid, "Failed to find target id " + target_id); 821 static string s_CheckName(const string & name) 823  if(name == "-") return NOT_AVAILABLE; 824  if(name == "unclassified") return NOT_AVAILABLE; 829 void CBlastDeflineUtil::ExtractDataFromBlastDefline(const CBlast_def_line & dl, 830  vector<string> & results, 831  BlastDeflineFields fields, 835  results.resize(CBlastDeflineUtil::max_index, kEmptyStr); 836  if (fields.gi == 1) { 837  results[CBlastDeflineUtil::gi] = NOT_AVAILABLE; 838  ITERATE(CBlast_def_line::TSeqid, id, dl.GetSeqid()) { 840  TGi gi = (*id)->GetGi(); 841  results[CBlastDeflineUtil::gi] = NStr::NumericToString(gi); 846  if ((fields.accession == 1) || (fields.seq_id == 1)) { 847  CRef<CSeq_id> theId = FindBestChoice(dl.GetSeqid(), CSeq_id::WorstRank); 848  if(fields.seq_id == 1) { 849  results[CBlastDeflineUtil::seq_id] = theId->AsFastaString(); 851  if(fields.accession == 1) { 852  results[CBlastDeflineUtil::accession] = GetBareId(*theId); 855  if(fields.title == 1) { 856  if(dl.IsSetTitle()) { 857  results[CBlastDeflineUtil::title] = dl.GetTitle(); 860  results[CBlastDeflineUtil::title] = NOT_AVAILABLE; 863  if ((fields.tax_id == 1) || (fields.tax_names == 1)) { 864  TTaxId tax_id = ZERO_TAX_ID; 865  if (dl.IsSetTaxid()) { 866  tax_id = dl.GetTaxid(); 869  if (fields.tax_id == 1) { 870  results[CBlastDeflineUtil::tax_id] = NStr::NumericToString(tax_id); 873  if (fields.tax_names == 1) { 875  SSeqDBTaxInfo taxinfo; 876  CSeqDB::GetTaxInfo(tax_id, taxinfo); 877  
results[CBlastDeflineUtil::scientific_name] = taxinfo.scientific_name; 878  results[CBlastDeflineUtil::common_name] = taxinfo.common_name; 879  results[CBlastDeflineUtil::blast_name] = s_CheckName(taxinfo.blast_name); 880  results[CBlastDeflineUtil::super_kingdom] = s_CheckName(taxinfo.s_kingdom); 881  } catch (const CException&) { 882  results[CBlastDeflineUtil::scientific_name] = NOT_AVAILABLE; 883  results[CBlastDeflineUtil::common_name] = NOT_AVAILABLE; 884  results[CBlastDeflineUtil::blast_name] = NOT_AVAILABLE; 885  results[CBlastDeflineUtil::super_kingdom] = NOT_AVAILABLE; 890  if ((fields.leaf_node_tax_ids == 1) || (fields.leaf_node_tax_names == 1)) { 891  set<TTaxId> tax_id_set = dl.GetLeafTaxIds(); 892  if (tax_id_set.empty()) { 893  if (dl.IsSetTaxid()) { 894  tax_id_set.insert(dl.GetTaxid()); 897  tax_id_set.insert(ZERO_TAX_ID); 901  string separator = kEmptyStr; 902  ITERATE(set<TTaxId>, itr, tax_id_set) { 903  if (fields.leaf_node_tax_names == 1) { 905  SSeqDBTaxInfo taxinfo; 906  CSeqDB::GetTaxInfo(*itr, taxinfo); 907  results[CBlastDeflineUtil::leaf_node_scientific_names] += separator + taxinfo.scientific_name; 908  results[CBlastDeflineUtil::leaf_node_common_names] += separator + taxinfo.common_name; 909  } catch (const CException&) { 910  results[CBlastDeflineUtil::leaf_node_scientific_names] += separator + NOT_AVAILABLE; 911  results[CBlastDeflineUtil::leaf_node_common_names] += separator + NOT_AVAILABLE; 914  results[CBlastDeflineUtil::leaf_node_tax_ids] += separator + NStr::NumericToString(*itr); 915  separator = SEPARATOR; 919  if (fields.membership == 1) { 921  if(dl.IsSetMemberships()) { 922  ITERATE(CBlast_def_line::TMemberships, memb_int, dl.GetMemberships()) { 923  membership += *memb_int; 926  results[CBlastDeflineUtil::membership] = NStr::NumericToString(membership); 929  if (fields.pig == 1) { 931  if (dl.IsSetOther_info()) { 932  ITERATE(CBlast_def_line::TOther_info, itr, dl.GetOther_info()) { 939  results[CBlastDeflineUtil::pig] = 
NStr::NumericToString(pig); 941  if(fields.links == 1) { 942  if (dl.IsSetLinks()) { 943  ITERATE(CBlast_def_line::TLinks, links_int, dl.GetLinks()) { 944  results[CBlastDeflineUtil::links] += NStr::NumericToString(*links_int) + SEPARATOR; 948  results[CBlastDeflineUtil::links] = NOT_AVAILABLE; 952  if(fields.asn_defline == 1) { 954  tmp << MSerial_AsnText << dl; 955  results[CBlastDeflineUtil::asn_defline] = CNcbiOstrstreamToString(tmp); 959 void CBlastDeflineUtil::ProcessFastaDeflines( 966  const CSeq_id* id = bioseq.GetFirstId(); 970  if (id->IsGeneral() && id->GetGeneral().GetDb() == "BL_ORD_ID") { 971  out = ">" + s_GetTitle(bioseq) + '\n'; 973  else if (id->IsLocal()) { 974  string lcl_tmp = id->AsFastaString(); 975  lcl_tmp = lcl_tmp.erase(0,4); 976  out = ">" + lcl_tmp + ' ' + s_GetTitle(bioseq) + '\n'; 979  id = FindBestChoice(bioseq.GetId(), CSeq_id::Score); 980  out += GetBareId(*id) + ' '; 982  string title = s_GetTitle(bioseq); 983  out += s_ConfigureDeflineTitle(title, use_ctrla); 988 void CBlastDeflineUtil::ProcessFastaDeflines( 992  const CSeq_loc* location, 997  const CSeq_id* id = bioseq.GetFirstId(); 1002  if (location != NULL) { 1003  TSeqPos start = location->GetStart(eExtreme_Biological) + 1; 1004  TSeqPos stop = location->GetStop(eExtreme_Biological) + 1; 1005  if (strand == eNa_strand_minus) { 1007  + NStr::IntToString(stop) + "-" + NStr::IntToString(start) 1011  + NStr::IntToString(start) + "-" + NStr::IntToString(stop) 1015  if (id->IsGeneral() && id->GetGeneral().GetDb() == "BL_ORD_ID") { 1016  out = ">" + range + s_GetTitle(bioseq) + '\n'; 1018  else if (id->IsLocal()) { 1019  string lcl_tmp = id->AsFastaString(); 1020  lcl_tmp = lcl_tmp.erase(0,4); 1021  out = ">" + lcl_tmp + (range.empty() ? " " : range) 1022  + s_GetTitle(bioseq) + '\n'; 1025  id = FindBestChoice(bioseq.GetId(), CSeq_id::Score); 1026  out += GetBareId(*id) + (range.empty() ? 
" " : range); 1028  string title = s_GetTitle(bioseq); 1029  out += s_ConfigureDeflineTitle(title, use_ctrla); 1034 // Calculates hash for a buffer in IUPACna (NCBIeaa for proteins) format. 1035 // NOTE: if sequence is in a different format, the function below can be modified to convert 1036 // each byte into IUPACna encoding on the fly. 1037 Uint4 CBlastSeqUtil::GetSeqHash(const char* buffer, int length) 1039  CChecksum crc(CChecksum::eCRC32ZIP); 1041  for(int ii = 0; ii < length; ii++) { 1042  if (buffer[ii] != '\n') 1043  crc.AddChars(buffer+ii,1); 1045  return (crc.GetChecksum() ^ (0xFFFFFFFFL)); 1048 void CBlastSeqUtil::ApplySeqMask(string & seq, const CSeqDB::TSequenceRanges & masks, const TSeqRange r) 1051  ITERATE(CSeqDB::TSequenceRanges, itr, masks) { 1052  transform(&seq[itr->first], &seq[itr->second], 1053  &seq[itr->first], (int (*)(int))::tolower); 1057  const TSeqPos r_from = r.GetFrom(); 1058  ITERATE(CSeqDB::TSequenceRanges, itr, masks) { 1059  TSeqRange mask (*itr); 1060  if(mask.GetFrom() > r.GetTo()) { 1063  TSeqRange tmp = r.IntersectionWith(mask); 1065  transform(&seq[tmp.GetFrom() -r_from], &seq[tmp.GetToOpen() - r_from], 1066  &seq[tmp.GetFrom() -r_from], (int (*)(int))::tolower); 1072 void CBlastSeqUtil::GetReverseStrandSeq(string & seq) 1074  CSeqManip::ReverseComplement(seq, CSeqUtil::e_Iupacna, 0, static_cast<ncbi::TSeqPos>(seq.size())); 1077 string CBlastSeqUtil::GetMasksString(const CSeqDB::TSequenceRanges & masks) 1079  if (masks.empty()) { 1080  return kNoMasksFound; 1082  CNcbiOstrstream out; 1083  ITERATE(CSeqDB::TSequenceRanges, range, masks) { 1084  out << range->first << "-" << range->second << SEPARATOR; 1086  return CNcbiOstrstreamToString(out);

ncbi::TMaskedQueryRegions mask

void transform(Container &c, UnaryFunction *op)

Checksum and hash calculation classes.

TSeqPos GetLength(void) const

Encapsulates identifier to retrieve data from a BLAST database.

static Uint4 GetSeqHash(const char *buffer, int length)

FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>

Defines invalid user input exceptions.

CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:

@ eFileErr

Files were missing or contents were incorrect.

@ eArgErr

Argument validation failed.

int TPIG

Sequence type accepted and returned for PIG indices.

bool OidToPig(int oid, int &pig) const

Translate an OID to a PIG.

bool PigToOid(int pig, int &oid) const

Translate a PIG to an OID.

void GetSequenceAsString(int oid, CSeqUtil::ECoding coding, string &output, TSeqRange range=TSeqRange()) const

Get a sequence in a given encoding.

int GetSeqLength(int oid) const

Returns the sequence length in base pairs or residues.

CRef< CBioseq > GetBioseq(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const

Get a CBioseq for a sequence.

CRef< CBioseq > GetBioseqNoData(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const

Get a CBioseq for a sequence without sequence data.

void AccessionToOids(const string &acc, vector< int > &oids) const

Translate an Accession to a list of OIDs.

static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo &info)

Get taxonomy information.

CRef< CBlast_def_line_set > GetHdr(int oid) const

Get the ASN.1 header for the sequence.

static CRef< CBlast_def_line_set > ExtractBlastDefline(const CBioseq &bioseq)

Extract a Blast-def-line-set object from a Bioseq retrieved by CSeqDB.

bool GiToOid(TGi gi, int &oid) const

Translate a GI to an OID.

static SIZE_TYPE ReverseComplement(const string &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst)

std::ofstream out("events_result.xml")

main entry point for tests

#define GI_FROM(T, value)

unsigned int TSeqPos

Type for sequence locations and lengths.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

SStrictId_Tax::TId TTaxId

Taxon id type.

TErrCode GetErrCode(void) const

Get error code.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

const string & GetMsg(void) const

Get message string.

#define MSerial_AsnText

I/O stream manipulators.

const string AsFastaString(void) const

static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)

Parse a string representing one or more Seq-ids, appending the results to IDS.

void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const

Append a label for this Seq-id to the supplied string.

static int WorstRank(const CRef< CSeq_id > &id)

static int Score(const CRef< CSeq_id > &id)

Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>

static int BestRank(const CRef< CSeq_id > &id)

@ fParse_PartialOK

Warn rather than throwing an exception when a FASTA-style ID set contains unparsable portions,...

@ fParse_Default

By default in ParseIDs and IsValid, allow raw parsable non-numeric accessions and plausible local acc...

@ eContent

Untagged human-readable accession or the like.

void SetMask(EMaskType type, CConstRef< CSeq_loc > location)

virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)

Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...

void SetWidth(TSeqPos width)

EMaskType

Which residues to mask out in subsequent output.

virtual void WriteSequence(const CBioseq_Handle &handle, const CSeq_loc *location=0, CSeq_loc::EOpFlags merge_flags=CSeq_loc::fMerge_AbuttingOnly)

void SetFlag(EFlags flag)

void SetAllFlags(TFlags flags)

void ResetFlag(EFlags flag)

@ fKeepGTSigns

don't convert '>' to '_' in title

@ fSuppressRange

never include location details in defline

@ fEnableGI

Use this flag to enable GI output in the defline.

@ eSoftMask

write as lowercase rather than uppercase

CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)

Add bioseq, return bioseq handle.

static CRef< CObjectManager > GetInstance(void)

Return the existing object manager or create one.

void Reset(void)

Reset reference object.

bool NotEmpty(void) const THROWS_NONE

Check if CRef is not empty – pointing to an object and having a non-null value.

bool Empty(void) const THROWS_NONE

Check if CRef is empty – not pointing to any object, which means having a null value.

bool NotEmpty(void) const

static position_type GetPositionMax(void)

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

NCBI_NS_STD::string::size_type SIZE_TYPE

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)

Convert numeric value to string.

static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)

Replace occurrences of a substring within a string.

static string & ToUpper(string &str)

Convert string to upper case – string& version.

@ fSplit_ByPattern

Require full delimiter strings.

C::value_type FindBestChoice(const C &container, F score_func)

Find the best choice (lowest score) for values in a container.

list< CRef< CSeq_id > > TSeqid

const Tdata & Get(void) const

Get the member data.

list< CRef< CBlast_def_line > > Tdata

TTo GetTo(void) const

Get the To member data.

TFrom GetFrom(void) const

Get the From member data.

void SetTo(TTo value)

Assign a value to To data member.

const TDb & GetDb(void) const

Get the Db member data.

bool IsGeneral(void) const

Check if variant General is selected.

TGi GetGi(void) const

Get the variant data.

bool IsLocal(void) const

Check if variant Local is selected.

const TGeneral & GetGeneral(void) const

Get the variant data.

bool IsGi(void) const

Check if variant Gi is selected.

list< CRef< CSeqdesc > > Tdata

const TId & GetId(void) const

Get the Id member data.

const Tdata & Get(void) const

Get the member data.

bool CanGetDescr(void) const

Check if it is safe to call GetDescr method.

list< CRef< CSeq_id > > TId

void SetDescr(TDescr &value)

Assign a value to Descr data member.

const TDescr & GetDescr(void) const

Get the Descr member data.

@ e_Title

a title for this sequence

unsigned int

A callback function used to compare two keys in a database.

Useful/utility classes and methods.

bool IsStringId(const CSeq_id &id)

Determine if id is a string id.

List of sequence offset ranges.

string common_name

Common name, such as "noisy night monkey".

string blast_name

A simple category name, such as "birds".

string s_kingdom

A string of length 1 indicating the "Super Kingdom".

string scientific_name

Scientific name, such as "Aotus vociferans".

TTaxId taxid

An identifier for this species or taxonomic group.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4