A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/text__util_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/text/text_util.cpp Source File

85  for

(

int i

= 0;

i

< 256; ++

i

) {

107  string

clause_ends(

".?!;:\"{}[]()"

);

108  ITERATE

(

string

, it, clause_ends) {

139  bool

is_alpha =

false

;

160

string::size_type

i

)

162  for

( ;

i

< s.size(); ++

i

) {

167  return

(

i

== s.size() ? string::npos :

i

);

171

string::size_type

i

)

173  for

( ;

i

< s.size(); ++

i

) {

178  return

(

i

== s.size() ? string::npos :

i

);

182

string::size_type

i

)

184  for

( ;

i

< s.size(); ++

i

) {

189  return

(

i

== s.size() ? string::npos :

i

);

201  if

(iter->first[0] ==

'p'

&& iter->first.find(

"phrase: "

) == 0) {

202

phrase_out.

insert

(phrase_out.

end

(), *iter);

218

freq.

Add

(iter->first, iter->second);

224  const string

& prefix,

228  if

(iter->first.find_first_of(

":"

) != string::npos) {

234

freq.

Add

(prefix +

": "

+ iter->first, iter->second);

340  _TRACE

(

"CTextUtil::GetWordFrequencies(): text = "

<<

text

);

341

string::size_type clause_start = 0;

342

string::size_type clause_end =

text

.size();

344

list<string> prev_words;

350  while

(clause_start != clause_end) {

351

clause_end =

text

.size();

353  if

(pos != string::npos) {

358  _TRACE

(

"clause: |"

<<

text

.substr(clause_start, clause_end - clause_start) <<

"|"

);

359  for

( ; clause_start != clause_end; clause_start = pos) {

363  if

(clause_start == clause_end) {

368 

pos =

min

(clause_end,

376 

word.assign(

text

, clause_start, pos - clause_start);

379 

string::size_type pos1 =

380

word.find_first_not_of(

"0123456789"

);

381  if

(pos1 == string::npos) {

390 

string::iterator copy_to = word.begin();

392  if

(*copy_from ==

'\''

) {

397  if

(copy_to != word.end()) {

398

word.erase(copy_to, word.end());

409  _TRACE

(

" word: "

<< word);

413  typedef

pair<string, string> TDiphPair;

414  static const

TDiphPair sc_DiphPairs[] = {

415

TDiphPair(

"oe"

,

"e"

),

416

TDiphPair(

"ae"

,

"e"

)

420  for

(

size_t i

= 0;

i

<

sizeof

(sc_DiphPairs) /

sizeof

(TDiphPair); ++

i

) {

421  if

(word.find(sc_DiphPairs[

i

].first) != string::npos) {

423

sc_DiphPairs[

i

].

first

,

424

sc_DiphPairs[

i

].second,

441

prev_words.push_back(word);

444

prev_words.push_back(stem);

449  while

(prev_words.size() > 3) {

450

prev_words.pop_front();

452  if

(prev_words.size() > 1) {

453

list<string>::iterator pit = prev_words.begin();

454

list<string>::iterator end = prev_words.end();

456  for

( ; pit != end; ++pit) {

465

phrase =

"phrase: "

;

476  _TRACE

(

" phrase: |"

<< phrase <<

"|"

);

478

freq.

Add

(phrase, 1);

495  if

(clause_start == string::npos) {

502  _TRACE

(

" word: "

<< it->first <<

" count: "

<< it->second);

514  if

(iter->first.find_first_of(

":"

) != string::npos) {

521  if

(it != stem_freq.

end

()) {

522

it->second += iter->second;

655  return

(iter != sc_StopWords.end());

668  for

( ; stop_it != stop_end && it != end; ) {

669  if

(it->first == *stop_it) {

673  if

(it->first < *stop_it) {

685

string::size_type pos = 0;

686  while

( (pos = title.find_first_of(

".,[](){};:'\"/?<>"

, pos)) != string::npos) {

695

vector<unsigned char>&

data

)

716  const

vector<unsigned char>&

data

)

723  const

vector<char>&

data

)

737  const void

*

data

,

size_t

data_len)

static void Stem(const string &in_str, string *out_str)

Compute the Porter stem for a given word.

iterator find(const Key &key)

pair< iterator, bool > insert(const value_type &val)

void Add(Key idx, Score weight=Score(1))

TVector::value_type value_type

TVector::iterator iterator

Reallocable memory buffer (no memory copy overhead) Mimics vector<>, without the overhead of explicit...

TBase::const_iterator const_iterator

static void GetStemFrequencies(const TWordFreq &freq, TWordFreq &stems, TFlags flags=fDefaults)

retrieve stem frequencies from a set of word frequencies

static bool IsStopWord(const string &str)

return true if the provided word is a stop word

static void TrimStopWords(TWordFreq &freq)

eliminate the stop words from a set of word frequencies

static void CleanJournalTitle(string &title)

perform a set of punctuational clean-ups on a string suitable for a journal or book title

static void SplitWordFrequencies(const TWordFreq &wf_in, TWordFreq &wf_out, TWordFreq &phrase_out)

split a set of word frequencies into phrase and non-phrase frequencies; this is done to treat the two ...

static void EncodeFreqs(const TWordFreq &freq, vector< char > &data)

static void AddWordFrequencies(TWordFreq &freq, const TWordFreq &wf, TFlags flags=0)

add a set of frequencies into another set

static void GetWordFrequencies(const string &text, TWordFreq &freq, TFlags flags=fDefaults)

retrieve word frequencies for a given piece of text

static void DecodeFreqs(TWordFreq &freq, const vector< char > &data)

static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)

static const char * str(char *buf, int n)

double wf(double lambda, double D_LR, double D_LU, double D_LD, double D_RU, double D_RD, double D_DU)

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

int32_t Int4

4-byte (32-bit) signed integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

uint16_t Uint2

2-byte (16-bit) unsigned integer

int64_t Int8

8-byte (64-bit) signed integer

uint64_t Uint8

8-byte (64-bit) unsigned integer

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)

Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)

IO_PREFIX::istream CNcbiIstream

Portable alias for istream.

static string Int8ToString(Int8 value, TNumToStringFlags flags=0, int base=10)

Convert Int8 to string.

static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)

Convert double to string.

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)

Replace occurrences of a substring within a string.

static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)

Convert UInt to string.

static string & ToLower(string &str)

Convert string to lower case – string& version.

static string UInt8ToString(Uint8 value, TNumToStringFlags flags=0, int base=10)

Convert UInt8 to string.

unsigned int

A callback function used to compare two keys in a database.

static void text(MDB_val *v)

CStaticArraySet< const char *, PCase_CStr > TStopWords

static string::size_type s_NextClauseStop(const string &s, string::size_type i)

static bool s_IsNumeric(unsigned char c)

static SLoadTokens s_ForceTokenLoad

void s_NumericToFreq(const T &val, CTextUtil::TWordFreq &freq)

static const char *const sc_StopWordArray[]

Stop Word Pruning.

static bool s_IsAlphaNumeric(unsigned char c)

string s_ValToString(Int4 i)

static string::size_type s_NextTokenStart(const string &s, string::size_type i)

static char s_ToLower(unsigned char c)

DEFINE_STATIC_ARRAY_MAP(TStopWords, sc_StopWords, sc_StopWordArray)

static string::size_type s_NextTokenStop(const string &s, string::size_type i)

static Uint2 sc_Tokens[256]

void Encode(const CRawScoreVector< Key, Score > &, vector< char > &)

void Decode(const vector< char > &, CRawScoreVector< Key, Score > &)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4