<
classT,
classU>
68 bool operator()(
constpair<T,U>& it1,
constpair<T,U>& it2)
const 70 return(it1.first < it2.first);
75 template<
classT,
classU>
78 bool operator()(
constpair<T,U>& it1,
constpair<T,U>& it2)
const 80 return(it2.second < it1.second);
89 template<
classKey,
classScore>
97 template<
classKey,
classScore>
105 template<
classKey,
classScore>
114 template<
classKey,
classScore>
118 if(
this!= &other) {
120m_Uid = other.
m_Uid;
126 template<
classKey,
classScore>
131m_Data.reserve(vec.
size());
134 ITERATE(
typenameTOtherVector, iter, vec) {
138m_Uid = vec.
GetId();
145 template<
classKey,
classScore>
149 if(
this!= &other) {
150m_Data.swap(other.
m_Data);
156 template<
classKey,
classScore>
164 template<
classKey,
classScore>
172 template<
classKey,
classScore>
176 returnm_Data.size();
180 template<
classKey,
classScore>
188 template<
classKey,
classScore>
193 returnm_Data.begin();
197 template<
classKey,
classScore>
206 template<
classKey,
classScore>
211 returnm_Data.begin();
215 template<
classKey,
classScore>
224 template<
classKey,
classScore>
230 returnstd::find(m_Data.begin(), m_Data.end(), v);
234 template<
classKey,
classScore>
240 returnstd::find(m_Data.begin(), m_Data.end(), v);
244 template<
classKey,
classScore>
252 template<
classKey,
classScore>
261 template<
classKey,
classScore>
266 iteratoriter = lower_bound(m_Data.begin(), m_Data.end(), p,
268 if(iter == m_Data.end() || iter->first !=
val.first) {
269m_Data.insert(iter,
val);
271iter->second =
val.second;
276 template<
classKey,
classScore>
287 _ASSERT(
i== m_Data.end() ||
i->first != it->first);
290m_Data.insert(ins_before, start, stop);
294 template<
classKey,
classScore>
298 returnm_Data.empty();
302 template<
classKey,
classScore>
307 const_iteratoriter = lower_bound(m_Data.begin(), m_Data.end(), p,
309 if(iter == m_Data.end() || iter->first != idx) {
317 template<
classKey,
classScore>
325 template<
classKey,
classScore>
330 size_tdiff = end_in - begin_in;
331m_Data.reserve(m_Data.size() + diff);
332 size_torig_size = m_Data.size();
334 boolneed_sort =
false;
335 for( ; begin_in != end_in; ++begin_in) {
337lower_bound(m_Data.begin(), m_Data.begin() + orig_size,
339 if(iter == m_Data.end() || iter->first != begin_in->first) {
340m_Data.push_back(*begin_in);
343iter->second = begin_in->second;
348 if(
is_sorted(m_Data.begin() + orig_size, m_Data.end())) {
349std::inplace_merge(m_Data.begin(),
350m_Data.begin() + orig_size,
361 template<
classKey,
classScore>
366 iteratoriter = lower_bound(m_Data.begin(), m_Data.end(), p,
368 if(iter == m_Data.end() || iter->first != idx) {
369m_Data.insert(iter, p);
377 template<
classKey,
classScore>
392Score length = Length() * trim_pct;
394 for( ; iter != m_Data.end(); ++iter) {
395sum += iter->second * iter->second;
396 if(sqrt(sum) >= length) {
402 if(iter == m_Data.begin()) {
408 for(; iter != m_Data.end() && iter->second ==
prev->second; ++iter) {
412m_Data.erase(iter, m_Data.end());
418 template<
classKey,
classScore>
422max_words =
max(max_words, (
size_t)1);
423 if(m_Data.size() <= max_words) {
432 iteratoriter = m_Data.begin() + max_words - 1;
436 for(; iter != m_Data.end() && iter->second ==
prev->second; ++iter) {
440 if(iter != m_Data.end()) {
441m_Data.erase(iter, m_Data.end());
450 template<
classKey,
classScore>
460 for( ; iter != m_Data.end(); ++iter) {
461 if(iter->second < min_score) {
468 for(; iter != m_Data.end() && iter->second ==
prev->second; ++iter) {
472m_Data.erase(iter, m_Data.end());
478 template<
classKey,
classScore>
487 template<
classKey,
classScore>
499 template<
classKey,
classScore>
505 len+= iter->second * iter->second;
511 template<
classKey,
classScore>
515 returnsqrt(Length2());
519 template<
classKey,
classScore>
523Score inv_len = Length();
525inv_len = 1.0f / inv_len;
528iter->second *= inv_len;
534 template<
classKey,
classScore>
540inv_len += iter->second;
543inv_len = 1.0f / inv_len;
546iter->second *= inv_len;
552 template<
classKey,
classScore>
558 size_tdiff = end_in - begin_in;
559 size_torig_size = m_Data.size();
560m_Data.reserve(m_Data.size() + diff);
562 boolneed_sort =
false;
563 for( ; begin_in != end_in; ++begin_in) {
564 iteratorpseudo_end = m_Data.begin() + orig_size;
566lower_bound(m_Data.begin(), pseudo_end,
568 if(iter == pseudo_end || iter->first != begin_in->first) {
569m_Data.push_back(*begin_in);
572iter->second += begin_in->second;
578std::inplace_merge(m_Data.begin(),
579m_Data.begin() + orig_size,
592 template<
classKey,
classScore>
600 for( ; iter1 != m_Data.end() && iter2 != other.
m_Data.end(); ) {
601 if(iter1->first == iter2->first) {
602iter1->second -= iter2->second;
606 if(iter1->first < iter2->first) {
609 TIdxScorep(iter2->first, -iter2->second);
610iter1 = m_Data.insert(iter1, p);
616 for( ; iter2 != other.
m_Data.end(); ++iter2) {
617 TIdxScorep(iter2->first, -iter2->second);
625 template<
classKey,
classScore>
630iter->second *=
val;
636 template<
classKey,
classScore>
645iter->second *=
val;
656 template<
classKey,
classScore>
665 template<
classKey,
classScore>
674 template<
classKey,
classScore>
683 template<
classKey,
classScore>
687 if(
this!= &other) {
689m_Uid = other.
m_Uid;
695 template<
classKey,
classScore>
701 ITERATE(
typenameTOtherVector, iter, other) {
702m_Data.insert(m_Data.end(),
value_type(iter->first, iter->second));
704m_Uid = other.
GetId();
709 template<
classKey,
classScore>
717 template<
classKey,
classScore>
725 template<
classKey,
classScore>
729 returnm_Data.size();
733 template<
classKey,
classScore>
737 returnm_Data.begin();
741 template<
classKey,
classScore>
749 template<
classKey,
classScore>
753 returnm_Data.begin();
757 template<
classKey,
classScore>
765 template<
classKey,
classScore>
770 returnm_Data.find(
key);
774 template<
classKey,
classScore>
779 returnm_Data.find(
key);
783 template<
classKey,
classScore>
788 returnm_Data.insert(hint, v);
792 template<
classKey,
classScore>
794pair<typename CScoreVector<Key, Score>::iterator,
bool>
797 returnm_Data.insert(v);
801 template<
classKey,
classScore>
810 template<
classKey,
classScore>
818 template<
classKey,
classScore>
826 template<
classKey,
classScore>
830 returnm_Data.empty();
834 template<
classKey,
classScore>
838 if(
this!= &other) {
839m_Data.swap(other.
m_Data);
845 template<
classKey,
classScore>
850 if(iter == m_Data.end()) {
858 template<
classKey,
classScore>
863 if(iter == m_Data.end()) {
871 template<
classKey,
classScore>
878magnitude += Score(iter->second) * Score(iter->second);
881magnitude = 1.0f / sqrt(magnitude);
883iter->second *= magnitude;
889 template<
classKey,
classScore>
895inv_len += iter->second;
898inv_len = 1.0f / inv_len;
901iter->second *= inv_len;
907 template<
classKey,
classScore>
925 template<
classKey,
classScore>
931 len+= iter->second * iter->second;
937 template<
classKey,
classScore>
941 returnsqrt(Length2());
945 template<
classKey,
classScore>
950iter->second *=
val;
956 template<
classKey,
classScore>
962iter->second *=
val;
968 template<
classKey,
classScore>
977 for( ; iter1 != end1 && iter2 != end2; ) {
978 if(iter1->first == iter2->first) {
979iter1->second += iter2->second;
983 if(iter1->first < iter2->first) {
986m_Data.insert(iter1, *iter2);
992 for( ; iter2 != end2; ++iter2) {
1001 template<
classKey,
classScore>
1010 for( ; iter1 != end1 && iter2 != end2; ) {
1011 if(iter1->first == iter2->first) {
1012iter1->second -= iter2->second;
1016 if(iter1->first < iter2->first) {
1020m_Data.insert(iter1,
val);
1026 for( ; iter2 != end2; ++iter2) {
1028m_Data.insert(iter1,
val);
1035 template<
classKey,
classScore>
1039 if(trim_pct < 1.0f) {
1042 typedefvector< pair<Key, Score> > TInvVector;
1050trim_pct *= Length();
1052 typenameTInvVector::iterator iter = v.begin();
1053 typenameTInvVector::iterator iter_end = v.end();
1056 for( ; iter != iter_end && sqrt(sum) < trim_pct; ++iter) {
1057sum += iter->second * iter->second;
1061 typenameTInvVector::iterator
prev= iter;
1062 if(
prev!= v.begin()) {
1065 for(; iter != iter_end &&
prev->first == iter->first; ++iter) {
1069 for( ; iter != iter_end; ++iter) {
1070Set().erase(iter->first);
1075 template<
classKey,
classScore>
1079 if(max_words < m_Data.size()) {
1082 typedefvector< pair<Key, Score> > TInvVector;
1090 typenameTInvVector::iterator iter = v.begin() + max_words;
1091 typenameTInvVector::iterator
prev= iter - 1;
1092 typenameTInvVector::iterator iter_end = v.end();
1093 for( ; iter != iter_end &&
prev->first == iter->first; ++iter) {
1097 for( ; iter != iter_end; ++iter) {
1098m_Data.erase(iter->first);
1103 template<
classKey,
classScore>
1109 for( ; iter != m_Data.end(); ) {
1110 if(iter->second < min_score) {
1111m_Data.erase(iter++);
1120 template<
classKey,
classScore>
1129 for( ; iter1 != end1 && iter2 != end2; ) {
1130 if(iter1->first == iter2->first) {
1134 if(iter1->first < iter2->first) {
1137m_Data.insert(iter1,
1145 for( ; iter2 != end2; ++iter2) {
1147m_Data.insert(iter1,
1154 template<
classKey,
classScore>
1163 for( ; iter1 != end1 && iter2 != end2; ) {
1164 if(iter1->first == iter2->first) {
1165iter1->second += iter2->second;
1169 if(iter1->first < iter2->first) {
1172m_Data.insert(iter1, *iter2);
1178 for( ; iter2 != end2; ++iter2) {
1180m_Data.insert(iter1,
1190 template<
classScoreVectorA,
classScoreVectorB>
1194 typenameScoreVectorA::score_type dot = 0;
1195 typenameScoreVectorA::score_type distance = 0;
1197vec2.begin(), vec2.end(),
1199 returndot * distance / (vec1.Length() * vec2.Length());
1203 template<
classScoreVectorA,
classScoreVectorB>
1205 float ScoreCosine(
constScoreVectorA& vec1,
constScoreVectorB& vec2)
1208vec2.begin(), vec2.end());
1218 template<
classScoreVectorA,
classScoreVectorB>
1220 float ScoreDice(
constScoreVectorA& vec_a,
constScoreVectorB& vec_b)
1222 return ncbi::Dice(vec_a.begin(), vec_a.end(),
1223vec_b.begin(), vec_b.end());
1227 template<
classScoreVectorA,
classScoreVectorB>
1232vec_b.begin(), vec_b.end());
1236 template<
classScoreVectorA,
classScoreVectorB>
1238 float ScoreDot(
constScoreVectorA& vec_a,
constScoreVectorB& vec_b)
1240 return ncbi::Dot(vec_a.begin(), vec_a.end(),
1241vec_b.begin(), vec_b.end());
1251 template<
classScoreVectorA,
classScoreVectorB>
1256vec_b.begin(), vec_b.end());
1264 template<
classScoreVectorA,
classScoreVectorB>
1269vec_b.begin(), vec_b.end());
class CRawScoreVector stores its data in a (sorted) STL vector this gives a better memory profile and...
void TrimThresh(Score min_score)
vector< TIdxScore > TVector
CRawScoreVector< Key, Score > & operator+=(const CRawScoreVector< Key, Score > &other)
void TrimLength(float trim_pct)
virtual void Swap(CRawScoreVector< Key, Score > &other)
void SortByIndex()
re-sort the vector by index.
TVector::const_iterator const_iterator
key_type GetId() const
setup functions
void TrimCount(size_t max_words)
pair< Key, Score > TIdxScore
const TVector & Get() const
void insert(const value_type &val)
void SortByScore()
force the vector to be sorted in order of descending score
iterator find(const Key &key)
key_type m_Uid
UID for this set.
void reserve(size_t size)
CRawScoreVector & operator=(const CScoreVector< Key, Score > &)
float Length2() const
math functions
TVector m_Data
the data for this document
CRawScoreVector< Key, Score > & operator/=(Score val)
void Add(Key idx, Score weight=Score(1))
TVector::iterator iterator
CRawScoreVector< Key, Score > & operator*=(Score val)
CRawScoreVector< Key, Score > & operator-=(const CRawScoreVector< Key, Score > &other)
void TrimThresh(Score min_score)
CScoreVector< Key, Score > & operator-=(const CScoreVector< Key, Score > &other)
TVector::const_iterator const_iterator
const TVector & Get() const
iterator find(const Key &key)
CScoreVector< Key, Score > & operator/=(Score val)
void TrimLength(float trim_pct)
CScoreVector & operator=(const CScoreVector< Key, Score > &other)
void SubtractMissing(const CScoreVector< Key, Score > &other)
pair< iterator, bool > insert(const value_type &val)
CScoreVector< Key, Score > & operator*=(Score val)
CScoreVector< Key, Score > & operator+=(const CScoreVector< Key, Score > &other)
TVector m_Data
the data for this document
float Length2() const
math functions
key_type m_Uid
UID for this set.
void Add(Key idx, Score weight=Score(1))
TVector::value_type value_type
TVector::iterator iterator
key_type GetId() const
setup functions
virtual void Swap(CScoreVector< Key, Score > &other)
void TrimCount(size_t max_words)
void AddScores(const CScoreVector< Key, Score > &other)
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
BEGIN_STD_SCOPE bool is_sorted(Iterator iter1, Iterator iter2)
is_sorted is provided by some implementations of the STL and may be included in future releases of al...
const TYPE & Get(const CNamedParameterList *param)
constexpr auto sort(_Init &&init)
double value_type
The numeric datatype used by the parser.
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
Useful/utility classes and methods.
bool operator()(const pair< T, U > &it1, const pair< T, U > &it2) const
bool operator()(const pair< T, U > &it1, const pair< T, U > &it2) const
float ScoreCosine(const ScoreVectorA &vec1, const ScoreVectorB &vec2)
float ScoreDot(const ScoreVectorA &vec_a, const ScoreVectorB &vec_b)
float ScoreDistance(const ScoreVectorA &vec_a, const ScoreVectorB &vec_b)
float ScoreDice(const ScoreVectorA &vec_a, const ScoreVectorB &vec_b)
The dice coefficient is defined as.
float ScoreCombined(const ScoreVectorA &vec1, const ScoreVectorB &vec2)
float ScoreJaccard(const ScoreVectorA &vec_a, const ScoreVectorB &vec_b)
The Jaccard coefficient is defined as.
float ScoreOverlap(const ScoreVectorA &vec_a, const ScoreVectorB &vec_b)
The overlap function is a dot product weighted by the *shortest* of each term.
string InitialValue< string >(string *)
float Dice(iterator1 iter1, iterator1 end1, iterator2 iter2, iterator2 end2)
Dice coefficient.
float Distance(iterator1 iter1, iterator1 end1, iterator2 iter2, iterator2 end2)
Euclidean distance measure.
float Dot(iterator1 iter1, iterator1 end1, iterator2 iter2, iterator2 end2)
Dot-product similarity.
float Jaccard(iterator1 iter1, iterator1 end1, iterator2 iter2, iterator2 end2)
Jaccard similarity.
float Cosine(iterator1 iter1, iterator1 end1, iterator2 iter2, iterator2 end2)
Cosine similarity measure.
float Overlap(iterator1 iter1, iterator1 end1, iterator2 iter2, iterator2 end2)
Overlap measure.
void DotAndDistance(iterator1 iter1, iterator1 end1, iterator2 iter2, iterator2 end2, float *dot_in, float *dist_in)
Dot and distance in one step.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4