(
charc : initials) {
82 if(c !=
' '&& c !=
'.'&& c !=
',') {
94 if(
names.IsStd()) {
96 if(auth->IsSetName()) {
97 stringcur_auth, cur_initials;
102cur_auth = std_name.
GetLast();
108}
else if(person.
IsStr()) {
109cur_auth = person.
GetStr();
110}
else if(person.
IsMl()) {
111cur_auth = person.
GetMl();
114 if(! cur_auth.empty()) {
115 if(! cur_initials.empty()) {
116cur_auth +=
' '+ cur_initials;
118authors.push_back(cur_auth);
123 for(
const string& auth :
names.IsStr() ?
names.GetStr() :
names.GetMl()) {
124 if(! auth.empty()) {
125authors.push_back(auth);
141m_title_words_set(
false),
142m_full_title_set(
false)
158 if(! m_title_words_set) {
160m_title_words_set =
true;
167 if(! m_full_title_set) {
169m_full_title_set =
true;
208m_title_words_set =
false;
209m_full_title_set =
false;
211m_full_title.clear();
212m_titlewords.clear();
229m_date->Assign(date);
235 returnm_date.NotEmpty();
240 if(IsSetDate() && m_date->IsSetYear()) {
241 returnm_date->GetYear();
248 if(IsSetDate() && m_date->IsSetMonth()) {
249 returnm_date->GetMonth();
256m_seq_ids.insert(seq_id);
305 if(! m_full_title.empty() && m_full_title.front() ==
'['&& m_full_title.back() ==
']') {
306m_full_title = m_full_title.substr(1, m_full_title.size() - 2);
310 if(m_full_title.back() ==
'.')
311m_full_title.pop_back();
320m_max_date_check(max_date_check),
361 for(
const auto& cur_title : cit.
GetTitle().
Get()) {
362 if(cur_title->IsName()) {
363 data.SetTitle(cur_title->GetName());
411 autopred = [](
charc) {
returnc ==
'-'; };
413second.erase(
remove_if(second.begin(), second.end(), pred), second.end());
419 size_tspace_pos_first =
first.find(
' ');
420 if(space_pos_first != string::npos && space_pos_first + 2 <
first.size()) {
421 first.resize(space_pos_first + 3);
424 size_tspace_pos_second = second.find(
' ');
425 if(space_pos_second != string::npos && space_pos_second + 2 < second.size()) {
426second.resize(space_pos_second + 3);
433 if(space_pos_first != string::npos && space_pos_first + 1 <
first.size()) {
434 first.resize(space_pos_first + 2);
436 if(space_pos_second != string::npos && space_pos_second + 1 < second.size()) {
437second.resize(space_pos_second + 2);
444 if(space_pos_first != string::npos) {
445 first.resize(space_pos_first);
447 if(space_pos_second != string::npos) {
448second.resize(space_pos_second);
460 if(
first.size() != second.size()) {
464 autofirst_it =
first.begin(),
465second_it = second.begin();
468 for(; ret !=
eNoMatch&& first_it !=
first.end(); ++first_it, ++second_it) {
485 boolneed_to_add =
true;
486 for(
const auto& cur_pub :
m_pubs) {
492 if(! cur_seq_id.empty()) {
493cur_pub->AddSeqId(cur_seq_id);
498need_to_add =
false;
506 if(cur_seq_id.empty()) {
509 data->AddSeqId(cur_seq_id);
512 if(!
data->IsSetDate()) {
528 size_tspace = author.rfind(
' ');
529 if(space == string::npos) {
532name = author.substr(0, space + 1);
533 if(space + 1 < author.size()) {
534name += author[space + 1];
549}
else if(person.
IsMl()) {
550name = person.
GetMl();
551}
else if(person.
IsStr()) {
561 if(authors.size()) {
565 if(authors.size() > 1) {
570 if(pubmed_authors.
IsStd()) {
571 for(
const auto& auth : pubmed_authors.
GetStd()) {
572 if(auth->IsSetName()) {
584 constlist<string>&
names= pubmed_authors.
IsMl() ? pubmed_authors.
GetMl() : pubmed_authors.
GetStr();
585 for(
const string& name :
names) {
588 if(cur_name == first_author || cur_name == last_author) {
604 for(
const auto& xref : medline_entry.
GetXref()) {
605 if(xref->IsSetCit()) {
606 if(seq_ids.
find(xref->GetCit()) != seq_ids.
end()) {
632date_before.
SetYear(year - 1);
636date_after.
SetYear(year + max_date_check);
640after = date_after.
Compare(std_pub_date);
653vector<string>
query;
656 for(
const string& w :
data.GetTitleWords()) {
661 for(
const string& author :
data.GetAuthors()) {
662list<CTempString>
names;
664 if(!
names.empty()) {
669vector<TEntrezId> uids;
672edit::CEUtilsUpdater::DoPubSearch(
query, uids);
678 if(uids.size() == 1) {
687 static const stringBASE_URL =
"https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?tool=pub_report&versions=no&format=xml&ids=PMC";
688 static const size_tBUF_SIZE = 1024;
694 for(
intattempt = 1; attempt <= 5; attempt++) {
700vector<char>
buf(BUF_SIZE);
701 while(! https.fail()) {
702https.read(&
buf[0], BUF_SIZE);
703 result.append(&
buf[0], https.gcount());
707 if(
result.find(
"status = \"error\"") == string::npos &&
result.find(
"<errmsg>") == string::npos) {
708 static const charpmid_start[] =
"pmid=\"";
709 size_tpmid_pos =
result.find(pmid_start);
710 if(pmid_pos != string::npos) {
729 for(
char& c : s) {
751vector<TEntrezId> uids;
755 if(uids.size() == 1) {
767 stringtitle =
data.GetTitle();
769 stringterm = title +
"[title]";
773 if(! term.empty()) {
797eutils::CPubmedArticleSet pas;
800vector<TEntrezId> uids { pmid };
803eutils.
Fetch(
"PubMed", uids, xml_stream);
809 ERR_POST(
Warning<<
"failed while fetching data from PubMed: "<< e);
813 const auto& pp = pas.GetPP().GetPP();
815 const auto& ppf = *pp.front();
816 if(ppf.IsPubmedArticle()) {
817 consteutils::CPubmedArticle& article = ppf.GetPubmedArticle();
818pubmed_entry.
Reset(article.ToPubmed_entry());
819}
else if(ppf.IsPubmedBookArticle()) {
820 consteutils::CPubmedBookArticle& article = ppf.GetPubmedBookArticle();
821pubmed_entry.
Reset(article.ToPubmed_entry());
825 if(pubmed_entry && pubmed_entry->IsSetMedent() && pubmed_entry->GetMedent().IsSetCit()) {
826 const CCit_art& cit_art = pubmed_entry->GetMedent().GetCit();
846 for(
const string& cur_author : auths) {
853 if(cur_cmp_res < res) {
863 for(
const string&
id: ids) {
864 out<<
"SEQID |"<<
id<<
"|\t";
869 "AUTH_MISMATCH",
"LAST_NAMES",
"ONE_INIT",
"TWO_INITS",
"NO_HYPHENS",
"FULL_NAMES" 883 for(
const string& author : auths) {
890 if(cur_match < best_match) {
891best_match = cur_match;
895 boolboth_ok =
true;
897 size_tpubmed_size = pubmed_auths.size(),
898cur_size = auths.size();
901 if(! auths.empty() && matches == cur_size) {
902 if(cur_size < 3 && pubmed_size > 4) {
903 out<<
"AUTHORS_QUESTIONABLE ["<< result_str <<
"] "<< cur_size <<
" -> "<< pubmed_size <<
'\t';
905}
else if(cur_size < pubmed_size) {
906 out<<
"AUTHORS_ADDED ["<< result_str <<
"] "<< pubmed_size - cur_size <<
'\t';
908 out<<
"AUTHORS_REORDERED ["<< result_str <<
"]\t";
911 out<<
"AUTHORS_CHANGED ["<< result_str <<
"] "<< matches <<
" / "<< pubmed_size <<
'\t';
921 for(
const string& word : title_words) {
927 boolboth_ok =
true;
929 size_tpubmed_size = pubmed_title_words.size(),
930cur_size = title_words.size();
932 if(cur_size < 3 && pubmed_size > 4) {
933 out<<
"TITLE_QUESTIONABLE "<< cur_size <<
" -> "<< pubmed_size <<
'\t';
935}
else if(pubmed_size && cur_size &&
NStr::EqualNocase(pubmed_title_words.front(), title_words.front()) &&
936matches == pubmed_size) {
937 out<<
"TITLE_SAME [SIMILAR] "<< matches <<
'\t';
938}
else if(pubmed_size && matches == pubmed_size) {
939 out<<
"TITLE_ALTERED "<< matches <<
'\t';
942 out<<
"TITLE_DIFFERS "<< matches <<
" / "<< pubmed_size <<
'\t';
952 if(! auths.empty()) {
954 out<< auths.front();
955 autoauth = auths.begin();
956 for(++auth; auth != auths.end(); ++auth)
957 out<<
", "<< *auth;
966 if(!
data.GetFullTitle().empty()) {
970 constlist<string>& words =
data.GetTitleWords();
971 if(! words.empty()) {
972 out<< words.front();
973 autoword = words.begin();
974 for(++word; word != words.end(); ++word)
984 intyear =
data.GetYear();
986 if(
data.GetJournal().empty()) {
988 out<<
"Unpublished";
990 out<<
" ["<< year <<
']';
996 out<<
" ["<< year <<
']';
999 if(!
data.GetVolume().empty()) {
1000 out<<
' '<<
data.GetVolume();
1003 if(!
data.GetPages().empty()) {
1004 out<<
" : "<<
data.GetPages();
1022 if(
data.GetUnique().empty()) {
1025 out<<
"UNIQ_CIT "<<
data.GetUnique() <<
'\t';
1028 boolboth_ok =
true;
1029 if(authors_cmp_res ==
eNoMatch) {
1036 out<<
"TITLE_SAME [IDENTICAL]\t";
1042 out<< (both_ok ?
"PROBABLE\t":
"POSSIBLE\t");
1061 m_out<<
"Trying "<<
m_pubs.size() <<
" Entrez Queries\n\n";
1062 for(
const auto& pub :
m_pubs) {
1066 if(
FetchPub(pmid, *pub, pubmed_entry)) {
1067 NCBI_ASSERT(pubmed_entry->IsSetMedent() && pubmed_entry->GetMedent().IsSetCit(),
1068 "MedEntry and MedEntry.Cit should be present at this point");
1079pub_need_id->AddSeqId(name);
1091 if(! pub->IsSetDate()) {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void remove_if(Container &c, Predicate *__pred)
@Auth_list.hpp User-defined methods of the data storage class.
This stream exchanges data with an HTTP server located at the URL: http[s]://host[:port]/path[?...
CDate::ECompare Compare(const CDate_std &date) const
Indicate how *this relates to another date.
ECompare
How *this relates to another date.
@ eCompare_before
*this comes first.
@ eCompare_same
They're equivalent.
@ eCompare_after
*this comes second.
Class for querying via E-Utils.
void Fetch(const string &db, const vector< objects::CSeq_id_Handle > &uids, CNcbiOstream &ostr, const string &retmode="xml")
Uint8 Search(const string &db, const string &term, vector< objects::CSeq_id_Handle > &uids, const string &xml_path=kEmptyStr)
@Name_std.hpp User-defined methods of the data storage class.
bool GetLabel(string *label, ELabelType type=eContent, TLabelFlags flags=0, ELabelVersion version=eLabel_DefaultVersion) const
Concatenate a label for this pub to label.
@ fLabel_Unique
Append a unique tag [V1].
const std::string & GetCurrentSeqId() const
virtual void SetCurrentSeqId(const std::string &name)
const string & GetPages() const
void SetDate(const CDate_std &date)
void SetAuthors(const CAuth_list &auth_list)
const string & GetJournal() const
const TSeqIds & GetSeqIds() const
const string & GetFullTitle() const
const string & GetTitle() const
void SetTitle(const string &title)
void SetUnique(const string &unique)
const string & GetVolume() const
const list< string > & GetAuthors() const
void CreateFullTitle() const
void SetPages(const string &pages)
void AddSeqId(const string &seq_id)
void SetJournal(const string &journal)
void SetVolume(const string &volume)
const list< string > & GetTitleWords() const
const string & GetUnique() const
list< string > m_titlewords
void CreateTitleWords() const
ncbi::CNcbiOstream & m_out
const CDate_std & GetDate() const
TEntrezId RetrievePMid(const CPubData &data) const
void SetCurrentSeqId(const std::string &name) override
void CompleteReport() override
void ClearData() override
CUnpublishedReport(ncbi::CNcbiOstream &out, int max_date_check, bool nohydra)
void ReportUnpublished(const CPub &pub)
bool FetchPub(TEntrezId pmid, const CPubData &data, CRef< CPubmed_entry > &pubmed_entry) const
shared_ptr< CEutilsClient > m_eutils
CEutilsClient & GetEUtils() const
void SetDate(const CDate_std &date)
const_iterator find(const key_type &key) const
const_iterator end() const
The NCBI C++ standard methods for dealing with std::string.
std::ofstream out("events_result.xml")
main entry point for tests
static const struct name_t names[]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
#define ENTREZ_ID_TO(T, entrez_id)
#define ENTREZ_ID_FROM(T, value)
#define NCBI_ASSERT(expr, mess)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Warning(CExceptionArgs_Base &args)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty -- pointing to an object and has a non-null value.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static string Sanitize(CTempString str, TSS_Flags flags=fSS_print)
Sanitize a string, allowing only specified classes of characters.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
@ fAllowTrailingSymbols
Ignore trailing non-numerics characters.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
static const char label[]
bool IsSetVolume(void) const
Check if a value has been assigned to Volume data member.
bool IsSetDate(void) const
Check if a value has been assigned to Date data member.
bool IsSetAuthors(void) const
Check if a value has been assigned to Authors data member.
bool IsSetAuthors(void) const
authors (ANSI requires). Check if a value has been assigned to Authors data member.
const TJournal & GetJournal(void) const
Get the variant data.
bool IsSetTitle(void) const
title of journal. Check if a value has been assigned to Title data member.
const TVolume & GetVolume(void) const
Get the Volume member data.
const TPages & GetPages(void) const
Get the Pages member data.
const TFrom & GetFrom(void) const
Get the From member data.
const TAuthors & GetAuthors(void) const
Get the Authors member data.
const TDate & GetDate(void) const
Get the Date member data.
bool IsSetTitle(void) const
title of paper (ANSI requires). Check if a value has been assigned to Title data member.
const TTitle & GetTitle(void) const
Get the Title member data.
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
bool IsSetImp(void) const
Check if a value has been assigned to Imp data member.
const TTitle & GetTitle(void) const
Get the Title member data.
bool IsSetNames(void) const
Check if a value has been assigned to Names data member.
const TJournal & GetJournal(void) const
Get the Journal member data.
bool IsSetTitle(void) const
eg.
bool IsSetJournal(void) const
Check if a value has been assigned to Journal data member.
bool IsSetDate(void) const
date of publication. Check if a value has been assigned to Date data member.
const TStr & GetStr(void) const
Get the variant data.
bool IsSet(void) const
Check if a value has been assigned to data member.
const TImp & GetImp(void) const
Get the Imp member data.
bool IsJournal(void) const
Check if variant Journal is selected.
const TNames & GetNames(void) const
Get the Names member data.
bool IsMl(void) const
Check if variant Ml is selected.
const TStd & GetStd(void) const
Get the variant data.
const TDate & GetDate(void) const
Get the Date member data.
const TAuthors & GetAuthors(void) const
Get the Authors member data.
const TTitle & GetTitle(void) const
Get the Title member data.
const TMl & GetMl(void) const
Get the variant data.
bool IsSetPages(void) const
Check if a value has been assigned to Pages data member.
const Tdata & Get(void) const
Get the member data.
bool IsStd(void) const
Check if variant Std is selected.
const TStr & GetStr(void) const
Get the variant data.
bool IsMl(void) const
Check if variant Ml is selected.
void SetYear(TYear value)
Assign a value to Year data member.
bool IsSetYear(void) const
full year (including 1900). Check if a value has been assigned to Year data member.
bool IsStd(void) const
Check if variant Std is selected.
const TInitials & GetInitials(void) const
Get the Initials member data.
void SetMonth(TMonth value)
Assign a value to Month data member.
bool IsName(void) const
Check if variant Name is selected.
bool IsSetInitials(void) const
first + middle initials. Check if a value has been assigned to Initials data member.
const TMl & GetMl(void) const
Get the variant data.
bool IsSetLast(void) const
Check if a value has been assigned to Last data member.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetMonth(void) const
month (1-12). Check if a value has been assigned to Month data member.
const TLast & GetLast(void) const
Get the Last member data.
const TName & GetName(void) const
Get the variant data.
const TStd & GetStd(void) const
Get the variant data.
bool IsSetXref(void) const
Check if a value has been assigned to Xref data member.
const TXref & GetXref(void) const
Get the Xref member data.
const TArticle & GetArticle(void) const
Get the variant data.
const TGen & GetGen(void) const
Get the variant data.
bool IsArticle(void) const
Check if variant Article is selected.
bool IsGen(void) const
Check if variant Gen is selected.
use only Cassandra database for the lookups</td></tr><tr><td>yes</td><td>do not use tables BIOSEQ_INFO and BLOB_PROP in the Cassandra database
static void ReportJournal(CNcbiOstream &out, const char *prefix, const CPubData &data)
static void ProcessInitials(string &initials)
static void CollectData(const CPub &pub, CPubData &data)
static void ReportTitle(CNcbiOstream &out, const char *prefix, const CPubData &data)
static TEntrezId ConvertPMCtoPMID(TEntrezId pmc)
static void GetOneInitialAuthorName(const string &author, string &name)
static void CollectDataArt(const CCit_art &cit, CPubData &data)
static string authors_cmp_result_label[]
static void NormalizeTitle(string &s)
static void GetAuthorsFromList(list< string > &authors, const CAuth_list &auth_list)
static TEntrezId DoHydraSearch(const CPubData &data)
static void CollectDataGen(const CCit_gen &cit, CPubData &data)
static bool FirstOrLastAuthorMatches(const list< string > &authors, const CAuth_list::C_Names &pubmed_authors)
static AuthorNameMatch CompareAuthorNames(string first, string second)
static void ReportSeqIds(CNcbiOstream &out, const CPubData::TSeqIds &ids)
static bool ReportTitleDiff(CNcbiOstream &out, const list< string > &pubmed_title_words, const list< string > &title_words)
static void ReportAuththors(CNcbiOstream &out, const char *prefix, const list< string > &auths)
string GetBestTitle(const CTitle &titles)
static AuthorNameMatch CompareAuthors(const list< string > &first, const list< string > &second)
static TEntrezId DoEUtilsSearch(CEutilsClient &eutils, const string &database, const string &term)
static string GetAuthorsCmpResultStr(AuthorNameMatch res)
static void GetNameFromStdName(const CPerson_id &person, string &name)
static void ReportOnePub(CNcbiOstream &out, const CCit_art &pubmed_cit_art, const CPubData &data, TEntrezId pmid)
static AuthorNameMatch IsAuthorInList(const list< string > &auths, const string &author)
static bool ReportAuthorDiff(CNcbiOstream &out, const list< string > &pubmed_auths, const list< string > &auths)
static bool CheckRefs(const CMedline_entry &medline_entry, const CPubData::TSeqIds &seq_ids)
static bool CheckDate(int year, int month, int max_date_check, const CCit_jour &juornal)
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
CRef< CPub > journal(ParserPtr pp, char *bptr, char *eptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, bool has_muid, CRef< CCit_art > &cit_art, Int4 er)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4