& message,
76 const string& fileName,
80 stringseqId = bioseq.
GetId().front()->AsFastaString();
83 " doesn't contain qualifiers for sequence id "+
93 const string& fileName,
97 stringseqId = bioseq.
GetId().front()->AsFastaString();
98ostringstream message;
100<<
"Multiple potential matches for line " 104<<
". Unable to match sequence id " 106<<
" to a previously matched entry.";
114 if(
id.
empty()) {
118 if(
id.back() ==
'|') {
127 const string& fileName,
133ostringstream message;
139<<
" of "<< fileName
140<<
" duplicates id on line " 142<<
". Skipping line " 146 sPostError(pEC, message.str(), seqId, currentLine);
152 const string& fileName,
160 " contains qualifiers for sequence id "+
162 ", but no sequence with that id was found.";
196list<string> id_strings;
197 for(
const auto& pId : bioseq.
GetId()) {
201id_strings.push_back(
id);
204id_strings.push_back(content);
205 if(pId->IsGeneral()) {
208id_strings.push_back(
tag);
211 autopTextSeqId = pId->GetTextseq_Id();
212 if(pTextSeqId && pTextSeqId->IsSetVersion()) {
213 size_tpointPos =
id.rfind(
'.');
214 if(pointPos != string::npos) {
217id_strings.push_back(versionlessId);
218id_strings.push_back(content);
240 for(
const auto&
id: id_strings) {
244 CTempString& line = linePtr ? *linePtr : it->second.line;
245 if(!line.
empty()) {
247 autolineNum = it->second.lineNum;
257 for(
const auto&
id: id_strings) {
277 if(!entry.second.line.empty()) {
278unusedLines.emplace(entry.second.lineNum, entry.second.line);
282 for(
const auto& entry : unusedLines) {
295vector<CTempString> tokens;
305 staticpair<size_t,size_t>
314 returnmake_pair<size_t,size_t>(1,1);
316 returnmake_pair<size_t,size_t>(3,3);
318 returnmake_pair<size_t,size_t>(2,2);
322 returnmake_pair<size_t,size_t>(1,3);
332 static const size_tminStubLength=2;
333 static const size_tmaxStubLength=3;
336size_type fastaLength = fastaString.
size();
337size_type currentPos=0;
338size_type idStartPos=0;
339 size_tcurrentField=0;
340 size_tcurrentMinField=0;
341 size_tcurrentMaxField=0;
343 while(currentPos < fastaLength) {
344 if(idStartPos == currentPos) {
345 autonextBarPos = fastaString.
find(
'|', currentPos);
346 if(nextBarPos ==
NPOS) {
349 const autostubLength = nextBarPos - currentPos;
350 if(stubLength<minStubLength || stubLength>maxStubLength) {
359currentMinField = numFields.first;
360currentMaxField = numFields.second;
361currentPos=nextBarPos+1;
365 _ASSERT(currentMinField <= currentMaxField);
366 if(currentField < currentMaxField) {
367 autonextBarPos = fastaString.
find(
'|', currentPos);
368 if(nextBarPos ==
NPOS) {
369 if(currentField < currentMinField-1) {
372idStrings.emplace(fastaString.
substr(idStartPos));
375 if(currentField >= currentMinField) {
376 autolength = nextBarPos-currentPos;
377 if(length>=minStubLength && length<=maxStubLength) {
382currentMinField = numFields.first;
383currentMaxField = numFields.second;
384idStartPos=currentPos;
386currentPos=nextBarPos+1;
391currentPos=nextBarPos+1;
395 _ASSERT(currentField == currentMaxField);
396idStrings.emplace(fastaString.
substr(idStartPos, (currentPos-idStartPos)-1));
397idStartPos=currentPos;
402 if(currentField < currentMinField) {
406 if(fastaString[fastaLength-1] ==
'|') {
407 if(currentField < currentMaxField) {
408 _ASSERT(currentPos == fastaLength);
409idStrings.emplace(fastaString.
substr(idStartPos, (currentPos-idStartPos)-1));
424 if(idString.
empty()) {
428 if(
count(begin(idString), end(idString),
'|')<2) {
436rval.first->second.lineNum,
446 ". Unable to parse "+ idString +
".",
453 for(
auto id: parsedIDs) {
454pair<TLineMap::iterator,bool> rval;
457rval.first->second.linePtr = linePtr;
461linePtr = &rval.first->second.line;
470rval.first->second.lineNum,
492 const char* ptr = (
const char*)
m_pFileMap->Map(0, fileSize);
493 const char* end = ptr + fileSize;
499 if(*ptr ==
'\r'|| *ptr ==
'\n') {
504 const char* start = ptr;
506 const char* endline = (
const char*)memchr(ptr,
'\n', end - ptr);
507 if(endline ==
nullptr) endline = end;
512 while(start < endline && *endline ==
'\r')
529 "source modifiers file header line is not valid");
544 for(
const auto&
mod: mods) {
559 const string& seqId,
560 const string& message,
582 for(
auto&
mod: mods) {
584 stringnew_value =
mod.GetValue();
587 mod.SetValue(new_value);
605 boolreadModsFromTitle,
634 const TMods& commandLineMods,
635 const string& commandLineRemainder,
639 boolreadModsFromTitle,
642m_CommandLineMods(commandLineMods),
643m_CommandLineRemainder(commandLineRemainder),
644m_pNamedSrcFileMap(pNamedSrcFileMap),
645m_pDefaultSrcFileMap(pDefaultSrcFileMap),
646m_pMessageListener(pMessageListener),
647m_ReadModsFromTitle(readModsFromTitle),
648m_IsVerbose(isVerbose),
649m_MergePolicy(mergePolicy)
686 stringseqId = bioseq.
GetId().front()->AsFastaString();
716pDescriptors = &(bioseq.
SetDescr().Set());
719CSeq_descr::Tdata::iterator title_it;
722find_if(pDescriptors->begin(), pDescriptors->end(),
724 if(title_it != pDescriptors->end()) {
725pTitleDesc = *title_it;
727 auto& title = (*title_it)->
SetTitle();
728 stringtitleRemainder;
737remainder = titleRemainder + remainder;
748 if(!remainder.empty()) {
751pDescriptors->push_back(pTitleDesc);
752pTitleDesc->
SetTitle() = remainder;
755 stringcurrent_title =
759pTitleDesc->
SetTitle() = current_title.empty() ?
761current_title +
" "+ remainder;
766 if(title_it != pDescriptors->end() &&
767(*title_it)->GetTitle().empty()) {
768pDescriptors->erase(title_it);
771 if(pDescriptors->empty()) {
781 const string& commandLineStr,
782 boolreadModsFromTitle,
791 stringcommandLineRemainder;
792TMods commandLineMods;
799 autofReportCommandLineError =
805TModList rejectedMods;
810fReportCommandLineError);
812commandLineMods = mod_handler.
GetMods();
816commandLineRemainder,
826 if(isVerbose && pDefaultSrcFileMap) {
const string m_CommandLineRemainder
CApplyMods(const TMods &commandLineMods, const string &m_CommandLineRemainder, CMemorySrcFileMap *pNamedSrcFileMap, CMemorySrcFileMap *pDefaultSrcFileMap, ILineErrorListener *pMessageListener, bool readModsFromTitle, bool isVerbose, TMergePolicy mergePolicy=CModHandler::ePreserve)
TMergePolicy m_MergePolicy
void x_GetModsFromFileMap(CMemorySrcFileMap &fileMap, const CBioseq &bioseq, CModHandler::FReportError fReportError, CModHandler &mod_handler, string &remainder)
CModHandler::TModList TModList
CMemorySrcFileMap * m_pDefaultSrcFileMap
void operator()(CBioseq &bioseq)
CMemorySrcFileMap * m_pNamedSrcFileMap
ILineErrorListener * m_pMessageListener
static CLineErrorEx * Create(EProblem eProblem, EDiagSev eSeverity, int code, int subcode, const std::string &strSeqId, unsigned int uLine, const std::string &strErrorMessage=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), const TVecOfLines &vecOfOtherLines=TVecOfLines())
Use this because the constructor is protected.
ILineErrorListener * m_pEC
void MapFile(const string &fileName, bool allowAcc)
unordered_map< string, size_t > m_ProcessedIdsToLineNum
unique_ptr< CMemoryFileMap > m_pFileMap
void x_ProcessLine(const CTempString &line, TModList &mods)
void x_RegisterLine(size_t lineNum, const CTempString &line, bool allowAcc)
CModHandler::TModList TModList
bool GetMods(const CBioseq &bioseq, TModList &mods, bool isVerbose)
vector< CTempString > m_ColumnNames
static void Apply(const CModHandler &mod_handler, CBioseq &bioseq, TSkippedMods &skipped_mods, FPostMessage fPostMessage=nullptr)
void SetExcludedMods(const vector< string > &excluded_mods)
function< void(const CModData &mod, const string &message, EDiagSev severity, EModSubcode subcode)> FReportError
static const string & GetCanonicalName(const TModEntry &mod_entry)
map< string, list< CModData > > TMods
const TMods & GetMods(void) const
list< CModData > TModList
void AddMods(const TModList &mods, EHandleExisting handle_existing, TModList &rejected_mods, FReportError fReportError=nullptr)
void SetMods(const TMods &mods)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
static void Apply(const CTempString &title, TModList &mods, string &remainder)
virtual bool PutError(const ILineError &)=0
Store error in the container, and return true if error was stored fine, and return false if the calle...
@ eProblem_GeneralParsingError
const_iterator end() const
const_iterator find(const key_type &key) const
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
static E_Choice WhichInverseSeqId(const CTempString &SeqIdCode)
Converts a string to a choice, no need to require a member.
@ eFasta
Tagged ID in NCBI's traditional FASTA style.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
void clear(void)
Clears the string.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
size_type size(void) const
Return the length of the represented array.
static string & ToLower(string &str)
Convert string to lower case -- string& version.
@ eTrunc_End
Truncate trailing whitespace only.
E_Choice
Choice variants.
@ e_Gibbmt
Geninfo backbone moltype.
@ e_Giim
Geninfo import id.
@ e_Gibbsq
Geninfo backbone seqid.
@ e_General
for other databases
@ e_Gi
GenInfo Integrated Database.
@ e_not_set
No variant selected.
list< CRef< CSeqdesc > > Tdata
void ResetDescr(void)
Reset Descr data member.
TTitle & SetTitle(void)
Select the variant.
const TTitle & GetTitle(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
bool IsSetDescr(void) const
Check if a value has been assigned to the Descr (descriptors) data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
constexpr bool empty(list< Ts... >) noexcept
void VisitAllBioseqs(objects::CSeq_entry &entry, _M &&m)
const GenericPointer< typename T::ValueType > T2 value
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static void s_PostProcessID(string &id)
static void sReportMissingMods(ILineErrorListener *pEC, const string &fileName, const CBioseq &bioseq)
static void sReportDuplicateIds(ILineErrorListener *pEC, const string &fileName, size_t currentLine, size_t previousLine, const CTempString &seqId)
static void s_PreprocessNoteMods(CModHandler::TModList &mods)
static void sReportMultipleMatches(ILineErrorListener *pEC, const string &fileName, size_t lineNum, const CBioseq &bioseq)
static bool s_ParseFastaIdString(const CTempString &fastaString, set< CTempString, PNocase_Generic< CTempString >> &idStrings)
static void sReportError(ILineErrorListener *pEC, EDiagSev severity, int subcode, const string &seqId, const string &message, ILineError::EProblem problemType=ILineError::eProblem_GeneralParsingError)
void g_ApplyMods(CMemorySrcFileMap *pNamedSrcFileMap, CMemorySrcFileMap *pDefaultSrcFileMap, const string &commandLineStr, bool readModsFromTitle, bool isVerbose, CModHandler::EHandleExisting mergePolicy, ILineErrorListener *pEC, CSeq_entry &entry)
static void sReportUnusedMods(ILineErrorListener *pEC, const string &fileName, size_t lineNum, const CTempString &seqId)
static pair< size_t, size_t > s_IdTypeToNumFields(CSeq_id::E_Choice choice)
static void s_AppendMods(const CModHandler::TModList &mods, string &title)
static void sPostError(ILineErrorListener *pEC, const string &message, const CTempString &seqId, size_t lineNum=0)
Define Case-insensitive string comparison methods.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4