(
autoparentClass = rec.m_parent_set->m_class;
82 for(
autodescr : rec.m_descr->Get()) {
93 structTHugeFileWriteContext {
95 boolis_fasta =
false;
99atomic_bool m_PubLookupDone{
false};
100std::mutex m_cleanup_mutex;
102std::function<CRef<CSeq_entry>()> next_entry;
106 if(
source->IsMultiSequence() || (is_fasta && handle_as_set)) {
113 if(!
source->IsMultiSequence()) {
117m_topentry = next_entry();
128temp_submit->
Assign(*m_submit);
131temp_top = temp_submit->
SetData().SetEntrys().front();
134temp_submit->
SetData().SetEntrys().clear();
135temp_submit->
SetData().SetEntrys().push_back(entry);
137}
else if(m_topentry->
IsSet()) {
139temp_top->
Assign(*m_topentry);
148 template<
typename_token>
149 classCFlatFileAsyncWriter
153CFlatFileAsyncWriter()
155m_multi_writer.SetMaxWriters(20);
157~CFlatFileAsyncWriter() {}
159 usingTFFFunction = std::function<void(
TToken&, std::ostream&)>;
161 voidPost(
TAsyncToken& iotoken, TFFFunction ff_func)
163 auto output= m_multi_writer.NewStream();
167ff_func(token, ostr);
168}
catch(
conststd::exception& e) {
169std::cerr << e.what();
171std::cerr <<
"unknown exception\n";
179 auto Write(std::ostream& o_stream)
181m_multi_writer.Open(o_stream);
191 for(
size_tsev = 0; sev < current.size(); ++sev) {
192total[sev].total += current[sev].total;
194 const auto& individual = current[sev].individual;
195 for(
autoit = begin(individual); it != end(individual); ++it) {
196 auto code= it->first;
197 auto count= it->second;
209THugeFileWriteContext
context;
217 context.source = &fasta_reader;
221 context.source = &fasta_reader;
229 "Template file descriptors are ignored if input is ASN.1");
231*unique_ptr<CLineError>(
243 boolfirstEntryInBlob =
true;
250 autoentry =
context.source->GetNextSeqEntry();
252 if(firstEntryInBlob) {
255 if(entry->
IsSeq()) {
261firstEntryInBlob =
false;
283 while(
context.source->GetNextBlob()) {
284firstEntryInBlob =
true;
286 context.asn_reader.FlattenGenbankSet();
288 m_secret_files->m_feature_table_reader->m_local_id_counter =
context.asn_reader.GetMaxLocalId() + 1;
292future<void> validator_writetask;
303 if(
context.is_fasta && descrs) {
305 if(
context.m_topentry->IsSet() &&
306 context.m_topentry->GetSet().IsSetSeq_set() &&
307!
context.m_topentry->GetSet().GetSeq_set().empty()) {
308top_set =
context.m_topentry->SetSet().SetSeq_set().front();
315 autoprocess_async = [&hugeFile,
this](
TAsyncToken& token) {
320 automake_ff_async = [
this, &ff_mutex](
TAsyncToken& token, std::ostream& ostr) {
338 if(
context.m_topentry->IsSet())
343 if(
context.source->GetSubmitBlock()) {
345 context.m_submit->SetSub().Assign(*
context.source->GetSubmitBlock());
347 context.m_submit->SetData().SetEntrys().clear();
348 context.m_submit->SetData().SetEntrys().push_back(
context.m_topentry);
353unique_ptr<CObjectOStream> ostr{
358TWriter async_writer(ostr.get());
359async_writer.SetDepth(10);
361TWriter::TProcessFunction ff_chain_func;
371 boolneed_update_date = !
context.is_fasta && s_AddUpdateDescriptor(
context.asn_reader);
374 if(numThreads >= 3) {
375std::ofstream ff_file;
376ff_file.exceptions(ios::failbit | ios::badbit);
378CFlatFileAsyncWriter<TAsyncToken> ff_writer;
381ff_chain_func = [&ff_writer, make_ff_async](
TAsyncToken& token) {
382ff_writer.Post(token, make_ff_async);
384ff_writer.Write(ff_file);
386async_writer.WriteAsyncMT(topobject, make_next_token, process_async, ff_chain_func);
388}
else if(numThreads == 2) {
389async_writer.WriteAsync2T(topobject, make_next_token, process_async, ff_chain_func);
391async_writer.WriteAsyncST(topobject, make_next_token, process_async, ff_chain_func);
399process_async(token);
400async_writer.Write(topobject);
402ff_chain_func(token);
408validator_writetask.wait();
416 for(
const auto& pErrItem : postponed) {
417 const autosev = pErrItem->GetSev();
418valStats[sev].total++;
419valStats[sev].individual[pErrItem->GetErrIndex()]++;
422 for(
const auto& pErrItem : postponed) {
428s_AppendValStats(
stats, valStats);
433 size_ttotalErrors{ 0 };
434 for(
autostatsEntry : valStats) {
435totalErrors += statsEntry.total;
437 if(totalErrors > 0) {
439statsStream.exceptions(ios::failbit | ios::badbit);
449 auto& submit = token.
submit;
451 auto& scope = token.
scope;
452 auto& seh = token.
seh;
455scope->AddDefaults();
479 const boolreadModsFromTitle =
491 m_secret_files->m_feature_table_reader->MergeCDSFeatures(*entry, token);
494 m_secret_files->m_feature_table_reader->MoveProteinSpecificFeats(*entry);
515 m_secret_files->m_feature_table_reader->MakeGapsFromFeatures(*entry);
523seh = scope->AddTopLevelSeqEntry(*entry);
549 m_secret_files->m_feature_table_reader->ChangeDeltaProteinToRawProtein(*entry);
566 automodMergePolicy =
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static bool ConvertPubFeatsToPubDescs(CSeq_entry_Handle seh)
Convert full-length publication features to publication descriptors.
EFormat
The formats are checked in the same order as declared here.
@ eBinaryASN
Binary ASN.1.
@ eGff3
GFF3, CGff3Reader.
@ eFasta
FASTA format sequence record, CFastaReader.
@ eAlignment
Text alignment.
void ConvertNs2Gaps(CSeq_entry &entry)
auto & GetTopEntry() const
auto & GetBioseqs() const
void Open(objects::edit::CHugeFile *file, objects::ILineErrorListener *pMessageListener) override
std::unique_ptr< std::istream > m_stream
CFormatGuess::EFormat m_format
static CLineError * Create(EProblem eProblem, EDiagSev eSeverity, const std::string &strSeqId, unsigned int uLine, const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), const std::string &strErrorMessage=string(""), const TVecOfLines &vecOfOtherLines=TVecOfLines())
Use this because the constructor is protected.
namespace ncbi::objects::
CRef< objects::CObjectManager > m_ObjMgr
void SetSeqId(objects::CSeq_entry &entry) const
void ApplyFileTracks(objects::CSeq_entry &entry) const
bool m_RemoteTaxonomyLookup
bool m_binary_asn1_output
unique_ptr< objects::edit::CRemoteUpdater > m_remote_updater
void ApplyAccession(objects::CSeq_entry &entry) const
objects::ILineErrorListener * m_logger
TSeqPos m_gap_Unknown_length
objects::CGapsEditor::TEvidenceSet m_DefaultEvidence
CRef< CSerialObject > CreateSubmitFromTemplate(CRef< objects::CSeq_entry > &object, CRef< objects::CSeq_submit > &submit) const
CRef< objects::CSeq_entry > m_entry_template
void CorrectCollectionDates(objects::CSeq_entry &entry) const
optional< size_t > m_use_threads
string GenerateOutputFilename(eFiles kind, string_view basename=kEmptyStr) const
unique_ptr< CValidMessageHandler > pValMsgHandler
void RenameProteinIdsQuals(objects::CSeq_feat &feature) const
void ApplyComments(objects::CSeq_entry &entry) const
objects::CGapsEditor::TCountToEvidenceMap m_GapsizeToEvidence
std::ostream & GetOstream(eFiles suffix)
bool ApplyCreateUpdateDates(objects::CSeq_entry &entry) const
void RemoveProteinIdsQuals(objects::CSeq_feat &feature) const
static void MergeSeqDescr(objects::CSeq_entry &dest, const objects::CSeq_descr &src, bool only_set)
objects::CBioseq_set::TClass m_ClassValue
static void UpdateTaxonFromTable(objects::CBioseq &bioseq)
const TGlobalInfo & GetGlobalInfo()
void UpdateECNumbers(objects::CSeq_entry &entry)
void Cleanup(CRef< objects::CSeq_submit > submit, objects::CSeq_entry_Handle &entry, const string &flags) const
void CollectDiscrepancies(CRef< objects::CSeq_submit > submit, objects::CSeq_entry_Handle &entry)
void Validate(CRef< objects::CSeq_submit > pSubmit, CRef< objects::CSeq_entry > pEntry, CValidMessageHandler &msgHandler)
shared_ptr< TValidatorContext > GetContextPtr()
CRef< CTable2AsnValidator > m_validator
void ProcessHugeFile(edit::CHugeFile &hugeFile, CNcbiOstream *output)
CTable2AsnContext m_context
void AddAnnots(CSeq_entry &entry)
void ProcessSecretFiles2Phase(CSeq_entry &result) const
TAdditionalFiles m_global_files
void ProcessSecretFiles1Phase(bool readModsFromTitle, TAsyncToken &)
void MakeFlatFile(CSeq_entry_Handle seh, CRef< CSeq_submit > submit, std::ostream &ostream)
void ProcessSingleEntry(CFormatGuess::EFormat inputFormat, TAsyncToken &token)
CMultiReader::TAnnots TAnnots
unique_ptr< CMultiReader > m_reader
CRef< CTable2AsnLogger > m_logger
void ProcessTopEntry(CFormatGuess::EFormat inputFormat, bool need_update_date, CRef< CSeq_submit > &submit, CRef< CSeq_entry > &entry)
unique_ptr< TAdditionalFiles > m_secret_files
static SQLCHAR output[256]
static const char * validate(DSNINFO *di)
Go looking for trouble.
@ eDiag_Warning
Warning message.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
void Write(CObjectOStream &out, TConstObjectPtr object, const CTypeRef &type)
@ eSerial_AsnText
ASN.1 text.
@ eSerial_AsnBinary
ASN.1 binary.
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty -- pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty -- not pointing to any object, which means having a null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
const TSeq & GetSeq(void) const
Get the variant data.
TSet & SetSet(void)
Select the variant.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSet(void) const
Check if variant Set is selected.
void SetClass(TClass value)
Assign a value to Class data member.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_genbank
converted genbank
@ e_Create_date
date entry first created/released
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
const TEntrys & GetEntrys(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
bool IsEntrys(void) const
Check if variant Entrys is selected.
void g_PostprocessErrors(const CHugeFileValidator::TGlobalInfo &globalInfo, const string &genbankSetId, CRef< CValidError > &pErrors)
void VisitAllFeatures(objects::CSeq_entry_EditHandle &entry_h, _M m)
void VisitAllBioseqs(objects::CSeq_entry &entry, _M &&m)
const CharType(& source)[N]
void SortSeqDescr(CSeq_entry &entry)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
void g_ApplyMods(CMemorySrcFileMap *pNamedSrcFileMap, CMemorySrcFileMap *pDefaultSrcFileMap, const string &commandLineStr, bool readModsFromTitle, bool isVerbose, CModHandler::EHandleExisting mergePolicy, ILineErrorListener *pEC, CSeq_entry &entry)
CRef< CSeq_descr > m_descriptors
unique_ptr< IIndexedFeatureReader > m_indexed_annots
unique_ptr< CMemorySrcFileMap > mp_src_qual_map
CRef< CSeq_submit > submit
atomic_bool * pPubLookupDone
CRef< CSeq_entry > top_entry
std::mutex * cleanup_mutex
void g_FormatErrItem(const CValidErrItem &item, CNcbiOstream &ostr)
void g_FormatValStats(const TValStats &stats, size_t total, CNcbiOstream &ostr)
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
static CS_CONTEXT * context
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4