A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/gene__model_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/sequence/gene_model.cpp Source File

124  "annotated by transcript or proteomic data"

;

126  "The sequence of the model RefSeq transcript was modified relative " 127  "to this genomic sequence to represent the inferred CDS"

;

129  "The sequence of the model RefSeq protein was modified relative " 130  "to this genomic sequence to represent the inferred CDS"

;

156  for

( ; desc_iter; ++desc_iter) {

169

, m_intron_stitch_threshold_flags(fBoth)

170

, m_min_intron(kDefaultMinIntron)

171

, m_allowed_unaligned(kDefaultAllowedUnaligned)

172

, m_is_gnomon(

false

)

173

, m_is_best_refseq(

false

)

187

:

m_impl

(new SImplementation(scope))

223  return m_impl

->CleanAlignment(align_in);

232  return m_impl

->ConvertAlignToAnnot(align, annot, seqs, gene_id, cdregion,

false

);

240  m_impl

->ConvertAlignToAnnot(aligns, annot, seqs);

244  const

objects::CSeq_loc &loc,

245

objects::CSeq_annot& annot,

246

objects::CBioseq_set& seqs,

254  "Can't find genomic sequence "

+ loc.GetId()->AsFastaString());

266  size_t

new_id_num = counter.

Add

(1);

269  string str

(

"lcl|MRNA_"

);

276  string str

(

"lcl|PROT_"

);

286

fake_align.

SetSegs

().SetSpliced().SetProduct_id().Assign(*rna_id);

287

fake_align.

SetSegs

().SetSpliced().SetGenomic_id().Assign(*loc.GetId());

289

fake_align.

SetSegs

().SetSpliced().SetGenomic_strand(loc.GetStrand());

290

fake_align.

SetSegs

().SetSpliced().SetProduct_type(

296

exon->SetProduct_start().SetNucpos(product_pos);

297

product_pos += loc_it.GetRange().GetLength();

298

exon->SetProduct_end().SetNucpos(product_pos-1);

299

exon->SetGenomic_start(loc_it.GetRange().GetFrom());

300

exon->SetGenomic_end(loc_it.GetRange().GetTo());

302  match

->SetMatch(loc_it.GetRange().GetLength());

303

exon->SetParts().push_back(

match

);

304

fake_align.

SetSegs

().SetSpliced().SetExons().push_back(exon);

306

fake_align.

SetSegs

().SetSpliced().SetProduct_length(product_pos);

309

cdregion.

SetData

().SetCdregion().SetFrame(frame);

314  "Non-standard frame specified with 5'-complete location"

);

338  if

(product_pos % 3) {

340  "Non-whole number of codons with 3'-complete location"

);

348

cdregion.

SetData

().SetCdregion().SetCode().Set().push_back(

code

);

354  m_impl

->ConvertAlignToAnnot(fake_align, annot, seqs, 0, &cdregion,

false

);

363  m_impl

->SetFeatureExceptions(feat, align);

371  m_impl

->SetPartialFlags(gene_feat, mrna_feat, cds_feat);

376  m_impl

->RecomputePartialFlags(annot);

383

: m_aln(aln), m_scope(scope), m_genomic_row(-1)

384

, m_allowed_unaligned(allowed_unaligned), m_opts(opts)

397  "CreateGeneModelFromAlign(): " 398  "failed to create consistent alignment"

);

414  "CreateGeneModelFromAlign(): " 415  "More than one genomic row in alignment"

);

421  "CreateGeneModelFromAlign(): " 422  "No genomic sequence found in alignment"

);

430  if

(m_aln.GetSegs().IsSpliced()) {

431

rna_loc = x_GetLocFromSplicedExons(m_aln);

433  const CSeq_id

&

id

= m_aln.GetSeq_id(GetRnaRow());

440

rna_loc = x_Mapper()->Map(*range_loc);

448  return

m_genomic_row;

454  return

GetGenomicRow() == 0 ? 1 : 0;

465

x_Mapper()->IncludeSourceLocs(

b

);

470

x_Mapper()->SetMergeNone();

494  if

(!prev_exon.

IsNull

() &&

514  if

(donor_ok || !acceptor_ok) {

517  if

(acceptor_ok || !donor_ok) {

526

prev_int = genomic_int;

562

align->

Assign

(align_in);

564

vector<SExon> orig_exons =

GetExons

(*align);

573  if

(

GetExons

(*align) != orig_exons) {

596

model_num.erase(model_num.size()-2, 2);

613  bool

found_start_codon =

false

;

614  bool

found_stop_codon =

false

;

617  if

((*mod_it)->IsStart_codon_found()) {

618

found_start_codon = (*mod_it)->GetStart_codon_found();

620  if

((*mod_it)->IsStop_codon_found()) {

621

found_stop_codon = (*mod_it)->GetStop_codon_found();

629  "Can't find genomic sequence "

+

634

fake_transcript_align->

Assign

(*align);

635

align.

Reset

(fake_transcript_align);

645  size_t

new_id_num = counter.

Add

(1);

647  string str

(

"lcl|MRNA_"

);

654

fake_transcript_align->

SetSegs

().SetSpliced().SetProduct_id(

657

fake_transcript_align->

SetSegs

().SetSpliced().SetProduct_type(

660

fake_transcript_align->

SetSegs

().SetSpliced().SetExons())

667

fake_transcript_align->

SetSegs

().SetSpliced().SetExons().back();

668  bool

aligned_to_the_end =

669

last_exon->GetProduct_end().GetNucpos()+1==

672

fake_transcript_align->

SetSegs

().SetSpliced().SetProduct_length() =

674

(((found_stop_codon && aligned_to_the_end) || !aligned_to_the_end)?3:0);

676  if

(found_stop_codon && aligned_to_the_end) {

677  bool

is_minus = last_exon->IsSetGenomic_strand() ?

680

. IsSetGenomic_strand() &&

686

? last_exon->GetGenomic_start()

687

: genomic_length - last_exon->GetGenomic_end() - 1);

688  if

(space_for_codon < 3) {

691  "Stop codon goes outside genomic sequence"

);

694

new_exon->SetProduct_start().SetNucpos(

695

last_exon->GetProduct_end().GetNucpos() + space_for_codon + 1);

696

new_exon->SetProduct_end().SetNucpos(

697

last_exon->GetProduct_end().GetNucpos() + 3);

698

new_exon->SetGenomic_start(

699

is_minus ? genomic_length - 3 + space_for_codon : 0);

700

new_exon->SetGenomic_end(

701

is_minus ? genomic_length - 1 : 2 - space_for_codon);

702  if

(last_exon->IsSetProduct_strand()) {

703

new_exon->SetProduct_strand(last_exon->GetProduct_strand());

705  if

(last_exon->IsSetGenomic_strand()) {

706

new_exon->SetGenomic_strand(last_exon->GetGenomic_strand());

708

fake_transcript_align->

SetSegs

().SetSpliced().SetExons()

709

. push_back(new_exon);

713 

last_exon->SetProduct_end().SetNucpos() += space_for_codon;

715

last_exon->SetGenomic_start() -= space_for_codon;

717

last_exon->SetGenomic_end() += space_for_codon;

719  if

(last_exon->IsSetParts() && space_for_codon) {

722

match_stop_codon->SetMatch(space_for_codon);

723

last_exon->SetParts().push_back(match_stop_codon);

728

cd_feat->

SetData

().SetCdregion();

731

fake_transcript_align->

SetSegs

().SetSpliced().SetProduct_id(),

733  if

(!found_start_codon &&

734

fake_transcript_align->

SetSegs

().SetSpliced().SetExons().front()->GetProduct_start().GetNucpos()==0) {

737  if

(!found_stop_codon && aligned_to_the_end) {

746

cd_feat->

SetData

().SetCdregion().SetCode().Set().push_back(

code

);

757

transcribed_rna_id.

Assign

(query_rna_id);

758  if

(cds_feat_on_genome_with_translated_product &&

759

cds_feat_on_genome_with_translated_product->

CanGetProduct

() &&

760

cds_feat_on_query_mrna &&

762  CSeq_id

* translated_protein_id =

const_cast<CSeq_id

*

>

(cds_feat_on_genome_with_translated_product->

SetProduct

().GetId());

773  const CSeq_feat

* cds_feat_on_query_mrna_ptr,

774  bool

call_on_align_list)

784  if

(is_protein_align) {

812  if

(cds_feat_on_query_mrna_ptr) {

814

cds_feat_on_query_mrna->

Assign

(*cds_feat_on_query_mrna_ptr);

817  if

(cdregion_handle) {

824

vector<CMappedFeat> ncRNAs;

827  if

(query_rna_handle) {

829

feat_iter; ++feat_iter) {

830  const CSeq_loc

&rna_loc = feat_iter->GetLocation();

831  if

(feat_iter->GetData().GetSubtype() !=

833

++rna_loc.

begin

() == rna_loc.

end

() &&

837

full_length_rna = *feat_iter;

838

}

else if

(feat_iter->GetData().GetSubtype() ==

841

ncRNAs.push_back(*feat_iter);

848  size_t

model_num = counter.

Add

(1);

852

rna_feat_loc_on_genome->Assign(mapper.GetRnaLoc());

855

list<CRef<CSeq_loc> > transcribed_mrna_seqloc_refs;

863

cds_feat_on_query_mrna, cds_feat_on_transcribed_mrna);

870

*align, rna_feat_loc_on_genome, opts)

872

*transcribed_rna_id, cds_feat_on_query_mrna);

873  if

(mrna_feat_on_genome_with_translated_product &&

874

!mrna_feat_on_genome_with_translated_product->

IsSetProduct

()) {

876 

mrna_feat_on_genome_with_translated_product->

877

SetProduct().SetWhole().

Assign

(*transcribed_rna_id);

882

transcribed_mrna_seqloc_refs,

883

*align, rna_feat_loc_on_genome, time, model_num, seqs, opts);

891

*mrna_feat_on_genome_with_translated_product,

892

cds_feat_on_genome_with_translated_product.

GetPointer

());

898  if

(!call_on_align_list){

903

rna_feat_loc_on_genome, genomic_id, gene_id);

907

annot.

SetData

().SetFtable().push_back(gene_feat);

909

gene =

genes

.

insert

(make_pair(gene_id,gene_feat)).first;

911

gene_feat = gene->second;

913

&mrna_feat_on_genome_with_translated_product->

GetLocation

()));

917

genexref->SetId(*gene_feat->

SetIds

().front());

920

mrnaxref->SetId(*mrna_feat_on_genome_with_translated_product->

SetIds

().front());

922

gene_feat->

SetXref

().push_back(mrnaxref);

923

mrna_feat_on_genome_with_translated_product->

SetXref

().push_back(genexref);

927

rna_feat_loc_on_genome, genomic_id);

930

annot.

SetData

().SetFtable().push_back(gene_feat);

935  if

(mrna_feat_on_genome_with_translated_product) {

938

annot.

SetData

().SetFtable().push_back(mrna_feat_on_genome_with_translated_product);

943  if

(cds_feat_on_genome_with_translated_product.

NotNull

()) {

944

propagated_features.push_back(cds_feat_on_genome_with_translated_product);

946  if

(cds_feat_on_query_mrna && cds_feat_on_query_mrna->

CanGetProduct

()) {

950  for

(

CFeat_CI

feat_iter(prot_handle,

952

feat_iter; ++feat_iter) {

954

feat_iter->GetData().GetProt();

956

!prot_ref.

GetName

().empty()) {

959

prot_xref->SetData().SetProt().SetName()

960

. push_back(prot_ref.

GetName

().front());

961

cds_feat_on_genome_with_translated_product->

SetXref

().push_back(prot_xref);

969  ITERATE

(vector<CMappedFeat>, it, ncRNAs){

973

propagated_features.push_back(ncrna_feat);

978

annot.

SetData

().SetFtable().push_back(*it);

982  if

((*it)->IsSetIds()) {

983

propagatedxref->SetId(*(*it)->SetIds().front());

987

mrnaxref->SetId(*mrna_feat_on_genome_with_translated_product->

SetIds

().front());

989

(*it)->SetXref().push_back(mrnaxref);

990

mrna_feat_on_genome_with_translated_product->

SetXref

().push_back(propagatedxref);

994  if

(!call_on_align_list){

995  if

(propagated_features.empty()){

1000  SetPartialFlags

(gene_feat, mrna_feat_on_genome_with_translated_product, *it);

1006  if

(mrna_feat_on_genome_with_translated_product) {

1007

mrna_feat_on_genome_with_translated_product->

SetProduct

().SetWhole().Assign(query_rna_id);

1009  if

(cds_feat_on_genome_with_translated_product) {

1011

cds_feat_on_genome_with_translated_product->

1013

cds_feat_on_transcribed_mrna->

1017

seq_id->Assign(query_rna_id);

1018

cds_feat_on_transcribed_mrna->

SetLocation

().SetId(*seq_id);

1020

(*loc)->SetId(*seq_id);

1025  if

(!query_rna_handle) {

1027

cds_feat_on_query_mrna, cds_feat_on_genome_with_translated_product);

1031  if

(mrna_feat_on_genome_with_translated_product) {

1033  m_scope

->GetBioseqHandle(query_rna_id);

1040

cds_feat_on_genome_with_translated_product.

GetPointer

(),

1042

cds_feat_on_transcribed_mrna.

GetPointer

());

1045  m_scope

->RemoveTopLevelSeqEntry(rna_seh);

1048  if

(cds_feat_on_genome_with_translated_product) {

1056  TSeqPos

clean_match_count = 0;

1060

&transcribed_mrna_seqloc_refs,

1061

&clean_match_count);

1062  if

(!clean_match_count) {

1064 

annot.

SetData

().SetFtable().remove(cds_feat_on_genome_with_translated_product);

1065

cds_feat_on_genome_with_translated_product =

NULL

;

1068  m_scope

->RemoveTopLevelSeqEntry(prot_seh);

1073  RenameGeneratedBioseqs

(query_rna_id, *transcribed_rna_id, cds_feat_on_query_mrna, cds_feat_on_genome_with_translated_product);

1078  m_scope

->AddTopLevelSeqEntry(**it);

1087  for

(CBioseq_set::TSeq_set::iterator bioseq_it =

1091  if

(((*bioseq_it)->GetSeq().IsNa() &&

1093

((*bioseq_it)->GetSeq().IsAa() &&

1096

bioseq_it = seqs.

SetSeq_set

().erase(bioseq_it);

1105  if

(loc->IsPacked_int() && loc->GetPacked_int().Get().size()==1) {

1107

loc->SetInt(*interval);

1110  return

is_protein_align ? cds_feat_on_genome_with_translated_product : mrna_feat_on_genome_with_translated_product;

1133  const CSeq_id

& genomic_id = clean_align->

GetSeq_id

(mapper.GetGenomicRow());

1137  else if

(!(gene_handle == genomic_id))

1139  "Bad list of alignments to ConvertAlignToAnnot(); alignments on different genes"

);

1142

loc->Assign(mapper.GetRnaLoc());

1153

gene_annot.

SetData

().SetFtable().push_front(gene_feat);

1155

annot.

SetData

().SetFtable().splice(annot.

SetData

().SetFtable().end(),

1156

gene_annot.

SetData

().SetFtable());

1173  if

(!inst.

SetExt

().SetDelta().Set().empty()) {

1198

inst.

SetExt

().SetDelta().AddLiteral(seq, mol_class);

1208

inst.

SetExt

().SetDelta().AddLiteral(seq, mol_class);

1218  bool

add_unaligned_parts,

1219  bool

mark_transcript_deletions,

1235  int

prev_product_to = -1;

1236  bool

prev_fuzz =

false

;

1246  if

((prev_product_to > -1 &&

1249  if

(has_gap !=

NULL

) {

1253

inst.

SetExt

().SetDelta().AddLiteral

1257  int

gap_len = add_unaligned_parts ? mrna_loc->

GetTotalRange

().

GetFrom

()-(prev_product_to+1) : 0;

1259

seq_size += gap_len;

1260

prev_product_to += gap_len;

1261

inst.

SetExt

().SetDelta().AddLiteral(gap_len);

1263

inst.

SetExt

().SetDelta().Set().back()

1268  unsigned

part_count = 0;

1269  unsigned

mapped_exon_len = 0;

1270  for

(

CSeq_loc_CI

part_it(*mrna_loc); part_it; ++part_it) {

1272  if

(prev_product_to<0) {

1273

prev_product_to = part_it.GetRange().GetFrom()-1;

1274  if

(add_unaligned_parts && part_it.GetRange().GetFrom() > 0) {

1275

seq_size = part_it.GetRange().GetFrom();

1276

inst.

SetExt

().SetDelta().AddLiteral(seq_size);

1279  int

deletion_len = part_it.GetRange().GetFrom()-(prev_product_to+1);

1285  if

(deletion_len > 0) {

1286  if

(mark_transcript_deletions && part_count == 1) {

1290

deletion_loc.

SetInt

().SetId().Assign(part_it.GetSeq_id());

1291

deletion_loc.

SetInt

().SetFrom(prev_product_to+1);

1292

deletion_loc.

SetInt

().SetTo(part_it.GetRange().GetFrom()-1);

1297  if

(deletion_len > 0 && (mark_transcript_deletions || part_count > 1)) {

1298  if

(has_indel !=

NULL

) {

1301  string

deletion(deletion_len,

'N'

);

1303

seq_size += deletion.size();

1311

mapped_exon_len += it.GetRange().GetLength();

1320

seq_size += vec.

size

();

1322

prev_product_to = part_it.GetRange().GetTo();

1324  if

(has_indel !=

NULL

&&

1326

mapped_exon_len != loc_it.GetRange().GetLength())) {

1337  if

(seq_size < (

int

)length) {

1339

inst.

SetExt

().SetDelta().AddLiteral

1343

inst.

SetExt

().SetDelta().AddLiteral(length-seq_size);

1378

}

else if

(cds_feat_on_query_mrna.

IsNull

()) {

1396

assembly->

Assign

(align);

1397

bioseq.

SetInst

().SetHist().SetAssembly().push_back(assembly);

1402  string str

(

"lcl|CDNA_"

);

1408

transcribed_rna_id->

Set

(

str

);

1410

bioseq.

SetId

().push_back(transcribed_rna_id);

1412  if

(cds_feat_on_query_mrna.

NotNull

()) {

1418

cds_feat_on_transcribed_mrna->

Assign

(*cds_feat_on_query_mrna);

1419

cds_feat_on_transcribed_mrna->

SetLocation

().SetId(*transcribed_rna_id);

1421

annot->

SetData

().SetFtable().push_back(cds_feat_on_transcribed_mrna);

1425

cds_feat_on_transcribed_mrna->

SetData

().SetCdregion();

1428

(*it)->SetLoc().SetId(*transcribed_rna_id);

1439  return

transcribed_rna_id;

1445

code_break->

SetLoc

(loc);

1446

code_break->

SetAa

().SetNcbieaa(ncbieaa);

1448

feat.

SetData

().SetCdregion().SetCode_break().push_back(code_break);

1466  string str

(

"lcl|PROT_"

);

1473

cds_feat_on_transcribed_mrna->

SetProduct

().SetWhole(*translated_protein_id);

1475

bioseq.

SetId

().push_back(translated_protein_id);

1496

bioseq.

SetDescr

().Set().push_back(desc);

1510  bool

final_code_break =

false

;

1512

final_code_break = (strprot[strprot.size()-1] !=

'*'

);

1514

strprot.resize(strprot.size()-1);

1521

seq_inst.

SetExt

().SetDelta();

1541  bool

starts_with_code_break =

false

;

1545

starts_with_code_break =

true

;

1553  size_t

skip_5_prime = 0;

1554  size_t

skip_3_prime = 0;

1555  unsigned

count_internal_stops = 0;

1558  int

codon_start_pos = (

int

)ci.GetPosition() + frame;

1559  int len

=

int

(ci.GetLength()) - frame;

1561  _ASSERT

( -3 < frame && frame < 3 );

1565

(ci.IsUnknownLength() || !ci.IsSetData()) &&

1575  bool

stop_codon_included = e > strprot.size();

1576  if

(stop_codon_included) {

1588  if

(ci.IsUnknownLength()) {

1589

seq_inst.

SetExt

().SetDelta().AddLiteral(

len

);

1591

}

else if

(!ci.IsSetData()) {

1592  if

(

b

==skip_5_prime &&

1594

skip_5_prime += e-

b

;

1595

}

else if

(stop_codon_included &&

b

==e) {

1599  if

(strprot[

b

] !=

'X'

) {

1604

seq_inst.

SetExt

().SetDelta().AddLiteral(

static_cast<TSeqPos>

(e-

b

));

1608  if

(stop_codon_included && final_code_break) {

1611

stop_codon_on_mrna->

SetInt

().SetFrom(pos_on_mrna);

1612

stop_codon_on_mrna->

SetInt

().SetTo(pos_on_mrna + 2);

1613  AddCodeBreak

(*cds_feat_on_transcribed_mrna, *stop_codon_on_mrna,

'*'

);

1614

transcribed_mrna_seqloc_refs.push_back(stop_codon_on_mrna);

1618  if

(

b

==0 && strprot[

b

] !=

'M'

&&

1619

!starts_with_code_break &&

1621

strprot[

b

] =

'M'

;

1624

start_codon_on_mrna->

SetInt

().SetFrom(pos_on_mrna);

1625

start_codon_on_mrna->

SetInt

().SetTo(pos_on_mrna + 2);

1626  AddCodeBreak

(*cds_feat_on_transcribed_mrna, *start_codon_on_mrna,

'M'

);

1627

transcribed_mrna_seqloc_refs.push_back(start_codon_on_mrna);

1631  size_t

stop_aa_pos =

b

-1;

1632  while

((stop_aa_pos = strprot.find(

'*'

, stop_aa_pos+1)) < e) {

1633

strprot[stop_aa_pos] =

'X'

;

1637

internal_stop_on_mrna->

SetInt

().SetFrom(pos_on_mrna);

1638

internal_stop_on_mrna->

SetInt

().SetTo(pos_on_mrna + 2);

1639  AddCodeBreak

(*cds_feat_on_transcribed_mrna, *internal_stop_on_mrna,

'X'

);

1640

transcribed_mrna_seqloc_refs.push_back(internal_stop_on_mrna);

1641

++count_internal_stops;

1650  _ASSERT

( -2 <= frame && frame <= 0 );

1654

align_info->

SetType

().SetStr(

"AlignInfo"

);

1655

align_info->

AddField

(

"num_internal_stop_codon"

, (

int

)count_internal_stops);

1656

cds_feat_on_transcribed_mrna->

AddExt

(align_info);

1660  if

(

b

< strprot.size() && strprot[

b

] !=

'X'

) {

1669

strprot.size() <=

b

+ (frame==0?0:1) );

1673

!seq_inst.

GetExt

().

GetDelta

().

Get

().back()->GetLiteral().IsSetSeq_data()) {

1674

skip_3_prime += seq_inst.

GetExt

().

GetDelta

().

Get

().back()->GetLiteral().GetLength();

1675

seq_inst.

SetExt

().SetDelta().Set().pop_back();

1679  if

(skip_5_prime || skip_3_prime) {

1690

cds_feat_on_transcribed_mrna->

SetLocation

(*to_mrna.

Map

(*prot_loc));

1695  if

(seq_inst.

SetExt

().SetDelta().Set().size() == 1 && seq_inst.

SetExt

().SetDelta().Set().back()->GetLiteral().IsSetSeq_data()) {

1698

dprot->

Assign

(seq_inst.

SetExt

().SetDelta().Set().back()->GetLiteral().GetSeq_data());

1707

cds_feat_on_assembly_mrna->

Assign

(*cds_feat_on_transcribed_mrna);

1711

cds_feat_on_assembly_mrna->

SetLocation

().SetInt().SetTo() -= 3;

1717

prot_assembly->

SetSegs

().SetSpliced().SetProduct_length(seq_inst.

GetLength

());

1719

seq_inst.

SetHist

().SetAssembly().push_back(prot_assembly);

1731  m_scope

->RemoveTopLevelSeqEntry(prot_seh);

1734  m_scope

->RemoveTopLevelSeqEntry(mrna_seh);

1755  if

(!gnomon_model_num.empty()) {

1757

obj_id->

SetStr

(

"rna."

+ gnomon_model_num);

1760

mrna_feat->

SetIds

().push_back(feat_id);

1763

mrna_feat->

SetProduct

().SetWhole().Assign(transcribed_rna_id);

1767  if

(

info

&&

info

->IsSetBiomol()) {

1768  switch

(

info

->GetBiomol()) {

1792  if

(

info

->IsSetGbmoltype()) {

1793

RNA_class =

info

->GetGbmoltype();

1810  if

(!RNA_class.empty()) {

1811

mrna_feat->

SetData

().SetRna().SetExt().SetGen().SetClass(RNA_class);

1814  if

(!name.empty()) {

1815  if

(!RNA_class.empty()) {

1816

mrna_feat->

SetData

().SetRna().SetExt().SetGen().SetProduct(name);

1818

mrna_feat->

SetData

().SetRna().SetExt().SetName(name);

1840  bool

update_existing_gene = gene_feat;

1841  string

gene_id_str =

"gene."

;

1846  if

(!update_existing_gene) {

1847  if

(feat_iter && feat_iter.

GetSize

()) {

1855

gene_feat->

SetData

().SetGene();

1859

obj_id->

SetStr

(gene_id_str);

1862

gene_feat->

SetIds

().push_back(feat_id);

1875

}

else if

(feat_iter && feat_iter.

GetSize

()) {

1885  if

(feat_iter && feat_iter.

GetSize

() == 1 && update_existing_gene) {

1891  tag

->Assign(**xref_it);

1892  bool

duplicate =

false

;

1897  if

((*previous_xref_it)->Match(**xref_it)){

1910 

gene_feat->

SetData

().SetGene().SetDesc(gene_id_str);

1934

align, loc, opts,

offset

);

1936  if

(cds_feat_on_genome) {

1942

loc_ranges += loc_it.GetRange();

1948  string

gnomon_model_num;

1955  if

(!gnomon_model_num.empty()) {

1957

obj_id->

SetStr

(

"cds."

+ gnomon_model_num);

1960

cds_feat_on_transcribed_mrna->

SetIds

().push_back(feat_id);

1963

transcribed_mrna_seqloc_refs,

1964

time, model_num, seqs);

1967

cds_feat->

Assign

(*cds_feat_on_transcribed_mrna);

1975  if

(is_partial_5prime &&

offset

) {

1978

orig_frame = cds_feat->

GetData

()

1984  int

frame = (

offset

- orig_frame) % 3;

1988

frame = (3 - frame) % 3;

1989  if

(frame != orig_frame) {

1992

cds_feat->

SetData

().SetCdregion()

1996

cds_feat->

SetData

().SetCdregion()

2000

cds_feat->

SetData

().SetCdregion()

2006  "mod 3 out of bounds"

);

2011  if

(!gnomon_model_num.empty() && !is_partial_5prime) {

2013  if

(cds_start >= 3) {

2021

vec.

GetSeqData

(cds_start % 3, cds_start, mrna);

2030  SIZE_TYPE

stop_5prime = strprot.rfind(

'*'

);

2031  if

(stop_5prime !=

NPOS

) {

2032

stop_5prime = stop_5prime*3+cds_start%3;

2034

stop_5prime_feature->

SetData

().SetImp().SetKey(

"misc_feature"

);

2035

stop_5prime_feature->

SetComment

(

"upstream in-frame stop codon"

);

2041

stop_5prime_feature->

SetLocation

(*stop_5prime_location);

2058

cds_feat->

SetData

().SetCdregion();

2059

CCdregion::TCode_break::iterator it =

2063

code_break_loc.

Assign

((*it)->GetLoc());

2068  if

(new_cb_loc->

IsEquiv

()) {

2069

new_cb_loc = new_cb_loc->

GetEquiv

().

Get

().front();

2073  if

(new_cb_loc && !new_cb_loc->

IsNull

()) {

2075

new_cb_ranges += loc_it.GetRange();

2077

new_cb_ranges &= loc_ranges;

2080

(*it)->SetLoc(*new_cb_loc);

2101

name = sequence::CDeflineGenerator().GenerateDefline(handle);

2113  if

(feat_iter && feat_iter.

GetSize

() &&

2119  size_t

last_comma = name.rfind(

','

);

2120  if

(last_comma != string::npos) {

2121

name.erase(last_comma);

2133  if

(desc->GetUser().HasField(

"polyA required for stop codon"

)) {

2152

non_const_loc->

Assign

(*loc);

2154

align, non_const_loc, opts,

offset

);

2173

list< CRef< CSeq_loc > >& a_list = a_mix->

SetMix

().Set();

2174  const

list< CRef< CSeq_loc > >& b_list = b_mix->

GetMix

().

Get

();

2177  for

(list<

CRef< CSeq_loc >

>::iterator a_i = a_list.begin(); a_i != a_list.end();) {

2180

a_list.splice(a_i, diff->

SetMix

().Set());

2181

a_i = a_list.erase(a_i);

2184  if

(a_list.size() == 1) {

2185  return

a_list.front();

2214  for

(

CSeq_loc_CI

loc_it(feature_on_mrna->GetLocation());

2234  "failed to find requisite parts of " 2239  if

( !this_loc_mapped ||

2240

this_loc_mapped->

IsNull

() ||

2241

this_loc_mapped->

IsEmpty

() ) {

2245  if

( !mapped_loc ) {

2249

feature_on_mrna->GetLocation().GetTotalRange().GetFrom();

2252  bool

is_partial_5prime =

2254  bool

is_partial_3prime =

2258  bool

last_range = !++it1;

2259  if

(is_partial_3prime && last_range &&

2262

feature_on_mrna->GetData().IsCdregion() &&

2268

equiv->

GetEquiv

().

Get

().back()->GetTotalRange().GetTo();

2269  if

(missing_end < 3) {

2272 

is_partial_3prime =

false

;

2286

sub.

SetInt

().SetId().Assign(*this_loc_mapped->

GetId

());

2290  bool

cross_origin = (left > right);

2297

half->

SetTo

(genomic_size-1);

2300

half->

SetTo

(right);

2309  if

(this_loc_mapped->

IsMix

()) {

2313  if

(subloc_it.GetRangeAsSeq_loc()->

2316

mrna_fuzzy_boundaries.

insert

(

2317

subloc_it.GetRange().GetFrom());

2319  if

(subloc_it.GetRangeAsSeq_loc()->

2322

mrna_fuzzy_boundaries.

insert

(

2323

subloc_it.GetRange().GetTo());

2328

this_loc_mapped->

SetMix

().Set())

2330

(*subloc_it)->SetPartialStart(

2331

mrna_fuzzy_boundaries.count(

2334

(*subloc_it)->SetPartialStop(

2335

mrna_fuzzy_boundaries.count(

2348

mapped_loc->

SetMix

().Set().push_back(this_loc_mapped);

2360  if

(mapped_loc && feature_on_mrna->GetData().IsRna())

2399  if

(mapped_loc && feature_on_mrna->GetData().IsCdregion()) {

2404  for

(; vec.

IsInGap

(start_gap); ++start_gap);

2405  if

(start_gap > 0 && start_gap < vec.

size

()) {

2412

orig_mapped_loc.

Assign

(*mapped_loc);

2415  while

(mapped_loc->

SetPacked_int

().Set().front()->GetLength()

2418

start_gap -= mapped_loc->

SetPacked_int

().Set().front()->GetLength();

2425

first_exon.

SetTo

() -= start_gap;

2427

first_exon.

SetFrom

() += start_gap;

2433

loc->

Assign

(*SubtractPreserveBiologicalOrder(*loc, *SubtractPreserveBiologicalOrder(orig_mapped_loc, *mapped_loc)));

2438  for

(; vec.

IsInGap

(vec.

size

() - 1 - end_gap); ++end_gap);

2439  if

(end_gap > 0 && end_gap < vec.

size

()) {

2444

orig_mapped_loc.

Assign

(*mapped_loc);

2447  while

(mapped_loc->

SetPacked_int

().Set().back()->GetLength() <= end_gap)

2449

end_gap -= mapped_loc->

SetPacked_int

().Set().back()->GetLength();

2456

last_exon.

SetFrom

() += end_gap;

2458

last_exon.

SetTo

() -= end_gap;

2463

loc->

Assign

(*SubtractPreserveBiologicalOrder(*loc, *SubtractPreserveBiologicalOrder(orig_mapped_loc, *mapped_loc)));

2472

mapped_feat->

Assign

(*feature_on_mrna);

2485  if

(propagated_feat){

2500  if

(mrna_feat && propagated_feat)

2532  if

(gene_feat && mrna_feat){

2545  if

(gene_feat && propagated_feat && !mrna_feat){

2582

feature::CFeatTree

tree

(sah);

2583

vector<CMappedFeat> top_level_features =

tree

.GetChildren(

CMappedFeat

());

2586 

vector< vector<CMappedFeat> > top_level_features_by_type;

2589  ITERATE

(vector<CMappedFeat>, it, top_level_features)

2590

top_level_features_by_type[it->GetData().Which()].push_back(*it);

2597  ITERATE

(vector<CMappedFeat>, gene_it,

2607

vector<CMappedFeat> gene_children =

2608

gene_feat ?

tree

.GetChildren(*gene_it)

2610  sort

(gene_children.begin(), gene_children.end());

2612  ITERATE

(vector<CMappedFeat>, child_it, gene_children){

2621

}

else if

(!child_feat || child_feat->

GetData

().

IsRna

()){

2622

vector<CMappedFeat> rna_children =

2623

child_feat ?

tree

.GetChildren(*child_it)

2629  while

((child_it+1) != gene_children.end() &&

2632

(child_it+1)->GetTotalRange())){

2633

rna_children.push_back(*(++child_it));

2635  if

(rna_children.empty()){

2639  ITERATE

(vector<CMappedFeat>, rna_child_it, rna_children){

2657

!propagated_feature || !propagated_feature->

IsSetDbxref

())

2663  if

((*gene_xref_it)->GetDb() !=

"miRBase"

)

2665  if

((*gene_xref_it)->GetDb() == (*propagated_xref_it)->GetDb() &&

2666

!(*gene_xref_it)->Match(**propagated_xref_it))

2668  string

propagated_feature_desc;

2670

propagated_feature_desc =

"corresponding cdregion"

;

2673  "Unexpected propagated feature type"

);

2674

propagated_feature_desc =

"propagated ncRNA feature"

;

2680

<<

" and "

<< propagated_feature_desc

2681

<<

" have "

<< (*gene_xref_it)->GetDb()

2682

<<

" dbxrefs with inconsistent tags"

);

2697  for

(

CFeat_CI

feat_iter(handle, sel); feat_iter; ++feat_iter) {

2699

feat->

Assign

(feat_iter->GetOriginalFeature());

2701

mapper.

Map

(feat_iter->GetLocation());

2706

annot.

SetData

().SetFtable().push_back(feat);

2736  if

( !(*it)->IsSetId() ) {

2741  const CFeat_id

& feat_id = (*it)->GetId();

2764

(

"rearrangement required for product"

);

2790  for

( ; align_iter; ++align_iter) {

2797

al.

Reset

(&this_align);

2803  bool

has_length_mismatch =

false

;

2805  bool

has_incomplete_polya_tail =

false

;

2806  bool

partial_unaligned_section =

false

;

2821

has_length_mismatch =

true

;

2840

partial_unaligned_section =

true

;

2849  switch

((*part_it)->Which()) {

2851

pos += (*part_it)->GetMatch();

2855  TSeqRange

(pos, pos+(*part_it)->GetMismatch()-1);

2856

pos += (*part_it)->GetMismatch();

2859

pos += (*part_it)->GetDiag();

2863

delete_sizes[pos] = (*part_it)->GetGenomic_ins();

2867  TSeqRange

(pos, pos+(*part_it)->GetProduct_ins()-1);

2868

pos += (*part_it)->GetProduct_ins();

2880  if

(

r

.GetFrom() != 0) {

2882

partial_unaligned_section =

true

;

2884

insert_locs +=

TSeqRange

(0,

r

.GetFrom()-1);

2898  if

(

r

.GetTo() + 1 < max_align_len) {

2900

partial_unaligned_section =

true

;

2902

insert_locs +=

TSeqRange

(

r

.GetTo()+1, max_align_len-1);

2914  if

( insert_locs.

empty

() && delete_locs.

empty

() && !partial_unaligned_section)

2930

mismatch_locs.

clear

();

2932  for

( ; prod_it != prod_end && genomic_it != genomic_end;

2933

++prod_it, ++genomic_it) {

2934  if

(*prod_it != *genomic_it) {

2939  unsigned

tail_len =

Convert

(prod_end - prod_it);

2941  for

( ; prod_it != prod_end; ++prod_it) {

2942  if

(*prod_it ==

'A'

) {

2947  if

(tail_len && count_a >= tail_len * 0.8) {

2949  if

(count_a < tail_len * 0.95) {

2950

has_incomplete_polya_tail =

true

;

2953  else if

(tail_len) {

2955

partial_unaligned_section =

true

;

2958

insert_locs +=

TSeqRange

(end_pos-tail_len+1, end_pos);

2964  if

(!insert_locs.

empty

() ||

2965

!delete_locs.

empty

() ||

2966

has_length_mismatch ||

2967

has_incomplete_polya_tail ||

2968

partial_unaligned_section) {

2969

except_text =

"unclassified transcription discrepancy"

;

2971  else if

(!mismatch_locs.

empty

()) {

2972

except_text =

"mismatches in transcription"

;

2976  x_SetComment

(feat, cds_feat, cds_feat_on_mrna, align, mismatch_locs,

2977

insert_locs, delete_locs, delete_sizes,

2978

partial_unaligned_section);

2989  if

(range_it->GetLength() > pos) {

2990

pos += range_it->GetFrom();

2993

pos -= range_it->GetLength();

2996  CSeq_loc

base_loc(*mapped_protein_id, pos, pos);

2998

mapped = to_genomic->

Map

(*mrna_loc);

3008  const CSeq_feat

* cds_feat_on_query_mrna,

3009  const CSeq_feat

* cds_feat_on_transcribed_mrna,

3014

|| ( cds_feat_on_query_mrna && !cds_feat_on_query_mrna->

IsSetProduct

() )

3027  if

( !(*it)->IsSetId() ) {

3032  const CFeat_id

& feat_id = (*it)->GetId();

3055

(

"rearrangement required for product"

);

3072  bool

has_start =

false

;

3073  bool

has_stop =

false

;

3075  bool

has_gap =

false

;

3076  bool

has_indel =

false

;

3082  if

(cds_feat_on_query_mrna) {

3087

corrected_cds_feat_on_query_mrna->

Assign

(*cds_feat_on_query_mrna);

3091

corrected_cds_feat_on_transcribed_mrna->

Assign

(*cds_feat_on_transcribed_mrna);

3095  int

cds_start_on_mrna = 0;

3096  int

frame_on_mrna = 0;

3097  bool

filled_by_polya =

false

;

3099  if

(align !=

NULL

) {

3111  string

except_text =

"unclassified translation discrepancy"

;

3114  if

(clean_match_count) {

3115

*clean_match_count = seq.

size

();

3124  int

missing_end = 0;

3125  if

(cds_feat_on_query_mrna) {

3147

seq.

GetSeqData

(cds_start_on_mrna + frame_on_mrna, cds_start_on_mrna + cds_len_on_query_mrna, mrna);

3148  if

((missing_end == 1 || missing_end == 2) &&

3154 

filled_by_polya =

true

;

3155  for

(

size_t

pos = mrna.size() - missing_end;

3156

pos < mrna.size(); ++pos)

3172  if

(xlate.size() && xlate[0] ==

'-'

) {

3175  string

first_codon = mrna.substr(0,3);

3179

xlate[0] = first_aa[0];

3189  const CSeq_loc

& cb_on_genome = (*it)->GetLoc();

3191  if

(!cb_on_mrna)

continue

;

3194  if

(

r

.GetLength() != 3) {

3203  switch

((*it)->GetAa().Which()) {

3205

src += (char)(*it)->GetAa().GetNcbieaa();

3210

src += (char)(*it)->GetAa().GetNcbistdaa();

3215

src += (char)(*it)->GetAa().GetNcbi8aa();

3227

xlate[pos] = dst[0];

3239  if

(corrected_cds_feat_on_transcribed_mrna) {

3257

whole_product->

SetWhole

(*cds_id);

3261  if

(cds_feat_on_transcribed_mrna) {

3264  CSeq_loc

cds_feat_on_transcribed_mrna_loc;

3265

cds_feat_on_transcribed_mrna_loc.

Assign

(corrected_cds_feat_on_transcribed_mrna->

GetLocation

());

3267

cds_feat_on_transcribed_mrna_loc.

FlipStrand

();

3273

product_ranges.

clear

();

3275

product_ranges += loc_it.GetRange();

3284

product_ranges.

GetTo

());

3288  if

((xlate.size() == product_ranges.

GetTo

() + (filled_by_polya ? 1 : 2) ||

3290

xlate[xlate.size() - 1] ==

'*'

)

3292

xlate.resize(xlate.size() - 1);

3301  if

( (product_ranges.

GetFrom

()==0 && xlate.size() && xlate[0] ==

'M'

) ||

3306  if

(product_ranges.

Empty

()) {

3311  if

(product_ranges[0].IsWhole()) {

3314  string

xlate_trimmed;

3316

actual +=

whole

.substr(range_it->GetFrom(), range_it->GetLength());

3317

xlate_trimmed += xlate.substr(range_it->GetFrom(), range_it->GetLength());

3319

xlate = xlate_trimmed;

3321  if

(actual !=

whole

) {

3336

string::const_iterator it1 = actual.begin();

3337

string::const_iterator it1_end = actual.end();

3338

string::const_iterator it2 = xlate.begin();

3339

string::const_iterator it2_end = xlate.end();

3341  for

( ; it1 != it1_end && it2 != it2_end; ++it1, ++it2) {

3344

mapped_protein_id, product_ranges, to_mrna, to_genomic);

3347  if

(!mapped->

IsInt

()) {

3364  "fTrustProteinSeq & fForceTranslateCds combination not implemented"

);

3367  char

actual_aa = *it1;

3368

code_break->

SetAa

().SetNcbieaa(actual_aa);

3370

}

else if

(*it2 ==

'-'

|| *it2 ==

'*'

) {

3372

}

else if

(*it1 != *it2) {

3374

}

else if

(clean_match_count && (!mapped ||

3377

++*clean_match_count;

3381  if

(has_stop && filled_by_polya

3386

product_ranges, to_mrna, to_genomic);

3394

feat.

SetComment

() +=

"stop codon completed by the addition of " 3395  "3' A residues to the mRNA"

;

3406

(feat.

GetComment

().find(

"indel"

) != string::npos ||

3407

feat.

GetComment

().find(

"inserted"

) != string::npos ||

3408

feat.

GetComment

().find(

"deleted"

) != string::npos))

3413  if

(actual.size() != xlate.size() ||

3414

!has_stop || !has_start ||

3415

has_gap || has_indel) {

3416

except_text =

"unclassified translation discrepancy"

;

3418  else if

(mismatch_count) {

3419

except_text =

"mismatches in translation"

;

3428  string

except_text =

text

;

3430

list<string> except_toks;

3434  for

(list<string>::iterator it = except_toks.begin();

3435

it != except_toks.end(); ) {

3438

*it ==

"annotated by transcript or proteomic data"

||

3439

*it ==

"unclassified transcription discrepancy"

||

3440

*it ==

"mismatches in transcription"

||

3441

*it ==

"unclassified translation discrepancy"

||

3442

*it ==

"mismatches in translation"

) {

3443

except_toks.erase(it++);

3451  if

( !except_text.empty() ) {

3455  if

(it->GetSeqId()->IsOther() &&

3456

it->GetSeqId()->GetOther().GetAccession()[0] ==

'N'

&&

3457  string

(

"MRP"

).find(it->GetSeqId()->GetOther().GetAccession()[1]) != string::npos)

3459

except_text =

"annotated by transcript or proteomic data"

;

3462  string

product_type_string;

3464

product_type_string =

"AA sequence"

;

3467

product_type_string =

"RNA sequence"

;

3470

product_type_string +=

", mRNA"

;

3473

qualifier->

SetQual

(

"inference"

);

3474

qualifier->

SetVal

(

"similar to "

+ product_type_string +

" (same species):RefSeq:"

+

3475

it->GetSeqId()->GetOther().GetAccession() +

'.'

+

3477

feat.

SetQual

().push_back(qualifier);

3480

except_toks.push_back(except_text);

3482

except_text =

NStr::Join

(except_toks,

", "

);

3484  if

(except_text.empty()) {

3500  string

product_type_string =

"RNA sequence"

;

3504

product_type_string +=

", mRNA"

;

3507  string

db =

"INSD"

;

3516

qualifier->

SetQual

(

"inference"

);

3517

qualifier->

SetVal

(

"similar to "

+ product_type_string +

" (same species):"

+db+

":"

+

3519

feat.

SetQual

().push_back(qualifier);

3526  const CSeq_feat

* cds_feat_on_query_mrna,

3527  const CSeq_feat

* cds_feat_on_transcribed_mrna,

3534

align_ref.

Reset

(align);

3543  for

(CSeq_feat::TQual::iterator it = feat.

SetQual

().begin();

3544

it != feat.

SetQual

().end(); )

3546  if

((*it)->CanGetQual() && (*it)->GetQual() ==

"inference"

) {

3547

it = feat.

SetQual

().erase(it);

3553  if

(feat.

GetQual

().empty()) {

3571

cds_feat_on_query_mrna, cds_feat_on_transcribed_mrna,

3572

transcribed_mrna_seqloc_refs,

3598 static string s_Count

(

unsigned

num,

const string

&item_name)

3611  bool

partial_unaligned_section)

3613  if

(mismatch_locs.

empty

() && insert_locs.

empty

() && delete_locs.

empty

() &&

3614

!partial_unaligned_section &&

3621  string

rna_comment, cds_comment;

3630

inserts_in_cds &= insert_locs;

3631

deletes_in_cds &= delete_locs;

3633  if

(cds_feat_on_mrna) {

3637

cds_ranges += loc_it.GetRange();

3642

align_info->

SetType

().SetStr(

"AlignInfo"

);

3645  unsigned

indel_count =

Convert

(insert_locs.

size

() + delete_locs.

size

());

3646  unsigned

frameshift_count = 0;

3647  unsigned

pct_coverage = 100, cds_pct_coverage = 100;

3648  if

(partial_unaligned_section) {

3655  if

(cds_feat && cds_feat_on_mrna) {

3656  unsigned

cds_indel_count = 0;

3658

++(it->GetLength() % 3 ? frameshift_count : cds_indel_count);

3661

++(delete_sizes[it->GetFrom()] % 3 ? frameshift_count

3664

indel_count -= frameshift_count;

3665  unsigned

cds_mismatch_count = 0;

3666  bool

start_codon_mismatch =

false

;

3681  if

(!single_interval_product) {

3683  "product is required to be a single interval"

);

3685  for

(

TSeqPos

pos = start_pos; pos < start_pos +

prot

.size(); ++pos)

3687  CSeq_loc

aa_loc(*cds_id, pos, pos);

3692  if

(codon.

size

() == 3) {

3694

codon[0], codon[1], codon[2]);

3695  char

translated_codon = pos == 0

3698  if

(translated_codon !=

prot

[pos]) {

3699

++cds_mismatch_count;

3704

start_codon_mismatch =

true

;

3709  if

(cds_mismatch_count || cds_indel_count || frameshift_count || cds_pct_coverage < 100)

3711

cds_comment =

"The RefSeq protein"

;

3712  if

(cds_mismatch_count) {

3713

cds_comment +=

" has " 3714

+

s_Count

(cds_mismatch_count,

"substitution"

);

3716  if

(frameshift_count) {

3717

cds_comment += (cds_mismatch_count ?

", "

:

" has "

)

3718

+

s_Count

(frameshift_count,

"frameshift"

);

3720  if

(cds_indel_count) {

3721

cds_comment += (cds_mismatch_count || frameshift_count ?

", "

:

" has "

)

3722

+

s_Count

(cds_indel_count,

"non-frameshifting indel"

);

3724  if

(cds_pct_coverage < 100) {

3725  if

(cds_mismatch_count || cds_indel_count || frameshift_count) {

3726

cds_comment +=

" and"

;

3728

cds_comment +=

" aligns at " 3732

cds_comment +=

" compared to this genomic sequence"

;

3734  if

(start_codon_mismatch) {

3735

align_info->

AddField

(

"start_codon_mismatches"

, 1);

3738

rna_comment =

"The RefSeq transcript"

;

3739  if

(!mismatch_locs.

empty

()) {

3740

rna_comment +=

" has "

+

3744  if

(frameshift_count) {

3745

rna_comment += (mismatch_locs.

empty

() ?

" has "

:

", "

) +

3746  s_Count

(frameshift_count,

"frameshift"

);

3747

align_info->

AddField

(

"num_frameshifts"

, (

int

)frameshift_count);

3750

rna_comment += (mismatch_locs.

empty

() && !frameshift_count?

" has "

:

", "

) +

3751  s_Count

(indel_count,

"non-frameshifting indel"

);

3752

align_info->

AddField

(

"num_nonframeshift_indel"

, (

int

)indel_count);

3754  if

(partial_unaligned_section) {

3755  if

(!mismatch_locs.

empty

() || indel_count || frameshift_count) {

3756

rna_comment +=

" and"

;

3758

rna_comment +=

" aligns at " 3762  if

(rna_comment ==

"The RefSeq transcript"

) {

3763

rna_comment.clear();

3765

rna_comment +=

" compared to this genomic sequence"

;

3771

deleted_bases = 0, cds_deleted_bases = 0,

3775  "Delete locations should always be one base"

);

3776

deleted_bases += delete_sizes.

find

(delete_it->GetFrom())->second;

3779  for

(

TSeqPos

pos = insert_it->GetFrom();

3780

pos <= insert_it->GetTo(); ++pos)

3787  "Delete locations should always be one base"

);

3788

delete_codons.

insert

((delete_it->GetFrom() -

3790

cds_deleted_bases +=

3791

delete_sizes.

find

(delete_it->GetFrom())->second;

3798  switch

((*it)->GetAa().Which()) {

3800

aa = (*it)->GetAa().GetNcbieaa();

3805  string

src_string(1, (*it)->GetAa().GetNcbistdaa()),

3816  string

src_string(1, (*it)->GetAa().GetNcbi8aa()),

3833  unsigned

insert_codons_count =

Convert

(insert_codons.

size

()),

3834

delete_codons_count =

Convert

(delete_codons.

size

());

3835  if

(inserted_bases || deleted_bases) {

3838  if

(inserted_bases) {

3839

rna_comment +=

": inserted "

+

s_Count

(inserted_bases,

"base"

)

3840

+

" in "

+

s_Count

(insert_codons_count,

"codon"

);

3842  if

(deleted_bases) {

3844

+

" deleted "

+

s_Count

(deleted_bases,

"base"

)

3845

+

" in "

+

s_Count

(delete_codons_count,

"codon"

);

3847  if

(cds_inserted_bases || cds_deleted_bases || code_breaks) {

3850  if

(cds_inserted_bases) {

3851

cds_comment +=

": inserted "

+

s_Count

(cds_inserted_bases,

"base"

)

3852

+

" in "

+

s_Count

(insert_codons_count,

"codon"

);

3854  if

(cds_deleted_bases) {

3856

+

" deleted "

+

s_Count

(cds_deleted_bases,

"base"

)

3857

+

" in "

+

s_Count

(delete_codons_count,

"codon"

);

3861

+

" substituted "

+

s_Count

(code_breaks,

"base"

)

3862

+

" at "

+

s_Count

(code_breaks,

"genomic stop codon"

);

3877  if

(internal_unaligned > 0) {

3878

align_info->

AddField

(

"internal_unaligned"

, internal_unaligned);

3886

align_info->

AddField

(

"3prime_unaligned"

,

3887

(

int

)(product_length - align->

GetSeqStop

(0) - 1));

3891  if

(!rna_comment.empty()) {

3895 

}

else if

(rna_feat.

GetComment

().find(rna_comment) == string::npos) {

3896

rna_feat.

SetComment

() +=

"; "

+ rna_comment;

3899  if

(!cds_comment.empty()) {

3903 

}

else if

(cds_feat->

GetComment

().find(cds_comment) == string::npos) {

3904

cds_feat->

SetComment

() +=

"; "

+ cds_comment;

3907  if

(!align_info->

GetData

().empty()) {

3908

rna_feat.

AddExt

(align_info);

3926

}

else if

(feat.

GetComment

().find(comment) == string::npos) {

3932

comment =

" added "

+

s_Count

(insert_length,

"base"

) +

" not found in genome assembly"

;

3944  string

ensembl_match_rna, ensembl_match_cds;

3945

vector<string> keywords;

3949  for

(

CSeqdesc_CI

desc(rna_handle, desc_types); desc; ++desc) {

3950  if

(desc->IsGenbank() && desc->GetGenbank().IsSetKeywords()) {

3951  for

(

const string

&keyword : desc->GetGenbank().GetKeywords()) {

3953

(keyword ==

"MANE Select"

|| keyword ==

"MANE Plus" 3954

|| keyword ==

"MANE Plus Clinical"

))

3957  if

(keyword ==

"MANE Select"

) {

3958

keywords.push_back(

"RefSeq Select"

);

3959

}

else if

(keyword ==

"MANE Plus Clinical"

) {

3960

keywords.push_back(

"RefSeq Plus Clinical"

);

3963

keywords.push_back(keyword);

3966

}

else if

(desc->IsUser() &&

3967

desc->GetUser().HasField(

"MANE Ensembl match"

))

3971  "/"

, ensembl_match_rna, ensembl_match_cds);

3974

}

else if

(desc->IsUser() && desc->GetUser().GetType().IsStr() &&

3975

desc->GetUser().GetType().GetStr() ==

"RefGeneTracking"

&&

3976

need_location_check)

3978  if

(desc->GetUser().HasField(

"EnsemblLocation"

)) {

3980

desc->GetUser().GetField(

"EnsemblLocation"

));

3981

}

else if

(desc->GetUser().HasField(

"SelectGeneLocation"

)) {

3985

desc->GetUser().GetField(

"SelectGeneLocation"

)));

3990  if

((match_found >=

eOverlap

|| !need_location_check) && !keywords.empty())

3999  if

(match_found ==

eExact

&& !drop && !ensembl_match_rna.empty()) {

4001

rna_ensembl_ref->

SetDb

(

"Ensembl"

);

4002

rna_ensembl_ref->

SetTag

().SetStr(ensembl_match_rna);

4003

rna_feat.

SetDbxref

().push_back(rna_ensembl_ref);

4004  if

(cds_feat && !ensembl_match_cds.empty()) {

4006

cds_ensembl_ref->

SetDb

(

"Ensembl"

);

4007

cds_ensembl_ref->

SetTag

().SetStr(ensembl_match_cds);

4008

cds_feat->

SetDbxref

().push_back(cds_ensembl_ref);

4018  if

(!loc_field.

HasField

(

"seq_id"

) || !loc_field.

HasField

(

"from"

) ||

4022

+

" doesn't have expected fields"

);

4041  if

(!loc_genomic_acc.

Match

(genomic_acc) || loc_strand != align.

GetSeqStrand

(1))

4057  for

(

const string

&keyword : keywords) {

4059

qualifier->

SetQual

(

"tag"

);

4060

qualifier->

SetVal

(keyword);

4061

feat.

SetQual

().push_back(qualifier);

4085  id

->Assign(*loc1->

GetId

());

4091

merged_loc = left_loc;

4092

merged_loc->

Add

(*right_loc);

4093

merged_loc->

Add

(*loc1);

4095

merged_loc->

Add

(*loc2);

4105

x[1] += genomic_size;

4107

x[3] += genomic_size;

4110

x[0] += genomic_size;

4111

x[1] += genomic_size;

4112

}

else if

(x[3] < x[0]) {

4113

x[2] += genomic_size;

4114

x[3] += genomic_size;

4118

x[0] =

min

(x[0], x[2]);

4119

x[1] =

max

(x[1], x[3]) - genomic_size;

4135  id

->Assign(*loc.

GetId

());

4143

left_loc->

Add

(*it.GetRangeAsSeq_loc());

4145

right_loc->

Add

(*it.GetRangeAsSeq_loc());

4155  swap

(left_loc, right_loc);

4157

left_loc->

Add

(*right_loc);

4159  if

(no_gap_at_origin) {

4163  if

(interval.

GetFrom

() == 0) {

4166  if

(interval.

GetTo

() == genomic_size-1) {

4196  return

genomic_ids.

size

() > 1;

4206  const int

k_gap_length,

4207  const int

next_exon_start)

4209  if

(insert->

SetMix

().Set().size() > 1) {

4213  if

(insert->

SetMix

().Set().size() > 0) {

4214  int

half_intron_length = (next_exon_start - region_end)/2;

4215  int

copy_length =

min

(k_gap_length, half_intron_length);

4216

region_end += copy_length;

4218  if

(region_begin < region_end) {

4222

edited_sequence_seqloc->

SetMix

().Set().push_back(genome_loc);

4224  if

(copy_length < k_gap_length) {

4229

edited_sequence_seqloc->

SetMix

().Set().push_back(gap_loc);

4233

edited_sequence_seqloc->

SetMix

().Set().push_back(insert);

4236  if

(copy_length < k_gap_length) {

4239

edited_sequence_seqloc->

SetMix

().Set().push_back(gap_loc);

4243

region_begin = region_end;

4253  const CSeq_feat

* cds_feat_on_query_mrna_ptr,

4254  bool

call_on_align_list)

4258

align->

Assign

(input_align);

4289  const int

k_gap_length =

min

(1000,

int

(genomic_length));

4294

spliced_seg.

SetExons

().reverse();

4296  int

region_begin = 0;

4307  if

(!seqid.

Match

(*genomic_seqid)) {

4314

insert->

SetMix

().Set().push_back(loc);

4316  int

exon_length = exon_stop - exon_start +1;

4317

exon_stop = region_end + k_gap_length -1;

4318

exon_start = region_end + k_gap_length - exon_length;

4324  if

(!(region_end <= exon_start)) {

4337

region_end = exon_stop +1;

4356  if

(region_begin < (

int

)genomic_length) {

4359

genomic_length -1));

4360

edited_sequence_seqloc->

SetMix

().Set().push_back(genome_loc);

4365

spliced_seg.

SetExons

().reverse();

4373

seqentry->

SetSeq

(*bioseq);

4380

seq_desc->

Assign

(*desc);

4381

bioseq->

SetDescr

().Set().push_back(seq_desc);

4388

seq_desc->

Assign

(*desc);

4389

bioseq->

SetDescr

().Set().push_back(seq_desc);

4403

gene_feat = gene->second;

4411

call_on_align_list);

4413  m_scope

->RemoveBioseq(bioseq_handle);

4414

annot_local.

SetData

().SetFtable().clear();

4418  genes

[gene_id] = gene_feat;

4426  TSeqPos

cds_insert_length = 0;

4429

align->

Assign

(input_align);

4436  if

(!seqid.

Match

(*genomic_seqid)) {

4440  if

(cds_feat_on_query_mrna_ptr) {

4441  int

cds_intersection_len =

4447  if

(cds_intersection_len > 0) {

4448

cds_insert_length += cds_intersection_len;

4452

spliced_seg.

SetExons

().erase(it);

4460

gene_id, cds_feat_on_query_mrna_ptr,

4461

call_on_align_list);

4465

align->

Assign

(input_align);

4468  if

(entry.

IsSeq

() &&

4473

entry.

SetSeq

().

SetInst

().SetHist().SetAssembly().front() =

4484

it != annot_local.

SetData

().SetFtable().rend(); ++it) {

4486  if

(

f

.GetData().IsGene()) {

4490  if

(

f

.GetData().IsCdregion() && cds_insert_length==0) {

4502

annot.

SetData

().SetFtable().splice(annot.

SetData

().SetFtable().end(),

4503

annot_local.

SetData

().SetFtable());

User-defined methods of the data storage class.

@ eExtreme_Positional

numerical value

@ eExtreme_Biological

5' and 3'

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

size_t GetSize(void) const

const CSeq_id * GetFirstId() const

TFeatureGeneratorFlags GetFlags() const

CRef< objects::CSeq_feat > ConvertAlignToAnnot(const objects::CSeq_align &align, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, Int8 gene_id=0, const objects::CSeq_feat *cdregion_on_mrna=NULL)

Convert an alignment to an annotation.

unique_ptr< SImplementation > m_impl

void RecomputePartialFlags(objects::CSeq_annot &annot)

Recompute the correct partial states for all features in this annotation.

void SetMinIntron(TSeqPos)

EIntronStitchThresholdFlags

void SetFeatureExceptions(objects::CSeq_feat &feat, const objects::CSeq_align *align=NULL)

Correctly mark exceptions on a feature.

void SetFlags(TFeatureGeneratorFlags)

void SetAllowedUnaligned(TSeqPos)

CFeatureGenerator(CRef< objects::CScope > scope)

CConstRef< objects::CSeq_align > CleanAlignment(const objects::CSeq_align &align)

Clean an alignment according to our best guess of its biological representation.

void SetIntronStitchThresholdFlags(EIntronStitchThresholdFlags)

@ fGenerateStableLocalIds

@ fAddTranslatedCDSAssembly

int TFeatureGeneratorFlags

void SetPartialFlags(CRef< objects::CSeq_feat > gene_feat, CRef< objects::CSeq_feat > mrna_feat, CRef< objects::CSeq_feat > cds_feat)

Mark the correct partial states for a set of features.

void ConvertLocToAnnot(const objects::CSeq_loc &loc, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, objects::CCdregion::EFrame frame=objects::CCdregion::eFrame_one, CRef< objects::CSeq_id > prot_id=CRef< objects::CSeq_id >(), CRef< objects::CSeq_id > rna_id=CRef< objects::CSeq_id >())

Convert genomic location to an annotation.

@Gb_qual.hpp User-defined methods of the data storage class.

static const CTrans_table & GetTransTable(int id)

static void SetFeatureExceptions(objects::CSeq_feat &feat, objects::CScope &scope, const objects::CSeq_align *align=NULL)

Correctly mark exceptions on a feature.

static void CreateGeneModelFromAlign(const objects::CSeq_align &align, objects::CScope &scope, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, TGeneModelCreateFlags flags=fDefaults, TSeqPos allowed_unaligned=10)

Create a gene model from an alignment; this will optionally promote all features through the alignment...

static void SetPartialFlags(objects::CScope &scope, CRef< objects::CSeq_feat > gene_feat, CRef< objects::CSeq_feat > mrna_feat, CRef< objects::CSeq_feat > cds_feat)

static void CreateGeneModelsFromAligns(const list< CRef< objects::CSeq_align > > &aligns, objects::CScope &scope, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, TGeneModelCreateFlags flags=fDefaults, TSeqPos allowed_unaligned=10)

int TGeneModelCreateFlags

static void RecomputePartialFlags(objects::CScope &scope, objects::CSeq_annot &annot)

position_type GetTo() const

position_type GetFrom() const

position_type GetCoveredLength(void) const

Returns total length covered by ranges in this collection, i.e.

double GetPercentCoverage(CScope &scope, const CSeq_align &align, unsigned query=0)

Compute percent coverage of the query (sequence 0) (range 0-100)

static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)

ESubtype GetSubtype(void) const

CRange< TSeqPos > GetSeqRange(TDim row) const

GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...

TSeqPos GetSeqStop(TDim row) const

CRef< CSeq_loc > CreateRowSeq_loc(TDim row) const

TDim CheckNumRows(void) const

Validators.

const CSeq_id & GetSeq_id(TDim row) const

Get seq-id (the first one if segments have different ids).

TSeqPos GetSeqStart(TDim row) const

ENa_strand GetSeqStrand(TDim row) const

Get strand (the first one if segments have different strands).

CSeq_feat_EditHandle –.

namespace ncbi::objects::

void AddExt(CRef< CUser_object > ext, TAddExt add_flags=0)

Add an extension by type in exts container.

void SetPartialStart(bool val, ESeqLocExtremes ext)

void SetPartialStop(bool val, ESeqLocExtremes ext)

Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.

static TSeqPos Pack(CSeq_data *in_seq, TSeqPos uLength=ncbi::numeric_limits< TSeqPos >::max())

TSeqPos InternalUnaligned(void) const

CSeq_feat_Handle GetFeatureWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const

char GetStartResidue(int state) const

char GetCodonResidue(int state) const

static int SetCodonState(unsigned char ch1, unsigned char ch2, unsigned char ch3)

Template class for iteration on objects of class C.

int GetInt(void) const

get value

const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const

Access a named field in this user field.

const string & GetString(void) const

bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const

Verify that a named field exists.

CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)

add a data field to the user object that holds a given value

container_type::iterator iterator

const_iterator end() const

iterator_bool insert(const value_type &val)

const_iterator find(const key_type &key) const

iterator_bool insert(const value_type &val)

CMappedFeat GetCdsOnMrna(const objects::CSeq_id &rna_id, CScope &scope)

static const char * str(char *buf, int n)

static bool s_Contains(const TSeqRange &range1, const TSeqRange &range2)

Check whether range1 contains range2.

bool IsProteinAlign(const CSeq_align &align)

void AddInsertWithGaps(CRef< CSeq_loc > &edited_sequence_seqloc, CSeq_id &genomic_seqid, int &region_begin, int &region_end, int &offset, CRef< CSeq_loc > &insert, const int k_gap_length, const int next_exon_start)

const char * k_except_text_for_gap_filled_gnomon_model

const char * k_cds_comment

void AddCodeBreak(CSeq_feat &feat, CSeq_loc &loc, char ncbieaa)

static void s_TransformToNucpos(CProduct_pos &pos)

const char * k_rna_comment

void AddLiteral(CSeq_inst &inst, const string &seq, CSeq_inst::EMol mol_class)

string ExtractGnomonModelNum(const CSeq_id &seq_id)

void RenameGeneratedBioseqs(const CSeq_id &query_rna_id, CSeq_id &transcribed_rna_id, CRef< CSeq_feat > cds_feat_on_query_mrna, CRef< CSeq_feat > cds_feat_on_genome_with_translated_product)

bool IsContinuous(const CSeq_loc &loc)

static string s_Count(unsigned num, const string &item_name)

static CRef< CSeq_loc > s_MapSingleAA(TSeqPos pos, CRef< CSeq_id > mapped_protein_id, const CRangeCollection< TSeqPos > &product_ranges, CRef< CSeq_loc_Mapper > to_mrna, CRef< CSeq_loc_Mapper > to_genomic)

static const CMolInfo * s_GetMolInfo(const CBioseq_Handle &handle)

Return the mol-info object for a given sequence.

unsigned int TSeqPos

Type for sequence locations and lengths.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define ERASE_ITERATE(Type, Var, Cont)

Non-constant version with ability to erase current element, if container permits.

int TSignedSeqPos

Type for signed sequence position.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

const TSeqPos kInvalidSeqPos

Define special value for invalid sequence position.

void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)

TValue Add(int delta) THROWS_NONE

Atomically add value (=delta), and return new counter value.

#define NCBI_ASSERT(expr, mess)

#define ERR_POST(message)

Error posting with file, line number information but without error codes.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

void Warning(CExceptionArgs_Base &args)

virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)

Set object to copy of another one.

const TPrim & Get(void) const

const string AsFastaString(void) const

static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)

Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...

string GetSeqIdString(bool with_version=false) const

Return seqid string with optional version for text seqid type.

virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)

Optimized implementation of CSerialObject::Assign, which is not so efficient.

CConstRef< CSeq_id > GetSeqId(void) const

EAccessionInfo

For IdentifyAccession (below)

CSeq_id::EAccessionInfo IdentifyAccession(void) const

CSeq_id & Set(const CTempString &the_id, TParseFlags flags=fParse_AnyRaw)

Reassign based on flat specifications; arguments interpreted as with constructors.

bool Match(const CSeq_id &sid2) const

Match() - TRUE if SeqIds are equivalent.

static CSeq_id_Handle GetHandle(const CSeq_id &id)

Normal way of getting a handle, works for any seq-id.

const CTextseq_id * GetTextseq_Id(void) const

Return embedded CTextseq_id, if any.

void SetPacked_int(TPacked_int &v)

bool IsPartialStart(ESeqLocExtremes ext) const

check start or stop of location for e_Lim fuzz

ENa_strand GetStrand(void) const

Get the location's strand.

void ChangeToPackedInt(void)

Works only if location is currently an interval, point, packed-int (handled trivially),...

bool IsReverseStrand(void) const

Return true if all ranges have reverse strand.

void FlipStrand(void)

Flip the strand (e.g. plus to minus)

virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)

Override Assign() to incorporate cache invalidation.

void SetId(CSeq_id &id)

set the 'id' field in all parts of this location

TRange GetTotalRange(void) const

TSeqPos GetStart(ESeqLocExtremes ext) const

Return start and stop positions of the seq-loc.

CRef< CSeq_loc > Merge(TOpFlags flags, ISynonymMapper *syn_mapper) const

All functions create and return a new seq-loc object.

const_iterator end(void) const

const_iterator begin(void) const

int Compare(const CSeq_loc &loc) const

void Add(const CSeq_loc &other)

Simple adding of seq-locs.

const CSeq_id * GetId(void) const

Get the id of the location; return NULL if it has multiple ids or no id at all.

void SetPartialStart(bool val, ESeqLocExtremes ext)

set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)

CRef< CSeq_loc > Intersect(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper) const

Find the intersection with the seq-loc, merge/sort resulting ranges depending on flags.

void SetStrand(ENa_strand strand)

Set the strand for all of the location's ranges.

void SetPartialStop(bool val, ESeqLocExtremes ext)

bool IsPartialStop(ESeqLocExtremes ext) const

TSeqPos GetStop(ESeqLocExtremes ext) const

@ eOrder_Biological

Iterate sub-locations in biological order.

const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)

If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...

TSeqPos GetLength(const CSeq_id &id, CScope *scope)

Get sequence length if scope not null, else return max possible TSeqPos.

bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)

Determines if two CSeq_ids represent the same CBioseq.

const CMolInfo * GetMolInfo(const CBioseq &bioseq)

Retrieve the MolInfo object for a given bioseq handle.

const COrg_ref * GetOrg_refOrNull(const CBioseq_Handle &handle)

Return the pointer to org-ref associated with a given sequence or null if there is no org-ref associa...

const COrg_ref & GetOrg_ref(const CBioseq_Handle &handle)

Return the org-ref associated with a given sequence.

static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)

Translate a string using a specified genetic code.

@ fIs5PrimePartial

= 0x4 Translate first codon even if not start codon (because sequence is 5' partial)

@ eGetId_Best

return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)

@ eGetId_ForceAcc

return only an accession based seq-id

CRef< CSeq_loc > Map(const CSeq_loc &src_loc)

Map seq-loc.

static CRef< CObjectManager > GetInstance(void)

Return the existing object manager or create one.

CBioseq_Handle GetBioseqHandle(const CSeq_id &id)

Get bioseq handle by seq-id.

CSeq_annot_Handle AddSeq_annot(CSeq_annot &annot, TPriority pri=kPriority_Default, EExist action=eExist_Throw)

Add Seq-annot, return its CSeq_annot_Handle.

CSeq_loc_Mapper_Base & SetMergeAll(void)

Merge any abutting or overlapping intervals.

@ eProductToLocation

Map from the feature's product to location.

@ eLocationToProduct

Map from the feature's location to product.

@ fAlign_Dense_seg_TotalRange

Ignore internal dense-seg structure - map each dense-seg according to the total ranges involved.

vector< CSeq_id_Handle > TId

const CTSE_Handle & GetTSE_Handle(void) const

Get CTSE_Handle of containing TSE.

CConstRef< CBioseq > GetCompleteBioseq(void) const

Get the complete bioseq.

bool IsSetDbxref(void) const

virtual CConstRef< CSeq_feat > GetSeq_feat(void) const

const CSeqFeatData & GetData(void) const

TSeqPos GetBioseqLength(void) const

CSeq_feat_EditHandle AddFeat(const CSeq_feat &new_obj) const

CConstRef< CSeq_id > GetSeqId(void) const

Get id which can be used to access this bioseq handle. Throws an exception if none is available.

TInst_Topology GetInst_Topology(void) const

CSeq_annot_EditHandle GetEditHandle(void) const

Get 'edit' version of handle.

const CSeq_feat::TDbxref & GetDbxref(void) const

CRef< CSeq_loc > GetRangeSeq_loc(TSeqPos start, TSeqPos stop, ENa_strand strand=eNa_strand_unknown) const

Return CSeq_loc referencing the given range and strand on the bioseq. If start == 0,...

const TId & GetId(void) const

@ eCoding_Ncbi

Set coding to binary coding (Ncbi4na or Ncbistdaa)

@ eCoding_Iupac

Set coding to printable coding (Iupacna or Iupacaa)

SAnnotSelector & ExcludeFeatSubtype(TFeatSubtype subtype)

Exclude feature subtype from the search.

SAnnotSelector & SetResolveAll(void)

SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).

TSeqPos GetPos(void) const

const CSeq_loc & GetLocation(void) const

const CSeq_feat & GetOriginalFeature(void) const

Get original feature with unmapped location/product.

SAnnotSelector & SetAdaptiveDepth(bool value=true)

SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...

const CSeq_feat & GetMappedFeature(void) const

Feature mapped to the master sequence.

SAnnotSelector & SetResolveNone(void)

SetResolveNone() is equivalent to SetResolveMethod(eResolve_None).

bool IsInGap(TSeqPos pos) const

True if sequence at 0-based position 'pos' has gap. Note: this method is not MT-safe,...

const CSeqMap & GetSeqMap(void) const

void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const

Fill the buffer string with the sequence data for the interval [start, stop).

const_iterator begin(void) const

const_iterator end(void) const

TObjectType * GetPointer(void) const THROWS_NONE

Get pointer.

bool IsNull(void) const THROWS_NONE

Check if pointer is null – same effect as Empty().

bool NotNull(void) const THROWS_NONE

Check if pointer is not null – same effect as NotEmpty().

TObjectType * GetPointer(void) THROWS_NONE

Get pointer.

void Reset(void)

Reset reference object.

void Reset(void)

Reset reference object.

bool NotNull(void) const THROWS_NONE

Check if pointer is not null – same effect as NotEmpty().

TObjectType * GetNonNullPointer(void) const

Get pointer value and throw a null pointer exception if pointer is null.

int64_t Int8

8-byte (64-bit) signed integer

position_type GetLength(void) const

bool NotEmpty(void) const

bool IntersectingWith(const TThisType &r) const

CRange< TSeqPos > TSeqRange

typedefs for sequence ranges

static TThisType GetWhole(void)

static position_type GetWholeTo(void)

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

NCBI_NS_STD::string::size_type SIZE_TYPE

static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)

Convert size_t to string.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)

Check if a string ends with a specified suffix value.

static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)

Truncate whitespace in a string (in-place)

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

static string Join(const TContainer &arr, const CTempString &delim)

Join strings using the specified delimiter.

static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)

Check if a string starts with a specified prefix value.

static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)

Split a string into two pieces using the specified delimiters.

static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-insensitive equality of a substring with another string.

static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)

Convert numeric value to string.

static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)

Replace occurrences of a substring within a string.

@ fSplit_Tokenize

All delimiters are merged and trimmed, to get non-empty tokens only.

string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const

Transform time to string.

@ eCurrent

Use current time. See also CCurrentTime.

TTo GetTo(void) const

Get the To member data.

TFrom GetFrom(void) const

Get the From member data.

bool IsSetLocus(void) const

Official gene symbol. Check if a value has been assigned to Locus data member.

const TLocus & GetLocus(void) const

Get the Locus member data.

const TTag & GetTag(void) const

Get the Tag member data.

void SetTag(TTag &value)

Assign a value to Tag data member.

bool IsId(void) const

Check if variant Id is selected.

bool CanGetDb(void) const

Check if it is safe to call GetDb method.

const TDb & GetDb(void) const

Get the Db member data.

const TStr & GetStr(void) const

Get the variant data.

TStr & SetStr(void)

Select the variant.

const TData & GetData(void) const

Get the Data member data.

void SetType(TType &value)

Assign a value to Type data member.

const TLabel & GetLabel(void) const

Get the Label member data.

void SetDb(const TDb &value)

Assign a value to Db data member.

TId GetId(void) const

Get the variant data.

@ eLim_circle

artificial break at origin of circle

const TTaxname & GetTaxname(void) const

Get the Taxname member data.

bool IsSetTaxname(void) const

Preferred formal name. Check if a value has been assigned to Taxname data member.

const TName & GetName(void) const

Get the Name member data.

bool IsSetName(void) const

Protein name. Check if a value has been assigned to Name data member.

TType GetType(void) const

Get the Type member data.

EType

type of RNA feature

bool CanGetType(void) const

Check if it is safe to call GetType method.

@ eType_scRNA

will become ncRNA, with RNA-gen.class = scRNA

@ eType_snoRNA

will become ncRNA, with RNA-gen.class = snoRNA

@ eType_ncRNA

non-coding RNA; subsumes snRNA, scRNA, snoRNA

@ eType_snRNA

will become ncRNA, with RNA-gen.class = snRNA

const TDonor_after_exon & GetDonor_after_exon(void) const

Get the Donor_after_exon member data.

void SetGenomic_id(TGenomic_id &value)

Assign a value to Genomic_id data member.

TNucpos & SetNucpos(void)

Select the variant.

const TGenomic_id & GetGenomic_id(void) const

Get the Genomic_id member data.

bool IsSetParts(void) const

Basic segments are always in biologic order. Check if a value has been assigned to Parts data member.

bool CanGetGenomic_id(void) const

Check if it is safe to call GetGenomic_id method.

void ResetGenomic_id(void)

Reset Genomic_id data member.

TGenomic_start GetGenomic_start(void) const

Get the Genomic_start member data.

const TAcceptor_before_exon & GetAcceptor_before_exon(void) const

Get the Acceptor_before_exon member data.

bool CanGetGenomic_id(void) const

Check if it is safe to call GetGenomic_id method.

void SetSegs(TSegs &value)

Assign a value to Segs data member.

bool IsSetAcceptor_before_exon(void) const

Splice sites. Check if a value has been assigned to Acceptor_before_exon data member.

TExons & SetExons(void)

Assign a value to Exons data member.

TProduct_length GetProduct_length(void) const

Get the Product_length member data.

bool IsSetPoly_a(void) const

Start of poly(A) tail on the transcript. For sense transcripts: aligned product positions < poly-a <= ...

void SetDim(TDim value)

Assign a value to Dim data member.

TProduct_type GetProduct_type(void) const

Get the Product_type member data.

TGenomic_strand GetGenomic_strand(void) const

Get the Genomic_strand member data.

void ResetGenomic_strand(void)

Reset Genomic_strand data member.

bool IsSetGenomic_strand(void) const

Genomic-strand represents the strand of translation. Check if a value has been assigned to Genomic_str...

void SetGenomic_start(TGenomic_start value)

Assign a value to Genomic_start data member.

void SetType(TType value)

Assign a value to Type data member.

const TParts & GetParts(void) const

Get the Parts member data.

const TProduct_start & GetProduct_start(void) const

Get the Product_start member data.

const TProduct_end & GetProduct_end(void) const

Get the Product_end member data.

const TSpliced & GetSpliced(void) const

Get the variant data.

bool CanGetSegs(void) const

Check if it is safe to call GetSegs method.

list< CRef< CSpliced_seg_modifier > > TModifiers

bool CanGetGenomic_strand(void) const

Check if it is safe to call GetGenomic_strand method.

bool IsSetGenomic_strand(void) const

Check if a value has been assigned to Genomic_strand data member.

list< CRef< CSpliced_exon > > TExons

const TExons & GetExons(void) const

Get the Exons member data.

TGenomic_strand GetGenomic_strand(void) const

Get the Genomic_strand member data.

void SetGenomic_id(TGenomic_id &value)

Assign a value to Genomic_id data member.

void SetGenomic_end(TGenomic_end value)

Assign a value to Genomic_end data member.

const TBases & GetBases(void) const

Get the Bases member data.

list< CRef< CSpliced_exon_chunk > > TParts

bool IsSetProduct_length(void) const

length of the product, in bases/residues from this (or from poly-a if present), a 3' unaligned length...

TPoly_a GetPoly_a(void) const

Get the Poly_a member data.

TGenomic_end GetGenomic_end(void) const

Get the Genomic_end member data.

void SetGenomic_strand(TGenomic_strand value)

Assign a value to Genomic_strand data member.

bool IsSpliced(void) const

Check if variant Spliced is selected.

bool CanGetPoly_a(void) const

Check if it is safe to call GetPoly_a method.

const TModifiers & GetModifiers(void) const

Get the Modifiers member data.

TNucpos GetNucpos(void) const

Get the variant data.

const TSegs & GetSegs(void) const

Get the Segs member data.

const TGenomic_id & GetGenomic_id(void) const

Get the Genomic_id member data.

bool IsSetDonor_after_exon(void) const

Check if a value has been assigned to Donor_after_exon data member.

bool CanGetGenomic_strand(void) const

Check if it is safe to call GetGenomic_strand method.

@ e_Product_ins

insertion in product sequence (i.e. gap in the genomic sequence)

@ e_Diag

both sequences are represented, there is sufficient similarity between product and genomic sequences....

@ e_Genomic_ins

insertion in genomic sequence (i.e. gap in the product sequence)

@ e_Match

both sequences represented, product and genomic sequences match

@ e_Mismatch

both sequences represented, product and genomic sequences do not match

@ eProduct_type_transcript

void SetAa(TAa &value)

Assign a value to Aa data member.

TXref & SetXref(void)

Assign a value to Xref data member.

void SetQual(const TQual &value)

Assign a value to Qual data member.

bool IsSetComment(void) const

Check if a value has been assigned to Comment data member.

vector< CRef< CDbtag > > TDbxref

TDbxref & SetDbxref(void)

Assign a value to Dbxref data member.

void ResetPartial(void)

Reset Partial data member.

bool IsSetData(void) const

The specific data. Check if a value has been assigned to Data data member.

bool IsSetQual(void) const

Qualifiers. Check if a value has been assigned to Qual data member.

E_Choice Which(void) const

Which variant is currently selected.

bool IsSetCode(void) const

Genetic code used. Check if a value has been assigned to Code data member.

void SetLocation(TLocation &value)

Assign a value to Location data member.

bool IsCdregion(void) const

Check if variant Cdregion is selected.

TIds & SetIds(void)

Assign a value to Ids data member.

const TLoc & GetLoc(void) const

Get the Loc member data.

void SetComment(const TComment &value)

Assign a value to Comment data member.

void ResetExcept(void)

Reset Except data member.

void ResetCode_break(void)

Reset Code_break data member.

void SetPartial(TPartial value)

Assign a value to Partial data member.

void SetProduct(TProduct &value)

Assign a value to Product data member.

const TQual & GetQual(void) const

Get the Qual member data.

bool IsSetPartial(void) const

incomplete in some way? Check if a value has been assigned to Partial data member.

const TLocal & GetLocal(void) const

Get the variant data.

void ResetExcept_text(void)

Reset Except_text data member.

bool IsSetXref(void) const

Cite other relevant features. Check if a value has been assigned to Xref data member.

const TLocation & GetLocation(void) const

Get the Location member data.

void SetExcept(TExcept value)

Assign a value to Except data member.

bool IsLocal(void) const

Check if variant Local is selected.

TLocal & SetLocal(void)

Select the variant.

bool IsGene(void) const

Check if variant Gene is selected.

void ResetId(void)

Reset Id data member.

list< CRef< CCode_break > > TCode_break

TFrame GetFrame(void) const

Get the Frame member data.

const TData & GetData(void) const

Get the Data member data.

const TExcept_text & GetExcept_text(void) const

Get the Except_text member data.

bool IsSetExcept_text(void) const

Explain if except=TRUE. Check if a value has been assigned to Except_text data member.

const TCode & GetCode(void) const

Get the Code member data.

const TDbxref & GetDbxref(void) const

Get the Dbxref member data.

void SetData(TData &value)

Assign a value to Data data member.

TCode_break & SetCode_break(void)

Assign a value to Code_break data member.

const TCdregion & GetCdregion(void) const

Get the variant data.

void SetLoc(TLoc &value)

Assign a value to Loc data member.

const TProduct & GetProduct(void) const

Get the Product member data.

const TComment & GetComment(void) const

Get the Comment member data.

void SetVal(const TVal &value)

Assign a value to Val data member.

const TGene & GetGene(void) const

Get the variant data.

void SetExcept_text(const TExcept_text &value)

Assign a value to Except_text data member.

const TXref & GetXref(void) const

Get the Xref member data.

vector< CRef< CSeqFeatXref > > TXref

bool CanGetProduct(void) const

Check if it is safe to call GetProduct method.

const TRna & GetRna(void) const

Get the variant data.

bool IsSetDbxref(void) const

Support for xref to other databases. Check if a value has been assigned to Dbxref data member.

TQual & SetQual(void)

Assign a value to Qual data member.

const TCode_break & GetCode_break(void) const

Get the Code_break member data.

bool IsSetProduct(void) const

Product of process. Check if a value has been assigned to Product data member.

bool IsRna(void) const

Check if variant Rna is selected.

void ResetQual(void)

Reset Qual data member.

bool IsSetCode_break(void) const

Individual exceptions. Check if a value has been assigned to Code_break data member.

bool IsSetFrame(void) const

Check if a value has been assigned to Frame data member.

@ e_not_set

No variant selected.

@ eFrame_three

reading frame

@ e_MaxChoice

== e_Variation+1

@ e_Ncbi8aa

NCBI8aa code.

@ e_Ncbieaa

ASCII value of NCBIeaa code.

void SetTo(TTo value)

Assign a value to To data member.

bool IsMix(void) const

Check if variant Mix is selected.

bool IsEmpty(void) const

Check if variant Empty is selected.

list< CRef< CSeq_interval > > Tdata

ENa_strand

strand of nucleic acid

bool IsOther(void) const

Check if variant Other is selected.

void SetId(TId &value)

Assign a value to Id data member.

TFrom GetFrom(void) const

Get the From member data.

bool IsGeneral(void) const

Check if variant General is selected.

list< CRef< CSeq_loc > > Tdata

bool IsEquiv(void) const

Check if variant Equiv is selected.

E_Choice Which(void) const

Which variant is currently selected.

void SetFrom(TFrom value)

Assign a value to From data member.

const Tdata & Get(void) const

Get the member data.

TVersion GetVersion(void) const

Get the Version member data.

const Tdata & Get(void) const

Get the member data.

const TEquiv & GetEquiv(void) const

Get the variant data.

const TOther & GetOther(void) const

Get the variant data.

void SetFuzz_to(TFuzz_to &value)

Assign a value to Fuzz_to data member.

void SetFuzz_from(TFuzz_from &value)

Assign a value to Fuzz_from data member.

bool IsLocal(void) const

Check if variant Local is selected.

TStrand GetStrand(void) const

Get the Strand member data.

const TGeneral & GetGeneral(void) const

Get the variant data.

TTo GetTo(void) const

Get the To member data.

bool IsInt(void) const

Check if variant Int is selected.

const TInt & GetInt(void) const

Get the variant data.

bool IsNull(void) const

Check if variant Null is selected.

void SetStrand(TStrand value)

Assign a value to Strand data member.

const TMix & GetMix(void) const

Get the variant data.

const TAccession & GetAccession(void) const

Get the Accession member data.

@ e_not_set

No variant selected.

const TSeq & GetSeq(void) const

Get the variant data.

bool IsSetClass(void) const

Check if a value has been assigned to Class data member.

TClass GetClass(void) const

Get the Class member data.

bool IsSeq(void) const

Check if variant Seq is selected.

void SetClass(TClass value)

Assign a value to Class data member.

list< CRef< CSeq_entry > > TSeq_set

TSeq & SetSeq(void)

Select the variant.

TSeq_set & SetSeq_set(void)

Assign a value to Seq_set data member.

@ eClass_nuc_prot

nuc acid and coded proteins

void SetCompleteness(TCompleteness value)

Assign a value to Completeness data member.

TLiteral & SetLiteral(void)

Select the variant.

void SetLength(TLength value)

Assign a value to Length data member.

void SetData(TData &value)

Assign a value to Data data member.

TId & SetId(void)

Assign a value to Id data member.

bool CanGetBiomol(void) const

Check if it is safe to call GetBiomol method.

const TInst & GetInst(void) const

Get the Inst member data.

void SetSeq_data(TSeq_data &value)

Assign a value to Seq_data data member.

bool IsSetAssembly(void) const

how was this assembled? Check if a value has been assigned to Assembly data member.

TTopology GetTopology(void) const

Get the Topology member data.

const TIupacna & GetIupacna(void) const

Get the variant data.

void SetExt(TExt &value)

Assign a value to Ext data member.

void SetHist(THist &value)

Assign a value to Hist data member.

const TLiteral & GetLiteral(void) const

Get the variant data.

TAnnot & SetAnnot(void)

Assign a value to Annot data member.

bool IsSetHist(void) const

Sequence history. Check if a value has been assigned to Hist data member.

bool IsSetExt(void) const

Extensions for special types. Check if a value has been assigned to Ext data member.

bool IsSetInst(void) const

The sequence data. Check if a value has been assigned to Inst data member.

TLength GetLength(void) const

Get the Length member data.

TLength GetLength(void) const

Get the Length member data.

void SetInst(TInst &value)

Assign a value to Inst data member.

const THist & GetHist(void) const

Get the Hist member data.

const TExt & GetExt(void) const

Get the Ext member data.

TBiomol GetBiomol(void) const

Get the Biomol member data.

void SetBiomol(TBiomol value)

Assign a value to Biomol data member.

EMol

molecule class in living organism

void SetDescr(TDescr &value)

Assign a value to Descr data member.

const TDelta & GetDelta(void) const

Get the variant data.

const TNcbi4na & GetNcbi4na(void) const

Get the variant data.

void SetRepr(TRepr value)

Assign a value to Repr data member.

list< CRef< CSeq_feat > > TFtable

const TNcbi2na & GetNcbi2na(void) const

Get the variant data.

const Tdata & Get(void) const

Get the member data.

bool IsLiteral(void) const

Check if variant Literal is selected.

bool IsSetSeq_data(void) const

May have the data. Check if a value has been assigned to Seq_data data member.

void SetLength(TLength value)

Assign a value to Length data member.

void SetSeq_data(TSeq_data &value)

Assign a value to Seq_data data member.

void ResetExt(void)

Reset Ext data member.

const TSeq_data & GetSeq_data(void) const

Get the Seq_data member data.

const TNcbi8na & GetNcbi8na(void) const

Get the variant data.

const TMolinfo & GetMolinfo(void) const

Get the variant data.

TMolinfo & SetMolinfo(void)

Select the variant.

const TSeq_data & GetSeq_data(void) const

Get the Seq_data member data.

void SetMol(TMol value)

Assign a value to Mol data member.

void ResetSeq_data(void)

Reset Seq_data data member.

E_Choice Which(void) const

Which variant is currently selected.

@ eRepr_delta

sequence made by changes (delta) to others

@ eRepr_raw

continuous sequence

@ eCompleteness_complete

complete biological entity

@ eCompleteness_no_left

missing 5' or NH3 end

@ eCompleteness_partial

partial but no details given

@ eCompleteness_no_right

missing 3' or COOH end

@ eCompleteness_no_ends

missing both ends

@ e_Ncbi2na

2 bit nucleic acid code

@ e_Iupacna

IUPAC 1 letter nuc acid code.

@ e_Ncbi8na

8 bit extended nucleic acid code

@ e_Ncbi4na

4 bit nucleic acid code

@ e_Iupacaa

IUPAC 1 letter amino acid code.

@ eBiomol_pre_RNA

precursor RNA of any sort really

@ eBiomol_snoRNA

small nucleolar RNA

@ eBiomol_transcribed_RNA

transcribed RNA other than existing classes

@ e_Org

if all from one organism

@ e_User

user defined object

@ e_Genbank

GenBank specific info.

@ e_Molinfo

info on the molecule and techniques

@ e_Source

source of materials, includes Org-ref

bm::gap_word_t gap_length(const bm::gap_word_t *buf) noexcept

Returns GAP block length.

unsigned int

A callback function used to compare two keys in a database.

where both are integers. tse (optional, String): TSE option controls what blob is whole

static void text(MDB_val *v)

constexpr auto sort(_Init &&init)

constexpr bool empty(list< Ts... >) noexcept

int GetGeneticCode(const CBioseq_Handle &bsh)

const GenericPointer< typename T::ValueType > T2 value

Defines: CTimeFormat - storage class for time format.

double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)

static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)

void IncludeSourceLocs(bool b=true)

CRef< CSeq_loc > Map(const CSeq_loc &loc)

CSeq_align::TDim GetRnaRow() const

CSeq_align::TDim GetGenomicRow() const

SMapper(const CSeq_align &aln, CScope &scope, TSeqPos allowed_unaligned=10, CSeq_loc_Mapper::TMapOptions opts=0)

CSeq_align::TDim m_genomic_row

CRef< CSeq_loc > x_GetLocFromSplicedExons(const CSeq_align &aln) const

This has special logic to set partialness based on alignment properties. In addition,...

const CSeq_loc & GetRnaLoc()

CRef< CSeq_loc_Mapper > x_Mapper()

TSeqPos m_allowed_unaligned

CRef< CSeq_feat > x_MapFeature(const objects::CSeq_feat *feature_on_mrna, const CSeq_align &align, CRef< CSeq_loc > loc, CSeq_loc_Mapper::TMapOptions opts, TSeqPos &offset)

void x_AddKeywordQuals(CSeq_feat &feat, const vector< string > &keywords)

CRef< CSeq_feat > x_CreateCdsFeature(CConstRef< CSeq_feat > cds_feat_on_query_mrna, CRef< objects::CSeq_feat > cds_feat_on_transcribed_mrna, list< CRef< CSeq_loc > > &transcribed_mrna_seqloc_refs, const CSeq_align &align, CRef< CSeq_loc > loc, const CTime &time, size_t model_num, CBioseq_set &seqs, CSeq_loc_Mapper::TMapOptions opts)

CConstRef< objects::CSeq_align > CleanAlignment(const objects::CSeq_align &align_in)

void MaximizeTranslation(objects::CSeq_align &align)

vector< SExon > GetExons(const CSeq_align &align)

CRef< CSeq_loc > MergeSeq_locs(const CSeq_loc *loc1, const CSeq_loc *loc2=NULL)

void RecalculateScores(CSeq_align &align)

void x_SetCommentForGapFilledModel(CSeq_feat &feat, TSeqPos insert_length)

void RecomputePartialFlags(objects::CSeq_annot &annot)

void x_SetQualForGapFilledModel(CSeq_feat &feat, CSeq_id_Handle id)

TFeatureGeneratorFlags m_flags

void x_CollectMrnaSequence(CSeq_inst &inst, const CSeq_align &align, const CSeq_loc &loc, bool add_unaligned_parts=true, bool mark_transcript_deletions=true, bool *has_gap=NULL, bool *has_indel=NULL)

void x_CopyAdditionalFeatures(const CBioseq_Handle &handle, SMapper &mapper, CSeq_annot &annot)

SImplementation(objects::CScope &scope)

void x_CreateGeneFeature(CRef< CSeq_feat > &gene_feat, const CBioseq_Handle &handle, SMapper &mapper, CRef< CSeq_loc > loc, const CSeq_id &genomic_id, Int8 gene_id=0)

CRef< CSeq_id > x_CreateMrnaBioseq(const CSeq_align &align, CConstRef< CSeq_loc > loc, const CTime &time, size_t model_num, CBioseq_set &seqs, CConstRef< CSeq_feat > cds_feat_on_query_mrna, CRef< CSeq_feat > &cds_feat_on_transcribed_mrna)

void SetPartialFlags(CRef< CSeq_feat > gene_feat, CRef< CSeq_feat > mrna_feat, CRef< CSeq_feat > cds_feat)

const CBioseq & x_CreateProteinBioseq(CSeq_loc *cds_loc, CRef< CSeq_feat > cds_feat_on_transcribed_mrna, list< CRef< CSeq_loc > > &transcribed_mrna_seqloc_refs, const CTime &time, size_t model_num, CBioseq_set &seqs)

void ClearScores(CSeq_align &align)

string x_ConstructRnaName(const CBioseq_Handle &handle)

void x_SetComment(CSeq_feat &rna_feat, CSeq_feat *cds_feat, const CSeq_feat *cds_feat_on_mrna, const CSeq_align *align, const CRangeCollection< TSeqPos > &mismatch_locs, const CRangeCollection< TSeqPos > &insert_locs, const CRangeCollection< TSeqPos > &delete_locs, map< TSeqPos, TSeqPos > &delete_sizes, bool partial_unaligned_edge)

void SetFeatureExceptions(objects::CSeq_feat &feat, const objects::CSeq_align *align, objects::CSeq_feat *cds_feat=NULL, const objects::CSeq_feat *cds_feat_on_query_mrna=NULL, const objects::CSeq_feat *cds_feat_on_transcribed_mrna=NULL, list< CRef< CSeq_loc > > *transcribed_mrna_seqloc_refs=NULL, TSeqPos *clean_match_count=NULL)

void x_AddSelectMarkup(const CSeq_align &align, const CBioseq_Handle &rna_handle, const CSeq_id &genomic_acc, CSeq_feat &rna_feat, CSeq_feat *cds_feat)

void x_CheckInconsistentDbxrefs(CConstRef< CSeq_feat > gene_feat, CConstRef< CSeq_feat > cds_feat)

void x_HandleRnaExceptions(CSeq_feat &feat, const CSeq_align *align, CSeq_feat *cds_feat, const CSeq_feat *cds_feat_on_mrna)

Handle feature exceptions.

CRef< CSeq_feat > ConvertAlignToAnnot(const objects::CSeq_align &align, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, Int8 gene_id, const objects::CSeq_feat *cdregion, bool call_on_align_list)

void x_HandleCdsExceptions(CSeq_feat &feat, const CSeq_align *align, const CSeq_feat *cds_feat_on_query_mrna, const CSeq_feat *cds_feat_on_transcribed_mrna, list< CRef< CSeq_loc > > *transcribed_mrna_seqloc_refs, TSeqPos *clean_match_count)

CRef< CSeq_feat > x_CreateMrnaFeature(CRef< CSeq_loc > loc, const CSeq_id &query_rna_id, CSeq_id &transcribed_rna_id, CConstRef< CSeq_feat > cds_feat_on_query_mrna)

bool x_RequiresPolyAForStopCodon(const objects::CSeq_id &mrna)

bool HasMixedGenomicIds(const CSeq_align &input_align)

e_MatchType x_CheckMatch(const CSeq_align &align, const CSeq_id &genomic_acc, const CUser_field &loc_field)

void x_SetExceptText(CSeq_feat &feat, const string &except_text)

void TrimHolesToCodons(objects::CSeq_align &align)

CRef< CSeq_loc > FixOrderOfCrossTheOriginSeqloc(const CSeq_loc &loc, TSeqPos outside_point, CSeq_loc::TOpFlags flags=CSeq_loc::fSort)

CRef< CSeq_feat > x_CreateNcRnaFeature(const objects::CSeq_feat *ncrnafeature_on_mrna, const CSeq_align &align, CConstRef< CSeq_loc > loc, CSeq_loc_Mapper::TMapOptions opts)

void TransformProteinAlignToTranscript(CConstRef< CSeq_align > &align, CRef< CSeq_feat > &cd_feat)

CRef< CSeq_feat > ConvertMixedAlignToAnnot(const CSeq_align &input_align, CSeq_annot &annot, CBioseq_set &seqs, Int8 gene_id, const CSeq_feat *cds_feat_on_query_mrna_ptr, bool call_on_align_list)

CRef< objects::CScope > m_scope

void StitchSmallHoles(objects::CSeq_align &align)

const value_slice::CValueConvert< value_slice::SRunTimeCP, FROM > Convert(const FROM &value)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4