Google

Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

gnSequence.cpp

Go to the documentation of this file.
00001 
00002 // File:            gnSequence.h
00003 // Purpose:         Source Seq, where sequence is stored in source file
00004 // Description:     implements the baseSeq interface for dna source files.
00005 // Changes:        
00006 // Version:         libGenome 0.1.0 
00007 // Author:          Aaron Darling 
00008 // Last Edited:     April 15, 2001, 11:13:00pm 
00009 // Modified by:     
00010 // Copyright:       (c) Aaron Darling 
00011 // Licenses:        Proprietary 
00013 #include "gn/gnSequence.h"
00014 #include "gn/gnSourceFactory.h"
00015 #include "gn/gnBaseSource.h"
00016 #include "gn/gnGenomeSpec.h"
00017 #include "gn/gnFragmentSpec.h"
00018 #include "gn/gnSourceSpec.h"
00019 #include "gn/gnStringSpec.h"
00020 #include "gn/gnStringHeader.h"
00021 
00022 // Constructor
00023 gnSequence::gnSequence( )
00024 {
00025         spec = new gnGenomeSpec();
00026         comparator = gnCompare::DNASeqCompare();
00027 }
00028 gnSequence::gnSequence( const gnSeqC* seq){
00029         spec = new gnGenomeSpec();
00030         if (seq[0] != 0){
00031                 gnFragmentSpec *fragSpec = new gnFragmentSpec();
00032                 spec->AddSpec(fragSpec);
00033                 fragSpec->AddSpec(new gnStringSpec(seq));
00034         }
00035         comparator = gnCompare::DNASeqCompare();
00036 }
00037 gnSequence::gnSequence( const string& str){
00038         spec = new gnGenomeSpec();
00039         if (str.length() != 0){
00040                 gnFragmentSpec *fragSpec = new gnFragmentSpec();
00041                 spec->AddSpec(fragSpec);
00042                 fragSpec->AddSpec(new gnStringSpec(str));
00043         }
00044         comparator = gnCompare::DNASeqCompare();
00045 }
00046 gnSequence::gnSequence( const gnGenomeSpec& gngs){
00047         spec = gngs.Clone();
00048         comparator = gnCompare::DNASeqCompare();
00049 }
00050 gnSequence::gnSequence( const gnFragmentSpec& gnfs){
00051         spec = new gnGenomeSpec();
00052         spec->AddSpec(gnfs.Clone());
00053         comparator = gnCompare::DNASeqCompare();
00054 }
00055 gnSequence::gnSequence( const gnContigSpec& gcs){
00056         spec = new gnGenomeSpec();
00057         gnFragmentSpec *fragSpec = new gnFragmentSpec();
00058         fragSpec->AddSpec(gcs.Clone());
00059         comparator = gnCompare::DNASeqCompare();
00060 }
00061 gnSequence::gnSequence( gnSeqC *bases, gnSeqI length){
00062         spec = new gnGenomeSpec();
00063         if (length > 0){
00064                 gnFragmentSpec *fragSpec = new gnFragmentSpec();
00065                 spec->AddSpec(fragSpec);
00066                 fragSpec->AddSpec(new gnStringSpec(string(bases, length)));
00067         }
00068         comparator = gnCompare::DNASeqCompare();
00069 }       
00070 gnSequence::gnSequence( const gnSequence& seq )
00071 {
00072         spec = seq.spec->Clone();
00073         filter_list = seq.filter_list;
00074         comparator = seq.comparator;
00075 }
00076 // Destructor
00077 gnSequence::~gnSequence()
00078 {
00079         if (spec != NULL) delete spec;
00080 }
00081 
00082 gnSequence& gnSequence::operator=( const gnSequence& seq){
00083         spec = seq.spec->Clone();
00084         filter_list = seq.filter_list;
00085         comparator = seq.comparator;
00086         return *this;
00087 }
00088 
00089 // Clone
00090 gnSequence* gnSequence::Clone() const
00091 {
00092         return new gnSequence( *this );
00093 }
00094 //IMPLEMENT ME!! Consider ambiguities!
00095 int gnSequence::compare(const string& str) const{
00096         STACK_TRACE_START
00097                 gnSeqI len = length();
00098                 gnSeqI seq_len = str.length();
00099                 gnSeqI comparelen = len < seq_len ? len : seq_len;
00100                 gnSeqI compared = 0;
00101                 while(comparelen > 0){
00102                         gnSeqI curlen = comparelen < BUFFER_SIZE ? comparelen : BUFFER_SIZE;
00103                         string bases = ToString(compared, curlen);
00104                         string seq_bases = str.substr(compared, curlen);
00105                         if(comparator->LessThan(bases, seq_bases))
00106                                 return -1;
00107                         else if(comparator->LessThan(seq_bases, bases))
00108                                 return 1;
00109 
00110                         compared += curlen;
00111                         comparelen -= curlen;
00112                 }
00113                 if(len < seq_len)
00114                         return -1;
00115                 else if(len > seq_len)
00116                         return 1;
00117                 return 0;
00118         STACK_TRACE_END
00119 }
00120 
00121 int gnSequence::compare(const gnSequence& seq) const{
00122         STACK_TRACE_START
00123                 gnSeqI len = length();
00124                 gnSeqI seq_len = seq.length();
00125                 gnSeqI comparelen = len < seq_len ? len : seq_len;
00126                 gnSeqI compared = 0;
00127                 while(comparelen > 0){
00128                         gnSeqI curlen = comparelen < BUFFER_SIZE ? comparelen : BUFFER_SIZE;
00129                         string bases = ToString(curlen, compared+1);
00130                         string seq_bases = seq.ToString(curlen, compared+1);
00131                         if(comparator->LessThan(bases, seq_bases))
00132                                 return -1;
00133                         else if(comparator->LessThan(seq_bases, bases))
00134                                 return 1;
00135 
00136                         compared+= curlen;
00137                         comparelen -= curlen;
00138                 }
00139                 if(len < seq_len)
00140                         return -1;
00141                 else if(len > seq_len)
00142                         return 1;
00143                 return 0;
00144         STACK_TRACE_END
00145 }
00146 
00147 void gnSequence::insert( const gnSeqI offset, const gnSeqC *bases, const gnSeqI len){
00148         STACK_TRACE_START
00149                 string str(bases, len);
00150                 gnStringSpec gpbs(str);
00151                 insert(offset, gpbs);
00152         STACK_TRACE_END
00153 }
00154 //Offset is the gene index, not a computer index, to insert before.
00155 //This is the default insert.  It inserts entire contigs
00156 void gnSequence::insert( const gnSeqI offset, const gnGenomeSpec& gpbs){
00157         STACK_TRACE_START
00158                 if(offset == 0)
00159                         Throw_gnEx(SeqIndexOutOfBounds());
00160                 if(offset == GNSEQI_END || spec->GetLength() < offset){ //simple append.
00161                         for(uint32 i=0; i < gpbs.GetSpecListLength(); i++)
00162                                 spec->AddSpec(gpbs.GetSpec(i)->Clone());
00163                         return;
00164                 }
00165                 //clone this sequence
00166                 gnGenomeSpec* tmpSpec = spec->Clone();
00167                 //crop off the end of this sequence past the insert point
00168                 //crop off the beginning of the cloned sequence up to the insert
00169                 gnSeqI real_offset = offset - 1;
00170                 gnSeqI endCrop = spec->GetLength() - real_offset;
00171                 spec->CropEnd(endCrop);
00172                 tmpSpec->CropStart(real_offset);
00173                 //append the inserted sequence and the end of this sequence.
00174                 insert(GNSEQI_END, gpbs);
00175                 insert(GNSEQI_END, *tmpSpec);
00176                 delete tmpSpec;
00177         STACK_TRACE_END
00178 }
00179 // Concatenation operators
00180 gnSequence const gnSequence::operator+(const gnSequence& seq) const{
00181         STACK_TRACE_START
00182                 gnSequence rval(*this);
00183                 rval.append(seq);
00184                 return rval;
00185         STACK_TRACE_END
00186 }
00187 // Subsequences and base deletion
00188 gnSequence gnSequence::subseq(const gnSeqI offset, const gnSeqI length) const{
00189         STACK_TRACE_START
00190                 if(offset == 0)
00191                         Throw_gnEx(SeqIndexOutOfBounds());
00192                 if(length == 0)
00193                         return gnSequence();
00194 
00195                 gnSequence tmpSeq(*spec->CloneRange(offset - 1, length));
00196                 return tmpSeq;
00197         STACK_TRACE_END
00198 }
00199 void gnSequence::erase( const gnSeqI offset, const gnSeqI len ){
00200         STACK_TRACE_START
00201                 if(offset == 0)
00202                         Throw_gnEx(SeqIndexOutOfBounds());
00203                 //range checking, etc.
00204                 gnSeqI current_length = length();
00205                 if(offset > current_length)
00206                         Throw_gnEx(SeqIndexOutOfBounds());
00207                 gnSeqI endBase = offset + len - 1 < current_length ? offset + len - 1 : current_length;
00208                 gnSeqI startBase = offset - 1;
00209 
00210                 gnGenomeSpec* tmpSpec = spec->CloneRange(endBase, GNSEQI_END);
00211                 uint32 lennard = tmpSpec->GetLength();
00212                 spec->CropEnd(current_length - startBase);
00213                 lennard = length();
00214                 insert(GNSEQI_END, *tmpSpec);
00215                 delete tmpSpec;
00216         STACK_TRACE_END
00217 }
00218 void gnSequence::splitContig(const gnSeqI splitI, const uint32 contigI){
00219         STACK_TRACE_START
00220                 gnSeqI splitBase = splitI;
00221                 gnSeqI current_length = length();
00222                 if(splitI == 0)
00223                         Throw_gnEx(SeqIndexOutOfBounds());
00224                 if(contigI == ALL_CONTIGS && splitI > current_length)
00225                                 Throw_gnEx(SeqIndexOutOfBounds());
00226                 else
00227                         localToGlobal(contigI, splitBase);
00228                 gnGenomeSpec* tmpSpec = spec->Clone();
00229                 tmpSpec->CropStart(splitBase);
00230                 spec->CropEnd(current_length - splitBase);
00231                 
00232                 insert(GNSEQI_END, *tmpSpec);
00233                 delete tmpSpec;
00234         STACK_TRACE_END
00235 }
00236 
00237 // IO operators
00238 istream& operator>>(istream& is, gnSequence& gps){      //read from source.
00239         string bases;
00240         is >> bases;
00241         gps.append(bases);
00242         return is;
00243 }
00244 ostream& operator<<(ostream& os, const gnSequence& gps){ //write to source.
00245         os << gps.ToString();
00246         return os;
00247 }
00248 string gnSequence::ToString( const gnSeqI len, const gnSeqI offset ) const
00249 {
00250         STACK_TRACE_START
00251                 string str;
00252                 ToString(str, len, offset);
00253                 return str;
00254         STACK_TRACE_END
00255 }
00256 boolean gnSequence::ToString( string& str, const gnSeqI len, const gnSeqI offset ) const
00257 {
00258         STACK_TRACE_START
00259                 gnSeqI real_offset = offset - 1;
00260                 gnSeqI m_length = length();
00261                 gnSeqI readSize = len > m_length - real_offset ? m_length - real_offset : len;
00262                 char *buf = new char[readSize+1];
00263                 boolean success = spec->SeqRead( real_offset, buf, readSize, ALL_CONTIGS);
00264                 buf[readSize] = '\0';
00265                 str = buf;
00266                 delete[] buf;
00267 
00268                 //now filter the string
00269                 list<const gnBaseFilter*>::const_iterator iter = filter_list.begin();
00270                 for(; iter != filter_list.end(); iter++)
00271                         (*iter)->Filter(str);
00272 
00273                 if( success )
00274                         return true;
00275                 return false;
00276         STACK_TRACE_END
00277 }
00278 boolean gnSequence::ToArray( gnSeqC* pSeqC, gnSeqI length, const gnSeqI offset ) const
00279 {
00280         STACK_TRACE_START
00281                 gnSeqI real_offset = offset - 1;
00282                 if(offset == GNSEQI_END)
00283                         return false;
00284                 gnSeqC* tmp = new gnSeqC[length];
00285                 boolean success = spec->SeqRead( real_offset, tmp, length, ALL_CONTIGS);
00286                 
00287                 //now filter the array
00288                 list<const gnBaseFilter*>::const_iterator iter = filter_list.begin();
00289                 for(; iter != filter_list.end(); iter++)
00290                         (*iter)->Filter(&tmp, length);
00291                 memcpy(pSeqC, tmp, length);
00292                 delete[] tmp;
00293                 
00294                 if( success )
00295                         return true;
00296 
00297                 return false;
00298         STACK_TRACE_END
00299 }
00300 gnSeqC gnSequence::GetSeqC( const gnSeqI offset ) const
00301 {
00302         STACK_TRACE_START
00303                 char block;
00304                 gnSeqI readLen = 1;
00305                 gnSeqI real_offset = offset - 1;
00306                 boolean success = spec->SeqRead( real_offset, &block, readLen, ALL_CONTIGS);
00307 
00308                 //now filter the char
00309                 list<const gnBaseFilter*>::const_iterator iter = filter_list.begin();
00310                 for(; iter != filter_list.end(); iter++)
00311                         block = (*iter)->Filter(block);
00312 
00313                 if( success )
00314                         return block;
00315 
00316                 return GNSEQC_NULL;
00317         STACK_TRACE_END
00318 }
00319 gnSeqC gnSequence::operator[]( const gnSeqI offset ) const
00320 {
00321         STACK_TRACE_START
00322                 return GetSeqC( offset );
00323         STACK_TRACE_END
00324 }
00325 
00326 gnSeqI gnSequence::contigListSize() const{
00327         STACK_TRACE_START
00328                 return spec->GetSpecListLength();
00329         STACK_TRACE_END
00330 }
00331 gnSeqI gnSequence::contigListLength() const{
00332         STACK_TRACE_START
00333                 return spec->GetSpecListLength();
00334         STACK_TRACE_END
00335 }
00336 
00337 //find the contig associated with base
00338 uint32 gnSequence::contigIndexByBase( const gnSeqI baseI) const{
00339         STACK_TRACE_START
00340                 return spec->GetSpecIndexByBase(baseI-1);
00341         STACK_TRACE_END
00342 }
00343 gnSequence gnSequence::contig( const uint32 contigI) const{
00344         STACK_TRACE_START
00345                 if(contigI == ALL_CONTIGS)
00346                         return *this;
00347                 return gnSequence(*spec->GetSpec(contigI));
00348         STACK_TRACE_END
00349 }
00350 //returns a gnSequence pointer containing the specified contig.
00351 gnSequence gnSequence::contigByBase( const gnSeqI baseI) const{
00352         STACK_TRACE_START
00353                 return gnSequence(*spec->GetSpecByBase(baseI-1));
00354         STACK_TRACE_END
00355 }
00356 uint32 gnSequence::contigIndexByName( string& contigName) const{
00357         STACK_TRACE_START
00358                 return spec->GetSpecIndexByName(contigName);
00359         STACK_TRACE_END
00360 }
00361 
00362 gnSeqI gnSequence::contigStart( const uint32 contigI) const{
00363         STACK_TRACE_START
00364                 int64 leafI = contigI;
00365                 // add one for the stupid geneticists whose indices start at 1
00366                 return spec->GetSpecStartBase( leafI ) + 1;
00367         STACK_TRACE_END
00368 }
00369 
00370 gnSeqI gnSequence::contigLength( const uint32 contigI) const{
00371         STACK_TRACE_START
00372                 return spec->GetSpec( contigI )->GetLength();
00373         STACK_TRACE_END
00374 }
00375 
00376 string gnSequence::contigName( const uint32 contigI) const{
00377         STACK_TRACE_START
00378                 return spec->GetSpec(contigI)->GetName();
00379         STACK_TRACE_END
00380 }
00381 
00382 void gnSequence::globalToLocal(uint32& contigI, gnSeqI& baseI) const{
00383         STACK_TRACE_START
00384                 contigI = contigIndexByBase(baseI);
00385                 baseI -= contigStart(contigI);
00386         STACK_TRACE_END
00387 }
00388 
00389 void gnSequence::localToGlobal(const uint32 contigI, gnSeqI& baseI) const{
00390         STACK_TRACE_START
00391                 if(baseI > contigLength(contigI))
00392                         Throw_gnEx(SeqIndexOutOfBounds());
00393                 baseI += contigStart(contigI) - 1;
00394         STACK_TRACE_END
00395 }
00396 
00397 void gnSequence::globalToSource(uint32& contigI, gnSeqI& baseI) const{
00398         STACK_TRACE_START
00399                 baseI--;        //convert from geneticist coordinates
00400                 gnSeqI seq_contig = spec->GetSpecIndexByBase(baseI);
00401                 gnSeqI new_base = baseI - spec->GetSpecStartBase(seq_contig);
00402                 gnFragmentSpec *fragment_spec = spec->GetSpec(seq_contig);
00403                 seq_contig = fragment_spec->GetSpecIndexByBase(new_base);
00404                 new_base = new_base - fragment_spec->GetSpecStartBase(seq_contig);
00405                 gnContigSpec* contig_spec = fragment_spec->GetSpec(seq_contig);
00406                 contigI = contig_spec->GetSourceContigIndex();
00407                 gnSeqI contig_start = contig_spec->GetStart();
00408                 if(contig_spec->IsReverseComplement()){
00409                         gnSeqI source_len = contig_spec->GetSourceLength();
00410                         //it may seem counter intuitive, but we can add the source_len and mod by source_len
00411                         //to deal with circularity
00412         //              contig_start = (contig_start - contig_spec->GetLength() + source_len) % source_len;
00413                         //if we are reverse complement then the contig_start will be the ending base pair
00414                         //we want to subtract new_base from it. and we can add/mod by source_len to deal
00415                         //with circles.
00416                         baseI = (contig_start - new_base - 1 + source_len) % source_len;
00417                 }else
00418                         baseI =  contig_start + new_base + 1;
00419         STACK_TRACE_END
00420 }
00421 
00422 void gnSequence::localToSource(uint32& contigI, gnSeqI& baseI) const{
00423         STACK_TRACE_START
00424                 localToGlobal(contigI, baseI);
00425                 globalToSource(contigI, baseI);
00426         STACK_TRACE_END
00427 }
00428 
00429 gnSequence gnSequence::contigByName( string& contigName) const{
00430         STACK_TRACE_START
00431                 uint32 contigIndex = spec->GetSpecIndexByName(contigName);
00432                 return gnSequence(*spec->GetSpec(contigIndex));
00433         STACK_TRACE_END
00434 }
00435 
00436 void gnSequence::setContigName( const uint32 contigI, const string& contig_name) {
00437         STACK_TRACE_START
00438                 if(contigI == ALL_CONTIGS)
00439                         spec->SetName(contig_name);
00440                 else
00441                         spec->GetSpec(contigI)->SetName(contig_name);
00442         STACK_TRACE_END
00443 }
00444 
00445 void gnSequence::setReverseComplement( const boolean revComp, const uint32 contigI){
00446         STACK_TRACE_START
00447                 if(contigI == ALL_CONTIGS)
00448                         spec->SetReverseComplement(revComp);
00449                 else{
00450                         gnFragmentSpec* contig = spec->GetSpec(contigI);
00451                         contig->SetReverseComplement(revComp);
00452                 }
00453         STACK_TRACE_END
00454 }
00455 
00456 boolean gnSequence::isReverseComplement( const uint32 contigI ){
00457         STACK_TRACE_START
00458                 if(contigI == ALL_CONTIGS)
00459                         return spec->IsReverseComplement();
00460                 gnFragmentSpec* contig = spec->GetSpec(contigI);
00461                 return contig->IsReverseComplement();
00462         STACK_TRACE_END
00463 }
00464 
00465 uint32 gnSequence::getHeaderListLength(const uint32 contigI) const{
00466         STACK_TRACE_START
00467                 if(contigI == ALL_CONTIGS)
00468                         return spec->GetHeaderListLength();
00469                 else{
00470                         return spec->GetSpec(contigI)->GetHeaderListLength();
00471                 }
00472         STACK_TRACE_END
00473 }
00474 
00475 gnBaseHeader* gnSequence::getHeader(const uint32 contigI, const uint32 headerI) const{
00476         STACK_TRACE_START
00477                 if(contigI == ALL_CONTIGS)
00478                         return spec->GetHeader(headerI);
00479                 else{
00480                         return spec->GetSpec(contigI)->GetHeader(headerI);
00481                 }
00482         STACK_TRACE_END
00483 }
00484 
00485 void gnSequence::addHeader(const uint32 contigI, gnBaseHeader* header, const uint32 headerI){
00486         STACK_TRACE_START
00487                 if(contigI == ALL_CONTIGS)
00488                         spec->AddHeader(header, headerI);
00489                 else{
00490                         spec->GetSpec(contigI)->AddHeader(header, headerI);
00491                 }
00492         STACK_TRACE_END
00493 }
00494 
00495 void gnSequence::removeHeader(const uint32 contigI, const uint32 headerI){
00496         STACK_TRACE_START
00497                 if(contigI == ALL_CONTIGS)
00498                         spec->RemoveHeader(headerI);
00499                 else
00500                         spec->GetSpec(contigI)->RemoveHeader(headerI);
00501         STACK_TRACE_END
00502 }
00503 
00504 void gnSequence::merge(const gnSeqI startBase, const gnSeqI endBase){
00505         STACK_TRACE_START
00506         STACK_TRACE_END
00507 }
00508 
00509 void gnSequence::mergeContigs(const uint32 startC, const uint32 endC){
00510         STACK_TRACE_START
00511                 spec->MergeFragments(startC, endC);
00512         STACK_TRACE_END
00513 }
00514 
00515 bool gnSequence::LoadSource(const string sourcename){
00516         STACK_TRACE_START
00517                 gnSourceFactory* m_sourceFactory = gnSourceFactory::GetSourceFactory();
00518                 gnBaseSource *m_pSource = m_sourceFactory->AddSource(sourcename, true);
00519                 if (m_pSource!=NULL)
00520                 {
00521                         if(spec != NULL)
00522                                 delete spec;
00523                         spec = m_pSource->GetSpec();
00524                         return true;
00525                 }
00526                 return false;
00527         STACK_TRACE_END
00528 }
00529 
00530 gnSeqI gnSequence::find(const gnSequence& search, const gnSeqI offset)const{
00531         STACK_TRACE_START
00532                 //this is really adHoc... should probably be switched
00533                 string searchIn=ToString();
00534                 string find= search.ToString();
00535                 string::size_type pos = searchIn.find(find, offset);
00536                 if (pos == string::npos)
00537                         return GNSEQI_ERROR;
00538                 else{
00539                         return pos++; 
00540                 }
00541         STACK_TRACE_END
00542 }

Generated at Fri Nov 30 15:36:51 2001 for libGenome by doxygen1.2.8.1 written by Dimitri van Heesch, © 1997-2001