|
gnSequence.cppGo to the documentation of this file.00001 00002 // File: gnSequence.h 00003 // Purpose: Source Seq, where sequence is stored in source file 00004 // Description: implements the baseSeq interface for dna source files. 00005 // Changes: 00006 // Version: libGenome 0.1.0 00007 // Author: Aaron Darling 00008 // Last Edited: April 15, 2001, 11:13:00pm 00009 // Modified by: 00010 // Copyright: (c) Aaron Darling 00011 // Licenses: Proprietary 00013 #include "gn/gnSequence.h" 00014 #include "gn/gnSourceFactory.h" 00015 #include "gn/gnBaseSource.h" 00016 #include "gn/gnGenomeSpec.h" 00017 #include "gn/gnFragmentSpec.h" 00018 #include "gn/gnSourceSpec.h" 00019 #include "gn/gnStringSpec.h" 00020 #include "gn/gnStringHeader.h" 00021 00022 // Constructor 00023 gnSequence::gnSequence( ) 00024 { 00025 spec = new gnGenomeSpec(); 00026 comparator = gnCompare::DNASeqCompare(); 00027 } 00028 gnSequence::gnSequence( const gnSeqC* seq){ 00029 spec = new gnGenomeSpec(); 00030 if (seq[0] != 0){ 00031 gnFragmentSpec *fragSpec = new gnFragmentSpec(); 00032 spec->AddSpec(fragSpec); 00033 fragSpec->AddSpec(new gnStringSpec(seq)); 00034 } 00035 comparator = gnCompare::DNASeqCompare(); 00036 } 00037 gnSequence::gnSequence( const string& str){ 00038 spec = new gnGenomeSpec(); 00039 if (str.length() != 0){ 00040 gnFragmentSpec *fragSpec = new gnFragmentSpec(); 00041 spec->AddSpec(fragSpec); 00042 fragSpec->AddSpec(new gnStringSpec(str)); 00043 } 00044 comparator = gnCompare::DNASeqCompare(); 00045 } 00046 gnSequence::gnSequence( const gnGenomeSpec& gngs){ 00047 spec = gngs.Clone(); 00048 comparator = gnCompare::DNASeqCompare(); 00049 } 00050 gnSequence::gnSequence( const gnFragmentSpec& gnfs){ 00051 spec = new gnGenomeSpec(); 00052 spec->AddSpec(gnfs.Clone()); 00053 comparator = gnCompare::DNASeqCompare(); 00054 } 00055 gnSequence::gnSequence( const gnContigSpec& gcs){ 00056 spec = new gnGenomeSpec(); 00057 gnFragmentSpec *fragSpec = new gnFragmentSpec(); 00058 fragSpec->AddSpec(gcs.Clone()); 00059 comparator = gnCompare::DNASeqCompare(); 00060 } 00061 gnSequence::gnSequence( gnSeqC *bases, gnSeqI length){ 00062 spec = new gnGenomeSpec(); 00063 if (length > 0){ 00064 gnFragmentSpec *fragSpec = new gnFragmentSpec(); 00065 spec->AddSpec(fragSpec); 00066 fragSpec->AddSpec(new gnStringSpec(string(bases, length))); 00067 } 00068 comparator = gnCompare::DNASeqCompare(); 00069 } 00070 gnSequence::gnSequence( const gnSequence& seq ) 00071 { 00072 spec = seq.spec->Clone(); 00073 filter_list = seq.filter_list; 00074 comparator = seq.comparator; 00075 } 00076 // Destructor 00077 gnSequence::~gnSequence() 00078 { 00079 if (spec != NULL) delete spec; 00080 } 00081 00082 gnSequence& gnSequence::operator=( const gnSequence& seq){ 00083 spec = seq.spec->Clone(); 00084 filter_list = seq.filter_list; 00085 comparator = seq.comparator; 00086 return *this; 00087 } 00088 00089 // Clone 00090 gnSequence* gnSequence::Clone() const 00091 { 00092 return new gnSequence( *this ); 00093 } 00094 //IMPLEMENT ME!! Consider ambiguities! 00095 int gnSequence::compare(const string& str) const{ 00096 STACK_TRACE_START 00097 gnSeqI len = length(); 00098 gnSeqI seq_len = str.length(); 00099 gnSeqI comparelen = len < seq_len ? len : seq_len; 00100 gnSeqI compared = 0; 00101 while(comparelen > 0){ 00102 gnSeqI curlen = comparelen < BUFFER_SIZE ? comparelen : BUFFER_SIZE; 00103 string bases = ToString(compared, curlen); 00104 string seq_bases = str.substr(compared, curlen); 00105 if(comparator->LessThan(bases, seq_bases)) 00106 return -1; 00107 else if(comparator->LessThan(seq_bases, bases)) 00108 return 1; 00109 00110 compared += curlen; 00111 comparelen -= curlen; 00112 } 00113 if(len < seq_len) 00114 return -1; 00115 else if(len > seq_len) 00116 return 1; 00117 return 0; 00118 STACK_TRACE_END 00119 } 00120 00121 int gnSequence::compare(const gnSequence& seq) const{ 00122 STACK_TRACE_START 00123 gnSeqI len = length(); 00124 gnSeqI seq_len = seq.length(); 00125 gnSeqI comparelen = len < seq_len ? len : seq_len; 00126 gnSeqI compared = 0; 00127 while(comparelen > 0){ 00128 gnSeqI curlen = comparelen < BUFFER_SIZE ? comparelen : BUFFER_SIZE; 00129 string bases = ToString(curlen, compared+1); 00130 string seq_bases = seq.ToString(curlen, compared+1); 00131 if(comparator->LessThan(bases, seq_bases)) 00132 return -1; 00133 else if(comparator->LessThan(seq_bases, bases)) 00134 return 1; 00135 00136 compared+= curlen; 00137 comparelen -= curlen; 00138 } 00139 if(len < seq_len) 00140 return -1; 00141 else if(len > seq_len) 00142 return 1; 00143 return 0; 00144 STACK_TRACE_END 00145 } 00146 00147 void gnSequence::insert( const gnSeqI offset, const gnSeqC *bases, const gnSeqI len){ 00148 STACK_TRACE_START 00149 string str(bases, len); 00150 gnStringSpec gpbs(str); 00151 insert(offset, gpbs); 00152 STACK_TRACE_END 00153 } 00154 //Offset is the gene index, not a computer index, to insert before. 00155 //This is the default insert. It inserts entire contigs 00156 void gnSequence::insert( const gnSeqI offset, const gnGenomeSpec& gpbs){ 00157 STACK_TRACE_START 00158 if(offset == 0) 00159 Throw_gnEx(SeqIndexOutOfBounds()); 00160 if(offset == GNSEQI_END || spec->GetLength() < offset){ //simple append. 00161 for(uint32 i=0; i < gpbs.GetSpecListLength(); i++) 00162 spec->AddSpec(gpbs.GetSpec(i)->Clone()); 00163 return; 00164 } 00165 //clone this sequence 00166 gnGenomeSpec* tmpSpec = spec->Clone(); 00167 //crop off the end of this sequence past the insert point 00168 //crop off the beginning of the cloned sequence up to the insert 00169 gnSeqI real_offset = offset - 1; 00170 gnSeqI endCrop = spec->GetLength() - real_offset; 00171 spec->CropEnd(endCrop); 00172 tmpSpec->CropStart(real_offset); 00173 //append the inserted sequence and the end of this sequence. 00174 insert(GNSEQI_END, gpbs); 00175 insert(GNSEQI_END, *tmpSpec); 00176 delete tmpSpec; 00177 STACK_TRACE_END 00178 } 00179 // Concatenation operators 00180 gnSequence const gnSequence::operator+(const gnSequence& seq) const{ 00181 STACK_TRACE_START 00182 gnSequence rval(*this); 00183 rval.append(seq); 00184 return rval; 00185 STACK_TRACE_END 00186 } 00187 // Subsequences and base deletion 00188 gnSequence gnSequence::subseq(const gnSeqI offset, const gnSeqI length) const{ 00189 STACK_TRACE_START 00190 if(offset == 0) 00191 Throw_gnEx(SeqIndexOutOfBounds()); 00192 if(length == 0) 00193 return gnSequence(); 00194 00195 gnSequence tmpSeq(*spec->CloneRange(offset - 1, length)); 00196 return tmpSeq; 00197 STACK_TRACE_END 00198 } 00199 void gnSequence::erase( const gnSeqI offset, const gnSeqI len ){ 00200 STACK_TRACE_START 00201 if(offset == 0) 00202 Throw_gnEx(SeqIndexOutOfBounds()); 00203 //range checking, etc. 00204 gnSeqI current_length = length(); 00205 if(offset > current_length) 00206 Throw_gnEx(SeqIndexOutOfBounds()); 00207 gnSeqI endBase = offset + len - 1 < current_length ? offset + len - 1 : current_length; 00208 gnSeqI startBase = offset - 1; 00209 00210 gnGenomeSpec* tmpSpec = spec->CloneRange(endBase, GNSEQI_END); 00211 uint32 lennard = tmpSpec->GetLength(); 00212 spec->CropEnd(current_length - startBase); 00213 lennard = length(); 00214 insert(GNSEQI_END, *tmpSpec); 00215 delete tmpSpec; 00216 STACK_TRACE_END 00217 } 00218 void gnSequence::splitContig(const gnSeqI splitI, const uint32 contigI){ 00219 STACK_TRACE_START 00220 gnSeqI splitBase = splitI; 00221 gnSeqI current_length = length(); 00222 if(splitI == 0) 00223 Throw_gnEx(SeqIndexOutOfBounds()); 00224 if(contigI == ALL_CONTIGS && splitI > current_length) 00225 Throw_gnEx(SeqIndexOutOfBounds()); 00226 else 00227 localToGlobal(contigI, splitBase); 00228 gnGenomeSpec* tmpSpec = spec->Clone(); 00229 tmpSpec->CropStart(splitBase); 00230 spec->CropEnd(current_length - splitBase); 00231 00232 insert(GNSEQI_END, *tmpSpec); 00233 delete tmpSpec; 00234 STACK_TRACE_END 00235 } 00236 00237 // IO operators 00238 istream& operator>>(istream& is, gnSequence& gps){ //read from source. 00239 string bases; 00240 is >> bases; 00241 gps.append(bases); 00242 return is; 00243 } 00244 ostream& operator<<(ostream& os, const gnSequence& gps){ //write to source. 00245 os << gps.ToString(); 00246 return os; 00247 } 00248 string gnSequence::ToString( const gnSeqI len, const gnSeqI offset ) const 00249 { 00250 STACK_TRACE_START 00251 string str; 00252 ToString(str, len, offset); 00253 return str; 00254 STACK_TRACE_END 00255 } 00256 boolean gnSequence::ToString( string& str, const gnSeqI len, const gnSeqI offset ) const 00257 { 00258 STACK_TRACE_START 00259 gnSeqI real_offset = offset - 1; 00260 gnSeqI m_length = length(); 00261 gnSeqI readSize = len > m_length - real_offset ? m_length - real_offset : len; 00262 char *buf = new char[readSize+1]; 00263 boolean success = spec->SeqRead( real_offset, buf, readSize, ALL_CONTIGS); 00264 buf[readSize] = '\0'; 00265 str = buf; 00266 delete[] buf; 00267 00268 //now filter the string 00269 list<const gnBaseFilter*>::const_iterator iter = filter_list.begin(); 00270 for(; iter != filter_list.end(); iter++) 00271 (*iter)->Filter(str); 00272 00273 if( success ) 00274 return true; 00275 return false; 00276 STACK_TRACE_END 00277 } 00278 boolean gnSequence::ToArray( gnSeqC* pSeqC, gnSeqI length, const gnSeqI offset ) const 00279 { 00280 STACK_TRACE_START 00281 gnSeqI real_offset = offset - 1; 00282 if(offset == GNSEQI_END) 00283 return false; 00284 gnSeqC* tmp = new gnSeqC[length]; 00285 boolean success = spec->SeqRead( real_offset, tmp, length, ALL_CONTIGS); 00286 00287 //now filter the array 00288 list<const gnBaseFilter*>::const_iterator iter = filter_list.begin(); 00289 for(; iter != filter_list.end(); iter++) 00290 (*iter)->Filter(&tmp, length); 00291 memcpy(pSeqC, tmp, length); 00292 delete[] tmp; 00293 00294 if( success ) 00295 return true; 00296 00297 return false; 00298 STACK_TRACE_END 00299 } 00300 gnSeqC gnSequence::GetSeqC( const gnSeqI offset ) const 00301 { 00302 STACK_TRACE_START 00303 char block; 00304 gnSeqI readLen = 1; 00305 gnSeqI real_offset = offset - 1; 00306 boolean success = spec->SeqRead( real_offset, &block, readLen, ALL_CONTIGS); 00307 00308 //now filter the char 00309 list<const gnBaseFilter*>::const_iterator iter = filter_list.begin(); 00310 for(; iter != filter_list.end(); iter++) 00311 block = (*iter)->Filter(block); 00312 00313 if( success ) 00314 return block; 00315 00316 return GNSEQC_NULL; 00317 STACK_TRACE_END 00318 } 00319 gnSeqC gnSequence::operator[]( const gnSeqI offset ) const 00320 { 00321 STACK_TRACE_START 00322 return GetSeqC( offset ); 00323 STACK_TRACE_END 00324 } 00325 00326 gnSeqI gnSequence::contigListSize() const{ 00327 STACK_TRACE_START 00328 return spec->GetSpecListLength(); 00329 STACK_TRACE_END 00330 } 00331 gnSeqI gnSequence::contigListLength() const{ 00332 STACK_TRACE_START 00333 return spec->GetSpecListLength(); 00334 STACK_TRACE_END 00335 } 00336 00337 //find the contig associated with base 00338 uint32 gnSequence::contigIndexByBase( const gnSeqI baseI) const{ 00339 STACK_TRACE_START 00340 return spec->GetSpecIndexByBase(baseI-1); 00341 STACK_TRACE_END 00342 } 00343 gnSequence gnSequence::contig( const uint32 contigI) const{ 00344 STACK_TRACE_START 00345 if(contigI == ALL_CONTIGS) 00346 return *this; 00347 return gnSequence(*spec->GetSpec(contigI)); 00348 STACK_TRACE_END 00349 } 00350 //returns a gnSequence pointer containing the specified contig. 00351 gnSequence gnSequence::contigByBase( const gnSeqI baseI) const{ 00352 STACK_TRACE_START 00353 return gnSequence(*spec->GetSpecByBase(baseI-1)); 00354 STACK_TRACE_END 00355 } 00356 uint32 gnSequence::contigIndexByName( string& contigName) const{ 00357 STACK_TRACE_START 00358 return spec->GetSpecIndexByName(contigName); 00359 STACK_TRACE_END 00360 } 00361 00362 gnSeqI gnSequence::contigStart( const uint32 contigI) const{ 00363 STACK_TRACE_START 00364 int64 leafI = contigI; 00365 // add one for the stupid geneticists whose indices start at 1 00366 return spec->GetSpecStartBase( leafI ) + 1; 00367 STACK_TRACE_END 00368 } 00369 00370 gnSeqI gnSequence::contigLength( const uint32 contigI) const{ 00371 STACK_TRACE_START 00372 return spec->GetSpec( contigI )->GetLength(); 00373 STACK_TRACE_END 00374 } 00375 00376 string gnSequence::contigName( const uint32 contigI) const{ 00377 STACK_TRACE_START 00378 return spec->GetSpec(contigI)->GetName(); 00379 STACK_TRACE_END 00380 } 00381 00382 void gnSequence::globalToLocal(uint32& contigI, gnSeqI& baseI) const{ 00383 STACK_TRACE_START 00384 contigI = contigIndexByBase(baseI); 00385 baseI -= contigStart(contigI); 00386 STACK_TRACE_END 00387 } 00388 00389 void gnSequence::localToGlobal(const uint32 contigI, gnSeqI& baseI) const{ 00390 STACK_TRACE_START 00391 if(baseI > contigLength(contigI)) 00392 Throw_gnEx(SeqIndexOutOfBounds()); 00393 baseI += contigStart(contigI) - 1; 00394 STACK_TRACE_END 00395 } 00396 00397 void gnSequence::globalToSource(uint32& contigI, gnSeqI& baseI) const{ 00398 STACK_TRACE_START 00399 baseI--; //convert from geneticist coordinates 00400 gnSeqI seq_contig = spec->GetSpecIndexByBase(baseI); 00401 gnSeqI new_base = baseI - spec->GetSpecStartBase(seq_contig); 00402 gnFragmentSpec *fragment_spec = spec->GetSpec(seq_contig); 00403 seq_contig = fragment_spec->GetSpecIndexByBase(new_base); 00404 new_base = new_base - fragment_spec->GetSpecStartBase(seq_contig); 00405 gnContigSpec* contig_spec = fragment_spec->GetSpec(seq_contig); 00406 contigI = contig_spec->GetSourceContigIndex(); 00407 gnSeqI contig_start = contig_spec->GetStart(); 00408 if(contig_spec->IsReverseComplement()){ 00409 gnSeqI source_len = contig_spec->GetSourceLength(); 00410 //it may seem counter intuitive, but we can add the source_len and mod by source_len 00411 //to deal with circularity 00412 // contig_start = (contig_start - contig_spec->GetLength() + source_len) % source_len; 00413 //if we are reverse complement then the contig_start will be the ending base pair 00414 //we want to subtract new_base from it. and we can add/mod by source_len to deal 00415 //with circles. 00416 baseI = (contig_start - new_base - 1 + source_len) % source_len; 00417 }else 00418 baseI = contig_start + new_base + 1; 00419 STACK_TRACE_END 00420 } 00421 00422 void gnSequence::localToSource(uint32& contigI, gnSeqI& baseI) const{ 00423 STACK_TRACE_START 00424 localToGlobal(contigI, baseI); 00425 globalToSource(contigI, baseI); 00426 STACK_TRACE_END 00427 } 00428 00429 gnSequence gnSequence::contigByName( string& contigName) const{ 00430 STACK_TRACE_START 00431 uint32 contigIndex = spec->GetSpecIndexByName(contigName); 00432 return gnSequence(*spec->GetSpec(contigIndex)); 00433 STACK_TRACE_END 00434 } 00435 00436 void gnSequence::setContigName( const uint32 contigI, const string& contig_name) { 00437 STACK_TRACE_START 00438 if(contigI == ALL_CONTIGS) 00439 spec->SetName(contig_name); 00440 else 00441 spec->GetSpec(contigI)->SetName(contig_name); 00442 STACK_TRACE_END 00443 } 00444 00445 void gnSequence::setReverseComplement( const boolean revComp, const uint32 contigI){ 00446 STACK_TRACE_START 00447 if(contigI == ALL_CONTIGS) 00448 spec->SetReverseComplement(revComp); 00449 else{ 00450 gnFragmentSpec* contig = spec->GetSpec(contigI); 00451 contig->SetReverseComplement(revComp); 00452 } 00453 STACK_TRACE_END 00454 } 00455 00456 boolean gnSequence::isReverseComplement( const uint32 contigI ){ 00457 STACK_TRACE_START 00458 if(contigI == ALL_CONTIGS) 00459 return spec->IsReverseComplement(); 00460 gnFragmentSpec* contig = spec->GetSpec(contigI); 00461 return contig->IsReverseComplement(); 00462 STACK_TRACE_END 00463 } 00464 00465 uint32 gnSequence::getHeaderListLength(const uint32 contigI) const{ 00466 STACK_TRACE_START 00467 if(contigI == ALL_CONTIGS) 00468 return spec->GetHeaderListLength(); 00469 else{ 00470 return spec->GetSpec(contigI)->GetHeaderListLength(); 00471 } 00472 STACK_TRACE_END 00473 } 00474 00475 gnBaseHeader* gnSequence::getHeader(const uint32 contigI, const uint32 headerI) const{ 00476 STACK_TRACE_START 00477 if(contigI == ALL_CONTIGS) 00478 return spec->GetHeader(headerI); 00479 else{ 00480 return spec->GetSpec(contigI)->GetHeader(headerI); 00481 } 00482 STACK_TRACE_END 00483 } 00484 00485 void gnSequence::addHeader(const uint32 contigI, gnBaseHeader* header, const uint32 headerI){ 00486 STACK_TRACE_START 00487 if(contigI == ALL_CONTIGS) 00488 spec->AddHeader(header, headerI); 00489 else{ 00490 spec->GetSpec(contigI)->AddHeader(header, headerI); 00491 } 00492 STACK_TRACE_END 00493 } 00494 00495 void gnSequence::removeHeader(const uint32 contigI, const uint32 headerI){ 00496 STACK_TRACE_START 00497 if(contigI == ALL_CONTIGS) 00498 spec->RemoveHeader(headerI); 00499 else 00500 spec->GetSpec(contigI)->RemoveHeader(headerI); 00501 STACK_TRACE_END 00502 } 00503 00504 void gnSequence::merge(const gnSeqI startBase, const gnSeqI endBase){ 00505 STACK_TRACE_START 00506 STACK_TRACE_END 00507 } 00508 00509 void gnSequence::mergeContigs(const uint32 startC, const uint32 endC){ 00510 STACK_TRACE_START 00511 spec->MergeFragments(startC, endC); 00512 STACK_TRACE_END 00513 } 00514 00515 bool gnSequence::LoadSource(const string sourcename){ 00516 STACK_TRACE_START 00517 gnSourceFactory* m_sourceFactory = gnSourceFactory::GetSourceFactory(); 00518 gnBaseSource *m_pSource = m_sourceFactory->AddSource(sourcename, true); 00519 if (m_pSource!=NULL) 00520 { 00521 if(spec != NULL) 00522 delete spec; 00523 spec = m_pSource->GetSpec(); 00524 return true; 00525 } 00526 return false; 00527 STACK_TRACE_END 00528 } 00529 00530 gnSeqI gnSequence::find(const gnSequence& search, const gnSeqI offset)const{ 00531 STACK_TRACE_START 00532 //this is really adHoc... should probably be switched 00533 string searchIn=ToString(); 00534 string find= search.ToString(); 00535 string::size_type pos = searchIn.find(find, offset); 00536 if (pos == string::npos) 00537 return GNSEQI_ERROR; 00538 else{ 00539 return pos++; 00540 } 00541 STACK_TRACE_END 00542 } Generated at Fri Nov 30 15:36:51 2001 for libGenome by 1.2.8.1 written by Dimitri van Heesch, © 1997-2001 |