Skip to content

Commit

Permalink
Implemented --outBAMsortingBinsN option to control the number of sort…
Browse files Browse the repository at this point in the history
…ing bins. Increasing this number reduces the amount of RAM required for sorting.
  • Loading branch information
alexdobin committed Jan 9, 2018
1 parent a57c5f8 commit 91d34b7
Show file tree
Hide file tree
Showing 8 changed files with 1,295 additions and 1,282 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
* Implemented --outBAMsortingBinsN option to control the number of sorting bins. Increasing this number reduces the amount of RAM required for sorting.
* Implemented --waspOutputMode option for filtering allele specific alignments. This is a re-implementation of the original WASP algorithm by Bryce van de Geijn, Graham McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, 1061–1063 (2015), https://www.nature.com/articles/nmeth.3582 .

* Fixed a bug in chimeric detection code which sometimes led to uninitialized memory access. The chimeric output may change for a very small number of reads.
Expand Down
Binary file modified bin/Linux_x86_64/STAR
Binary file not shown.
12 changes: 6 additions & 6 deletions source/BAMbinSortUnmapped.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ void BAMbinSortUnmapped(uint32 iBin, uint nThreads, string dirBAMsort, BGZF *bgz
vector<string> bamInFile;
std::map <uint,uint> startPos;

for (uint it=0; it<nThreads; it++) {//initialize
for (uint it=0; it<nThreads; it++) {//files from all threads, and BySJout
bamInFile.push_back(dirBAMsort+to_string(it)+"/"+to_string((uint) iBin));
bamInFile.push_back(dirBAMsort+to_string(it)+"/"+to_string((uint) iBin)+".BySJout");
};
vector<uint32> bamSize(bamInFile.size(),0);
vector<uint32> bamSize(bamInFile.size(),0);//record sizes

//allocate arrays
char **bamIn=new char* [bamInFile.size()];
Expand All @@ -25,13 +25,13 @@ void BAMbinSortUnmapped(uint32 iBin, uint nThreads, string dirBAMsort, BGZF *bgz
for (uint it=0; it<bamInFile.size(); it++) {//initialize
bamIn[it] = new char [BAMoutput_oneAlignMaxBytes];

bamInStream[it].open(bamInFile.at(it).c_str());
bamInStream[it].open(bamInFile.at(it).c_str());//opean all files

bamInStream[it].read(bamIn[it],sizeof(int32));//read record size
bamInStream[it].read(bamIn[it],sizeof(int32));//read BAM record size
if (bamInStream[it].good()) {
bamSize[it]=((*(uint32*)bamIn[it])+sizeof(int32));
bamSize[it]=((*(uint32*)bamIn[it])+sizeof(int32));//true record size +=4 (4 bytes for uint-iRead)
bamInStream[it].read(bamIn[it]+sizeof(int32),bamSize.at(it)-sizeof(int32)+sizeof(uint));//read the rest of the record, including last uint = iRead
startPos[*(uint*)(bamIn[it]+bamSize.at(it))]=it;
startPos[*(uint*)(bamIn[it]+bamSize.at(it))]=it;//startPos[iRead]=it : record the order of the files to output
} else {//nothing to do here, file is empty, do not record it
};
};
Expand Down
3 changes: 2 additions & 1 deletion source/Parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ Parameters::Parameters() {//initalize parameters info
parArray.push_back(new ParameterInfoScalar <string> (-1, -1, "outSAMheaderCommentFile", &outSAMheaderCommentFile));
parArray.push_back(new ParameterInfoScalar <int> (-1, -1, "outBAMcompression", &outBAMcompression));
parArray.push_back(new ParameterInfoScalar <int> (-1, -1, "outBAMsortingThreadN", &outBAMsortingThreadN));
parArray.push_back(new ParameterInfoScalar <uint32> (-1, -1, "outBAMsortingBinsN", &outBAMsortingBinsN));
parArray.push_back(new ParameterInfoVector <string> (-1, -1, "outSAMfilter", &outSAMfilter.mode));
parArray.push_back(new ParameterInfoScalar <uint> (-1, -1, "outSAMmultNmax", &outSAMmultNmax));
parArray.push_back(new ParameterInfoScalar <uint> (-1, -1, "outSAMattrIHstart", &outSAMattrIHstart));
Expand Down Expand Up @@ -601,7 +602,7 @@ void Parameters::inputParameters (int argInN, char* argIn[]) {//input parameters
} else {
outBAMsortingThreadNactual=outBAMsortingThreadN;
};
outBAMcoordNbins=max(outBAMsortingThreadNactual*3,10);
outBAMcoordNbins=max((uint32)outBAMsortingThreadNactual*3,outBAMsortingBinsN);
outBAMsortingBinStart= new uint64 [outBAMcoordNbins];
outBAMsortingBinStart[0]=1;//this initial value means that the bin sizes have not been determined yet

Expand Down
1 change: 1 addition & 0 deletions source/Parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ class Parameters {
vector <string> outSAMtype;
bool outBAMunsorted, outBAMcoord, outSAMbool;
uint32 outBAMcoordNbins;
uint32 outBAMsortingBinsN;//user-defined number of bins for sorting
string outBAMsortTmpDir;

// string bamRemoveDuplicatesType;
Expand Down
2 changes: 1 addition & 1 deletion source/STAR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ int main(int argInN, char* argIn[]) {

//check max size needed for sorting
uint maxMem=0;
for (uint32 ibin=0; ibin<nBins-1; ibin++) {//check akk bins
for (uint32 ibin=0; ibin<nBins-1; ibin++) {//check all bins
uint binS=0;
for (int it=0; it<P.runThreadN; it++) {//collect sizes from threads
binS += RAchunk[it]->chunkOutBAMcoord->binTotalBytes[ibin]+24*RAchunk[it]->chunkOutBAMcoord->binTotalN[ibin];
Expand Down
3 changes: 3 additions & 0 deletions source/parametersDefault
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,9 @@ outBAMcompression 1
outBAMsortingThreadN 0
int: >=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN).

outBAMsortingBinsN 50
int: >0: number of genome bins for coordinate-sorting

### BAM processing

bamRemoveDuplicatesType -
Expand Down
Loading

0 comments on commit 91d34b7

Please sign in to comment.