Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 86 additions & 16 deletions src/fastqreader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,22 +222,29 @@ void FastqReader::getLine(string* line){
int start = mBufUsedLen;
int end = start;

while(end < mBufDataLen) {
if(mFastqBuf[end] != '\r' && mFastqBuf[end] != '\n')
end++;
else
break;
{
const char* nl = (const char*)memchr(mFastqBuf + start, '\n', mBufDataLen - start);
if(nl) {
end = nl - mFastqBuf;
// handle \r before \n
if(end > start && mFastqBuf[end - 1] == '\r')
end--;
} else {
// no \n found; scan for lone \r
const char* cr = (const char*)memchr(mFastqBuf + start, '\r', mBufDataLen - start);
end = cr ? (cr - mFastqBuf) : mBufDataLen;
}
}

// this line well contained in this buf, or this is the last buf
if(end < mBufDataLen || bufferFinished()) {
int len = end - start;
line->assign(mFastqBuf+start, len);

// skip \n or \r
// skip \r and/or \n
end++;
// handle \r\n
if(end < mBufDataLen-1 && mFastqBuf[end-1]=='\r' && mFastqBuf[end] == '\n')
if(end < mBufDataLen && mFastqBuf[end-1]=='\r' && mFastqBuf[end] == '\n')
end++;

mBufUsedLen = end;
Expand All @@ -258,11 +265,18 @@ void FastqReader::getLine(string* line){
start++;
end = start;
}
while(end < mBufDataLen) {
if(mFastqBuf[end] != '\r' && mFastqBuf[end] != '\n')
end++;
else
break;
{
const char* nl = (const char*)memchr(mFastqBuf + end, '\n', mBufDataLen - end);
if(nl) {
end = nl - mFastqBuf;
// handle \r before \n
if(end > start && mFastqBuf[end - 1] == '\r')
end--;
} else {
// no \n found; scan for lone \r
const char* cr = (const char*)memchr(mFastqBuf + start, '\r', mBufDataLen - start);
end = cr ? (cr - mFastqBuf) : mBufDataLen;
}
}
// this line well contained in this buf
if(end < mBufDataLen || bufferFinished()) {
Expand All @@ -285,6 +299,56 @@ void FastqReader::getLine(string* line){
return;
}

void FastqReader::skipLine(){
    // Discard input up to and including the next '\n' without copying it
    // anywhere. The caller uses this to drop the '+' separator line of a
    // FASTQ record. Only '\n' is treated as the line terminator here.
    for(;;) {
        // Current buffer fully consumed: stop at end of input, else refill.
        if(mBufUsedLen >= mBufDataLen) {
            if(bufferFinished())
                return;
            readToBuf();
        }

        const char* unread = mFastqBuf + mBufUsedLen;
        const char* hit = (const char*)memchr(unread, '\n', mBufDataLen - mBufUsedLen);
        if(hit == NULL) {
            // The line continues past this buffer: mark it all as consumed
            // so the next iteration refills (or returns at end of input).
            mBufUsedLen = mBufDataLen;
            continue;
        }

        // Position just after the '\n' that ends the skipped line.
        int next = hit - mFastqBuf + 1;
        mBufUsedLen = next;
        return;
    }
}

void FastqReader::readExact(string* str, int len){
    // Copy the next `len` bytes of input into *str, refilling the buffer as
    // often as needed, then consume one trailing line terminator ("\n" or
    // "\r\n") if it is present. Used for the quality line, whose length is
    // expected to equal the sequence length, so no newline scan is needed.
    //
    // str: output; resized to `len`, or to the number of bytes actually
    //      copied if the input ends first.
    // len: number of bytes to copy.
    //
    // NOTE(review): the bytes are copied blindly. A quality line that is
    // shorter or longer than `len` is not detected here; the copy will simply
    // run across the line break or stop in the middle of the line.
    str->resize(len);
    int copied = 0;
    while(copied < len) {
        if(mBufUsedLen >= mBufDataLen) {
            if(bufferFinished()) {
                // Input ended early: report only what was actually read.
                str->resize(copied);
                return;
            }
            readToBuf();
        }
        int avail = mBufDataLen - mBufUsedLen;
        int need = len - copied;
        int take = (avail < need) ? avail : need;
        memcpy(&(*str)[copied], mFastqBuf + mBufUsedLen, take);
        mBufUsedLen += take;
        copied += take;
    }

    // Skip the trailing "\r\n" or "\n". The buffer is refilled before each
    // check so that a terminator split across two buffers ('\r' as the last
    // byte of one, '\n' as the first byte of the next) is consumed completely
    // instead of leaving a stray '\n' behind for the next read.
    // Assumes readToBuf() resets mBufUsedLen to the start of the new data,
    // as the copy loop above already relies on.
    if(mBufUsedLen >= mBufDataLen && !bufferFinished())
        readToBuf();
    if(mBufUsedLen < mBufDataLen && mFastqBuf[mBufUsedLen] == '\r')
        mBufUsedLen++;
    if(mBufUsedLen >= mBufDataLen && !bufferFinished())
        readToBuf();
    if(mBufUsedLen < mBufDataLen && mFastqBuf[mBufUsedLen] == '\n')
        mBufUsedLen++;
}

Read* FastqReader::read(){
if(mBufUsedLen >= mBufDataLen && bufferFinished()) {
return NULL;
Expand Down Expand Up @@ -320,15 +384,21 @@ Read* FastqReader::read(){
return NULL;

getLine(sequence);
getLine(strand);
getLine(quality);

if (strand->empty() || (*strand)[0]!='+') {
// validate '+' line: peek at first char, then skip without storing
if(mBufUsedLen >= mBufDataLen && !bufferFinished())
readToBuf();
if(mBufUsedLen >= mBufDataLen || mFastqBuf[mBufUsedLen] != '+') {
cerr << *name << endl;
cerr << "Expected '+', got " << *strand << endl;
cerr << "Expected '+' line in FASTQ record" << endl;
cerr << "Your FASTQ may be invalid, please check the tail of your FASTQ file" << endl;
return NULL;
}
strand->assign("+");
skipLine();

// read quality by exact length (== sequence length), no newline scan needed
readExact(quality, sequence->length());

if(quality->length() != sequence->length()) {
cerr << "ERROR: sequence and quality have different length:" << endl;
Expand Down
2 changes: 2 additions & 0 deletions src/fastqreader.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ class FastqReader{
void init();
void close();
void getLine(string* line);
void skipLine();
void readExact(string* str, int len);
void clearLineBreaks(char* line);
void readToBuf();
void readToBufIgzip();
Expand Down
Loading