// UnirefParser.cpp #include #include "UnirefParser.hpp" namespace NGBW { // UnirefParser bool UnirefParser::Parse(FastaRecord &record) { record.clear(); if (m_line == NULL) ReadNextLine(); while (1) { if (m_input.Eof()) return false; if (m_line[0] == '>') { ParseHeader(record); break; } ReadNextLine(); } while (1) { if (m_input.Eof() || m_line[0] == '>') break; record.sequence.append(m_line); ReadNextLine(); } return true; } void UnirefParser::ParseHeader(FastaRecord &record) { size_t first = FindFirstOf('_', 1); if (first == std::string::npos) first = 1; else first += 1; size_t length = std::strlen(m_line); size_t last = FindFirstSpace(first + 1); if (last == std::string::npos) last = length; record.identity.assign(m_line + first, last - first); if (last < length) { first = last + 1; const char *sub_string = std::strstr(m_line + first, " n="); if (sub_string != NULL) { last = sub_string - m_line; record.description.assign(m_line + first, last - first); first = last + 3; sub_string = std::strstr(m_line + first, " Tax="); if (sub_string != NULL) { first = (sub_string - m_line) + 5; sub_string = std::strstr(m_line + first, " RepID="); if (sub_string != NULL) last = sub_string - m_line; else last = length; record.organism.assign(m_line + first, last - first); } } } ReadNextLine(); } } // namespace NGBW