// UniprotParser.cpp #include #include #include #include "UniprotParser.hpp" namespace NGBW { // UniprotParser bool UniprotParser::Parse(FastaRecord &record) { record.clear(); while (1) { ReadNextLine(); if (m_input.Eof()) return false; if (std::memcmp(m_line, "ID ", 3) == 0) { ReadNextLine(); break; } } while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "AC ", 3) == 0) { ParseAccession(record); break; } ReadNextLine(); } while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "DE ", 3) == 0) { ParseDescription(record); break; } ReadNextLine(); } while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "OS ", 3) == 0) { ParseOrganism(record); break; } ReadNextLine(); } while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "SQ ", 3) == 0) { ParseSequence(record); break; } ReadNextLine(); } while (1) { if (m_input.Eof()) return false; if (FindEndToken()) break; ReadNextLine(); } return true; } void UniprotParser::ParseAccession(FastaRecord &record) { size_t first = FindFirstNotSpace(3); if (first == std::string::npos) return; size_t last = FindFirstOf(';', first + 1); if (last == std::string::npos) last = std::strlen(m_line); record.identity.assign(m_line + first, last - first); ReadNextLine(); } void UniprotParser::ParseDescription(FastaRecord &record) { size_t first = FindFirstNotSpace(3); if (first == std::string::npos) return; size_t last; if (std::memcmp(m_line + first, "RecName:", 8) == 0) { first = FindFirstOf('=', first + 8) + 1; last = FindFirstOf(';', first); record.description.assign(m_line + first, last - first); } while (1) { ReadNextLine(); if (m_input.Eof() || std::memcmp(m_line, "DE ", 3) != 0) break; first = FindFirstNotSpace(3); if (first == std::string::npos) continue; if (std::memcmp(m_line + first, "Flags:", 6) == 0) { first = FindFirstNotSpace(first + 6); last = FindFirstOf(';', first); record.description.push_back(' '); record.description.append(m_line + first, last - first); } } } void UniprotParser::ParseOrganism(FastaRecord &record) { size_t first = FindFirstNotSpace(3); if (first == std::string::npos) return; size_t last = std::strlen(m_line); record.organism.assign(m_line + first, last - first); ReadNextLine(); } void UniprotParser::ParseSequence(FastaRecord &record) { while (1) { ReadNextLine(); if (m_input.Eof() || !std::isspace(m_line[0])) return; size_t first = FindFirstNotSpace(1); if (m_line[first] == '/' && m_line[first + 1] == '/') break; size_t last = first - 1; size_t length = std::strlen(m_line); while (1) { first = FindFirstNotSpace(last + 1); last = FindFirstSpace(first + 1); if (last == std::string::npos) last = length; record.sequence.append(m_line + first, last - first); if (last == length) break; } } } bool UniprotParser::FindEndToken() { size_t first = FindFirstNotSpace(); if (first == std::string::npos) return false; if (m_line[first] == '/' && m_line[first + 1] == '/') return true; return false; } } // namespace NGBW