// GenbankParser.cpp #include #include #include #include "GenbankParser.hpp" namespace NGBW { // GenbankParser bool GenbankParser::Parse(FastaRecord &record) { record.clear(); ReadNextLine(); while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "LOCUS", 5) == 0) { ReadNextLine(); break; } IgnoreField(); } while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "DEFINITION", 10) == 0) { ParseDefinition(record); break; } IgnoreField(); } while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "ACCESSION", 9) == 0) { ParseAccession(record); break; } IgnoreField(); } while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "SOURCE", 6) == 0) { ParseSource(record); break; } IgnoreField(); } while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "ORIGIN", 6) == 0) { ParseOrigin(record); break; } IgnoreField(); } while (1) { if (m_input.Eof()) return false; if (FindEndToken()) break; IgnoreField(); } return true; } void GenbankParser::ParseDefinition(FastaRecord &record) { size_t first = FindFirstNotSpace(11); if (first == std::string::npos) return; record.description.assign(m_line + first); while (1) { ReadNextLine(); if (m_input.Eof() || !std::isspace(m_line[0])) return; size_t last = FindLastNotSpace(); if (last != std::string::npos) { first = FindFirstNotSpace(1); record.description.append(m_line + first, last - first); } } } void GenbankParser::ParseAccession(FastaRecord &record) { size_t first = FindFirstNotSpace(10); if (first == std::string::npos) return; size_t last = FindFirstSpace(first + 1); if (last == std::string::npos) last = std::strlen(m_line); record.identity.assign(m_line + first, last - first); ReadNextLine(); } void GenbankParser::ParseSource(FastaRecord &record) { size_t first = FindFirstNotSpace(7); if (first == std::string::npos) return; record.organism.assign(m_line + first); IgnoreField(); } void GenbankParser::ParseOrigin(FastaRecord &record) { while (1) { ReadNextLine(); if (m_input.Eof() || !std::isspace(m_line[0])) return; size_t first = FindFirstNotSpace(1); if (m_line[first] == '/' && m_line[first + 1] == '/') return; size_t last = FindFirstNotDigit(first); size_t length = std::strlen(m_line); while (1) { first = FindFirstNotSpace(last + 1); last = FindFirstSpace(first + 1); if (last == std::string::npos) last = length; record.sequence.append(m_line + first, last - first); if (last == length) break; } } } void GenbankParser::IgnoreField() { while (1) { ReadNextLine(); if (m_input.Eof() || !std::isspace(m_line[0])) return; } } bool GenbankParser::FindEndToken() { size_t first = FindFirstNotSpace(); if (first == std::string::npos) return false; if (m_line[first] == '/' && m_line[first + 1] == '/') return true; return false; } } // namespace NGBW