// GenbankParser.cpp #include #include #include #include "GenbankParser.hpp" namespace NGBW { // GenbankParser bool GenbankParser::Parse(GenbankRecord &record) { record.clear(); ReadNextLine(); while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "LOCUS", 5) == 0) { ParseLocus(record); break; } IgnoreField(); } while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "DEFINITION", 10) == 0) { ParseDefinition(record); break; } IgnoreField(); } while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "ACCESSION", 9) == 0) { ParseAccession(record); break; } IgnoreField(); } while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "VERSION", 7) == 0) { ParseVersion(record); break; } IgnoreField(); } while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "SOURCE", 6) == 0) { ParseSource(record); break; } IgnoreField(); } while (1) { if (m_input.Eof()) return false; if (std::memcmp(m_line, "ORIGIN", 6) == 0) { ParseOrigin(record); break; } IgnoreField(); } while (1) { if (m_input.Eof()) return false; if (FindEndToken()) break; IgnoreField(); } return true; } void GenbankParser::ParseLocus(GenbankRecord &record) { size_t first = FindFirstNotSpace(6); if (first == std::string::npos) return; size_t last = FindFirstSpace(first + 1); if (last == std::string::npos) last = std::strlen(m_line); record.locus.assign(m_line + first, last - first); ReadNextLine(); } void GenbankParser::ParseDefinition(GenbankRecord &record) { size_t first = FindFirstNotSpace(11); if (first == std::string::npos) return; record.definition.assign(m_line + first); while (1) { ReadNextLine(); if (m_input.Eof() || !std::isspace(m_line[0])) return; size_t last = FindLastNotSpace(); if (last != std::string::npos) { first = FindFirstNotSpace(1); record.definition.append(m_line + first, last - first); } } } void GenbankParser::ParseAccession(GenbankRecord &record) { size_t first = FindFirstNotSpace(10); if (first == std::string::npos) return; record.accession.assign(m_line + first); ReadNextLine(); } void GenbankParser::ParseVersion(GenbankRecord &record) { size_t first; size_t last = 7; size_t length = std::strlen(m_line); while (1) { first = FindFirstNotSpace(last + 1); if (first == std::string::npos) break; last = FindFirstSpace(first + 1); if (last == std::string::npos) last = length; record.version.push_back(std::string(m_line + first, last - first)); if (last == length) break; } ReadNextLine(); } void GenbankParser::ParseSource(GenbankRecord &record) { size_t first = FindFirstNotSpace(7); if (first == std::string::npos) return; record.organism.assign(m_line + first); IgnoreField(); } void GenbankParser::ParseOrigin(GenbankRecord &record) { while (1) { ReadNextLine(); if (m_input.Eof() || !std::isspace(m_line[0])) return; size_t first = FindFirstNotSpace(1); if (m_line[first] == '/' && m_line[first + 1] == '/') return; size_t last = FindFirstNotDigit(first); size_t length = std::strlen(m_line); while (1) { first = FindFirstNotSpace(last + 1); last = FindFirstSpace(first + 1); if (last == std::string::npos) last = length; record.sequence.append(m_line + first, last - first); if (last == length) break; } record.sequence.push_back('\n'); } } void GenbankParser::IgnoreField() { while (1) { ReadNextLine(); if (m_input.Eof() || !std::isspace(m_line[0])) return; } } bool GenbankParser::FindEndToken() { size_t first = FindFirstNotSpace(); if (first == std::string::npos) return false; if (m_line[first] == '/' && m_line[first + 1] == '/') return true; return false; } } // namespace NGBW