// UpdateUniprot.cpp #if defined(_WIN32) || defined(_WIN64) // C4290 - the compiler ignores exception specifications #pragma warning(disable: 4290) #endif #include #include #include #include #include #include #include #include #include "GzipReader.hpp" #include "SystemError.hpp" using namespace NGBW; int main(int argc, char **argv) { try { std::ios_base::sync_with_stdio(false); if(argc != 4) throw std::invalid_argument("usage: pdb-to-fasta dataset outputname inputname"); boost::scoped_ptr output_file; if(std::memcmp(argv[2], "-", 2) != 0) { output_file.reset(new std::ofstream(argv[2])); if(!output_file->good()) { std::string message("can't open "); message.append(argv[2]); throw SystemError("main", message, errno); } } std::ostream &output = output_file ? *output_file : std::cout; GzipReader reader(argv[3]); output.exceptions(std::ios_base::badbit); while (1) { const char *line = reader.GetLine(); if (reader.Eof()) break; if (line[0] == '>') { output.write(">ngbw|", 6); output << argv[1]; // protein id size_t last = 1; while (line[last] != '_') { if (line[last] == '\0') throw std::runtime_error("Unexpected comment format"); last += 1; } output.put('|'); for (size_t i = 1 ; i < last ; i += 1) output.put(std::toupper(line[i])); // chain id last += 1; size_t first = last; while (!std::isspace(line[last])) { if (line[last] == '\0') throw std::runtime_error("Unexpected comment format"); last += 1; } output.put('|'); for (size_t i = first ; i < last ; i += 1) output.put(std::toupper(line[i])); // description const char *description = std::strstr(line + last, "length:"); if (description != NULL) { while (!std::isspace(*description) && *description != '\0') description += 1; while (std::isspace(*description) && *description != '\0') description += 1; output.put('|'); output << description; } output.put('\n'); } else { output << line; output.put('\n'); } } return 0; } catch(const std::exception &err) { std::cerr << err.what() << std::endl; return 1; } catch(...) { std::cerr << "An unknown exception was thrown" << std::endl; return 1; } }