// UpdateUniprot.cpp #if defined(_WIN32) || defined(_WIN64) // C4290 - the compiler ignores exception specifications #pragma warning(disable: 4290) #endif #include #include #include #include #include #include #include #include "GzipReader.hpp" #include "SystemError.hpp" #include "UniprotParser.hpp" using namespace NGBW; static void WriteFasta(const char *dataset, UniprotRecord &record, std::ostream &output) throw(std::ios_base::failure) { std::string description; if (record.description.compare(0, 8, "RecName:", 8) == 0) { size_t equals_sign = record.description.find_first_of('=', 8) + 1; size_t semi_colon = record.description.find_first_of(';', equals_sign); description.assign(record.description.substr(equals_sign, semi_colon - equals_sign)); size_t flags_line = record.description.find("Flags:", semi_colon + 1, 6); if (flags_line != std::string::npos) { flags_line += 6; semi_colon = record.description.find_first_of(';', flags_line); std::string flag(record.description.substr(flags_line, semi_colon - flags_line)); for (std::string::const_iterator i = flag.begin() ; i != flag.end() ; i++) description.push_back(std::tolower(*i)); } } else description.assign(record.description); output.write(">ngbw|", 6); output << dataset; output.put('|'); output << record.accession; output.put('|'); output << record.organism; output.put('|'); output << description; output.put('\n'); output << record.sequence; } int main(int argc, char **argv) { try { std::ios_base::sync_with_stdio(false); if(argc != 4) throw std::invalid_argument("usage: uniprot-to-fasta dataset outputname inputname"); boost::scoped_ptr output_file; if(std::memcmp(argv[2], "-", 2) != 0) { output_file.reset(new std::ofstream(argv[2])); if(!output_file->good()) { std::string message("can't open "); message.append(argv[2]); throw SystemError("main", message, errno); } } std::ostream &output = output_file ? *output_file : std::cout; GzipReader reader(argv[3]); UniprotParser parser(reader); UniprotRecord record; output.exceptions(std::ios_base::badbit); while (parser.Parse(record)) WriteFasta(argv[1], record, output); return 0; } catch(const std::exception &err) { std::cerr << err.what() << std::endl; return 1; } catch(...) { std::cerr << "An unknown exception was thrown" << std::endl; return 1; } }