/*
 * PdbStructureParser.java
 */
package org.ngbw.utils;


import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Reads PDB-format text and turns each entry, from its HEADER line to its END
 * line, into a {@link StructureRecord}.
 * <p>
 * Field values are taken from fixed character offsets within the recognised
 * lines. Lines that are shorter than the expected offsets yield empty (or
 * partial) values instead of raising an exception.
 *
 * @author Paul Hoover
 *
 */
class PdbStructureParser extends Parser {

	private static final Pattern m_headerPattern = Pattern.compile("^HEADER.*$");
	private static final Pattern m_titlePattern = Pattern.compile("^TITLE.*$");
	private static final Pattern m_compoundPattern = Pattern.compile("^COMPND\\s+(\\d+\\s+)?MOLECULE:.*$");
	private static final Pattern m_sourcePattern = Pattern.compile("^SOURCE\\s+(\\d+\\s+)?ORGANISM_SCIENTIFIC:.*$");
	private static final Pattern m_expdataPattern = Pattern.compile("^EXPDTA.*$");
	private static final Pattern m_authorPattern = Pattern.compile("^AUTHOR.*$");
	private static final Pattern m_revDatePattern = Pattern.compile("^REVDAT.*$");
	private static final Pattern m_remarkPattern = Pattern.compile("^REMARK\\s+(\\d+\\s+)?RESOLUTION\\.\\s+(\\d+|\\d+\\.\\d+)\\s+ANGSTROMS\\..*$");
	// "END" must be the whole record name: a bare "^END.*$" would also match
	// the ENDMDL lines that close each model of a multi-model entry and would
	// cut the record short at the first model.
	private static final Pattern m_endPattern = Pattern.compile("^END(\\s.*)?$");
	private static final Pattern m_filterPattern = Pattern.compile("^(SEQRES|SEQADV|ATOM).*$");


	/**
	 * Creates a parser over PDB text held in a string.
	 *
	 * @param input the text to parse
	 */
	public PdbStructureParser(String input)
	{
		super(input, m_filterPattern);
	}

	/**
	 * Creates a parser over PDB text held in a byte array.
	 *
	 * @param input the bytes to parse
	 */
	public PdbStructureParser(byte[] input)
	{
		super(input, m_filterPattern);
	}

	/**
	 * Creates a parser over PDB text read from a stream.
	 *
	 * @param input the stream to parse
	 */
	public PdbStructureParser(InputStream input)
	{
		super(input, m_filterPattern);
	}

	/**
	 * Creates a parser over PDB text read from a character reader.
	 *
	 * @param input the reader to parse
	 */
	public PdbStructureParser(Reader input)
	{
		super(input, m_filterPattern);
	}

	/**
	 * Parses the next entry from the input.
	 * <p>
	 * Parsing starts at the next HEADER line and continues until an END line
	 * is reached; recognised lines in between fill in the fields of the
	 * returned record, all other lines are simply read past.
	 *
	 * @return the parsed record, or <code>null</code> if no further HEADER line
	 *         is found or the input runs out before an END line is seen
	 * @throws IOException if reading the input fails
	 */
	public StructureRecord nextRecord() throws IOException
	{
		if (!findFirstLine(m_headerPattern))
			return null;

		StructureRecord record = new StructureRecord();

		parseHeader(record);

		while (true) {
			// NOTE(review): an entry that is not terminated by END is dropped
			// entirely - confirm this is the intended handling of truncated input.
			if (m_line == null)
				return null;

			if (m_titlePattern.matcher(m_line).matches())
				record.name = parseMultiLineField(m_titlePattern);
			else if (m_compoundPattern.matcher(m_line).matches())
				parseCompound(record);
			else if (m_sourcePattern.matcher(m_line).matches())
				parseSource(record);
			else if (m_expdataPattern.matcher(m_line).matches())
				// NOTE(review): parseCompound() writes record.type as well, so
				// whichever of COMPND/EXPDTA appears later in the entry wins.
				record.type = parseMultiLineField(m_expdataPattern);
			else if (m_authorPattern.matcher(m_line).matches())
				record.author = parseMultiLineField(m_authorPattern);
			else if (m_revDatePattern.matcher(m_line).matches())
				parseRevDate(record);
			else if (m_remarkPattern.matcher(m_line).matches())
				parseRemark(record);
			else if (m_endPattern.matcher(m_line).matches()) {
				record.completeSource = getCompleteSource();
				record.filteredSource = getFilteredSource();

				break;
			}
			else
				readAndStoreLine();
		}

		return record;
	}


	/**
	 * Extracts the deposition date (characters 50-58) and the primary id
	 * (everything from character 62 on, trimmed) from the current HEADER line,
	 * then advances to the next line.
	 *
	 * @param record the record to fill in
	 * @throws IOException if reading the input fails
	 */
	private void parseHeader(StructureRecord record) throws IOException
	{
		record.depositionDate = field(m_line, 50, 59);
		record.primaryId = fieldFrom(m_line, 62).trim();

		readAndStoreLine();
	}

	/**
	 * Stores the text following character 20 of the current COMPND MOLECULE
	 * line as the record type, then advances to the next line.
	 *
	 * @param record the record to fill in
	 * @throws IOException if reading the input fails
	 */
	private void parseCompound(StructureRecord record) throws IOException
	{
		record.type = fieldFrom(m_line, 20).trim();

		readAndStoreLine();
	}

	/**
	 * Stores the text following character 31 of the current SOURCE
	 * ORGANISM_SCIENTIFIC line as the organism, then advances to the next line.
	 *
	 * @param record the record to fill in
	 * @throws IOException if reading the input fails
	 */
	private void parseSource(StructureRecord record) throws IOException
	{
		record.organism = fieldFrom(m_line, 31).trim();

		readAndStoreLine();
	}

	/**
	 * Stores characters 13-21 of the current REVDAT line as the modification
	 * date, then advances to the next line.
	 * <p>
	 * NOTE(review): every REVDAT line overwrites the previous value, so the
	 * last REVDAT line of the entry determines the stored date. If the input
	 * lists revisions newest-first this yields the oldest date - confirm which
	 * one callers expect.
	 *
	 * @param record the record to fill in
	 * @throws IOException if reading the input fails
	 */
	private void parseRevDate(StructureRecord record) throws IOException
	{
		record.modificationDate = field(m_line, 13, 22);

		readAndStoreLine();
	}

	/**
	 * Stores the numeric value of a "RESOLUTION. n ANGSTROMS." remark as the
	 * resolution, then advances to the next line.
	 *
	 * @param record the record to fill in
	 * @throws IOException if reading the input fails
	 */
	private void parseRemark(StructureRecord record) throws IOException
	{
		Matcher matcher = m_remarkPattern.matcher(m_line);

		// group 2 is the resolution value; group 1 is the optional remark number
		if (matcher.matches())
			record.resolution = matcher.group(2);

		readAndStoreLine();
	}

	/**
	 * Collects the value of a field that may continue over several consecutive
	 * lines. The value of each line is the trimmed text from character 10 on.
	 * <p>
	 * Fragments are joined with a single space so that words wrapped at a line
	 * boundary are not glued together. No space is inserted after a fragment
	 * that ends in a hyphen attached to a word, on the assumption that the
	 * hyphenated term continues on the following line.
	 * <p>
	 * On return the current line is the first line that does not belong to the
	 * field (or <code>null</code> at the end of the input).
	 *
	 * @param fieldName pattern that identifies the lines belonging to the field
	 * @return the combined value of all consecutive matching lines
	 * @throws IOException if reading the input fails
	 */
	private String parseMultiLineField(Pattern fieldName) throws IOException
	{
		StringBuilder field = new StringBuilder(fieldFrom(m_line, 10).trim());

		while (true) {
			readAndStoreLine();

			if (m_line == null || !fieldName.matcher(m_line).matches())
				break;

			String fragment = fieldFrom(m_line, 10).trim();

			if (fragment.length() == 0)
				continue;

			if (needsSeparator(field))
				field.append(' ');

			field.append(fragment);
		}

		return field.toString();
	}

	/**
	 * Decides whether a space has to be inserted before the next fragment of a
	 * multi-line field is appended.
	 *
	 * @param text the value collected so far
	 * @return <code>false</code> if the text is empty or ends in a hyphen that
	 *         directly follows a non-whitespace character, <code>true</code> otherwise
	 */
	private static boolean needsSeparator(CharSequence text)
	{
		int length = text.length();

		if (length == 0)
			return false;

		if (text.charAt(length - 1) != '-')
			return true;

		// a free-standing dash ("A -") is punctuation, not a split word
		return length == 1 || Character.isWhitespace(text.charAt(length - 2));
	}

	/**
	 * Returns the part of a line between two offsets, clamped to the length of
	 * the line so that short lines never cause an exception.
	 *
	 * @param line the line to read from
	 * @param begin offset of the first character, inclusive
	 * @param end offset of the last character, exclusive
	 * @return the requested text, shortened or empty if the line is too short
	 */
	private static String field(String line, int begin, int end)
	{
		int length = line.length();

		if (begin >= length)
			return "";

		return line.substring(begin, Math.min(end, length));
	}

	/**
	 * Returns the remainder of a line starting at the given offset, or an empty
	 * string if the line is shorter than the offset.
	 *
	 * @param line the line to read from
	 * @param begin offset of the first character, inclusive
	 * @return the text from the offset to the end of the line
	 */
	private static String fieldFrom(String line, int begin)
	{
		if (begin >= line.length())
			return "";

		return line.substring(begin);
	}
}