/*
 * PdbFinderParser.java
 */
package org.ngbw.utils;


import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;


/**
 * Reads entries made of <code>ID</code>, <code>Header</code>, <code>Source</code>
 * and <code>Chain</code> fields, each entry terminated by a <code>//</code> line,
 * and produces one {@link SequenceRecord} per chain found in an entry.
 * <p>
 * Line reading and source capture (<code>m_line</code>, <code>findFirstLine</code>,
 * <code>readAndStoreLine</code>, <code>getCompleteSource</code>,
 * <code>getFilteredSource</code>) are not defined in this class; they are
 * presumably inherited from {@link Parser} - confirm against that class.
 *
 * @author Paul Hoover
 *
 */
class PdbSequenceParser extends Parser {

    private static final Pattern m_identityPattern = Pattern.compile("^ID\\s*:.*$");
    private static final Pattern m_headerPattern = Pattern.compile("^Header\\s*:.*$");
    private static final Pattern m_sourcePattern = Pattern.compile("^Source\\s*:.*$");
    private static final Pattern m_chainPattern = Pattern.compile("^Chain\\s*:.*$");
    private static final Pattern m_recordEndPattern = Pattern.compile("^\\s*//\\s*$");

    // Records built from the most recently parsed entry that have not been
    // handed out by nextRecord() yet (one per chain id of that entry).
    private final Queue<SequenceRecord> m_records = new LinkedList<SequenceRecord>();


    /**
     * Creates a parser over the given text.
     * The chain pattern is passed to the superclass constructor; what the
     * superclass does with it is not visible here (NOTE(review): presumably
     * used to build the filtered source - confirm in Parser).
     *
     * @param input the text to parse
     */
    public PdbSequenceParser(String input)
    {
        super(input, m_chainPattern);
    }

    /**
     * Creates a parser over the given bytes.
     *
     * @param input the raw data to parse
     */
    public PdbSequenceParser(byte[] input)
    {
        super(input, m_chainPattern);
    }

    /**
     * Creates a parser that reads from the given stream.
     *
     * @param input the stream to parse
     */
    public PdbSequenceParser(InputStream input)
    {
        super(input, m_chainPattern);
    }

    /**
     * Creates a parser that reads from the given reader.
     *
     * @param input the reader to parse
     */
    public PdbSequenceParser(Reader input)
    {
        super(input, m_chainPattern);
    }

    /**
     * Returns the next sequence record. Each entry yields one record per
     * distinct chain id, with a primary id of the form
     * <code>identity-chainId</code>; records of one entry are returned on
     * successive calls before the next entry is parsed.
     * <p>
     * An entry that declares no chains produces no records and is skipped.
     *
     * @return the next record, or <code>null</code> when no further entry is
     *         found or the input ends before an entry's terminating
     *         <code>//</code> line
     * @throws IOException if reading the input fails
     */
    public SequenceRecord nextRecord() throws IOException
    {
        // Loop rather than a single attempt: an entry without any Chain line
        // adds nothing to the queue, and calling remove() on the empty queue
        // would throw NoSuchElementException.
        while (m_records.isEmpty()) {
            if (!findFirstLine(m_identityPattern))
                return null;

            String identity = parseField();
            String name = "";
            String organism = "";
            Set<String> chainIds = new TreeSet<String>();

            while (true) {
                // input ended in the middle of an entry
                if (m_line == null)
                    return null;

                if (m_headerPattern.matcher(m_line).matches())
                    name = parseField();
                else if (m_sourcePattern.matcher(m_line).matches())
                    organism = parseSource();
                else if (m_chainPattern.matcher(m_line).matches())
                    parseChain(chainIds);
                else if (m_recordEndPattern.matcher(m_line).matches()) {
                    String completeSource = getCompleteSource();
                    String filteredSource = getFilteredSource();

                    for (String chainId : chainIds) {
                        SequenceRecord record = new SequenceRecord();

                        record.primaryId = identity + "-" + chainId;
                        record.completeSource = completeSource;
                        record.filteredSource = filteredSource;
                        record.name = name;
                        record.organism = organism;

                        m_records.add(record);
                    }

                    break;
                }
                else
                    readAndStoreLine();
            }
        }

        return m_records.remove();
    }


    /**
     * Extracts the value of the <code>Name : value</code> field on the
     * current line and advances to the next line.
     *
     * @return the text after the first whitespace-delimited colon, or an
     *         empty string if the line has no such separator
     * @throws IOException if reading the next line fails
     */
    private String parseField() throws IOException
    {
        String[] subStrings = m_line.split("\\s+:\\s+", 2);

        readAndStoreLine();

        if (subStrings.length > 1)
            return subStrings[1];
        else
            return "";
    }

    /**
     * Collects the value of the current <code>Source</code> line together
     * with the values of any directly following <code>Source</code> lines,
     * joined by single spaces. On return the current line is the first line
     * that is not a <code>Source</code> line (or <code>null</code> at end of
     * input).
     *
     * @return the combined source text, possibly empty
     * @throws IOException if reading a line fails
     */
    private String parseSource() throws IOException
    {
        StringBuilder field = new StringBuilder();
        String[] subStrings = m_line.split("\\s+:\\s+", 2);

        if (subStrings.length > 1)
            field.append(subStrings[1]);

        while (true) {
            readAndStoreLine();

            if (m_line == null || !m_sourcePattern.matcher(m_line).matches())
                break;

            subStrings = m_line.split("\\s+:\\s+", 2);

            if (subStrings.length > 1) {
                field.append(' ');
                field.append(subStrings[1]);
            }
        }

        return field.toString();
    }

    /**
     * Adds the chain id on the current <code>Chain</code> line to
     * <code>ids</code>, then reads past the lines that follow it and begin
     * with whitespace. On return the current line is the first line that is
     * not such a continuation (or <code>null</code> at end of input).
     *
     * @param ids the set receiving the chain id
     * @throws IOException if reading a line fails
     */
    private void parseChain(Set<String> ids) throws IOException
    {
        String[] subStrings = m_line.split("\\s+:\\s+", 2);

        if (subStrings.length > 1)
            ids.add(subStrings[1]);

        do {
            readAndStoreLine();
        } while (isChainContinuation(m_line));
    }

    /**
     * Tells whether a line belongs to the chain block being skipped: it must
     * be non-empty and start with whitespace. A record terminator is never a
     * continuation, even when indented, because nextRecord() has to see it
     * to close the entry.
     *
     * @param line the line to test, may be <code>null</code>
     * @return <code>true</code> if the line should be skipped as part of the
     *         chain block
     */
    private static boolean isChainContinuation(String line)
    {
        return line != null
            && line.length() > 0
            && Character.isWhitespace(line.charAt(0))
            && !m_recordEndPattern.matcher(line).matches();
    }
}