/* * ReplaceMissingDocuments.java */ package org.ngbw.utils; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Calendar; import java.util.List; import java.util.Properties; import java.util.zip.GZIPOutputStream; import org.apache.commons.codec.binary.Hex; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.ngbw.sdk.Workbench; import org.ngbw.sdk.api.tool.ToolResource; import org.ngbw.sdk.database.ConnectionManager; import org.ngbw.sdk.database.ConnectionSource; /** * * @author Paul Hoover * */ public class ReplaceMissingDocuments { // nested classes /** * */ private static class TaskRecord { // data fields long taskId; String jobHandle; String toolId; // constructors public TaskRecord(long id, String handle, String tool) { taskId = id; jobHandle = handle; toolId = tool; } } // data fields private static final String FILE_ROOT_PROPERTY = ConnectionSource.DATABASE_PROP_PREFIX + "fileRoot"; private static final int FETCH_SIZE = 100; private static final Log m_log = LogFactory.getLog(ReplaceMissingDocuments.class.getName()); // public methods /** * * @param args */ public static void main(String[] args) { try { Workbench.getInstance(); if (args.length != 1) throw new Exception("usage: ReplaceMissingDocuments days"); int numDays = Integer.parseInt(args[0]); Calendar cal = Calendar.getInstance(); cal.add(Calendar.DAY_OF_MONTH, -numDays); Timestamp cutoffDate = new Timestamp(cal.getTimeInMillis()); int numDocuments = 0; numDocuments += examineDocuments( "SELECT source_documents.SOURCE_DOCUMENT_ID, FILENAME, LENGTH, SIGNATURE " + "FROM source_documents " + "INNER JOIN userdata ON source_documents.SOURCE_DOCUMENT_ID = userdata.SOURCE_DOCUMENT_ID " + "WHERE userdata.CREATION_DATE >= ? ", cutoffDate ); numDocuments += examineDocuments( "SELECT source_documents.SOURCE_DOCUMENT_ID, FILENAME, LENGTH, SIGNATURE " + "FROM source_documents " + "INNER JOIN task_input_source_documents ON source_documents.SOURCE_DOCUMENT_ID = task_input_source_documents.SOURCE_DOCUMENT_ID " + "INNER JOIN task_input_parameters ON task_input_source_documents.INPUT_ID = task_input_parameters.INPUT_ID " + "INNER JOIN tasks on task_input_parameters.TASK_ID = tasks.TASK_ID " + "WHERE tasks.CREATION_DATE >= ? ", cutoffDate ); numDocuments += examineDocuments( "SELECT source_documents.SOURCE_DOCUMENT_ID, FILENAME, LENGTH, SIGNATURE " + "FROM source_documents " + "INNER JOIN task_output_source_documents ON source_documents.SOURCE_DOCUMENT_ID = task_output_source_documents.SOURCE_DOCUMENT_ID " + "INNER JOIN task_output_parameters ON task_output_source_documents.OUTPUT_ID = task_output_parameters.OUTPUT_ID " + "INNER JOIN tasks on task_output_parameters.TASK_ID = tasks.TASK_ID " + "WHERE tasks.CREATION_DATE >= ?", cutoffDate ); m_log.debug("examined " + numDocuments + " source documents"); } catch (Exception err) { err.printStackTrace(System.err); System.exit(1); } } // private methods /** * * @param query * @param cutoffDate * @return * @throws Exception */ private static int examineDocuments(String query, Timestamp cutoffDate) throws Exception { Connection dbConn = ConnectionManager.getConnectionSource().getConnection(); PreparedStatement selectStmt = null; ResultSet rows = null; try { selectStmt = dbConn.prepareStatement(query); selectStmt.setTimestamp(1, cutoffDate); selectStmt.setFetchSize(FETCH_SIZE); rows = selectStmt.executeQuery(); String fileRoot = getFileRoot(); int numDocuments = 0; while (rows.next()) { long sourceDocumentId = rows.getLong(1); String fileName = rows.getString(2); long length = rows.getLong(3); String signature = rows.getString(4); File dbFile = new File(fileRoot + fileName); if (signature.length() > 0 && !dbFile.exists()) { m_log.debug("file is missing for source document: id " + sourceDocumentId + ", filename " + fileName + ", length " + length + ", signature " + signature); System.out.print(fileName + " is missing: "); if (!replaceDoc(sourceDocumentId, signature, dbFile)) eraseSignature(sourceDocumentId); } numDocuments += 1; } return numDocuments; } finally { if (rows != null) rows.close(); if (selectStmt != null) selectStmt.close(); dbConn.close(); } } /** * * @return * @throws Exception */ private static String getFileRoot() throws Exception { Properties configProps = ConnectionSource.getDatabaseConfiguration(); String fileRoot = configProps.getProperty(FILE_ROOT_PROPERTY); if (fileRoot == null || fileRoot.length() == 0) throw new Exception("Property " + FILE_ROOT_PROPERTY + " is empty"); String fileSeparator = System.getProperty("file.separator"); if (fileRoot.endsWith(fileSeparator)) return fileRoot; else return fileRoot + fileSeparator; } /** * * @param sourceDocumentId * @param signature * @param dbFile * @return * @throws SQLException * @throws IOException * @throws NoSuchAlgorithmException */ private static boolean replaceDoc(long sourceDocumentId, String signature, File dbFile) throws SQLException, IOException, NoSuchAlgorithmException { List tasks = getTasksFromOutput(sourceDocumentId); if (!tasks.isEmpty()) { File outputFile = findOutputFile(tasks, sourceDocumentId); if (outputFile != null) { String copySignature = copyFile(outputFile, dbFile); if (signature.equals(copySignature)) { System.out.println("replaced output document with " + outputFile.getAbsolutePath()); return true; } else System.out.println("found a replacement for output document at " + outputFile.getAbsolutePath() + ", but signatures don't match"); } else System.out.println("can't find a replacement for output document"); } else if (isInputDoc(sourceDocumentId)) System.out.println("can't replace input document"); else if (isDataDoc(sourceDocumentId)) System.out.println("can't replace user data document"); else System.out.println("source document " + sourceDocumentId + " isn't associated with any database entity"); return false; } /** * * @param sourceDocumentId * @return * @throws SQLException */ private static List getTasksFromOutput(long sourceDocumentId) throws SQLException { Connection dbConn = ConnectionManager.getConnectionSource().getConnection(); PreparedStatement selectStmt = null; ResultSet rows = null; try { selectStmt = dbConn.prepareStatement( "SELECT tasks.TASK_ID, JOBHANDLE, TOOL_ID " + "FROM tasks " + "INNER JOIN (" + "SELECT TASK_ID " + "FROM task_output_parameters " + "INNER JOIN (" + "SELECT OUTPUT_ID " + "FROM task_output_source_documents " + "WHERE SOURCE_DOCUMENT_ID = ?" + ") AS output_ids ON task_output_parameters.OUTPUT_ID = output_ids.OUTPUT_ID " + ") AS task_ids ON tasks.TASK_ID = task_ids.TASK_ID " + "ORDER BY CREATION_DATE DESC" ); selectStmt.setLong(1, sourceDocumentId); rows = selectStmt.executeQuery(); List result = new ArrayList(); while (rows.next()) result.add(new TaskRecord(rows.getLong(1), rows.getString(2), rows.getString(3))); return result; } finally { if (rows != null) rows.close(); if (selectStmt != null) selectStmt.close(); dbConn.close(); } } /** * * @param tasks * @param sourceDocumentId * @return * @throws SQLException */ private static File findOutputFile(List tasks, long sourceDocumentId) throws SQLException { for (TaskRecord task : tasks) { String resultsDir = findResultsDir(task); if (resultsDir == null) continue; List fileNames = getFileNames(task, sourceDocumentId); for (String name : fileNames) { File output = new File(resultsDir + name); if (output.exists()) return output; } } return null; } /** * * @param inFile * @param outFile * @return * @throws IOException * @throws NoSuchAlgorithmException */ private static String copyFile(File inFile, File outFile) throws IOException, NoSuchAlgorithmException { InputStream inStream = new BufferedInputStream(new FileInputStream(inFile)); OutputStream outStream = new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(outFile))); try { int bytesRead; byte[] readBuffer = new byte[8192]; MessageDigest shaDigest = MessageDigest.getInstance("SHA-1"); while ((bytesRead = inStream.read(readBuffer, 0, readBuffer.length)) >= 0) { outStream.write(readBuffer, 0, bytesRead); shaDigest.update(readBuffer, 0, bytesRead); } return Hex.encodeHexString(shaDigest.digest()); } finally { outStream.close(); inStream.close(); } } /** * * @param task * @return */ private static String findResultsDir(TaskRecord task) { try { ToolResource resource = Workbench.getInstance().getTool(task.toolId).getToolResource(); String dirName = resource.getWorkingDirectory(task.jobHandle); if ((new File(dirName)).exists()) return dirName; dirName = resource.getArchiveDirectory(task.jobHandle); if ((new File(dirName)).exists()) return dirName; dirName = resource.getFailedDirectory(task.jobHandle); if ((new File(dirName)).exists()) return dirName; dirName = resource.getManualDirectory(task.jobHandle); if ((new File(dirName)).exists()) return dirName; } catch (Throwable err) { System.err.println(err.getLocalizedMessage()); } return null; } /** * * @param task * @param sourceDocumentId * @return * @throws SQLException */ private static List getFileNames(TaskRecord task, long sourceDocumentId) throws SQLException { Connection dbConn = ConnectionManager.getConnectionSource().getConnection(); PreparedStatement selectStmt = null; ResultSet rows = null; try { selectStmt = dbConn.prepareStatement( "SELECT NAME " + "FROM tasks " + "INNER JOIN task_output_parameters ON tasks.TASK_ID = task_output_parameters.TASK_ID " + "INNER JOIN task_output_source_documents ON task_output_parameters.OUTPUT_ID = task_output_source_documents.OUTPUT_ID " + "WHERE tasks.TASK_ID = ? " + "AND task_output_source_documents.SOURCE_DOCUMENT_ID = ?" ); selectStmt.setLong(1, task.taskId); selectStmt.setLong(2, sourceDocumentId); rows = selectStmt.executeQuery(); List result = new ArrayList(); while (rows.next()) result.add(rows.getString(1)); return result; } finally { if (rows != null) rows.close(); if (selectStmt != null) selectStmt.close(); dbConn.close(); } } /** * * @param sourceDocumentId * @return * @throws SQLException */ private static boolean isInputDoc(long sourceDocumentId) throws SQLException { Connection dbConn = ConnectionManager.getConnectionSource().getConnection(); PreparedStatement selectStmt = null; ResultSet row = null; try { selectStmt = dbConn.prepareStatement( "SELECT COUNT(*) " + "FROM task_input_source_documents " + "WHERE SOURCE_DOCUMENT_ID = ?" ); selectStmt.setLong(1, sourceDocumentId); row = selectStmt.executeQuery(); if (!row.next()) return false; return row.getLong(1) > 0; } finally { if (row != null) row.close(); if (selectStmt != null) selectStmt.close(); dbConn.close(); } } /** * * @param sourceDocumentId * @return * @throws SQLException */ private static boolean isDataDoc(long sourceDocumentId) throws SQLException { Connection dbConn = ConnectionManager.getConnectionSource().getConnection(); PreparedStatement selectStmt = null; ResultSet row = null; try { selectStmt = dbConn.prepareStatement( "SELECT COUNT(*) " + "FROM userdata " + "WHERE SOURCE_DOCUMENT_ID = ?" ); selectStmt.setLong(1, sourceDocumentId); row = selectStmt.executeQuery(); if (!row.next()) return false; return row.getLong(1) > 0; } finally { if (row != null) row.close(); if (selectStmt != null) selectStmt.close(); dbConn.close(); } } /** * * @param sourceDocumentId * @throws SQLException */ private static void eraseSignature(long sourceDocumentId) throws SQLException { Connection dbConn = ConnectionManager.getConnectionSource().getConnection(); PreparedStatement updateStmt = null; ResultSet row = null; try { updateStmt = dbConn.prepareStatement( "UPDATE source_documents " + "SET SIGNATURE = '' " + "WHERE SOURCE_DOCUMENT_ID = ?" ); updateStmt.setLong(1, sourceDocumentId); updateStmt.executeUpdate(); } finally { if (row != null) row.close(); if (updateStmt != null) updateStmt.close(); dbConn.close(); } } }