[jsword-svn] r1210 - trunk/jsword/src/main/java/org/crosswire/jsword/book/sword
dmsmith at www.crosswire.org
dmsmith at www.crosswire.org
Fri Dec 15 14:24:07 MST 2006
Author: dmsmith
Date: 2006-12-15 14:24:06 -0700 (Fri, 15 Dec 2006)
New Revision: 1210
Added:
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeKeyIndex.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeNode.java
Modified:
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/BookType.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/GenBookBackend.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordUtil.java
Log:
Initial Raw GenBook implementation.
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/BookType.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/BookType.java 2006-12-12 22:31:43 UTC (rev 1209)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/BookType.java 2006-12-15 21:24:06 UTC (rev 1210)
@@ -265,7 +265,7 @@
protected boolean isBackendSupported(SwordBookMetaData sbmd)
{
- return false;
+ return true;
}
/**
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/GenBookBackend.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/GenBookBackend.java 2006-12-12 22:31:43 UTC (rev 1209)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/GenBookBackend.java 2006-12-15 21:24:06 UTC (rev 1210)
@@ -21,8 +21,14 @@
*/
package org.crosswire.jsword.book.sword;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
import org.crosswire.common.activate.Activator;
import org.crosswire.common.activate.Lock;
+import org.crosswire.common.util.FileUtil;
+import org.crosswire.common.util.Logger;
import org.crosswire.jsword.book.BookException;
import org.crosswire.jsword.passage.Key;
@@ -38,9 +44,17 @@
/**
* Simple ctor
*/
- public GenBookBackend(SwordBookMetaData sbmd)
+ public GenBookBackend(SwordBookMetaData sbmd) throws BookException
{
super(sbmd);
+ // The raw data file is the expanded data path with the ".bdt" extension appended.
+ String path = getExpandedDataPath();
+ bdtFile = new File(path + EXTENSION_BDT);
+
+ // Fail at construction time when the data file cannot be read,
+ // reporting its absolute path in the exception.
+ if (!bdtFile.canRead())
+ {
+ throw new BookException(Msg.READ_FAIL, new Object[] { bdtFile.getAbsolutePath() });
+ }
+
}
/* (non-Javadoc)
@@ -48,6 +62,15 @@
*/
public final void activate(Lock lock)
{
+ try
+ {
+ // Open the raw data file using FileUtil.MODE_READ.
+ bdtRaf = new RandomAccessFile(bdtFile, FileUtil.MODE_READ);
+ }
+ catch (IOException ex)
+ {
+ // The failure is logged rather than propagated; bdtRaf is left null.
+ log.error("failed to open files", ex); //$NON-NLS-1$
+ bdtRaf = null;
+ }
+ // NOTE(review): active is set even when the open failed - confirm that
+ // readers cope with a null bdtRaf.
active = true;
}
@@ -56,6 +79,21 @@
*/
public final void deactivate(Lock lock)
{
+ try
+ {
+ // bdtRaf is null when activation failed or never happened.
+ if (bdtRaf != null)
+ {
+ bdtRaf.close();
+ }
+ }
+ catch (IOException ex)
+ {
+ // A failed close is logged and otherwise ignored.
+ log.error("failed to close gen book files", ex); //$NON-NLS-1$
+ }
+ finally
+ {
+ // Drop the reference whether or not the close succeeded.
+ bdtRaf = null;
+ }
active = false;
}
@@ -84,7 +122,7 @@
/* @Override */
public boolean isSupported()
{
- return false;
+ return true;
}
/**
@@ -99,8 +137,27 @@
}
/**
+ * Raw GenBook file extensions
+ */
+ private static final String EXTENSION_BDT = ".bdt"; //$NON-NLS-1$
+
+ /**
+ * The raw data file
+ */
+ private File bdtFile;
+
+ /**
+ * The random access file for the raw data
+ */
+ private RandomAccessFile bdtRaf;
+
+ /**
* Are we active
*/
private boolean active;
+ /**
+ * The log stream
+ */
+ private static final Logger log = Logger.getLogger(GenBookBackend.class);
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordUtil.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordUtil.java 2006-12-12 22:31:43 UTC (rev 1209)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordUtil.java 2006-12-15 21:24:06 UTC (rev 1210)
@@ -55,13 +55,28 @@
/**
* Read a RandomAccessFile
* @param raf The file to read
- * @param offset The record to read
+ * @param offset The start of the record to read
* @param theSize The number of bytes to read
* @return the read data
*/
- protected static byte[] readRAF(RandomAccessFile raf, int offset, int theSize) throws IOException
+ protected static byte[] readRAF(RandomAccessFile raf, long offset, int theSize) throws IOException
{
+ raf.seek(offset);
+ return readNextRAF(raf, theSize);
+ }
+
+ /**
+ * Read a RandomAccessFile from the current location in the file.
+ *
+ * @param raf The file to read
+ * @param theSize The number of bytes to read
+ * @return the read data
+ */
+ protected static byte[] readNextRAF(RandomAccessFile raf, int theSize) throws IOException
+ {
+ long offset = raf.getFilePointer();
int size = theSize;
+
if (offset + size > raf.length())
{
DataPolice.report("Need to reduce size to avoid EOFException. offset=" + offset + " size=" + size + " but raf.length=" + raf.length()); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
@@ -74,7 +89,6 @@
return new byte[0];
}
- raf.seek(offset);
byte[] read = new byte[size];
raf.readFully(read);
@@ -82,6 +96,50 @@
}
/**
+ * Read a RandomAccessFile until a particular byte is seen
+ * @param raf The file to read
+ * @param offset The start of the record to read
+ * @param stopByte The point at which to stop reading
+ * @return the read data
+ */
+    protected static byte[] readUntilRAF(RandomAccessFile raf, long offset, byte stopByte) throws IOException
+    {
+        // The offset is a long, matching readRAF and RandomAccessFile.seek;
+        // callers passing an int are still accepted through implicit widening.
+        raf.seek(offset);
+        return readUntilRAF(raf, stopByte);
+    }
+
+    /**
+     * Read a RandomAccessFile from its current position until a particular
+     * byte is seen. The stop byte, when found, is part of the returned data.
+     *
+     * @param raf The file to read, already positioned at the start of the record
+     * @param stopByte The byte value at which to stop reading
+     * @return the read data
+     * @throws IOException if the file cannot be read
+     */
+    protected static byte[] readUntilRAF(RandomAccessFile raf, byte stopByte) throws IOException
+    {
+        // The strategy used here is to read the file twice.
+        // Once to determine how much to read and then getting the actual data.
+        // It may be more efficient to incrementally build up a byte buffer.
+        // Note: that growing a static array by 1 byte at a time is O(n**2)
+        // This is negligible when the n is small, but prohibitive otherwise.
+        long offset = raf.getFilePointer();
+
+        // raf.read() returns the byte as 0..255 (or -1 at end of file) while a
+        // Java byte is signed. Compare against the unsigned value so that stop
+        // bytes of 0x80 and above can be matched.
+        int stop = stopByte & 0xFF;
+
+        int size = 0;
+        int nextByte;
+        do
+        {
+            nextByte = raf.read();
+            size++;
+        }
+        while (nextByte != -1 && nextByte != stop);
+
+        // Note: we allow for nextByte == -1 to be included in size
+        // so that readRAF will report EOF errors
+        return readRAF(raf, offset, size);
+    }
+
+ /**
* Decode little endian data from a byte array.
* This assumes that the high order bit is not set as this is used solely
* for an offset in a file in bytes. For a practical limit, 2**31 is way
@@ -196,20 +254,32 @@
*/
public static String decode(Key key, byte[] data, String charset)
{
+ return decode(key, data, data.length, charset);
+ }
+
+ /**
+ * Transform a byte array into a string given the encoding.
+ * If the encoding is bad then it just does it as a string.
+ * @param data The byte array to be converted
+ * @param charset The encoding of the byte array
+ * @return a string that is UTF-8 internally
+ */
+ public static String decode(Key key, byte[] data, int length, String charset)
+ {
if ("WINDOWS-1252".equals(charset)) //$NON-NLS-1$
{
- clean1252(key, data);
+ clean1252(key, data, length);
}
String txt = ""; //$NON-NLS-1$
try
{
- txt = new String(data, charset);
+ txt = new String(data, 0, length, charset);
}
catch (UnsupportedEncodingException ex)
{
// It is impossible! In case, use system default...
log.error(key + ": Encoding: " + charset + " not supported", ex); //$NON-NLS-1$ //$NON-NLS-2$
- txt = new String(data);
+ txt = new String(data, 0, length);
}
return txt;
@@ -223,7 +293,18 @@
*/
public static void clean1252(Key key, byte[] data)
{
- for (int i = 0; i < data.length; i++)
+ clean1252(key, data, data.length);
+ }
+
+ /**
+ * Remove rogue characters in the source.
+ * These are characters that are not valid in cp1252 aka WINDOWS-1252
+ * and in UTF-8 or are non-printing control characters in the range
+ * of 0-32.
+ */
+ public static void clean1252(Key key, byte[] data, int length)
+ {
+ for (int i = 0; i < length; i++)
{
// between 0-32 only allow whitespace
// characters 0x81, 0x8D, 0x8F, 0x90 and 0x9D are undefined in cp1252
Added: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeKeyIndex.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeKeyIndex.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeKeyIndex.java 2006-12-15 21:24:06 UTC (rev 1210)
@@ -0,0 +1,259 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2005
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id: SwordUtil.java 1169 2006-10-19 17:48:21 -0400 (Thu, 19 Oct 2006) dmsmith $
+ */
+package org.crosswire.jsword.book.sword;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.net.URL;
+
+import org.crosswire.common.activate.Activatable;
+import org.crosswire.common.activate.Activator;
+import org.crosswire.common.activate.Lock;
+import org.crosswire.common.util.FileUtil;
+import org.crosswire.common.util.Logger;
+import org.crosswire.common.util.NetUtil;
+import org.crosswire.jsword.book.BookException;
+import org.crosswire.jsword.passage.DefaultKeyList;
+import org.crosswire.jsword.passage.Key;
+
+/**
+ * TreeKeyIndex reads Sword index files that are path based.
+ * Paths are of the form /a/b/c, and can be of any depth.
+ * The ultimate output of a TreeKeyIndex is the offset and
+ * length of a chunk of data in another file that can be read.
+ *
+ * <p>Two files are read: the idx file, which holds positions into the dat
+ * file, and the dat file, which holds the node records. Both are opened on
+ * activation and closed on deactivation.</p>
+ *
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by its authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class TreeKeyIndex implements Activatable
+{
+    /**
+     * Locate the idx and dat files of the module and verify that both can be read.
+     *
+     * @param sbmd the meta data of the module whose index is to be read
+     * @throws BookException if the data path is missing or either file is unreadable
+     */
+    public TreeKeyIndex(SwordBookMetaData sbmd) throws BookException
+    {
+        bmd = sbmd;
+
+        String path = getExpandedDataPath();
+
+        idxFile = new File(path + EXTENSION_INDEX);
+        datFile = new File(path + EXTENSION_DATA);
+
+        if (!idxFile.canRead())
+        {
+            throw new BookException(Msg.READ_FAIL, new Object[] { idxFile.getAbsolutePath() });
+        }
+
+        if (!datFile.canRead())
+        {
+            throw new BookException(Msg.READ_FAIL, new Object[] { datFile.getAbsolutePath() });
+        }
+    }
+
+    /**
+     * @return the root TreeNode for the module, found through position 0 of the idx file.
+     * @throws IOException if the index files cannot be read
+     */
+    public TreeNode getRoot() throws IOException
+    {
+        return getTreeNode(getOffset(0));
+    }
+
+    /**
+     * Get the parent of the TreeNode.
+     * @param node the node being worked upon
+     * @return the parent node, or a node with offset -1 when there is none
+     * @throws IOException if the index files cannot be read
+     */
+    public TreeNode getParent(TreeNode node) throws IOException
+    {
+        return getTreeNode(getOffset(node.getParent()));
+    }
+
+    /**
+     * Get the first child of the TreeNode.
+     * @param node the node being worked upon
+     * @return the first child node, or a node with offset -1 when there is none
+     * @throws IOException if the index files cannot be read
+     */
+    public TreeNode getFirstChild(TreeNode node) throws IOException
+    {
+        return getTreeNode(getOffset(node.getFirstChild()));
+    }
+
+    /**
+     * Get the next sibling of the TreeNode.
+     * @param node the node being worked upon
+     * @return the next sibling node, or a node with offset -1 when there is none
+     * @throws IOException if the index files cannot be read
+     */
+    public TreeNode getNextSibling(TreeNode node) throws IOException
+    {
+        return getTreeNode(getOffset(node.getNextSibling()));
+    }
+
+    /**
+     * The idx file contains offsets into the dat file.
+     * @param index the byte position in the idx file of the 4 byte entry
+     *        to read, or -1 for "no such entry"
+     * @return an offset into the dat file, or -1 when index is -1
+     * @throws IOException if the idx file cannot be read
+     */
+    private int getOffset(int index) throws IOException
+    {
+        if (index == -1)
+        {
+            return -1;
+        }
+
+        ensureOpen();
+
+        byte[] buffer = SwordUtil.readRAF(idxRaf, index, 4);
+        return SwordUtil.decodeLittleEndian32(buffer, 0);
+    }
+
+    /**
+     * Given an offset get the TreeNode from the dat file.
+     * A record is read as three 4 byte little endian links (parent, next
+     * sibling, first child), a name terminated by a 0 byte, a 2 byte little
+     * endian user data size and then that many bytes of user data.
+     *
+     * @param offset start of a TreeNode record in the dat file, or -1 for none
+     * @return the TreeNode; for offset -1 a node holding only its defaults
+     * @throws IOException if the dat file cannot be read
+     */
+    private TreeNode getTreeNode(int offset) throws IOException
+    {
+        TreeNode node = new TreeNode(offset);
+
+        if (offset == -1)
+        {
+            return node;
+        }
+
+        ensureOpen();
+
+        byte[] buffer = SwordUtil.readRAF(datRaf, offset, 12);
+        node.setParent(SwordUtil.decodeLittleEndian32(buffer, 0));
+        node.setNextSibling(SwordUtil.decodeLittleEndian32(buffer, 4));
+        node.setFirstChild(SwordUtil.decodeLittleEndian32(buffer, 8));
+
+        // The name follows the links directly, so the reads below rely on
+        // the file position left behind by the previous read.
+        buffer = SwordUtil.readUntilRAF(datRaf, (byte) 0);
+        int size = buffer.length;
+        // Drop the terminator. The buffer can be empty at the end of the
+        // file, so the length has to be checked before indexing.
+        if (size > 0 && buffer[size - 1] == 0)
+        {
+            size--;
+        }
+
+        Key key = new DefaultKeyList(null, bmd.getName());
+        node.setName(SwordUtil.decode(key, buffer, size, bmd.getBookCharset()));
+
+        buffer = SwordUtil.readNextRAF(datRaf, 2);
+        int userDataSize = SwordUtil.decodeLittleEndian16(buffer, 0);
+        if (userDataSize > 0)
+        {
+            node.setUserData(SwordUtil.readNextRAF(datRaf, userDataSize));
+        }
+
+        return node;
+    }
+
+    /* (non-Javadoc)
+     * @see org.crosswire.common.activate.Activatable#activate(org.crosswire.common.activate.Lock)
+     */
+    public final void activate(Lock lock)
+    {
+        try
+        {
+            idxRaf = new RandomAccessFile(idxFile, FileUtil.MODE_READ);
+            datRaf = new RandomAccessFile(datFile, FileUtil.MODE_READ);
+        }
+        catch (IOException ex)
+        {
+            log.error("failed to open files", ex); //$NON-NLS-1$
+            // The idx file may have been opened before the dat file failed;
+            // close it rather than dropping the open handle.
+            close(idxRaf);
+            idxRaf = null;
+            datRaf = null;
+        }
+        active = true;
+    }
+
+    /* (non-Javadoc)
+     * @see org.crosswire.common.activate.Activatable#deactivate(org.crosswire.common.activate.Lock)
+     */
+    public final void deactivate(Lock lock)
+    {
+        // Each file is closed on its own so that a failure to close one
+        // does not leave the other one open.
+        close(idxRaf);
+        idxRaf = null;
+        close(datRaf);
+        datRaf = null;
+        active = false;
+    }
+
+    /**
+     * Helper method so we can quickly activate ourselves on access
+     */
+    protected final void checkActive()
+    {
+        if (!active)
+        {
+            Activator.activate(this);
+        }
+    }
+
+    /**
+     * Activate on demand and verify that both files are open.
+     * @throws IOException if the files could not be opened
+     */
+    private void ensureOpen() throws IOException
+    {
+        checkActive();
+
+        // activate() logs an open failure and leaves the files null.
+        // Report that to the caller as an IOException, not a NullPointerException.
+        if (idxRaf == null || datRaf == null)
+        {
+            throw new IOException("failed to open files"); //$NON-NLS-1$
+        }
+    }
+
+    /**
+     * Close a file, logging instead of propagating any failure.
+     * @param raf the file to close, may be null
+     */
+    private static void close(RandomAccessFile raf)
+    {
+        if (raf == null)
+        {
+            return;
+        }
+
+        try
+        {
+            raf.close();
+        }
+        catch (IOException ex)
+        {
+            log.error("failed to close tree key index files", ex); //$NON-NLS-1$
+        }
+    }
+
+    /**
+     * Build the absolute path, without extension, of the module's data files
+     * from the library location and the DATA_PATH entry of the meta data.
+     * @return the absolute path
+     * @throws BookException if the location cannot be determined
+     */
+    private String getExpandedDataPath() throws BookException
+    {
+        URL loc = NetUtil.lengthenURL(bmd.getLibrary(), bmd.getProperty(ConfigEntryType.DATA_PATH));
+
+        if (loc == null)
+        {
+            throw new BookException(Msg.MISSING_FILE);
+        }
+
+        return new File(loc.getFile()).getAbsolutePath();
+    }
+
+    /**
+     * Tree key index file extensions
+     */
+    private static final String EXTENSION_INDEX = ".idx"; //$NON-NLS-1$
+    private static final String EXTENSION_DATA = ".dat"; //$NON-NLS-1$
+
+    /**
+     * The meta data of the module being indexed
+     */
+    private SwordBookMetaData bmd;
+
+    /**
+     * The idx file and the dat file
+     */
+    private File idxFile;
+    private File datFile;
+
+    /**
+     * The random access files, null unless successfully activated
+     */
+    private RandomAccessFile idxRaf;
+    private RandomAccessFile datRaf;
+
+    /**
+     * Are we active
+     */
+    private boolean active;
+
+    /**
+     * The log stream
+     */
+    private static final Logger log = Logger.getLogger(TreeKeyIndex.class);
+}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeNode.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeNode.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeNode.java 2006-12-15 21:24:06 UTC (rev 1210)
@@ -0,0 +1,214 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2005
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id: LZSSBackend.java 1143 2006-10-04 22:07:23 -0400 (Wed, 04 Oct 2006) dmsmith $
+ */
+package org.crosswire.jsword.book.sword;
+
+import java.io.Serializable;
+
+/**
+ * A node that knows where the data is in the real file and where it is in
+ * relationship to other nodes.
+ *
+ * @see gnu.lgpl.License for license details. The copyright to this program is
+ *      held by its authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+class TreeNode implements Cloneable, Serializable
+{
+    /**
+     * TreeNode default ctor. The node has an offset of -1.
+     */
+    TreeNode()
+    {
+        this(-1);
+    }
+
+    /**
+     * Setup with the position of the node's record in the file.
+     * The name starts out empty, all links are -1 and there is no user data.
+     *
+     * @param theOffset the offset of this node's record
+     */
+    TreeNode(int theOffset)
+    {
+        offset = theOffset;
+        name = ""; //$NON-NLS-1$
+        parent = -1;
+        nextSibling = -1;
+        firstChild = -1;
+        userData = new byte[0];
+    }
+
+    /**
+     * @return the offset
+     */
+    public int getOffset()
+    {
+        return offset;
+    }
+
+    /**
+     * @param newOffset the offset to set
+     */
+    public void setOffset(int newOffset)
+    {
+        offset = newOffset;
+    }
+
+    /**
+     * @return the name
+     */
+    public String getName()
+    {
+        return name;
+    }
+
+    /**
+     * @param newName the name to set
+     */
+    public void setName(String newName)
+    {
+        name = newName;
+    }
+
+    /**
+     * @return the userData. This is the node's own array, not a copy.
+     */
+    public byte[] getUserData()
+    {
+        return userData;
+    }
+
+    /**
+     * @param theUserData the userData to set. The array is stored as is, not copied.
+     */
+    public void setUserData(byte[] theUserData)
+    {
+        userData = theUserData;
+    }
+
+    /**
+     * @return the firstChild
+     */
+    public int getFirstChild()
+    {
+        return firstChild;
+    }
+
+    /**
+     * @param firstChild the firstChild to set
+     */
+    public void setFirstChild(int firstChild)
+    {
+        this.firstChild = firstChild;
+    }
+
+    /**
+     * @return the nextSibling
+     */
+    public int getNextSibling()
+    {
+        return nextSibling;
+    }
+
+    /**
+     * @param nextSibling the nextSibling to set
+     */
+    public void setNextSibling(int nextSibling)
+    {
+        this.nextSibling = nextSibling;
+    }
+
+    /**
+     * @return the parent
+     */
+    public int getParent()
+    {
+        return parent;
+    }
+
+    /**
+     * @param parent the parent to set
+     */
+    public void setParent(int parent)
+    {
+        this.parent = parent;
+    }
+
+    /**
+     * Make a copy that shares nothing mutable with this node.
+     *
+     * @return the copy
+     * @see java.lang.Object#clone()
+     */
+    public Object clone()
+    {
+        TreeNode copy = null;
+        try
+        {
+            copy = (TreeNode) super.clone();
+
+            // Object.clone() copies the array reference only. Without a copy
+            // of the array a change to the user data of one node would show
+            // up in the other.
+            if (userData != null)
+            {
+                copy.userData = (byte[]) userData.clone();
+            }
+        }
+        catch (CloneNotSupportedException e)
+        {
+            // Cannot happen as this class is Cloneable
+            assert false : e;
+        }
+
+        return copy;
+    }
+
+    /**
+     * The offset of this TreeNode's record in the dat file.
+     * -1 means that the node does not refer to a record.
+     */
+    private int offset;
+
+    /**
+     * The name of this TreeNode. Note, this is not the path. To get the path,
+     * one needs to traverse to the parent to construct the path.
+     */
+    private String name;
+
+    /**
+     * Optional, extra data associated with this TreeNode.
+     * For example, this is used to store offset and length for a raw genbook.
+     */
+    private byte[] userData;
+
+    /**
+     * The position in the idx file of the parent's entry. -1 means that there
+     * are no parents and this TreeNode is a root.
+     */
+    private int parent;
+
+    /**
+     * The position in the idx file of the next sibling's entry. -1 means that
+     * there is no next sibling.
+     */
+    private int nextSibling;
+
+    /**
+     * The position in the idx file of the first child's entry. -1 means that
+     * there are no children and this TreeNode is a leaf.
+     */
+    private int firstChild;
+
+    /**
+     * Serialization ID
+     */
+    private static final long serialVersionUID = -2472601787934480762L;
+
+}
More information about the jsword-svn
mailing list