[jsword-svn] jsword/java/jsword/org/crosswire/jsword/book/sword s
jswordcvs at crosswire.org
jswordcvs at crosswire.org
Sun May 1 18:29:34 MST 2005
Update of /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/sword
In directory www.crosswire.org:/tmp/cvs-serv26301/java/jsword/org/crosswire/jsword/book/sword
Modified Files:
SwordUtil.java
Log Message:
Fixed the bug where notes were being indexed, by adding getVerseText as a replacement for getPlainText. The latter is still needed for non-Bibles.
Also cleaned up checkstyle reports and added/corrected javadoc.
Index: SwordUtil.java
===================================================================
RCS file: /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/sword/SwordUtil.java,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** SwordUtil.java 18 Apr 2005 22:10:38 -0000 1.18
--- SwordUtil.java 2 May 2005 01:29:32 -0000 1.19
***************
*** 84,91 ****
protected static int decodeLittleEndian32(byte[] data, int offset)
{
- // long byte1 = SwordUtil.un2complement(data[0 + offset]);
- // long byte2 = SwordUtil.un2complement(data[1 + offset]) << 8;
- // long byte3 = SwordUtil.un2complement(data[2 + offset]) << 16;
- // long byte4 = SwordUtil.un2complement(data[3 + offset]) << 24;
// Convert from a byte to an int, but prevent sign extension.
// So -16 becomes 240
--- 84,87 ----
***************
*** 106,111 ****
protected static int decodeLittleEndian16(byte[] data, int offset)
{
- // int byte1 = SwordUtil.un2complement(data[0 + offset]);
- // int byte2 = SwordUtil.un2complement(data[1 + offset]) << 8;
// Convert from a byte to an int, but prevent sign extension.
// So -16 becomes 240
--- 102,105 ----
***************
*** 168,171 ****
--- 162,169 ----
public static String decode(Key key, byte[] data, String charset)
{
+ if (charset.equals("WINDOWS-1252")) //$NON-NLS-1$
+ {
+ clean1252(key, data);
+ }
String txt = ""; //$NON-NLS-1$
try
***************
*** 185,210 ****
/**
* Remove rogue characters in the source.
! * These are characters that are not valid in ISO-LATIN-1 (8859-1)
* and in UTF-8 or are non-printing control characters in the range
* of 0-32.
*/
! public static String clean(Key key, String data)
{
! char[] buffer = data.toCharArray();
! for (int i = 0; i < buffer.length; i++)
{
// between 0-32 only allow whitespace
! // characters 127-159 are undefined in ISO-8859-1 and UTF-8
! // Microsoft uses them in cp1250 and cp1252 for their own purpose
! // Microsoft and others frequently call that "Latin 1" when it is not
! char c = buffer[i];
! if ((c >= 0 && c < 32 && c != 9 && c != 10 && c != 13) || c == 255 || (c >= 127 && c <= 159))
{
! buffer[i] = ' ';
! // NOTE(joe): Should this be a call to DataPolice???
! log.debug(key.getName() + " has bad character " + (int) c + " at position " + i + " in input."); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
}
}
- return new String(buffer);
}
--- 183,204 ----
/**
* Remove rogue characters in the source.
! * These are characters that are not valid in cp1252 aka WINDOWS-1252
* and in UTF-8 or are non-printing control characters in the range
* of 0-32.
*/
! public static void clean1252(Key key, byte[] data)
{
! for (int i = 0; i < data.length; i++)
{
// between 0-32 only allow whitespace
! // characters 0x81, 0x8D, 0x8F, 0x90 and 0x9D are undefined in cp1252
! int c = data[i] & 0xFF;
! if ((c >= 0x00 && c < 0x20 && c != 0x09 && c != 0x0A && c != 0x0D)
! || (c == 0x81 || c == 0x8D || c == 0x8F || c == 0x90 || c == 0x9D))
{
! data[i] = 0x20;
! DataPolice.report(key.getName() + " has bad character 0x" + Integer.toString(c, 16) + " at position " + i + " in input."); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
}
}
}
More information about the jsword-svn mailing list