[sword-cvs] sword/utilities xml2gbs.cpp,NONE,1.1
sword@www.crosswire.org
sword@www.crosswire.org
Thu, 13 Feb 2003 11:01:39 -0700
Update of /usr/local/cvsroot/sword/utilities
In directory www:/tmp/cvs-serv27943
Added Files:
xml2gbs.cpp
Log Message:
added xml2gbs util
--- NEW FILE: xml2gbs.cpp ---
#include <ctype.h>
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#ifndef __GNUC__
#include <io.h>
#else
#include <unistd.h>
#endif
#include <entriesblk.h>
//#include <iostream>
#include <string>
#include <stdio.h>
#include <treekeyidx.h>
#include <rawgenbook.h>
// Fallback for platforms whose headers do not provide O_BINARY: define it
// as 0 so it can be OR-ed into open() flags without effect.
#ifndef O_BINARY
#define O_BINARY 0
#endif
// Unless the build defines NO_SWORD_NAMESPACE, the SWORD classes used in this
// file are pulled in from namespace sword.
#ifndef NO_SWORD_NAMESPACE
using sword::TreeKeyIdx;
using sword::RawGenBook;
using sword::SWKey;
#endif
// Enables the "#ifdef DEBUG" diagnostics further down in this file.
#define DEBUG
/*
void printTree(TreeKeyIdx treeKey, TreeKeyIdx *target = 0, int level = 1) {
if (!target)
target = &treeKey;
unsigned long currentOffset = target->getOffset();
std::cout << ((currentOffset == treeKey.getOffset()) ? "==>" : "");
for (int i = 0; i < level; i++) std::cout << "\t";
std::cout << treeKey.getLocalName() << "/\n";
if (treeKey.firstChild()) {
printTree(treeKey, target, level+1);
treeKey.parent();
}
if (treeKey.nextSibling())
printTree(treeKey, target, level);
}
*/
/**
 * Positions treeKey on the node named by a '/'-separated path, creating any
 * node along the path that does not exist yet.
 *
 * The walk starts from wherever treeKey currently points, so callers that
 * want an absolute path must move the key to the root first.
 *
 * @param treeKey   key to navigate; it is left on the node for the last path
 *                  component
 * @param keybuffer path such as "/Book/Chapter"; it is tokenized in place
 *                  with strtok() and is therefore modified
 */
void setkey (TreeKeyIdx * treeKey, char* keybuffer) {
	for (char* tok = strtok(keybuffer, "/"); tok; tok = strtok(NULL, "/")) {
		if (!treeKey->hasChildren()) {
			// Nothing below the current node yet: the component becomes its first child.
			treeKey->appendChild();
			treeKey->setLocalName(tok);
			treeKey->save();
			continue;
		}

		// Look for an existing child with this name. Stop at the first match so
		// the key stays on the matching node; scanning on to the last sibling
		// would make the following components descend into the wrong branch.
		bool foundkey = false;
		treeKey->firstChild();
		do {
			const char* name = treeKey->getLocalName();
			if (name && !strcmp(name, tok)) {
				foundkey = true;
				break;
			}
		} while (treeKey->nextSibling());

		if (!foundkey) {
			// No match among the children: append() adds the new node at the
			// position the sibling scan stopped on.
			treeKey->append();
			treeKey->setLocalName(tok);
			treeKey->save();
		}
	}
}
/**
 * Reads one line (including its terminating '\n', if any) from infile.
 *
 * The result is always NUL-terminated. A final line that is not terminated
 * by a newline is returned like any other line instead of being dropped.
 * Lines longer than the buffer are returned in pieces of at most
 * bufsize - 1 characters.
 *
 * @param infile     stream to read from
 * @param linebuffer destination buffer
 * @param bufsize    capacity of linebuffer in bytes; the default matches the
 *                   1 MiB buffer main() allocates for this purpose
 * @return number of characters stored (excluding the NUL); 0 at end of file
 */
int readline(FILE* infile, char* linebuffer, size_t bufsize = 1048576) {
	if (!bufsize)
		return 0;

	size_t len = 0;
	// fgetc() returns an int so that EOF can be told apart from the byte 0xFF;
	// storing the result in a char first would end the read at such a byte.
	int c;
	while (len + 1 < bufsize && (c = fgetc(infile)) != EOF) {
		linebuffer[len++] = (char)c;
		if (c == 10)
			break;
	}
	linebuffer[len] = 0;
	return (int)len;
}
// Input formats the importer distinguishes. F_AUTODETECT is the "not yet
// determined" value: detectFormat() returns it when no format marker was found.
enum XML_FORMATS { F_AUTODETECT, F_OSIS, F_THML };
// Usage text written to stderr whenever the tool cannot proceed. It is passed
// to fprintf() as the format string, so it must not contain '%'.
#define HELPTEXT "xml2gbs 1.0 OSIS/ThML General Book module creation tool for the SWORD Project\n usage:\n xml2gbs [-l] [-i] [-fT|-fO] <filename> [modname]\n -l uses long div names in ThML files\n -i exports to IMP format instead of creating a module\n -fO and -fT will set the importer to expect OSIS or ThML format respectively\n (otherwise it attempts to autodetect)\n"
/**
 * Guesses the markup format of a file by scanning it line by line for an
 * "<osis" or "<ThML" marker.
 *
 * @param filename  file to inspect
 * @param entbuffer scratch buffer each line is read into
 * @return F_OSIS or F_THML for the first marker found; F_AUTODETECT if the
 *         file could not be opened (usage text and an error are printed to
 *         stderr) or contains neither marker
 */
unsigned char detectFormat(char* filename, char* entbuffer) {
	FILE *source = fopen(filename, "r");
	if (source == NULL) {
		fprintf(stderr, HELPTEXT);
		fprintf(stderr, "\n\nCould not open file \"%s\"\n", filename);
		return F_AUTODETECT;
	}

	unsigned char detected = F_AUTODETECT;
	for (;;) {
		// A line is fetched before the result is checked, mirroring the
		// evaluation order of a combined "read && undecided" loop condition.
		if (!readline(source, entbuffer))
			break;
		if (detected != F_AUTODETECT)
			break;

		if (strstr(entbuffer, "<osis"))
			detected = F_OSIS;
		else if (strstr(entbuffer, "<ThML"))
			detected = F_THML;
	}

	fclose(source);
	return detected;
}
/**
 * Reads the remainder of an XML tag, i.e. everything up to and including the
 * next '>', from file. The caller has already consumed the opening '<'.
 *
 * @param file      stream positioned just after a '<'
 * @param keybuffer receives the tag text followed by '>' and a NUL
 * @param bufsize   capacity of keybuffer in bytes; the default matches the
 *                  2048-byte tag buffer processXML() uses. A tag that does
 *                  not fit is read to its end, but only the first
 *                  bufsize - 2 characters are kept before the closing '>'.
 * @return length of the stored text including the '>', or 0 (with an empty
 *         keybuffer) if the file ended before the tag was closed
 */
int getTag(FILE* file, char* keybuffer, size_t bufsize = 2048) {
	if (bufsize < 2) {
		// No room for even ">" plus the terminator.
		if (bufsize)
			keybuffer[0] = 0;
		return 0;
	}

	size_t len = 0;
	// int, not char: EOF must stay distinguishable from every byte value,
	// otherwise an unterminated tag at end of file would loop forever.
	int c;
	while ((c = fgetc(file)) != EOF && c != '>') {
		if (len < bufsize - 2)
			keybuffer[len++] = (char)c;
	}

	if (c == EOF) {
		keybuffer[0] = 0;
		return 0;
	}

	keybuffer[len++] = '>';
	keybuffer[len] = 0;
	return (int)len;
}
int processXML(char* filename, char* modname, bool longnames, bool exportfile, unsigned char format, char* entbuffer) {
signed long i = 0;
char* strtmp;
#ifdef DEBUG
printf ("%s :%s :%d :%d :%d\n\n", filename, modname, longnames, exportfile, format);
#endif
FILE *infile;
infile = fopen(filename, "r");
if (!infile) {
fprintf(stderr, HELPTEXT);
fprintf(stderr, "\n\nCould not open file \"%s\"\n", filename);
return -1;
}
FILE *outfile;
if (exportfile) {
strcat (modname, ".imp");
outfile = fopen(modname, "w");
}
TreeKeyIdx * treeKey;
RawGenBook * book;
std::string divs[32];
int level = 0;
char* keybuffer = new char[2048];
char* keybuffer2 = new char[2048];
char* n = new char[256];
char* type = new char[256];
char* title= new char[512];
unsigned long entrysize = 0;
unsigned long keysize = 0;
bool closer = false;
if (!exportfile) {
// Do some initialization stuff
TreeKeyIdx::create(modname);
treeKey = new TreeKeyIdx(modname);
RawGenBook::createModule(modname);
delete treeKey;
book = new RawGenBook(modname);
treeKey = ((TreeKeyIdx *)((SWKey *)(*book)));
}
#ifdef DEBUG
// TreeKeyIdx root = *((TreeKeyIdx *)((SWKey *)(*book)));
#endif
int c;
while ((c = fgetc(infile)) != EOF) {
if (c == '<') {
if (getTag(infile, keybuffer)) {
if ((format == F_OSIS) && ((!strcmp(keybuffer, "/div>")) || (!strcmp(keybuffer, "/verse>"))) ||
((format == F_THML) && ((!strncmp(keybuffer, "/div", 4)) && (keybuffer[4] > '0' && keybuffer[4] < '7')))) {
if (!closer) {
keysize = 0;
keybuffer2[0] = 0;
for (i = 0; i < level; i++) {
keybuffer2[keysize] = '/';
keysize++;
keybuffer2[keysize] = 0;
strcat (keybuffer2, divs[i].c_str());
keysize += divs[i].length();
}
if (level) {
printf ("%s\n", keybuffer2);
if (exportfile) {
fprintf (outfile, "$$$%s\n%s\n", keybuffer2, entbuffer);
}
else {
treeKey->root();
setkey(treeKey, keybuffer2);
book->setEntry(entbuffer, entrysize); // save text to module at current position
}
}
}
level--;
entbuffer[0] = 0;
entrysize = 0;
closer = true;
}
else if ((format == F_OSIS) && !((!strcmp(keybuffer, "div>") || !strncmp(keybuffer, "div ", 4)) || (!strcmp(keybuffer, "verse>") || !strncmp(keybuffer, "verse ", 6))) ||
((format == F_THML) && !((!strncmp(keybuffer, "div", 3)) && (keybuffer[3] > '0' && keybuffer[3] < '7')))) {
entbuffer[entrysize++] = '<';
for (i = 0; i <= strlen(keybuffer); i++) {
entbuffer[entrysize++] = keybuffer[i];
}
entrysize--;
}
else {
//we have a divN...
if (!closer) {
keysize = 0;
keybuffer2[0] = 0;
for (i = 0; i < level; i++) {
keybuffer2[keysize] = '/';
keysize++;
keybuffer2[keysize] = 0;
strcat (keybuffer2, divs[i].c_str());
keysize += divs[i].length();
}
if (level) {
printf ("%s\n", keybuffer2);
if (exportfile) {
fprintf (outfile, "$$$%s\n%s\n", keybuffer2, entbuffer);
}
else {
treeKey->root();
setkey(treeKey, keybuffer2);
book->setEntry(entbuffer, entrysize); // save text to module at current position
}
}
}
entbuffer[0] = 0;
entrysize = 0;
level++;
keysize = strlen(keybuffer)-1;
/* keysize = 0;
while ((c = fgetc(infile)) != EOF) {
if (c != '>') {
keybuffer[keysize] = c;
keysize++;
}
else {
break;
}
}
keybuffer[keysize] = 0;*/
type[0] = 0;
n[0] = 0;
title[0] = 0;
if (format == F_OSIS && longnames == false) {
strtmp = strstr(keybuffer, "osisID=\"");
if (strtmp) {
strtmp += 8;
i = 0;
for (;*strtmp != '\"'; strtmp++) {
if (*strtmp == 10) {
title[i] = ' ';
i++;
}
else if (*strtmp == '.') {
i = 0;
}
else if (*strtmp != 13) {
title[i] = *strtmp;
i++;
}
}
title[i] = 0;
}
strcpy (keybuffer, title);
}
else {
strtmp = strstr(keybuffer, "type=\"");
if (strtmp) {
strtmp += 6;
i = 0;
for (;*strtmp != '\"'; strtmp++) {
if (*strtmp == 10) {
type[i] = ' ';
i++;
}
else if (*strtmp != 13) {
type[i] = *strtmp;
i++;
}
}
type[i] = 0;
}
strtmp = strstr(keybuffer, "n=\"");
if (strtmp) {
strtmp += 3;
i = 0;
for (;*strtmp != '\"'; strtmp++) {
if (*strtmp == 10) {
n[i] = ' ';
i++;
}
else if (*strtmp != 13) {
n[i] = *strtmp;
i++;
}
}
n[i] = 0;
}
if (format == F_OSIS) {
strtmp = strstr(keybuffer, "title=\"");
if (strtmp) {
strtmp += 7;
i = 0;
for (;*strtmp != '\"'; strtmp++) {
if (*strtmp == 10) {
title[i] = ' ';
i++;
}
else if (*strtmp != 13) {
title[i] = *strtmp;
i++;
}
}
title[i] = 0;
}
}
else if (format == F_THML) {
strtmp = strstr(keybuffer, "title=\"");
if (strtmp) {
strtmp += 7;
i = 0;
for (;*strtmp != '\"'; strtmp++) {
if (*strtmp == 10) {
title[i] = ' ';
i++;
}
else if (*strtmp != 13) {
title[i] = *strtmp;
i++;
}
}
title[i] = 0;
}
}
strcpy (keybuffer, type);
if (strlen(keybuffer) && strlen(n))
strcat (keybuffer, " ");
strcat (keybuffer, n);
if (longnames && strlen(keybuffer))
strcat (keybuffer, ": ");
if (longnames || !strlen(keybuffer))
strcat (keybuffer, title);
}
divs[level-1] = keybuffer;
closer = false;
}
}
}
else if (c != 13) {
entbuffer[entrysize] = c;
entrysize++;
entbuffer[entrysize] = 0;
}
}
#ifdef DEBUG
// printTree(root, treeKey);
#endif
// delete book; //causes nasty-bad errors upon execution
delete n;
delete type;
delete title;
delete keybuffer;
}
int main(int argc, char **argv) {
unsigned long i = 0;
char modname[256];
*modname = 0;
char filename[256];
*filename = 0;
bool longnames = false;
bool exportfile = false;
unsigned char format = F_AUTODETECT;
if (argc > 2) {
for (i = 1; i < argc; i++) {
if (argv[i][0] == '-') {
switch (argv[i][1]) {
case 'l':
longnames = true;
continue;
case 'i':
exportfile = true;
continue;
case 'f':
if (argv[i][2] == 'O') {
format = F_OSIS;
}
else if (argv[i][2] == 'T') {
format = F_OSIS;
}
else {
format = F_AUTODETECT;
}
continue;
}
}
else if (*filename == 0) {
strcpy (filename, argv[i]);
}
else if (*modname == 0) {
strcpy (modname, argv[i]);
}
}
}
else if (argc > 1) {
strcpy (filename, argv[1]);
}
if (!*filename) {
fprintf(stderr, HELPTEXT);
return -1;
}
else {
if (!*modname) {
for (i = 0; (i < 256) && (filename[i]) && (filename[i] != '.'); i++) {
modname[i] = filename[i];
}
modname[i] = 0;
}
char* entbuffer = new char[1048576];
format = (format == F_AUTODETECT) ? detectFormat(filename, entbuffer) : format;
if (format == F_AUTODETECT) {
fprintf(stderr, HELPTEXT);
fprintf(stderr, "\n\nCould not detect file format for file \"%s\", please specify.\n", filename);
return -1;
}
int retCode = processXML (filename, modname, longnames, exportfile, format, entbuffer);
delete entbuffer;
return retCode;
}
}