[sword-svn] r167 - in trunk/utils: . flash2mongo


Sat Feb 5 22:00:43 MST 2011


Author: 
Date: 2011-02-05 22:00:43 -0700 (Sat, 05 Feb 2011)
New Revision: 167

Added:
   trunk/utils/flash2mongo/
   trunk/utils/flash2mongo/flash2mongo.php
Log:
added utility to insert flash files into a mongo db

Added: trunk/utils/flash2mongo/flash2mongo.php
===================================================================
--- trunk/utils/flash2mongo/flash2mongo.php	                        (rev 0)
+++ trunk/utils/flash2mongo/flash2mongo.php	2011-02-06 05:00:43 UTC (rev 167)
@@ -0,0 +1,118 @@
+<?php
+
+/**
+ * This script processes .flash files into a mongo collection
+ * Usage:
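+ * php flash2mongo.php [source flash dir] [mongo host] [mongo db]
+ *
+ * Lesson sets are written to the "lessonsets" collection and cards to the
+ * "cards" collection of the given db; the collection names are fixed.
+ *
+ * Example [mongo host] value:
+ * mongodb://sworduser:swordpass@dbh04.mongolab.com:27047/sword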
+ */
+/**
+ * Convert a UTF-8 string into a list of Unicode code points.
+ *
+ * The string is transcoded to UTF-32LE and unpacked as little-endian
+ * unsigned 32-bit integers. The byte order is spelled out on both sides so
+ * that iconv does not prepend a byte-order mark (which would surface as a
+ * bogus leading 0xFEFF code point) and so that the result does not depend
+ * on the endianness of the host.
+ *
+ * @param string $utf8_string UTF-8 encoded input.
+ * @return array Code points keyed from 1 upwards (the keys unpack() assigns);
+ *               an empty array when the input is empty or cannot be converted.
+ */
+function utf8_to_unicode_code($utf8_string) {
+    $expanded = iconv("UTF-8", "UTF-32LE", $utf8_string);
+    if ($expanded === false || $expanded === "") {
+        return array();
+    }
+    return unpack("V*", $expanded);
+}
+
+/**
+ * Convert a list of Unicode code points into a UTF-8 string.
+ *
+ * Each code point is packed as a little-endian unsigned 32-bit integer and
+ * transcoded from UTF-32LE. Naming the byte order explicitly keeps the
+ * result independent of the host's endianness and of how the local iconv
+ * interprets BOM-less "UTF-32" input.
+ *
+ * Code points are converted one at a time so that a value iconv rejects
+ * only drops that single character instead of the whole string.
+ *
+ * @param array $unicode_list Code points; array keys are ignored.
+ * @return string UTF-8 encoded text ("" for an empty list).
+ */
+function unicode_code_to_utf8($unicode_list) {
+    $result = "";
+    foreach ($unicode_list as $value) {
+        // "V" = unsigned 32-bit little-endian, matching UTF-32LE below.
+        $one_character = pack("V", $value);
+        $result .= iconv("UTF-32LE", "UTF-8", $one_character);
+    }
+    return $result;
+}
+
+/**
+ * Replace literal \uXXXX escape sequences in a string with the UTF-8
+ * encoding of the character they name.
+ *
+ * Each escape is rewritten to the numeric HTML entity &#xXXXX; and the
+ * result is passed through html_entity_decode(), so any HTML entities
+ * already present in $str are decoded as well (quotes are left alone
+ * because of ENT_NOQUOTES).
+ *
+ * Only a backslash + "u" followed by exactly four hexadecimal digits is
+ * treated as an escape; anything else (e.g. "\uzzzz") is left untouched
+ * rather than being turned into a malformed entity.
+ *
+ * @param string $str Text that may contain \uXXXX escapes.
+ * @return string The text with escapes (and HTML entities) decoded to UTF-8.
+ */
+function convertUnicode($str) {
+    $s = preg_replace('/\\\\u([0-9a-f]{4})/i', '&#x$1;', $str);
+    return html_entity_decode($s, ENT_NOQUOTES, 'UTF-8');
+}
+
+// Three positional arguments are read below ($argv[1] .. $argv[3]). $argc
+// also counts the script name, so anything under 4 means one is missing.
+if ($argc < 4) {
+    fwrite(STDERR, "Usage: php flash2mongo.php [source flash dir] [mongo host] [mongo db]" . PHP_EOL);
+    exit(1);
+}
+
+$lessonDir			= $argv[1];
+$mongo_host			= $argv[2];
+$mongo_db			= $argv[3];
+
+// Fail early, with a non-zero exit status, when the source directory is unusable.
+if (!is_dir($lessonDir)) {
+    fwrite(STDERR, "Directory [" . $lessonDir . "] is invalid" . PHP_EOL);
+    exit(1);
+}
+
+// Echo the effective settings so a run can be checked from its output.
+echo "Source directory: [" . $lessonDir . "]" . PHP_EOL;
+echo "Mongo host: [" . $mongo_host . "]" . PHP_EOL;
+echo "Mongo db: [" . $mongo_db . "]" . PHP_EOL;
+
+// Names of the lesson-set sub-directories found directly under $lessonDir.
+$dirArray = array();
+
+$myDirectory = opendir($lessonDir);
+if ($myDirectory === false) {
+    fwrite(STDERR, "Directory [" . $lessonDir . "] could not be opened" . PHP_EOL);
+    exit(1);
+}
+
+// Connect and pick the two target collections (names are fixed).
+$mongo 		= new Mongo($mongo_host);
+$db    		= $mongo->selectDB($mongo_db);
+$lessonsets = $db->selectCollection('lessonsets');
+$cards = $db->selectCollection('cards');
+
+// Compare against false explicitly: readdir() can return a falsy name such
+// as "0", which a plain truthiness test would mistake for end-of-directory.
+while (false !== ($entryName = readdir($myDirectory))) {
+    // Every non-hidden sub-directory is one lesson set; "." and ".." are
+    // excluded by the leading-dot test.
+    if (!preg_match('/^\./', $entryName) && is_dir($lessonDir . '/' . $entryName)) {
+        $dirArray[] = $entryName;
+
+        $lessonsets->insert(array("lessonset"=>$entryName));
+    }
+}
+
+closedir($myDirectory);
+
+echo "Found " . count($dirArray) . " lessonsets: " . json_encode($dirArray) . PHP_EOL;
+
+// Import every lesson file of every lesson set into the "cards" collection.
+// Each lesson file is parsed as a raw INI file and is read for the keys
+// wordCount, lessonFont (optional), word<N> and answers<N>.
+foreach ($dirArray as $lessonSetDir) {
+    $lessonSetPath = $lessonDir . '/' . $lessonSetDir;
+
+    $myDirectory = opendir($lessonSetPath);
+    if ($myDirectory === false) {
+        echo $lessonSetPath . ' could not be opened' . PHP_EOL;
+        continue;
+    }
+
+    // Per-lesson-set totals for the summary line; both start at zero so the
+    // summary reports exactly what was imported.
+    $lessonCount = 0;
+    $totwords = 0;
+
+    // Compare against false explicitly: a file literally named "0" is falsy
+    // and would otherwise end the loop early.
+    while (false !== ($cardfile = readdir($myDirectory))) {
+        $cardPath = $lessonSetPath . '/' . $cardfile;
+
+        // Skip hidden entries (including "." and "..") and non-files.
+        if (preg_match('/^\./', $cardfile) || !is_file($cardPath)) {
+            continue;
+        }
+
+        // Warnings are suppressed because a failed parse is reported below.
+        $lesson_data = @parse_ini_file($cardPath, false, INI_SCANNER_RAW);
+        if (!is_array($lesson_data)) {
+            echo $cardPath . ' failed to parse' . PHP_EOL;
+            continue;
+        }
+
+        if (!array_key_exists('wordCount', $lesson_data)) {
+            echo $cardPath . ' has no wordCount specified' . PHP_EOL;
+            continue;
+        }
+        $cardCount = (int) $lesson_data['wordCount'];
+
+        // The lesson name is the file name without a trailing ".flash".
+        $lesson = preg_replace('/\.flash$/i', '', $cardfile);
+
+        if (array_key_exists('lessonFont', $lesson_data)) {
+            $lessonfont = $lesson_data['lessonFont'];
+        } else {
+            echo $cardPath . ' has no font specified ' . PHP_EOL;
+            // Fallback font used when the lesson does not name one.
+            $lessonfont = "GalSILB201";
+        }
+
+        $inserted = 0;
+        for ($i = 0; $i < $cardCount; $i++) {
+            // wordCount may promise more cards than the file contains;
+            // report and skip the gaps instead of inserting empty cards.
+            if (!array_key_exists('word' . $i, $lesson_data)
+                || !array_key_exists('answers' . $i, $lesson_data)) {
+                echo $cardPath . ' is missing word' . $i . ' or answers' . $i . PHP_EOL;
+                continue;
+            }
+
+            $cards->insert(array(
+                "lessonset"=>$lessonSetDir,
+                "lesson"=>$lesson,
+                "font"=>$lessonfont,
+                // The front text may carry \uXXXX escapes; store it decoded.
+                "front"=>convertUnicode($lesson_data['word' . $i]),
+                "back"=>$lesson_data['answers' . $i]
+            ));
+            $inserted++;
+        }
+
+        $lessonCount++;
+        $totwords += $inserted;
+        echo $cardPath . ': processed ' . $inserted . ' words' . PHP_EOL;
+    }
+
+    closedir($myDirectory);
+
+    echo 'processed ' . $lessonCount . ' lessons and ' . $totwords . ' words' . PHP_EOL;
+}




More information about the sword-cvs mailing list