Updated for JSHint

This commit is contained in:
andris9 2011-06-13 22:04:21 +03:00
parent ea226be4c4
commit 44df08632e
2 changed files with 255 additions and 243 deletions

3
README.md Normal file
View File

@ -0,0 +1,3 @@
#epub
**epub** is a node.js module to parse EPUB electronic book files.

333
epub.js
View File

@ -3,11 +3,7 @@ var ZipFile = require("zipfile").ZipFile,
utillib = require("util"), utillib = require("util"),
EventEmitter = require('events').EventEmitter; EventEmitter = require('events').EventEmitter;
//TODO: Cache parsed data
//TODO: Cache parsed data to DB
// export
module.exports = EPub;
/** /**
* new EPub(fname[, imageroot][, linkroot]) * new EPub(fname[, imageroot][, linkroot])
@ -18,10 +14,10 @@ module.exports = EPub;
* Creates an Event Emitter type object for parsing epub files * Creates an Event Emitter type object for parsing epub files
* *
* var epub = new EPub("book.epub"); * var epub = new EPub("book.epub");
* epub.on("end", function(){ * epub.on("end", function () {
* console.log(epub.spine); * console.log(epub.spine);
* }); * });
* epub.on("error", function(error){ ... }); * epub.on("error", function (error) { ... });
* epub.parse(); * epub.parse();
* *
* Image and link URL format is: * Image and link URL format is:
@ -34,15 +30,19 @@ module.exports = EPub;
* *
* /images/logo_img/OPT/logo.jpg * /images/logo_img/OPT/logo.jpg
**/ **/
function EPub(fname, imageroot, linkroot){ function EPub(fname, imageroot, linkroot) {
EventEmitter.call(this); EventEmitter.call(this);
this.filename = fname; this.filename = fname;
this.imageroot = (imageroot || "/images/").trim(); this.imageroot = (imageroot || "/images/").trim();
this.linkroot = (linkroot || "/links/").trim(); this.linkroot = (linkroot || "/links/").trim();
if(this.imageroot.substr(-1)!="/")this.imageroot+="/"; if (this.imageroot.substr(-1) != "/") {
if(this.linkroot.substr(-1)!="/")this.linkroot+="/"; this.imageroot += "/";
}
if (this.linkroot.substr(-1) != "/") {
this.linkroot += "/";
}
} }
utillib.inherits(EPub, EventEmitter); utillib.inherits(EPub, EventEmitter);
@ -51,7 +51,7 @@ utillib.inherits(EPub, EventEmitter);
* *
* Starts the parser, needs to be called by the script * Starts the parser, needs to be called by the script
**/ **/
EPub.prototype.parse = function(){ EPub.prototype.parse = function () {
this.containerFile = false; this.containerFile = false;
this.mimeFile = false; this.mimeFile = false;
@ -59,11 +59,11 @@ EPub.prototype.parse = function(){
this.metadata = {}; this.metadata = {};
this.manifest = {}; this.manifest = {};
this.spine = {toc: false, contents:[]}; this.spine = {toc: false, contents: []};
this.toc = []; this.toc = [];
this.open(); this.open();
} };
/** /**
* EPub#open() -> undefined * EPub#open() -> undefined
@ -71,21 +71,21 @@ EPub.prototype.parse = function(){
* Opens the epub file with Zip unpacker, retrieves file listing * Opens the epub file with Zip unpacker, retrieves file listing
* and runs mime type check * and runs mime type check
**/ **/
EPub.prototype.open = function(){ EPub.prototype.open = function () {
try{ try {
this.zip = new ZipFile(this.filename); this.zip = new ZipFile(this.filename);
}catch(E){ } catch (E) {
this.emit("error", new Error("Invalid/missing file")); this.emit("error", new Error("Invalid/missing file"));
return; return;
} }
if(!this.zip.names || !this.zip.names.length){ if (!this.zip.names || !this.zip.names.length) {
this.emit("error", new Error("No files in archive")); this.emit("error", new Error("No files in archive"));
return; return;
} }
this.checkMimeType(); this.checkMimeType();
} };
/** /**
* EPub#checkMimeType() -> undefined * EPub#checkMimeType() -> undefined
@ -93,32 +93,34 @@ EPub.prototype.open = function(){
* Checks if there's a file called "mimetype" and that its contents * Checks if there's a file called "mimetype" and that its contents
* are "application/epub+zip". On success runs root file check. * are "application/epub+zip". On success runs root file check.
**/ **/
EPub.prototype.checkMimeType = function(){ EPub.prototype.checkMimeType = function () {
for(var i=0, len = this.zip.names.length; i<len; i++){ var i, len;
if(this.zip.names[i].toLowerCase() == "mimetype"){
for (i = 0, len = this.zip.names.length; i < len; i++) {
if (this.zip.names[i].toLowerCase() == "mimetype") {
this.mimeFile = this.zip.names[i]; this.mimeFile = this.zip.names[i];
break; break;
} }
} }
if(!this.mimeFile){ if (!this.mimeFile) {
this.emit("error", new Error("No mimetype file in archive")); this.emit("error", new Error("No mimetype file in archive"));
return; return;
} }
this.zip.readFile(this.mimeFile, (function(err, data){ this.zip.readFile(this.mimeFile, (function (err, data) {
if(err){ if (err) {
this.emit("error", new Error("Reading archive failed")); this.emit("error", new Error("Reading archive failed"));
return; return;
} }
var txt = data.toString("utf-8").toLowerCase().trim(); var txt = data.toString("utf-8").toLowerCase().trim();
if(txt != "application/epub+zip"){ if (txt != "application/epub+zip") {
this.emit("error", new Error("Unsupported mime type")); this.emit("error", new Error("Unsupported mime type"));
return; return;
} }
this.getRootFiles(); this.getRootFiles();
}).bind(this)); }).bind(this));
} };
/** /**
* EPub#getRootFiles() -> undefined * EPub#getRootFiles() -> undefined
@ -127,69 +129,70 @@ EPub.prototype.checkMimeType = function(){
* rootfile element with mime type "application/oebps-package+xml". * rootfile element with mime type "application/oebps-package+xml".
* On success calls the rootfile parser * On success calls the rootfile parser
**/ **/
EPub.prototype.getRootFiles = function(){ EPub.prototype.getRootFiles = function () {
for(var i=0, len = this.zip.names.length; i<len; i++){ var i, len;
if(this.zip.names[i].toLowerCase() == "meta-inf/container.xml"){ for (i = 0, len = this.zip.names.length; i < len; i++) {
if (this.zip.names[i].toLowerCase() == "meta-inf/container.xml") {
this.containerFile = this.zip.names[i]; this.containerFile = this.zip.names[i];
break; break;
} }
} }
if(!this.containerFile){ if (!this.containerFile) {
this.emit("error", new Error("No container file in archive")); this.emit("error", new Error("No container file in archive"));
return; return;
} }
this.zip.readFile(this.containerFile, (function(err, data){ this.zip.readFile(this.containerFile, (function (err, data) {
if(err){ if (err) {
this.emit("error", new Error("Reading archive failed")); this.emit("error", new Error("Reading archive failed"));
return; return;
} }
var xml = data.toString("utf-8").toLowerCase().trim(), var xml = data.toString("utf-8").toLowerCase().trim(),
xmlparser = new XML2JS(); xmlparser = new XML2JS();
xmlparser.on("end", (function(result){ xmlparser.on("end", (function (result) {
if(!result.rootfiles || !result.rootfiles.rootfile){ if (!result.rootfiles || !result.rootfiles.rootfile) {
this.emit("error", new Error("No rootfiles found")); this.emit("error", new Error("No rootfiles found"));
return; return;
} }
var rootfile = result.rootfiles.rootfile, var rootfile = result.rootfiles.rootfile,
filename = false; filename = false, i, len;
if(Array.isArray(rootfile)){ if (Array.isArray(rootfile)) {
for(var i=0, len = rootfile.length; i<len; i++){ for (i = 0, len = rootfile.length; i < len; i++) {
if(rootfile[i]["@"]["media-type"] && if (rootfile[i]["@"]["media-type"] &&
rootfile[i]["@"]["media-type"] == "application/oebps-package+xml" && rootfile[i]["@"]["media-type"] == "application/oebps-package+xml" &&
rootfile[i]["@"]["full-path"]){ rootfile[i]["@"]["full-path"]) {
filename = rootfile[i]["@"]["full-path"].toLowerCase().trim(); filename = rootfile[i]["@"]["full-path"].toLowerCase().trim();
break; break;
} }
} }
}else if(rootfile["@"]){ } else if (rootfile["@"]) {
if(rootfile["@"]["media-type"] != "application/oebps-package+xml" || !rootfile["@"]["full-path"]){ if (rootfile["@"]["media-type"] != "application/oebps-package+xml" || !rootfile["@"]["full-path"]) {
this.emit("error", new Error("Rootfile in unknown format")); this.emit("error", new Error("Rootfile in unknown format"));
return; return;
} }
filename = rootfile["@"]["full-path"].toLowerCase().trim(); filename = rootfile["@"]["full-path"].toLowerCase().trim();
} }
if(!filename){ if (!filename) {
this.emit("error", new Error("Empty rootfile")); this.emit("error", new Error("Empty rootfile"));
return; return;
} }
for(var i=0, len = this.zip.names.length; i<len; i++){ for (i = 0, len = this.zip.names.length; i < len; i++) {
if(this.zip.names[i].toLowerCase() == filename){ if (this.zip.names[i].toLowerCase() == filename) {
this.rootFile = this.zip.names[i]; this.rootFile = this.zip.names[i];
break; break;
} }
} }
if(!this.rootFile){ if (!this.rootFile) {
this.emit("error", new Error("Rootfile not found from archive")); this.emit("error", new Error("Rootfile not found from archive"));
return; return;
} }
@ -198,7 +201,7 @@ EPub.prototype.getRootFiles = function(){
}).bind(this)); }).bind(this));
xmlparser.on("error", (function(err){ xmlparser.on("error", (function (err) {
this.emit("error", new Error("Parsing container XML failed")); this.emit("error", new Error("Parsing container XML failed"));
return; return;
}).bind(this)); }).bind(this));
@ -207,17 +210,17 @@ EPub.prototype.getRootFiles = function(){
}).bind(this)); }).bind(this));
} };
/** /**
* EPub#handleRootFile() -> undefined * EPub#handleRootFile() -> undefined
* *
* Parses the rootfile XML and calls the rootfile parser * Parses the rootfile XML and calls the rootfile parser
**/ **/
EPub.prototype.handleRootFile = function(){ EPub.prototype.handleRootFile = function () {
this.zip.readFile(this.rootFile, (function(err, data){ this.zip.readFile(this.rootFile, (function (err, data) {
if(err){ if (err) {
this.emit("error", new Error("Reading archive failed")); this.emit("error", new Error("Reading archive failed"));
return; return;
} }
@ -226,7 +229,7 @@ EPub.prototype.handleRootFile = function(){
xmlparser.on("end", this.parseRootFile.bind(this)); xmlparser.on("end", this.parseRootFile.bind(this));
xmlparser.on("error", (function(err){ xmlparser.on("error", (function (err) {
this.emit("error", new Error("Parsing container XML failed")); this.emit("error", new Error("Parsing container XML failed"));
return; return;
}).bind(this)); }).bind(this));
@ -234,7 +237,7 @@ EPub.prototype.handleRootFile = function(){
xmlparser.parseString(xml); xmlparser.parseString(xml);
}).bind(this)); }).bind(this));
} };
/** /**
* EPub#parseRootFile() -> undefined * EPub#parseRootFile() -> undefined
@ -242,15 +245,15 @@ EPub.prototype.handleRootFile = function(){
* Parses elements "metadata," "manifest," "spine" and TOC. * Parses elements "metadata," "manifest," "spine" and TOC.
* Emits "end" if no TOC * Emits "end" if no TOC
**/ **/
EPub.prototype.parseRootFile = function(rootfile){ EPub.prototype.parseRootFile = function (rootfile) {
var keys, keyparts, key; var i, len, keys, keyparts, key;
keys = Object.keys(rootfile); keys = Object.keys(rootfile);
for(var i=0, len = keys.length; i<len; i++){ for (i = 0, len = keys.length; i < len; i++) {
keyparts = keys[i].split(":"); keyparts = keys[i].split(":");
key = (keyparts.pop() || "").toLowerCase().trim(); key = (keyparts.pop() || "").toLowerCase().trim();
switch(key){ switch (key) {
case "metadata": case "metadata":
this.parseMetadata(rootfile[keys[i]]); this.parseMetadata(rootfile[keys[i]]);
break; break;
@ -266,88 +269,89 @@ EPub.prototype.parseRootFile = function(rootfile){
} }
} }
if(this.spine.toc){ if (this.spine.toc) {
this.parseTOC(); this.parseTOC();
}else } else {
this.emit("end"); this.emit("end");
} }
};
/** /**
* EPub#parseMetadata() -> undefined * EPub#parseMetadata() -> undefined
* *
* Parses "metadata" block (book metadata, title, author etc.) * Parses "metadata" block (book metadata, title, author etc.)
**/ **/
EPub.prototype.parseMetadata = function(metadata){ EPub.prototype.parseMetadata = function (metadata) {
var keys, keyparts, key; var i, j, len, keys, keyparts, key;
keys = Object.keys(metadata); keys = Object.keys(metadata);
for(var i=0, len = keys.length; i<len; i++){ for (i = 0, len = keys.length; i < len; i++) {
keyparts = keys[i].split(":"); keyparts = keys[i].split(":");
key = (keyparts.pop() || "").toLowerCase().trim(); key = (keyparts.pop() || "").toLowerCase().trim();
switch(key){ switch (key) {
case "publisher": case "publisher":
if(Array.isArray(metadata[keys[i]])){ if (Array.isArray(metadata[keys[i]])) {
this.metadata.publisher = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim(); this.metadata.publisher = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim();
}else{ } else {
this.metadata.publisher = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim(); this.metadata.publisher = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim();
} }
break; break;
case "language": case "language":
if(Array.isArray(metadata[keys[i]])){ if (Array.isArray(metadata[keys[i]])) {
this.metadata.language = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").toLowerCase().trim(); this.metadata.language = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").toLowerCase().trim();
}else{ } else {
this.metadata.language = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").toLowerCase().trim(); this.metadata.language = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").toLowerCase().trim();
} }
break; break;
case "title": case "title":
if(Array.isArray(metadata[keys[i]])){ if (Array.isArray(metadata[keys[i]])) {
this.metadata.title = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim(); this.metadata.title = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim();
}else{ } else {
this.metadata.title = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim(); this.metadata.title = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim();
} }
break; break;
case "subject": case "subject":
if(Array.isArray(metadata[keys[i]])){ if (Array.isArray(metadata[keys[i]])) {
this.metadata.subject = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim(); this.metadata.subject = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim();
}else{ } else {
this.metadata.subject = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim(); this.metadata.subject = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim();
} }
break; break;
case "description": case "description":
if(Array.isArray(metadata[keys[i]])){ if (Array.isArray(metadata[keys[i]])) {
this.metadata.description = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim(); this.metadata.description = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim();
}else{ } else {
this.metadata.description = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim(); this.metadata.description = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim();
} }
break; break;
case "creator": case "creator":
if(Array.isArray(metadata[keys[i]])){ if (Array.isArray(metadata[keys[i]])) {
this.metadata.creator = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim(); this.metadata.creator = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim();
this.metadata.creatorFileAs = String(metadata[keys[i]][0] && metadata[keys[i]][0]['@'] && metadata[keys[i]][0]['@']["opf:file-as"] || this.metadata.creator).trim(); this.metadata.creatorFileAs = String(metadata[keys[i]][0] && metadata[keys[i]][0]['@'] && metadata[keys[i]][0]['@']["opf:file-as"] || this.metadata.creator).trim();
}else{ } else {
this.metadata.creator = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim(); this.metadata.creator = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim();
this.metadata.creatorFileAs = String(metadata[keys[i]]['@'] && metadata[keys[i]]['@']["opf:file-as"] || this.metadata.creator).trim(); this.metadata.creatorFileAs = String(metadata[keys[i]]['@'] && metadata[keys[i]]['@']["opf:file-as"] || this.metadata.creator).trim();
} }
break; break;
case "date": case "date":
if(Array.isArray(metadata[keys[i]])){ if (Array.isArray(metadata[keys[i]])) {
this.metadata.date = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim(); this.metadata.date = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim();
}else{ } else {
this.metadata.date = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim(); this.metadata.date = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim();
} }
break; break;
case "identifier": case "identifier":
if(metadata[keys[i]]["@"] && metadata[keys[i]]["@"]["opf:scheme"]=="ISBN"){ if (metadata[keys[i]]["@"] && metadata[keys[i]]["@"]["opf:scheme"] == "ISBN") {
this.metadata.ISBN = String(metadata[keys[i]]["#"] || "").trim(); this.metadata.ISBN = String(metadata[keys[i]]["#"] || "").trim();
}else if(metadata[keys[i]]["@"] && metadata[keys[i]]["@"]["id"] && metadata[keys[i]]["@"]["id"].match(/uuid/i)){ } else if (metadata[keys[i]]["@"] && metadata[keys[i]]["@"].id && metadata[keys[i]]["@"].id.match(/uuid/i)) {
this.metadata.UUID = String(metadata[keys[i]]["#"] || "").replace('urn:uuid:','').toUpperCase().trim(); this.metadata.UUID = String(metadata[keys[i]]["#"] || "").replace('urn:uuid:', '').toUpperCase().trim();
}else if(Array.isArray(metadata[keys[i]])){ } else if (Array.isArray(metadata[keys[i]])) {
for(var j=0; j<metadata[keys[i]].length; j++){ for (j = 0; j < metadata[keys[i]].length; j++) {
if(metadata[keys[i]][j]["@"]){ if (metadata[keys[i]][j]["@"]) {
if(metadata[keys[i]][j]["@"]["opf:scheme"]=="ISBN"){ if (metadata[keys[i]][j]["@"]["opf:scheme"] == "ISBN") {
this.metadata.ISBN = String(metadata[keys[i]][j]["#"] || "").trim(); this.metadata.ISBN = String(metadata[keys[i]][j]["#"] || "").trim();
}else if(metadata[keys[i]][j]["@"]["id"] && metadata[keys[i]][j]["@"]["id"].match(/uuid/i)){ } else if (metadata[keys[i]][j]["@"].id && metadata[keys[i]][j]["@"].id.match(/uuid/i)) {
this.metadata.UUID = String(metadata[keys[i]][j]["#"] || "").replace('urn:uuid:','').toUpperCase().trim(); this.metadata.UUID = String(metadata[keys[i]][j]["#"] || "").replace('urn:uuid:', '').toUpperCase().trim();
} }
} }
} }
@ -355,24 +359,24 @@ EPub.prototype.parseMetadata = function(metadata){
break; break;
} }
} }
} };
/** /**
* EPub#parseManifest() -> undefined * EPub#parseManifest() -> undefined
* *
* Parses "manifest" block (all items included, html files, images, styles) * Parses "manifest" block (all items included, html files, images, styles)
**/ **/
EPub.prototype.parseManifest = function(manifest){ EPub.prototype.parseManifest = function (manifest) {
var path = this.rootFile.split("/"), element, path_str; var i, len, path = this.rootFile.split("/"), element, path_str;
path.pop(); path.pop();
path_str = path.join("/"); path_str = path.join("/");
if(manifest.item){ if (manifest.item) {
for(var i=0, len = manifest.item.length; i<len; i++){ for (i = 0, len = manifest.item.length; i < len; i++) {
if(manifest.item[i]['@']){ if (manifest.item[i]['@']) {
element = manifest.item[i]['@']; element = manifest.item[i]['@'];
if(element.href && element.href.substr(0, path_str.length) != path_str){ if (element.href && element.href.substr(0, path_str.length) != path_str) {
element.href = path.concat([element.href]).join("/"); element.href = path.concat([element.href]).join("/");
} }
@ -381,63 +385,63 @@ EPub.prototype.parseManifest = function(manifest){
} }
} }
} }
} };
/** /**
* EPub#parseSpine() -> undefined * EPub#parseSpine() -> undefined
* *
* Parses "spine" block (all html elements that are shown to the reader) * Parses "spine" block (all html elements that are shown to the reader)
**/ **/
EPub.prototype.parseSpine = function(spine){ EPub.prototype.parseSpine = function (spine) {
var path = this.rootFile.split("/"), element, path_s var i, len, path = this.rootFile.split("/"), element;
path.pop(); path.pop();
if(spine['@'] && spine['@'].toc){ if (spine['@'] && spine['@'].toc) {
this.spine.toc = this.manifest[spine['@'].toc] || false; this.spine.toc = this.manifest[spine['@'].toc] || false;
} }
if(spine.itemref){ if (spine.itemref) {
for(var i=0, len = spine.itemref.length; i<len; i++){ for (i = 0, len = spine.itemref.length; i < len; i++) {
if(spine.itemref[i]['@']){ if (spine.itemref[i]['@']) {
if(element = this.manifest[spine.itemref[i]['@'].idref]){ if (element = this.manifest[spine.itemref[i]['@'].idref]) {
this.spine.contents.push(element); this.spine.contents.push(element);
} }
} }
} }
} }
} };
/** /**
* EPub#parseTOC() -> undefined * EPub#parseTOC() -> undefined
* *
* Parses ncx file for table of contents (title, html file) * Parses ncx file for table of contents (title, html file)
**/ **/
EPub.prototype.parseTOC = function(){ EPub.prototype.parseTOC = function () {
var path = this.spine.toc.href.split("/"), id_list = {}, keys; var i, len, path = this.spine.toc.href.split("/"), id_list = {}, keys;
path.pop(); path.pop();
keys = Object.keys(this.manifest); keys = Object.keys(this.manifest);
for(var i=0, len = keys.length; i<len; i++){ for (i = 0, len = keys.length; i < len; i++) {
id_list[this.manifest[keys[i]].href] = keys[i]; id_list[this.manifest[keys[i]].href] = keys[i];
} }
this.zip.readFile(this.spine.toc.href, (function(err, data){ this.zip.readFile(this.spine.toc.href, (function (err, data) {
if(err){ if (err) {
this.emit("error", new Error("Reading archive failed")); this.emit("error", new Error("Reading archive failed"));
return; return;
} }
var xml = data.toString("utf-8"), var xml = data.toString("utf-8"),
xmlparser = new XML2JS(); xmlparser = new XML2JS();
xmlparser.on("end", (function(result){ xmlparser.on("end", (function (result) {
if(result.navMap && result.navMap.navPoint){ if (result.navMap && result.navMap.navPoint) {
this.toc = this.walkNavMap(result.navMap.navPoint, path, id_list); this.toc = this.walkNavMap(result.navMap.navPoint, path, id_list);
} }
this.emit("end"); this.emit("end");
}).bind(this)); }).bind(this));
xmlparser.on("error", (function(err){ xmlparser.on("error", (function (err) {
this.emit("error", new Error("Parsing container XML failed")); this.emit("error", new Error("Parsing container XML failed"));
return; return;
}).bind(this)); }).bind(this));
@ -445,7 +449,7 @@ EPub.prototype.parseTOC = function(){
xmlparser.parseString(xml); xmlparser.parseString(xml);
}).bind(this)); }).bind(this));
} };
/** /**
* EPub#walkNavMap(branch, path, id_list,[, level]) -> Array * EPub#walkNavMap(branch, path, id_list,[, level]) -> Array
@ -457,42 +461,44 @@ EPub.prototype.parseTOC = function(){
* Walks the NavMap object through all levels and finds elements * Walks the NavMap object through all levels and finds elements
* for TOC * for TOC
**/ **/
EPub.prototype.walkNavMap = function(branch, path, id_list, level){ EPub.prototype.walkNavMap = function (branch, path, id_list, level) {
level = level || 0; level = level || 0;
// don't go too far // don't go too far
if(level>7)return []; if (level > 7) {
return [];
}
var output = [], element, id, title, order, href; var i, len, output = [], element, title, order, href;
if(!Array.isArray(branch)){ if (!Array.isArray(branch)) {
branch = [branch]; branch = [branch];
} }
for(var i=0, len = branch.length; i<len; i++){ for (i = 0, len = branch.length; i < len; i++) {
if(branch[i]["navLabel"]){ if (branch[i].navLabel) {
title = (branch[i]["navLabel"] && branch[i]["navLabel"].text || branch[i]["navLabel"] || "").trim(); title = (branch[i].navLabel && branch[i].navLabel.text || branch[i].navLabel || "").trim();
order = Number(branch[i]["@"] && branch[i]["@"].playOrder || 0); order = Number(branch[i]["@"] && branch[i]["@"].playOrder || 0);
href = (branch[i]["content"] && branch[i]["content"]["@"] && branch[i]["content"]["@"].src || "").trim(); href = (branch[i].content && branch[i].content["@"] && branch[i].content["@"].src || "").trim();
element = { element = {
level: level, level: level,
order: order, order: order,
title: title title: title
} };
if(href){ if (href) {
href = path.concat([href]).join("/") href = path.concat([href]).join("/");
element.href = href; element.href = href;
if(id_list[element.href]){ if (id_list[element.href]) {
// link existing object // link existing object
element = this.manifest[id_list[element.href]]; element = this.manifest[id_list[element.href]];
element.title = title; element.title = title;
element.order = order; element.order = order;
element.level = level; element.level = level;
}else{ } else {
// use new one // use new one
element.href = href; element.href = href;
element.id = (branch[i]["@"] && branch[i]["@"].id || "").trim(); element.id = (branch[i]["@"] && branch[i]["@"].id || "").trim();
@ -501,12 +507,12 @@ EPub.prototype.walkNavMap = function(branch, path, id_list, level){
output.push(element); output.push(element);
} }
} }
if(branch[i]["navPoint"]){ if (branch[i].navPoint) {
output = output.concat(this.walkNavMap(branch[i]["navPoint"], path, id_list, level + 1)); output = output.concat(this.walkNavMap(branch[i].navPoint, path, id_list, level + 1));
} }
} }
return output; return output;
} };
/** /**
* EPub#getChapter(id, callback) -> undefined * EPub#getChapter(id, callback) -> undefined
@ -516,18 +522,18 @@ EPub.prototype.walkNavMap = function(branch, path, id_list, level){
* Finds a chapter text for an id. Replaces image and link URLs, removes * Finds a chapter text for an id. Replaces image and link URLs, removes
* <head> etc. elements. Return only chapters with mime type application/xhtml+xml * <head> etc. elements. Return only chapters with mime type application/xhtml+xml
**/ **/
EPub.prototype.getChapter = function(id, callback){ EPub.prototype.getChapter = function (id, callback) {
var path = this.rootFile.split("/"), keys = Object.keys(this.manifest); var i, len, path = this.rootFile.split("/"), keys = Object.keys(this.manifest);
path.pop(); path.pop();
if(this.manifest[id]){ if (this.manifest[id]) {
if((this.manifest[id]['media-type'] || "").toLowerCase().trim() != "application/xhtml+xml"){ if ((this.manifest[id]['media-type'] || "").toLowerCase().trim() != "application/xhtml+xml") {
return callback(new Error("Inavlid mime type for chapter")); return callback(new Error("Inavlid mime type for chapter"));
} }
this.zip.readFile(this.manifest[id].href, (function(err, data){ this.zip.readFile(this.manifest[id].href, (function (err, data) {
if(err){ if (err) {
callback(new Error("Reading archive failed")); callback(new Error("Reading archive failed"));
return; return;
} }
@ -535,85 +541,85 @@ EPub.prototype.getChapter = function(id, callback){
var str = data.toString("utf-8"); var str = data.toString("utf-8");
// remove linebreaks (no multi line matches in JS regex!) // remove linebreaks (no multi line matches in JS regex!)
str = str.replace(/\r?\n/g,"\u0000"); str = str.replace(/\r?\n/g, "\u0000");
// keep only <body> contents // keep only <body> contents
str.replace(/<body[^>]*?>(.*)<\/body[^>]*?>/i, function(o,d){ str.replace(/<body[^>]*?>(.*)<\/body[^>]*?>/i, function (o, d) {
str = d.trim(); str = d.trim();
}); });
// remove <script> blocks if any // remove <script> blocks if any
str = str.replace(/<script[^>]*?>(.*?)<\/script[^>]*?>/ig, function(o, s){ str = str.replace(/<script[^>]*?>(.*?)<\/script[^>]*?>/ig, function (o, s) {
return ""; return "";
}); });
// remove <style> blocks if any // remove <style> blocks if any
str = str.replace(/<style[^>]*?>(.*?)<\/style[^>]*?>/ig, function(o, s){ str = str.replace(/<style[^>]*?>(.*?)<\/style[^>]*?>/ig, function (o, s) {
return ""; return "";
}); });
// remove onEvent handlers // remove onEvent handlers
str = str.replace(/(\s)(on\w+)(\s*=\s*["']?[^"'\s>]*?["'\s>])/g, function(o, a,b,c){ str = str.replace(/(\s)(on\w+)(\s*=\s*["']?[^"'\s>]*?["'\s>])/g, function (o, a, b, c) {
return a + "skip-" + b + c; return a + "skip-" + b + c;
}); });
// replace images // replace images
str = str.replace(/(\ssrc\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (function(o, a,b,c){ str = str.replace(/(\ssrc\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (function (o, a, b, c) {
var img = path.concat([b]).join("/").trim(), var img = path.concat([b]).join("/").trim(),
element; element;
for(var i=0, len=keys.length; i<len; i++){ for (i = 0, len = keys.length; i < len; i++) {
if(this.manifest[keys[i]].href == img){ if (this.manifest[keys[i]].href == img) {
element = this.manifest[keys[i]]; element = this.manifest[keys[i]];
break; break;
} }
} }
// include only images from manifest // include only images from manifest
if(element){ if (element) {
return a + this.imageroot + element.id+ "/" + img + c; return a + this.imageroot + element.id + "/" + img + c;
}else{ } else {
return ""; return "";
} }
}).bind(this)); }).bind(this));
// replace links // replace links
str = str.replace(/(\shref\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (function(o, a, b, c){ str = str.replace(/(\shref\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (function (o, a, b, c) {
var linkparts = b && b.split("#"), var linkparts = b && b.split("#"),
link = path.concat([(linkparts.shift() || "")]).join("/").trim(), link = path.concat([(linkparts.shift() || "")]).join("/").trim(),
element; element;
for(var i=0, len=keys.length; i<len; i++){ for (i = 0, len = keys.length; i < len; i++) {
if(this.manifest[keys[i]].href.split("#")[0] == link){ if (this.manifest[keys[i]].href.split("#")[0] == link) {
element = this.manifest[keys[i]]; element = this.manifest[keys[i]];
break; break;
} }
} }
if(linkparts.length){ if (linkparts.length) {
link += "#" + linkparts.join("#"); link += "#" + linkparts.join("#");
} }
// rewrite only links that point to a manifest item // rewrite only links that point to a manifest item
if(element){ if (element) {
return a + this.linkroot + element.id+ "/" + link + c; return a + this.linkroot + element.id + "/" + link + c;
}else{ } else {
return a + b + c; return a + b + c;
} }
}).bind(this)); }).bind(this));
// bring back linebreaks // bring back linebreaks
str = str.replace(/\u0000/g,"\n").trim(); str = str.replace(/\u0000/g, "\n").trim();
callback(null, str); callback(null, str);
}).bind(this)); }).bind(this));
}else{ } else {
callback(new Error("File not found")); callback(new Error("File not found"));
} }
} };
/** /**
@ -623,24 +629,27 @@ EPub.prototype.getChapter = function(id, callback){
* *
* Finds an image for an id. Returns the image as a Buffer. Callback gets * Finds an image for an id. Returns the image as a Buffer. Callback gets
* an error object, image buffer and image content-type. * an error object, image buffer and image content-type.
* Return only images with mime type image/* * Return only images with mime type image
**/ **/
EPub.prototype.getImage = function(id, callback){ EPub.prototype.getImage = function (id, callback) {
if(this.manifest[id]){ if (this.manifest[id]) {
if((this.manifest[id]['media-type'] || "").toLowerCase().trim().substr(0,6) != "image/"){ if ((this.manifest[id]['media-type'] || "").toLowerCase().trim().substr(0, 6) != "image/") {
return callback(new Error("Inavlid mime type for image")); return callback(new Error("Inavlid mime type for image"));
} }
this.zip.readFile(this.manifest[id].href, (function(err, data){ this.zip.readFile(this.manifest[id].href, (function (err, data) {
if(err){ if (err) {
callback(new Error("Reading archive failed")); callback(new Error("Reading archive failed"));
return; return;
} }
callback(null, data, this.manifest[id]['media-type']); callback(null, data, this.manifest[id]['media-type']);
}).bind(this)); }).bind(this));
}else{ } else {
callback(new Error("File not found")); callback(new Error("File not found"));
} }
} };
// Expose to the world
module.exports = EPub;