diff --git a/README.md b/README.md new file mode 100644 index 0000000..7f19ae6 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +#epub + +**epub** is a node.js module to parse EPUB electronic book files. \ No newline at end of file diff --git a/epub.js b/epub.js index f5c7800..5ad3385 100644 --- a/epub.js +++ b/epub.js @@ -3,11 +3,7 @@ var ZipFile = require("zipfile").ZipFile, utillib = require("util"), EventEmitter = require('events').EventEmitter; - -//TODO: Cache parsed data to DB - -// export -module.exports = EPub; +//TODO: Cache parsed data /** * new EPub(fname[, imageroot][, linkroot]) @@ -18,10 +14,10 @@ module.exports = EPub; * Creates an Event Emitter type object for parsing epub files * * var epub = new EPub("book.epub"); - * epub.on("end", function(){ + * epub.on("end", function () { * console.log(epub.spine); * }); - * epub.on("error", function(error){ ... }); + * epub.on("error", function (error) { ... }); * epub.parse(); * * Image and link URL format is: @@ -29,41 +25,45 @@ module.exports = EPub; * imageroot + img_id + img_zip_path * * So an image "logo.jpg" which resides in "OPT/" in the zip archive - * and is listed in the manifest with id "logo_img" will have the + * and is listed in the manifest with id "logo_img" will have the * following url (providing that imageroot is "/images/"): * * /images/logo_img/OPT/logo.jpg **/ -function EPub(fname, imageroot, linkroot){ +function EPub(fname, imageroot, linkroot) { EventEmitter.call(this); this.filename = fname; this.imageroot = (imageroot || "/images/").trim(); this.linkroot = (linkroot || "/links/").trim(); - if(this.imageroot.substr(-1)!="/")this.imageroot+="/"; - if(this.linkroot.substr(-1)!="/")this.linkroot+="/"; + if (this.imageroot.substr(-1) != "/") { + this.imageroot += "/"; + } + if (this.linkroot.substr(-1) != "/") { + this.linkroot += "/"; + } } utillib.inherits(EPub, EventEmitter); /** * EPub#parse() -> undefined - * + * * Starts the parser, needs to be called by the script **/ -EPub.prototype.parse = function(){ - +EPub.prototype.parse = function () { + this.containerFile = false; this.mimeFile = false; this.rootFile = false; this.metadata = {}; this.manifest = {}; - this.spine = {toc: false, contents:[]}; + this.spine = {toc: false, contents: []}; this.toc = []; this.open(); -} +}; /** * EPub#open() -> undefined @@ -71,21 +71,21 @@ EPub.prototype.parse = function(){ * Opens the epub file with Zip unpacker, retrieves file listing * and runs mime type check **/ -EPub.prototype.open = function(){ - try{ +EPub.prototype.open = function () { + try { this.zip = new ZipFile(this.filename); - }catch(E){ + } catch (E) { this.emit("error", new Error("Invalid/missing file")); return; } - - if(!this.zip.names || !this.zip.names.length){ + + if (!this.zip.names || !this.zip.names.length) { this.emit("error", new Error("No files in archive")); return; } this.checkMimeType(); -} +}; /** * EPub#checkMimeType() -> undefined @@ -93,32 +93,34 @@ EPub.prototype.open = function(){ * Checks if there's a file called "mimetype" and that it's contents * are "application/epub+zip". On success runs root file check. **/ -EPub.prototype.checkMimeType = function(){ - for(var i=0, len = this.zip.names.length; i undefined @@ -127,97 +129,98 @@ EPub.prototype.checkMimeType = function(){ * rootfile element with mime type "application/oebps-package+xml". * On success calls the rootfile parser **/ -EPub.prototype.getRootFiles = function(){ - for(var i=0, len = this.zip.names.length; i undefined * * Parser the rootfile XML and calls rootfile parser **/ -EPub.prototype.handleRootFile = function(){ +EPub.prototype.handleRootFile = function () { - this.zip.readFile(this.rootFile, (function(err, data){ - if(err){ + this.zip.readFile(this.rootFile, (function (err, data) { + if (err) { this.emit("error", new Error("Reading archive failed")); return; } @@ -226,7 +229,7 @@ EPub.prototype.handleRootFile = function(){ xmlparser.on("end", this.parseRootFile.bind(this)); - xmlparser.on("error", (function(err){ + xmlparser.on("error", (function (err) { this.emit("error", new Error("Parsing container XML failed")); return; }).bind(this)); @@ -234,145 +237,146 @@ EPub.prototype.handleRootFile = function(){ xmlparser.parseString(xml); }).bind(this)); -} +}; /** * EPub#parseRootFile() -> undefined * - * Parses elements "metadata," "manifest," "spine" and TOC. + * Parses elements "metadata," "manifest," "spine" and TOC. * Emits "end" if no TOC **/ -EPub.prototype.parseRootFile = function(rootfile){ +EPub.prototype.parseRootFile = function (rootfile) { - var keys, keyparts, key; + var i, len, keys, keyparts, key; keys = Object.keys(rootfile); - for(var i=0, len = keys.length; i undefined * * Parses "metadata" block (book metadata, title, author etc.) **/ -EPub.prototype.parseMetadata = function(metadata){ - var keys, keyparts, key; +EPub.prototype.parseMetadata = function (metadata) { + var i, j, len, keys, keyparts, key; keys = Object.keys(metadata); - for(var i=0, len = keys.length; i undefined * * Parses "manifest" block (all items included, html files, images, styles) **/ -EPub.prototype.parseManifest = function(manifest){ - var path = this.rootFile.split("/"), element, path_str; +EPub.prototype.parseManifest = function (manifest) { + var i, len, path = this.rootFile.split("/"), element, path_str; path.pop(); path_str = path.join("/"); - if(manifest.item){ - for(var i=0, len = manifest.item.length; i undefined * * Parses "spine" block (all html elements that are shown to the reader) **/ -EPub.prototype.parseSpine = function(spine){ - var path = this.rootFile.split("/"), element, path_s +EPub.prototype.parseSpine = function (spine) { + var i, len, path = this.rootFile.split("/"), element; path.pop(); - if(spine['@'] && spine['@'].toc){ + if (spine['@'] && spine['@'].toc) { this.spine.toc = this.manifest[spine['@'].toc] || false; } - if(spine.itemref){ - for(var i=0, len = spine.itemref.length; i undefined * * Parses ncx file for table of contents (title, html file) **/ -EPub.prototype.parseTOC = function(){ - var path = this.spine.toc.href.split("/"), id_list = {}, keys; +EPub.prototype.parseTOC = function () { + var i, len, path = this.spine.toc.href.split("/"), id_list = {}, keys; path.pop(); keys = Object.keys(this.manifest); - for(var i=0, len = keys.length; i Array @@ -457,56 +461,58 @@ EPub.prototype.parseTOC = function(){ * Walks the NavMap object through all levels and finds elements * for TOC **/ -EPub.prototype.walkNavMap = function(branch, path, id_list, level){ +EPub.prototype.walkNavMap = function (branch, path, id_list, level) { level = level || 0; - + // don't go too far - if(level>7)return []; + if (level > 7) { + return []; + } - var output = [], element, id, title, order, href; + var i, len, output = [], element, title, order, href; - if(!Array.isArray(branch)){ + if (!Array.isArray(branch)) { branch = [branch]; } - for(var i=0, len = branch.length; i undefined @@ -516,18 +522,18 @@ EPub.prototype.walkNavMap = function(branch, path, id_list, level){ * Finds a chapter text for an id. Replaces image and link URL's, removes * etc. elements. Return only chapters with mime type application/xhtml+xml **/ -EPub.prototype.getChapter = function(id, callback){ - var path = this.rootFile.split("/"), keys = Object.keys(this.manifest); +EPub.prototype.getChapter = function (id, callback) { + var i, len, path = this.rootFile.split("/"), keys = Object.keys(this.manifest); path.pop(); - if(this.manifest[id]){ - - if((this.manifest[id]['media-type'] || "").toLowerCase().trim() != "application/xhtml+xml"){ + if (this.manifest[id]) { + + if ((this.manifest[id]['media-type'] || "").toLowerCase().trim() != "application/xhtml+xml") { return callback(new Error("Inavlid mime type for chapter")); } - - this.zip.readFile(this.manifest[id].href, (function(err, data){ - if(err){ + + this.zip.readFile(this.manifest[id].href, (function (err, data) { + if (err) { callback(new Error("Reading archive failed")); return; } @@ -535,85 +541,85 @@ EPub.prototype.getChapter = function(id, callback){ var str = data.toString("utf-8"); // remove linebreaks (no multi line matches in JS regex!) - str = str.replace(/\r?\n/g,"\u0000"); + str = str.replace(/\r?\n/g, "\u0000"); // keep only contents - str.replace(/]*?>(.*)<\/body[^>]*?>/i, function(o,d){ + str.replace(/]*?>(.*)<\/body[^>]*?>/i, function (o, d) { str = d.trim(); }); // remove