var ZipFile = require("zipfile").ZipFile, XML2JS = require("xml2js").Parser, utillib = require("util"), EventEmitter = require('events').EventEmitter; /** * new EPub(fname[, imageroot][, linkroot]) * - fname (String): filename for the ebook * - imageroot (String): URL prefix for images, defaults to "/images/" * - linkroot (String): URL prefix for links, defaults to "/links/" * * Creates an EventEmitter based object for parsing epub files. Both * prefixes are trimmed and get a trailing "/" appended when it is missing. * The constructor only stores its arguments; the archive is not opened * until parse() is called. * * var epub = new EPub("book.epub"); * epub.on("end", function(){ ... }); * epub.parse(); **/ function EPub(fname, imageroot, linkroot){ EventEmitter.call(this); this.filename = fname; this.imageroot = (imageroot || "/images/").trim(); this.linkroot = (linkroot || "/links/").trim(); if(this.imageroot.substr(-1)!="/")this.imageroot+="/"; if(this.linkroot.substr(-1)!="/")this.linkroot+="/"; } utillib.inherits(EPub, EventEmitter); /** * EPub#parse() -> undefined * * Starts the parser, needs to be called by the script. Resets the parser * state (containerFile, mimeFile, rootFile, metadata, manifest, spine, toc) * to empty values and then calls open(). **/ EPub.prototype.parse = function(){ this.containerFile = false; this.mimeFile = false; this.rootFile = false; this.metadata = {}; this.manifest = {}; this.spine = {toc: false, contents:[]}; this.toc = []; this.open(); } /** * EPub#open() -> undefined * * Opens the epub file with Zip unpacker, retrieves file listing * and runs mime type check. Emits "error" ("Invalid/missing file") when * the ZipFile constructor throws and "error" ("No files in archive") when * the archive exposes no file names; in both cases parsing stops here. **/ EPub.prototype.open = function(){ try{ this.zip = new ZipFile(this.filename); }catch(E){ this.emit("error", new Error("Invalid/missing file")); return; } if(!this.zip.names || !this.zip.names.length){ this.emit("error", new Error("No files in archive")); return; } this.checkMimeType(); } /** * EPub#checkMimeType() -> undefined * * Checks if there's a file called "mimetype" and that its contents * are "application/epub+zip". On success runs root file check. * * NOTE(review): in this copy of the file the function body breaks off * inside the loop header below and runs into the description of the * next method - restore it from the original source before editing. **/ EPub.prototype.checkMimeType = function(){ for(var i=0, len = this.zip.names.length; i undefined * * Looks for a "meta-inf/container.xml" file and searches for a * rootfile element with mime type "application/oebps-package+xml". 
* On success calls the rootfile parser **/ EPub.prototype.getRootFiles = function(){ for(var i=0, len = this.zip.names.length; i undefined * * Parser the rootfile XML and calls rootfile parser **/ EPub.prototype.handleRootFile = function(){ /* Reads this.rootFile from the archive, decodes it as UTF-8 and hands the text to a new XML2JS parser: the "end" result of the parser goes to parseRootFile(), read and parse failures are re-emitted as "error" events. */ this.zip.readFile(this.rootFile, (function(err, data){ if(err){ this.emit("error", new Error("Reading archive failed")); return; } var xml = data.toString("utf-8"), xmlparser = new XML2JS(); xmlparser.on("end", this.parseRootFile.bind(this)); xmlparser.on("error", (function(err){ this.emit("error", new Error("Parsing container XML failed")); return; }).bind(this)); xmlparser.parseString(xml); }).bind(this)); } /** * EPub#parseRootFile(rootfile) -> undefined * - rootfile (Object): parsed rootfile XML, passed in by the XML2JS "end" event * * Parses elements "metadata," "manifest," "spine" and TOC. * Emits "end" if no TOC * * NOTE(review): only the start of the key loop is present in this copy * of the file; the rest of the body is missing. **/ EPub.prototype.parseRootFile = function(rootfile){ var keys, keyparts, key; keys = Object.keys(rootfile); for(var i=0, len = keys.length; i undefined * * Parses "metadata" block (book metadata, title, author etc.) **/ EPub.prototype.parseMetadata = function(metadata){ var keys, keyparts, key; keys = Object.keys(metadata); for(var i=0, len = keys.length; i undefined * * Parses "manifest" block (all items included, html files, images, styles) **/ EPub.prototype.parseManifest = function(manifest){ var path = this.rootFile.split("/"), element, path_str; path.pop(); path_str = path.join("/"); if(manifest.item){ for(var i=0, len = manifest.item.length; i undefined * * Parses "spine" block (all html elements that are shown to the reader) **/ EPub.prototype.parseSpine = function(spine){ var path = this.rootFile.split("/"), element, path_s path.pop(); if(spine['@'] && spine['@'].toc){ this.spine.toc = this.manifest[spine['@'].toc] || false; } if(spine.itemref){ for(var i=0, len = spine.itemref.length; i undefined * * Parses ncx file for table of contents (title, html file) **/ EPub.prototype.parseTOC = function(){ var path = this.spine.toc.href.split("/"); path.pop(); 
this.zip.readFile(this.spine.toc.href, (function(err, data){ if(err){ this.emit("error", new Error("Reading archive failed")); return; } var xml = data.toString("utf-8"), xmlparser = new XML2JS(); xmlparser.on("end", (function(result){ /* "end" is emitted whether or not the NCX contains a navMap; this.toc is only replaced when navMap.navPoint exists */ if(result.navMap && result.navMap.navPoint){ this.toc = this.walkNavMap(result.navMap.navPoint, path); } this.emit("end"); }).bind(this)); xmlparser.on("error", (function(err){ this.emit("error", new Error("Parsing container XML failed")); return; }).bind(this)); xmlparser.parseString(xml); }).bind(this)); } /** * EPub#walkNavMap(branch, path[, level]) -> Array * - branch (Array | Object): NCX NavPoint object, a single object is wrapped into an array * - path (Array): Base path * - level (Number): nesting depth, an empty array is returned once it exceeds 7 * * Walks the NavMap object through all levels and finds elements * for TOC * * NOTE(review): the first statement, "level || level || 0;", is a bare * expression that assigns nothing, so an omitted level stays undefined; * it was presumably meant to be "level = level || 0;" - confirm and fix. * The loop body is missing from this copy of the file. **/ EPub.prototype.walkNavMap = function(branch, path, level){ level || level || 0; if(level>7)return []; var output = [], element; if(!Array.isArray(branch)){ branch = [branch]; } for(var i=0, len = branch.length; i undefined * - id (String): Manifest id value for a chapter * - callback (Function): callback function * * Finds a chapter text for an id. Replaces image and link URL's, removes * etc. 
elements **/ EPub.prototype.getChapter = function(id, callback){ var path = this.rootFile.split("/"), keys = Object.keys(this.manifest); path.pop(); if(this.manifest[id]){ this.zip.readFile(this.manifest[id].href, (function(err, data){ if(err){ callback(new Error("Reading archive failed")); return; } var str = data.toString("utf-8"); // strip str.replace(/\n/g,"\u0000").replace(/]*?>(.*)<\/body[^>]*?>/i, function(o,d){ str = d.replace(/\u0000/g,"\n").trim(); }); // replace images str = str.replace(/(\ssrc\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (function(o, a,b,c){ var img = path.concat([b]).join("/").trim(), element; for(var i=0, len=keys.length; i]*?)(["'\s>])/g, (function(o, a, b, c){ var linkparts = b && b.split("#"), link = path.concat([(linkparts.shift() || "")]).join("/").trim(), element; console.log(link, linkparts.join("#")); for(var i=0, len=keys.length; i