From d8570d97a85acb30875024570e3a72cd0565aa55 Mon Sep 17 00:00:00 2001 From: andris9 Date: Sun, 12 Jun 2011 21:47:48 +0300 Subject: [PATCH] Initial import --- .gitignore | 1 + epub.js | 576 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 577 insertions(+) create mode 100644 .gitignore create mode 100644 epub.js diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b333c38 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.epub \ No newline at end of file diff --git a/epub.js b/epub.js new file mode 100644 index 0000000..5462fe9 --- /dev/null +++ b/epub.js @@ -0,0 +1,576 @@ +var ZipFile = require("zipfile").ZipFile, + XML2JS = require("xml2js").Parser, + utillib = require("util"), + EventEmitter = require('events').EventEmitter; + + +/** + * new EPub(fname[, imageroot][, linkroot]) + * - fname (String): filename for the ebook + * - imageroot (String): URL prefix for images + * - linkroot (String): URL prefix for links + * + * Creates an Event Emitter type object for parsing epub files + * + * var epub = new EPub("book.epub"); + * epub.on("end", function(){ ... 
/**
 * new EPub(fname[, imageroot][, linkroot])
 * - fname (String): filename for the ebook
 * - imageroot (String): URL prefix for images, defaults to "/images/"
 * - linkroot (String): URL prefix for links, defaults to "/links/"
 *
 * Creates an Event Emitter type object for parsing epub files. Both URL
 * prefixes are trimmed and are guaranteed to end with a slash.
 *
 *     var epub = new EPub("book.epub");
 *     epub.on("end", function(){ ... });
 *     epub.parse();
 **/
function EPub(fname, imageroot, linkroot){
    // Coerce to a string before trimming so that a truthy non-string prefix
    // (for example a number) is accepted instead of throwing on .trim()
    function toRoot(value, fallback){
        var root = String(value || fallback).trim();
        return root.slice(-1) === "/" ? root : root + "/";
    }

    EventEmitter.call(this);
    this.filename = fname;

    this.imageroot = toRoot(imageroot, "/images/");
    this.linkroot = toRoot(linkroot, "/links/");
}
utillib.inherits(EPub, EventEmitter);

/**
 * EPub#parse() -> undefined
 *
 * Starts the parser, needs to be called by the script. Resets all state
 * collected by a previous run before opening the archive.
 **/
EPub.prototype.parse = function(){

    this.containerFile = false;
    this.mimeFile = false;
    this.rootFile = false;

    this.metadata = {};
    this.manifest = {};
    this.spine = {toc: false, contents: []};
    this.toc = [];

    this.open();
};

/**
 * EPub#open() -> undefined
 *
 * Opens the epub file with Zip unpacker, retrieves file listing
 * and runs mime type check. Emits "error" when the archive can not be
 * opened or contains no files; the unpacker's own exception is exposed as
 * the `cause` property of the emitted error.
 **/
EPub.prototype.open = function(){
    var error;

    try{
        this.zip = new ZipFile(this.filename);
    }catch(E){
        error = new Error("Invalid/missing file");
        // keep the low level failure around for diagnostics
        error.cause = E;
        this.emit("error", error);
        return;
    }

    if(!this.zip.names || !this.zip.names.length){
        this.emit("error", new Error("No files in archive"));
        return;
    }

    this.checkMimeType();
};
/**
 * EPub#handleRootFile() -> undefined
 *
 * Reads the rootfile (this.rootFile) from the archive, parses it as XML
 * and passes the parsed object to EPub#parseRootFile.
 *
 * Emits "error" when the file can not be read or the XML can not be
 * parsed; the underlying error is exposed as the `cause` property.
 **/
EPub.prototype.handleRootFile = function(){
    var self = this;

    this.zip.readFile(this.rootFile, function(err, data){
        var xmlparser, readError;

        if(err){
            readError = new Error("Reading archive failed");
            readError.cause = err;
            self.emit("error", readError);
            return;
        }

        xmlparser = new XML2JS();

        xmlparser.on("end", self.parseRootFile.bind(self));

        xmlparser.on("error", function(parseErr){
            // this stage parses the rootfile, not container.xml, so the
            // message names the document that actually failed
            var parseError = new Error("Parsing rootfile XML failed");
            parseError.cause = parseErr;
            self.emit("error", parseError);
        });

        xmlparser.parseString(data.toString("utf-8"));
    });
};
/**
 * EPub#parseTOC() -> undefined
 *
 * Parses ncx file for table of contents (title, html file). The file is
 * the manifest item stored in this.spine.toc; its directory is passed to
 * EPub#walkNavMap as the base path. Emits "end" when done.
 *
 * When the spine references no TOC file there is nothing to read and
 * "end" is emitted right away. Emits "error" when the file can not be
 * read or parsed; the underlying error is exposed as the `cause` property.
 **/
EPub.prototype.parseTOC = function(){
    var self = this,
        tocItem = this.spine && this.spine.toc,
        path;

    // this.spine.toc starts out as `false`; without this guard the
    // href lookup below throws a TypeError
    if(!tocItem || !tocItem.href){
        this.emit("end");
        return;
    }

    path = tocItem.href.split("/");
    path.pop();

    this.zip.readFile(tocItem.href, function(err, data){
        var xmlparser, readError;

        if(err){
            readError = new Error("Reading archive failed");
            readError.cause = err;
            self.emit("error", readError);
            return;
        }

        xmlparser = new XML2JS();

        xmlparser.on("end", function(result){
            // an NCX without navMap/navPoint leaves the TOC empty
            if(result && result.navMap && result.navMap.navPoint){
                self.toc = self.walkNavMap(result.navMap.navPoint, path);
            }

            self.emit("end");
        });

        xmlparser.on("error", function(parseErr){
            // this stage parses the NCX file, not container.xml
            var parseError = new Error("Parsing TOC XML failed");
            parseError.cause = parseErr;
            self.emit("error", parseError);
        });

        xmlparser.parseString(data.toString("utf-8"));
    });
};
var output = [], element; + + if(!Array.isArray(branch)){ + branch = [branch]; + } + + for(var i=0, len = branch.length; i undefined + * - id (String): Manifest id value for a chapter + * - callback (Function): callback function + * + * Finds a chapter text for an id. Replaces image and link URL's, removes + * etc. elements + **/ +EPub.prototype.getChapter = function(id, callback){ + var path = this.rootFile.split("/"), keys = Object.keys(this.manifest); + path.pop(); + + if(this.manifest[id]){ + this.zip.readFile(this.manifest[id].href, (function(err, data){ + if(err){ + callback(new Error("Reading archive failed")); + return; + } + + var str = data.toString("utf-8"); + + // strip + str.replace(/\n/g,"\u0000").replace(/]*?>(.*)<\/body[^>]*?>/i, function(o,d){ + str = d.replace(/\u0000/g,"\n").trim(); + }); + + // replace images + str = str.replace(/(\ssrc\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (function(o, a,b,c){ + var img = path.concat([b]).join("/").trim(), + element; + + for(var i=0, len=keys.length; i]*?)(["'\s>])/g, (function(o, a, b, c){ + var linkparts = b && b.split("#"), + link = path.concat([(linkparts.shift() || "")]).join("/").trim(), + element; + + console.log(link, linkparts.join("#")); + + for(var i=0, len=keys.length; i