diff --git a/.gitignore b/.gitignore index 6f4cff7..7896fb6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ -*.epub -test.* \ No newline at end of file +test.* +server.* +tasuja.epub \ No newline at end of file diff --git a/README.md b/README.md index 7f19ae6..7169e30 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,80 @@ #epub -**epub** is a node.js module to parse EPUB electronic book files. \ No newline at end of file +**epub** is a node.js module to parse EPUB electronic book files. + +**NB!** Only ebooks in UTF-8 are currently supported! + +## Installation + + npm install epub + +## Usage + + var epub = new EPub(epubfile, imagewebroot, chapterwebroot); + +Where + + * **epubfile** is the file path to an EPUB file + * **imagewebroot** is the prefix for image URLs. If it's */images/* then the actual URL is going to be */images/IMG_ID/IMG_FILENAME*, `IMG_ID` can be used to fetch the image from the ebook with `getImage` + * **chapterwebroot** is the prefix for chapter URLs. If it's */chapter/* then the actual URL is going to be */chapter/CHAPTER_ID/CHAPTER_FILENAME*, `CHAPTER_ID` can be used to fetch the chapter from the ebook with `getChapter` + +Before the contents of the ebook can be read, it must be opened (`EPub` is an `EventEmitter`). + + epub.on("end", function(){ + // epub is now usable + console.log(epub.metadata.title); + + epub.getChapter("chapter_id", function(err, text){}); + }); + epub.parse(); + + +## metadata + +Property of the *epub* object that holds several metadata fields about the book. + + epub = new EPub(...); + ... + epub.metadata; + +Available fields: + + * **creator** Author of the book (if multiple authors, then the first on the list) (*Lewis Carroll*) + * **creatorFileAs** Author name on file (*Carroll, Lewis*) + * **title** Title of the book (*Alice's Adventures in Wonderland*) + * **language** Language code (*en* or *en-us* etc.) 
+ * **subject** Topic of the book (*Fantasy*) + * **date** Creation date of the file (*2006-08-12*) + +## flow + +*flow* is a property of the *epub* object and holds the actual list of chapters (TOC is just an indication and can link to a # url inside a chapter file) + + epub = new EPub(...); + ... + epub.flow.forEach(function(chapter){ + console.log(chapter.id); + }); + +Chapter `id` is needed to load a chapter with `getChapter` + +## toc +*toc* is a property of the *epub* object and indicates a list of titles/urls for the TOC. The actual chapter and its ID need to be detected with the `href` property + + +## getChapter(chapter_id, callback) + +Load chapter text from the ebook. + + var epub = new EPub(...); + ... + epub.getChapter("chapter1", function(error, text){}); + +## getImage(image_id, callback) + +Load image (as a Buffer value) from the ebook. + + var epub = new EPub(...); + ... + epub.getImage("image1", function(error, img, mimeType){}); + diff --git a/epub.js b/epub.js index 5ad3385..ecf66c5 100644 --- a/epub.js +++ b/epub.js @@ -60,6 +60,7 @@ EPub.prototype.parse = function () { this.metadata = {}; this.manifest = {}; this.spine = {toc: false, contents: []}; + this.flow = []; this.toc = []; this.open(); @@ -248,7 +249,6 @@ EPub.prototype.handleRootFile = function () { EPub.prototype.parseRootFile = function (rootfile) { var i, len, keys, keyparts, key; - keys = Object.keys(rootfile); for (i = 0, len = keys.length; i < len; i++) { keyparts = keys[i].split(":"); @@ -401,6 +401,9 @@ EPub.prototype.parseSpine = function (spine) { } if (spine.itemref) { + if(!Array.isArray(spine.itemref)){ + spine.itemref = [spine.itemref]; + } for (i = 0, len = spine.itemref.length; i < len; i++) { if (spine.itemref[i]['@']) { if (element = this.manifest[spine.itemref[i]['@'].idref]) { @@ -409,6 +412,7 @@ EPub.prototype.parseSpine = function (spine) { } } } + this.flow = this.spine.contents; }; /** diff --git a/index.js b/example.js similarity index 100% rename from index.js rename 
to example.js diff --git a/example/alice.epub b/example/alice.epub new file mode 100644 index 0000000..1882b21 Binary files /dev/null and b/example/alice.epub differ diff --git a/example/example.js b/example/example.js new file mode 100644 index 0000000..e0d4d0c --- /dev/null +++ b/example/example.js @@ -0,0 +1,38 @@ +var EPub = require("../epub"); + +var epub = new EPub(__dirname + "/alice.epub", "/imagewebroot/", "/articlewebroot/"); +epub.on("error", function(err){ + console.log("ERROR\n-----"); + throw err; +}); + +epub.on("end", function(){ + console.log("METADATA:\n"); + console.log(epub.metadata); + + console.log("\nSPINE:\n"); + console.log(epub.flow); + + console.log("\nTOC:\n"); + console.log(epub.toc); + + // load the first chapter of the flow (the chapter list) by its id + epub.getChapter(epub.flow[0].id, function(err, data){ + if(err){ + console.log(err); + return; + } + console.log("\nFIRST CHAPTER:\n"); + console.log(data.slice(0,512)+"..."); // first 512 characters + }); + + /* + epub.getImage(image_id, function(err, data, mimeType){ + console.log(err || data); + console.log(mimeType) + }); + */ + +}); + +epub.parse(); \ No newline at end of file diff --git a/index.html b/index.html deleted file mode 100644 index 5c4c71c..0000000 --- a/index.html +++ /dev/null @@ -1,105 +0,0 @@ - - - - - EPUB reader - - - - - - - - - - - - - - - - -
- -
- - - \ No newline at end of file