diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2f6141d --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +node_modules +npm-debug.log +.DS_Store \ No newline at end of file diff --git a/README.md b/README.md index 98f9d49..2b68c32 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ Available fields: * **language** Language code (*en* or *en-us* etc.) * **subject** Topic of the book (*Fantasy*) * **date** creation of the file (*2006-08-12*) + * **description** Description of the book ## flow @@ -71,6 +72,10 @@ Load chapter text from the ebook. ... epub.getChapter("chapter1", function(error, text){}); +## getChapterRaw(chapter_id, callback) + +Load raw chapter text from the ebook. + ## getImage(image_id, callback) Load image (as a Buffer value) from the ebook. @@ -79,3 +84,11 @@ Load image (as a Buffer value) from the ebook. ... epub.getImage("image1", function(error, img, mimeType){}); +## getFile(file_id, callback) + +Load any file (as a Buffer value) from the ebook. + + var epub = new EPub(...); + ... + epub.getFile("css1", function(error, data, mimeType){}); + diff --git a/epub.js b/epub.js index ecf66c5..90c7b7e 100644 --- a/epub.js +++ b/epub.js @@ -216,7 +216,7 @@ EPub.prototype.getRootFiles = function () { /** * EPub#handleRootFile() -> undefined * - * Parser the rootfile XML and calls rootfile parser + * Parses the rootfile XML and calls rootfile parser **/ EPub.prototype.handleRootFile = function () { @@ -527,13 +527,105 @@ EPub.prototype.walkNavMap = function (branch, path, id_list, level) { *
etc. elements. Return only chapters with mime type application/xhtml+xml **/ EPub.prototype.getChapter = function (id, callback) { - var i, len, path = this.rootFile.split("/"), keys = Object.keys(this.manifest); - path.pop(); + this.getChapterRaw(id, (function (err, str) { + if (err) { + callback(err); + return; + } + var i, len, path = this.rootFile.split("/"), keys = Object.keys(this.manifest); + path.pop(); + + // remove linebreaks (no multi line matches in JS regex!) + str = str.replace(/\r?\n/g, "\u0000"); + + // keep only contents + str.replace(/]*?>(.*)<\/body[^>]*?>/i, function (o, d) { + str = d.trim(); + }); + + // remove