"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); var xml2js = require('xml2js'); var xml2jsOptions = xml2js.defaults['0.1']; var EventEmitter = require('events').EventEmitter; // try { // // zipfile is an optional dependency: // var ZipFile = require("zipfile").ZipFile; // } catch (err) { // // Mock zipfile using pure-JS adm-zip: // var AdmZip = require('adm-zip'); // var ZipFile = function (filename) { // this.admZip = new AdmZip(filename); // this.names = this.admZip.getEntries().map(function (zipEntry) { // return zipEntry.entryName; // }); // this.count = this.names.length; // }; // ZipFile.prototype.readFile = function (name, cb) { // this.admZip.readFileAsync(this.admZip.getEntry(name), function (buffer, error) { // // `error` is bogus right now, so let's just drop it. // // see https://github.com/cthackers/adm-zip/pull/88 // return cb(null, buffer); // }); // }; // } const fs = require("fs"); const JSZip = require("jszip"); //TODO: Cache parsed data /** * new EPub(fname[, imageroot][, linkroot]) * - fname (String): filename for the ebook * - imageroot (String): URL prefix for images * - linkroot (String): URL prefix for links * * Creates an Event Emitter type object for parsing epub files * * var epub = new EPub("book.epub"); * epub.on("end", function () { * console.log(epub.spine); * }); * epub.on("error", function (error) { ... }); * epub.parse(); * * Image and link URL format is: * * imageroot + img_id + img_zip_path * * So an image "logo.jpg" which resides in "OPT/" in the zip archive * and is listed in the manifest with id "logo_img" will have the * following url (providing that imageroot is "/images/"): * * /images/logo_img/OPT/logo.jpg **/ class EPub { constructor(fname, imageroot, linkroot) { this.metadata = {}; this.manifest = {}; this.spine = { toc: undefined, contents: [] }; this.flow = []; this.toc = []; this.filename = fname; this.imageroot = (imageroot || "/images/").trim(); this.linkroot = (linkroot || "/links/").trim(); if (this.imageroot.substr(-1) != "/") { this.imageroot += "/"; } if (this.linkroot.substr(-1) != "/") { this.linkroot += "/"; } } /** * EPub#parse() -> undefined * * Starts the parser, needs to be called by the script **/ async parse() { this.containerFile = undefined; this.mimeFile = undefined; this.rootFile = undefined; this.metadata = {}; this.manifest = {}; this.spine = { toc: undefined, contents: [] }; this.flow = []; this.toc = []; await this.open(); } /** * EPub#open() -> undefined * * Opens the epub file with Zip unpacker, retrieves file listing * and runs mime type check **/ async open() { this.zip = await new Promise((resolve, reject) => { fs.readFile(this.filename, (err, data) => { if (err) { reject(err); } else { resolve(data); } }); }).then((data) => { return JSZip.loadAsync(data); }); if (Object.keys(this.zip.files).length < 1) { throw new Error("No files in archive"); } await this.checkMimeType(); } ; /** * EPub#checkMimeType() -> undefined * * Checks if there's a file called "mimetype" and that it's contents * are "application/epub+zip". On success runs root file check. **/ async checkMimeType() { var i, len; for (let file in this.zip.files) { if (file.toLowerCase() == "mimetype") { this.mimeFile = file; break; } } if (!this.mimeFile) { throw new Error("No mimetype file in archive"); } let data = await this.zip.file(this.mimeFile).async("nodebuffer"); let txt = data.toString("utf-8").toLowerCase().trim(); if (txt != "application/epub+zip") { throw new Error("Unsupported mime type"); } await this.getRootFiles(); } ; /** * EPub#getRootFiles() -> undefined * * Looks for a "meta-inf/container.xml" file and searches for a * rootfile element with mime type "application/oebps-package+xml". * On success calls the rootfile parser **/ async getRootFiles() { for (let file in this.zip.files) { if (file.toLowerCase() == "meta-inf/container.xml") { this.containerFile = file; break; } } if (!this.containerFile) throw new Error("No container file in archive"); let data = await this.zip.files[this.containerFile].async("nodebuffer"); var xml = data.toString("utf-8").toLowerCase().trim(), xmlparser = new xml2js.Parser(xml2jsOptions); let res = await new Promise((resolve, reject) => { xmlparser.on("end", (result) => { if (!result.rootfiles || !result.rootfiles.rootfile) { reject(new Error("No rootfiles found")); console.dir(result); return; } var rootfile = result.rootfiles.rootfile, filename = undefined, i, len; if (Array.isArray(rootfile)) { for (i = 0, len = rootfile.length; i < len; i++) { if (rootfile[i]["@"]["media-type"] && rootfile[i]["@"]["media-type"] == "application/oebps-package+xml" && rootfile[i]["@"]["full-path"]) { filename = rootfile[i]["@"]["full-path"].toLowerCase().trim(); break; } } } else if (rootfile["@"]) { if (rootfile["@"]["media-type"] != "application/oebps-package+xml" || !rootfile["@"]["full-path"]) { reject(new Error("Rootfile in unknown format")); return; } filename = rootfile["@"]["full-path"].toLowerCase().trim(); } if (!filename) { reject(new Error("Empty rootfile")); return; } for (let file in this.zip.files) { if (file == filename) { this.rootFile = file; break; } } if (!this.rootFile) { reject(new Error("Rootfile not found from archive")); return; } resolve(); }); xmlparser.on("error", (err) => { reject(new Error("Parsing container XML failed")); return; }); xmlparser.parseString(xml); }); await this.handleRootFile(); } ; /** * EPub#handleRootFile() -> undefined * * Parses the rootfile XML and calls rootfile parser **/ async handleRootFile() { let data = await this.zip.files[this.rootFile].async("nodebuffer"); var xml = data.toString("utf-8"); let rf = await new Promise((resolve, reject) => { let xmlparser = new xml2js.Parser(xml2jsOptions); xmlparser.on("end", (data) => { resolve(data); }); xmlparser.on("error", err => { reject(err); }); xmlparser.parseString(xml); }); await this.parseRootFile(rf); } ; /** * EPub#parseRootFile() -> undefined * * Parses elements "metadata," "manifest," "spine" and TOC. * Emits "end" if no TOC **/ async parseRootFile(rootfile) { this.version = rootfile['@'].version || '2.0'; var i, len, keys, keyparts, key; keys = Object.keys(rootfile); for (i = 0, len = keys.length; i < len; i++) { keyparts = keys[i].split(":"); key = (keyparts.pop() || "").toLowerCase().trim(); switch (key) { case "metadata": this.parseMetadata(rootfile[keys[i]]); break; case "manifest": this.parseManifest(rootfile[keys[i]]); break; case "spine": this.parseSpine(rootfile[keys[i]]); break; case "guide": //this.parseGuide(rootfile[keys[i]]); break; } } if (this.spine.toc) { await this.parseTOC(); } } ; /** * EPub#parseMetadata() -> undefined * * Parses "metadata" block (book metadata, title, author etc.) **/ parseMetadata(metadata) { var i, j, len, keys, keyparts, key; keys = Object.keys(metadata); for (i = 0, len = keys.length; i < len; i++) { keyparts = keys[i].split(":"); key = (keyparts.pop() || "").toLowerCase().trim(); switch (key) { case "publisher": if (Array.isArray(metadata[keys[i]])) { this.metadata.publisher = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim(); } else { this.metadata.publisher = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim(); } break; case "language": if (Array.isArray(metadata[keys[i]])) { this.metadata.language = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").toLowerCase().trim(); } else { this.metadata.language = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").toLowerCase().trim(); } break; case "title": if (Array.isArray(metadata[keys[i]])) { this.metadata.title = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim(); } else { this.metadata.title = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim(); } break; case "subject": if (Array.isArray(metadata[keys[i]])) { this.metadata.subject = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim(); } else { this.metadata.subject = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim(); } break; case "description": if (Array.isArray(metadata[keys[i]])) { this.metadata.description = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim(); } else { this.metadata.description = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim(); } break; case "creator": if (Array.isArray(metadata[keys[i]])) { this.metadata.creator = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim(); this.metadata.creatorFileAs = String(metadata[keys[i]][0] && metadata[keys[i]][0]['@'] && metadata[keys[i]][0]['@']["opf:file-as"] || this.metadata.creator).trim(); } else { this.metadata.creator = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim(); this.metadata.creatorFileAs = String(metadata[keys[i]]['@'] && metadata[keys[i]]['@']["opf:file-as"] || this.metadata.creator).trim(); } break; case "date": if (Array.isArray(metadata[keys[i]])) { this.metadata.date = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim(); } else { this.metadata.date = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim(); } break; case "identifier": if (metadata[keys[i]]["@"] && metadata[keys[i]]["@"]["opf:scheme"] == "ISBN") { this.metadata.ISBN = String(metadata[keys[i]]["#"] || "").trim(); } else if (metadata[keys[i]]["@"] && metadata[keys[i]]["@"].id && metadata[keys[i]]["@"].id.match(/uuid/i)) { this.metadata.UUID = String(metadata[keys[i]]["#"] || "").replace('urn:uuid:', '').toUpperCase().trim(); } else if (Array.isArray(metadata[keys[i]])) { for (j = 0; j < metadata[keys[i]].length; j++) { if (metadata[keys[i]][j]["@"]) { if (metadata[keys[i]][j]["@"]["opf:scheme"] == "ISBN") { this.metadata.ISBN = String(metadata[keys[i]][j]["#"] || "").trim(); } else if (metadata[keys[i]][j]["@"].id && metadata[keys[i]][j]["@"].id.match(/uuid/i)) { this.metadata.UUID = String(metadata[keys[i]][j]["#"] || "").replace('urn:uuid:', '').toUpperCase().trim(); } } } } break; } } var metas = metadata['meta'] || {}; Object.keys(metas).forEach((key) => { var meta = metas[key]; if (meta['@'] && meta['@'].name) { var name = meta['@'].name; this.metadata[name] = meta['@'].content; } if (meta['#'] && meta['@'].property) { this.metadata[meta['@'].property] = meta['#']; } if (meta.name && meta.name == "cover") { this.metadata[meta.name] = meta.content; } }, this); } ; /** * EPub#parseManifest() -> undefined * * Parses "manifest" block (all items included, html files, images, styles) **/ parseManifest(manifest) { var i, len, path = this.rootFile.split("/"), element, path_str; path.pop(); path_str = path.join("/"); if (manifest.item) { for (i = 0, len = manifest.item.length; i < len; i++) { if (manifest.item[i]['@']) { element = manifest.item[i]['@']; if (element.href && element.href.substr(0, path_str.length) != path_str) { element.href = path.concat([element.href]).join("/"); } this.manifest[manifest.item[i]['@'].id] = element; } } } } ; /** * EPub#parseSpine() -> undefined * * Parses "spine" block (all html elements that are shown to the reader) **/ parseSpine(spine) { var i, len, path = this.rootFile.split("/"), element; path.pop(); if (spine['@'] && spine['@'].toc) { this.spine.toc = this.manifest[spine['@'].toc] || false; } if (spine.itemref) { if (!Array.isArray(spine.itemref)) { spine.itemref = [spine.itemref]; } for (i = 0, len = spine.itemref.length; i < len; i++) { if (spine.itemref[i]['@']) { if (element = this.manifest[spine.itemref[i]['@'].idref]) { this.spine.contents.push(element); } } } } this.flow = this.spine.contents; } ; /** * EPub#parseTOC() -> undefined * * Parses ncx file for table of contents (title, html file) **/ async parseTOC() { var i, len, path = this.spine.toc.href.split("/"), id_list = {}, keys; path.pop(); keys = Object.keys(this.manifest); for (i = 0, len = keys.length; i < len; i++) { id_list[this.manifest[keys[i]].href] = keys[i]; } let data = await this.zip.files[this.spine.toc.href].async("nodebuffer"); var xml = data.toString("utf-8"); await new Promise((resolve, reject) => { let xmlparser = new xml2js.Parser(xml2jsOptions); xmlparser.on("end", result => { if (result.navMap && result.navMap.navPoint) { this.toc = this.walkNavMap(result.navMap.navPoint, path, id_list); } resolve(); }); xmlparser.on("error", (err) => { reject(err); }); xmlparser.parseString(xml); }); } ; /** * EPub#walkNavMap(branch, path, id_list,[, level]) -> Array * - branch (Array | Object): NCX NavPoint object * - path (Array): Base path * - id_list (Object): map of file paths and id values * - level (Number): deepness * * Walks the NavMap object through all levels and finds elements * for TOC **/ walkNavMap(branch, path, id_list, level) { level = level || 0; // don't go too far if (level > 7) { return []; } var output = []; if (!Array.isArray(branch)) { branch = [branch]; } for (var i = 0; i < branch.length; i++) { if (branch[i].navLabel) { var title = ''; if (branch[i].navLabel && typeof branch[i].navLabel.text == 'string') { title = branch[i].navLabel && branch[i].navLabel.text || branch[i].navLabel === branch[i].navLabel ? (branch[i].navLabel && branch[i].navLabel.text || branch[i].navLabel || "").trim() : ''; } var order = Number(branch[i]["@"] && branch[i]["@"].playOrder || 0); if (isNaN(order)) { order = 0; } var href = ''; if (branch[i].content && branch[i].content["@"] && typeof branch[i].content["@"].src == 'string') { href = branch[i].content["@"].src.trim(); } var element = { level: level, order: order, title: title, href: undefined, id: undefined }; if (href) { href = path.concat([href]).join("/"); element.href = href; if (id_list[element.href]) { // link existing object element = this.manifest[id_list[element.href]]; element.title = title; element.order = order; element.level = level; } else { // use new one element.href = href; element.id = (branch[i]["@"] && branch[i]["@"].id || "").trim(); } output.push(element); } } if (branch[i].navPoint) { output = output.concat(this.walkNavMap(branch[i].navPoint, path, id_list, level + 1)); } } return output; } ; /** * EPub#getChapter(id, callback) -> undefined * - id (String): Manifest id value for a chapter * - callback (Function): callback function * * Finds a chapter text for an id. Replaces image and link URL's, removes * etc. elements. Return only chapters with mime type application/xhtml+xml **/ async getChapter(id) { let str = await this.getChapterRaw(id); var i, len, path = this.rootFile.split("/"), keys = Object.keys(this.manifest); path.pop(); // remove linebreaks (no multi line matches in JS regex!) str = str.replace(/\r?\n/g, "\u0000"); // keep only contents str.replace(/]*?>(.*)<\/body[^>]*?>/i, function (o, d) { str = d.trim(); return ""; }); // remove