This repository has been archived on 2019-08-30. You can view files and clone it, but cannot push or open issues or pull requests.
epub/epub.js

711 lines
22 KiB
JavaScript
Raw Normal View History

2011-06-12 18:47:48 +00:00
var ZipFile = require("zipfile").ZipFile,
XML2JS = require("xml2js").Parser,
utillib = require("util"),
EventEmitter = require('events').EventEmitter;
2011-06-13 19:04:21 +00:00
//TODO: Cache parsed data
2011-06-12 19:04:40 +00:00
/**
 *  new EPub(fname[, imageroot][, linkroot])
 *  - fname (String): filename for the ebook
 *  - imageroot (String): URL prefix for images, defaults to "/images/"
 *  - linkroot (String): URL prefix for links, defaults to "/links/"
 *
 *  Creates an Event Emitter type object for parsing epub files
 *
 *      var epub = new EPub("book.epub");
 *      epub.on("end", function () {
 *          console.log(epub.spine);
 *      });
 *      epub.on("error", function (error) { ... });
 *      epub.parse();
 *
 *  Both prefixes are trimmed and always end with a "/".
 *  Image and link URL format is:
 *
 *      imageroot + img_id + "/" + img_zip_path
 *
 *  So an image "logo.jpg" which resides in "OPT/" in the zip archive
 *  and is listed in the manifest with id "logo_img" will have the
 *  following url (providing that imageroot is "/images/"):
 *
 *      /images/logo_img/OPT/logo.jpg
 **/
function EPub(fname, imageroot, linkroot) {
    EventEmitter.call(this);

    // Trims a URL prefix (or its fallback) and guarantees a trailing "/".
    // String() keeps a non-string prefix from throwing on .trim()
    function withTrailingSlash(root, fallback) {
        var prefix = String(root || fallback).trim();
        return prefix.slice(-1) === "/" ? prefix : prefix + "/";
    }

    this.filename = fname;
    this.imageroot = withTrailingSlash(imageroot, "/images/");
    this.linkroot = withTrailingSlash(linkroot, "/links/");
}
utillib.inherits(EPub, EventEmitter);
/**
 *  EPub#parse() -> undefined
 *
 *  Starts the parser, needs to be called by the script.
 *  Resets all parsed state (file names, metadata, manifest, spine,
 *  flow and toc) before opening the archive.
 **/
EPub.prototype.parse = function () {
    // names of the special files inside the archive, false until found
    this.containerFile = false;
    this.mimeFile = false;
    this.rootFile = false;

    // parsed book structure
    this.metadata = {};
    this.manifest = {};
    this.spine = {toc: false, contents: []};
    this.flow = [];
    this.toc = [];

    this.open();
};
/**
 *  EPub#open() -> undefined
 *
 *  Opens the epub file with Zip unpacker, retrieves file listing
 *  and runs mime type check. Emits "error" when the archive can not
 *  be opened or contains no files.
 **/
EPub.prototype.open = function () {
    var failure;

    try {
        this.zip = new ZipFile(this.filename);
    } catch (E) {
        failure = new Error("Invalid/missing file");
        // keep the unpacker's own error reachable for debugging
        failure.cause = E;
        this.emit("error", failure);
        return;
    }

    if (!this.zip.names || !this.zip.names.length) {
        this.emit("error", new Error("No files in archive"));
        return;
    }

    this.checkMimeType();
};
/**
 *  EPub#checkMimeType() -> undefined
 *
 *  Checks if there's a file called "mimetype" (name matched case
 *  insensitively) and that its contents are "application/epub+zip".
 *  On success runs root file check, otherwise emits "error".
 **/
EPub.prototype.checkMimeType = function () {
    var i, len;

    for (i = 0, len = this.zip.names.length; i < len; i++) {
        if (this.zip.names[i].toLowerCase() === "mimetype") {
            // remember the name exactly as it is spelled in the archive
            this.mimeFile = this.zip.names[i];
            break;
        }
    }

    if (!this.mimeFile) {
        this.emit("error", new Error("No mimetype file in archive"));
        return;
    }

    this.zip.readFile(this.mimeFile, (function (err, data) {
        if (err) {
            this.emit("error", new Error("Reading archive failed"));
            return;
        }

        // compare ignoring case and surrounding whitespace
        var txt = data.toString("utf-8").toLowerCase().trim();
        if (txt !== "application/epub+zip") {
            this.emit("error", new Error("Unsupported mime type"));
            return;
        }

        this.getRootFiles();
    }).bind(this));
};
/**
 *  EPub#getRootFiles() -> undefined
 *
 *  Looks for a "meta-inf/container.xml" file and searches for a
 *  rootfile element with mime type "application/oebps-package+xml".
 *  On success stores the archive name of that file in `rootFile`
 *  and calls the rootfile parser, otherwise emits "error".
 **/
EPub.prototype.getRootFiles = function () {
    var i, len;

    for (i = 0, len = this.zip.names.length; i < len; i++) {
        if (this.zip.names[i].toLowerCase() === "meta-inf/container.xml") {
            this.containerFile = this.zip.names[i];
            break;
        }
    }

    if (!this.containerFile) {
        this.emit("error", new Error("No container file in archive"));
        return;
    }

    this.zip.readFile(this.containerFile, (function (err, data) {
        if (err) {
            this.emit("error", new Error("Reading archive failed"));
            return;
        }

        // The whole document is lower cased so that element names,
        // attribute names and the rootfile path compare case insensitively
        var xml = data.toString("utf-8").toLowerCase().trim(),
            xmlparser = new XML2JS(),
            packageType = "application/oebps-package+xml";

        xmlparser.on("end", (function (result) {
            var rootfile = result && result.rootfiles && result.rootfiles.rootfile,
                filename = false, attrs, i, len;

            if (!rootfile) {
                this.emit("error", new Error("No rootfiles found"));
                return;
            }

            if (Array.isArray(rootfile)) {
                // several rootfiles: take the first usable package document,
                // skipping entries that carry no attributes at all
                for (i = 0, len = rootfile.length; i < len; i++) {
                    attrs = rootfile[i] && rootfile[i]["@"];
                    if (attrs && attrs["media-type"] === packageType && attrs["full-path"]) {
                        filename = attrs["full-path"].toLowerCase().trim();
                        break;
                    }
                }
            } else if (rootfile["@"]) {
                attrs = rootfile["@"];
                if (attrs["media-type"] !== packageType || !attrs["full-path"]) {
                    this.emit("error", new Error("Rootfile in unknown format"));
                    return;
                }
                filename = attrs["full-path"].toLowerCase().trim();
            }

            if (!filename) {
                this.emit("error", new Error("Empty rootfile"));
                return;
            }

            // map the lower cased path back to the name used in the archive
            for (i = 0, len = this.zip.names.length; i < len; i++) {
                if (this.zip.names[i].toLowerCase() === filename) {
                    this.rootFile = this.zip.names[i];
                    break;
                }
            }

            if (!this.rootFile) {
                this.emit("error", new Error("Rootfile not found from archive"));
                return;
            }

            this.handleRootFile();
        }).bind(this));

        xmlparser.on("error", (function () {
            this.emit("error", new Error("Parsing container XML failed"));
        }).bind(this));

        xmlparser.parseString(xml);
    }).bind(this));
};
/**
 *  EPub#handleRootFile() -> undefined
 *
 *  Reads the rootfile from the archive, parses its XML and hands the
 *  parsed object over to EPub#parseRootFile. Emits "error" when the
 *  file can not be read or parsed.
 **/
EPub.prototype.handleRootFile = function () {
    this.zip.readFile(this.rootFile, (function (err, data) {
        if (err) {
            this.emit("error", new Error("Reading archive failed"));
            return;
        }

        var xml = data.toString("utf-8"),
            xmlparser = new XML2JS();

        xmlparser.on("end", this.parseRootFile.bind(this));

        xmlparser.on("error", (function () {
            // message text is shared with the container.xml parser and
            // is left unchanged here
            this.emit("error", new Error("Parsing container XML failed"));
        }).bind(this));

        xmlparser.parseString(xml);
    }).bind(this));
};
/**
 *  EPub#parseRootFile(rootfile) -> undefined
 *  - rootfile (Object): parsed rootfile (package) XML
 *
 *  Stores the package version (defaults to "2.0") and parses elements
 *  "metadata," "manifest," "spine" and TOC. Element names are matched
 *  without their namespace prefix and ignoring case.
 *  Emits "end" if no TOC
 **/
EPub.prototype.parseRootFile = function (rootfile) {
    var i, len, keys, key;

    // a package element without any attributes has no "@" member
    this.version = rootfile['@'] && rootfile['@'].version || '2.0';

    keys = Object.keys(rootfile);
    for (i = 0, len = keys.length; i < len; i++) {
        // "opf:metadata" -> "metadata"
        key = (keys[i].split(":").pop() || "").toLowerCase().trim();
        switch (key) {
        case "metadata":
            this.parseMetadata(rootfile[keys[i]]);
            break;
        case "manifest":
            this.parseManifest(rootfile[keys[i]]);
            break;
        case "spine":
            this.parseSpine(rootfile[keys[i]]);
            break;
        case "guide":
            // the guide block is recognized but not parsed
            break;
        }
    }

    if (this.spine.toc) {
        // parseTOC is responsible for emitting "end" once it is done
        this.parseTOC();
    } else {
        this.emit("end");
    }
};
/**
 *  EPub#parseMetadata(metadata) -> undefined
 *  - metadata (Object): parsed "metadata" element of the rootfile
 *
 *  Parses "metadata" block (book metadata, title, author etc.) into
 *  `this.metadata`. Element names are matched without their namespace
 *  prefix. For repeated elements the first one is used, except for
 *  identifiers and "meta" elements which are all inspected.
 **/
EPub.prototype.parseMetadata = function (metadata) {
    var i, j, len, keys, key, value, entry, attrs, ids, metas;

    // Picks the first occurrence of a possibly repeated element
    function first(node) {
        return Array.isArray(node) ? node[0] : node;
    }

    // Text content of an element: parsed elements are either plain
    // strings or objects holding the text in "#" and attributes in "@"
    function text(node) {
        var content = first(node);
        if (content && typeof content === "object") {
            content = content["#"];
        }
        return String(content || "").trim();
    }

    keys = Object.keys(metadata);
    for (i = 0, len = keys.length; i < len; i++) {
        // "dc:title" -> "title"
        key = (keys[i].split(":").pop() || "").toLowerCase().trim();
        value = metadata[keys[i]];

        switch (key) {
        case "publisher":
        case "title":
        case "subject":
        case "description":
        case "date":
            this.metadata[key] = text(value);
            break;
        case "language":
            this.metadata.language = text(value).toLowerCase();
            break;
        case "creator":
            entry = first(value);
            this.metadata.creator = text(value);
            // sortable name, falls back to the display name
            this.metadata.creatorFileAs = String(entry && entry['@'] && entry['@']["opf:file-as"] || this.metadata.creator).trim();
            break;
        case "identifier":
            ids = Array.isArray(value) ? value : [value];
            for (j = 0; j < ids.length; j++) {
                attrs = ids[j] && ids[j]["@"];
                if (!attrs) {
                    continue;
                }
                if (attrs["opf:scheme"] === "ISBN") {
                    this.metadata.ISBN = String(ids[j]["#"] || "").trim();
                } else if (attrs.id && attrs.id.match(/uuid/i)) {
                    this.metadata.UUID = String(ids[j]["#"] || "").replace('urn:uuid:', '').toUpperCase().trim();
                }
            }
            break;
        }
    }

    // <meta name="..." content="..."/> and <meta property="...">text</meta>.
    // A lone meta element arrives as an object rather than a list
    metas = metadata['meta'];
    if (!Array.isArray(metas)) {
        metas = metas ? [metas] : [];
    }
    for (i = 0, len = metas.length; i < len; i++) {
        attrs = metas[i] && metas[i]['@'];
        if (!attrs) {
            continue;
        }
        if (attrs.name) {
            this.metadata[attrs.name] = attrs.content;
        }
        if (metas[i]['#'] && attrs.property) {
            this.metadata[attrs.property] = metas[i]['#'];
        }
    }
};
/**
 *  EPub#parseManifest(manifest) -> undefined
 *  - manifest (Object): parsed "manifest" element of the rootfile
 *
 *  Parses "manifest" block (all items included, html files, images, styles).
 *  Every item's attributes are stored in `this.manifest` under the item id;
 *  hrefs are prefixed with the rootfile's directory so that they become
 *  paths inside the archive.
 **/
EPub.prototype.parseManifest = function (manifest) {
    var i, len, items, element,
        path = this.rootFile.split("/"),
        path_str;

    // directory of the rootfile, hrefs are relative to it
    path.pop();
    path_str = path.join("/");

    items = manifest.item;
    if (!items) {
        return;
    }
    // a manifest with a single item is parsed as an object, not a list
    if (!Array.isArray(items)) {
        items = [items];
    }

    for (i = 0, len = items.length; i < len; i++) {
        element = items[i] && items[i]['@'];
        if (!element) {
            continue;
        }
        // leave hrefs alone that already start with the rootfile directory
        if (element.href && element.href.slice(0, path_str.length) !== path_str) {
            element.href = path.concat([element.href]).join("/");
        }
        this.manifest[element.id] = element;
    }
};
/**
 *  EPub#parseSpine(spine) -> undefined
 *  - spine (Object): parsed "spine" element of the rootfile
 *
 *  Parses "spine" block (all html elements that are shown to the reader).
 *  Resolves the "toc" attribute and every itemref against the manifest,
 *  so the manifest needs to be parsed first. References to unknown
 *  manifest ids are skipped.
 **/
EPub.prototype.parseSpine = function (spine) {
    var i, len, refs, element;

    if (spine['@'] && spine['@'].toc) {
        this.spine.toc = this.manifest[spine['@'].toc] || false;
    }

    refs = spine.itemref;
    if (refs) {
        // a spine with a single itemref is parsed as an object, not a list
        if (!Array.isArray(refs)) {
            refs = [refs];
        }
        for (i = 0, len = refs.length; i < len; i++) {
            if (refs[i]['@']) {
                element = this.manifest[refs[i]['@'].idref];
                if (element) {
                    this.spine.contents.push(element);
                }
            }
        }
    }

    // flow is the reading order, the very same list as spine.contents
    this.flow = this.spine.contents;
};
/**
 *  EPub#parseTOC() -> undefined
 *
 *  Parses ncx file for table of contents (title, html file).
 *  The ncx file is the manifest element stored in `this.spine.toc`.
 *  Emits "end" when done, "error" if the file can not be read or parsed.
 **/
EPub.prototype.parseTOC = function () {
    var i, len, keys,
        path = this.spine.toc.href.split("/"),
        // href -> manifest id lookup; created without a prototype so that
        // hrefs like "constructor" can not hit inherited properties
        id_list = Object.create(null);

    // directory of the ncx file, TOC links are relative to it
    path.pop();

    keys = Object.keys(this.manifest);
    for (i = 0, len = keys.length; i < len; i++) {
        id_list[this.manifest[keys[i]].href] = keys[i];
    }

    this.zip.readFile(this.spine.toc.href, (function (err, data) {
        if (err) {
            this.emit("error", new Error("Reading archive failed"));
            return;
        }

        var xml = data.toString("utf-8"),
            xmlparser = new XML2JS();

        xmlparser.on("end", (function (result) {
            if (result && result.navMap && result.navMap.navPoint) {
                this.toc = this.walkNavMap(result.navMap.navPoint, path, id_list);
            }
            this.emit("end");
        }).bind(this));

        xmlparser.on("error", (function () {
            // message text is shared with the container.xml parser and
            // is left unchanged here
            this.emit("error", new Error("Parsing container XML failed"));
        }).bind(this));

        xmlparser.parseString(xml);
    }).bind(this));
};
/**
 *  EPub#walkNavMap(branch, path, id_list[, level]) -> Array
 *  - branch (Array | Object): NCX NavPoint object
 *  - path (Array): Base path
 *  - id_list (Object): map of file paths and id values
 *  - level (Number): deepness
 *
 *  Walks the NavMap object through all levels and finds elements
 *  for TOC. Returns a flat list in document order. A nav point whose
 *  file is listed in the manifest is represented by that manifest
 *  element itself (title, order and level are added to it), others
 *  get a new object. Nav points without a link are left out.
 **/
EPub.prototype.walkNavMap = function (branch, path, id_list, level) {
    level = level || 0;

    // don't go too far
    if (level > 7) {
        return [];
    }

    var i, len, output = [], node, attrs, element, title, order, href,
        hasOwn = Object.prototype.hasOwnProperty;

    // Label text; the label or its text may be parsed as a plain string
    // or as an object that keeps the text in "#"
    function labelText(label) {
        var value = label.text || label;
        if (value && typeof value === "object") {
            value = value["#"];
        }
        return String(value || "").trim();
    }

    if (!Array.isArray(branch)) {
        branch = [branch];
    }

    for (i = 0, len = branch.length; i < len; i++) {
        node = branch[i];
        if (!node) {
            continue;
        }
        attrs = node["@"] || {};

        if (node.navLabel) {
            title = labelText(node.navLabel);
            order = Number(attrs.playOrder || 0);
            href = String(node.content && node.content["@"] && node.content["@"].src || "").trim();

            if (href) {
                href = path.concat([href]).join("/");

                // only own keys count, and the id has to exist in the manifest
                if (hasOwn.call(id_list, href) && this.manifest[id_list[href]]) {
                    // link existing object
                    element = this.manifest[id_list[href]];
                    element.title = title;
                    element.order = order;
                    element.level = level;
                } else {
                    // use new one
                    element = {
                        level: level,
                        order: order,
                        title: title,
                        href: href,
                        id: String(attrs.id || "").trim()
                    };
                }

                output.push(element);
            }
        }

        if (node.navPoint) {
            output = output.concat(this.walkNavMap(node.navPoint, path, id_list, level + 1));
        }
    }

    return output;
};
/**
 *  EPub#getChapter(id, callback) -> undefined
 *  - id (String): Manifest id value for a chapter
 *  - callback (Function): callback function, gets an error object
 *    and the chapter text
 *
 *  Finds a chapter text for an id. Replaces image and link URL's, removes
 *  <head> etc. elements. Return only chapters with mime type application/xhtml+xml
 *
 *  Image and link targets are resolved against the directory of the
 *  rootfile. A "src" that does not point to a manifest element is
 *  removed, a "href" that does not is left as it is.
 **/
EPub.prototype.getChapter = function (id, callback) {
    this.getChapterRaw(id, (function (err, str) {
        if (err) {
            callback(err);
            return;
        }

        var i, len, body,
            path = this.rootFile.split("/"),
            keys = Object.keys(this.manifest);
        path.pop();

        // remove linebreaks (no multi line matches in JS regex!)
        str = str.replace(/\r?\n/g, "\u0000");

        // keep only <body> contents
        body = str.match(/<body[^>]*?>(.*)<\/body[^>]*?>/i);
        if (body) {
            str = body[1].trim();
        }

        // remove <script> blocks if any
        str = str.replace(/<script[^>]*?>(.*?)<\/script[^>]*?>/ig, "");

        // remove <style> blocks if any
        str = str.replace(/<style[^>]*?>(.*?)<\/style[^>]*?>/ig, "");

        // disarm onEvent handlers by renaming the attribute (any letter case)
        str = str.replace(/(\s)(on\w+)(\s*=\s*["']?[^"'\s>]*?["'\s>])/gi, "$1skip-$2$3");

        // replace images
        str = str.replace(/(\ssrc\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (function (o, a, b, c) {
            var img = path.concat([b]).join("/").trim(),
                element;

            for (i = 0, len = keys.length; i < len; i++) {
                if (this.manifest[keys[i]].href === img) {
                    element = this.manifest[keys[i]];
                    break;
                }
            }

            // include only images from manifest
            if (element) {
                return a + this.imageroot + element.id + "/" + img + c;
            }
            // An unquoted value is terminated by whitespace or ">", which
            // belongs to the tag and has to survive dropping the attribute
            return (c === '"' || c === "'") ? "" : c;
        }).bind(this));

        // replace links
        str = str.replace(/(\shref\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (function (o, a, b, c) {
            // b is an empty string for href=""
            var linkparts = (b || "").split("#"),
                link = path.concat([(linkparts.shift() || "")]).join("/").trim(),
                element, target;

            for (i = 0, len = keys.length; i < len; i++) {
                target = this.manifest[keys[i]].href;
                if (target && target.split("#")[0] === link) {
                    element = this.manifest[keys[i]];
                    break;
                }
            }

            // put the fragment back
            if (linkparts.length) {
                link += "#" + linkparts.join("#");
            }

            // rewrite only links to manifest elements, keep others untouched
            if (element) {
                return a + this.linkroot + element.id + "/" + link + c;
            }
            return a + b + c;
        }).bind(this));

        // bring back linebreaks
        str = str.replace(/\u0000/g, "\n").trim();

        callback(null, str);
    }).bind(this));
};
/**
 *  EPub#getChapterRaw(id, callback) -> undefined
 *  - id (String): Manifest id value for a chapter
 *  - callback (Function): callback function, gets an error object
 *    and the chapter text
 *
 *  Returns the raw chapter text for an id. Only manifest elements with
 *  mime type application/xhtml+xml or image/svg+xml are accepted.
 **/
EPub.prototype.getChapterRaw = function (id, callback) {
    var element = this.manifest[id], mime;

    if (!element) {
        callback(new Error("File not found"));
        return;
    }

    // normalized the same way as EPub#getImage checks its mime type
    mime = String(element['media-type'] || "").toLowerCase().trim();
    if (mime !== "application/xhtml+xml" && mime !== "image/svg+xml") {
        return callback(new Error("Invalid mime type for chapter"));
    }

    this.zip.readFile(element.href, (function (err, data) {
        if (err) {
            callback(new Error("Reading archive failed"));
            return;
        }

        callback(null, data.toString("utf-8"));
    }).bind(this));
};
/**
 *  EPub#getImage(id, callback) -> undefined
 *  - id (String): Manifest id value for an image
 *  - callback (Function): callback function
 *
 *  Finds an image for an id. Returns the image as Buffer. Callback gets
 *  an error object, image buffer and image content-type.
 *  Return only images with mime type image
 **/
EPub.prototype.getImage = function (id, callback) {
    var element = this.manifest[id], mime;

    if (!element) {
        callback(new Error("File not found"));
        return;
    }

    // anything in the "image/" family is accepted, ignoring case
    mime = (element['media-type'] || "").toLowerCase().trim();
    if (mime.slice(0, 6) !== "image/") {
        return callback(new Error("Invalid mime type for image"));
    }

    this.getFile(id, callback);
};
/**
 *  EPub#getFile(id, callback) -> undefined
 *  - id (String): Manifest id value for a file
 *  - callback (Function): callback function
 *
 *  Finds a file for an id. Returns the file as Buffer. Callback gets
 *  an error object, file contents buffer and file content-type.
 **/
EPub.prototype.getFile = function (id, callback) {
    var element = this.manifest[id];

    if (!element) {
        callback(new Error("File not found"));
        return;
    }

    this.zip.readFile(element.href, (function (err, data) {
        if (err) {
            callback(new Error("Reading archive failed"));
            return;
        }

        // content-type is looked up when the read finishes
        callback(null, data, this.manifest[id]['media-type']);
    }).bind(this));
};
2011-06-13 19:04:21 +00:00
// Expose to the world: the EPub constructor is the only export of this module
module.exports = EPub;