Merge pull request #4 from julien-c/master
Add ability to get any file from the Epub + access raw HTML content
This commit is contained in:
commit
e7fa8d455d
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
node_modules
|
||||
npm-debug.log
|
||||
.DS_Store
|
13
README.md
13
README.md
@ -46,6 +46,7 @@ Available fields:
|
||||
* **language** Language code (*en* or *en-us* etc.)
|
||||
* **subject** Topic of the book (*Fantasy*)
|
||||
* **date** Creation date of the file (*2006-08-12*)
|
||||
* **description**
|
||||
|
||||
## flow
|
||||
|
||||
@ -71,6 +72,10 @@ Load chapter text from the ebook.
|
||||
...
|
||||
epub.getChapter("chapter1", function(error, text){});
|
||||
|
||||
## getChapterRaw(chapter_id, callback)
|
||||
|
||||
Load raw chapter text from the ebook.
|
||||
|
||||
## getImage(image_id, callback)
|
||||
|
||||
Load image (as a Buffer value) from the ebook.
|
||||
@ -79,3 +84,11 @@ Load image (as a Buffer value) from the ebook.
|
||||
...
|
||||
epub.getImage("image1", function(error, img, mimeType){});
|
||||
|
||||
## getFile(file_id, callback)
|
||||
|
||||
Load any file (as a Buffer value) from the ebook.
|
||||
|
||||
var epub = new EPub(...);
|
||||
...
|
||||
epub.getFile("css1", function(error, data, mimeType){});
|
||||
|
||||
|
196
epub.js
196
epub.js
@ -216,7 +216,7 @@ EPub.prototype.getRootFiles = function () {
|
||||
/**
|
||||
* EPub#handleRootFile() -> undefined
|
||||
*
|
||||
* Parser the rootfile XML and calls rootfile parser
|
||||
* Parses the rootfile XML and calls rootfile parser
|
||||
**/
|
||||
EPub.prototype.handleRootFile = function () {
|
||||
|
||||
@ -527,13 +527,105 @@ EPub.prototype.walkNavMap = function (branch, path, id_list, level) {
|
||||
* <head> etc. elements. Return only chapters with mime type application/xhtml+xml
|
||||
**/
|
||||
EPub.prototype.getChapter = function (id, callback) {
|
||||
var i, len, path = this.rootFile.split("/"), keys = Object.keys(this.manifest);
|
||||
path.pop();
|
||||
this.getChapterRaw(id, (function (err, str) {
|
||||
if (err) {
|
||||
callback(err);
|
||||
return;
|
||||
}
|
||||
|
||||
var i, len, path = this.rootFile.split("/"), keys = Object.keys(this.manifest);
|
||||
path.pop();
|
||||
|
||||
// remove linebreaks (no multi line matches in JS regex!)
|
||||
str = str.replace(/\r?\n/g, "\u0000");
|
||||
|
||||
// keep only <body> contents
|
||||
str.replace(/<body[^>]*?>(.*)<\/body[^>]*?>/i, function (o, d) {
|
||||
str = d.trim();
|
||||
});
|
||||
|
||||
// remove <script> blocks if any
|
||||
str = str.replace(/<script[^>]*?>(.*?)<\/script[^>]*?>/ig, function (o, s) {
|
||||
return "";
|
||||
});
|
||||
|
||||
// remove <style> blocks if any
|
||||
str = str.replace(/<style[^>]*?>(.*?)<\/style[^>]*?>/ig, function (o, s) {
|
||||
return "";
|
||||
});
|
||||
|
||||
// remove onEvent handlers
|
||||
str = str.replace(/(\s)(on\w+)(\s*=\s*["']?[^"'\s>]*?["'\s>])/g, function (o, a, b, c) {
|
||||
return a + "skip-" + b + c;
|
||||
});
|
||||
|
||||
// replace images
|
||||
str = str.replace(/(\ssrc\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (function (o, a, b, c) {
|
||||
var img = path.concat([b]).join("/").trim(),
|
||||
element;
|
||||
|
||||
for (i = 0, len = keys.length; i < len; i++) {
|
||||
if (this.manifest[keys[i]].href == img) {
|
||||
element = this.manifest[keys[i]];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// include only images from manifest
|
||||
if (element) {
|
||||
return a + this.imageroot + element.id + "/" + img + c;
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
|
||||
}).bind(this));
|
||||
|
||||
// replace links
|
||||
str = str.replace(/(\shref\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (function (o, a, b, c) {
|
||||
var linkparts = b && b.split("#"),
|
||||
link = path.concat([(linkparts.shift() || "")]).join("/").trim(),
|
||||
element;
|
||||
|
||||
for (i = 0, len = keys.length; i < len; i++) {
|
||||
if (this.manifest[keys[i]].href.split("#")[0] == link) {
|
||||
element = this.manifest[keys[i]];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (linkparts.length) {
|
||||
link += "#" + linkparts.join("#");
|
||||
}
|
||||
|
||||
// rewrite only links that point to manifest items; leave other links unchanged
|
||||
if (element) {
|
||||
return a + this.linkroot + element.id + "/" + link + c;
|
||||
} else {
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
}).bind(this));
|
||||
|
||||
// bring back linebreaks
|
||||
str = str.replace(/\u0000/g, "\n").trim();
|
||||
|
||||
callback(null, str);
|
||||
}).bind(this));
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* EPub#getChapterRaw(id, callback) -> undefined
|
||||
* - id (String): Manifest id value for a chapter
|
||||
* - callback (Function): callback function
|
||||
*
|
||||
* Returns the raw chapter text for an id.
|
||||
**/
|
||||
EPub.prototype.getChapterRaw = function (id, callback) {
|
||||
if (this.manifest[id]) {
|
||||
|
||||
if ((this.manifest[id]['media-type'] || "").toLowerCase().trim() != "application/xhtml+xml") {
|
||||
return callback(new Error("Inavlid mime type for chapter"));
|
||||
return callback(new Error("Invalid mime type for chapter"));
|
||||
}
|
||||
|
||||
this.zip.readFile(this.manifest[id].href, (function (err, data) {
|
||||
@ -544,79 +636,6 @@ EPub.prototype.getChapter = function (id, callback) {
|
||||
|
||||
var str = data.toString("utf-8");
|
||||
|
||||
// remove linebreaks (no multi line matches in JS regex!)
|
||||
str = str.replace(/\r?\n/g, "\u0000");
|
||||
|
||||
// keep only <body> contents
|
||||
str.replace(/<body[^>]*?>(.*)<\/body[^>]*?>/i, function (o, d) {
|
||||
str = d.trim();
|
||||
});
|
||||
|
||||
// remove <script> blocks if any
|
||||
str = str.replace(/<script[^>]*?>(.*?)<\/script[^>]*?>/ig, function (o, s) {
|
||||
return "";
|
||||
});
|
||||
|
||||
// remove <style> blocks if any
|
||||
str = str.replace(/<style[^>]*?>(.*?)<\/style[^>]*?>/ig, function (o, s) {
|
||||
return "";
|
||||
});
|
||||
|
||||
// remove onEvent handlers
|
||||
str = str.replace(/(\s)(on\w+)(\s*=\s*["']?[^"'\s>]*?["'\s>])/g, function (o, a, b, c) {
|
||||
return a + "skip-" + b + c;
|
||||
});
|
||||
|
||||
// replace images
|
||||
str = str.replace(/(\ssrc\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (function (o, a, b, c) {
|
||||
var img = path.concat([b]).join("/").trim(),
|
||||
element;
|
||||
|
||||
for (i = 0, len = keys.length; i < len; i++) {
|
||||
if (this.manifest[keys[i]].href == img) {
|
||||
element = this.manifest[keys[i]];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// include only images from manifest
|
||||
if (element) {
|
||||
return a + this.imageroot + element.id + "/" + img + c;
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
|
||||
}).bind(this));
|
||||
|
||||
// replace links
|
||||
str = str.replace(/(\shref\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (function (o, a, b, c) {
|
||||
var linkparts = b && b.split("#"),
|
||||
link = path.concat([(linkparts.shift() || "")]).join("/").trim(),
|
||||
element;
|
||||
|
||||
for (i = 0, len = keys.length; i < len; i++) {
|
||||
if (this.manifest[keys[i]].href.split("#")[0] == link) {
|
||||
element = this.manifest[keys[i]];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (linkparts.length) {
|
||||
link += "#" + linkparts.join("#");
|
||||
}
|
||||
|
||||
// include only images from manifest
|
||||
if (element) {
|
||||
return a + this.linkroot + element.id + "/" + link + c;
|
||||
} else {
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
}).bind(this));
|
||||
|
||||
// bring back linebreaks
|
||||
str = str.replace(/\u0000/g, "\n").trim();
|
||||
|
||||
callback(null, str);
|
||||
|
||||
}).bind(this));
|
||||
@ -631,7 +650,7 @@ EPub.prototype.getChapter = function (id, callback) {
|
||||
* - id (String): Manifest id value for an image
|
||||
* - callback (Function): callback function
|
||||
*
|
||||
* Finds an image an id. Returns the image as Buffer. Callback gets
|
||||
* Finds an image for an id. Returns the image as Buffer. Callback gets
|
||||
* an error object, image buffer and image content-type.
|
||||
* Returns only files whose mime type starts with "image/"
|
||||
**/
|
||||
@ -639,9 +658,27 @@ EPub.prototype.getImage = function (id, callback) {
|
||||
if (this.manifest[id]) {
|
||||
|
||||
if ((this.manifest[id]['media-type'] || "").toLowerCase().trim().substr(0, 6) != "image/") {
|
||||
return callback(new Error("Inavlid mime type for image"));
|
||||
return callback(new Error("Invalid mime type for image"));
|
||||
}
|
||||
|
||||
this.getFile(id, callback);
|
||||
} else {
|
||||
callback(new Error("File not found"));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* EPub#getFile(id, callback) -> undefined
|
||||
* - id (String): Manifest id value for a file
|
||||
* - callback (Function): callback function
|
||||
*
|
||||
* Finds a file for an id. Returns the file as Buffer. Callback gets
|
||||
* an error object, file contents buffer and file content-type.
|
||||
**/
|
||||
EPub.prototype.getFile = function (id, callback) {
|
||||
if (this.manifest[id]) {
|
||||
|
||||
this.zip.readFile(this.manifest[id].href, (function (err, data) {
|
||||
if (err) {
|
||||
callback(new Error("Reading archive failed"));
|
||||
@ -655,5 +692,6 @@ EPub.prototype.getImage = function (id, callback) {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Expose to the world
|
||||
module.exports = EPub;
|
Reference in New Issue
Block a user