html replace update (skip script blocks etc.)
This commit is contained in:
parent
fd06139fcb
commit
e0f3931a19
3
.gitignore
vendored
3
.gitignore
vendored
@ -1 +1,2 @@
|
|||||||
*.epub
|
*.epub
|
||||||
|
test.*
|
37
epub.js
37
epub.js
@ -23,6 +23,16 @@ module.exports = EPub;
|
|||||||
* });
|
* });
|
||||||
* epub.on("error", function(error){ ... });
|
* epub.on("error", function(error){ ... });
|
||||||
* epub.parse();
|
* epub.parse();
|
||||||
|
*
|
||||||
|
* Image and link URL format is:
|
||||||
|
*
|
||||||
|
* imageroot + img_id + img_zip_path
|
||||||
|
*
|
||||||
|
* So an image "logo.jpg" which resides in "OPT/" in the zip archive
|
||||||
|
* and is listed in the manifest with id "logo_img" will have the
|
||||||
|
* following URL (provided that imageroot is "/images/"):
|
||||||
|
*
|
||||||
|
* /images/logo_img/OPT/logo.jpg
|
||||||
**/
|
**/
|
||||||
function EPub(fname, imageroot, linkroot){
|
function EPub(fname, imageroot, linkroot){
|
||||||
EventEmitter.call(this);
|
EventEmitter.call(this);
|
||||||
@ -493,9 +503,27 @@ EPub.prototype.getChapter = function(id, callback){
|
|||||||
|
|
||||||
var str = data.toString("utf-8");
|
var str = data.toString("utf-8");
|
||||||
|
|
||||||
// strip <body>
|
// remove linebreaks ("." in a JS regex does not match newlines, so the patterns below could not span multiple lines otherwise)
|
||||||
str.replace(/\n/g,"\u0000").replace(/<body[^>]*?>(.*)<\/body[^>]*?>/i, function(o,d){
|
str = str.replace(/\r?\n/g,"\u0000");
|
||||||
str = d.replace(/\u0000/g,"\n").trim();
|
|
||||||
|
// keep only <body> contents
|
||||||
|
str.replace(/<body[^>]*?>(.*)<\/body[^>]*?>/i, function(o,d){
|
||||||
|
str = d.trim();
|
||||||
|
});
|
||||||
|
|
||||||
|
// remove <script> blocks if any
|
||||||
|
str = str.replace(/<script[^>]*?>(.*?)<\/script[^>]*?>/ig, function(o, s){
|
||||||
|
return "";
|
||||||
|
});
|
||||||
|
|
||||||
|
// remove <style> blocks if any
|
||||||
|
str = str.replace(/<style[^>]*?>(.*?)<\/style[^>]*?>/ig, function(o, s){
|
||||||
|
return "";
|
||||||
|
});
|
||||||
|
|
||||||
|
// remove onEvent handlers
|
||||||
|
str = str.replace(/(\s)(on\w+)(\s*=\s*["']?[^"'\s>]*?["'\s>])/g, function(o, a,b,c){
|
||||||
|
return a + "skip-" + b + c;
|
||||||
});
|
});
|
||||||
|
|
||||||
// replace images
|
// replace images
|
||||||
@ -545,6 +573,9 @@ EPub.prototype.getChapter = function(id, callback){
|
|||||||
|
|
||||||
}).bind(this));
|
}).bind(this));
|
||||||
|
|
||||||
|
// bring back linebreaks
|
||||||
|
str = str.replace(/\u0000/g,"\n").trim();
|
||||||
|
|
||||||
callback(null, str);
|
callback(null, str);
|
||||||
|
|
||||||
}).bind(this));
|
}).bind(this));
|
||||||
|
Reference in New Issue
Block a user