html replace update (skip script blocks etc.)
This commit is contained in:
		
							
								
								
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -1 +1,2 @@ | |||||||
| *.epub | *.epub | ||||||
|  | test.* | ||||||
							
								
								
									
										37
									
								
								epub.js
									
									
									
									
									
								
							
							
						
						
									
										37
									
								
								epub.js
									
									
									
									
									
								
							| @ -23,6 +23,16 @@ module.exports = EPub; | |||||||
|  *      }); |  *      }); | ||||||
|  *      epub.on("error", function(error){ ... }); |  *      epub.on("error", function(error){ ... }); | ||||||
|  *      epub.parse(); |  *      epub.parse(); | ||||||
|  |  * | ||||||
|  |  *  Image and link URL format is: | ||||||
|  |  * | ||||||
|  |  *      imageroot + img_id + img_zip_path | ||||||
|  |  * | ||||||
|  |  *  So an image "logo.jpg" that resides in "OPT/" in the zip archive | ||||||
|  |  *  and is listed in the manifest with id "logo_img" will have the | ||||||
|  |  *  following URL (provided that imageroot is "/images/"): | ||||||
|  |  * | ||||||
|  |  *      /images/logo_img/OPT/logo.jpg | ||||||
|  **/ |  **/ | ||||||
| function EPub(fname, imageroot, linkroot){ | function EPub(fname, imageroot, linkroot){ | ||||||
|     EventEmitter.call(this); |     EventEmitter.call(this); | ||||||
| @ -493,9 +503,27 @@ EPub.prototype.getChapter = function(id, callback){ | |||||||
|  |  | ||||||
|             var str = data.toString("utf-8"); |             var str = data.toString("utf-8"); | ||||||
|  |  | ||||||
|             // strip <body> |             // remove linebreaks (no multi line matches in JS regex!) | ||||||
|             str.replace(/\n/g,"\u0000").replace(/<body[^>]*?>(.*)<\/body[^>]*?>/i, function(o,d){ |             str = str.replace(/\r?\n/g,"\u0000"); | ||||||
|                 str = d.replace(/\u0000/g,"\n").trim(); |  | ||||||
|  |             // keep only <body> contents | ||||||
|  |             str.replace(/<body[^>]*?>(.*)<\/body[^>]*?>/i, function(o,d){ | ||||||
|  |                 str = d.trim(); | ||||||
|  |             }); | ||||||
|  |  | ||||||
|  |             // remove <script> blocks if any | ||||||
|  |             str = str.replace(/<script[^>]*?>(.*?)<\/script[^>]*?>/ig, function(o, s){ | ||||||
|  |                 return ""; | ||||||
|  |             }); | ||||||
|  |  | ||||||
|  |             // remove <style> blocks if any | ||||||
|  |             str = str.replace(/<style[^>]*?>(.*?)<\/style[^>]*?>/ig, function(o, s){ | ||||||
|  |                 return ""; | ||||||
|  |             }); | ||||||
|  |  | ||||||
|  |             // remove onEvent handlers | ||||||
|  |             str = str.replace(/(\s)(on\w+)(\s*=\s*["']?[^"'\s>]*?["'\s>])/g, function(o, a,b,c){ | ||||||
|  |                 return a + "skip-" + b + c; | ||||||
|             }); |             }); | ||||||
|  |  | ||||||
|             // replace images |             // replace images | ||||||
| @ -545,6 +573,9 @@ EPub.prototype.getChapter = function(id, callback){ | |||||||
|                  |                  | ||||||
|             }).bind(this)); |             }).bind(this)); | ||||||
|  |  | ||||||
|  |             // bring back linebreaks | ||||||
|  |             str = str.replace(/\u0000/g,"\n").trim(); | ||||||
|  |  | ||||||
|             callback(null, str); |             callback(null, str); | ||||||
|  |  | ||||||
|         }).bind(this)); |         }).bind(this)); | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user
	 andris9
					andris9