This commit is contained in:
andris9 2011-06-13 23:09:43 +03:00
parent 44df08632e
commit d0cd1048e6
7 changed files with 124 additions and 109 deletions

3
.gitignore vendored
View File

@ -1,2 +1,3 @@
*.epub
test.* test.*
server.*
tasuja.epub

View File

@ -1,3 +1,80 @@
#epub #epub
**epub** is a node.js module to parse EPUB electronic book files. **epub** is a node.js module to parse EPUB electronic book files.
**NB!** Only ebooks in UTF-8 are currently supported!
## Installation
npm install epub
## Usage
var epub = new EPub(epubfile, imagewebroot, chapterwebroot);
Where
* **epubfile** is the file path to an EPUB file
* **imagewebroot** is the prefix for image URLs. If it's */images/* then the actual URL is going to be */images/IMG_ID/IMG_FILENAME*, `IMG_ID` can be used to fetch the image from the ebook with `getImage`
* **chapterwebroot** is the prefix for chapter URLs. If it's */chapter/* then the actual URL is going to be */chapter/CHAPTER_ID/CHAPTER_FILENAME*, `CHAPTER_ID` can be used to fetch the chapter from the ebook with `getChapter`
Before the contents of the ebook can be read, it must be opened (`EPub` is an `EventEmitter`).
epub.on("end", function(){
// epub is now usable
console.log(epub.metadata.title);
epub.getChapter("chapter_id", function(err, text){});
});
epub.parse();
## metadata
Property of the *epub* object that holds several metadata fields about the book.
epub = new EPub(...);
...
epub.metadata;
Available fields:
* **creator** Author of the book (if multiple authors, then the first on the list) (*Lewis Carroll*)
* **creatorFileAs** Author name on file (*Carroll, Lewis*)
* **title** Title of the book (*Alice's Adventures in Wonderland*)
* **language** Language code (*en* or *en-us* etc.)
* **subject** Topic of the book (*Fantasy*)
* **date** creation of the file (*2006-08-12*)
## flow
*flow* is a property of the *epub* object and holds the actual list of chapters (TOC is just an indication and can link to a # url inside a chapter file)
epub = new EPub(...);
...
epub.flow.forEach(function(chapter){
console.log(chapter.id);
});
Chapter `id` is needed to load the chapters with `getChapter`
## toc
*toc* is a property of the *epub* object and indicates a list of titles/urls for the TOC. The actual chapter and its ID need to be detected with the `href` property
## getChapter(chapter_id, callback)
Load chapter text from the ebook.
var epub = new EPub(...);
...
epub.getChapter("chapter1", function(error, text){});
## getImage(image_id, callback)
Load image (as a Buffer value) from the ebook.
var epub = new EPub(...);
...
epub.getImage("image1", function(error, img, mimeType){});

View File

@ -60,6 +60,7 @@ EPub.prototype.parse = function () {
this.metadata = {}; this.metadata = {};
this.manifest = {}; this.manifest = {};
this.spine = {toc: false, contents: []}; this.spine = {toc: false, contents: []};
this.flow = [];
this.toc = []; this.toc = [];
this.open(); this.open();
@ -248,7 +249,6 @@ EPub.prototype.handleRootFile = function () {
EPub.prototype.parseRootFile = function (rootfile) { EPub.prototype.parseRootFile = function (rootfile) {
var i, len, keys, keyparts, key; var i, len, keys, keyparts, key;
keys = Object.keys(rootfile); keys = Object.keys(rootfile);
for (i = 0, len = keys.length; i < len; i++) { for (i = 0, len = keys.length; i < len; i++) {
keyparts = keys[i].split(":"); keyparts = keys[i].split(":");
@ -401,6 +401,9 @@ EPub.prototype.parseSpine = function (spine) {
} }
if (spine.itemref) { if (spine.itemref) {
if(!Array.isArray(spine.itemref)){
spine.itemref = [spine.itemref];
}
for (i = 0, len = spine.itemref.length; i < len; i++) { for (i = 0, len = spine.itemref.length; i < len; i++) {
if (spine.itemref[i]['@']) { if (spine.itemref[i]['@']) {
if (element = this.manifest[spine.itemref[i]['@'].idref]) { if (element = this.manifest[spine.itemref[i]['@'].idref]) {
@ -409,6 +412,7 @@ EPub.prototype.parseSpine = function (spine) {
} }
} }
} }
this.flow = this.spine.contents;
}; };
/** /**

BIN
example/alice.epub Normal file

Binary file not shown.

38
example/example.js Normal file
View File

@ -0,0 +1,38 @@
/**
 * Example script: parses an EPUB file and prints its metadata, its reading
 * order (flow), its table of contents and the beginning of the first chapter.
 */
var EPub = require("../epub");

// The second and third arguments are the URL prefixes used for image and
// chapter links
var epub = new EPub("alice.epub", "/imagewebroot/", "/articlewebroot/");

// Parsing problems are reported through the "error" event
epub.on("error", function(err){
    console.log("ERROR\n-----");
    throw err;
});

// "end" fires once parsing has finished and the book contents can be read
epub.on("end", function(){
    console.log("METADATA:\n");
    console.log(epub.metadata);

    console.log("\nSPINE:\n");
    console.log(epub.flow);

    console.log("\nTOC:\n");
    console.log(epub.toc);

    // flow starts out as an empty list, so a book without any spine items
    // has no first chapter to load
    if(!epub.flow.length){
        console.log("\nNO CHAPTERS FOUND\n");
        return;
    }

    // get first chapter
    epub.getChapter(epub.flow[0].id, function(err, data){
        if(err){
            console.log(err);
            return;
        }
        console.log("\nFIRST CHAPTER:\n");
        console.log(data.slice(0, 512) + "..."); // first 512 characters
    });

    /*
    epub.getImage(image_id, function(err, data, mimeType){
        console.log(err || data);
        console.log(mimeType)
    });
    */
});

epub.parse();

View File

@ -1,105 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>EPUB reader</title>
<style type="text/css">
/* Page-wide serif typeface */
body{
    font-family: "luxi-serif-1","luxi-serif-2",serif;
}
h1, h2, h3, h4, h5{
    text-align: center;
}
/* Fixed-width, centred column that holds the rendered chapters */
#contents{
    width: 640px;
    margin:10px auto;
    padding: 20px;
    background: #D3CDAC;
}
p{
    text-align: justify;
}
/* Images may not exceed the 640px column width */
img{
    /* Disabled declarations. CSS has no "//" comment syntax, so they are
       kept inside a real comment instead of relying on parser error recovery:
       display: block;
       margin: 5px 0;
    */
    max-width: 640px;
}
/* Horizontal rules are drawn as a single dashed line */
hr{
    width: 90%;
    height: 0px;
    border:0;
    border-top: 1px dashed #B75F14;
    margin: 10px auto;
}
/* Dotted separator below every chapter wrapper */
.chapter{
    border-bottom: 5px dotted #604328;
    padding-bottom: 10px;
    margin-bottom: 10px;
}
.center{
    text-align: center;
}
</style>
<script src="https://ajax.googleapis.com/ajax/libs/prototype/1.7.0.0/prototype.js" type="text/javascript"></script>
<script type="text/javascript">
// When the DOM is ready, request the book outline from the server and hand
// it to renderPage.
$(document).observe("dom:loaded", function(){
    var handleOutline = function(response){
        var outline;
        try{
            outline = response.responseText.evalJSON();
        }catch(E){
            // unparsable response: fall back to an empty outline
            outline = {toc:[], flow:[]};
        }
        renderPage(outline);
    };

    new Ajax.Request("/contents", {
        method: "post",
        onComplete: handleOutline
    });
});
/**
 * Loads the chapters listed in contents.flow one at a time and appends each
 * one to the #contents element, wrapped in its own div.chapter.
 *
 * The next chapter is requested only after the previous response has been
 * inserted, so chapters appear in flow order. Entries are removed from
 * contents.flow as they are requested.
 *
 * @param {Object} contents Book outline; only the flow array (objects with
 *                          an id property) is read. Nothing happens when it
 *                          is missing or empty.
 */
function renderPage(contents){
    if(!contents || !contents.flow || !contents.flow.length){
        return;
    }

    var chapter = contents.flow.shift();

    new Ajax.Request("/chapter/" + chapter.id, {
        method: "post",
        onComplete: function(response){
            // Build the wrapper as a separate node instead of using
            // innerHTML += on the container, which would re-parse every
            // chapter inserted so far on each append
            var chapterElement = document.createElement("div");
            chapterElement.className = "chapter";
            chapterElement.innerHTML = response.responseText;
            $("contents").appendChild(chapterElement);

            // renderPage returns immediately once the flow is exhausted
            renderPage(contents);
        }
    });
}
</script>
<script type="text/javascript" src="http://use.typekit.com/qbv6mfk.js"></script>
<script type="text/javascript">try{Typekit.load();}catch(e){}</script>
</head>
<body>
<div id="toc"></div>
<div id="contents"></div>
</body>
</html>