Use JSZip and make everything a promise

lib/epub.d.ts (vendored, new file)
@@ -0,0 +1,168 @@
import * as JSZip from "jszip";
/**
 * new EPub(fname[, imageroot][, linkroot])
 * - fname (String): filename for the ebook
 * - imageroot (String): URL prefix for images
 * - linkroot (String): URL prefix for links
 *
 * Creates an object for parsing epub files; every parsing method returns a promise.
 *
 *     var epub = new EPub("book.epub");
 *     epub.parse().then(function () {
 *         console.log(epub.spine);
 *     }).catch(function (error) { ... });
 *
 * Image and link URL format is:
 *
 *     imageroot + img_id + img_zip_path
 *
 * So an image "logo.jpg" which resides in "OPT/" in the zip archive
 * and is listed in the manifest with id "logo_img" will have the
 * following url (providing that imageroot is "/images/"):
 *
 *     /images/logo_img/OPT/logo.jpg
 **/
export declare class EPub {
    zip: JSZip;
    filename: string;
    imageroot: string;
    linkroot: string;
    containerFile: string;
    mimeFile: string;
    rootFile: string;
    metadata: {
        publisher?: string;
        language?: string;
        title?: string;
        subject?: string;
        description?: string;
        creator?: string;
        creatorFileAs?: string;
        date?: string;
        ISBN?: string;
        UUID?: string;
    };
    manifest: {};
    spine: {
        toc: any;
        contents: any[];
    };
    flow: any[];
    toc: any[];
    version: string;
    constructor(fname: any, imageroot: any, linkroot: any);
    /**
     * EPub#parse() -> undefined
     *
     * Starts the parser, needs to be called by the script
     **/
    parse(): Promise<void>;
    /**
     * EPub#open() -> undefined
     *
     * Opens the epub file with Zip unpacker, retrieves file listing
     * and runs mime type check
     **/
    open(): Promise<void>;
    /**
     * EPub#checkMimeType() -> undefined
     *
     * Checks if there's a file called "mimetype" and that its contents
     * are "application/epub+zip". On success runs root file check.
     **/
    checkMimeType(): Promise<void>;
    /**
     * EPub#getRootFiles() -> undefined
     *
     * Looks for a "meta-inf/container.xml" file and searches for a
     * rootfile element with mime type "application/oebps-package+xml".
     * On success calls the rootfile parser
     **/
    getRootFiles(): Promise<void>;
    /**
     * EPub#handleRootFile() -> undefined
     *
     * Parses the rootfile XML and calls rootfile parser
     **/
    handleRootFile(): Promise<void>;
    /**
     * EPub#parseRootFile() -> undefined
     *
     * Parses elements "metadata", "manifest", "spine" and, if present, the TOC
     **/
    parseRootFile(rootfile: any): Promise<void>;
    /**
     * EPub#parseMetadata() -> undefined
     *
     * Parses "metadata" block (book metadata, title, author etc.)
     **/
    parseMetadata(metadata: any): void;
    /**
     * EPub#parseManifest() -> undefined
     *
     * Parses "manifest" block (all items included, html files, images, styles)
     **/
    parseManifest(manifest: any): void;
    /**
     * EPub#parseSpine() -> undefined
     *
     * Parses "spine" block (all html elements that are shown to the reader)
     **/
    parseSpine(spine: any): void;
    /**
     * EPub#parseTOC() -> undefined
     *
     * Parses ncx file for table of contents (title, html file)
     **/
    parseTOC(): Promise<void>;
    /**
     * EPub#walkNavMap(branch, path, id_list[, level]) -> Array
     * - branch (Array | Object): NCX NavPoint object
     * - path (Array): Base path
     * - id_list (Object): map of file paths and id values
     * - level (Number): depth
     *
     * Walks the NavMap object through all levels and finds elements
     * for TOC
     **/
    walkNavMap(branch: any, path: any, id_list: any, level?: any): any[];
    /**
     * EPub#getChapter(id) -> Promise
     * - id (String): Manifest id value for a chapter
     *
     * Finds a chapter text for an id. Replaces image and link URLs, removes
     * <head> etc. elements. Resolves only for chapters with mime type application/xhtml+xml.
     **/
    getChapter(id: any): Promise<string>;
    /**
     * EPub#getChapterRaw(id) -> Promise
     * - id (String): Manifest id value for a chapter
     *
     * Resolves with the raw chapter text for an id.
     **/
    getChapterRaw(id: any): Promise<string>;
    /**
     * EPub#getImage(id) -> Promise
     * - id (String): Manifest id value for an image
     *
     * Finds an image for an id and resolves with the image as a Buffer.
     * Resolves only for items whose mime type starts with "image/".
     **/
    getImage(id: any): Promise<Buffer>;
    /**
     * EPub#getFile(id) -> Promise
     * - id (String): Manifest id value for a file
     *
     * Finds a file for an id and resolves with the file contents as a Buffer.
     **/
    getFile(id: any): Promise<Buffer>;
}
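
The declarations above describe the promise-based surface this commit introduces. A minimal TypeScript consumer sketch; the file name and the two URL roots are placeholders:

    import { EPub } from "./epub";

    const epub = new EPub("book.epub", "/images/", "/links/");

    async function dump(): Promise<void> {
        await epub.parse();   // open + mimetype check + OPF parsing + TOC
        console.log(epub.metadata.title, "EPUB version", epub.version);
        console.log(epub.toc.map(entry => entry.title));
    }

    dump().catch(err => console.error(err));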

lib/epub.js (new file)
@@ -0,0 +1,651 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
var xml2js = require('xml2js');
var xml2jsOptions = xml2js.defaults['0.1'];
var EventEmitter = require('events').EventEmitter;
// try {
//     // zipfile is an optional dependency:
//     var ZipFile = require("zipfile").ZipFile;
// } catch (err) {
//     // Mock zipfile using pure-JS adm-zip:
//     var AdmZip = require('adm-zip');
//     var ZipFile = function (filename) {
//         this.admZip = new AdmZip(filename);
//         this.names = this.admZip.getEntries().map(function (zipEntry) {
//             return zipEntry.entryName;
//         });
//         this.count = this.names.length;
//     };
//     ZipFile.prototype.readFile = function (name, cb) {
//         this.admZip.readFileAsync(this.admZip.getEntry(name), function (buffer, error) {
//             // `error` is bogus right now, so let's just drop it.
//             // see https://github.com/cthackers/adm-zip/pull/88
//             return cb(null, buffer);
//         });
//     };
// }
const fs = require("fs");
const JSZip = require("jszip");
//TODO: Cache parsed data
/**
 * new EPub(fname[, imageroot][, linkroot])
 * - fname (String): filename for the ebook
 * - imageroot (String): URL prefix for images
 * - linkroot (String): URL prefix for links
 *
 * Creates an object for parsing epub files; every parsing method returns a promise.
 *
 *     var epub = new EPub("book.epub");
 *     epub.parse().then(function () {
 *         console.log(epub.spine);
 *     }).catch(function (error) { ... });
 *
 * Image and link URL format is:
 *
 *     imageroot + img_id + img_zip_path
 *
 * So an image "logo.jpg" which resides in "OPT/" in the zip archive
 * and is listed in the manifest with id "logo_img" will have the
 * following url (providing that imageroot is "/images/"):
 *
 *     /images/logo_img/OPT/logo.jpg
 **/
class EPub {
    constructor(fname, imageroot, linkroot) {
        this.metadata = {};
        this.manifest = {};
        this.spine = { toc: undefined, contents: [] };
        this.flow = [];
        this.toc = [];
        this.filename = fname;
        this.imageroot = (imageroot || "/images/").trim();
        this.linkroot = (linkroot || "/links/").trim();
        if (this.imageroot.substr(-1) != "/") {
            this.imageroot += "/";
        }
        if (this.linkroot.substr(-1) != "/") {
            this.linkroot += "/";
        }
    }
    /**
     * EPub#parse() -> undefined
     *
     * Starts the parser, needs to be called by the script
     **/
    async parse() {
        this.containerFile = undefined;
        this.mimeFile = undefined;
        this.rootFile = undefined;
        this.metadata = {};
        this.manifest = {};
        this.spine = { toc: undefined, contents: [] };
        this.flow = [];
        this.toc = [];
        await this.open();
    }
    /**
     * EPub#open() -> undefined
     *
     * Opens the epub file with Zip unpacker, retrieves file listing
     * and runs mime type check
     **/
    async open() {
        this.zip = await new Promise((resolve, reject) => {
            fs.readFile(this.filename, (err, data) => {
                if (err) {
                    reject(err);
                }
                else {
                    resolve(data);
                }
            });
        }).then((data) => {
            return JSZip.loadAsync(data);
        });
        if (Object.keys(this.zip.files).length < 1) {
            throw new Error("No files in archive");
        }
        await this.checkMimeType();
    }
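    // The promise wrapper above uses the callback form of fs.readFile; with a Node
    // version that ships fs.promises the same open() could be written more directly
    // (a sketch, not the compiled source):
    //
    //     async open() {
    //         const data = await fs.promises.readFile(this.filename); // rejects on I/O errors
    //         this.zip = await JSZip.loadAsync(data);
    //         if (Object.keys(this.zip.files).length < 1) {
    //             throw new Error("No files in archive");
    //         }
    //         await this.checkMimeType();
    //     }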
    /**
     * EPub#checkMimeType() -> undefined
     *
     * Checks if there's a file called "mimetype" and that its contents
     * are "application/epub+zip". On success runs root file check.
     **/
    async checkMimeType() {
        var i, len;
        for (let file in this.zip.files) {
            if (file.toLowerCase() == "mimetype") {
                this.mimeFile = file;
                break;
            }
        }
        if (!this.mimeFile) {
            throw new Error("No mimetype file in archive");
        }
        let data = await this.zip.file(this.mimeFile).async("nodebuffer");
        let txt = data.toString("utf-8").toLowerCase().trim();
        if (txt != "application/epub+zip") {
            throw new Error("Unsupported mime type");
        }
        await this.getRootFiles();
    }
    /**
     * EPub#getRootFiles() -> undefined
     *
     * Looks for a "meta-inf/container.xml" file and searches for a
     * rootfile element with mime type "application/oebps-package+xml".
     * On success calls the rootfile parser
     **/
    async getRootFiles() {
        for (let file in this.zip.files) {
            if (file.toLowerCase() == "meta-inf/container.xml") {
                this.containerFile = file;
                break;
            }
        }
        if (!this.containerFile)
            throw new Error("No container file in archive");
        let data = await this.zip.files[this.containerFile].async("nodebuffer");
        var xml = data.toString("utf-8").toLowerCase().trim(), xmlparser = new xml2js.Parser(xml2jsOptions);
        let res = await new Promise((resolve, reject) => {
            xmlparser.on("end", (result) => {
                if (!result.rootfiles || !result.rootfiles.rootfile) {
                    reject(new Error("No rootfiles found"));
                    console.dir(result);
                    return;
                }
                var rootfile = result.rootfiles.rootfile, filename = undefined, i, len;
                if (Array.isArray(rootfile)) {
                    for (i = 0, len = rootfile.length; i < len; i++) {
                        if (rootfile[i]["@"]["media-type"] &&
                            rootfile[i]["@"]["media-type"] == "application/oebps-package+xml" &&
                            rootfile[i]["@"]["full-path"]) {
                            filename = rootfile[i]["@"]["full-path"].toLowerCase().trim();
                            break;
                        }
                    }
                }
                else if (rootfile["@"]) {
                    if (rootfile["@"]["media-type"] != "application/oebps-package+xml" || !rootfile["@"]["full-path"]) {
                        reject(new Error("Rootfile in unknown format"));
                        return;
                    }
                    filename = rootfile["@"]["full-path"].toLowerCase().trim();
                }
                if (!filename) {
                    reject(new Error("Empty rootfile"));
                    return;
                }
                for (let file in this.zip.files) {
                    if (file == filename) {
                        this.rootFile = file;
                        break;
                    }
                }
                if (!this.rootFile) {
                    reject(new Error("Rootfile not found from archive"));
                    return;
                }
                resolve();
            });
            xmlparser.on("error", (err) => {
                reject(new Error("Parsing container XML failed"));
                return;
            });
            xmlparser.parseString(xml);
        });
        await this.handleRootFile();
    }
    /**
     * EPub#handleRootFile() -> undefined
     *
     * Parses the rootfile XML and calls rootfile parser
     **/
    async handleRootFile() {
        let data = await this.zip.files[this.rootFile].async("nodebuffer");
        var xml = data.toString("utf-8");
        let rf = await new Promise((resolve, reject) => {
            let xmlparser = new xml2js.Parser(xml2jsOptions);
            xmlparser.on("end", (data) => {
                resolve(data);
            });
            xmlparser.on("error", err => {
                reject(err);
            });
            xmlparser.parseString(xml);
        });
        await this.parseRootFile(rf);
    }
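    // getRootFiles, handleRootFile and parseTOC each wrap the same xml2js
    // "end"/"error" event pair in a Promise by hand; a shared helper would be
    // enough for all three (a sketch, not part of the compiled source):
    //
    //     parseXml(xml) {
    //         return new Promise((resolve, reject) => {
    //             const xmlparser = new xml2js.Parser(xml2jsOptions);
    //             xmlparser.on("end", result => resolve(result));
    //             xmlparser.on("error", err => reject(err));
    //             xmlparser.parseString(xml);
    //         });
    //     }
    //
    //     // handleRootFile would then reduce to: let rf = await this.parseXml(xml);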
    /**
     * EPub#parseRootFile() -> undefined
     *
     * Parses elements "metadata", "manifest", "spine" and, if present, the TOC
     **/
    async parseRootFile(rootfile) {
        this.version = rootfile['@'].version || '2.0';
        var i, len, keys, keyparts, key;
        keys = Object.keys(rootfile);
        for (i = 0, len = keys.length; i < len; i++) {
            keyparts = keys[i].split(":");
            key = (keyparts.pop() || "").toLowerCase().trim();
            switch (key) {
                case "metadata":
                    this.parseMetadata(rootfile[keys[i]]);
                    break;
                case "manifest":
                    this.parseManifest(rootfile[keys[i]]);
                    break;
                case "spine":
                    this.parseSpine(rootfile[keys[i]]);
                    break;
                case "guide":
                    //this.parseGuide(rootfile[keys[i]]);
                    break;
            }
        }
        if (this.spine.toc) {
            await this.parseTOC();
        }
    }
    /**
     * EPub#parseMetadata() -> undefined
     *
     * Parses "metadata" block (book metadata, title, author etc.)
     **/
    parseMetadata(metadata) {
        var i, j, len, keys, keyparts, key;
        keys = Object.keys(metadata);
        for (i = 0, len = keys.length; i < len; i++) {
            keyparts = keys[i].split(":");
            key = (keyparts.pop() || "").toLowerCase().trim();
            switch (key) {
                case "publisher":
                    if (Array.isArray(metadata[keys[i]])) {
                        this.metadata.publisher = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim();
                    }
                    else {
                        this.metadata.publisher = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim();
                    }
                    break;
                case "language":
                    if (Array.isArray(metadata[keys[i]])) {
                        this.metadata.language = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").toLowerCase().trim();
                    }
                    else {
                        this.metadata.language = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").toLowerCase().trim();
                    }
                    break;
                case "title":
                    if (Array.isArray(metadata[keys[i]])) {
                        this.metadata.title = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim();
                    }
                    else {
                        this.metadata.title = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim();
                    }
                    break;
                case "subject":
                    if (Array.isArray(metadata[keys[i]])) {
                        this.metadata.subject = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim();
                    }
                    else {
                        this.metadata.subject = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim();
                    }
                    break;
                case "description":
                    if (Array.isArray(metadata[keys[i]])) {
                        this.metadata.description = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim();
                    }
                    else {
                        this.metadata.description = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim();
                    }
                    break;
                case "creator":
                    if (Array.isArray(metadata[keys[i]])) {
                        this.metadata.creator = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim();
                        this.metadata.creatorFileAs = String(metadata[keys[i]][0] && metadata[keys[i]][0]['@'] && metadata[keys[i]][0]['@']["opf:file-as"] || this.metadata.creator).trim();
                    }
                    else {
                        this.metadata.creator = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim();
                        this.metadata.creatorFileAs = String(metadata[keys[i]]['@'] && metadata[keys[i]]['@']["opf:file-as"] || this.metadata.creator).trim();
                    }
                    break;
                case "date":
                    if (Array.isArray(metadata[keys[i]])) {
                        this.metadata.date = String(metadata[keys[i]][0] && metadata[keys[i]][0]["#"] || metadata[keys[i]][0] || "").trim();
                    }
                    else {
                        this.metadata.date = String(metadata[keys[i]]["#"] || metadata[keys[i]] || "").trim();
                    }
                    break;
                case "identifier":
                    if (metadata[keys[i]]["@"] && metadata[keys[i]]["@"]["opf:scheme"] == "ISBN") {
                        this.metadata.ISBN = String(metadata[keys[i]]["#"] || "").trim();
                    }
                    else if (metadata[keys[i]]["@"] && metadata[keys[i]]["@"].id && metadata[keys[i]]["@"].id.match(/uuid/i)) {
                        this.metadata.UUID = String(metadata[keys[i]]["#"] || "").replace('urn:uuid:', '').toUpperCase().trim();
                    }
                    else if (Array.isArray(metadata[keys[i]])) {
                        for (j = 0; j < metadata[keys[i]].length; j++) {
                            if (metadata[keys[i]][j]["@"]) {
                                if (metadata[keys[i]][j]["@"]["opf:scheme"] == "ISBN") {
                                    this.metadata.ISBN = String(metadata[keys[i]][j]["#"] || "").trim();
                                }
                                else if (metadata[keys[i]][j]["@"].id && metadata[keys[i]][j]["@"].id.match(/uuid/i)) {
                                    this.metadata.UUID = String(metadata[keys[i]][j]["#"] || "").replace('urn:uuid:', '').toUpperCase().trim();
                                }
                            }
                        }
                    }
                    break;
            }
        }
        var metas = metadata['meta'] || {};
        Object.keys(metas).forEach((key) => {
            var meta = metas[key];
            if (meta['@'] && meta['@'].name) {
                var name = meta['@'].name;
                this.metadata[name] = meta['@'].content;
            }
            if (meta['#'] && meta['@'].property) {
                this.metadata[meta['@'].property] = meta['#'];
            }
            if (meta.name && meta.name == "cover") {
                this.metadata[meta.name] = meta.content;
            }
        }, this);
    }
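    // Every case above repeats the same "first array element, then '#' text node,
    // then the raw value" extraction; a small helper could capture that pattern
    // (a sketch, the name firstText is illustrative):
    //
    //     firstText(node) {
    //         const value = Array.isArray(node) ? node[0] : node;
    //         return String(value && value["#"] || value || "").trim();
    //     }
    //
    //     // e.g. this.metadata.title = this.firstText(metadata[keys[i]]);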
    /**
     * EPub#parseManifest() -> undefined
     *
     * Parses "manifest" block (all items included, html files, images, styles)
     **/
    parseManifest(manifest) {
        var i, len, path = this.rootFile.split("/"), element, path_str;
        path.pop();
        path_str = path.join("/");
        if (manifest.item) {
            for (i = 0, len = manifest.item.length; i < len; i++) {
                if (manifest.item[i]['@']) {
                    element = manifest.item[i]['@'];
                    if (element.href && element.href.substr(0, path_str.length) != path_str) {
                        element.href = path.concat([element.href]).join("/");
                    }
                    this.manifest[manifest.item[i]['@'].id] = element;
                }
            }
        }
    }
    /**
     * EPub#parseSpine() -> undefined
     *
     * Parses "spine" block (all html elements that are shown to the reader)
     **/
    parseSpine(spine) {
        var i, len, path = this.rootFile.split("/"), element;
        path.pop();
        if (spine['@'] && spine['@'].toc) {
            this.spine.toc = this.manifest[spine['@'].toc] || false;
        }
        if (spine.itemref) {
            if (!Array.isArray(spine.itemref)) {
                spine.itemref = [spine.itemref];
            }
            for (i = 0, len = spine.itemref.length; i < len; i++) {
                if (spine.itemref[i]['@']) {
                    if (element = this.manifest[spine.itemref[i]['@'].idref]) {
                        this.spine.contents.push(element);
                    }
                }
            }
        }
        this.flow = this.spine.contents;
    }
    /**
     * EPub#parseTOC() -> undefined
     *
     * Parses ncx file for table of contents (title, html file)
     **/
    async parseTOC() {
        var i, len, path = this.spine.toc.href.split("/"), id_list = {}, keys;
        path.pop();
        keys = Object.keys(this.manifest);
        for (i = 0, len = keys.length; i < len; i++) {
            id_list[this.manifest[keys[i]].href] = keys[i];
        }
        let data = await this.zip.files[this.spine.toc.href].async("nodebuffer");
        var xml = data.toString("utf-8");
        await new Promise((resolve, reject) => {
            let xmlparser = new xml2js.Parser(xml2jsOptions);
            xmlparser.on("end", result => {
                if (result.navMap && result.navMap.navPoint) {
                    this.toc = this.walkNavMap(result.navMap.navPoint, path, id_list);
                }
                resolve();
            });
            xmlparser.on("error", (err) => {
                reject(err);
            });
            xmlparser.parseString(xml);
        });
    }
    /**
     * EPub#walkNavMap(branch, path, id_list[, level]) -> Array
     * - branch (Array | Object): NCX NavPoint object
     * - path (Array): Base path
     * - id_list (Object): map of file paths and id values
     * - level (Number): depth
     *
     * Walks the NavMap object through all levels and finds elements
     * for TOC
     **/
    walkNavMap(branch, path, id_list, level) {
        level = level || 0;
        // don't go too far
        if (level > 7) {
            return [];
        }
        var output = [];
        if (!Array.isArray(branch)) {
            branch = [branch];
        }
        for (var i = 0; i < branch.length; i++) {
            if (branch[i].navLabel) {
                var title = '';
                if (branch[i].navLabel && typeof branch[i].navLabel.text == 'string') {
                    title = branch[i].navLabel && branch[i].navLabel.text || branch[i].navLabel === branch[i].navLabel ?
                        (branch[i].navLabel && branch[i].navLabel.text || branch[i].navLabel || "").trim() : '';
                }
                var order = Number(branch[i]["@"] && branch[i]["@"].playOrder || 0);
                if (isNaN(order)) {
                    order = 0;
                }
                var href = '';
                if (branch[i].content && branch[i].content["@"] && typeof branch[i].content["@"].src == 'string') {
                    href = branch[i].content["@"].src.trim();
                }
                var element = {
                    level: level,
                    order: order,
                    title: title,
                    href: undefined,
                    id: undefined
                };
                if (href) {
                    href = path.concat([href]).join("/");
                    element.href = href;
                    if (id_list[element.href]) {
                        // link existing object
                        element = this.manifest[id_list[element.href]];
                        element.title = title;
                        element.order = order;
                        element.level = level;
                    }
                    else {
                        // use new one
                        element.href = href;
                        element.id = (branch[i]["@"] && branch[i]["@"].id || "").trim();
                    }
                    output.push(element);
                }
            }
            if (branch[i].navPoint) {
                output = output.concat(this.walkNavMap(branch[i].navPoint, path, id_list, level + 1));
            }
        }
        return output;
    }
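    // Entries pushed into the TOC above share the shape of the element literal;
    // as a TypeScript annotation for the declaration file it would be roughly
    // (a sketch, toc is currently typed as any[]):
    //
    //     interface TocElement {
    //         level: number;   // nesting depth in the NCX navMap
    //         order: number;   // playOrder attribute, 0 when missing
    //         title: string;   // navLabel text
    //         href?: string;   // archive path, joined with the NCX base path
    //         id?: string;     // manifest id, or the NCX navPoint id for new entries
    //     }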
    /**
     * EPub#getChapter(id) -> Promise
     * - id (String): Manifest id value for a chapter
     *
     * Finds a chapter text for an id. Replaces image and link URLs, removes
     * <head> etc. elements. Resolves only for chapters with mime type application/xhtml+xml.
     **/
    async getChapter(id) {
        let str = await this.getChapterRaw(id);
        var i, len, path = this.rootFile.split("/"), keys = Object.keys(this.manifest);
        path.pop();
        // remove linebreaks (no multi line matches in JS regex!)
        str = str.replace(/\r?\n/g, "\u0000");
        // keep only <body> contents
        str.replace(/<body[^>]*?>(.*)<\/body[^>]*?>/i, function (o, d) {
            str = d.trim();
            return "";
        });
        // remove <script> blocks if any
        str = str.replace(/<script[^>]*?>(.*?)<\/script[^>]*?>/ig, function (o, s) {
            return "";
        });
        // remove <style> blocks if any
        str = str.replace(/<style[^>]*?>(.*?)<\/style[^>]*?>/ig, function (o, s) {
            return "";
        });
        // remove onEvent handlers
        str = str.replace(/(\s)(on\w+)(\s*=\s*["']?[^"'\s>]*?["'\s>])/g, function (o, a, b, c) {
            return a + "skip-" + b + c;
        });
        // replace images
        str = str.replace(/(\ssrc\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (o, a, b, c) => {
            var img = path.concat([b]).join("/").trim(), element;
            for (i = 0, len = keys.length; i < len; i++) {
                if (this.manifest[keys[i]].href == img) {
                    element = this.manifest[keys[i]];
                    break;
                }
            }
            // include only images from manifest
            if (element) {
                return a + this.imageroot + element.id + "/" + img + c;
            }
            else {
                return "";
            }
        });
        // replace links
        str = str.replace(/(\shref\s*=\s*["']?)([^"'\s>]*?)(["'\s>])/g, (o, a, b, c) => {
            var linkparts = b && b.split("#"), link = path.concat([(linkparts.shift() || "")]).join("/").trim(), element;
            for (i = 0, len = keys.length; i < len; i++) {
                if (this.manifest[keys[i]].href.split("#")[0] == link) {
                    element = this.manifest[keys[i]];
                    break;
                }
            }
            if (linkparts.length) {
                link += "#" + linkparts.join("#");
            }
            // rewrite only links that point to files in the manifest
            if (element) {
                return a + this.linkroot + element.id + "/" + link + c;
            }
            else {
                return a + b + c;
            }
        });
        // bring back linebreaks
        str = str.replace(/\u0000/g, "\n").trim();
        return str;
    }
    /**
     * EPub#getChapterRaw(id) -> Promise
     * - id (String): Manifest id value for a chapter
     *
     * Resolves with the raw chapter text for an id.
     **/
    async getChapterRaw(id) {
        if (this.manifest[id]) {
            if (!(this.manifest[id]['media-type'] == "application/xhtml+xml" || this.manifest[id]['media-type'] == "image/svg+xml")) {
                throw new Error("Invalid mime type for chapter");
            }
            return this.zip.files[this.manifest[id].href].async("nodebuffer").then(b => b.toString("utf8"));
        }
        else {
            throw new Error("File not found");
        }
    }
    /**
     * EPub#getImage(id) -> Promise
     * - id (String): Manifest id value for an image
     *
     * Finds an image for an id and resolves with the image as a Buffer.
     * Resolves only for items whose mime type starts with "image/".
     **/
    async getImage(id) {
        if (this.manifest[id]) {
            if ((this.manifest[id]['media-type'] || "").toLowerCase().trim().substr(0, 6) != "image/") {
                throw new Error("Invalid mime type for image");
            }
            return this.getFile(id);
        }
        else {
            throw new Error("File not found");
        }
    }
    /**
     * EPub#getFile(id) -> Promise
     * - id (String): Manifest id value for a file
     *
     * Finds a file for an id and resolves with the file contents as a Buffer.
     **/
    async getFile(id) {
        if (this.manifest[id]) {
            let data = await this.zip.files[this.manifest[id].href].async("nodebuffer");
            return data;
            // this.zip.files(this.manifest[id].href, (function (err, data) {
            //     if (err) {
            //         callback(new Error("Reading archive failed"));
            //         return;
            //     }
            //     callback(null, data, this.manifest[id]['media-type']);
            // }).bind(this));
        }
        else {
            throw new Error("File not found");
        }
    }
}
exports.EPub = EPub;
//# sourceMappingURL=epub.js.map
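
A consumer-side sketch for the binary accessors above; the manifest id "cover-image" and the output path are placeholders:

    import { promises as fsp } from "fs";
    import { EPub } from "./epub";

    // Write one image from the archive to disk; getImage() rejects unless the
    // manifest item's media-type starts with "image/".
    async function saveImage(epub: EPub, id: string, target: string): Promise<void> {
        const buffer = await epub.getImage(id);
        await fsp.writeFile(target, buffer);
    }

    // e.g. await saveImage(epub, "cover-image", "cover.jpg");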

lib/epub.js.map (new file): file diff suppressed because one or more lines are too long

lib/test.d.ts (vendored, new file)
@@ -0,0 +1 @@
export {};

lib/test.js (new file)
@@ -0,0 +1,9 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const epub_1 = require("./epub");
var epub = new epub_1.EPub("alice.epub", "/imagewebroot/", "/articlewebroot/");
epub.parse().then(async () => {
    console.log(epub);
    console.log(await epub.getChapter("item32"));
}).catch(err => console.error(err));
//# sourceMappingURL=test.js.map
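
The chapter id "item32" above is specific to the sample book; the valid ids for getChapter() can be listed from the spine once parsing has finished (a sketch using the same placeholder file name as the test script):

    import { EPub } from "./epub";

    const epub = new EPub("alice.epub", "/imagewebroot/", "/articlewebroot/");
    epub.parse().then(() => {
        // Every flow entry is a manifest item; its id is what getChapter() expects.
        for (const item of epub.flow) {
            console.log(item.id, item.href, item["media-type"]);
        }
    }).catch(err => console.error(err));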

lib/test.js.map (new file)
@@ -0,0 +1 @@
{"version":3,"file":"test.js","sourceRoot":"","sources":["../src/test.ts"],"names":[],"mappings":";;AAAA,iCAA8B;AAE9B,IAAI,IAAI,GAAG,IAAI,WAAI,CAAC,YAAY,EAAE,gBAAgB,EAAE,kBAAkB,CAAC,CAAC;AACxE,IAAI,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE;IAC1B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAClB,OAAO,CAAC,GAAG,CAAC,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAA;AAC/C,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC"}