Improved cover extraction for epub files
This commit is contained in:
parent
4379669cf8
commit
8e2536c53b
3 changed files with 95 additions and 66 deletions
78
cps/comic.py
78
cps/comic.py
|
@ -1,7 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
|
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
|
||||||
# Copyright (C) 2018 OzzieIsaacs
|
# Copyright (C) 2018-2022 OzzieIsaacs
|
||||||
#
|
#
|
||||||
# This program is free software: you can redistribute it and/or modify
|
# This program is free software: you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
@ -18,19 +18,16 @@
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from . import logger, isoLanguages
|
from . import logger, isoLanguages, cover
|
||||||
from .constants import BookMeta
|
from .constants import BookMeta
|
||||||
|
|
||||||
|
|
||||||
log = logger.create()
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from wand.image import Image
|
from wand.image import Image
|
||||||
use_IM = True
|
use_IM = True
|
||||||
except (ImportError, RuntimeError) as e:
|
except (ImportError, RuntimeError) as e:
|
||||||
use_IM = False
|
use_IM = False
|
||||||
|
|
||||||
|
log = logger.create()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from comicapi.comicarchive import ComicArchive, MetaDataStyle
|
from comicapi.comicarchive import ComicArchive, MetaDataStyle
|
||||||
|
@ -51,29 +48,8 @@ except (ImportError, LookupError) as e:
|
||||||
use_rarfile = False
|
use_rarfile = False
|
||||||
use_comic_meta = False
|
use_comic_meta = False
|
||||||
|
|
||||||
NO_JPEG_EXTENSIONS = ['.png', '.webp', '.bmp']
|
|
||||||
COVER_EXTENSIONS = ['.png', '.webp', '.bmp', '.jpg', '.jpeg']
|
|
||||||
|
|
||||||
def _cover_processing(tmp_file_name, img, extension):
|
def _extract_cover_from_archive(original_file_extension, tmp_file_name, rar_executable):
|
||||||
tmp_cover_name = os.path.join(os.path.dirname(tmp_file_name), 'cover.jpg')
|
|
||||||
if extension in NO_JPEG_EXTENSIONS:
|
|
||||||
if use_IM:
|
|
||||||
with Image(blob=img) as imgc:
|
|
||||||
imgc.format = 'jpeg'
|
|
||||||
imgc.transform_colorspace('rgb')
|
|
||||||
imgc.save(filename=tmp_cover_name)
|
|
||||||
return tmp_cover_name
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
if img:
|
|
||||||
with open(tmp_cover_name, 'wb') as f:
|
|
||||||
f.write(img)
|
|
||||||
return tmp_cover_name
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_Cover_from_archive(original_file_extension, tmp_file_name, rarExecutable):
|
|
||||||
cover_data = extension = None
|
cover_data = extension = None
|
||||||
if original_file_extension.upper() == '.CBZ':
|
if original_file_extension.upper() == '.CBZ':
|
||||||
cf = zipfile.ZipFile(tmp_file_name)
|
cf = zipfile.ZipFile(tmp_file_name)
|
||||||
|
@ -81,7 +57,7 @@ def _extract_Cover_from_archive(original_file_extension, tmp_file_name, rarExecu
|
||||||
ext = os.path.splitext(name)
|
ext = os.path.splitext(name)
|
||||||
if len(ext) > 1:
|
if len(ext) > 1:
|
||||||
extension = ext[1].lower()
|
extension = ext[1].lower()
|
||||||
if extension in COVER_EXTENSIONS:
|
if extension in cover.COVER_EXTENSIONS:
|
||||||
cover_data = cf.read(name)
|
cover_data = cf.read(name)
|
||||||
break
|
break
|
||||||
elif original_file_extension.upper() == '.CBT':
|
elif original_file_extension.upper() == '.CBT':
|
||||||
|
@ -90,44 +66,44 @@ def _extract_Cover_from_archive(original_file_extension, tmp_file_name, rarExecu
|
||||||
ext = os.path.splitext(name)
|
ext = os.path.splitext(name)
|
||||||
if len(ext) > 1:
|
if len(ext) > 1:
|
||||||
extension = ext[1].lower()
|
extension = ext[1].lower()
|
||||||
if extension in COVER_EXTENSIONS:
|
if extension in cover.COVER_EXTENSIONS:
|
||||||
cover_data = cf.extractfile(name).read()
|
cover_data = cf.extractfile(name).read()
|
||||||
break
|
break
|
||||||
elif original_file_extension.upper() == '.CBR' and use_rarfile:
|
elif original_file_extension.upper() == '.CBR' and use_rarfile:
|
||||||
try:
|
try:
|
||||||
rarfile.UNRAR_TOOL = rarExecutable
|
rarfile.UNRAR_TOOL = rar_executable
|
||||||
cf = rarfile.RarFile(tmp_file_name)
|
cf = rarfile.RarFile(tmp_file_name)
|
||||||
for name in cf.namelist():
|
for name in cf.namelist():
|
||||||
ext = os.path.splitext(name)
|
ext = os.path.splitext(name)
|
||||||
if len(ext) > 1:
|
if len(ext) > 1:
|
||||||
extension = ext[1].lower()
|
extension = ext[1].lower()
|
||||||
if extension in COVER_EXTENSIONS:
|
if extension in cover.COVER_EXTENSIONS:
|
||||||
cover_data = cf.read(name)
|
cover_data = cf.read(name)
|
||||||
break
|
break
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
log.debug('Rarfile failed with error: %s', ex)
|
log.debug('Rarfile failed with error: {}'.format(ex))
|
||||||
return cover_data, extension
|
return cover_data, extension
|
||||||
|
|
||||||
|
|
||||||
def _extract_cover(tmp_file_name, original_file_extension, rar_executable):
    """Locate a cover image inside a comic archive and hand it to cover processing.

    When ``use_comic_meta`` is set, the pages reported by ``ComicArchive`` are
    scanned in order and the first one whose lower-cased file extension is
    listed in ``cover.COVER_EXTENSIONS`` is used. Otherwise the lookup is
    delegated to ``_extract_cover_from_archive``.

    Args:
        tmp_file_name: Path of the temporary comic archive.
        original_file_extension: Extension of the uploaded file; only used by
            the ``_extract_cover_from_archive`` fallback.
        rar_executable: Path of the unrar tool, forwarded to the archive readers.

    Returns:
        The result of ``cover.cover_processing`` for the data that was found.
    """
    if not use_comic_meta:
        # comicapi is not available: fall back to the plain archive readers
        cover_data, extension = _extract_cover_from_archive(original_file_extension,
                                                            tmp_file_name,
                                                            rar_executable)
        return cover.cover_processing(tmp_file_name, cover_data, extension)

    cover_data = extension = None
    archive = ComicArchive(tmp_file_name, rar_exe_path=rar_executable)
    for page_index, page_name in enumerate(archive.getPageNameList()):
        name_parts = os.path.splitext(page_name)
        if len(name_parts) <= 1:
            continue
        extension = name_parts[1].lower()
        if extension in cover.COVER_EXTENSIONS:
            # first page with a supported image type is taken as the cover
            cover_data = archive.getPage(page_index)
            break
    return cover.cover_processing(tmp_file_name, cover_data, extension)
|
||||||
|
|
||||||
|
|
||||||
def get_comic_info(tmp_file_path, original_file_name, original_file_extension, rarExecutable):
|
def get_comic_info(tmp_file_path, original_file_name, original_file_extension, rar_executable):
|
||||||
if use_comic_meta:
|
if use_comic_meta:
|
||||||
archive = ComicArchive(tmp_file_path, rar_exe_path=rarExecutable)
|
archive = ComicArchive(tmp_file_path, rar_exe_path=rar_executable)
|
||||||
if archive.seemsToBeAComicArchive():
|
if archive.seemsToBeAComicArchive():
|
||||||
if archive.hasMetadata(MetaDataStyle.CIX):
|
if archive.hasMetadata(MetaDataStyle.CIX):
|
||||||
style = MetaDataStyle.CIX
|
style = MetaDataStyle.CIX
|
||||||
|
@ -137,23 +113,23 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r
|
||||||
style = None
|
style = None
|
||||||
|
|
||||||
# if style is not None:
|
# if style is not None:
|
||||||
loadedMetadata = archive.readMetadata(style)
|
loaded_metadata = archive.readMetadata(style)
|
||||||
|
|
||||||
lang = loadedMetadata.language or ""
|
lang = loaded_metadata.language or ""
|
||||||
loadedMetadata.language = isoLanguages.get_lang3(lang)
|
loaded_metadata.language = isoLanguages.get_lang3(lang)
|
||||||
|
|
||||||
return BookMeta(
|
return BookMeta(
|
||||||
file_path=tmp_file_path,
|
file_path=tmp_file_path,
|
||||||
extension=original_file_extension,
|
extension=original_file_extension,
|
||||||
title=loadedMetadata.title or original_file_name,
|
title=loaded_metadata.title or original_file_name,
|
||||||
author=" & ".join([credit["person"]
|
author=" & ".join([credit["person"]
|
||||||
for credit in loadedMetadata.credits if credit["role"] == "Writer"]) or u'Unknown',
|
for credit in loaded_metadata.credits if credit["role"] == "Writer"]) or 'Unknown',
|
||||||
cover=_extractCover(tmp_file_path, original_file_extension, rarExecutable),
|
cover=_extract_cover(tmp_file_path, original_file_extension, rar_executable),
|
||||||
description=loadedMetadata.comments or "",
|
description=loaded_metadata.comments or "",
|
||||||
tags="",
|
tags="",
|
||||||
series=loadedMetadata.series or "",
|
series=loaded_metadata.series or "",
|
||||||
series_id=loadedMetadata.issue or "",
|
series_id=loaded_metadata.issue or "",
|
||||||
languages=loadedMetadata.language,
|
languages=loaded_metadata.language,
|
||||||
publisher="")
|
publisher="")
|
||||||
|
|
||||||
return BookMeta(
|
return BookMeta(
|
||||||
|
@ -161,7 +137,7 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r
|
||||||
extension=original_file_extension,
|
extension=original_file_extension,
|
||||||
title=original_file_name,
|
title=original_file_name,
|
||||||
author=u'Unknown',
|
author=u'Unknown',
|
||||||
cover=_extractCover(tmp_file_path, original_file_extension, rarExecutable),
|
cover=_extract_cover(tmp_file_path, original_file_extension, rar_executable),
|
||||||
description="",
|
description="",
|
||||||
tags="",
|
tags="",
|
||||||
series="",
|
series="",
|
||||||
|
|
48
cps/cover.py
Normal file
48
cps/cover.py
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
|
||||||
|
# Copyright (C) 2022 OzzieIsaacs
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
try:
|
||||||
|
from wand.image import Image
|
||||||
|
use_IM = True
|
||||||
|
except (ImportError, RuntimeError) as e:
|
||||||
|
use_IM = False
|
||||||
|
|
||||||
|
|
||||||
|
NO_JPEG_EXTENSIONS = ['.png', '.webp', '.bmp']
|
||||||
|
COVER_EXTENSIONS = ['.png', '.webp', '.bmp', '.jpg', '.jpeg']
|
||||||
|
|
||||||
|
|
||||||
|
def cover_processing(tmp_file_name, img, extension):
    """Store extracted cover image data as ``cover.jpg`` next to the temp file.

    Images whose extension is listed in ``NO_JPEG_EXTENSIONS`` are converted
    to JPEG with wand (ImageMagick); all other data is written unchanged.

    Args:
        tmp_file_name: Path of the temporary book file. The cover is written
            to ``cover.jpg`` in the same directory.
        img: Raw image bytes of the cover, or ``None``/empty if no cover data
            was found.
        extension: Lower-cased file extension of the cover image, including
            the leading dot (e.g. ``'.png'``).

    Returns:
        The path of the written ``cover.jpg``, or ``None`` when there is no
        image data, or when the image needs conversion and wand is not
        available (``use_IM`` is false).
    """
    # No image data: nothing to convert or write. Checked first so that wand
    # is never asked to decode a missing or empty blob.
    if not img:
        return None

    tmp_cover_name = os.path.join(os.path.dirname(tmp_file_name), 'cover.jpg')

    if extension in NO_JPEG_EXTENSIONS:
        if not use_IM:
            # conversion required but ImageMagick bindings are unavailable
            return None
        with Image(blob=img) as imgc:
            imgc.format = 'jpeg'
            imgc.transform_colorspace('rgb')
            imgc.save(filename=tmp_cover_name)
        return tmp_cover_name

    # Any other extension is written byte-for-byte under the cover.jpg name.
    with open(tmp_cover_name, 'wb') as f:
        f.write(img)
    return tmp_cover_name
|
35
cps/epub.py
35
cps/epub.py
|
@ -20,23 +20,26 @@ import os
|
||||||
import zipfile
|
import zipfile
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from . import isoLanguages
|
from . import isoLanguages, cover
|
||||||
from .helper import split_authors
|
from .helper import split_authors
|
||||||
from .constants import BookMeta
|
from .constants import BookMeta
|
||||||
|
|
||||||
|
|
||||||
def extract_cover(zip_file, cover_file, cover_path, tmp_file_name):
|
def _extract_cover(zip_file, cover_file, cover_path, tmp_file_name):
|
||||||
if cover_file is None:
|
if cover_file is None:
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
|
cf = extension = None
|
||||||
zip_cover_path = os.path.join(cover_path, cover_file).replace('\\', '/')
|
zip_cover_path = os.path.join(cover_path, cover_file).replace('\\', '/')
|
||||||
cf = zip_file.read(zip_cover_path)
|
|
||||||
prefix = os.path.splitext(tmp_file_name)[0]
|
prefix = os.path.splitext(tmp_file_name)[0]
|
||||||
tmp_cover_name = prefix + '.' + os.path.basename(zip_cover_path)
|
tmp_cover_name = prefix + '.' + os.path.basename(zip_cover_path)
|
||||||
image = open(tmp_cover_name, 'wb')
|
ext = os.path.splitext(tmp_cover_name)
|
||||||
image.write(cf)
|
if len(ext) > 1:
|
||||||
image.close()
|
extension = ext[1].lower()
|
||||||
return tmp_cover_name
|
if extension in cover.COVER_EXTENSIONS:
|
||||||
|
cf = zip_file.read(zip_cover_path)
|
||||||
|
return cover.cover_processing(tmp_file_name, cf, extension)
|
||||||
|
|
||||||
|
|
||||||
def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
|
def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
|
||||||
|
@ -70,9 +73,9 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
|
||||||
else:
|
else:
|
||||||
epub_metadata[s] = tmp[0]
|
epub_metadata[s] = tmp[0]
|
||||||
else:
|
else:
|
||||||
epub_metadata[s] = u'Unknown'
|
epub_metadata[s] = 'Unknown'
|
||||||
|
|
||||||
if epub_metadata['subject'] == u'Unknown':
|
if epub_metadata['subject'] == 'Unknown':
|
||||||
epub_metadata['subject'] = ''
|
epub_metadata['subject'] = ''
|
||||||
|
|
||||||
if epub_metadata['description'] == u'Unknown':
|
if epub_metadata['description'] == u'Unknown':
|
||||||
|
@ -112,7 +115,7 @@ def parse_epub_cover(ns, tree, epub_zip, cover_path, tmp_file_path):
|
||||||
cover_section = tree.xpath("/pkg:package/pkg:manifest/pkg:item[@id='cover-image']/@href", namespaces=ns)
|
cover_section = tree.xpath("/pkg:package/pkg:manifest/pkg:item[@id='cover-image']/@href", namespaces=ns)
|
||||||
cover_file = None
|
cover_file = None
|
||||||
if len(cover_section) > 0:
|
if len(cover_section) > 0:
|
||||||
cover_file = extract_cover(epub_zip, cover_section[0], cover_path, tmp_file_path)
|
cover_file = _extract_cover(epub_zip, cover_section[0], cover_path, tmp_file_path)
|
||||||
else:
|
else:
|
||||||
meta_cover = tree.xpath("/pkg:package/pkg:metadata/pkg:meta[@name='cover']/@content", namespaces=ns)
|
meta_cover = tree.xpath("/pkg:package/pkg:metadata/pkg:meta[@name='cover']/@content", namespaces=ns)
|
||||||
if len(meta_cover) > 0:
|
if len(meta_cover) > 0:
|
||||||
|
@ -123,10 +126,10 @@ def parse_epub_cover(ns, tree, epub_zip, cover_path, tmp_file_path):
|
||||||
"/pkg:package/pkg:manifest/pkg:item[@properties='" + meta_cover[0] + "']/@href", namespaces=ns)
|
"/pkg:package/pkg:manifest/pkg:item[@properties='" + meta_cover[0] + "']/@href", namespaces=ns)
|
||||||
else:
|
else:
|
||||||
cover_section = tree.xpath("/pkg:package/pkg:guide/pkg:reference/@href", namespaces=ns)
|
cover_section = tree.xpath("/pkg:package/pkg:guide/pkg:reference/@href", namespaces=ns)
|
||||||
if len(cover_section) > 0:
|
for cs in cover_section:
|
||||||
filetype = cover_section[0].rsplit('.', 1)[-1]
|
filetype = cs.rsplit('.', 1)[-1]
|
||||||
if filetype == "xhtml" or filetype == "html": # if cover is (x)html format
|
if filetype == "xhtml" or filetype == "html": # if cover is (x)html format
|
||||||
markup = epub_zip.read(os.path.join(cover_path, cover_section[0]))
|
markup = epub_zip.read(os.path.join(cover_path, cs))
|
||||||
markup_tree = etree.fromstring(markup)
|
markup_tree = etree.fromstring(markup)
|
||||||
# no matter xhtml or html with no namespace
|
# no matter xhtml or html with no namespace
|
||||||
img_src = markup_tree.xpath("//*[local-name() = 'img']/@src")
|
img_src = markup_tree.xpath("//*[local-name() = 'img']/@src")
|
||||||
|
@ -137,9 +140,11 @@ def parse_epub_cover(ns, tree, epub_zip, cover_path, tmp_file_path):
|
||||||
# img_src may start with "../", so join it onto the full path and then take the relpath to the cwd
|
# img_src may start with "../", so join it onto the full path and then take the relpath to the cwd
|
||||||
filename = os.path.relpath(os.path.join(os.path.dirname(os.path.join(cover_path, cover_section[0])),
|
filename = os.path.relpath(os.path.join(os.path.dirname(os.path.join(cover_path, cover_section[0])),
|
||||||
img_src[0]))
|
img_src[0]))
|
||||||
cover_file = extract_cover(epub_zip, filename, "", tmp_file_path)
|
cover_file = _extract_cover(epub_zip, filename, "", tmp_file_path)
|
||||||
else:
|
else:
|
||||||
cover_file = extract_cover(epub_zip, cover_section[0], cover_path, tmp_file_path)
|
cover_file = _extract_cover(epub_zip, cs, cover_path, tmp_file_path)
|
||||||
|
if cover_file:
|
||||||
|
break
|
||||||
return cover_file
|
return cover_file
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue