Improved cover extraction for epub files
This commit is contained in:
parent
4379669cf8
commit
8e2536c53b
3 changed files with 95 additions and 66 deletions
78
cps/comic.py
78
cps/comic.py
|
@ -1,7 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
|
||||
# Copyright (C) 2018 OzzieIsaacs
|
||||
# Copyright (C) 2018-2022 OzzieIsaacs
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
@ -18,19 +18,16 @@
|
|||
|
||||
import os
|
||||
|
||||
from . import logger, isoLanguages
|
||||
from . import logger, isoLanguages, cover
|
||||
from .constants import BookMeta
|
||||
|
||||
|
||||
log = logger.create()
|
||||
|
||||
|
||||
try:
|
||||
from wand.image import Image
|
||||
use_IM = True
|
||||
except (ImportError, RuntimeError) as e:
|
||||
use_IM = False
|
||||
|
||||
log = logger.create()
|
||||
|
||||
try:
|
||||
from comicapi.comicarchive import ComicArchive, MetaDataStyle
|
||||
|
@ -51,29 +48,8 @@ except (ImportError, LookupError) as e:
|
|||
use_rarfile = False
|
||||
use_comic_meta = False
|
||||
|
||||
NO_JPEG_EXTENSIONS = ['.png', '.webp', '.bmp']
|
||||
COVER_EXTENSIONS = ['.png', '.webp', '.bmp', '.jpg', '.jpeg']
|
||||
|
||||
def _cover_processing(tmp_file_name, img, extension):
|
||||
tmp_cover_name = os.path.join(os.path.dirname(tmp_file_name), 'cover.jpg')
|
||||
if extension in NO_JPEG_EXTENSIONS:
|
||||
if use_IM:
|
||||
with Image(blob=img) as imgc:
|
||||
imgc.format = 'jpeg'
|
||||
imgc.transform_colorspace('rgb')
|
||||
imgc.save(filename=tmp_cover_name)
|
||||
return tmp_cover_name
|
||||
else:
|
||||
return None
|
||||
if img:
|
||||
with open(tmp_cover_name, 'wb') as f:
|
||||
f.write(img)
|
||||
return tmp_cover_name
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def _extract_Cover_from_archive(original_file_extension, tmp_file_name, rarExecutable):
|
||||
def _extract_cover_from_archive(original_file_extension, tmp_file_name, rar_executable):
|
||||
cover_data = extension = None
|
||||
if original_file_extension.upper() == '.CBZ':
|
||||
cf = zipfile.ZipFile(tmp_file_name)
|
||||
|
@ -81,7 +57,7 @@ def _extract_Cover_from_archive(original_file_extension, tmp_file_name, rarExecu
|
|||
ext = os.path.splitext(name)
|
||||
if len(ext) > 1:
|
||||
extension = ext[1].lower()
|
||||
if extension in COVER_EXTENSIONS:
|
||||
if extension in cover.COVER_EXTENSIONS:
|
||||
cover_data = cf.read(name)
|
||||
break
|
||||
elif original_file_extension.upper() == '.CBT':
|
||||
|
@ -90,44 +66,44 @@ def _extract_Cover_from_archive(original_file_extension, tmp_file_name, rarExecu
|
|||
ext = os.path.splitext(name)
|
||||
if len(ext) > 1:
|
||||
extension = ext[1].lower()
|
||||
if extension in COVER_EXTENSIONS:
|
||||
if extension in cover.COVER_EXTENSIONS:
|
||||
cover_data = cf.extractfile(name).read()
|
||||
break
|
||||
elif original_file_extension.upper() == '.CBR' and use_rarfile:
|
||||
try:
|
||||
rarfile.UNRAR_TOOL = rarExecutable
|
||||
rarfile.UNRAR_TOOL = rar_executable
|
||||
cf = rarfile.RarFile(tmp_file_name)
|
||||
for name in cf.namelist():
|
||||
ext = os.path.splitext(name)
|
||||
if len(ext) > 1:
|
||||
extension = ext[1].lower()
|
||||
if extension in COVER_EXTENSIONS:
|
||||
if extension in cover.COVER_EXTENSIONS:
|
||||
cover_data = cf.read(name)
|
||||
break
|
||||
except Exception as ex:
|
||||
log.debug('Rarfile failed with error: %s', ex)
|
||||
log.debug('Rarfile failed with error: {}'.format(ex))
|
||||
return cover_data, extension
|
||||
|
||||
|
||||
def _extractCover(tmp_file_name, original_file_extension, rarExecutable):
|
||||
def _extract_cover(tmp_file_name, original_file_extension, rar_executable):
|
||||
cover_data = extension = None
|
||||
if use_comic_meta:
|
||||
archive = ComicArchive(tmp_file_name, rar_exe_path=rarExecutable)
|
||||
archive = ComicArchive(tmp_file_name, rar_exe_path=rar_executable)
|
||||
for index, name in enumerate(archive.getPageNameList()):
|
||||
ext = os.path.splitext(name)
|
||||
if len(ext) > 1:
|
||||
extension = ext[1].lower()
|
||||
if extension in COVER_EXTENSIONS:
|
||||
if extension in cover.COVER_EXTENSIONS:
|
||||
cover_data = archive.getPage(index)
|
||||
break
|
||||
else:
|
||||
cover_data, extension = _extract_Cover_from_archive(original_file_extension, tmp_file_name, rarExecutable)
|
||||
return _cover_processing(tmp_file_name, cover_data, extension)
|
||||
cover_data, extension = _extract_cover_from_archive(original_file_extension, tmp_file_name, rar_executable)
|
||||
return cover.cover_processing(tmp_file_name, cover_data, extension)
|
||||
|
||||
|
||||
def get_comic_info(tmp_file_path, original_file_name, original_file_extension, rarExecutable):
|
||||
def get_comic_info(tmp_file_path, original_file_name, original_file_extension, rar_executable):
|
||||
if use_comic_meta:
|
||||
archive = ComicArchive(tmp_file_path, rar_exe_path=rarExecutable)
|
||||
archive = ComicArchive(tmp_file_path, rar_exe_path=rar_executable)
|
||||
if archive.seemsToBeAComicArchive():
|
||||
if archive.hasMetadata(MetaDataStyle.CIX):
|
||||
style = MetaDataStyle.CIX
|
||||
|
@ -137,23 +113,23 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r
|
|||
style = None
|
||||
|
||||
# if style is not None:
|
||||
loadedMetadata = archive.readMetadata(style)
|
||||
loaded_metadata = archive.readMetadata(style)
|
||||
|
||||
lang = loadedMetadata.language or ""
|
||||
loadedMetadata.language = isoLanguages.get_lang3(lang)
|
||||
lang = loaded_metadata.language or ""
|
||||
loaded_metadata.language = isoLanguages.get_lang3(lang)
|
||||
|
||||
return BookMeta(
|
||||
file_path=tmp_file_path,
|
||||
extension=original_file_extension,
|
||||
title=loadedMetadata.title or original_file_name,
|
||||
title=loaded_metadata.title or original_file_name,
|
||||
author=" & ".join([credit["person"]
|
||||
for credit in loadedMetadata.credits if credit["role"] == "Writer"]) or u'Unknown',
|
||||
cover=_extractCover(tmp_file_path, original_file_extension, rarExecutable),
|
||||
description=loadedMetadata.comments or "",
|
||||
for credit in loaded_metadata.credits if credit["role"] == "Writer"]) or 'Unknown',
|
||||
cover=_extract_cover(tmp_file_path, original_file_extension, rar_executable),
|
||||
description=loaded_metadata.comments or "",
|
||||
tags="",
|
||||
series=loadedMetadata.series or "",
|
||||
series_id=loadedMetadata.issue or "",
|
||||
languages=loadedMetadata.language,
|
||||
series=loaded_metadata.series or "",
|
||||
series_id=loaded_metadata.issue or "",
|
||||
languages=loaded_metadata.language,
|
||||
publisher="")
|
||||
|
||||
return BookMeta(
|
||||
|
@ -161,7 +137,7 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r
|
|||
extension=original_file_extension,
|
||||
title=original_file_name,
|
||||
author=u'Unknown',
|
||||
cover=_extractCover(tmp_file_path, original_file_extension, rarExecutable),
|
||||
cover=_extract_cover(tmp_file_path, original_file_extension, rar_executable),
|
||||
description="",
|
||||
tags="",
|
||||
series="",
|
||||
|
|
48
cps/cover.py
Normal file
48
cps/cover.py
Normal file
|
@ -0,0 +1,48 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
|
||||
# Copyright (C) 2022 OzzieIsaacs
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import os
|
||||
|
||||
try:
|
||||
from wand.image import Image
|
||||
use_IM = True
|
||||
except (ImportError, RuntimeError) as e:
|
||||
use_IM = False
|
||||
|
||||
|
||||
NO_JPEG_EXTENSIONS = ['.png', '.webp', '.bmp']
|
||||
COVER_EXTENSIONS = ['.png', '.webp', '.bmp', '.jpg', '.jpeg']
|
||||
|
||||
|
||||
def cover_processing(tmp_file_name, img, extension):
|
||||
tmp_cover_name = os.path.join(os.path.dirname(tmp_file_name), 'cover.jpg')
|
||||
if extension in NO_JPEG_EXTENSIONS:
|
||||
if use_IM:
|
||||
with Image(blob=img) as imgc:
|
||||
imgc.format = 'jpeg'
|
||||
imgc.transform_colorspace('rgb')
|
||||
imgc.save(filename=tmp_cover_name)
|
||||
return tmp_cover_name
|
||||
else:
|
||||
return None
|
||||
if img:
|
||||
with open(tmp_cover_name, 'wb') as f:
|
||||
f.write(img)
|
||||
return tmp_cover_name
|
||||
else:
|
||||
return None
|
35
cps/epub.py
35
cps/epub.py
|
@ -20,23 +20,26 @@ import os
|
|||
import zipfile
|
||||
from lxml import etree
|
||||
|
||||
from . import isoLanguages
|
||||
from . import isoLanguages, cover
|
||||
from .helper import split_authors
|
||||
from .constants import BookMeta
|
||||
|
||||
|
||||
def extract_cover(zip_file, cover_file, cover_path, tmp_file_name):
|
||||
def _extract_cover(zip_file, cover_file, cover_path, tmp_file_name):
|
||||
if cover_file is None:
|
||||
return None
|
||||
else:
|
||||
cf = extension = None
|
||||
zip_cover_path = os.path.join(cover_path, cover_file).replace('\\', '/')
|
||||
cf = zip_file.read(zip_cover_path)
|
||||
|
||||
prefix = os.path.splitext(tmp_file_name)[0]
|
||||
tmp_cover_name = prefix + '.' + os.path.basename(zip_cover_path)
|
||||
image = open(tmp_cover_name, 'wb')
|
||||
image.write(cf)
|
||||
image.close()
|
||||
return tmp_cover_name
|
||||
ext = os.path.splitext(tmp_cover_name)
|
||||
if len(ext) > 1:
|
||||
extension = ext[1].lower()
|
||||
if extension in cover.COVER_EXTENSIONS:
|
||||
cf = zip_file.read(zip_cover_path)
|
||||
return cover.cover_processing(tmp_file_name, cf, extension)
|
||||
|
||||
|
||||
def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
|
||||
|
@ -70,9 +73,9 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
|
|||
else:
|
||||
epub_metadata[s] = tmp[0]
|
||||
else:
|
||||
epub_metadata[s] = u'Unknown'
|
||||
epub_metadata[s] = 'Unknown'
|
||||
|
||||
if epub_metadata['subject'] == u'Unknown':
|
||||
if epub_metadata['subject'] == 'Unknown':
|
||||
epub_metadata['subject'] = ''
|
||||
|
||||
if epub_metadata['description'] == u'Unknown':
|
||||
|
@ -112,7 +115,7 @@ def parse_epub_cover(ns, tree, epub_zip, cover_path, tmp_file_path):
|
|||
cover_section = tree.xpath("/pkg:package/pkg:manifest/pkg:item[@id='cover-image']/@href", namespaces=ns)
|
||||
cover_file = None
|
||||
if len(cover_section) > 0:
|
||||
cover_file = extract_cover(epub_zip, cover_section[0], cover_path, tmp_file_path)
|
||||
cover_file = _extract_cover(epub_zip, cover_section[0], cover_path, tmp_file_path)
|
||||
else:
|
||||
meta_cover = tree.xpath("/pkg:package/pkg:metadata/pkg:meta[@name='cover']/@content", namespaces=ns)
|
||||
if len(meta_cover) > 0:
|
||||
|
@ -123,10 +126,10 @@ def parse_epub_cover(ns, tree, epub_zip, cover_path, tmp_file_path):
|
|||
"/pkg:package/pkg:manifest/pkg:item[@properties='" + meta_cover[0] + "']/@href", namespaces=ns)
|
||||
else:
|
||||
cover_section = tree.xpath("/pkg:package/pkg:guide/pkg:reference/@href", namespaces=ns)
|
||||
if len(cover_section) > 0:
|
||||
filetype = cover_section[0].rsplit('.', 1)[-1]
|
||||
for cs in cover_section:
|
||||
filetype = cs.rsplit('.', 1)[-1]
|
||||
if filetype == "xhtml" or filetype == "html": # if cover is (x)html format
|
||||
markup = epub_zip.read(os.path.join(cover_path, cover_section[0]))
|
||||
markup = epub_zip.read(os.path.join(cover_path, cs))
|
||||
markup_tree = etree.fromstring(markup)
|
||||
# no matter xhtml or html with no namespace
|
||||
img_src = markup_tree.xpath("//*[local-name() = 'img']/@src")
|
||||
|
@ -137,9 +140,11 @@ def parse_epub_cover(ns, tree, epub_zip, cover_path, tmp_file_path):
|
|||
# img_src maybe start with "../"" so fullpath join then relpath to cwd
|
||||
filename = os.path.relpath(os.path.join(os.path.dirname(os.path.join(cover_path, cover_section[0])),
|
||||
img_src[0]))
|
||||
cover_file = extract_cover(epub_zip, filename, "", tmp_file_path)
|
||||
cover_file = _extract_cover(epub_zip, filename, "", tmp_file_path)
|
||||
else:
|
||||
cover_file = extract_cover(epub_zip, cover_section[0], cover_path, tmp_file_path)
|
||||
cover_file = _extract_cover(epub_zip, cs, cover_path, tmp_file_path)
|
||||
if cover_file:
|
||||
break
|
||||
return cover_file
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue