Upload pdf fixes:
Handle no title. Handle no author. Fix import of more than one language. Add missing pdf upload publisher handling.
This commit is contained in:
parent
2be7b6480a
commit
fcf9e7a1ef
9 changed files with 148 additions and 39 deletions
|
@ -154,7 +154,8 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r
|
|||
tags="",
|
||||
series=loadedMetadata.series or "",
|
||||
series_id=loadedMetadata.issue or "",
|
||||
languages=loadedMetadata.language)
|
||||
languages=loadedMetadata.language,
|
||||
publisher="")
|
||||
|
||||
return BookMeta(
|
||||
file_path=tmp_file_path,
|
||||
|
@ -166,4 +167,5 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r
|
|||
tags="",
|
||||
series="",
|
||||
series_id="",
|
||||
languages="")
|
||||
languages="",
|
||||
publisher="")
|
||||
|
|
|
@ -130,7 +130,7 @@ def selected_roles(dictionary):
|
|||
|
||||
# :rtype: BookMeta
|
||||
BookMeta = namedtuple('BookMeta', 'file_path, extension, title, author, cover, description, tags, series, '
|
||||
'series_id, languages')
|
||||
'series_id, languages, publisher')
|
||||
|
||||
STABLE_VERSION = {'version': '0.6.12 Beta'}
|
||||
|
||||
|
|
|
@ -444,10 +444,10 @@ def edit_book_languages(languages, book, upload=False):
|
|||
return modify_database_object(input_l, book.languages, db.Languages, calibre_db.session, 'languages')
|
||||
|
||||
|
||||
def edit_book_publisher(to_save, book):
|
||||
def edit_book_publisher(publishers, book):
|
||||
changed = False
|
||||
if to_save["publisher"]:
|
||||
publisher = to_save["publisher"].rstrip().strip()
|
||||
if publishers:
|
||||
publisher = publishers.rstrip().strip()
|
||||
if len(book.publishers) == 0 or (len(book.publishers) > 0 and publisher != book.publishers[0].name):
|
||||
changed |= modify_database_object([publisher], book.publishers, db.Publishers, calibre_db.session,
|
||||
'publisher')
|
||||
|
@ -740,7 +740,7 @@ def edit_book(book_id):
|
|||
book.pubdate = db.Books.DEFAULT_PUBDATE
|
||||
|
||||
# handle book publisher
|
||||
modif_date |= edit_book_publisher(to_save, book)
|
||||
modif_date |= edit_book_publisher(to_save['publisher'], book)
|
||||
|
||||
# handle book languages
|
||||
modif_date |= edit_book_languages(to_save['languages'], book)
|
||||
|
@ -867,6 +867,9 @@ def create_book_on_upload(modif_date, meta):
|
|||
# handle tags
|
||||
modif_date |= edit_book_tags(meta.tags, db_book)
|
||||
|
||||
# handle publisher
|
||||
modif_date |= edit_book_publisher(meta.publisher, db_book)
|
||||
|
||||
# handle series
|
||||
modif_date |= edit_book_series(meta.series, db_book)
|
||||
|
||||
|
|
|
@ -142,4 +142,5 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
|
|||
tags=epub_metadata['subject'].encode('utf-8').decode('utf-8'),
|
||||
series=epub_metadata['series'].encode('utf-8').decode('utf-8'),
|
||||
series_id=epub_metadata['series_id'].encode('utf-8').decode('utf-8'),
|
||||
languages=epub_metadata['language'])
|
||||
languages=epub_metadata['language'],
|
||||
publisher="")
|
||||
|
|
|
@ -77,4 +77,5 @@ def get_fb2_info(tmp_file_path, original_file_extension):
|
|||
tags="",
|
||||
series="",
|
||||
series_id="",
|
||||
languages="")
|
||||
languages="",
|
||||
publisher="")
|
||||
|
|
|
@ -57,27 +57,29 @@ def get_language_name(locale, lang_code):
|
|||
|
||||
def get_language_codes(locale, language_names, remainder=None):
|
||||
language_names = set(x.strip().lower() for x in language_names if x)
|
||||
languages = list()
|
||||
lang = list()
|
||||
for k, v in get_language_names(locale).items():
|
||||
v = v.lower()
|
||||
if v in language_names:
|
||||
languages.append(k)
|
||||
lang.append(k)
|
||||
language_names.remove(v)
|
||||
if remainder is not None:
|
||||
remainder.extend(language_names)
|
||||
return languages
|
||||
return lang
|
||||
|
||||
|
||||
def get_valid_language_codes(locale, language_names, remainder=None):
|
||||
languages = list()
|
||||
lang = list()
|
||||
if "" in language_names:
|
||||
language_names.remove("")
|
||||
for k, __ in get_language_names(locale).items():
|
||||
if k in language_names:
|
||||
languages.append(k)
|
||||
lang.append(k)
|
||||
language_names.remove(k)
|
||||
if remainder is not None and len(language_names):
|
||||
remainder.extend(language_names)
|
||||
return languages
|
||||
return lang
|
||||
|
||||
|
||||
def get_lang3(lang):
|
||||
try:
|
||||
|
|
132
cps/uploader.py
132
cps/uploader.py
|
@ -44,12 +44,17 @@ except (ImportError, RuntimeError) as e:
|
|||
use_generic_pdf_cover = True
|
||||
|
||||
try:
|
||||
from PyPDF2 import PdfFileReader
|
||||
from PyPDF2 import __version__ as PyPdfVersion
|
||||
from PyPDF3 import PdfFileReader
|
||||
from PyPDF3 import __version__ as PyPdfVersion
|
||||
use_pdf_meta = True
|
||||
except ImportError as e:
|
||||
log.debug('Cannot import PyPDF2, extracting pdf metadata will not work: %s', e)
|
||||
use_pdf_meta = False
|
||||
except ImportError as ex:
|
||||
try:
|
||||
from PyPDF2 import PdfFileReader
|
||||
from PyPDF2 import __version__ as PyPdfVersion
|
||||
use_pdf_meta = True
|
||||
except ImportError as e:
|
||||
log.debug('Cannot import PyPDF3/PyPDF2, extracting pdf metadata will not work: %s / %s', e)
|
||||
use_pdf_meta = False
|
||||
|
||||
try:
|
||||
from . import epub
|
||||
|
@ -102,7 +107,98 @@ def default_meta(tmp_file_path, original_file_name, original_file_extension):
|
|||
tags="",
|
||||
series="",
|
||||
series_id="",
|
||||
languages="")
|
||||
languages="",
|
||||
publisher="")
|
||||
|
||||
|
||||
def parse_xmp(pdf_file):
|
||||
"""
|
||||
Parse XMP Metadata and prepare for BookMeta object
|
||||
"""
|
||||
try:
|
||||
xmp_info = pdf_file.getXmpMetadata()
|
||||
except Exception as e:
|
||||
log.debug('Can not read XMP metadata', e)
|
||||
return None
|
||||
|
||||
if xmp_info:
|
||||
try:
|
||||
xmp_author = xmp_info.dc_creator # list
|
||||
except AttributeError:
|
||||
xmp_author = ['']
|
||||
|
||||
if xmp_info.dc_title:
|
||||
xmp_title = xmp_info.dc_title['x-default']
|
||||
else:
|
||||
xmp_title = ''
|
||||
|
||||
if xmp_info.dc_description:
|
||||
xmp_description = xmp_info.dc_description['x-default']
|
||||
else:
|
||||
xmp_description = ''
|
||||
|
||||
languages = []
|
||||
try:
|
||||
for i in xmp_info.dc_language:
|
||||
#calibre-web currently only takes one language.
|
||||
languages.append(isoLanguages.get_lang3(i))
|
||||
except:
|
||||
languages.append('')
|
||||
|
||||
xmp_tags = ', '.join(xmp_info.dc_subject)
|
||||
xmp_publisher = ', '.join(xmp_info.dc_publisher)
|
||||
|
||||
return {'author': xmp_author,
|
||||
'title': xmp_title,
|
||||
'subject': xmp_description,
|
||||
'tags': xmp_tags, 'languages': languages,
|
||||
'publisher': xmp_publisher
|
||||
}
|
||||
|
||||
|
||||
def parse_xmp(pdf_file):
|
||||
"""
|
||||
Parse XMP Metadata and prepare for BookMeta object
|
||||
"""
|
||||
try:
|
||||
xmp_info = pdf_file.getXmpMetadata()
|
||||
except Exception as e:
|
||||
log.debug('Can not read XMP metadata', e)
|
||||
return None
|
||||
|
||||
if xmp_info:
|
||||
try:
|
||||
xmp_author = xmp_info.dc_creator # list
|
||||
except:
|
||||
xmp_author = ['']
|
||||
|
||||
if xmp_info.dc_title:
|
||||
xmp_title = xmp_info.dc_title['x-default']
|
||||
else:
|
||||
xmp_title = ''
|
||||
|
||||
if xmp_info.dc_description:
|
||||
xmp_description = xmp_info.dc_description['x-default']
|
||||
else:
|
||||
xmp_description = ''
|
||||
|
||||
languages = []
|
||||
try:
|
||||
for i in xmp_info.dc_language:
|
||||
languages.append(isoLanguages.get_lang3(i))
|
||||
except AttributeError:
|
||||
languages= [""]
|
||||
|
||||
xmp_tags = ', '.join(xmp_info.dc_subject)
|
||||
xmp_publisher = ', '.join(xmp_info.dc_publisher)
|
||||
|
||||
return {'author': xmp_author,
|
||||
'title': xmp_title,
|
||||
'subject': xmp_description,
|
||||
'tags': xmp_tags,
|
||||
'languages': languages,
|
||||
'publisher': xmp_publisher
|
||||
}
|
||||
|
||||
|
||||
def parse_xmp(pdf_file):
|
||||
|
@ -154,6 +250,8 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
|
|||
|
||||
if use_pdf_meta:
|
||||
with open(tmp_file_path, 'rb') as f:
|
||||
languages = [""]
|
||||
publisher = ""
|
||||
pdf_file = PdfFileReader(f)
|
||||
doc_info = pdf_file.getDocumentInfo()
|
||||
xmp_info = parse_xmp(pdf_file)
|
||||
|
@ -166,20 +264,22 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
|
|||
languages = xmp_info['languages']
|
||||
publisher = xmp_info['publisher']
|
||||
|
||||
elif doc_info:
|
||||
author = ' & '.join(split_authors([doc_info.author]))
|
||||
title = doc_info.title
|
||||
subject = doc_info.subject
|
||||
tags = doc_info['/Keywords']
|
||||
languages = ""
|
||||
publisher = ""
|
||||
if doc_info:
|
||||
if author == '':
|
||||
author = ' & '.join(split_authors([doc_info.author])) if doc_info.author else u'Unknown'
|
||||
if title == '':
|
||||
title = doc_info.title if doc_info.title else original_file_name
|
||||
if subject == '':
|
||||
subject = doc_info.subject
|
||||
if tags == '' and '/Keywords' in doc_info:
|
||||
tags = doc_info['/Keywords']
|
||||
|
||||
else:
|
||||
author= u'Unknown'
|
||||
title = original_file_name
|
||||
subject = ""
|
||||
tags = ""
|
||||
languages = ""
|
||||
languages = [""]
|
||||
publisher = ""
|
||||
|
||||
return BookMeta(
|
||||
|
@ -192,8 +292,8 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
|
|||
tags=tags,
|
||||
series="",
|
||||
series_id="",
|
||||
languages=', '.join(languages)
|
||||
)
|
||||
languages=','.join(languages),
|
||||
publisher=publisher)
|
||||
|
||||
|
||||
def pdf_preview(tmp_file_path, tmp_dir):
|
||||
|
|
|
@ -6,7 +6,7 @@ singledispatch>=3.4.0.0,<3.5.0.0
|
|||
backports_abc>=0.4
|
||||
Flask>=1.0.2,<1.2.0
|
||||
iso-639>=0.4.5,<0.5.0
|
||||
PyPDF2>=1.26.0,<1.27.0
|
||||
PyPDF3>=1.0.0,<1.0.4
|
||||
pytz>=2016.10
|
||||
requests>=2.11.1,<2.25.0
|
||||
SQLAlchemy>=1.3.0,<1.4.0
|
||||
|
|
14
setup.cfg
14
setup.cfg
|
@ -42,7 +42,7 @@ install_requires =
|
|||
backports_abc>=0.4
|
||||
Flask>=1.0.2,<1.2.0
|
||||
iso-639>=0.4.5,<0.5.0
|
||||
PyPDF2>=1.26.0,<1.27.0
|
||||
PyPDF3>=1.0.0,<1.0.4
|
||||
pytz>=2016.10
|
||||
requests>=2.11.1,<2.25.0
|
||||
SQLAlchemy>=1.3.0,<1.4.0
|
||||
|
@ -52,9 +52,9 @@ install_requires =
|
|||
|
||||
[options.extras_require]
|
||||
gdrive =
|
||||
google-api-python-client>=1.7.11,<1.8.0
|
||||
gevent>=1.2.1,<20.6.0
|
||||
greenlet>=0.4.12,<0.4.17
|
||||
google-api-python-client>=1.7.11,<1.13.0
|
||||
gevent>20.6.0,<21.2.0
|
||||
greenlet>=0.4.17,<1.1.0
|
||||
httplib2>=0.9.2,<0.18.0
|
||||
oauth2client>=4.0.0,<4.1.4
|
||||
uritemplate>=3.0.0,<3.1.0
|
||||
|
@ -68,16 +68,16 @@ goodreads =
|
|||
goodreads>=0.3.2,<0.4.0
|
||||
python-Levenshtein>=0.12.0,<0.13.0
|
||||
ldap =
|
||||
python-ldap>=3.0.0,<3.3.0
|
||||
python-ldap>=3.0.0,<3.4.0
|
||||
Flask-SimpleLDAP>=1.4.0,<1.5.0
|
||||
oauth =
|
||||
Flask-Dance>=1.4.0,<3.1.0
|
||||
SQLAlchemy-Utils>=0.33.5,<0.37.0
|
||||
metadata =
|
||||
lxml>=3.8.0,<4.6.0
|
||||
lxml>=3.8.0,<4.7.0
|
||||
rarfile>=2.7
|
||||
comics =
|
||||
natsort>=2.2.0
|
||||
natsort>=2.2.0,<7.1.0
|
||||
comicapi>= 2.1.3,<2.2.0
|
||||
kobo =
|
||||
jsonschema>=3.2.0,<3.3.0
|
||||
|
|
Loading…
Reference in a new issue