Fix metadata recognition for fb2 files

This commit is contained in:
Ozzie Isaacs 2021-03-19 15:26:41 +01:00
parent 8f5c649d0f
commit 2760a7816d
4 changed files with 59 additions and 183 deletions

View file

@ -30,50 +30,50 @@ def get_fb2_info(tmp_file_path, original_file_extension):
} }
fb2_file = open(tmp_file_path) fb2_file = open(tmp_file_path)
tree = etree.fromstring(fb2_file.read()) tree = etree.fromstring(fb2_file.read().encode())
authors = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:author', namespaces=ns) authors = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:author', namespaces=ns)
def get_author(element): def get_author(element):
last_name = element.xpath('fb:last-name/text()', namespaces=ns) last_name = element.xpath('fb:last-name/text()', namespaces=ns)
if len(last_name): if len(last_name):
last_name = last_name[0].encode('utf-8') last_name = last_name[0]
else: else:
last_name = u'' last_name = u''
middle_name = element.xpath('fb:middle-name/text()', namespaces=ns) middle_name = element.xpath('fb:middle-name/text()', namespaces=ns)
if len(middle_name): if len(middle_name):
middle_name = middle_name[0].encode('utf-8') middle_name = middle_name[0]
else: else:
middle_name = u'' middle_name = u''
first_name = element.xpath('fb:first-name/text()', namespaces=ns) first_name = element.xpath('fb:first-name/text()', namespaces=ns)
if len(first_name): if len(first_name):
first_name = first_name[0].encode('utf-8') first_name = first_name[0]
else: else:
first_name = u'' first_name = u''
return (first_name.decode('utf-8') + u' ' return (first_name + u' '
+ middle_name.decode('utf-8') + u' ' + middle_name + u' '
+ last_name.decode('utf-8')).encode('utf-8') + last_name)
author = str(", ".join(map(get_author, authors))) author = str(", ".join(map(get_author, authors)))
title = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title/text()', namespaces=ns) title = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title/text()', namespaces=ns)
if len(title): if len(title):
title = str(title[0].encode('utf-8')) title = str(title[0])
else: else:
title = u'' title = u''
description = tree.xpath('/fb:FictionBook/fb:description/fb:publish-info/fb:book-name/text()', namespaces=ns) description = tree.xpath('/fb:FictionBook/fb:description/fb:publish-info/fb:book-name/text()', namespaces=ns)
if len(description): if len(description):
description = str(description[0].encode('utf-8')) description = str(description[0])
else: else:
description = u'' description = u''
return BookMeta( return BookMeta(
file_path=tmp_file_path, file_path=tmp_file_path,
extension=original_file_extension, extension=original_file_extension,
title=title.decode('utf-8'), title=title,
author=author.decode('utf-8'), author=author,
cover=None, cover=None,
description=description.decode('utf-8'), description=description,
tags="", tags="",
series="", series="",
series_id="", series_id="",

View file

@ -251,10 +251,11 @@ class WebServer(object):
finally: finally:
self.wsgiserver = None self.wsgiserver = None
# prevent irritating log of pending tasks message from asyncio
logger.get('asyncio').setLevel(logger.logging.CRITICAL)
if not self.restart: if not self.restart:
log.info("Performing shutdown of Calibre-Web") log.info("Performing shutdown of Calibre-Web")
# prevent irritating log of pending tasks message from asyncio
logger.get('asyncio').setLevel(logger.logging.CRITICAL)
return True return True
log.info("Performing restart of Calibre-Web") log.info("Performing restart of Calibre-Web")

View file

@ -214,7 +214,7 @@ def parse_xmp(pdf_file):
if xmp_info: if xmp_info:
try: try:
xmp_author = xmp_info.dc_creator # list xmp_author = xmp_info.dc_creator # list
except: except AttributeError:
xmp_author = ['Unknown'] xmp_author = ['Unknown']
if xmp_info.dc_title: if xmp_info.dc_title:
@ -228,20 +228,22 @@ def parse_xmp(pdf_file):
xmp_description = '' xmp_description = ''
languages = [] languages = []
for i in xmp_info.dc_language: try:
#calibre-web currently only takes one language. for i in xmp_info.dc_language:
languages.append(isoLanguages.get_lang3(i)) languages.append(isoLanguages.get_lang3(i))
except AttributeError:
languages.append('')
xmp_tags = ', '.join(xmp_info.dc_subject) xmp_tags = ', '.join(xmp_info.dc_subject)
xmp_publisher = ', '.join(xmp_info.dc_publisher) xmp_publisher = ', '.join(xmp_info.dc_publisher)
xmp_languages = xmp_info.dc_language
return {'author': xmp_author, return {'author': xmp_author,
'title': xmp_title, 'title': xmp_title,
'subject': xmp_description, 'subject': xmp_description,
'tags': xmp_tags, 'languages': languages, 'tags': xmp_tags,
'publisher': xmp_publisher 'languages': languages,
} 'publisher': xmp_publisher
}
def pdf_meta(tmp_file_path, original_file_name, original_file_extension): def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
@ -250,8 +252,6 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
if use_pdf_meta: if use_pdf_meta:
with open(tmp_file_path, 'rb') as f: with open(tmp_file_path, 'rb') as f:
languages = [""]
publisher = ""
pdf_file = PdfFileReader(f) pdf_file = PdfFileReader(f)
doc_info = pdf_file.getDocumentInfo() doc_info = pdf_file.getDocumentInfo()
xmp_info = parse_xmp(pdf_file) xmp_info = parse_xmp(pdf_file)
@ -263,6 +263,13 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
tags = xmp_info['tags'] tags = xmp_info['tags']
languages = xmp_info['languages'] languages = xmp_info['languages']
publisher = xmp_info['publisher'] publisher = xmp_info['publisher']
else:
author = u'Unknown'
title = ''
languages = [""]
publisher = ""
subject = ""
tags = ""
if doc_info: if doc_info:
if author == '': if author == '':
@ -273,14 +280,8 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
subject = doc_info.subject subject = doc_info.subject
if tags == '' and '/Keywords' in doc_info: if tags == '' and '/Keywords' in doc_info:
tags = doc_info['/Keywords'] tags = doc_info['/Keywords']
else: else:
author= u'Unknown'
title = original_file_name title = original_file_name
subject = ""
tags = ""
languages = [""]
publisher = ""
return BookMeta( return BookMeta(
file_path=tmp_file_path, file_path=tmp_file_path,

View file

@ -37,20 +37,20 @@
<div class="row"> <div class="row">
<div class="col-xs-6 col-md-6 col-sm-offset-3" style="margin-top:50px;"> <div class="col-xs-6 col-md-6 col-sm-offset-3" style="margin-top:50px;">
<p class='text-justify attribute'><strong>Start Time: </strong>2021-03-19 06:57:32</p> <p class='text-justify attribute'><strong>Start Time: </strong>2021-03-19 16:22:43</p>
</div> </div>
</div> </div>
<div class="row"> <div class="row">
<div class="col-xs-6 col-md-6 col-sm-offset-3"> <div class="col-xs-6 col-md-6 col-sm-offset-3">
<p class='text-justify attribute'><strong>Stop Time: </strong>2021-03-19 09:30:30</p> <p class='text-justify attribute'><strong>Stop Time: </strong>2021-03-19 18:56:39</p>
</div> </div>
</div> </div>
<div class="row"> <div class="row">
<div class="col-xs-6 col-md-6 col-sm-offset-3"> <div class="col-xs-6 col-md-6 col-sm-offset-3">
<p class='text-justify attribute'><strong>Duration: </strong>2h 4 min</p> <p class='text-justify attribute'><strong>Duration: </strong>2h 5 min</p>
</div> </div>
</div> </div>
</div> </div>
@ -802,11 +802,11 @@
<tr id="su" class="failClass"> <tr id="su" class="skipClass">
<td>TestEditBooks</td> <td>TestEditBooks</td>
<td class="text-center">33</td> <td class="text-center">33</td>
<td class="text-center">26</td> <td class="text-center">31</td>
<td class="text-center">5</td> <td class="text-center">0</td>
<td class="text-center">0</td> <td class="text-center">0</td>
<td class="text-center">2</td> <td class="text-center">2</td>
<td class="text-center"> <td class="text-center">
@ -870,151 +870,47 @@
<tr id="ft10.7" class="none bg-danger"> <tr id='pt10.7' class='hiddenRow bg-success'>
<td> <td>
<div class='testcase'>TestEditBooks - test_edit_custom_float</div> <div class='testcase'>TestEditBooks - test_edit_custom_float</div>
</td> </td>
<td colspan='6'> <td colspan='6' align='center'>PASS</td>
<div class="text-center">
<a class="popup_link text-center" onfocus='blur()' onclick="showTestDetail('div_ft10.7')">FAIL</a>
</div>
<!--css div popup start-->
<div id="div_ft10.7" class="popup_window test_output" style="display:none;">
<div class='close_button pull-right'>
<button type="button" class="close" aria-label="Close" onfocus='this.blur();'
onclick='document.getElementById('div_ft10.7').style.display='none'"><span
aria-hidden="true">&times;</span></button>
</div>
<div class="text-left pull-left">
<pre class="text-left">Traceback (most recent call last):
File "/home/ozzie/Development/calibre-web-test/test/test_edit_books.py", line 527, in test_edit_custom_float
self.assertEqual(len(self.adv_search({u'custom_column_8': u'-2.5'})), 1)
AssertionError: 0 != 1</pre>
</div>
<div class="clearfix"></div>
</div>
<!--css div popup end-->
</td>
</tr> </tr>
<tr id="ft10.8" class="none bg-danger"> <tr id='pt10.8' class='hiddenRow bg-success'>
<td> <td>
<div class='testcase'>TestEditBooks - test_edit_custom_int</div> <div class='testcase'>TestEditBooks - test_edit_custom_int</div>
</td> </td>
<td colspan='6'> <td colspan='6' align='center'>PASS</td>
<div class="text-center">
<a class="popup_link text-center" onfocus='blur()' onclick="showTestDetail('div_ft10.8')">FAIL</a>
</div>
<!--css div popup start-->
<div id="div_ft10.8" class="popup_window test_output" style="display:none;">
<div class='close_button pull-right'>
<button type="button" class="close" aria-label="Close" onfocus='this.blur();'
onclick='document.getElementById('div_ft10.8').style.display='none'"><span
aria-hidden="true">&times;</span></button>
</div>
<div class="text-left pull-left">
<pre class="text-left">Traceback (most recent call last):
File "/home/ozzie/Development/calibre-web-test/test/test_edit_books.py", line 548, in test_edit_custom_int
self.assertEqual(0, len(vals['cust_columns']))
AssertionError: 0 != 1</pre>
</div>
<div class="clearfix"></div>
</div>
<!--css div popup end-->
</td>
</tr> </tr>
<tr id="ft10.9" class="none bg-danger"> <tr id='pt10.9' class='hiddenRow bg-success'>
<td> <td>
<div class='testcase'>TestEditBooks - test_edit_custom_rating</div> <div class='testcase'>TestEditBooks - test_edit_custom_rating</div>
</td> </td>
<td colspan='6'> <td colspan='6' align='center'>PASS</td>
<div class="text-center">
<a class="popup_link text-center" onfocus='blur()' onclick="showTestDetail('div_ft10.9')">FAIL</a>
</div>
<!--css div popup start-->
<div id="div_ft10.9" class="popup_window test_output" style="display:none;">
<div class='close_button pull-right'>
<button type="button" class="close" aria-label="Close" onfocus='this.blur();'
onclick='document.getElementById('div_ft10.9').style.display='none'"><span
aria-hidden="true">&times;</span></button>
</div>
<div class="text-left pull-left">
<pre class="text-left">Traceback (most recent call last):
File "/home/ozzie/Development/calibre-web-test/test/test_edit_books.py", line 467, in test_edit_custom_rating
self.assertEqual(0, len(vals['cust_columns']))
AssertionError: 0 != 1</pre>
</div>
<div class="clearfix"></div>
</div>
<!--css div popup end-->
</td>
</tr> </tr>
<tr id="ft10.10" class="none bg-danger"> <tr id='pt10.10' class='hiddenRow bg-success'>
<td> <td>
<div class='testcase'>TestEditBooks - test_edit_custom_single_select</div> <div class='testcase'>TestEditBooks - test_edit_custom_single_select</div>
</td> </td>
<td colspan='6'> <td colspan='6' align='center'>PASS</td>
<div class="text-center">
<a class="popup_link text-center" onfocus='blur()' onclick="showTestDetail('div_ft10.10')">FAIL</a>
</div>
<!--css div popup start-->
<div id="div_ft10.10" class="popup_window test_output" style="display:none;">
<div class='close_button pull-right'>
<button type="button" class="close" aria-label="Close" onfocus='this.blur();'
onclick='document.getElementById('div_ft10.10').style.display='none'"><span
aria-hidden="true">&times;</span></button>
</div>
<div class="text-left pull-left">
<pre class="text-left">Traceback (most recent call last):
File "/home/ozzie/Development/calibre-web-test/test/test_edit_books.py", line 475, in test_edit_custom_single_select
self.assertEqual(u'人物', vals['cust_columns'][0]['value'])
AssertionError: '人物' != '-2.5'
- 人物
+ -2.5</pre>
</div>
<div class="clearfix"></div>
</div>
<!--css div popup end-->
</td>
</tr> </tr>
<tr id="ft10.11" class="none bg-danger"> <tr id='pt10.11' class='hiddenRow bg-success'>
<td> <td>
<div class='testcase'>TestEditBooks - test_edit_custom_text</div> <div class='testcase'>TestEditBooks - test_edit_custom_text</div>
</td> </td>
<td colspan='6'> <td colspan='6' align='center'>PASS</td>
<div class="text-center">
<a class="popup_link text-center" onfocus='blur()' onclick="showTestDetail('div_ft10.11')">FAIL</a>
</div>
<!--css div popup start-->
<div id="div_ft10.11" class="popup_window test_output" style="display:none;">
<div class='close_button pull-right'>
<button type="button" class="close" aria-label="Close" onfocus='this.blur();'
onclick='document.getElementById('div_ft10.11').style.display='none'"><span
aria-hidden="true">&times;</span></button>
</div>
<div class="text-left pull-left">
<pre class="text-left">Traceback (most recent call last):
File "/home/ozzie/Development/calibre-web-test/test/test_edit_books.py", line 491, in test_edit_custom_text
self.assertEqual(u'Lulu 人 Ä', vals['cust_columns'][0]['value'])
AssertionError: 'Lulu 人 Ä' != '-2.5'
- Lulu 人 Ä
+ -2.5</pre>
</div>
<div class="clearfix"></div>
</div>
<!--css div popup end-->
</td>
</tr> </tr>
@ -1699,12 +1595,12 @@ AssertionError: 'Lulu 人 Ä' != '-2.5'
<tr id="su" class="errorClass"> <tr id="su" class="passClass">
<td>TestKoboSync</td> <td>TestKoboSync</td>
<td class="text-center">9</td> <td class="text-center">9</td>
<td class="text-center">8</td> <td class="text-center">9</td>
<td class="text-center">0</td>
<td class="text-center">0</td> <td class="text-center">0</td>
<td class="text-center">1</td>
<td class="text-center">0</td> <td class="text-center">0</td>
<td class="text-center"> <td class="text-center">
<a onclick="showClassDetail('c18', 9)">Detail</a> <a onclick="showClassDetail('c18', 9)">Detail</a>
@ -1713,33 +1609,11 @@ AssertionError: 'Lulu 人 Ä' != '-2.5'
<tr id="et18.1" class="none bg-info"> <tr id='pt18.1' class='hiddenRow bg-success'>
<td> <td>
<div class='testcase'>TestKoboSync - test_book_download</div> <div class='testcase'>TestKoboSync - test_book_download</div>
</td> </td>
<td colspan='6'> <td colspan='6' align='center'>PASS</td>
<div class="text-center">
<a class="popup_link text-center" onfocus='blur()' onclick="showTestDetail('div_et18.1')">ERROR</a>
</div>
<!--css div popup start-->
<div id="div_et18.1" class="popup_window test_output" style="display:none;">
<div class='close_button pull-right'>
<button type="button" class="close" aria-label="Close" onfocus='this.blur();'
onclick='document.getElementById('div_et18.1').style.display='none'"><span
aria-hidden="true">&times;</span></button>
</div>
<div class="text-left pull-left">
<pre class="text-left">Traceback (most recent call last):
File "/home/ozzie/Development/calibre-web-test/test/test_kobo_sync.py", line 585, in test_book_download
data = self.inital_sync()
File "/home/ozzie/Development/calibre-web-test/test/test_kobo_sync.py", line 133, in inital_sync
self.assertEqual(data[3]['NewEntitlement']['BookMetadata']['DownloadUrls'][1]['Format'], 'EPUB')
IndexError: list index out of range</pre>
</div>
<div class="clearfix"></div>
</div>
<!--css div popup end-->
</td>
</tr> </tr>
@ -3529,9 +3403,9 @@ IndexError: list index out of range</pre>
<tr id='total_row' class="text-center bg-grey"> <tr id='total_row' class="text-center bg-grey">
<td>Total</td> <td>Total</td>
<td>300</td> <td>300</td>
<td>287</td> <td>293</td>
<td>5</td> <td>0</td>
<td>1</td> <td>0</td>
<td>7</td> <td>7</td>
<td>&nbsp;</td> <td>&nbsp;</td>
</tr> </tr>
@ -3902,7 +3776,7 @@ IndexError: list index out of range</pre>
</div> </div>
<script> <script>
drawCircle(287, 5, 1, 7); drawCircle(293, 0, 0, 7);
</script> </script>
</div> </div>