import os
from unittest.mock import patch
from pelican import readers
from pelican.tests.support import get_settings, unittest
from pelican.utils import SafeDatetime
# Directory that contains this test module.
CUR_DIR = os.path.dirname(__file__)
# 'content' directory next to this module; used below as the base path for
# reading files and as the root that _path() joins onto.
CONTENT_PATH = os.path.join(CUR_DIR, 'content')
def _path(*args):
    """Return the path built by joining *args* onto CONTENT_PATH."""
    segments = (CONTENT_PATH,) + args
    return os.path.join(*segments)
class ReaderTest(unittest.TestCase):
    """Base class providing shared helpers for the reader test cases."""

    def read_file(self, path, **kwargs):
        """Read *path* (relative to CONTENT_PATH) through a Readers instance.

        Keyword arguments are passed to get_settings() as setting overrides.
        """
        # Isolate from future API changes to readers.read_file
        settings = get_settings(**kwargs)
        reader_set = readers.Readers(settings=settings)
        return reader_set.read_file(base_path=CONTENT_PATH, path=path)

    def assertDictHasSubset(self, dictionary, subset):
        """Fail unless every key/value pair of *subset* is in *dictionary*.

        A missing key and a differing value each fail with their own message.
        """
        for name, wanted in subset.items():
            if name not in dictionary:
                # self.fail() raises, so nothing below runs for this key
                self.fail(
                    'Expected %s to have value %s, but was not in Dict' %
                    (name, wanted))
            actual = dictionary.get(name)
            message = ('Expected %s to have value %s, but was %s' %
                       (name, wanted, actual))
            self.assertEqual(wanted, actual, message)
class TestAssertDictHasSubset(ReaderTest):
    """Checks of the assertDictHasSubset helper inherited from ReaderTest."""

    def setUp(self):
        # Fresh two-entry fixture for every test
        self.dictionary = {'key-a': 'val-a', 'key-b': 'val-b'}

    def tearDown(self):
        self.dictionary = None

    def test_subset(self):
        partial = {'key-a': 'val-a'}
        self.assertDictHasSubset(self.dictionary, partial)

    def test_equal(self):
        # The dictionary is compared against itself
        fixture = self.dictionary
        self.assertDictHasSubset(fixture, fixture)

    def test_fail_not_set(self):
        missing_key_msg = (
            r'Expected.*key-c.*to have value.*val-c.*but was not in Dict')
        with self.assertRaisesRegex(AssertionError, missing_key_msg):
            self.assertDictHasSubset(self.dictionary, {'key-c': 'val-c'})

    def test_fail_wrong_val(self):
        wrong_value_msg = (
            r'Expected .*key-a.* to have value .*val-b.* but was .*val-a.*')
        with self.assertRaisesRegex(AssertionError, wrong_value_msg):
            self.assertDictHasSubset(self.dictionary, {'key-a': 'val-b'})
class DefaultReaderTest(ReaderTest):
    """Reader tests covering unknown extensions, date fallbacks with
    DEFAULT_DATE='fs', and the empty-alt image warning."""

    def test_readfile_unknown_extension(self):
        # A file whose extension is not recognised must raise TypeError
        with self.assertRaises(TypeError):
            self.read_file(path='article_with_metadata.unknownextension')

    def test_readfile_path_metadata_implicit_dates(self):
        # Both 'date' and 'modified' are expected to equal the file's mtime
        test_file = 'article_with_metadata_implicit_dates.html'
        page = self.read_file(path=test_file, DEFAULT_DATE='fs')
        fs_date = SafeDatetime.fromtimestamp(
            os.stat(_path(test_file)).st_mtime)
        expected = {
            'date': fs_date,
            'modified': fs_date,
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_readfile_path_metadata_explicit_dates(self):
        # Fixed dates are expected for both keys even with DEFAULT_DATE='fs'
        test_file = 'article_with_metadata_explicit_dates.html'
        page = self.read_file(path=test_file, DEFAULT_DATE='fs')
        expected = {
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'modified': SafeDatetime(2010, 12, 31, 23, 59),
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_readfile_path_metadata_implicit_date_explicit_modified(self):
        # 'date' is expected to be the mtime, 'modified' a fixed value
        test_file = 'article_with_metadata_implicit_date_explicit_modified.html'
        page = self.read_file(path=test_file, DEFAULT_DATE='fs')
        expected = {
            'date': SafeDatetime.fromtimestamp(
                os.stat(_path(test_file)).st_mtime),
            'modified': SafeDatetime(2010, 12, 2, 10, 14),
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_readfile_path_metadata_explicit_date_implicit_modified(self):
        # 'date' is expected to be a fixed value, 'modified' the mtime
        test_file = 'article_with_metadata_explicit_date_implicit_modified.html'
        page = self.read_file(path=test_file, DEFAULT_DATE='fs')
        expected = {
            'date': SafeDatetime(2010, 12, 2, 10, 14),
            'modified': SafeDatetime.fromtimestamp(
                os.stat(_path(test_file)).st_mtime),
        }

        self.assertDictHasSubset(page.metadata, expected)

    def test_find_empty_alt(self):
        # Image tags with an empty alt attribute: once with alt before src,
        # once with src before alt.
        # NOTE(review): the markup inside these literals had been lost (the
        # strings were empty/unterminated). It is reconstructed here from the
        # warning arguments asserted below ('test-image.png') -- confirm
        # against version control.
        content = ['<img alt="" src="test-image.png" width="300px" />',
                   '<img src="test-image.png" width="300px" alt="" />']

        with patch('pelican.readers.logger') as log_mock:
            for tag in content:
                # assert_called_with only inspects the most recent call, so
                # forget earlier calls: each tag must trigger its own warning.
                log_mock.reset_mock()
                readers.find_empty_alt(tag, '/test/path')
                log_mock.warning.assert_called_with(
                    'Empty alt attribute for image %s in %s',
                    'test-image.png',
                    '/test/path',
                    extra={'limit_msg':
                           'Other images have empty alt attributes'}
                )
# Reader test cases for three input formats, all built on ReaderTest:
#   - RstReaderTest:  '.rst' fixtures (metadata, typogrify, authors, dates).
#   - MdReaderTest:   Markdown fixtures; skipped unless readers.Markdown is
#                     truthy ("markdown isn't installed").
#   - HTMLReaderTest: '.html' fixtures (metadata, attributes, comments).
# NOTE(review): this span looks extraction-damaged -- string literals are
# split across lines, markup inside expected values appears to be missing,
# and some statements are truncated or fused (e.g. the FILENAME_METADATA
# pattern). Restore it from version control before relying on these tests.
class RstReaderTest(ReaderTest):
def test_article_with_metadata(self):
page = self.read_file(path='article_with_metadata.rst')
expected = {
'category': 'yeah',
'author': 'Alexis Métaireau',
'title': 'This is a super article !',
'summary': '
Multi-line metadata should be' ' supported\nas well as inline' ' markup and stuff to "typogrify' '"...
\n', 'date': SafeDatetime(2010, 12, 2, 10, 14), 'modified': SafeDatetime(2010, 12, 2, 10, 20), 'tags': ['foo', 'bar', 'foobar'], 'custom_field': 'http://notmyidea.org', } self.assertDictHasSubset(page.metadata, expected) def test_article_with_capitalized_metadata(self): page = self.read_file(path='article_with_capitalized_metadata.rst') expected = { 'category': 'yeah', 'author': 'Alexis Métaireau', 'title': 'This is a super article !', 'summary': 'Multi-line metadata should be' ' supported\nas well as inline' ' markup and stuff to "typogrify' '"...
\n', 'date': SafeDatetime(2010, 12, 2, 10, 14), 'modified': SafeDatetime(2010, 12, 2, 10, 20), 'tags': ['foo', 'bar', 'foobar'], 'custom_field': 'http://notmyidea.org', } self.assertDictHasSubset(page.metadata, expected) def test_article_with_filename_metadata(self): page = self.read_file( path='2012-11-29_rst_w_filename_meta#foo-bar.rst', FILENAME_METADATA=None) expected = { 'category': 'yeah', 'author': 'Alexis Métaireau', 'title': 'Rst with filename metadata', 'reader': 'rst', } self.assertDictHasSubset(page.metadata, expected) page = self.read_file( path='2012-11-29_rst_w_filename_meta#foo-bar.rst', FILENAME_METADATA=r'(?PTHIS is some content. With some stuff to ' '"typogrify"...
\nNow with added ' 'support for ' 'TLA.
\n') self.assertEqual(page.content, expected) try: # otherwise, typogrify should be applied page = self.read_file(path='article.rst', TYPOGRIFY=True) expected = ( 'THIS is some content. ' 'With some stuff to “typogrify”…
\n' 'Now with added support for TLA.
\n') self.assertEqual(page.content, expected) except ImportError: return unittest.skip('need the typogrify distribution') def test_typogrify_summary(self): # if nothing is specified in the settings, the summary should be # unmodified page = self.read_file(path='article_with_metadata.rst') expected = ('Multi-line metadata should be' ' supported\nas well as inline' ' markup and stuff to "typogrify' '"...
\n') self.assertEqual(page.metadata['summary'], expected) try: # otherwise, typogrify should be applied page = self.read_file(path='article_with_metadata.rst', TYPOGRIFY=True) expected = ('Multi-line metadata should be' ' supported\nas well as inline' ' markup and stuff to “typogrify' '”…
\n') self.assertEqual(page.metadata['summary'], expected) except ImportError: return unittest.skip('need the typogrify distribution') def test_typogrify_ignore_tags(self): try: # typogrify should be able to ignore user specified tags, # but tries to be clever with widont extension page = self.read_file(path='article.rst', TYPOGRIFY=True, TYPOGRIFY_IGNORE_TAGS=['p']) expected = ('THIS is some content. With some stuff to ' '"typogrify"...
\nNow with added ' 'support for ' 'TLA.
\n') self.assertEqual(page.content, expected) # typogrify should ignore code blocks by default because # code blocks are composed inside the pre tag page = self.read_file(path='article_with_code_block.rst', TYPOGRIFY=True) expected = ('An article with some code
\n' ''
'x'
' &'
' y\n
A block quote:
\n\nx ' '& y\n' '
Normal:\nx' ' &' ' y' '
\n') self.assertEqual(page.content, expected) # instruct typogrify to also ignore blockquotes page = self.read_file(path='article_with_code_block.rst', TYPOGRIFY=True, TYPOGRIFY_IGNORE_TAGS=['blockquote']) expected = ('An article with some code
\n' ''
'x'
' &'
' y\n
A block quote:
\n\nx ' '& y\n' '
Normal:\nx' ' &' ' y' '
\n') self.assertEqual(page.content, expected) except ImportError: return unittest.skip('need the typogrify distribution') except TypeError: return unittest.skip('need typogrify version 2.0.4 or later') def test_article_with_multiple_authors(self): page = self.read_file(path='article_with_multiple_authors.rst') expected = { 'authors': ['First Author', 'Second Author'] } self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_authors_semicolon(self): page = self.read_file( path='article_with_multiple_authors_semicolon.rst') expected = { 'authors': ['Author, First', 'Author, Second'] } self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_authors_list(self): page = self.read_file(path='article_with_multiple_authors_list.rst') expected = { 'authors': ['Author, First', 'Author, Second'] } self.assertDictHasSubset(page.metadata, expected) def test_default_date_formats(self): tuple_date = self.read_file(path='article.rst', DEFAULT_DATE=(2012, 5, 1)) string_date = self.read_file(path='article.rst', DEFAULT_DATE='2012-05-01') self.assertEqual(tuple_date.metadata['date'], string_date.metadata['date']) def test_parse_error(self): # Verify that it raises an Exception, not nothing and not SystemExit or # some such with self.assertRaisesRegex(Exception, "underline too short"): self.read_file(path='../parse_error/parse_error.rst') def test_typogrify_dashes_config(self): # Test default config page = self.read_file( path='article_with_typogrify_dashes.rst', TYPOGRIFY=True, TYPOGRIFY_DASHES='default') expected = "One: -; Two: —; Three: —-
\n" expected_title = "One -, two —, three —- dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool' variant page = self.read_file( path='article_with_typogrify_dashes.rst', TYPOGRIFY=True, TYPOGRIFY_DASHES='oldschool') expected = "One: -; Two: –; Three: —
\n" expected_title = "One -, two –, three — dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool_inverted' variant page = self.read_file( path='article_with_typogrify_dashes.rst', TYPOGRIFY=True, TYPOGRIFY_DASHES='oldschool_inverted') expected = "One: -; Two: —; Three: –
\n" expected_title = "One -, two —, three – dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) @unittest.skipUnless(readers.Markdown, "markdown isn't installed") class MdReaderTest(ReaderTest): def test_article_with_metadata(self): reader = readers.MarkdownReader(settings=get_settings()) content, metadata = reader.read( _path('article_with_md_extension.md')) expected = { 'category': 'test', 'title': 'Test md File', 'summary': 'I have a lot to test
', 'date': SafeDatetime(2010, 12, 2, 10, 14), 'modified': SafeDatetime(2010, 12, 2, 10, 20), 'tags': ['foo', 'bar', 'foobar'], } self.assertDictHasSubset(metadata, expected) content, metadata = reader.read( _path('article_with_markdown_and_nonascii_summary.md')) expected = { 'title': 'マックOS X 10.8でパイソンとVirtualenvをインストールと設定', 'summary': 'パイソンとVirtualenvをまっくでインストールする方法について明確に説明します。
', 'category': '指導書', 'date': SafeDatetime(2012, 12, 20), 'modified': SafeDatetime(2012, 12, 22), 'tags': ['パイソン', 'マック'], 'slug': 'python-virtualenv-on-mac-osx-mountain-lion-10.8', } self.assertDictHasSubset(metadata, expected) def test_article_with_footnote(self): settings = get_settings() ec = settings['MARKDOWN']['extension_configs'] ec['markdown.extensions.footnotes'] = {'SEPARATOR': '-'} reader = readers.MarkdownReader(settings) content, metadata = reader.read( _path('article_with_markdown_and_footnote.md')) expected_content = ( 'This is some content' '1' ' with some footnotes' '2
\n' '') expected_metadata = { 'title': 'Article with markdown containing footnotes', 'summary': ( 'Summary with inline markup ' 'should be supported.
'), 'date': SafeDatetime(2012, 10, 31), 'modified': SafeDatetime(2012, 11, 1), 'multiline': [ 'Line Metadata should be handle properly.', 'See syntax of Meta-Data extension of ' 'Python Markdown package:', 'If a line is indented by 4 or more spaces,', 'that line is assumed to be an additional line of the value', 'for the previous keyword.', 'A keyword may have as many lines as desired.', ] } self.assertEqual(content, expected_content) self.assertDictHasSubset(metadata, expected_metadata) def test_article_with_file_extensions(self): reader = readers.MarkdownReader(settings=get_settings()) # test to ensure the md file extension is being processed by the # correct reader content, metadata = reader.read( _path('article_with_md_extension.md')) expected = ( "The quick brown fox jumped over the lazy dog's back.
") self.assertEqual(content, expected) # test to ensure the mkd file extension is being processed by the # correct reader content, metadata = reader.read( _path('article_with_mkd_extension.mkd')) expected = ("This is another markdown test file. Uses" " the mkd extension.
") self.assertEqual(content, expected) # test to ensure the markdown file extension is being processed by the # correct reader content, metadata = reader.read( _path('article_with_markdown_extension.markdown')) expected = ("This is another markdown test file. Uses" " the markdown extension.
") self.assertEqual(content, expected) # test to ensure the mdown file extension is being processed by the # correct reader content, metadata = reader.read( _path('article_with_mdown_extension.mdown')) expected = ("This is another markdown test file. Uses" " the mdown extension.
") self.assertEqual(content, expected) def test_article_with_markdown_markup_extension(self): # test to ensure the markdown markup extension is being processed as # expected page = self.read_file( path='article_with_markdown_markup_extensions.md', MARKDOWN={ 'extension_configs': { 'markdown.extensions.toc': {}, 'markdown.extensions.codehilite': {}, 'markdown.extensions.extra': {} } } ) expected = ('\n' 'Test: This metadata value looks like metadata
', } self.assertDictHasSubset(metadata, expected) def test_empty_file(self): reader = readers.MarkdownReader(settings=get_settings()) content, metadata = reader.read( _path('empty.md')) self.assertEqual(metadata, {}) self.assertEqual(content, '') def test_empty_file_with_bom(self): reader = readers.MarkdownReader(settings=get_settings()) content, metadata = reader.read( _path('empty_with_bom.md')) self.assertEqual(metadata, {}) self.assertEqual(content, '') def test_typogrify_dashes_config(self): # Test default config page = self.read_file( path='article_with_typogrify_dashes.md', TYPOGRIFY=True, TYPOGRIFY_DASHES='default') expected = "One: -; Two: —; Three: —-
" expected_title = "One -, two —, three —- dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool' variant page = self.read_file( path='article_with_typogrify_dashes.md', TYPOGRIFY=True, TYPOGRIFY_DASHES='oldschool') expected = "One: -; Two: –; Three: —
" expected_title = "One -, two –, three — dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool_inverted' variant page = self.read_file( path='article_with_typogrify_dashes.md', TYPOGRIFY=True, TYPOGRIFY_DASHES='oldschool_inverted') expected = "One: -; Two: —; Three: –
" expected_title = "One -, two —, three – dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) def test_metadata_has_no_discarded_data(self): md_filename = 'article_with_markdown_and_empty_tags.md' r = readers.Readers(cache_name='cache', settings=get_settings( CACHE_CONTENT=True)) page = r.read_file(base_path=CONTENT_PATH, path=md_filename) __, cached_metadata = r.get_cached_data( _path(md_filename), (None, None)) expected = { 'title': 'Article with markdown and empty tags' } self.assertEqual(cached_metadata, expected) self.assertNotIn('tags', page.metadata) self.assertDictHasSubset(page.metadata, expected) class HTMLReaderTest(ReaderTest): def test_article_with_comments(self): page = self.read_file(path='article_with_comments.html') self.assertEqual(''' Body content ''', page.content) def test_article_with_keywords(self): page = self.read_file(path='article_with_keywords.html') expected = { 'tags': ['foo', 'bar', 'foobar'], } self.assertDictHasSubset(page.metadata, expected) def test_article_with_metadata(self): page = self.read_file(path='article_with_metadata.html') expected = { 'category': 'yeah', 'author': 'Alexis Métaireau', 'title': 'This is a super article !', 'summary': 'Summary and stuff', 'date': SafeDatetime(2010, 12, 2, 10, 14), 'tags': ['foo', 'bar', 'foobar'], 'custom_field': 'http://notmyidea.org', } self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_similar_metadata_tags(self): page = self.read_file(path='article_with_multiple_metadata_tags.html') expected = { 'custom_field': ['https://getpelican.com', 'https://www.eff.org'], } self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_authors(self): page = self.read_file(path='article_with_multiple_authors.html') expected = { 'authors': ['First Author', 'Second Author'] } self.assertDictHasSubset(page.metadata, expected) def test_article_with_metadata_and_contents_attrib(self): page = 
self.read_file(path='article_with_metadata_and_contents.html') expected = { 'category': 'yeah', 'author': 'Alexis Métaireau', 'title': 'This is a super article !', 'summary': 'Summary and stuff', 'date': SafeDatetime(2010, 12, 2, 10, 14), 'tags': ['foo', 'bar', 'foobar'], 'custom_field': 'http://notmyidea.org', } self.assertDictHasSubset(page.metadata, expected) def test_article_with_null_attributes(self): page = self.read_file(path='article_with_null_attributes.html') self.assertEqual(''' Ensure that empty attributes are copied properly. ''', page.content) def test_article_with_attributes_containing_double_quotes(self): page = self.read_file(path='article_with_attributes_containing_' + 'double_quotes.html') self.assertEqual(''' Ensure that if an attribute value contains a double quote, it is surrounded with single quotes, otherwise with double quotes. Span content Span content Span content ''', page.content) def test_article_metadata_key_lowercase(self): # Keys of metadata should be lowercase. page = self.read_file(path='article_with_uppercase_metadata.html') # Key should be lowercase self.assertIn('category', page.metadata, 'Key should be lowercase.') # Value should keep cases self.assertEqual('Yeah', page.metadata.get('category')) def test_article_with_nonconformant_meta_tags(self): page = self.read_file(path='article_with_nonconformant_meta_tags.html') expected = { 'summary': 'Summary and stuff', 'title': 'Article with Nonconformant HTML meta tags', } self.assertDictHasSubset(page.metadata, expected) def test_article_with_inline_svg(self): page = self.read_file(path='article_with_inline_svg.html') expected = { 'title': 'Article with an inline SVG', } self.assertDictHasSubset(page.metadata, expected)