Source code for pytablereader.mediawiki.core

"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""

from .._common import get_file_encoding
from .._constant import SourceType
from .._constant import TableNameTemplate as tnt
from .._logger import FileSourceLogger, TextSourceLogger
from .._validator import FileValidator, TextValidator
from ..interface import AbstractTableReader
from .formatter import MediaWikiTableFormatter


class MediaWikiTableLoader(AbstractTableReader):
    """
    Common base class shared by the MediaWiki table loaders in this module.

    It only pins the format name; concrete loaders supply the source
    handling and the ``load`` implementation.
    """

    @property
    def format_name(self):
        """Format identifier of this loader family: ``"mediawiki"``."""

        return "mediawiki"


class MediaWikiTableFileLoader(MediaWikiTableLoader):
    """
    A file loader class to extract tabular data from MediaWiki files.

    :param str file_path: Path to the loading file.
    :param quoting_flags: Forwarded unchanged to the base loader.
    :param type_hints: Forwarded unchanged to the base loader.
    :param type_hint_rules: Forwarded unchanged to the base loader.

    .. py:attribute:: table_name

        Table name string. Defaults to ``%(filename)s_%(key)s``.
    """

    def __init__(self, file_path=None, quoting_flags=None, type_hints=None, type_hint_rules=None):
        super().__init__(file_path, quoting_flags, type_hints, type_hint_rules)

        # Left unset here; load() resolves it through get_file_encoding().
        self.encoding = None

        self._validator = FileValidator(file_path)
        self._logger = FileSourceLogger(self)

    def load(self):
        """
        Extract tabular data as |TableData| instances from a MediaWiki file.
        |load_source_desc_file|

        :return:
            Loaded table data iterator.
            |load_table_name_desc|

            ===================  ==============================================
            Format specifier     Value after the replacement
            ===================  ==============================================
            ``%(filename)s``     |filename_desc|
            ``%(key)s``          | This replaced to:
                                 | **(1)** ``caption`` mark of the table
                                 | **(2)** ``%(format_name)s%(format_id)s``
                                 | if ``caption`` mark not included
                                 | in the table.
            ``%(format_name)s``  ``"mediawiki"``
            ``%(format_id)s``    |format_id_desc|
            ``%(global_id)s``    |global_id|
            ===================  ==============================================
        :rtype: |TableData| iterator
        :raises pytablereader.DataError:
            If the MediaWiki data is invalid or empty.
        """

        self._validate()
        self._logger.logging_load()

        # Keep the resolved encoding on the instance, as callers may inspect it.
        resolved_encoding = get_file_encoding(self.source, self.encoding)
        self.encoding = resolved_encoding

        with open(self.source, encoding=resolved_encoding) as stream:
            wiki_text = stream.read()
            formatter = MediaWikiTableFormatter(wiki_text)

        formatter.accept(self)

        return formatter.to_table_data()

    def _get_default_table_name_template(self):
        """Return the default table name template: file name, then table key."""

        return f"{tnt.FILENAME:s}_{tnt.KEY:s}"
class MediaWikiTableTextLoader(MediaWikiTableLoader):
    """
    A text loader class to extract tabular data from MediaWiki text data.

    :param str text: MediaWiki text to load.
    :param quoting_flags: Forwarded unchanged to the base loader.
    :param type_hints: Forwarded unchanged to the base loader.
    :param type_hint_rules: Forwarded unchanged to the base loader.

    .. py:attribute:: table_name

        Table name string. Defaults to ``%(key)s``.
    """

    @property
    def source_type(self):
        """Source type of this loader: ``SourceType.TEXT``."""

        return SourceType.TEXT

    def __init__(self, text=None, quoting_flags=None, type_hints=None, type_hint_rules=None):
        # type_hint_rules must be forwarded as well; otherwise the argument
        # accepted by this constructor is silently discarded.
        super().__init__(text, quoting_flags, type_hints, type_hint_rules)

        self._validator = TextValidator(text)
        self._logger = TextSourceLogger(self)

    def load(self):
        """
        Extract tabular data as |TableData| instances from a MediaWiki text
        object.
        |load_source_desc_text|

        :return:
            Loaded table data iterator.
            |load_table_name_desc|

            ===================  ==============================================
            Format specifier     Value after the replacement
            ===================  ==============================================
            ``%(filename)s``     ``""``
            ``%(key)s``          | This replaced to:
                                 | **(1)** ``caption`` mark of the table
                                 | **(2)** ``%(format_name)s%(format_id)s``
                                 | if ``caption`` mark not included
                                 | in the table.
            ``%(format_name)s``  ``"mediawiki"``
            ``%(format_id)s``    |format_id_desc|
            ``%(global_id)s``    |global_id|
            ===================  ==============================================
        :rtype: |TableData| iterator
        :raises pytablereader.DataError:
            If the MediaWiki data is invalid or empty.
        """

        self._validate()
        self._logger.logging_load()

        formatter = MediaWikiTableFormatter(self.source)
        formatter.accept(self)

        return formatter.to_table_data()

    def _get_default_table_name_template(self):
        """Return the default table name template: the table key only."""

        return f"{tnt.KEY:s}"