Source code for pytablereader.csv.core

"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""

import csv
import io
import warnings

import typepy
from mbstrdecoder import MultiByteStrDecoder

from pytablereader import DataError

from .._common import get_file_encoding
from .._constant import TableNameTemplate as tnt
from .._logger import FileSourceLogger, TextSourceLogger
from .._validator import FileValidator, TextValidator
from ..interface import AbstractTableReader
from .formatter import CsvTableFormatter


class CsvTableLoader(AbstractTableReader):
    """
    The abstract class of CSV table loaders.

    .. py:attribute:: headers

        Attribute names of the table. Use the first line of
        the CSV file as attribute list if ``headers`` is empty.

    .. py:attribute:: delimiter

        A one-character string used to separate fields.
        Defaults to ``","``.

    .. py:attribute:: quotechar

        A one-character string used to quote fields containing
        special characters, such as the ``delimiter`` or ``quotechar``,
        or which contain new-line characters.
        Defaults to ``'"'``.

    .. py:attribute:: encoding

        Encoding of the CSV data.
    """

    @property
    def format_name(self):
        """Format identifier of this loader: always ``"csv"``."""
        return "csv"

    @property
    def delimiter(self):
        """Field separator, decoded and coerced to ``str`` on every read."""
        # "delimiter" must be a string, not an unicode
        return str(MultiByteStrDecoder(self.__delimiter).unicode_str)

    @delimiter.setter
    def delimiter(self, value):
        self.__delimiter = value

    @property
    def quotechar(self):
        """Quote character, decoded and coerced to ``str`` on every read."""
        # "quotechar" must be a string, not an unicode
        return str(MultiByteStrDecoder(self.__quotechar).unicode_str)

    @quotechar.setter
    def quotechar(self, value):
        self.__quotechar = value

    @property
    def header_list(self):
        """Deprecated alias of ``headers``; emits a ``DeprecationWarning``."""
        # stacklevel=2 attributes the warning to the caller's line rather
        # than to this property, so users can locate the deprecated usage.
        warnings.warn(
            "'header_list' has moved to 'headers'", DeprecationWarning, stacklevel=2
        )
        return self.headers

    @header_list.setter
    def header_list(self, value):
        warnings.warn(
            "'header_list' has moved to 'headers'", DeprecationWarning, stacklevel=2
        )
        self.headers = value

    def __init__(self, source, quoting_flags, type_hints, type_hint_rules):
        super().__init__(source, quoting_flags, type_hints, type_hint_rules)

        # Set by the concrete ``load()`` implementations before parsing.
        self._csv_reader = None

        self.headers = ()
        self.delimiter = ","
        self.quotechar = '"'
        self.encoding = None

    def _to_data_matrix(self):
        """
        Read every row from ``self._csv_reader`` into a list of lists.

        Rows for which ``typepy.is_not_empty_sequence`` is false are skipped,
        and each cell is passed through the per-column item conversion.
        The reader is consumed completely before this method returns.

        :return: Converted rows, in reader order.
        :rtype: list
        :raises pytablereader.DataError:
            If the reader raises ``csv.Error`` or ``UnicodeDecodeError``.
        """

        try:
            return [
                [self.__modify_item(data, col) for col, data in enumerate(row)]
                for row in self._csv_reader
                if typepy.is_not_empty_sequence(row)
            ]
        except (csv.Error, UnicodeDecodeError) as e:
            # Chain explicitly so the low-level parse/decode error is kept
            # as the cause of the DataError.
            raise DataError(e) from e

    def __modify_item(self, data, col: int):
        """
        Convert a single cell value.

        If ``self.type_hints`` is truthy and contains ``col``, the hint for
        that column is applied. When the conversion raises
        ``typepy.TypeConversionError``, or no hint applies, the value is
        returned as decoded text instead.

        :param data: Raw cell value produced by the CSV reader.
        :param int col: Zero-based column index of the cell.
        :return: The converted value, or the decoded unicode string.
        """

        if self.type_hints and (col in self.type_hints):
            try:
                return self.type_hints[col](data).convert()
            except typepy.TypeConversionError:
                # Best effort: fall back to the plain decoded string below.
                pass

        return MultiByteStrDecoder(data).unicode_str
class CsvTableFileLoader(CsvTableLoader):
    """
    A file loader class to extract tabular data from CSV files.

    :param str file_path: Path to the loading CSV file.

    .. py:attribute:: table_name

        Table name string. Defaults to ``%(filename)s``.

    :Examples:
        :ref:`example-csv-table-loader`
    """

    def __init__(self, file_path, quoting_flags=None, type_hints=None, type_hint_rules=None):
        super().__init__(file_path, quoting_flags, type_hints, type_hint_rules)

        self._validator = FileValidator(file_path)
        self._logger = FileSourceLogger(self)

    def load(self):
        """
        Extract tabular data as |TableData| instances from a CSV file.
        |load_source_desc_file|

        The file is opened with the encoding resolved by
        ``get_file_encoding`` and is closed again before this method
        returns, including when parsing fails.

        :return:
            Loaded table data.
            |load_table_name_desc|

            ===================  ========================================
            Format specifier     Value after the replacement
            ===================  ========================================
            ``%(filename)s``     |filename_desc|
            ``%(format_name)s``  ``"csv"``
            ``%(format_id)s``    |format_id_desc|
            ``%(global_id)s``    |global_id|
            ===================  ========================================
        :rtype: |TableData| iterator
        :raises pytablereader.DataError:
            If the CSV data is invalid.

            .. seealso::
                :py:func:`csv.reader`
        """

        self._validate()
        self._logger.logging_load()
        self.encoding = get_file_encoding(self.source, self.encoding)

        # Use a context manager so the file handle is always released.
        # ``_to_data_matrix()`` reads every row into a list, so all data is
        # in memory before the file is closed.
        with open(self.source, encoding=self.encoding) as fp:
            self._csv_reader = csv.reader(
                fp,
                delimiter=self.delimiter,
                quotechar=self.quotechar,
                strict=True,
                skipinitialspace=True,
            )
            data_matrix = self._to_data_matrix()

        formatter = CsvTableFormatter(data_matrix)
        formatter.accept(self)

        return formatter.to_table_data()

    def _get_default_table_name_template(self):
        """Return the default table name template (``tnt.FILENAME``)."""
        return tnt.FILENAME
class CsvTableTextLoader(CsvTableLoader):
    """
    A text loader class to extract tabular data from CSV text data.

    :param str text: CSV text to load.

    .. py:attribute:: table_name

        Table name string. Defaults to ``%(format_name)s%(format_id)s``.

    :Examples:
        :ref:`example-csv-table-loader`
    """

    def __init__(self, text, quoting_flags=None, type_hints=None, type_hint_rules=None):
        super().__init__(text, quoting_flags, type_hints, type_hint_rules)

        self._validator = TextValidator(text)
        self._logger = TextSourceLogger(self)

    def load(self):
        """
        Extract tabular data as |TableData| instances from a CSV text object.
        |load_source_desc_text|

        Leading and trailing whitespace is stripped from the source text
        before it is handed to the CSV reader.

        :return:
            Loaded table data.
            |load_table_name_desc|

            ===================  ========================================
            Format specifier     Value after the replacement
            ===================  ========================================
            ``%(filename)s``     ``""``
            ``%(format_name)s``  ``"csv"``
            ``%(format_id)s``    |format_id_desc|
            ``%(global_id)s``    |global_id|
            ===================  ========================================
        :rtype: |TableData| iterator
        :raises pytablereader.DataError:
            If the CSV data is invalid.

            .. seealso::
                :py:func:`csv.reader`
        """

        self._validate()
        self._logger.logging_load()

        # Wrap the stripped text in an in-memory stream for csv.reader.
        text_stream = io.StringIO(self.source.strip())
        self._csv_reader = csv.reader(
            text_stream,
            delimiter=self.delimiter,
            quotechar=self.quotechar,
            strict=True,
            skipinitialspace=True,
        )

        table_formatter = CsvTableFormatter(self._to_data_matrix())
        table_formatter.accept(self)
        table_data = table_formatter.to_table_data()

        return table_data

    def _get_default_table_name_template(self):
        """Return the default template: format name followed by format id."""
        name_part = format(tnt.FORMAT_NAME, "s")
        id_part = format(tnt.FORMAT_ID, "s")

        return name_part + id_part