util/lib/analysis_package/utils/pd_util.py

# -*- coding: UTF-8 -*-
"""
@Project -> File   ：IoD_data_analysis_tool -> pd_util
@IDE    ：PyCharm
@Author ：rengengchen
@Date   ：2022/7/13 11:00
@Desc   ：
"""
from __future__ import annotations

import inspect
import os
from functools import partial
from multiprocessing import Pool
from typing import Callable, Hashable, Sequence

import pandas as pd
from pandas._typing import CompressionOptions, FilePath, IndexLabel, StorageOptions, WriteBuffer
from pandas.core.generic import bool_t


class to_same_csv:
    """Callable that writes successive pandas objects to one CSV target.

    The constructor takes the options of ``DataFrame.to_csv`` (forwarded
    unchanged on every call) plus ``prepare``. The first call writes with the
    configured ``mode`` and ``header``; every later call appends without a
    header, so all chunks end up in the same file or buffer.

    Parameters other than the ones listed here are passed straight through to
    ``to_csv``.

    :param path_or_buf: target path or writable buffer; ``None`` makes
        ``to_csv`` return the CSV text instead of writing it.
    :param header: header setting for the first write. It is forced to
        ``False`` when ``mode`` is ``'a'`` and ``path_or_buf`` is a path that
        already exists, so the header is not repeated in the middle of a file.
    :param mode: file mode for the first write; later writes always use ``'a'``.
    :param prepare: optional callable applied to every object before it is
        written. If it returns something other than ``None`` the returned
        object is written instead; returning ``None`` (e.g. after an in-place
        modification) keeps the object that was passed in.
    """

    def __init__(self,
                 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
                 sep: str = ",",
                 na_rep: str = "",
                 float_format: str | None = None,
                 columns: Sequence[Hashable] | None = None,
                 header: bool | list[str] = True,
                 index: bool = False,
                 index_label: IndexLabel | None = None,
                 mode: str = "w",
                 encoding: str = 'utf8',
                 compression: CompressionOptions = "infer",
                 quoting: int | None = None,
                 quotechar: str = '"',
                 line_terminator: str | None = None,
                 chunksize: int | None = None,
                 date_format: str | None = None,
                 doublequote: bool = True,
                 escapechar: str | None = None,
                 decimal: str = ".",
                 errors: str = "strict",
                 storage_options: StorageOptions = None,
                 prepare: Callable | None = None):
        # Becomes True once the first object has been written.
        self.not_first = False
        self.mode = mode
        # Appending to a file that already exists: do not emit a second header
        # row. ``os.PathLike`` is accepted as well as ``str`` so that
        # ``pathlib.Path`` targets behave the same way.
        is_path = isinstance(path_or_buf, (str, os.PathLike))
        if mode == 'a' and is_path and os.path.exists(path_or_buf):
            header = False
        self.header = header
        self.prepare = prepare

        # pandas 1.5 renamed ``line_terminator`` to ``lineterminator`` and
        # pandas 2.0 removed the old spelling; forward the value under
        # whichever keyword the installed pandas accepts.
        to_csv_params = inspect.signature(pd.DataFrame.to_csv).parameters
        if 'lineterminator' in to_csv_params:
            terminator_key = 'lineterminator'
        else:
            terminator_key = 'line_terminator'

        self.kwargs = {'path_or_buf': path_or_buf,
                       'sep': sep,
                       'na_rep': na_rep,
                       'float_format': float_format,
                       'columns': columns,
                       'index': index,
                       'index_label': index_label,
                       'encoding': encoding,
                       'compression': compression,
                       'quoting': quoting,
                       'quotechar': quotechar,
                       terminator_key: line_terminator,
                       'chunksize': chunksize,
                       'date_format': date_format,
                       'doublequote': doublequote,
                       'escapechar': escapechar,
                       'decimal': decimal,
                       'errors': errors,
                       'storage_options': storage_options}

    def __call__(self, df_or_series: pd.Series | pd.DataFrame) -> str | None:
        """Write one object to the shared CSV target.

        :param df_or_series: the DataFrame or Series to write.
        :return: whatever ``to_csv`` returns - the CSV text when
            ``path_or_buf`` is ``None``, otherwise ``None``.
        """
        if self.prepare:
            prepared = self.prepare(df_or_series)
            # Compare against None explicitly: the truth value of a
            # DataFrame/Series is ambiguous and raises ValueError.
            if prepared is not None:
                df_or_series = prepared

        written = df_or_series.to_csv(mode=self.mode, header=self.header, **self.kwargs)

        # Every subsequent write appends to the same target without a header.
        self.mode = 'a'
        self.header = False
        self.not_first = True
        return written