util/lib/analysis_package/utils/pd_util.py

83 lines
3.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: UTF-8 -*-
"""
@Project -> File IoD_data_analysis_tool -> pd_util
@IDE PyCharm
@Author rengengchen
@Date 2022/7/13 11:00
@Desc
"""
from __future__ import annotations

import inspect
import os
from functools import partial
from multiprocessing import Pool
from typing import Callable, Hashable, Sequence

import pandas as pd
from pandas._typing import (
    CompressionOptions,
    FilePath,
    IndexLabel,
    StorageOptions,
    WriteBuffer,
)
from pandas.core.generic import bool_t
class to_same_csv:
    """Callable that writes successive pandas objects into one CSV target.

    The constructor accepts the ``to_csv`` options listed in its signature
    (with the historical ``line_terminator`` spelling) plus an optional
    ``prepare`` hook. Each call writes one ``DataFrame``/``Series``. After the
    first write the instance switches to append mode with no header, so later
    calls extend the same output instead of overwriting it.
    """

    # pandas 1.5 renamed ``line_terminator`` to ``lineterminator`` and 2.0
    # dropped the old spelling. Resolve once which keyword the installed
    # pandas accepts so the public ``line_terminator`` parameter keeps working.
    _LINE_TERMINATOR_KEYWORD = (
        'lineterminator'
        if 'lineterminator' in inspect.signature(pd.DataFrame.to_csv).parameters
        else 'line_terminator'
    )

    def __init__(self,
                 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
                 sep: str = ",",
                 na_rep: str = "",
                 float_format: str | None = None,
                 columns: Sequence[Hashable] | None = None,
                 header: bool_t | list[str] = True,
                 index: bool_t = False,
                 index_label: IndexLabel | None = None,
                 mode: str = "w",
                 encoding: str = 'utf8',
                 compression: CompressionOptions = "infer",
                 quoting: int | None = None,
                 quotechar: str = '"',
                 line_terminator: str | None = None,
                 chunksize: int | None = None,
                 date_format: str | None = None,
                 doublequote: bool_t = True,
                 escapechar: str | None = None,
                 decimal: str = ".",
                 errors: str = "strict",
                 storage_options: StorageOptions = None,
                 prepare: Callable | None = None):
        """Store the CSV options shared by every write.

        Args:
            path_or_buf: Target path or buffer handed to ``to_csv``.
            header: Header setting for the first write. Forced to ``False``
                when ``mode`` is ``'a'`` and ``path_or_buf`` is a path that
                already exists, so an existing file does not get a second
                header row. Later writes never emit a header.
            index: Whether to write the index; defaults to ``False`` here.
            mode: File mode of the first write. Later writes use ``'a'``.
            line_terminator: Line ending, forwarded under whichever keyword
                the installed pandas accepts.
            prepare: Optional hook called with the object before it is
                written. If it returns a ``DataFrame`` or ``Series``, that
                object is written instead; any other return value (for
                example ``None`` from an in-place hook) is ignored.

        All remaining parameters are forwarded unchanged to ``to_csv``.
        """
        self.not_first = False
        self.mode = mode
        # Accept ``os.PathLike`` as well as ``str`` so that appending to an
        # existing ``pathlib.Path`` target also skips the duplicate header.
        appends_to_existing_file = (
            self.mode == 'a'
            and isinstance(path_or_buf, (str, os.PathLike))
            and os.path.exists(path_or_buf)
        )
        if appends_to_existing_file:
            header = False
        self.header = header
        self.prepare = prepare
        self.kwargs = {'path_or_buf': path_or_buf,
                       'sep': sep,
                       'na_rep': na_rep,
                       'float_format': float_format,
                       'columns': columns,
                       'index': index,
                       'index_label': index_label,
                       'encoding': encoding,
                       'compression': compression,
                       'quoting': quoting,
                       'quotechar': quotechar,
                       'line_terminator': line_terminator,
                       'chunksize': chunksize,
                       'date_format': date_format,
                       'doublequote': doublequote,
                       'escapechar': escapechar,
                       'decimal': decimal,
                       'errors': errors,
                       'storage_options': storage_options}

    def __call__(self, df_or_series: pd.Series | pd.DataFrame):
        """Write ``df_or_series`` to the target, then switch to appending.

        Args:
            df_or_series: The object to write.
        """
        if self.not_first:
            self._write(df_or_series)
            return
        # NOTE(review): nothing in this class sets ``not_first`` to True, so
        # ``prepare`` runs on every call, not only the first. That behaviour
        # is kept as-is; confirm whether first-call-only was intended.
        if self.prepare:
            result = self.prepare(df_or_series)
            # ``if result:`` raises ValueError for a DataFrame/Series
            # (ambiguous truth value), so test the type explicitly.
            if isinstance(result, (pd.DataFrame, pd.Series)):
                df_or_series = result
        self._write(df_or_series)
        self.mode = 'a'
        self.header = False

    def _write(self, df_or_series: pd.Series | pd.DataFrame) -> None:
        """Call ``to_csv`` with the stored options and current mode/header."""
        options = dict(self.kwargs)
        # ``self.kwargs`` keeps the original key; rename it only for the call.
        options[self._LINE_TERMINATOR_KEYWORD] = options.pop('line_terminator', None)
        df_or_series.to_csv(mode=self.mode, header=self.header, **options)