util/lib/analysis_package/utils/pd_util.py

83 lines
3.1 KiB
Python
Raw Normal View History

2024-05-12 12:18:24 +00:00
# -*- coding: UTF-8 -*-
"""
@Project -> File IoD_data_analysis_tool -> pd_util
@IDE PyCharm
@Author rengengchen
@Date 2022/7/13 11:00
@Desc
"""
from __future__ import annotations
import os
from functools import partial
from multiprocessing import Pool
from typing import Hashable, Callable
import pandas as pd
from pandas._typing import CompressionOptions, FilePath, StorageOptions, WriteBuffer
from pandas.core.generic import bool_t
class to_same_csv:
    """Callable that writes successive DataFrames/Series into one CSV target.

    The ``to_csv`` options are captured once by the constructor. Every call
    writes one frame; after a write the instance switches to append mode and
    stops emitting a header, so a stream of chunks ends up in a single file
    with at most one header row.

    The parameters mirror ``DataFrame.to_csv`` with these differences:

    * ``index`` defaults to ``False`` and ``encoding`` to ``'utf8'``.
    * ``line_terminator`` keeps its historical name here, but is forwarded
      under whichever keyword the installed pandas accepts
      (``lineterminator`` since pandas 1.5).
    * ``prepare`` is an optional callable invoked with each frame before it
      is written. If it returns something other than ``None`` that value is
      written instead of the input; returning ``None`` (e.g. after mutating
      the frame in place) keeps the input frame.

    When ``mode == 'a'`` and ``path_or_buf`` is a path to an existing file,
    the header is suppressed from the first write on.
    """

    def __init__(self,
                 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
                 sep: str = ",",
                 na_rep: str = "",
                 float_format: str | None = None,
                 columns: pd.Sequence[Hashable] | None = None,
                 header: bool_t | list[str] = True,
                 index: bool_t = False,
                 index_label: pd.IndexLabel | None = None,
                 mode: str = "w",
                 encoding: str = 'utf8',
                 compression: CompressionOptions = "infer",
                 quoting: int | None = None,
                 quotechar: str = '"',
                 line_terminator: str | None = None,
                 chunksize: int | None = None,
                 date_format: str | None = None,
                 doublequote: bool_t = True,
                 escapechar: str | None = None,
                 decimal: str = ".",
                 errors: str = "strict",
                 storage_options: StorageOptions = None,
                 prepare: Callable | None = None):
        # NOTE(review): nothing in this class ever sets ``not_first`` to True;
        # it is kept as a caller-settable switch (see ``__call__``).
        self.not_first = False
        self.mode = mode
        # Appending to a file that already exists: do not write the header
        # again. Path objects are recognised as well as plain strings.
        appending_to_existing = (
            mode == 'a'
            and isinstance(path_or_buf, (str, os.PathLike))
            and os.path.exists(path_or_buf)
        )
        self.header = False if appending_to_existing else header
        self.prepare = prepare
        # Options that stay constant across calls; ``mode`` and ``header``
        # are tracked separately because they change after the first write.
        self.kwargs = {'path_or_buf': path_or_buf,
                       'sep': sep,
                       'na_rep': na_rep,
                       'float_format': float_format,
                       'columns': columns,
                       'index': index,
                       'index_label': index_label,
                       'encoding': encoding,
                       'compression': compression,
                       'quoting': quoting,
                       'quotechar': quotechar,
                       self._terminator_keyword(): line_terminator,
                       'chunksize': chunksize,
                       'date_format': date_format,
                       'doublequote': doublequote,
                       'escapechar': escapechar,
                       'decimal': decimal,
                       'errors': errors,
                       'storage_options': storage_options}

    @staticmethod
    def _terminator_keyword() -> str:
        """Return the line-terminator keyword the installed ``to_csv`` accepts."""
        import inspect

        try:
            accepted = inspect.signature(pd.DataFrame.to_csv).parameters
        except (TypeError, ValueError):
            # Signature not introspectable: assume the current spelling.
            return 'lineterminator'
        return 'lineterminator' if 'lineterminator' in accepted else 'line_terminator'

    def __call__(self, df_or_series: pd.Series | pd.DataFrame):
        """Write ``df_or_series`` to the configured target.

        ``prepare`` (if given) is applied first. After the write the instance
        switches to append mode without a header for subsequent calls.
        """
        if self.not_first:
            # Only reachable when a caller set ``not_first`` explicitly:
            # write with the current mode/header, skipping ``prepare`` and
            # leaving the state untouched.
            df_or_series.to_csv(mode=self.mode, header=self.header, **self.kwargs)
            return

        if self.prepare:
            prepared = self.prepare(df_or_series)
            # Compare against None explicitly: the truth value of a
            # DataFrame/Series is ambiguous and raises ValueError.
            if prepared is not None:
                df_or_series = prepared

        df_or_series.to_csv(mode=self.mode, header=self.header, **self.kwargs)
        # Every later call must append and must not repeat the header.
        self.mode = 'a'
        self.header = False