From 707997d4e17d2db437975e99714edd1321a73975 Mon Sep 17 00:00:00 2001 From: wystan_rin Date: Sun, 12 May 2024 20:18:24 +0800 Subject: [PATCH] init --- .idea/.gitignore | 8 + .idea/inspectionProfiles/Project_Default.xml | 12 ++ .../inspectionProfiles/profiles_settings.xml | 6 + .idea/modules.xml | 8 + .idea/utils.iml | 8 + lib/README.md | 5 + lib/__init__.py | 0 lib/analysis_package/__init__.py | 9 + lib/analysis_package/categorical/README.md | 30 +++ lib/analysis_package/categorical/__init__.py | 8 + .../categorical/categorical_process.py | 180 ++++++++++++++++++ .../code_template/__init__.py | 9 + .../code_template/concurrency/__init__.py | 9 + .../concurrency/producer_consumer.py | 127 ++++++++++++ .../concurrency/task_distribution.py | 28 +++ lib/analysis_package/continuous/Crime_R.csv | 48 +++++ lib/analysis_package/continuous/README.md | 29 +++ lib/analysis_package/continuous/__init__.py | 8 + lib/analysis_package/continuous/analyzer.py | 38 ++++ .../continuous/correlation.py | 155 +++++++++++++++ .../continuous/process_tool.py | 48 +++++ lib/analysis_package/preprocess/README.md | 20 ++ lib/analysis_package/preprocess/__init__.py | 8 + .../preprocess/data_insight.py | 133 +++++++++++++ lib/analysis_package/preprocess/normalizer.py | 17 ++ lib/analysis_package/preprocess/outlier.py | 51 +++++ lib/analysis_package/timeseries/README.md | 24 +++ lib/analysis_package/timeseries/__init__.py | 26 +++ .../timeseries/anomaly_detection.py | 0 .../timeseries/frequent_analysis.py | 0 .../timeseries/seasonal_detection.py | 0 .../timeseries/stationary_test.py | 62 ++++++ lib/analysis_package/timeseries/time_base.py | 133 +++++++++++++ lib/analysis_package/utils/IDcode_util.py | 53 ++++++ lib/analysis_package/utils/__init__.py | 8 + lib/analysis_package/utils/datetime_util.py | 97 ++++++++++ lib/analysis_package/utils/file_util.py | 81 ++++++++ lib/analysis_package/utils/pd_util.py | 82 ++++++++ lib/analysis_package/utils/phone_util.py | 17 ++ lib/analysis_package/utils/project_util.py | 61 ++++++ .../analysis_package-0.1.3-py3-none-any.whl | Bin 0 -> 24892 bytes lib/package_project.py | 14 ++ lib/setup.py | 36 ++++ 43 files changed, 1696 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/utils.iml create mode 100644 lib/README.md create mode 100644 lib/__init__.py create mode 100644 lib/analysis_package/__init__.py create mode 100644 lib/analysis_package/categorical/README.md create mode 100644 lib/analysis_package/categorical/__init__.py create mode 100644 lib/analysis_package/categorical/categorical_process.py create mode 100644 lib/analysis_package/code_template/__init__.py create mode 100644 lib/analysis_package/code_template/concurrency/__init__.py create mode 100644 lib/analysis_package/code_template/concurrency/producer_consumer.py create mode 100644 lib/analysis_package/code_template/concurrency/task_distribution.py create mode 100644 lib/analysis_package/continuous/Crime_R.csv create mode 100644 lib/analysis_package/continuous/README.md create mode 100644 lib/analysis_package/continuous/__init__.py create mode 100644 lib/analysis_package/continuous/analyzer.py create mode 100644 lib/analysis_package/continuous/correlation.py create mode 100644 lib/analysis_package/continuous/process_tool.py create mode 100644 lib/analysis_package/preprocess/README.md create mode 100644 
lib/analysis_package/preprocess/__init__.py create mode 100644 lib/analysis_package/preprocess/data_insight.py create mode 100644 lib/analysis_package/preprocess/normalizer.py create mode 100644 lib/analysis_package/preprocess/outlier.py create mode 100644 lib/analysis_package/timeseries/README.md create mode 100644 lib/analysis_package/timeseries/__init__.py create mode 100644 lib/analysis_package/timeseries/anomaly_detection.py create mode 100644 lib/analysis_package/timeseries/frequent_analysis.py create mode 100644 lib/analysis_package/timeseries/seasonal_detection.py create mode 100644 lib/analysis_package/timeseries/stationary_test.py create mode 100644 lib/analysis_package/timeseries/time_base.py create mode 100644 lib/analysis_package/utils/IDcode_util.py create mode 100644 lib/analysis_package/utils/__init__.py create mode 100644 lib/analysis_package/utils/datetime_util.py create mode 100644 lib/analysis_package/utils/file_util.py create mode 100644 lib/analysis_package/utils/pd_util.py create mode 100644 lib/analysis_package/utils/phone_util.py create mode 100644 lib/analysis_package/utils/project_util.py create mode 100644 lib/dist/analysis_package-0.1.3-py3-none-any.whl create mode 100644 lib/package_project.py create mode 100644 lib/setup.py diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..06bb031 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,12 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..2213c52 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/utils.iml b/.idea/utils.iml new file mode 100644 index 0000000..d0876a7 --- /dev/null +++ b/.idea/utils.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/lib/README.md b/lib/README.md new file mode 100644 index 0000000..6208bdc --- /dev/null +++ b/lib/README.md @@ -0,0 +1,5 @@ +模块职责: +1. continuous:针对数值型数据进行特征分析 +2. categorical:针对离散型数据进行特征分析 +3. timeseries:对时序数据的分析方法 +4. 
pre-process: parses the configuration file and applies preprocessing (e.g. filling null values, sampling) before the data moves to the next step
\ No newline at end of file
diff --git a/lib/__init__.py b/lib/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lib/analysis_package/__init__.py b/lib/analysis_package/__init__.py
new file mode 100644
index 0000000..38914b1
--- /dev/null
+++ b/lib/analysis_package/__init__.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''
+@Project    :IoD_data_analysis_tool
+@File       :__init__.py
+@IDE        :PyCharm
+@Author     :rengengchen
+@Time       :2022/8/3 17:07
+'''
diff --git a/lib/analysis_package/categorical/README.md b/lib/analysis_package/categorical/README.md
new file mode 100644
index 0000000..18c5570
--- /dev/null
+++ b/lib/analysis_package/categorical/README.md
@@ -0,0 +1,30 @@
+Categorical module:
+feature analysis for discrete (categorical) data
+
+Analysis methods:
+
+1> Descriptive statistics:
+   - record the categories contained in a column
+
+   - number of distinct categories
+
+   - frequency table
+   - contingency table
+
+2> Chi-square test of independence
+
+3> Information entropy
+
+4> Mutual information
+
+Features:
+
+loops over multiple categorical columns and analyses each one
+
+Runtime environment:
+python 3.7.10 or above
+- numpy
+- pandas
+- matplotlib
+- sklearn
+- scipy.stats
diff --git a/lib/analysis_package/categorical/__init__.py b/lib/analysis_package/categorical/__init__.py
new file mode 100644
index 0000000..05f96a1
--- /dev/null
+++ b/lib/analysis_package/categorical/__init__.py
@@ -0,0 +1,8 @@
+# -*- coding: UTF-8 -*-
+"""
+@Project -> File :IoD_data_analysis_tool -> __init__.py
+@IDE     :PyCharm
+@Author  :rengengchen
+@Date    :2022/7/4 16:34
+@Desc    :
+"""
diff --git a/lib/analysis_package/categorical/categorical_process.py b/lib/analysis_package/categorical/categorical_process.py
new file mode 100644
index 0000000..2e1608a
--- /dev/null
+++ b/lib/analysis_package/categorical/categorical_process.py
@@ -0,0 +1,180 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2022/3/17 17:36
+# @Author  : Leng Yang
+# @FileName: categorical_process.py
+# @Software: PyCharm
+
+
+import pandas as pd
+import numpy as np
+from sklearn import metrics
+from scipy.stats import chi2_contingency, chi2
+
+
+class CategorySelfDescribe(object):
+    """
+    Descriptive statistics for a single categorical column
+    """
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def category_describe(data: pd.Series) -> pd.DataFrame:
+        """
+        Describe the categories contained in the column and the number of distinct categories.
+        :param data: input data, a pd.Series
+        :return: pd.DataFrame holding the list of categories and the number of distinct categories
+        Examples
+        --------
+        >>> data1 = pd.DataFrame({'天气': ['晴', '晴', '阴', '雨'], '温度': ['高', '高', '高', '低']})
+        >>> CategorySelfDescribe.category_describe(data1['天气'])
+          categories  types
+        0  [晴, 阴, 雨]      3
+        """
+        # pd.DataFrame.append is deprecated (removed in pandas 2.0), so build the frame directly
+        return pd.DataFrame([{'categories': data.unique(), 'types': data.nunique()}])
+
+    @staticmethod
+    def category_frequency(data: pd.Series) -> pd.DataFrame:
+        """
+        Frequency table.
+        :param data: input data, a pd.Series
+        :return: pd.DataFrame, the frequency table
+        Examples
+        --------
+        >>> data1 = pd.DataFrame({'天气': ['晴', '晴', '阴', '雨', '雨', '雨', '阴', '晴', '晴', '雨', '晴', '阴', '阴', '雨'],
+        ...                       '温度': ['高', '高', '高', '低', '低', '低', '低', '低', '低', '低', '低', '低', '高', '低']})
+        >>> CategorySelfDescribe.category_frequency(data1['天气'])
+          unique_values  count  frequency
+        0             晴      5   0.357143
+        1             雨      5   0.357143
+        2             阴      4   0.285714
+
+        """
+        df_freq = data.value_counts(ascending=False).rename_axis('unique_values').reset_index(name='count')
+        df_freq['frequency'] = df_freq['count'] / len(data)
+        return df_freq
+
+
+class CategorySelfAnalyse(object):
+    """
+    Statistical analysis of a single categorical column
+    """
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def entropy(data: pd.Series) -> float:
+        """
+        Compute the information entropy.
+        :param data: input data, a pd.Series
+        :return: float, the information entropy
+        """
+        # Series.value_counts replaces the deprecated top-level pd.value_counts
+        prob = data.value_counts() / len(data)
+        return sum(np.log2(prob) * prob * (-1))
+
+
+class CategoryMutualDescribe(object):
+    """
+    Descriptive statistics for two different categorical columns
+    """
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def crosstab(row_data: pd.Series, col_data: pd.Series) -> pd.DataFrame:
+        """
+        Contingency-table analysis of two categorical columns.
+        :param row_data: categorical data 1, whose categories become the table rows
+        :param col_data: categorical data 2, whose categories become the table columns
+        :return: pd.DataFrame, the contingency table
+        Examples
+        --------
+        >>> data1 = pd.DataFrame({'天气': ['晴', '晴', '阴', '雨'], '温度': ['高', '高', '高', '低']})
+        >>> CategoryMutualDescribe.crosstab(data1['天气'], data1['温度'])
+        温度  高  低
+        天气
+        晴   2  0
+        阴   1  0
+        雨   0  1
+        """
+        return pd.crosstab(row_data, col_data)
+
+
+class MutualCategoricalAnalyse(object):
+    """
+    Statistical analysis of two categorical columns
+    """
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def info_gain(df: pd.DataFrame, attr_col: str, data_col: str) -> float:
+        """
+        Compute the information gain Gain(D,A) = Ent(D) - Ent(D|A)
+        obtained by splitting dataset D on feature A.
+        :param df: input data, a dataframe
+        :param attr_col: column name of the splitting feature
+        :param data_col: column name of the dataset labels
+        :return: float, the information gain
+        """
+        # e1: entropy of the labels within each value of the feature
+        e1 = df.groupby(attr_col).apply(lambda x: CategorySelfAnalyse.entropy(x[data_col]))
+        p1 = df[attr_col].value_counts() / len(df[attr_col])  # p(x)
+        e2 = sum(e1 * p1)  # Ent(D|A), the conditional entropy
+        return CategorySelfAnalyse.entropy(df[data_col]) - e2
+
+    @staticmethod
+    def normalized_mutual_information(data1: pd.Series, data2: pd.Series) -> float:
+        """
+        Mutual Information between two clusterings. The Mutual Information is a measure of the similarity
+        between two labels of the same data.
+        Normalized Mutual Information (NMI) is a normalization of the Mutual
+        Information (MI) score to scale the results between 0 (no mutual
+        information) and 1 (perfect correlation).
+        :param data1: categorical data 1
+        :param data2: categorical data 2
+        :return: nmi : float, score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
+        """
+        return metrics.normalized_mutual_info_score(data1, data2)
+
+    @staticmethod
+    def chi2_independence(data1: pd.Series, data2: pd.Series, alpha=0.05) -> pd.DataFrame:
+        """
+        Chi-square test of independence.
+        :param alpha: significance level used to derive the critical value
+        :param data1: categorical data 1
+        :param data2: categorical data 2
+        :return: pd.DataFrame with the following columns:
+            g: the chi-square statistic
+            p: the p-value; when p < alpha the null hypothesis can be rejected
+            dof: degrees of freedom
+            re: decision flag, 1 = reject the null hypothesis, 0 = fail to reject it
+            expctd: array of expected frequencies with the same shape as the contingency table
+        """
+        data = CategoryMutualDescribe.crosstab(data1, data2)
+        g, p, dof, expctd = chi2_contingency(data)
+        if dof == 0:
+            raise ValueError('the degrees of freedom should be at least 1')
+        # critical value of the chi-square distribution at significance level alpha
+        cv = chi2.isf(alpha, dof)
+        result = pd.DataFrame([{'g': g, 'p': p, 'dof': dof, 'expctd': expctd}])
+        result.loc[0, 're'] = 1 if g > cv else 0  # 1 = reject H0, 0 = fail to reject H0
+        return result
diff --git a/lib/analysis_package/code_template/__init__.py b/lib/analysis_package/code_template/__init__.py
new file mode 100644
index 0000000..e2163b0
--- /dev/null
+++ b/lib/analysis_package/code_template/__init__.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+"""
+@Project    :IoD_data_analysis_tool
+@File       :__init__.py
+@IDE        :PyCharm
+@Author     :rengengchen
+@Time       :2022/8/5 11:52
+"""
diff --git a/lib/analysis_package/code_template/concurrency/__init__.py b/lib/analysis_package/code_template/concurrency/__init__.py
new file mode 100644
index 0000000..e2163b0
--- /dev/null
+++ b/lib/analysis_package/code_template/concurrency/__init__.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+"""
+@Project    :IoD_data_analysis_tool
+@File 
:__init__.py.py +@IDE :PyCharm +@Author :rengengchen +@Time :2022/8/5 11:52 +""" diff --git a/lib/analysis_package/code_template/concurrency/producer_consumer.py b/lib/analysis_package/code_template/concurrency/producer_consumer.py new file mode 100644 index 0000000..23b4b60 --- /dev/null +++ b/lib/analysis_package/code_template/concurrency/producer_consumer.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +@Project :IoD_data_analysis_tool +@File :producer_consumer.py +@IDE :PyCharm +@Author :rengengchen +@Time :2022/8/5 11:53 +""" +import multiprocessing +from typing import Iterable, Callable + +from tqdm import tqdm + + +class Stop: + pass + + +class AbstractPCConcurrencySystem: + """ + @todo 对启动进程的维护 + @todo 进程数量 + """ + + def __init__(self, num_producer: int = 1, num_consumer: int = 1, num_callback: int = 0, + len_task_queue: int = 0, len_result_queue: int = 0, len_callback_queue: int = 0, + producer_lock=None, consumer_lock=None, callback_lock=None, + meta=None, enable_progressbar=False, num_total_result=None): + self.task_queue = multiprocessing.Queue(len_task_queue) + + self.num_producer = num_producer + self.num_consumer = num_consumer + self.num_callback = num_callback + self.producer_lock = producer_lock or multiprocessing.Lock() + self.consumer_lock = consumer_lock or multiprocessing.Lock() + self.meta = meta + self.enable_progressbar = enable_progressbar + if enable_progressbar and self.num_callback == 0: + self.num_callback = 1 + self.result_queue = multiprocessing.Queue(len_result_queue) + if self.num_callback: + self.callback_lock = callback_lock or multiprocessing.Lock() + self.num_total_result = num_total_result + self.callback_queue = multiprocessing.Queue(len_callback_queue) + + def get_result(self): + return self.callback_queue.get() + + def produce(self): + """ + Must return an iterable object or a Stop object. 
+ """ + raise NotImplementedError + + def consume(self, consumer_params): + """ + @return: task result or Stop() + """ + raise NotImplementedError + + def callback(self, result): + return result + + def _produce(self): + producer = self.produce() + if isinstance(producer, Iterable): + for params in producer: + self.task_queue.put(params, block=True) + stop = Stop() + for _ in range(self.num_consumer): + self.task_queue.put(stop, block=True) + elif isinstance(producer, Callable): + while True: + task = producer() + if isinstance(task, Stop): + break + self.task_queue.put(task, block=True) + + def _consume(self): + consumer_params = self.task_queue.get(block=True) + while not isinstance(consumer_params, Stop): + info = self.consume(consumer_params) + self.result_queue.put(info) + consumer_params = self.task_queue.get(block=True) + self.result_queue.put(Stop()) + + def _callback(self): + if self.enable_progressbar: + bar = tqdm(total=self.num_total_result) + over_flag = 0 + while over_flag < self.num_consumer: + result = self.result_queue.get(block=True) + if isinstance(result, Stop): + over_flag += 1 + else: + callback = self.callback(result) + self.callback_queue.put(callback) + if self.enable_progressbar: + bar.update(1) + else: + if self.enable_progressbar: + bar.close() + + def run(self): + consumers = [] + callbackers = [] + # 创建并启动生产者 + for i in range(self.num_producer): + multiprocessing.Process(target=self._produce, name=f'producer_{i}').start() + # 创建并启动消费者 + for i in range(self.num_consumer): + p = multiprocessing.Process(target=self._consume, name=f'consumer_{i}') + consumers.append(p) + p.start() + # 处理结果 + if self.num_callback: + for i in range(self.num_callback): + p = multiprocessing.Process(target=self._callback, name=f'callback_{i}') + callbackers.append(p) + p.start() + return self + + def close(self): + self.task_queue.close() + self.result_queue.close() + self.callback_queue.close() diff --git a/lib/analysis_package/code_template/concurrency/task_distribution.py b/lib/analysis_package/code_template/concurrency/task_distribution.py new file mode 100644 index 0000000..c52299c --- /dev/null +++ b/lib/analysis_package/code_template/concurrency/task_distribution.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +@Project :IoD_data_analysis_tool +@File :distribute_task.py +@IDE :PyCharm +@Author :rengengchen +@Time :2022/8/8 16:55 +""" +import math +import multiprocessing + + +def equally_distributing_task(target, tasks, *args, results=None, num_processors=8): + len_tasks = len(tasks) + process_offset = math.ceil(len_tasks / num_processors) + for i in range(num_processors): + sub_tasks = tasks[i * process_offset: (i + 1) * process_offset] + if sub_tasks: + if results: + multiprocessing.Process(target=target, + args=(sub_tasks, results, *args)).start() + else: + multiprocessing.Process(target=target, + args=(sub_tasks, *args)).start() + else: + break + return results diff --git a/lib/analysis_package/continuous/Crime_R.csv b/lib/analysis_package/continuous/Crime_R.csv new file mode 100644 index 0000000..ec78801 --- /dev/null +++ b/lib/analysis_package/continuous/Crime_R.csv @@ -0,0 +1,48 @@ +CrimeRate,Youth,Southern,Education,ExpenditureYear0,LabourForce,Males,MoreMales,StateSize,YouthUnemployment,MatureUnemployment,HighYouthUnemploy,Wage,BelowWage,CrimeRate10,Youth10,Education10,ExpenditureYear10,LabourForce10,Males10,MoreMales10,StateSize10,YouthUnemploy10,MatureUnemploy10,HighYouthUnemploy10,Wage10,BelowWage10 
+45.5,135,0,12.4,69,540,965,0,6,80,22,1,564,139,26.5,135,12.5,71,564,974,0,6,82,20,1,632,142 +52.3,140,0,10.9,55,535,1045,1,6,135,40,1,453,200,35.9,135,10.9,54,540,1039,1,7,138,39,1,521,210 +56.6,157,1,11.2,47,512,962,0,22,97,34,0,288,276,37.1,153,11,44,529,959,0,24,98,33,0,359,256 +60.3,139,1,11.9,46,480,968,0,19,135,53,0,457,249,42.7,139,11.8,41,497,983,0,20,131,50,0,510,235 +64.2,126,0,12.2,106,599,989,0,40,78,25,1,593,171,46.7,125,12.2,97,602,989,0,42,79,24,1,660,162 +67.6,128,0,13.5,67,624,972,0,28,77,25,1,507,206,47.9,128,13.8,60,621,983,0,28,81,24,1,571,199 +70.5,130,0,14.1,63,641,984,0,14,70,21,1,486,196,50.6,153,14.1,57,641,993,0,14,71,23,1,556,176 +73.2,143,0,12.9,66,537,977,0,10,114,35,1,487,166,55.9,143,13,63,549,973,0,11,119,36,1,561,168 +75,141,0,12.9,56,523,968,0,4,107,37,0,489,170,61.8,153,12.9,54,538,968,0,5,110,36,1,550,126 +78.1,133,0,11.4,51,599,1024,1,7,99,27,1,425,225,65.4,134,11.2,47,600,1024,1,7,97,28,1,499,215 +79.8,142,1,12.9,45,533,969,0,18,94,33,0,318,250,71.4,142,13.1,44,552,969,0,19,93,36,0,378,247 +82.3,123,0,12.5,97,526,948,0,113,124,50,0,572,158,75.4,134,12.4,87,529,949,0,117,125,49,0,639,146 +83.1,135,0,13.6,62,595,986,0,22,77,27,0,529,190,77.3,137,13.7,61,599,993,0,23,80,28,0,591,189 +84.9,121,0,13.2,118,547,964,0,25,84,29,0,689,126,78.6,132,13.3,115,538,968,0,25,82,30,0,742,127 +85.6,166,1,11.4,58,521,973,0,46,72,26,0,396,237,80.6,153,11.2,54,543,983,0,47,76,25,1,568,246 +88,140,0,12.9,71,632,1029,1,7,100,24,1,526,174,82.2,130,12.9,68,620,1024,1,8,104,25,1,570,182 +92.3,126,0,12.7,74,602,984,0,34,102,33,1,557,195,87.5,134,12.9,67,599,982,0,33,107,34,1,621,199 +94.3,130,0,13.3,128,536,934,0,51,78,34,0,627,135,92.9,127,13.3,128,530,949,0,52,79,33,0,692,140 +95.3,125,0,12,90,586,964,0,97,105,43,0,617,163,94.1,134,11.9,81,571,971,0,99,106,41,0,679,162 +96.8,151,1,10,58,510,950,0,33,108,41,0,394,261,96.2,161,10.1,56,515,1001,1,32,110,40,0,465,254 +97.4,152,1,10.8,57,530,986,0,30,92,43,0,405,264,97.8,152,11,53,541,989,0,30,92,41,0,470,243 +98.7,162,1,12.1,75,522,996,0,40,73,27,0,496,224,99.9,162,12,70,533,992,0,41,80,28,0,562,229 +99.9,149,1,10.7,61,515,953,0,36,86,35,0,395,251,101.4,150,10.7,54,520,952,0,35,84,32,0,476,249 +103,177,1,11,58,638,974,0,24,76,28,0,382,254,103.5,164,10.9,56,638,978,0,25,79,28,0,456,257 +104.3,134,0,12.5,75,595,972,0,47,83,31,0,580,172,104.5,133,12.7,71,599,982,0,50,87,32,0,649,182 +105.9,130,0,13.4,90,623,1049,1,3,113,40,0,588,160,106.4,153,13.4,91,622,1050,1,3,119,41,0,649,159 +106.6,157,1,11.1,65,553,955,0,39,81,28,0,421,239,107.8,156,11.2,62,562,956,0,39,85,29,0,499,243 +107.2,148,0,13.7,72,601,998,0,9,84,20,1,590,144,110.1,134,13.9,66,602,999,0,9,87,15,0,656,151 +108.3,126,0,13.8,97,542,990,0,18,102,35,0,589,166,110.5,126,13.8,97,549,993,0,19,103,34,1,659,160 +109.4,135,1,11.4,123,537,978,0,31,89,34,0,631,165,113.5,134,11.3,115,529,978,0,32,93,35,0,703,175 +112.1,142,1,10.9,81,497,956,0,33,116,47,0,427,247,116.3,147,10.7,77,501,962,0,33,117,44,0,500,256 +114.3,127,1,12.8,82,519,982,0,4,97,38,0,620,168,119.7,125,12.9,79,510,945,0,4,99,39,0,696,170 +115.1,131,0,13.7,78,574,1038,1,7,142,42,1,540,176,124.5,134,13.6,73,581,1029,1,7,143,41,1,615,177 +117.2,136,0,12.9,95,574,1012,1,29,111,37,1,622,162,127.8,140,13,96,581,1011,1,29,115,36,1,691,169 +119.7,119,0,11.9,166,521,938,0,168,92,36,0,637,154,129.8,120,11.9,157,524,935,0,180,93,27,1,698,169 +121.6,147,1,13.9,63,560,972,0,23,76,24,1,462,233,130.7,139,14,64,571,970,0,24,78,24,1,511,220 
+123.4,145,1,11.7,82,560,981,0,96,88,31,0,488,228,132.5,154,11.8,74,563,980,0,99,89,29,1,550,230
+127.2,132,0,10.4,87,564,953,0,43,83,32,0,513,227,134.6,135,10.2,83,560,948,0,44,83,32,0,589,234
+132.4,152,0,12,82,571,1018,1,10,103,28,1,537,215,137.5,151,12.1,76,567,1079,1,11,105,27,1,617,204
+135.5,125,0,12.5,113,567,985,0,78,130,58,0,626,166,140.5,140,12.5,105,571,993,0,77,131,59,0,684,174
+137.8,141,0,14.2,109,591,985,0,18,91,20,1,578,174,145.7,142,14.2,101,590,987,0,19,94,19,1,649,180
+140.8,150,0,12,109,531,964,0,9,87,38,0,559,153,150.6,153,12,98,539,982,0,10,88,36,0,635,151
+145.4,131,1,12.2,115,542,969,0,50,79,35,0,472,206,157.3,131,12.1,109,548,976,0,52,82,34,0,539,219
+149.3,143,0,12.3,103,583,1012,1,13,96,36,0,557,194,162.7,142,12.2,95,612,1003,1,13,97,36,0,625,196
+154.3,124,0,12.3,121,580,966,0,101,77,35,0,657,170,169.6,134,12.2,116,580,987,0,104,79,36,0,719,172
+157.7,136,0,15.1,149,577,994,0,157,102,39,0,673,167,177.2,140,15.2,141,578,995,0,160,110,40,0,739,169
+161.8,131,0,13.2,160,631,1071,1,3,102,41,0,674,152,178.2,132,13.2,143,632,1058,1,4,100,40,0,748,150
diff --git a/lib/analysis_package/continuous/README.md b/lib/analysis_package/continuous/README.md
new file mode 100644
index 0000000..a0ccfe6
--- /dev/null
+++ b/lib/analysis_package/continuous/README.md
@@ -0,0 +1,29 @@
+# **Numerical data analysis and process tools**
+
+
+### **Project Description**:
+
+- Correlation analysis and processing for numerical data, with image visualization to aid understanding.
+
+
+
+#### Numerical analysis tools part
+
+- Spearman correlation determines whether there is a monotonic component in the relationship between two features;
+it applies to non-linear relationships and ordinal data.
+
+#### Numerical process tools part
+
+- Detecting outliers using the interquartile range (IQR).
+- Removing features that are highly correlated with each other.
+
+
+
+#### How to use the tools
+
+Input numerical-only data (data type: DataFrame), as sketched below.
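The continuous tools are meant to be driven from a plain DataFrame. A minimal usage sketch follows; it assumes the wheel is installed under the name `analysis_package`, that an `out/` directory already exists for the plot, and it uses column names from the bundled Crime_R.csv:

```python
import pandas as pd
from analysis_package.continuous.analyzer import Spearman_rank_test
from analysis_package.continuous.process_tool import NumericProcess

df = pd.read_csv('lib/analysis_package/continuous/Crime_R.csv')

# Spearman's rank test between two numeric columns; logs the coefficient
# and p-value and writes a scatter plot to out/spearman.png
Spearman_rank_test(df, 'CrimeRate', 'ExpenditureYear0',
                   save_path='out', file_name='spearman.png',
                   sample_size=40)  # the demo csv has only 47 rows

# drop features whose pairwise |correlation| exceeds 0.9 (modifies df in place)
reduced = NumericProcess.drop_feature(df, thresh_hold=0.9)
print(reduced.columns)
```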
diff --git a/lib/analysis_package/continuous/__init__.py b/lib/analysis_package/continuous/__init__.py
new file mode 100644
index 0000000..05f96a1
--- /dev/null
+++ b/lib/analysis_package/continuous/__init__.py
@@ -0,0 +1,8 @@
+# -*- coding: UTF-8 -*-
+"""
+@Project -> File :IoD_data_analysis_tool -> __init__.py
+@IDE     :PyCharm
+@Author  :rengengchen
+@Date    :2022/7/4 16:34
+@Desc    :
+"""
diff --git a/lib/analysis_package/continuous/analyzer.py b/lib/analysis_package/continuous/analyzer.py
new file mode 100644
index 0000000..ea90a7f
--- /dev/null
+++ b/lib/analysis_package/continuous/analyzer.py
@@ -0,0 +1,38 @@
+import os
+
+import matplotlib.pyplot as plt
+from scipy.stats import spearmanr
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def Spearman_rank_test(data_frame, feature_a, feature_b, save_path, file_name, sample_size=4000):
+    """
+    Spearman correlation determines whether there is a monotonic
+    component in the relationship between two features; it applies to
+    non-linear relationships and ordinal data.
+
+    @param data_frame: input data as a DataFrame
+    @param feature_a: first feature for Spearman's rank test
+    @param feature_b: second feature for Spearman's rank test
+    @param sample_size: size of the sample used to represent the population
+    @param save_path: output path (an existing directory)
+    @param file_name: output file name
+    """
+    a = data_frame[feature_a].sample(n=sample_size, random_state=1)
+    b = data_frame[feature_b].sample(n=sample_size, random_state=1)
+    coef, p = spearmanr(a, b)
+    logger.info("Spearman's correlation coefficient is: " + str(coef))
+    alpha = 0.05
+    plt.scatter(a, b)
+    plt.xlabel("Feature A")
+    plt.ylabel("Feature B")
+    plt.title("Spearman's Rank Test")
+    plt.savefig(os.path.join(save_path, file_name))
+    if p > alpha:
+        logger.info("Features are uncorrelated (failed to reject H0), p=" + str(p))
+    else:
+        logger.info("Features have a monotonic relationship (reject H0), p=" + str(p))
diff --git a/lib/analysis_package/continuous/correlation.py b/lib/analysis_package/continuous/correlation.py
new file mode 100644
index 0000000..2e99090
--- /dev/null
+++ b/lib/analysis_package/continuous/correlation.py
@@ -0,0 +1,155 @@
+# -*- coding: UTF-8 -*-
+"""
+@Project -> File :IoD_data_analysis_tool -> correlation
+@IDE     :PyCharm
+@Author  :rengengchen
+@Date    :2022/7/4 16:48
+@Desc    :
+"""
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+# import under an alias: the wrapper below reuses the name `spearmanr`,
+# and calling the bare import from inside it would recurse forever
+from scipy.stats import spearmanr as scipy_spearmanr
+
+
+def spearmanr(a: pd.Series, b: pd.Series = None, axis=0, nan_policy='propagate',
+              alternative='two-sided', sample_size=4000, random_state=None):
+    """Calculate a Spearman correlation coefficient with associated p-value.
+
+    The Spearman rank-order correlation coefficient is a nonparametric measure
+    of the monotonicity of the relationship between two datasets. Unlike the
+    Pearson correlation, the Spearman correlation does not assume that both
+    datasets are normally distributed. Like other correlation coefficients,
+    this one varies between -1 and +1 with 0 implying no correlation.
+    Correlations of -1 or +1 imply an exact monotonic relationship. Positive
+    correlations imply that as x increases, so does y. Negative correlations
+    imply that as x increases, y decreases.
+
+    The p-value roughly indicates the probability of an uncorrelated system
+    producing datasets that have a Spearman correlation at least as extreme
+    as the one computed from these datasets.
The p-values are not entirely + reliable but are probably reasonable for datasets larger than 500 or so. + + Parameters + ---------- + a, b : 1D or 2D array_like, b is optional + One or two 1-D or 2-D arrays containing multiple variables and + observations. When these are 1-D, each represents a vector of + observations of a single variable. For the behavior in the 2-D case, + see under ``axis``, below. + Both arrays need to have the same length in the ``axis`` dimension. + axis : int or None, optional + If axis=0 (default), then each column represents a variable, with + observations in the rows. If axis=1, the relationship is transposed: + each row represents a variable, while the columns contain observations. + If axis=None, then both arrays will be raveled. + nan_policy : {'propagate', 'raise', 'omit'}, optional + Defines how to handle when input contains nan. + The following options are available (default is 'propagate'): + + * 'propagate': returns nan + * 'raise': throws an error + * 'omit': performs the calculations ignoring nan values + + alternative : {'two-sided', 'less', 'greater'}, optional + Defines the alternative hypothesis. Default is 'two-sided'. + The following options are available: + + * 'two-sided': the correlation is nonzero + * 'less': the correlation is negative (less than zero) + * 'greater': the correlation is positive (greater than zero) + + sample_size : int, optional + Number of items from column to return. Default is 4000. + + random_state : int, array-like, BitGenerator, np.random.RandomState, optional + If int, array-like, or BitGenerator (NumPy>=1.17), seed for + random number generator + If np.random.RandomState, use as numpy RandomState object. + + Returns + ------- + correlation : float or ndarray (2-D square) + Spearman correlation matrix or correlation coefficient (if only 2 + variables are given as parameters. Correlation matrix is square with + length equal to total number of variables (columns or rows) in ``a`` + and ``b`` combined. + pvalue : float + The p-value for a hypothesis test whose null hypotheisis + is that two sets of data are uncorrelated. See `alternative` above + for alternative hypotheses. `pvalue` has the same + shape as `correlation`. + + References + ---------- + .. [1] Zwillinger, D. and Kokoska, S. (2000). CRC Standard + Probability and Statistics Tables and Formulae. Chapman & Hall: New + York. 2000. + Section 14.7 + + Examples + -------- + >>> from scipy import stats + >>> stats.spearmanr([1,2,3,4,5], [5,6,7,8,7]) + SpearmanrResult(correlation=0.82078..., pvalue=0.08858...) + >>> rng = np.random.default_rng() + >>> x2n = rng.standard_normal((100, 2)) + >>> y2n = rng.standard_normal((100, 2)) + >>> stats.spearmanr(x2n) + SpearmanrResult(correlation=-0.07960396039603959, pvalue=0.4311168705769747) + >>> stats.spearmanr(x2n[:,0], x2n[:,1]) + SpearmanrResult(correlation=-0.07960396039603959, pvalue=0.4311168705769747) + >>> rho, pval = stats.spearmanr(x2n, y2n) + >>> rho + array([[ 1. , -0.07960396, -0.08314431, 0.09662166], + [-0.07960396, 1. , -0.14448245, 0.16738074], + [-0.08314431, -0.14448245, 1. , 0.03234323], + [ 0.09662166, 0.16738074, 0.03234323, 1. ]]) + >>> pval + array([[0. , 0.43111687, 0.41084066, 0.33891628], + [0.43111687, 0. , 0.15151618, 0.09600687], + [0.41084066, 0.15151618, 0. , 0.74938561], + [0.33891628, 0.09600687, 0.74938561, 0. ]]) + >>> rho, pval = stats.spearmanr(x2n.T, y2n.T, axis=1) + >>> rho + array([[ 1. , -0.07960396, -0.08314431, 0.09662166], + [-0.07960396, 1. 
, -0.14448245, 0.16738074],
+           [-0.08314431, -0.14448245,  1.        ,  0.03234323],
+           [ 0.09662166,  0.16738074,  0.03234323,  1.        ]])
+    >>> stats.spearmanr(x2n, y2n, axis=None)
+    SpearmanrResult(correlation=0.044981624540613524, pvalue=0.5270803651336189)
+    >>> stats.spearmanr(x2n.ravel(), y2n.ravel())
+    SpearmanrResult(correlation=0.044981624540613524, pvalue=0.5270803651336189)
+
+    >>> rng = np.random.default_rng()
+    >>> xint = rng.integers(10, size=(100, 2))
+    >>> stats.spearmanr(xint)
+    SpearmanrResult(correlation=0.09800224850707953, pvalue=0.3320271757932076)
+
+    """
+    # optional down-sampling of the inputs is currently disabled
+    # a = a.sample(n=sample_size, random_state=random_state)
+    # if b is not None:
+    #     b = b.sample(n=sample_size, random_state=random_state)
+    return scipy_spearmanr(a, b, axis=axis, nan_policy=nan_policy, alternative=alternative)
+
+
+def corr(df, method='pearson', drop=False, threshold=0, plot=True, filepath=None, figsize=None):
+    # SimHei is set so that CJK labels render; harmless when absent
+    plt.rcParams['font.sans-serif'] = ['SimHei']
+    plt.rcParams['axes.unicode_minus'] = False
+    cmap = sns.diverging_palette(250, 15, s=95, l=40, n=9, center="light", as_cmap=True)
+    cov = df.corr(method=method)
+    if drop:
+        # keep only rows whose lower triangle holds no correlation above the threshold
+        uncorr = ~np.any(np.abs(np.tril(cov, k=-1)) > threshold, axis=1)
+        cov = cov[uncorr]
+        cov = cov[cov.index]
+    if plot or filepath:
+        mask = np.triu(np.ones_like(cov, dtype=bool))
+        fig, ax = plt.subplots(figsize=figsize)
+        sns.heatmap(cov, mask=mask, center=0, annot=True, fmt='.2f', cmap=cmap, square=True, ax=ax)
+        plt.title("Correlation matrix")
+        if filepath:
+            plt.savefig(filepath)
+        if plot:
+            plt.show()
+    return cov
diff --git a/lib/analysis_package/continuous/process_tool.py b/lib/analysis_package/continuous/process_tool.py
new file mode 100644
index 0000000..50f8b92
--- /dev/null
+++ b/lib/analysis_package/continuous/process_tool.py
@@ -0,0 +1,48 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+# @Time     : 2022/3/25 9:09
+# @Software : PyCharm
+# @File     : process_tool.py
+# @Author   : QT
+# @Email    : taoqimin@sics.ac.cn
+import numpy as np
+from tqdm import tqdm
+import logging
+
+logger = logging.getLogger(__name__)
+logger.setLevel(level=logging.INFO)
+handler = logging.FileHandler("log.txt")
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+handler.setFormatter(formatter)
+
+console = logging.StreamHandler()
+console.setLevel(logging.INFO)
+
+# register each handler exactly once, otherwise every message is logged twice
+logger.addHandler(handler)
+logger.addHandler(console)
+
+
+class NumericProcess:
+    @staticmethod
+    def drop_feature(data_frame, thresh_hold):
+        """
+        Detect and drop highly correlated features.
+        When two variables are highly correlated they often cause problems
+        such as multicollinearity, so the correlated features are removed.
+
+        @param data_frame: Input dataframe
+        @param thresh_hold: a number between -1 and 1; features are dropped when their pairwise correlation exceeds this level
+        """
+
+        matrix = data_frame.corr().abs()
+        # mask the upper triangle so every pair is inspected only once
+        mask = np.triu(np.ones_like(matrix, dtype=bool))
+        reduced_matrix = matrix.mask(mask)
+        feature_drop = [c for c in tqdm(reduced_matrix) if
+                        any(reduced_matrix[c] > thresh_hold)]
+        data_frame.drop(feature_drop, axis=1, inplace=True)
+        logger.info("The following features are dropped due to multicollinearity: " + str(feature_drop))
+        return data_frame
diff --git a/lib/analysis_package/preprocess/README.md b/lib/analysis_package/preprocess/README.md
new file mode 100644
index 0000000..aac6bab
--- /dev/null
+++ b/lib/analysis_package/preprocess/README.md
@@ -0,0 +1,20 @@
+Parses the configuration file and applies preprocessing (e.g. filling null values, sampling) before the data moves to the next step
+
+
+
+Part of the pre-process lib is finished so far, as follows:
+
+- data_insight
+  - DuplicateInsight - detection of duplicated records
+  - NullInsight - detection of null values
+  - ValidationInsight - data validity checks
+- data_process
+  - FilteringProcessor - data filtering
+
+
+
+Additionally:
+
+- TypeInsight - its validation method for dates is not finished yet
+
+still unfinished
\ No newline at end of file
diff --git a/lib/analysis_package/preprocess/__init__.py b/lib/analysis_package/preprocess/__init__.py
new file mode 100644
index 0000000..0bf2fbc
--- /dev/null
+++ b/lib/analysis_package/preprocess/__init__.py
@@ -0,0 +1,8 @@
+# -*- coding: UTF-8 -*-
+"""
+@Project -> File :IoD_data_analysis_tool -> __init__.py
+@IDE     :PyCharm
+@Author  :rengengchen
+@Date    :2022/4/26 10:40
+@Desc    :
+"""
diff --git a/lib/analysis_package/preprocess/data_insight.py b/lib/analysis_package/preprocess/data_insight.py
new file mode 100644
index 0000000..508c00c
--- /dev/null
+++ b/lib/analysis_package/preprocess/data_insight.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# file: data_insight
+# author: shenwentao, wangkanglong
+# description:
+# date: 2022-03-30 16:45
+# IDE: PyCharm
+
+import pandas as pd
+import datetime
+from typing import List, Union
+from pandas.core.dtypes.api import is_bool_dtype, is_float_dtype, is_integer_dtype, is_string_dtype, is_datetime64_dtype
+
+from analysis_package.utils.assertion import assert_range
+
+
+class DuplicateInsight:
+
+    @staticmethod
+    def num_duplicate(data, subset=None, keep='first') -> pd.DataFrame:
+        """
+        Count duplicated records.
+        :param data: source data
+        :param subset: selected columns/fields, same as the subset parameter of pd.DataFrame.duplicated
+        :param keep: determines which duplicates (if any) to mark, same as the keep parameter of pd.DataFrame.duplicated
+        :return: the count result
+        """
+        result = data.duplicated(subset, keep=keep).sum()
+        return pd.DataFrame([result], columns=['duplicate_num'])
+
+
+class NullInsight:
+
+    @staticmethod
+    def num_null(data, column: str = None) -> pd.DataFrame:
+        """
+        Count null values in the data.
+        :param data: source data
+        :param column: selected column/field
+        :return: the count result
+        """
+        if column is not None:
+            return pd.DataFrame([data[column].isna().sum()], columns=['null_num'], index=[column])
+        else:
+            return pd.DataFrame(data.isna().sum(), columns=['null_num'])
+
+
+class ValidationInsight:
+    """
+    Validity checks for bad values, with range constraints per data type
+    """
+
+    @staticmethod
+    def validation_continuous_range(data: pd.DataFrame, column: str,
+                                    min_val: Union[int, float], max_val: Union[int, float]) -> pd.DataFrame:
+        """
+        Validate continuous numeric data, counting values inside and outside the given range.
+        :param data: source data
+        :param column: selected column/field
+        :param min_val: lower bound of the range
+        :param max_val: upper bound of the range
+        :return: the count result
+        """
+        assert_range(min_val, max_val)
+        nums = dict()
+        nums['column'] = column
+        nums['num_lt_min'] = data.query(f'{column} < {min_val}').shape[0]
+        nums['num_gt_max'] = data.query(f'{column} > {max_val}').shape[0]
+        nums['num_within_range'] = data.shape[0] - nums['num_lt_min'] - nums['num_gt_max']
+        return pd.DataFrame([nums], index=['result'])
+
+    @staticmethod
+    def validation_categorical_range(data, column: str, values: List) -> pd.DataFrame:
+        """
+        Validate categorical data, counting values inside and outside the user-defined set.
+        :param data: source data
+        :param column: selected column/field
+        :param values: the user-defined categorical values, i.e. the valid "range"
+        :return: the count result
+        """
+        nums = dict()
+        nums['column'] = column
+        nums['num_within_range'] = data[data[column].isin(values)].shape[0]
+        nums['num_out_range'] = len(data[column]) - nums['num_within_range']
+        return pd.DataFrame([nums], index=['result'])
+
+    @staticmethod
+    def validation_date_range(data, column: str, start_date: datetime.date,
+                              end_date: datetime.date) -> pd.DataFrame:
+        """
+        Validate date data, counting values inside and outside the given range;
+        assumes the column's type is datetime.date.
+        :param data: source data
+        :param column: selected column/field
+        :param start_date: start date
+        :param end_date: end date
+        :return: the count result
+        """
+        assert_range(start_date, end_date)
+        nums = dict()
+        nums['column'] = column
+        nums['date_lt_start'] = sum(data[column] < start_date)
+        nums['date_gt_end'] = sum(data[column] > end_date)
+        # everything that is neither before the start nor after the end
+        nums['date_within_range'] = data.shape[0] - nums['date_lt_start'] - nums['date_gt_end']
+        return pd.DataFrame([nums], index=['result'])
+
+
+class TypeInsight:
+    """
+    Lets the user check whether a column has the expected dtype
+    """
+
+    # TODO: a timestamp checker is still missing
+    _checkers = {
+        'int': is_integer_dtype,
+        'float': is_float_dtype,
+        'string': is_string_dtype,
+        'bool': is_bool_dtype,
+        'datetime': is_datetime64_dtype
+    }
+
+    @staticmethod
+    def type_check(data, column: str, check_type: str) -> pd.DataFrame:
+        """
+        Check whether the column's dtype is the expected one.
+        :param data: source data
+        :param column: selected column/field
+        :param check_type: the dtype to check for, one of {'int', 'float', 'string', 'bool', 'datetime'}
+        :return: the check result
+        """
+        flag = bool(TypeInsight._checkers[check_type](data[column]))
+        return pd.DataFrame([flag], columns=['result'], index=[column])
diff --git a/lib/analysis_package/preprocess/normalizer.py b/lib/analysis_package/preprocess/normalizer.py
new file mode 100644
index 0000000..6b60b57
--- /dev/null
+++ b/lib/analysis_package/preprocess/normalizer.py
@@ -0,0 +1,17 @@
+# -*- coding: UTF-8 -*-
+"""
+@Project -> File :IoD_data_analysis_tool -> normalizer
+@IDE     :PyCharm
+@Author  :rengengchen
+@Date    :2022/4/26 10:40
+@Desc    :
+"""
+from scipy.stats import zscore as scipy_zscore
+
+
+def zscore(a, axis=0, ddof=0, nan_policy='propagate'):
+    """
+    Zi = (Xi - μ) / σ
+    """
+    return scipy_zscore(a, axis, ddof, nan_policy)
diff --git a/lib/analysis_package/preprocess/outlier.py b/lib/analysis_package/preprocess/outlier.py
new file mode 100644
index 0000000..1cf3492
--- /dev/null
+++ b/lib/analysis_package/preprocess/outlier.py
@@ -0,0 +1,51 @@
+# -*- coding: UTF-8 -*-
+"""
+@Project -> File :IoD_data_analysis_tool -> outlierprocessing
+@IDE     :PyCharm
+@Author  :rengengchen
+@Date    :2022/4/26 10:24
+@Desc    :
+"""
+from typing import Union
+
+import pandas as pd
+
+
+def MAD(data: pd.Series, n: float = 2.5, constant=1.4826, axis=0):
+    """
+    Clip values that deviate from the median by more than n * MAD,
+    where MAD = constant * median(|Xi - median(X)|).
+    @return the clipped Series or DataFrame
+    """
+    x = data.median()
+    MC = (data - x).abs().median()
+    MAD = MC * constant
+    offset = n * MAD
+    if isinstance(data, pd.DataFrame):
+        return data.clip(lower=x - offset, upper=x + offset, axis=axis)
+    else:
+        return data.clip(lower=x - offset, upper=x + offset)
+
+
+def three_sigma(data: pd.Series):
+    """Return the index of values outside mean ± 3·std."""
+    miu = data.mean()
+    sigma = data.std()
+    low = miu - 3 * sigma
+    up = miu + 3 * sigma
+    return data.index[(data < low) | (data > up)]
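A short sketch of how the outlier helpers above (and `box_plot`, defined just below) compose, assuming a numeric Series and the `analysis_package` install name:

```python
import pandas as pd
from analysis_package.preprocess.outlier import MAD, three_sigma, box_plot

s = pd.Series([1.0, 1.2, 0.9, 1.1, 25.0, 1.05])

print(three_sigma(s))  # index of values outside mean +/- 3*std
print(box_plot(s))     # index of values outside the k*IQR whiskers
print(MAD(s, n=2.5))   # the same series with extreme values clipped
```

Note the split in return types: `three_sigma` and `box_plot` return the index of suspect rows, while `MAD` returns a clipped copy of the data.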
+
+
+def box_plot(data: pd.Series, q1: float = 0.25, q3: float = 0.75, k: float = 1.5):
+    """Return the index of values outside the box-plot whiskers (k·IQR rule)."""
+    q = data.quantile(q=[q1, q3])
+    IQR = q[q3] - q[q1]
+    lower_whisker_limit = q[q1] - k * IQR
+    upper_whisker_limit = q[q3] + k * IQR
+    return data.index[(data < lower_whisker_limit) | (data > upper_whisker_limit)]
+
+
+def regex_match(data: pd.Series, *patterns):
+    """Return the index of values matching any of the given regex patterns."""
+    pattern = '|'.join(patterns)
+    return data.index[data.astype(str).str.contains(pattern, regex=True)]
+
+
+def empty(data: Union[pd.Series, pd.DataFrame]):
+    # .values.any() also covers DataFrames, where iterating directly would
+    # only visit the column labels
+    return bool(data.isnull().values.any())
diff --git a/lib/analysis_package/timeseries/README.md b/lib/analysis_package/timeseries/README.md
new file mode 100644
index 0000000..1a8b0e9
--- /dev/null
+++ b/lib/analysis_package/timeseries/README.md
@@ -0,0 +1,24 @@
+## Analysis methods for time-series data
+
+--------
+
+|Module|Methods|
+| ---- | ---- |
+|Base module| |
+|Stationarity| |
+|Anomaly detection| |
+|Frequency detection| |
+|Seasonality detection| |
+|Other| |
+
+### Base module
+
+### Stationarity
+
+### Anomaly detection
+
+### Frequency detection
+
+### Seasonality detection
+
+### Other
diff --git a/lib/analysis_package/timeseries/__init__.py b/lib/analysis_package/timeseries/__init__.py
new file mode 100644
index 0000000..5a55468
--- /dev/null
+++ b/lib/analysis_package/timeseries/__init__.py
@@ -0,0 +1,26 @@
+import pandas as pd
+
+
+def describe_datetime_info(data: pd.Series, datetime_is_numeric: bool = False) -> pd.Series:
+    """
+    If the values are strings that carry no date part, they are filled with
+    today's date during parsing.
+    @param data: data
+    @param datetime_is_numeric : bool, default False
+        Whether to treat datetime dtypes as numeric. This affects statistics
+        calculated for the column. For DataFrame input, this also
+        controls whether datetime columns are included by default.
+    @return: Summary statistics of the Series.
+    @example: describing a datetime ``Series`` loaded from a csv file:
+
+    >>> s = pd.read_csv(...)['TradTime']
+    >>> describe_datetime_info(s)
+    count                        1427132
+    unique                         25111
+    top       2022-04-26 09:25:00.260000
+    freq                           32994
+    first            2022-04-26 09:25:00
+    last      2022-04-26 09:34:46.340000
+    Name: TradTime, dtype: object
+    """
+    return pd.to_datetime(data).describe(datetime_is_numeric=datetime_is_numeric)
diff --git a/lib/analysis_package/timeseries/anomaly_detection.py b/lib/analysis_package/timeseries/anomaly_detection.py
new file mode 100644
index 0000000..e69de29
diff --git a/lib/analysis_package/timeseries/frequent_analysis.py b/lib/analysis_package/timeseries/frequent_analysis.py
new file mode 100644
index 0000000..e69de29
diff --git a/lib/analysis_package/timeseries/seasonal_detection.py b/lib/analysis_package/timeseries/seasonal_detection.py
new file mode 100644
index 0000000..e69de29
diff --git a/lib/analysis_package/timeseries/stationary_test.py b/lib/analysis_package/timeseries/stationary_test.py
new file mode 100644
index 0000000..3bbd224
--- /dev/null
+++ b/lib/analysis_package/timeseries/stationary_test.py
@@ -0,0 +1,62 @@
+import pandas as pd
+import numpy as np
+# the class defined in time_base.py is Time_base
+from time_base import Time_base
+
+import statsmodels.api as sm
+import statsmodels.tsa.api as smt
+import statsmodels.formula as smf
+
+import scipy.stats as scs
+
+
+class stationaryTest(Time_base):
+    """
+    Stationarity tests for time series
+    """
+    def __init__(self):
+        pass
+
+    def test_stationary(self, x, window_size):
+        """
+        Stationarity check for a time series.
+        x : time-series data
+        window_size : window size
+        """
+        x_ma = self.moving_average(x, window_size)
+        x_std = self.moving_std(x, window_size)
+        x_max = self.moving_max(x, window_size)
+        x_min = self.moving_min(x, window_size)
+        x_median = self.moving_median(x, window_size)
+        x_normalized = self.normalize(x)
+        x_ma_normalized = self.normalize(x_ma)
+        x_std_normalized = self.normalize(x_std)
+        x_max_normalized = 
self.normalize(x_max) + x_min_normalized = self.normalize(x_min) + x_median_normalized = self.normalize(x_median) + x_normalized_ma_normalized = self.normalize(x_normalized - x_ma_normalized) + x_normalized_std_normalized = self.normalize(x_normalized - x_std_normalized) + x_normalized_max_normalized = self.normalize(x_normalized - x_max_normalized) + x_normalized_min_normalized = self.normalize(x_normalized - x_min_normalized) + x_normalized_median_normalized = self.normalize(x_normalized - x_median_normalized) + x_normalized_ma_normalized_std_normalized = self.normalize(x_normalized_ma_normalized - x_std) + + return x_normalized, x_ma_normalized, x_std_normalized, x_max_normalized, x_min_normalized, x_median_normalized, x_normalized_ma_normalized, x_normalized_std_normalized, x_normalized_max_normalized, x_normalized_min_normalized, x_normalized_median_normalized, x_normalized_ma_normalized_std_normalized + + def adf_test(self, x, window_size): + """ + 时间序列稳定性检验 + x : 时间序列数据 + window_size : 窗口大小 + """ + x_normalized, x_ma_normalized, x_std_normalized, x_max_normalized, x_min_normalized, x_median_normalized, x_normalized_ma_normalized, x_normalized_std_normalized, x_normalized_max_normalized, x_normalized_min_normalized, x_normalized_median_normalized, x_normalized_ma_normalized_std_normalized = self.test_stationary(x, window_size) + adf_test_normalized = smt.adfuller(x_normalized) + adf_test_ma_normalized = smt.adfuller(x_ma_normalized) + adf_test_std_normalized = smt.adfuller(x_std_normalized) + adf_test_max_normalized = smt.adfuller(x_max_normalized) + adf_test_min_normalized = smt.adfuller(x_min_normalized) + adf_test_median_normalized = smt.adfuller(x_median_normalized) + adf_test_normalized_ma_normalized = smt.adfuller(x_normalized_ma_normalized) + adf_test_normalized_std_normalized = smt.adfuller(x_normalized_std_normalized) + adf_test_normalized_max_normalized = smt.adfuller(x_normalized_max_normalized) + adf_test_normalized_min_normalized = smt.adfuller(x_normalized_min_normalized) + return adf_test_normalized, adf_test_ma_normalized, adf_test_std_normalized, adf_test_max_normalized, adf_test_min_normalized, adf_test_median_normalized, adf_test_normalized_ma_normalized, adf_test_normalized_std_normalized, adf_test_normalized_max_normalized, adf_test_normalized_min_normalized diff --git a/lib/analysis_package/timeseries/time_base.py b/lib/analysis_package/timeseries/time_base.py new file mode 100644 index 0000000..87f3640 --- /dev/null +++ b/lib/analysis_package/timeseries/time_base.py @@ -0,0 +1,133 @@ + +import pandas as pd +import numpy as np + + +class Time_base(object): + """ + 时间序列基础模块 + """ + def __init__(self): + pass + + @staticmethod + def normalize(x): + """ + 将时间序列数据归一化 + x : 时间序列数据 + """ + x = np.array(x) + return np.log2(x / np.sqrt(np.sum(x**2))) + + @staticmethod + def lag(x, lag): + """ + 滞后 + x : 时间序列数据 + lag : 滞后时间 + """ + return pd.Series(x).shift(lag) + + @staticmethod + def moving_average(x, window_size): + """ + 移动平均窗口 + x : 时间序列数据 + window_size : 窗口大小 + """ + return pd.Series(x).rolling(window_size).mean() + + @staticmethod + def moving_median(x, window_size): + """ + 移动中值窗口 + x : 时间序列数据 + window_size : 窗口大小 + """ + return pd.Series(x).rolling(window_size).median() + + @staticmethod + def moving_std(x, window_size): + """ + 移动标准差窗口 + x : 时间序列数据 + window_size : 窗口大小 + """ + return pd.Series(x).rolling(window_size).std() + + @staticmethod + def moving_max(x, window_size): + """ + 移动最大值窗口 + x : 时间序列数据 + window_size : 窗口大小 + """ + return 
pd.Series(x).rolling(window_size).max()
+
+    @staticmethod
+    def moving_min(x, window_size):
+        """
+        rolling minimum
+        x : time-series data
+        window_size : window size
+        """
+        return pd.Series(x).rolling(window_size).min()
+
+    @staticmethod
+    def moving_sum(x, window_size):
+        """
+        rolling sum
+        x : time-series data
+        window_size : window size
+        """
+        return pd.Series(x).rolling(window_size).sum()
+
+    @staticmethod
+    def moving_quantile(x, window_size, quantile):
+        """
+        rolling quantile
+        x : time-series data
+        window_size : window size
+        quantile : quantile to compute
+        """
+        return pd.Series(x).rolling(window_size).quantile(quantile)
+
+    @staticmethod
+    def moving_corr(x, y, window_size):
+        """
+        rolling correlation
+        x : time-series data
+        y : time-series data
+        window_size : window size
+        """
+        return pd.Series(x).rolling(window_size).corr(pd.Series(y))
+
+    @staticmethod
+    def moving_cov(x, y, window_size):
+        """
+        rolling covariance
+        x : time-series data
+        y : time-series data
+        window_size : window size
+        """
+        return pd.Series(x).rolling(window_size).cov(pd.Series(y))
+
+    @staticmethod
+    def moving_skew(x, window_size):
+        """
+        rolling skewness
+        x : time-series data
+        window_size : window size
+        """
+        return pd.Series(x).rolling(window_size).skew()
+
+    @staticmethod
+    def moving_kurt(x, window_size):
+        """
+        rolling kurtosis
+        x : time-series data
+        window_size : window size
+        """
+        return pd.Series(x).rolling(window_size).kurt()
+
+    
\ No newline at end of file
diff --git a/lib/analysis_package/utils/IDcode_util.py b/lib/analysis_package/utils/IDcode_util.py
new file mode 100644
index 0000000..252796e
--- /dev/null
+++ b/lib/analysis_package/utils/IDcode_util.py
@@ -0,0 +1,53 @@
+# -*- coding: UTF-8 -*-
+"""
+@Project -> File :IoD_data_analysis_tool -> ID_code
+@IDE     :PyCharm
+@Author  :rengengchen
+@Date    :2022/5/17 16:00
+@Desc    :
+"""
+import re
+
+re_ID = re.compile(r'^\d{6}(?:18|19|20)?\d{2}(?:0[1-9]|1[012])(?:(?:[0-2][1-9])|10|20|30|31)\d{3}[0-9xX]$')
+
+
+def validate_identity_code(code: str):
+    """
+    Validate the format of a Chinese resident identity number.
+    :param code:
+    :return: (valid, message)
+    """
+    city = {'11': "北京", '12': "天津", '13': "河北", '14': "山西", '15': "内蒙古", '21': "辽宁", '22': "吉林", '23': "黑龙江",
+            '31': "上海", '32': "江苏", '33': "浙江", '34': "安徽", '35': "福建", '36': "江西", '37': "山东", '41': "河南", '42': "湖北",
+            '43': "湖南", '44': "广东", '45': "广西", '46': "海南", '50': "重庆", '51': "四川", '52': "贵州", '53': "云南", '54': "西藏",
+            '61': "陕西", '62': "甘肃", '63': "青海", '64': "宁夏", '65': "新疆", '71': "台湾", '81': "香港", '82': "澳门", '91': "国外"}
+    tip = ""
+    p = True
+
+    if re_ID.match(code) is None:
+        tip = "invalid identity number format"
+        p = False
+    elif code[:2] not in city:
+        tip = "invalid region code"
+        p = False
+    else:
+        # an 18-digit number ends with a check digit that has to be verified
+        if len(code) == 18:
+            # checksum: sum(ai * Wi) mod 11 over the first 17 digits
+            # weights
+            factor = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2]
+            # check digits
+            parity = ['1', '0', 'X', '9', '8', '7', '6', '5', '4', '3', '2']
+            s = 0
+            for i in range(17):
+                s += int(code[i]) * factor[i]
+            if parity[s % 11] != code[17].upper():
+                tip = "check digit mismatch"
+                p = False
+    return p, tip
diff --git a/lib/analysis_package/utils/__init__.py b/lib/analysis_package/utils/__init__.py
new file mode 100644
index 0000000..0784ae5
--- /dev/null
+++ b/lib/analysis_package/utils/__init__.py
@@ -0,0 +1,8 @@
+# -*- coding: UTF-8 -*-
+"""
+@Project -> File :IoD_data_analysis_tool -> __init__.py
+@IDE     :PyCharm
+@Author  :rengengchen
+@Date    :2022/5/17 15:59
+@Desc    :
+"""
diff --git a/lib/analysis_package/utils/datetime_util.py b/lib/analysis_package/utils/datetime_util.py
new file mode 100644
index 0000000..6da2846
--- /dev/null
+++ b/lib/analysis_package/utils/datetime_util.py
@@ -0,0 +1,97 @@
+# -*- coding: UTF-8 -*-
+"""
+@Project -> File :IoD_data_analysis_tool -> timeutil
+@IDE     :PyCharm
+@Author :rengengchen +@Date :2022/4/26 10:02 +@Desc : +""" +import datetime +import types +import typing + +from dateutil import parser + + +class cnparserinfo(parser.parserinfo): + """ + 匹配中文日期格式 + 用法: + from dateutil import parser + parser.parse('1998年12月11日 8点20分30秒', cnparserinfo()) + """ + parser.parserinfo.JUMP.extend('年月日') + WEEKDAYS = [list(weekdays) for weekdays in parser.parserinfo.WEEKDAYS] + WEEKDAYS[0].extend(('星期一', '周一')) + WEEKDAYS[1].extend(('星期二', '周二')) + WEEKDAYS[2].extend(('星期三', '周三')) + WEEKDAYS[3].extend(('星期四', '周四')) + WEEKDAYS[4].extend(('星期五', '周五')) + WEEKDAYS[5].extend(('星期六', '周六')) + WEEKDAYS[6].extend(('星期天', '周日', '周天', '周末')) + WEEKDAYS = [tuple(weekdays) for weekdays in WEEKDAYS] + + # MONTHS = [list(months) for months in parser.parserinfo.MONTHS] + # MONTHS[0].extend(('一月', '1月')) + # MONTHS[1].extend(('二月', '2月')) + # MONTHS[2].extend(('三月', '3月')) + # MONTHS[3].extend(('四月', '4月')) + # MONTHS[4].extend(('五月', '5月')) + # MONTHS[5].extend(('六月', '6月')) + # MONTHS[6].extend(('七月', '7月')) + # MONTHS[7].extend(('八月', '8月')) + # MONTHS[8].extend(('九月', '9月')) + # MONTHS[9].extend(('十月', '10月')) + # MONTHS[10].extend(('十一月', '11月')) + # MONTHS[11].extend(('十二月', '12月')) + # MONTHS = [tuple(months) for months in MONTHS] + + HMS = [list(hms) for hms in parser.parserinfo.HMS] + HMS[0].extend('时点') + HMS[1].append('分') + HMS[2].append('秒') + HMS = [tuple(hms) for hms in HMS] + + AMPM = [list(ampm) for ampm in parser.parserinfo.AMPM] + AMPM[0].append('上午') + AMPM[1].append('下午') + AMPM = [tuple(ampm) for ampm in AMPM] + + def __init__(self, dayfirst=False, yearfirst=False): + super().__init__(dayfirst, yearfirst) + + +def utctimestamp(): + """ + @return: utc时间戳 + """ + return int(datetime.datetime.utcnow().timestamp()) + + +def timestamp2datetime(ts: float): + return datetime.datetime.fromtimestamp(ts) + + +def timestamp2str(ts: float, fmt: str = '%F %H:%M:%S'): + """ + @param ts: timestamp + @param fmt: format + """ + return datetime.datetime.strftime(timestamp2datetime(ts), fmt) + + +cnparser = cnparserinfo() + + +def str2datetime(datetime_str: str, fmt: str = None): + if fmt: + return datetime.datetime.strptime(datetime_str, fmt) + return parser.parse(datetime_str, cnparser) + + +def int2date(date_int: int): + return str2datetime(str(date_int), '%Y%m%d') + + +def date2int(a: typing.Union[datetime.datetime, datetime.date]): + return int(a.strftime('%Y%m%d')) diff --git a/lib/analysis_package/utils/file_util.py b/lib/analysis_package/utils/file_util.py new file mode 100644 index 0000000..c24ee11 --- /dev/null +++ b/lib/analysis_package/utils/file_util.py @@ -0,0 +1,81 @@ +# -*- coding: UTF-8 -*- +""" +@Project -> File :IoD_data_analysis_tool -> file_util +@IDE :PyCharm +@Author :rengengchen +@Date :2022/5/10 17:21 +@Desc : +""" +import os +import queue +import shutil + +import paramiko + + +def list_files(dir_paths): + files = [] + for root, dir_path, filepath in walk(dir_paths): + if filepath: + files.append(os.path.join(root, filepath)) + return files + + +def walk(dir_paths): + dir_queue = queue.Queue() + if isinstance(dir_paths, str): + dir_paths = [dir_paths] + for dir_path in dir_paths: + dir_queue.put(dir_path) + while not dir_queue.empty(): + dirname = dir_queue.get() + for root, dirs, files in os.walk(dirname): + for dirname in dirs: + dir_queue.put(os.path.join(root, dirname)) + yield root, dirname, None + for filename in files: + yield root, None, filename + + +def copy(s, t): + if os.path.isfile(s): + shutil.copy(s, t) + else: + if not 
os.path.exists(t):
+            os.mkdir(t)
+        s = os.path.abspath(s)
+        t = os.path.abspath(t)
+        for root, dirname, filename in walk(s):
+            if dirname:
+                os.mkdir(os.path.join(t, dirname))
+            else:
+                shutil.copy(os.path.join(root, filename), os.path.join(root.replace(s, t), filename))
+
+
+class RemoteFileUtil:
+
+    def __init__(self, ip, username, password, port=22, local_dir=None, remote_dir=None):
+        tran = paramiko.Transport((ip, port))
+        tran.connect(username=username, password=password)
+        # from_transport already returns a usable SFTPClient
+        self.sftp = paramiko.SFTPClient.from_transport(tran)
+        self.local_dir = local_dir
+        self.remote_dir = remote_dir
+
+    def ls(self, remote_dir=None):
+        if remote_dir is None:
+            remote_dir = self.remote_dir
+        return self.sftp.listdir_attr(remote_dir)
+
+    def upload_file(self, local_filepath=None, remote_filepath=None, filename=None):
+        if local_filepath is None:
+            local_filepath = os.path.join(self.local_dir, filename)
+        if remote_filepath is None:
+            remote_filepath = os.path.join(self.remote_dir, filename)
+        self.sftp.put(local_filepath, remote_filepath)
+
+    def download_file(self, local_filepath=None, remote_filepath=None, filename=None):
+        if local_filepath is None:
+            local_filepath = os.path.join(self.local_dir, filename)
+        if remote_filepath is None:
+            remote_filepath = os.path.join(self.remote_dir, filename)
+        self.sftp.get(remote_filepath, local_filepath)
diff --git a/lib/analysis_package/utils/pd_util.py b/lib/analysis_package/utils/pd_util.py
new file mode 100644
index 0000000..b67a7c5
--- /dev/null
+++ b/lib/analysis_package/utils/pd_util.py
@@ -0,0 +1,82 @@
+# -*- coding: UTF-8 -*-
+"""
+@Project -> File :IoD_data_analysis_tool -> pd_util
+@IDE     :PyCharm
+@Author  :rengengchen
+@Date    :2022/7/13 11:00
+@Desc    :
+"""
+from __future__ import annotations
+
+import os
+from typing import Hashable, Callable
+
+import pandas as pd
+from pandas._typing import CompressionOptions, FilePath, StorageOptions, WriteBuffer
+from pandas.core.generic import bool_t
+
+
+class to_same_csv:
+    """Callable that writes successive frames to one csv, emitting the header only once."""
+
+    def __init__(self,
+                 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
+                 sep: str = ",",
+                 na_rep: str = "",
+                 float_format: str | None = None,
+                 columns: pd.Sequence[Hashable] | None = None,
+                 header: bool_t | list[str] = True,
+                 index: bool_t = False,
+                 index_label: pd.IndexLabel | None = None,
+                 mode: str = "w",
+                 encoding: str = 'utf8',
+                 compression: CompressionOptions = "infer",
+                 quoting: int | None = None,
+                 quotechar: str = '"',
+                 line_terminator: str | None = None,
+                 chunksize: int | None = None,
+                 date_format: str | None = None,
+                 doublequote: bool_t = True,
+                 escapechar: str | None = None,
+                 decimal: str = ".",
+                 errors: str = "strict",
+                 storage_options: StorageOptions = None,
+                 prepare: Callable = None):
+        self.not_first = False
+        self.mode = mode
+        # when appending to an existing file the header is already there
+        if self.mode == 'a' and isinstance(path_or_buf, str) and os.path.exists(path_or_buf):
+            header = False
+        self.header = header
+        self.prepare = prepare
+        self.kwargs = {'path_or_buf': path_or_buf,
+                       'sep': sep,
+                       'na_rep': na_rep,
+                       'float_format': float_format,
+                       'columns': columns,
+                       'index': index,
+                       'index_label': index_label,
+                       'encoding': encoding,
+                       'compression': compression,
+                       'quoting': quoting,
+                       'quotechar': quotechar,
+                       'line_terminator': line_terminator,
+                       'chunksize': chunksize,
+                       'date_format': date_format,
+                       'doublequote': doublequote,
+                       'escapechar': escapechar,
+                       'decimal': decimal,
+                       'errors': errors,
+                       'storage_options': storage_options}
+
+    def __call__(self, 
diff --git a/lib/analysis_package/utils/pd_util.py b/lib/analysis_package/utils/pd_util.py
new file mode 100644
index 0000000..b67a7c5
--- /dev/null
+++ b/lib/analysis_package/utils/pd_util.py
@@ -0,0 +1,82 @@
+# -*- coding: UTF-8 -*-
+"""
+@Project -> File :IoD_data_analysis_tool -> pd_util
+@IDE :PyCharm
+@Author :rengengchen
+@Date :2022/7/13 11:00
+@Desc :
+"""
+from __future__ import annotations
+
+import os
+from typing import Callable, Hashable, Sequence
+
+import pandas as pd
+from pandas._typing import (CompressionOptions, FilePath, IndexLabel,
+                            StorageOptions, WriteBuffer)
+
+
+class to_same_csv:
+    """Write a stream of DataFrame/Series chunks to one CSV file:
+    the first call writes the header, every later call appends."""
+
+    def __init__(self,
+                 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
+                 sep: str = ",",
+                 na_rep: str = "",
+                 float_format: str | None = None,
+                 columns: Sequence[Hashable] | None = None,
+                 header: bool | list[str] = True,
+                 index: bool = False,
+                 index_label: IndexLabel | None = None,
+                 mode: str = "w",
+                 encoding: str = 'utf8',
+                 compression: CompressionOptions = "infer",
+                 quoting: int | None = None,
+                 quotechar: str = '"',
+                 line_terminator: str | None = None,
+                 chunksize: int | None = None,
+                 date_format: str | None = None,
+                 doublequote: bool = True,
+                 escapechar: str | None = None,
+                 decimal: str = ".",
+                 errors: str = "strict",
+                 storage_options: StorageOptions = None,
+                 prepare: Callable = None):
+        self.not_first = False
+        self.mode = mode
+        # when appending to a file that already exists, never re-write the header
+        if self.mode == 'a' and isinstance(path_or_buf, str) and os.path.exists(path_or_buf):
+            header = False
+        self.header = header
+        self.prepare = prepare
+        self.kwargs = {'path_or_buf': path_or_buf,
+                       'sep': sep,
+                       'na_rep': na_rep,
+                       'float_format': float_format,
+                       'columns': columns,
+                       'index': index,
+                       'index_label': index_label,
+                       'encoding': encoding,
+                       'compression': compression,
+                       'quoting': quoting,
+                       'quotechar': quotechar,
+                       'line_terminator': line_terminator,
+                       'chunksize': chunksize,
+                       'date_format': date_format,
+                       'doublequote': doublequote,
+                       'escapechar': escapechar,
+                       'decimal': decimal,
+                       'errors': errors,
+                       'storage_options': storage_options}
+
+    def __call__(self, df_or_series: pd.Series | pd.DataFrame):
+        if self.not_first:
+            df_or_series.to_csv(mode=self.mode, header=self.header, **self.kwargs)
+        else:
+            if self.prepare:
+                result = self.prepare(df_or_series)
+                # prepare may transform in place and return None
+                if result is not None:
+                    df_or_series = result
+            df_or_series.to_csv(mode=self.mode, header=self.header, **self.kwargs)
+            # every call after the first appends without a header
+            self.not_first = True
+            self.mode = 'a'
+            self.header = False
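A sketch of chunked writing with to_same_csv (file names are illustrative, and it assumes a pandas version where to_csv still accepts line_terminator, i.e. pre-2.0):

    import pandas as pd
    from analysis_package.utils.pd_util import to_same_csv

    write = to_same_csv('big_table.csv')
    for chunk in pd.read_csv('source.csv', chunksize=100_000):
        write(chunk)  # first call writes the header, later calls append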
diff --git a/lib/analysis_package/utils/phone_util.py b/lib/analysis_package/utils/phone_util.py
new file mode 100644
index 0000000..3b11992
--- /dev/null
+++ b/lib/analysis_package/utils/phone_util.py
@@ -0,0 +1,17 @@
+# -*- coding: UTF-8 -*-
+"""
+@Project -> File :IoD_data_analysis_tool -> phone_util
+@IDE :PyCharm
+@Author :rengengchen
+@Date :2022/5/17 15:59
+@Desc :
+"""
+import re
+
+# mainland-China mobile numbers: a valid carrier prefix followed by 8 digits
+re_phone = re.compile(r'^(?:(?:13[0-9])'
+                      r'|(?:14(?:0|[5-7]|9))'
+                      r'|(?:15(?:[0-3]|[5-9]))'
+                      r'|(?:16(?:2|[5-7]))'
+                      r'|(?:17[0-8])'
+                      r'|(?:18[0-9])'
+                      r'|(?:19(?:[0-3]|[5-9])))\d{8}$')
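A quick check of the pattern above; the numbers are made up:

    from analysis_package.utils.phone_util import re_phone

    assert re_phone.match('13812345678') is not None
    assert re_phone.match('12345678901') is None   # no valid carrier prefix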
diff --git a/lib/analysis_package/utils/project_util.py b/lib/analysis_package/utils/project_util.py
new file mode 100644
index 0000000..1edc69d
--- /dev/null
+++ b/lib/analysis_package/utils/project_util.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+"""
+@Project :IoD_data_analysis_tool
+@File :project_util.py
+@IDE :PyCharm
+@Author :rengengchen
+@Time :2022/9/15 9:45
+"""
+import compileall
+import os
+import re
+import shutil
+from os.path import join
+
+from analysis_package.utils.file_util import walk
+
+# interpreter tag in compiled file names, e.g. 'mod.cpython-39.pyc'
+re_pyc = re.compile(r'cpython-\d+\.')
+
+
+def compile_project(source, target=None):
+    """
+    Compile a project's .py files to .pyc, in place or into a target directory.
+    @param source: project path
+    @param target: directory that receives the compiled files
+    """
+    source = os.path.abspath(source)
+    if target is None:
+        target = source
+    else:
+        target = os.path.abspath(target)
+    compileall.compile_dir(source)
+    pycache_paths = set()
+    if target == source:
+        # in place: move each .pyc up out of __pycache__, drop the source .py
+        for root, dirname, filename in walk(source):
+            if not filename:
+                continue
+            if root.endswith('__pycache__'):
+                pycache_paths.add(root)
+                shutil.move(join(root, filename),
+                            join(root, os.pardir, re_pyc.sub('', filename)))
+            elif filename.endswith('.py'):
+                os.remove(join(root, filename))
+    else:
+        len_t = len(target)
+        for root, dirname, filename in walk(source):
+            # skip anything that is already under the target directory
+            if root[:len_t] == target:
+                continue
+            t_root = root.replace(source, target)
+            if dirname and dirname != '__pycache__':
+                os.makedirs(join(t_root, dirname), exist_ok=True)
+            elif filename and not filename.endswith('.py'):
+                if root.endswith('__pycache__'):
+                    pycache_paths.add(root)
+                    # drop the trailing '__pycache__' and the interpreter tag
+                    shutil.move(join(root, filename),
+                                join(t_root[:-11], re_pyc.sub('', filename)))
+                else:
+                    shutil.copyfile(join(root, filename), join(t_root, filename))
+    for p in pycache_paths:
+        os.rmdir(p)
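A sketch of compile_project; the paths are illustrative:

    from analysis_package.utils.project_util import compile_project

    # in place: each .py under ./myproj is replaced by its .pyc
    compile_project('./myproj')

    # or keep the sources and emit the compiled tree elsewhere
    compile_project('./myproj', target='./myproj_dist')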
zIV$NE9{II2kx=3&Mr3|gLB{WwV<;8mG!x8EAgB+g{9bPEaE}thi4@`BGgQhMLgB=G zT?=79oU5uk=3NI6L+5m}9@*Kdu$>)}Zr-t4pK^@XiE}qCq#VD<@wD#)93^6ZcW`*e)mqn$Va|=#LT8Z0;w$rw3p6}p7$ESAr6YR}~GLFam&{h`?-`clc zv9=k7J;?&D3a;ge69UTo_JsoJd8oxiAQmB4-gk$@qS!ooNF#Wj{>qT*2f$}Na!-1u za6R5ONbBQ6eNa@GD>w6Kiezr}Y=KFSBGmWeNU?KgoflIC6dYxVfpY>Bv_p+XbP^~O zl0AFFg?Lc;ZAkhv9$S*l_)8bEPZ()`J#3j?S~az19?9{CGnQ1zgef&cN08JgSLLHD zXVfFt@#p-P(CQ0LGi$Fi$CY3qD9N&m@yHJHkHhtHQ-_Z3ZNXc(D_g z?6%v@gqk{d>GfUyFpw_k)vB9`-fhj(2**mh2lEZR`7>!H68)HSr9My2epeG<|8Iu- zoQj5C>GsY*6e1vET&oB>a!~3urM8LdEESy-OzqI!2}NgBJc z;oz=DV+{saAb|Y4g?C~{C_w=Gx1?+XZ<6^_pU2}3*0u%cW)|o_V(?pYveL6T;*8K- zc7A9m6B%qGG%EUKu&sk@@m^1Y2x8Cb2!;S$Pjns#H^D)q?-pbn1?G$k#?}R<&4Y3oHYiKvBTSL+ zmgdkD`}*7X0zo8F>HTod6#08hmH55L{}~+pTfMQivA5E*H2ZgwUrT0Iyq^xqi}nGR z=3XzOqA3|)K9y{#ck5|~*ONSj=$ydHT9l#2gXZRU830WirnZlBt(&{iY{kNQ< zy5#{M8c#a~?r&UGmuC*9%HgXgYY`G%;gU7fb@0Pmgv}eb>_azpnS08XiEuX*Rz0<8 z=u*S-QP!CfR`r`gH!LgYM%}c9%^;@}DyXsmBSS&itdX+oeKIVXrXM`L=sV>N5tg8t z^UNC%N`<9y6&jI^_)(3pY}qJwNK5R*;4voPkg1$vqC5MnK}g)&pX8CY9{HayUoVf= z-JL~t577T$s0V~1mD)YQ%agvLe(Lx5gydg5TN@`wOS6C1L?~WbuhAiW#0+q#UWgpd;B4T^q*mt;Y zybpVT{X>L6zaN3`=Gk(YgITb7=E#1zbl8bR3uiXhjn_;C$Ujn@!$YEvHKaHnYoA_$ zq>zh3K#wJN66iNk5XwuQC>S>S61?E{Tz*cRM6RzAqzz$42kz;o(f}O&l4{;AOb6zl zpOC*3`4XUKtFQk%kKK53vQRWk+o%Of&y1viBqkGl!L0ehL)c9cxtB5mdc%5;LPjiDDNW}ZoC^o2?-&BYcZE4xJtS`*Ij4A{Z565>F9##4$L~Wg7jez=Wn#4 z0yxI8tVcw`kE6p;t@dBX^8PeL&CVU*F}WKFN(wxRU{~DTDa|0Hf{skQREF{bjid>-c%#Lc z6e6BQM*mLSgQcQ#MU*G?k8-0f`SoM~`O! z>dWU~8RS!FV^c6-o?eMKN9Y=7&by21L}p4XIUA#r?M^jas;A9Z}&jj z{x}+LA2AWsZuY&-Vm>^Ql5+D)7#27?$Tz#f^h=|*yL-fpzauA5CEEH}Epm?CU5_tS z=^&*tY9o0IehL3&U_KvlF|D|8(N+9e&-RDlI^$6LuKkx=PH4p7AGbdNg@2ZuuHwukAob8X~=52eNec22UoqV`OH zX|uN3qt7@>$inxIebOKtRcx>2Cj}{+JvyWgl`|jLJw>9>_}IEW`B@x_Xbqfw|uS75ANYWUSLrC zcTSvX&KRR_4aM)ns2!A(_3-238+cVeUJVSlo~#|1tO`5Q4Ds%nNJbzMTvMVobf|IX z<(Kp_;(&l9h%Mdzq(SnD<$lnocVH%x1hIEQN?Q?N7jt+=(0D9-+pp9p*a9ZOKC`Nas5k^ap-FNGuZC^yA=>Y%mX;etE33) zy(;f_ZUSuc6Fghc*7h4;u;{j;>{EmloKduD3MwtSk*o&kjUcEk18c+kpRU)~Pk%zUt`lObwGZ7pK_%F79c7;fsv2}? z=V$Hbe%`tlE@Ttj>KY1t5^5a$SYx@H-zQ`}s8&mcHZ5NF#-F@*Q=hmeE}%^Au49tA zGZHu8RRQI}W%xmLw^-J~FJWNgxXq@|OSu(bqZi?iJZ>nrLGdb7s$26*)ABUigx(M8 zVV4%f`r&NvGPsMw%WY?KJsaku>z-umYT-ov^KWJ5FOT`oegNN@@ZUak-{1eQBPoCL z7(Hv7?=s`2V`$`PWbhv&yC46pP5Nu2|F8QQ+Z)+A8Cg5({O6y3_e%e}{pkP2{W%!v zIoNz3srldDpk?(_gVy&Q=0F1g(EO_n9sg}Isb}x@t&nv1tChP{uC@MVdNx5$aKU64 zdft8jIi9i@qM}7WfQ2UF&4q1{8w8_dL;TQEz!M6_}i`o)KA4Y}mAVa+JnrvA-NvSRV5W{#vW8Ta@GV1Y`6}5^?C$81hk~bQ33#?uIm2`H6#fF z(FE@Vf1UFP=K#rI*rG8lmz2U10fMJds+g`l@NwP!wmm?GJj8fK`!X@w_q9Yh!L-K>Sk6aYs;jUrZlCEj3~gKUr6NipYgi6s(%Q-0Yp z#anreaJ%kX|35 zrmx8^3X>uWISjGcFRKk4GqjIg9!ly$)q390K0Ucka;j?Z+`;Y_oXF;=kWOGuy#w_? 
zO0t5@?V+S#E-0>(mBx`36^RBu7`rfAKYK?I$w=RvK!h9TJH;B${Xiz2VW#mAHP^ z*3HM<{#vhQ{^|g=VT!BHsh1+s+0UOJR6osPqwlNN3VO(OnRCl%3dIYOr^-@0aKlx8 zXqlnzDS%^S5Nq_a^`V@juI(--0+x2U;;fgTDmje_ee0 zqL-4E&6+V%*EjQ(87IYSBERlQXmqKFLe8Q<%U+3KFalJ33yg+tqNPX>q;$Sig)X)j zS4)JQFlU-0RmVI=`k?=1*f*qkNqf<8{L2hzHWtKtYS%&f1<`^Qp^dPZLUbDn}F|+hs2!%0W z3O^}4jPVrKS7ckWg}xY#xS^BBX6h8Q9^L#EM*EemG4iQFzipz9Y+NN@rI1y$Hj#`W zM}5*Ehf<7E$*~{!M}#X^aC(MVKT_Ukhj!WTKHuH1hlIY)75yD`aq)!$Ln)MiB4k}k2PCjH0^fLKG#`}nOSX{+>O$h&VP=3ZDLv}mE@ob zffLWwp%yb)#`3Y;DgvUm?j_=RK)BCu)vsS*a{#=>NID+NKhD^8>H42^Xz2rO1K&M7 zrtX&m^8;}`4>`@$qNt7I!{$z%D^Q*KGVKp@KO)Ht^RO~2VA`lXu1RmeHRo^kVG;SR zZ{j+AdJy^qeRHRpq5CxFxyMN+es#K72Q>w$jL)@>V?Q-Tvt_klEm_y#xH+a+$nJ?&unfu2$`@ z8j3g>jHfZ%`59Dh6e{C|T6 zQ^8BX0YpH7QbFRMO$2~sIhC0s&wW6@8?8Xd|DoCX_l)>G3^4k)`uv|cQCZ94dm7+{ zdcp$*qwf+?VY(bm2q~{`j#(^Jice3BhL2*#yyo1!f*p5OiA$8Uo;tU;RA?uLtDcT< zmneI&mt#;ls_?~seKBz{WlqXGgtO@})p>lT>HLJD?rnn(-5-a^d6C!oXMG$SqjWhr zIr*${z1{AG90#G{oqNrhD-!PQ`hA&W_ej?e(L?uV6N}V_A#dMIKHA=!E59aesta>7 zT)?$7588SyY;6=cdI){gAge1_cRxK0UY8l7_L{lI(&OU!5(dNiW|NQuD|#K)^1+)N z8X_(}mA6Ox_|b$HAt>x}zVV^z`f9(T2EnuB;qS3k zE!OdMwEFCabZ0y`lllt%V1Jh6>+VJ$&WrU-nkH_7CcLGCN#pOd3GEfH^# zyS#ZxIsy1TK~X9GvTq~(8^+EYp2JOzoZpHtVWeN>g`{<0WLfrlzx~+-V)}y)&tS^X zq;WfPiB^TY75b{rTdtSngZAJ^b=8zE*?^jOiDHp!X9by9xe4{+U`2`8xv^k3)WC+U zwyso{V7Q9B)`GZG$KgVX_{q5RN+SA_tQNcVYSjj-y=tIp+zGZDNxNauxMAFMw8N#v zga*$3r&M9kpmP}u)i$Z{`YAFc+Y=iG%$X6eBhRszL`mWBR}Z5C=Z-0Y;kw)aj&D7w z{SXDi!>M^wTA{b&>JE0maw`E240ZHr>liI3ge<-08jk)fKxly`G1 zHIg#sJu?(%9}EW6Mh|g-U$*YFSzjTO(*`SkbCVTquN2duGZhHbDS`wEe6&-3k_wz= zm{AQ!5_5Vie&=sQAo+(e5OC}}ZpBdfpeAN8)=O&VtsdHHPfp58ftNJW}b|vJ(sU8(i6a z%Vr7x=M-sdX8B*x>QZUedj6Z{%~|lN8Itgyvn&tfO$z75OnCAKwUhx9BC%7kiN>}R zDeUfRzL9wc>ax3Fm4?{MDl^YIcev(y-mo}ESLfErlUGdSshIuPe7r+eMIrq@qZN_B z(k#yzFQ0#C;V6a92lnyyx-YDWjJ(v+gH<1pS2n{>*JBA?Wctf!=<(Do7BjGFgj{>ygBbt20b_e>GxHCk*~){Z@1q9XjA-n9 zlF*wzpMc&8+==)?$cN$v+VQ;{0^;Mb9ovB9?*hNW>is~yc_4J}St_5x+)@5uOCqAO z`^L4+h7j%b*g*$DmHf=pB2Ty~skqM0HB@wZudm)OnXmcac;@jt zP{|Z{e8!`9G!ZP;S+WhtxFm2FPyDDe*pE!eK{OI8%%j>82(1S ziQ@y4$R_93m}n&1X*AXjje0Oidn*xi|Iv+8{LhsI=Q=}G@`tzue#92XYpJ|nmb}`g z4sx>6PI5%yHCDl<3AxH~+Sx;_*FhB%V;f!>ixdKqiK)m%T~XLrt3gk}vfR}4EYx-^ z#@;5`$iU(VqCd)I@q96%Sy`?_2kcfPM=du9u%HFFRWvpOU%9omH?a-`V@1gfM4r;LgSrt50x&8S0S*^U63PW<4Y2g+_lG`V@Ph+q;tY z1_d-GkM6_;BdrI6Ya&09xoBE-UIo!#*%~?pFsm%EPQ>*Q%oua7?<)g4a20RqFbtNo zDh3GFw}R^JUg0R@kTD^&B6xi29Qh=pZ%xScsIu%oWBwz)>M@v%K_Mtt0@{WrJ+M3{;f`~Rj&)q{aHf>^qA~y zvqx}w3Fn+xL>gbXyFbg*3Qm%Y#^^@zVH*9=jpE=T^B`~8 zO3tMX_hzvvFA^zuvpy4+C7vxlt}V}|*E{%ZtfEiPT2PW_27dBYN_=FKlp1Eg6L0bq z*APew>n7}pCtaCvK&lz=@v9BbMAJca7_8V3@ii6u?&QmnB74J3c}-H`;hvfzfoRjz z>L39b?FRyFl^4uL4Tqq5b<%Db}wQ5s>8_IRRjbV}@`pX7N8YgfE8Y+tX z)|R9}s%KJZ(_JEDan7WWL!KjQ5x;kM(e#tt6{xRy#_Ix3_LJ^YB-FMw`1p-_P+8@k z&@!=8G^rMgNHVd>8?FH5x!D zK$ZrjQ>D8@L|95v2?=RLkd|CJ1ZiaHQb3e$L|Wln?j7&0>%zU?VRnFF=J%dC?4JKQ z@B6$@a^dpie$%@^#`XIHopZ2c*1(``|7)|hYL3eHvumq!PMOvd_;Iehc25JzT3(s( zFJ>}Tp0kvo(Blzh+SOk-x%QUe7;o3u4-fkolEI@sJ)Cog7`Umgiyjaj(`PRj_w)sY zLK(Zrx}kPKi~tbFP_(>G_A6E(TSFA+j>Gx*&$Qw{-%AKAQ51dbz+0IyewLu%RH#@p zB;rtUmwUz_4+r}7>KcZCv&{$_si-Wf6ldNS4i~k`By59D}re&^5YCNW}}Dk0=H(+_EJy z#xi;DZ$xNhm7KtA@K#{DHih4Oin`rG3Xhc9DZ(XBk$(%qA3^#Zck|u2Pr-95@sS<0 z`V6)Bp-qDD%sw{3s8-s>J30YWs#dIKnHn5sL)1Ev8l|5S%EHqPYR#)?Yx(ch#d2s? 
z2?Q#2R6AFZmagp=KWPD}zDZyw>Tq-;AT)?e+dmK?#iF#S4J{!Jr^wXep_aCs&1YnB zXc00f+jv$!J80FnGztjnU{H&v8XF5{dg!T6s`5fupO1`s@u#P)eZ^NGc>qhRP0JwU zSgEPx*4sBmZpK>U50BSBPmp@ks2q9v>SH06R*7I265TiSvskby$uaE3bjEIAZfEW+*~ZQ$6inJO`b=~p|x=iz&y`kgKRkwq`NDH|SxR*@`;%i*5x zY6aa;s3v>fWwjL^PI@uuGrC~@kvF(57E*rcUpf{2ad3QwJsNQUXf4$1L?pB~HZO9} z72IT+b9}g$rfd?#yo)W)IdHe*6|Ja2DmZCKk=C?6b`1dJ*GvfQGR|~ydr1ir4GT~Z zcc8=s68ip-&3??TU9avns8-+%yS99J&$meGRKb43KB30PElIR2{*h-{8%>$I5=DG} z1Wz+2^-6lWv90KL*~>UrGP0S0$}APdt5cJ%fXm>a5NP#F^AY2mtoP!N@|8%~JOLJB zVtJ0BUwmktIN7k+4?r{U_hRyph)eVEPLa*#=20(e%4)n-l#G_chM~4pMfwa{UFOK!R4+AYoD$CJF z38SYH6Q&Djd43|M%`}0=x8{c#@_n)da6nvpD`u--5{$5cS@?tpcdg9rlrdZ|Mbbyj z_^n41$m17FmW@{i9h0)uS*kg&@GlGktO%?=lICvhna|AA5rsZ>X{6uP5H0I0{^Cmw z+R;_q8N@iwk-#I2sO}DI{#GTI&pq8Z&44%AGQ8J-$ILN44QR&q4aHn4?hjJRTF-jf z|E)sJLF6vUTrlf&pN}G}pK0KS*>s6h92fVP1-mcUq0P_Vgx3}a~6vaAkp0#wJSUD4;n_h9Ut&Iu~EM< zLql*Wed@ci4k5oxdnrFH6|cW;OA=d6ze<7Y^#{Cr%!5mru}+=CD%8iZM2sp$%#4De zZe}xP^ei_kC(DmgXiCamiJ-kjJ(LM^M_V9YgK* z`?0#bLyS^lOVf?KEYjr4{g@vWg#I9cMtco^EKV z8rLB2XGi4^@<2I22WN?ztXv0xBbXCAN41x$mlx8@+c$&P3FYkD1`HFj0_8q*D3qwN zUT0IB#RDl+sIg}A4S(o@$`2`a=goYFWWd%5V6!sZ4u|%{9}I3cJ_wWCaxyd z9M0~}^4;>?Ie;F(Gfr8B9wi0kS!6tehYNKGOv6G2JJzBq_y2)clfJF2CRG>P2FZ{k zk)B`I{Y*~Fy@1}%7`73FiyuuAOreza)~<8=uKs4O=7n6d(b5i5Viw%atEoDp2QC*) z-Vr|D^wMhT*%e#DwTSjfd+B{YfOk?|C!w-jroX}{?LnNHnkWSx+WMrwsXvo0t{0mv zPLG2(Rw{o+8fHZJAkxn1KUMsoy zIN+zj@$Bf^L^ceYLth3$Qfn@=h2#P)N{ZAHy5*wtYLWpi6vCgaHMFq6Y*XC+>6$AT zc_H8_@T(rP-hNA?WO-n2rGPQSFMM153W>N=>!)q9D?t;ccB%OZ_fv@lYFd5>O5Tf6 zo#IOi%R1(@aphcN?A_aHfpCg%`A!t_120#)(7O{cpL^huu=d8NedtqtYrJcjctgYU zHFXn4e}=K+;Ez}~b8SVd)Ra3{@;fPPCz9|-IHMOB-Wp3@o6}?}3#qHi67MPjm6|-JG{s&49Ol{`E3g~^k5?zRC`P2PSKE$qo0Q6NG)C<>Av5+q01+rCB}%oQ z-gA(O5BtJ7mbkoax}Vtz6k>_Hc@iuRZGE~%5qJtaw;md?jyPt6){HHEq1_|e5!zI# z&f1|xWaWJM-i%MWpHHc)A2k)twB43-3n^J@&#~7W2{rRq>6P+9H+|EtZSww_I@7YM zjJ>G^l?&z*b53;$mrtGxhwLIH3H_CJ?(fD_=Ly0k^&y3EEM?lIvMRKO1#@hl8)BaO zam3>SYp+eKdh`q-+JSNiIaXP?wi)bhxTL&R3<}Y z5=7nw+cG_67L_=OJJk%Bt3A7e!s`k5l$4EEsbAI23mq7<)T|Px7foU2)^|;FEvWsyW6wtCw94u&ve}3c*|_EIDrC>|C-&Kb-^>!fyB_5pw{C>4(<@%2g%Rq_?7!ED z<7BieauB$dR;1Jy^SZ)yHCuR~%3*ztHd`=`@+-OHH)%1}ys^}d=Mvbjlz++pBzToc3!+nq(0Oc0G64UZh*QE~5$#q@#q?c*KUeo@**%8O?|2o#S zh@yMswitoV1_rA$;hHMPMMYb)$jrJC6)*g^7(>oC32gQk*=>{73NTBhWupvaBZSJf zcDfw4fL|I?Y6W7GtK00GQ=h3*8dNWrK5`e?l^m~lH*V!?8O$DRP84aZy7fdFQ2AZo zE8UEgmM%$E&9^JVOD2w+a(&aJeGOE`p5bU?NmM&LEaawbEhJFS4h=@H-~YDQrTik1 z?TM~C3)>~zSxD^)&7t{oeMf@fD{pV|>%REuELVp|f5YsuLk0CiBb*^}JbOOjlGedTeNAKD(ZMHQw%q3672M7Kfhk>ezI zq;9dePpzvRXKU|cZn}$X;DCzoB{pvh<#bj+Z@KHpoBkk$y6gfJ@2?qSaOY85ynkH@ z*ibOZET-YH_c7FV<$~h$yy2M-+WA|pGx7@36LaQk!{{s~*Tc01zQ=-N5OSEIl z!&{N`=UVF$eIL;6VefBlYhnbS|BibWetG?lY3q zI7J~!!Dcnr^F@{A{KqY|ZU>heg{&+3m0iA|TT*fIT0T@U(ln&T7W{B1l)KZp9l?}) zVbM3_o|gZmrn<|=%#-Tnseb4)bQ7aL{ck8=Bi`V!#=61tGBUjlcSM!bSTczVYD3Hy zSl>w{;ymy1=z^bMP`!TM!Fh?J1DJXX9fJ(*bc0yfW8<#@0iS&PC6hsA7mG+jcE31L z5BwSs@HFhlKS&66rHHK=h}09c5V8d-oLUb%y_`u!h=5>-iWmUd!0W`W?AL&RonWca zKLea*UUnAu#2D$hMiTx{UM0$_)Y z|05VGPaDKw$Zw{TbH=X$0o%Y%Hz5sJ!t z0uG%cKO6eH69pm{RdXPmn@oQ;7pYYc5q$EALhhM@gQsAri9c2WLG*pf(w;z(dzIkO zT9|a>EGSav65?!8iPGU<0N`x!ZxVGxEGj)V92*2Y8w*!wBQj6c09gtPXWobD9RFAY z1Toba3wr`VR5!w*(L85EQE4ZUr+hm7>%Y)ZDJjpQpC~DR4G5SQ=4O4?f}qq@BIA*D zm2kZOzv2H-St7DgN#WpZAZ#-~YwC!?I7BKcMH8G_djkda?`+oZ`X)pqDyah;xg&Zi z@|5=*Dv1G{*K_k!9IS_q-({6II#<&b*}bC-aY+_^%`mG8Q?b0mrH;{|D?}`3+^w%Naq`>$06EIp ACIA2c literal 0 HcmV?d00001 diff --git a/lib/package_project.py b/lib/package_project.py new file mode 100644 index 0000000..2fb4907 --- /dev/null +++ b/lib/package_project.py @@ -0,0 +1,14 @@ +# -*- coding: UTF-8 -*- +""" 
+@Project -> File :scrapyproject -> package_project
+@IDE :PyCharm
+@Author :rengengchen
+@Date :2021/5/12 10:46
+@Desc :
+"""
+import shutil
+import subprocess
+
+# build the wheel, then remove the intermediate build artifacts
+subprocess.call(['python', 'setup.py', 'bdist_wheel'])
+shutil.rmtree('build', ignore_errors=True)
+shutil.rmtree('analysis_package.egg-info', ignore_errors=True)
diff --git a/lib/setup.py b/lib/setup.py
new file mode 100644
index 0000000..5d7a00c
--- /dev/null
+++ b/lib/setup.py
@@ -0,0 +1,36 @@
+# coding:utf-8
+from setuptools import setup, find_packages
+
+PACKAGE = "analysis_package"
+NAME = "analysis_package"
+DESCRIPTION = "general analysis functions"
+AUTHOR = "iod"
+AUTHOR_EMAIL = "rengengchen@sics.ac.cn"
+URL = ""
+VERSION = '0.1.3'
+
+setup(
+    name=NAME,
+    version=VERSION,
+    description=DESCRIPTION,
+    author=AUTHOR,
+    author_email=AUTHOR_EMAIL,
+    license="BSD",
+    url=URL,
+    include_package_data=True,
+    packages=find_packages(),
+    classifiers=[
+        'Programming Language :: Python',
+        'Operating System :: OS Independent',
+    ],
+    install_requires=[
+        'pandas',
+        'scipy',
+        'numpy',
+        'matplotlib',
+        'seaborn',
+        'tqdm',
+        'scikit-learn',
+        # imported by utils.file_util and utils.datetime_util
+        'paramiko',
+        'python-dateutil',
+    ],
+    zip_safe=False,
+)
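Once built, the wheel lands in dist/ and can be installed and used; a sketch with the version from this patch:

    # pip install dist/analysis_package-0.1.3-py3-none-any.whl
    from analysis_package.utils.datetime_util import utctimestamp, timestamp2str

    print(timestamp2str(utctimestamp()))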