39 lines
1.3 KiB
Python
39 lines
1.3 KiB
Python
import os
|
|
|
|
import numpy as np
|
|
import seaborn as sns
|
|
import matplotlib.pyplot as plt
|
|
from scipy.stats import spearmanr
|
|
import logging
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def Spearman_rank_test(data_frame, feature_a, feature_b, save_path, file_name, sample_size=4000):
    """
    Run Spearman's rank correlation test on two features and save a scatter plot.

    Spearman's correlation measures how well the relationship between two
    features can be described by a monotonic function. It works on ranks, so
    it does not require the relationship to be linear and is suitable for
    ordinal as well as continuous data.

    A random sample of rows (fixed seed, so results are reproducible) is
    drawn from ``data_frame``; the test is computed on that sample, the
    sampled points are drawn as a scatter plot saved under ``save_path``,
    and the outcome is logged at significance level 0.05.

    @param data_frame: table holding both features (expected to be a pandas
        DataFrame, since ``.sample(n=..., random_state=...)`` is used)
    @param feature_a: Input first feature for Spearman's rank test
    @param feature_b: Input second feature for Spearman's rank test
    @param save_path: output directory for the scatter plot
    @param file_name: output file name for the scatter plot
    @param sample_size: maximum number of rows sampled to represent the
        population; capped at the number of rows available
    @return: tuple ``(coef, p)`` with the correlation coefficient and p-value
    """
    # sample() raises ValueError when asked for more rows than exist, so cap
    # the request at the size of the frame.
    n_rows = min(sample_size, len(data_frame))

    # Draw the rows once so both features come from the same observations.
    sampled = data_frame.sample(n=n_rows, random_state=1)
    a = sampled[feature_a]
    b = sampled[feature_b]

    coef, p = spearmanr(a, b)
    logger.info("Spearmans' correlation coefficient is:%s", coef)
    alpha = 0.05

    # Use a dedicated figure and always close it; drawing on the implicit
    # global figure would overlay points from earlier calls and leak memory.
    fig, ax = plt.subplots()
    try:
        ax.scatter(a, b)
        ax.set_xlabel("Feature A")
        ax.set_ylabel("Feature B")
        ax.set_title("Spearman's Rank Test")
        fig.savefig(os.path.join(save_path, file_name))
    finally:
        plt.close(fig)

    if np.isnan(p):
        # NaN compares False against alpha and would otherwise be reported
        # as a significant result.
        logger.warning("Spearman's rank test is undefined for this sample p=%s", p)
    elif p > alpha:
        logger.info("Feature are uncorrelated(failed to reject H0) p=%s", p)
    else:
        logger.info("Features have a monotonic relationship(reject H0) p=%s", p)

    return coef, p
|