import matplotlib.pyplot as plt
import pandas as pd
from pandas.plotting import scatter_matrix
import seaborn as sns
from IPython.display import HTML
# Both blood-test sheets live in one workbook and share a layout, so the reader
# options are stated once: columns A-J, row index 1 skipped, 30 data rows,
# first column used as the index.
excel_path = 'korelace_teta Iva_vasek.xlsx'
read_opts = dict(usecols='A:J', skiprows=[1], nrows=30, index_col=0)
df_04 = pd.read_excel(excel_path, sheet_name='krev_2018-04-30', **read_opts)
# NOTE(review): this frame is presented under the heading 'Listopad' while the
# sheet name carries the date 2018-12-17 - confirm which label is intended.
df_11 = pd.read_excel(excel_path, sheet_name='krev_2018-12-17', **read_opts)

# Measurement columns analysed by the rest of the script.
col_list = ['Ca', 'P', 'Mg', 'Zn', 'Cu', 'Se']

# A bare expression is only echoed when it is the last statement of a notebook
# cell; in a flat script it is a no-op. Every table is therefore passed to
# display() explicitly so it really appears under its heading.
display(HTML(r'<h3> Duben </h3>'))
display(df_04)
display(HTML(r'<h3> Listopad </h3>'))
display(df_11)
display(df_04.loc[:, col_list].describe())
display(df_11.loc[:, col_list].describe())
# Pearson and Spearman correlation matrices of the April frame, coloured on a
# fixed [-1, 1] scale. The styled tables are displayed explicitly: as bare
# expressions in the middle of a script they would never be rendered.
for method in ('pearson', 'spearman'):
    res = df_04[col_list].corr(method=method)
    display(res.style.background_gradient(cmap='bwr', vmin=-1, vmax=1))
# One pair grid per frame: scatter plots below the diagonal and plain
# histograms (no KDE, black bar edges) on the diagonal.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases and
# histplot is the documented replacement - confirm the installed version
# before switching, since the default binning may differ.
for heading, frame in (
    (r'<h3> Duben </h3>', df_04),
    (r'<h3> Listopad </h3>', df_11),
):
    display(HTML(heading))
    g = sns.PairGrid(frame.loc[:, col_list], diag_sharey=False)
    g.map_lower(sns.scatterplot)
    g.map_diag(sns.distplot, kde=False, hist_kws={'ec': 'k'})
# Descriptive statistics per value of 'kat', transposed so that the categories
# become columns; one table per frame.
char_04 = df_04.loc[:, col_list + ['kat']].groupby('kat').describe().T
char_11 = df_11.loc[:, col_list + ['kat']].groupby('kat').describe().T

# Side-by-side comparison, columns suffixed with the frame they come from.
# Displayed explicitly: a bare expression is not rendered outside the last
# line of a notebook cell.
display(pd.concat((char_11.add_suffix('_11'), char_04.add_suffix('_04')), axis=1))
# Per-category correlation matrices. Output order: df_04 Pearson, df_04
# Spearman, df_11 Pearson, df_11 Spearman - each block lists the categories in
# kat_lst order under an <h3> heading with the category name.
kat_lst = ['kráva', 'prvotelka', 'jalovice']
for frame in (df_04, df_11):
    for method in ('pearson', 'spearman'):
        for kat in kat_lst:
            display(HTML(r'<h3> {} </h3>'.format(kat)))
            in_category = frame.kat == kat
            res = frame.loc[in_category, col_list].corr(method=method)
            display(res.style.background_gradient(cmap='bwr', vmin=-1, vmax=1))
# Pair grids coloured by 'kat', first for df_04 and then for df_11: scatter
# plots above the diagonal, plain histograms on it.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases -
# confirm the installed version before replacing it.
for frame in (df_04, df_11):
    g = sns.PairGrid(frame.loc[:, col_list + ['kat']], diag_sharey=False, hue='kat')
    g.map_upper(sns.scatterplot)
    g.map_diag(sns.distplot, kde=False, hist_kws={'ec': 'k'})
    # The legend is added exactly once and only after the plots exist, so that
    # it is built from the plotted artists. Both grids are handled the same way.
    g = g.add_legend()
# Wide table: the columns of both frames side by side (aligned on the index),
# suffixed '_11' and '_04'. The category column of the '_11' half is renamed
# back to plain 'kat'; 'kat_04' stays in the frame untouched.
df = pd.concat((df_11.add_suffix('_11'), df_04.add_suffix('_04')), axis=1)
df = df.rename(columns={'kat_11': 'kat'})
# Shown explicitly - a bare `df` is a no-op in the middle of a script.
display(df)

# Suffixed names of the measurement columns for each half of the wide table.
col_lst_04 = [col + '_04' for col in col_list]
col_lst_11 = [col + '_11' for col in col_list]
display(df.loc[:, col_lst_11 + col_lst_04].describe())
import numpy as np
def highlight_diag(data, color='#999'):
    """Return a CSS style table that highlights the main diagonal of *data*.

    Intended for ``Styler.apply(highlight_diag, axis=None)``: the result has
    the same index and columns as *data*, holds a ``background-color``
    declaration on the main diagonal and an empty string everywhere else.

    Args:
        data: DataFrame whose shape and labels the style table mirrors. It is
            not modified.
        color: CSS colour used for the diagonal cells.

    Returns:
        A new DataFrame of CSS strings labelled like *data*.
    """
    attr = 'background-color: {}'.format(color)
    # Build the style grid from scratch instead of editing ``data``-derived
    # values in place: ``.values`` is not guaranteed to be a writable view of
    # the frame, so in-place edits can be lost or rejected.
    styles = np.full(data.shape, '', dtype=object)
    np.fill_diagonal(styles, attr)
    return pd.DataFrame(styles, index=data.index, columns=data.columns)
import locale
# Switch the process locale to cs_CZ.UTF-8 and let pandas print floats with
# the locale-aware 'n' format. Only one float_format is installed: a
# currency formatter set just before this one would be overwritten at once.
# NOTE(review): setlocale raises locale.Error when cs_CZ.UTF-8 is not
# installed on the machine.
locale.setlocale(locale.LC_ALL, 'cs_CZ.UTF-8')
pd.options.display.float_format = '{:n}'.format
def fmt_decimal_comma(value):
    """Format *value* with two decimals and a decimal comma."""
    return '{:.2f}'.format(value).replace('.', ',')


# Correlation tables of the wide frame, Pearson first and Spearman second.
# For each method: the full matrix, the '_11' rows against the '_04' columns,
# and the two single-frame sub-matrices. The diagonal is greyed out wherever
# rows and columns carry the same labels.
for method in ('pearson', 'spearman'):
    res = df[col_lst_11 + col_lst_04].corr(method)
    sections = (
        (r'<h3> Všechno </h3>', res, True),
        (r'<h3> Listopad vs. duben </h3>', res.loc[col_lst_11, col_lst_04], False),
        (r'<h3> Duben </h3>', res.loc[col_lst_04, col_lst_04], True),
        (r'<h3> Listopad </h3>', res.loc[col_lst_11, col_lst_11], True),
    )
    for heading, table, mark_diag in sections:
        display(HTML(heading))
        # The decimal comma is applied to the cell values only. Replacing
        # every '.' in the rendered HTML would also rewrite dots inside CSS,
        # element ids or labels.
        styled = table.style.format(fmt_decimal_comma).background_gradient(
            cmap='bwr', vmin=-1, vmax=1)
        if mark_diag:
            styled = styled.apply(highlight_diag, axis=None)
        display(styled)
# Scatter grid of the '_04' columns (x) against the '_11' columns (y),
# coloured by 'kat', on a 17 cm x 17 cm figure.
plt.rcParams['font.family'] = 'Times New Roman'
df_sel = df[col_lst_11 + col_lst_04 + ['kat']]
g = sns.PairGrid(df_sel, diag_sharey=False, hue='kat', x_vars=col_lst_04, y_vars=col_lst_11)
g.fig.set_size_inches(17/2.54, 17/2.54)
g.map(sns.scatterplot, s=50, linewidth=0)
# Rotate the x tick labels through tick_params. Re-assigning the current
# label texts with set_xticklabels pins them to whatever the axis shows at
# that moment (placeholders if done before plotting), so the labels would no
# longer follow the data. Centre alignment is already the default.
for ax in g.axes.flat:
    ax.tick_params(axis='x', labelrotation=45)
g = g.add_legend()
# Full pair grid over all '_11' and '_04' columns, coloured by 'kat', on a
# 15 cm x 15 cm figure; written to body_10.png.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases -
# confirm the installed version before replacing it.
plt.rcParams['font.size'] = 10
df_sel = df[col_lst_11 + col_lst_04 + ['kat']]
g = sns.PairGrid(df_sel, diag_sharey=False, hue='kat')
g.fig.set_size_inches(15/2.54, 15/2.54)
g.map_lower(sns.scatterplot, s=5, linewidth=0)
g.map_diag(sns.distplot, kde=False, hist_kws={'ec': 'k'})
g = g.add_legend(bbox_to_anchor=(0.9, 1.0), borderaxespad=0., frameon=True,
                 facecolor='white', title='Kategorie:', framealpha=1)
# tick_params rotates the labels without replacing the tick formatter.
# set_xticklabels(get_xticklabels(), ...) would freeze the current label
# texts onto the axes. Centre alignment is already the default.
for ax in g.axes.flat:
    ax.tick_params(axis='x', labelrotation=45)
plt.savefig('body_10.png', dpi=300, bbox_inches='tight')
def fmt_decimal_comma(value):
    """Format *value* with two decimals and a decimal comma."""
    return '{:.2f}'.format(value).replace('.', ',')


# Pearson correlation tables per category: the full matrix, the '_11' rows
# against the '_04' columns, then the two single-frame sub-matrices. The
# diagonal is greyed out wherever rows and columns carry the same labels.
kat_lst = ['kráva', 'prvotelka', 'jalovice']
for kat in kat_lst:
    display(HTML(r'<h3> {} </h3>'.format(kat)))
    res = df.loc[df.kat == kat, col_lst_11 + col_lst_04].corr(method='pearson')
    views = (
        (res, True),
        (res.loc[col_lst_11, col_lst_04], False),
        (res.loc[col_lst_04, col_lst_04], True),
        (res.loc[col_lst_11, col_lst_11], True),
    )
    for table, mark_diag in views:
        # The decimal comma is applied to the cell values only. Replacing
        # every '.' in the rendered HTML would also rewrite dots inside CSS,
        # element ids or labels.
        styled = table.style.format(fmt_decimal_comma).background_gradient(
            cmap='bwr', vmin=-1, vmax=1)
        if mark_diag:
            styled = styled.apply(highlight_diag, axis=None)
        display(styled)
# Spearman correlation per category, showing only the '_11' rows against the
# '_04' columns.
kat_lst = ['kráva', 'prvotelka', 'jalovice']
for kat in kat_lst:
    display(HTML(r'<h3> {} </h3>'.format(kat)))
    in_category = df.kat == kat
    res = df.loc[in_category, col_lst_11 + col_lst_04].corr(method='spearman')
    cross = res.loc[col_lst_11, col_lst_04]
    display(cross.style.background_gradient(cmap='bwr', vmin=-1, vmax=1))
from scipy.stats import f_oneway, shapiro, boxcox_normplot,\
probplot, normaltest, levene, bartlett, ttest_rel, kstest
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd
# For every measurement: box plots of its '_11' and '_04' columns grouped by
# 'kat', with the mean drawn as a red line; each figure is saved as
# boxplot_<column>.png.
plt.rcParams['font.size'] = 10
figsize = (15/2.54/2, 15/2.54/3)
for col in col_list:
    display(HTML(r'<h4> {} </h4>'.format(col)))
    axes = df.boxplot(
        column=[col + '_11', col + '_04'],
        by='kat',
        figsize=figsize,
        return_type='axes',
        flierprops=dict(ms=3),
        showmeans=True,
        meanline=True,
        meanprops=dict(color='r'),
    )
    for ax in axes:
        ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
        ax.set_xlabel('Kategorie')
    # Blank out the figure-level title.
    plt.suptitle('')
    plt.tight_layout()
    plt.savefig('boxplot_{}.png'.format(col), dpi=300)
    plt.show()
# Levene and Bartlett tests between the '_04' and '_11' values of each
# measurement, run separately for every category in kat_lst.
for col in col_list:
    display(HTML(r'<h4> {} </h4>'.format(col)))
    for kat in kat_lst:
        in_category = df.kat == kat
        d_04 = df.loc[in_category, col + '_04']
        d_11 = df.loc[in_category, col + '_11']
        print(kat + ':')
        for variance_test in (levene, bartlett):
            print(' ', variance_test(d_04, d_11))
# One-way ANOVA of every measurement against 'kat' on df_04.
for col in col_list:
    display(HTML(r'<h4> {} </h4>'.format(col)))
    formula = '{} ~ kat'.format(col)
    lm = ols(formula, data=df_04[['kat', col]]).fit()
    table = sm.stats.anova_lm(lm)
    display(table)

# Tukey HSD for Ca and Zn on df_04, first at alpha 0.05 and then at 0.01.
for alpha in (0.05, 0.01):
    for element in ('Ca', 'Zn'):
        print(element + ':')
        m_comp = pairwise_tukeyhsd(endog=df_04[element], groups=df_04['kat'], alpha=alpha)
        print(m_comp)
# One-way ANOVA of every measurement against 'kat' on df_11.
for col in col_list:
    display(HTML(r'<h4> {} </h4>'.format(col)))
    formula = '{} ~ kat'.format(col)
    lm = ols(formula, data=df_11[['kat', col]]).fit()
    table = sm.stats.anova_lm(lm)
    display(table)

# Tukey HSD for P on df_11, first at alpha 0.05 and then at 0.01.
for alpha in (0.05, 0.01):
    print('P:')
    m_comp = pairwise_tukeyhsd(endog=df_11['P'], groups=df_11['kat'], alpha=alpha)
    print(m_comp)
# Box plots of the '_11' and '_04' columns of each measurement, all rows
# together, one small figure per measurement.
for col in col_list:
    display(HTML(r'<h4> {} </h4>'.format(col)))
    fig, ax = plt.subplots(figsize=(5, 2))
    paired_columns = [col + '_11', col + '_04']
    df.boxplot(column=paired_columns, ax=ax)
    plt.show()
# Levene and Bartlett tests between the '_04' and '_11' columns of each
# measurement, all rows together.
for col in col_list:
    display(HTML(r'<h4> {} </h4>'.format(col)))
    april, november = df[col + '_04'], df[col + '_11']
    for variance_test in (levene, bartlett):
        print(variance_test(april, november))

# Paired t-test between the '_04' and '_11' columns of each measurement; rows
# are paired through the shared index of the wide frame.
for col in col_list:
    display(HTML(r'<h4> {} </h4>'.format(col)))
    april, november = df[col + '_04'], df[col + '_11']
    print(ttest_rel(april, november))
# Normality checks per measurement, all rows together: Kolmogorov-Smirnov
# against a normal distribution with the sample's own mean and std, plus
# probability plots ('_04' on the left axis, '_11' on the right).
# NOTE(review): the KS reference parameters are estimated from the same
# sample that is tested, which is generally considered to bias the p-values -
# confirm whether a Lilliefors-type correction is wanted.
for col in col_list:
    display(HTML(r'<h4> {} </h4>'.format(col)))
    november, april = df[col + '_11'], df[col + '_04']
    for label, sample in (('Listopad:', november), ('Duben:', april)):
        print(label, kstest(sample, 'norm', args=(sample.mean(), sample.std())))
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 3))
    for panel, sample in zip(ax, (april, november)):
        probplot(sample, plot=panel, dist='norm')
    plt.show()

# The same checks repeated inside every category.
for kat in kat_lst:
    display(HTML(r'<h4> {} </h4>'.format(kat)))
    in_category = df.kat == kat
    for col in col_list:
        display(HTML(r'<h4> {} </h4>'.format(col)))
        d_11 = df.loc[in_category, col + '_11']
        d_04 = df.loc[in_category, col + '_04']
        for label, sample in (('Listopad:', d_11), ('Duben:', d_04)):
            print(label, kstest(sample, 'norm', args=(sample.mean(), sample.std())))
        fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 3))
        for panel, sample in zip(ax, (d_04, d_11)):
            probplot(sample, plot=panel, dist='norm')
        plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Toy example showing what a box plot draws. Note that this rebinds `df`.
df = pd.DataFrame(dict(a=[1, 3, 3.5, 4, 5, 8]))
# Shown explicitly - a bare expression is a no-op in the middle of a script.
display(df.describe())

fig, ax = plt.subplots()
df.boxplot(ax=ax)

# Raw observations just left of the box; the x positions follow the sample
# length instead of a hard-coded count.
ax.plot([0.9] * len(df), df, 'ko')

# Reference markers just right of the box.
q1, median, q3 = (df.quantile(q) for q in (.25, .5, .75))
d = (q3 - q1) * 1.5  # 1.5 x interquartile range
markers = (
    (df.mean(), 'ro'),  # mean
    (df.min(), 'go'),   # extremes
    (df.max(), 'go'),
    (q1, 'bo'),         # quartiles
    (q3, 'bo'),
    (q1 - d, 'mo'),     # quartiles pushed out by 1.5 x IQR
    (q3 + d, 'mo'),
    (median, 'ko'),     # median
)
for value, marker in markers:
    ax.plot([1.1], value, marker)