import matplotlib.pyplot as plt
import pandas as pd
from pandas.plotting import scatter_matrix
import seaborn as sns
from IPython.display import HTML
import numpy as np
# Load the questionnaire scores from the 'hot_analyza' sheet.
# Only spreadsheet columns A-H are read; the first spreadsheet row is skipped,
# the next row supplies the column names, and the first selected column
# becomes the index. Reading stops after 115 data rows.
data = pd.read_excel(
    'Dotazník_Josef_Šajnar_statistika.xlsx',
    sheet_name='hot_analyza',
    usecols='A:H',
    skiprows=[0],
    header=0,
    index_col=0,
    nrows=115,
)

# Names of the five numeric factor columns analysed throughout the notebook.
col_list = [
    'Extraverze',
    'Přívětivost',
    'Svědomitost',
    'Neuroticismus',
    'Intelekt',
]

display(HTML(r'<h3> Faktory - HOT </h3>'))
# Bare expression: the notebook renders the loaded table as the cell output.
data
Pohlaví | Věk | Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|---|---|
Respondent | |||||||
1 | Muž | 50-59 | 13 | 4 | 7 | 17 | 11 |
2 | Muž | 34-41 | 8 | 6 | 10 | 17 | 10 |
3 | Muž | 50-59 | 10 | 6 | 5 | 10 | 8 |
4 | Muž | 18-25 | 9 | 8 | 10 | 13 | 14 |
5 | Žena | 26-33 | 10 | 5 | 6 | 14 | 9 |
... | ... | ... | ... | ... | ... | ... | ... |
111 | Muž | 34-41 | 7 | 11 | 10 | 17 | 10 |
112 | Muž | 34-41 | 7 | 9 | 6 | 17 | 11 |
113 | Muž | 26-33 | 5 | 6 | 7 | 17 | 10 |
114 | Muž | 18-25 | 5 | 4 | 5 | 20 | 9 |
115 | Muž | 18-25 | 7 | 4 | 6 | 19 | 9 |
115 rows × 7 columns
# Summary statistics (count, mean, std, min, quartiles, max) of the factor columns.
data[col_list].describe()
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
count | 115.000000 | 115.000000 | 115.000000 | 115.000000 | 115.000000 |
mean | 8.643478 | 8.739130 | 7.730435 | 15.486957 | 10.356522 |
std | 3.938544 | 4.155079 | 2.962697 | 3.527748 | 2.082472 |
min | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 7.000000 |
25% | 6.000000 | 6.000000 | 6.000000 | 14.000000 | 9.000000 |
50% | 8.000000 | 7.000000 | 7.000000 | 17.000000 | 10.000000 |
75% | 10.000000 | 10.500000 | 9.000000 | 18.000000 | 11.000000 |
max | 20.000000 | 20.000000 | 18.000000 | 20.000000 | 16.000000 |
# Pairwise Pearson correlation between the factor columns.
res = data.loc[:, col_list].corr(method='pearson')
# Render the matrix with a diverging colour map pinned to the full [-1, 1]
# range, so equal colours mean equal correlation strength across tables.
res.style.background_gradient(vmin=-1, vmax=1, cmap='bwr')
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | 0.362512 | 0.111219 | -0.588429 | 0.481935 |
Přívětivost | 0.362512 | 1.000000 | 0.352661 | -0.561568 | 0.426486 |
Svědomitost | 0.111219 | 0.352661 | 1.000000 | -0.396063 | 0.064054 |
Neuroticismus | -0.588429 | -0.561568 | -0.396063 | 1.000000 | -0.456081 |
Intelekt | 0.481935 | 0.426486 | 0.064054 | -0.456081 | 1.000000 |
# Pairwise Spearman (rank) correlation between the factor columns.
res = data.loc[:, col_list].corr(method='spearman')
# Same fixed [-1, 1] diverging colour scale as used for the other
# correlation tables, so the tables are directly comparable.
res.style.background_gradient(vmin=-1, vmax=1, cmap='bwr')
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | 0.356463 | 0.263819 | -0.552978 | 0.448460 |
Přívětivost | 0.356463 | 1.000000 | 0.308149 | -0.474260 | 0.326129 |
Svědomitost | 0.263819 | 0.308149 | 1.000000 | -0.467366 | 0.089152 |
Neuroticismus | -0.552978 | -0.474260 | -0.467366 | 1.000000 | -0.288355 |
Intelekt | 0.448460 | 0.326129 | 0.089152 | -0.288355 | 1.000000 |
# Pair plot of the five factors for all respondents:
# scatter plots below the diagonal, per-factor histograms on the diagonal.
plt.rcParams['font.size'] = 10
display(HTML(r'<h3> Faktory </h3>'))

# diag_sharey=False lets every histogram use its own count axis.
g = sns.PairGrid(data.loc[:, col_list], diag_sharey=False)
# Figure size is given in centimetres (15 x 15) and converted to inches.
g.fig.set_size_inches(15/2.54, 15/2.54)
g.map_lower(sns.scatterplot, s=5, linewidth=0)
# `sns.distplot` is deprecated and slated for removal; `sns.histplot` is the
# axes-level replacement. It draws no KDE by default, so `kde=False` is no
# longer needed, and the black bar outline is passed directly as `edgecolor`.
g.map_diag(sns.histplot, edgecolor='k')
plt.savefig('histogramy_all.png', dpi=300, bbox_inches='tight', facecolor='white')
C:\Users\martina\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
# Descriptive statistics of every factor within each age group ('Věk'),
# transposed so age groups are columns and (factor, statistic) pairs are rows.
char = data[col_list + ['Věk']].groupby('Věk').describe().transpose()
# Bare expression: the notebook renders the table as the cell output.
char
Věk | 18-25 | 26-33 | 34-41 | 42-49 | 50-59 | 60+ | |
---|---|---|---|---|---|---|---|
Extraverze | count | 31.000000 | 31.000000 | 23.000000 | 15.000000 | 9.000000 | 6.000000 |
mean | 6.838710 | 7.709677 | 10.043478 | 9.466667 | 11.555556 | 11.000000 | |
std | 2.437167 | 2.795157 | 5.217787 | 4.206571 | 3.004626 | 5.366563 | |
min | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 6.000000 | 5.000000 | |
25% | 5.000000 | 6.000000 | 7.000000 | 6.500000 | 10.000000 | 6.500000 | |
50% | 6.000000 | 7.000000 | 8.000000 | 9.000000 | 13.000000 | 11.000000 | |
75% | 8.000000 | 9.000000 | 11.500000 | 11.500000 | 13.000000 | 14.750000 | |
max | 14.000000 | 14.000000 | 20.000000 | 19.000000 | 15.000000 | 18.000000 | |
Přívětivost | count | 31.000000 | 31.000000 | 23.000000 | 15.000000 | 9.000000 | 6.000000 |
mean | 7.967742 | 8.161290 | 8.869565 | 8.866667 | 8.666667 | 15.000000 | |
std | 3.995697 | 3.615677 | 3.841216 | 3.739111 | 4.847680 | 4.979960 | |
min | 4.000000 | 4.000000 | 4.000000 | 5.000000 | 4.000000 | 8.000000 | |
25% | 5.500000 | 5.500000 | 6.000000 | 6.500000 | 6.000000 | 12.000000 | |
50% | 7.000000 | 7.000000 | 8.000000 | 8.000000 | 7.000000 | 15.500000 | |
75% | 9.500000 | 10.000000 | 11.000000 | 10.000000 | 10.000000 | 19.000000 | |
max | 19.000000 | 18.000000 | 17.000000 | 20.000000 | 19.000000 | 20.000000 | |
Svědomitost | count | 31.000000 | 31.000000 | 23.000000 | 15.000000 | 9.000000 | 6.000000 |
mean | 7.870968 | 7.838710 | 7.478261 | 7.533333 | 7.333333 | 8.500000 | |
std | 3.159556 | 3.225903 | 2.171501 | 3.563038 | 2.291288 | 3.391165 | |
min | 4.000000 | 4.000000 | 4.000000 | 4.000000 | 5.000000 | 6.000000 | |
25% | 6.000000 | 6.000000 | 6.000000 | 5.000000 | 6.000000 | 6.250000 | |
50% | 7.000000 | 7.000000 | 7.000000 | 7.000000 | 7.000000 | 7.500000 | |
75% | 9.500000 | 9.500000 | 8.500000 | 8.500000 | 7.000000 | 8.750000 | |
max | 16.000000 | 15.000000 | 13.000000 | 18.000000 | 12.000000 | 15.000000 | |
Neuroticismus | count | 31.000000 | 31.000000 | 23.000000 | 15.000000 | 9.000000 | 6.000000 |
mean | 15.935484 | 16.677419 | 15.304348 | 15.200000 | 14.111111 | 10.500000 | |
std | 3.375752 | 2.856421 | 3.125188 | 3.211586 | 3.620927 | 5.468089 | |
min | 6.000000 | 7.000000 | 8.000000 | 10.000000 | 10.000000 | 4.000000 | |
25% | 14.500000 | 15.000000 | 14.000000 | 13.000000 | 11.000000 | 6.000000 | |
50% | 17.000000 | 17.000000 | 16.000000 | 16.000000 | 15.000000 | 11.000000 | |
75% | 18.000000 | 19.000000 | 17.000000 | 17.500000 | 17.000000 | 13.750000 | |
max | 20.000000 | 20.000000 | 20.000000 | 20.000000 | 19.000000 | 18.000000 | |
Intelekt | count | 31.000000 | 31.000000 | 23.000000 | 15.000000 | 9.000000 | 6.000000 |
mean | 9.774194 | 9.741935 | 10.130435 | 11.533333 | 10.888889 | 13.666667 | |
std | 1.647416 | 1.692488 | 1.713697 | 2.386470 | 2.368778 | 2.250926 | |
min | 7.000000 | 7.000000 | 8.000000 | 8.000000 | 8.000000 | 11.000000 | |
25% | 9.000000 | 9.000000 | 9.000000 | 10.000000 | 10.000000 | 12.000000 | |
50% | 10.000000 | 10.000000 | 10.000000 | 12.000000 | 11.000000 | 13.500000 | |
75% | 10.500000 | 10.000000 | 11.000000 | 14.000000 | 11.000000 | 15.750000 | |
max | 14.000000 | 14.000000 | 16.000000 | 15.000000 | 15.000000 | 16.000000 |
# Age-group labels, in the order their correlation tables are shown.
Věk_lst = ['18-25', '26-33', '34-41', '42-49', '50-59', '60+']

# One Pearson correlation matrix per age group, each under its own heading.
for Věk in Věk_lst:
    display(HTML(r'<h3> {} </h3>'.format(Věk)))
    # Keep only the respondents of the current age group.
    res = data.loc[data['Věk'] == Věk, col_list].corr(method='pearson')
    # Fixed [-1, 1] colour scale keeps the tables comparable across groups.
    display(res.style.background_gradient(vmin=-1, vmax=1, cmap='bwr'))
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | 0.194556 | 0.326196 | -0.147163 | 0.297806 |
Přívětivost | 0.194556 | 1.000000 | 0.525087 | -0.662453 | 0.160901 |
Svědomitost | 0.326196 | 0.525087 | 1.000000 | -0.653980 | 0.147911 |
Neuroticismus | -0.147163 | -0.662453 | -0.653980 | 1.000000 | -0.164541 |
Intelekt | 0.297806 | 0.160901 | 0.147911 | -0.164541 | 1.000000 |
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | 0.713910 | 0.567631 | -0.709336 | 0.526182 |
Přívětivost | 0.713910 | 1.000000 | 0.679613 | -0.788761 | 0.246700 |
Svědomitost | 0.567631 | 0.679613 | 1.000000 | -0.743799 | 0.102016 |
Neuroticismus | -0.709336 | -0.788761 | -0.743799 | 1.000000 | -0.210852 |
Intelekt | 0.526182 | 0.246700 | 0.102016 | -0.210852 | 1.000000 |
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | 0.292854 | -0.050059 | -0.806436 | 0.553431 |
Přívětivost | 0.292854 | 1.000000 | -0.030327 | -0.174506 | 0.196047 |
Svědomitost | -0.050059 | -0.030327 | 1.000000 | -0.082705 | -0.090814 |
Neuroticismus | -0.806436 | -0.174506 | -0.082705 | 1.000000 | -0.550933 |
Intelekt | 0.553431 | 0.196047 | -0.090814 | -0.550933 | 1.000000 |
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | -0.091128 | -0.041620 | -0.425089 | 0.172662 |
Přívětivost | -0.091128 | 1.000000 | 0.541865 | -0.289081 | 0.424785 |
Svědomitost | -0.041620 | 0.541865 | 1.000000 | -0.365788 | 0.308572 |
Neuroticismus | -0.425089 | -0.289081 | -0.365788 | 1.000000 | -0.527488 |
Intelekt | 0.172662 | 0.424785 | 0.308572 | -0.527488 | 1.000000 |
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | 0.177360 | -0.157359 | -0.442983 | 0.554206 |
Přívětivost | 0.177360 | 1.000000 | -0.180060 | -0.011869 | 0.616850 |
Svědomitost | -0.157359 | -0.180060 | 1.000000 | 0.145642 | -0.314752 |
Neuroticismus | -0.442983 | -0.011869 | 0.145642 | 1.000000 | -0.144116 |
Intelekt | 0.554206 | 0.616850 | -0.314752 | -0.144116 | 1.000000 |
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | 0.905510 | -0.329690 | -0.967802 | 0.943729 |
Přívětivost | 0.905510 | 1.000000 | -0.521084 | -0.807907 | 0.981309 |
Svědomitost | -0.329690 | -0.521084 | 1.000000 | 0.156392 | -0.445419 |
Neuroticismus | -0.967802 | -0.807907 | 0.156392 | 1.000000 | -0.844961 |
Intelekt | 0.943729 | 0.981309 | -0.445419 | -0.844961 | 1.000000 |
# One Spearman (rank) correlation matrix per age group, each under its own
# heading. Relies on `Věk_lst` having been defined earlier in the notebook.
for Věk in Věk_lst:
    display(HTML(r'<h3> {} </h3>'.format(Věk)))
    # Keep only the respondents of the current age group.
    res = data.loc[data['Věk'] == Věk, col_list].corr(method='spearman')
    # Fixed [-1, 1] colour scale keeps the tables comparable across groups.
    display(res.style.background_gradient(vmin=-1, vmax=1, cmap='bwr'))
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | 0.213270 | 0.305143 | -0.126486 | 0.346276 |
Přívětivost | 0.213270 | 1.000000 | 0.354518 | -0.736524 | 0.000000 |
Svědomitost | 0.305143 | 0.354518 | 1.000000 | -0.603280 | 0.115867 |
Neuroticismus | -0.126486 | -0.736524 | -0.603280 | 1.000000 | -0.036790 |
Intelekt | 0.346276 | 0.000000 | 0.115867 | -0.036790 | 1.000000 |
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | 0.660864 | 0.566963 | -0.627876 | 0.451964 |
Přívětivost | 0.660864 | 1.000000 | 0.486257 | -0.578427 | 0.121254 |
Svědomitost | 0.566963 | 0.486257 | 1.000000 | -0.727673 | 0.173476 |
Neuroticismus | -0.627876 | -0.578427 | -0.727673 | 1.000000 | -0.092969 |
Intelekt | 0.451964 | 0.121254 | 0.173476 | -0.092969 | 1.000000 |
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | 0.265481 | 0.203880 | -0.738916 | 0.508857 |
Přívětivost | 0.265481 | 1.000000 | 0.105954 | -0.310576 | 0.511290 |
Svědomitost | 0.203880 | 0.105954 | 1.000000 | -0.305170 | 0.062397 |
Neuroticismus | -0.738916 | -0.310576 | -0.305170 | 1.000000 | -0.443671 |
Intelekt | 0.508857 | 0.511290 | 0.062397 | -0.443671 | 1.000000 |
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | 0.056706 | -0.043600 | -0.558958 | 0.255272 |
Přívětivost | 0.056706 | 1.000000 | 0.427158 | -0.028881 | 0.429651 |
Svědomitost | -0.043600 | 0.427158 | 1.000000 | -0.319685 | 0.175278 |
Neuroticismus | -0.558958 | -0.028881 | -0.319685 | 1.000000 | -0.536607 |
Intelekt | 0.255272 | 0.429651 | 0.175278 | -0.536607 | 1.000000 |
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | -0.051732 | -0.237728 | -0.366434 | 0.587719 |
Přívětivost | -0.051732 | 1.000000 | -0.074949 | 0.114407 | 0.500074 |
Svědomitost | -0.237728 | -0.074949 | 1.000000 | 0.171941 | -0.260155 |
Neuroticismus | -0.366434 | 0.114407 | 0.171941 | 1.000000 | -0.017244 |
Intelekt | 0.587719 | 0.500074 | -0.260155 | -0.017244 | 1.000000 |
Extraverze | Přívětivost | Svědomitost | Neuroticismus | Intelekt | |
---|---|---|---|---|---|
Extraverze | 1.000000 | 0.706188 | -0.057977 | -1.000000 | 0.794461 |
Přívětivost | 0.706188 | 1.000000 | -0.179124 | -0.706188 | 0.954545 |
Svědomitost | -0.057977 | -0.179124 | 1.000000 | 0.057977 | -0.358249 |
Neuroticismus | -1.000000 | -0.706188 | 0.057977 | 1.000000 | -0.794461 |
Intelekt | 0.794461 | 0.954545 | -0.358249 | -0.794461 | 1.000000 |
# Pair plot of the five factors coloured by age group ('Věk'):
# scatter plots below the diagonal, per-factor histograms on the diagonal.
plt.rcParams['font.size'] = 10

# diag_sharey=False lets every histogram use its own count axis.
g = sns.PairGrid(data.loc[:, col_list + ['Věk']], diag_sharey=False, hue='Věk')
# Figure size is given in centimetres (15 x 15) and converted to inches.
g.fig.set_size_inches(15/2.54, 15/2.54)
g.map_lower(sns.scatterplot, s=5, linewidth=0)
# `sns.distplot` is deprecated and slated for removal; `sns.histplot` is the
# axes-level replacement. It draws no KDE by default, so `kde=False` is no
# longer needed, and the black bar outline is passed directly as `edgecolor`.
# The grid forwards its `hue` variable, so each age group gets its own
# overlaid histogram.
g.map_diag(sns.histplot, edgecolor='k')
g = g.add_legend(bbox_to_anchor=(0.7, 0.9), borderaxespad=0., frameon=True,
                 facecolor='white', title='Věk:', framealpha=1, ncol=2)
plt.savefig('histogramy_age.png', dpi=300, bbox_inches='tight', facecolor='white')
C:\Users\martina\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
# For every factor draw two box plots side by side on a shared y axis:
# left, all respondents together; right, one box per age group ('Věk').
# Each figure is saved as 'boxplot_<factor>.png' and then shown.
plt.rcParams['font.size'] = 10
# Figure size in centimetres (16 x 7.5), converted to inches.
figsize = (16/2.54, 15/2.54/2)

for col in col_list:
    display(HTML(r'<h4> {} </h4>'.format(col)))

    # The right-hand (per-age) panel is three times as wide as the left one.
    fig, (ax, ax2) = plt.subplots(
        ncols=2,
        sharey=True,
        figsize=figsize,
        tight_layout=True,
        gridspec_kw={'width_ratios': [1, 3]},
    )

    # Box widths scale with the square root of the number of answers in each
    # age group; the overall box uses the sum of the per-group widths.
    vals = np.sqrt(data.loc[:, [col, 'Věk']].groupby('Věk').count().values) / 10.
    val = np.sum(vals)

    # In both panels the mean is drawn as a red line inside the box.
    data.boxplot(
        column=[col],
        ax=ax,
        widths=[val],
        showmeans=True,
        meanline=True,
        meanprops=dict(color='r'),
    )
    data.boxplot(
        column=[col],
        by='Věk',
        ax=ax2,
        widths=vals,
        showmeans=True,
        meanline=True,
        meanprops=dict(color='r'),
    )

    # Blank the figure-level title (presumably the automatic "grouped by"
    # title that pandas adds when `by=` is used).
    fig.suptitle('')
    # With sharey=True this limit applies to both panels.
    ax.set_ylim(3.5, 21)

    plt.savefig('boxplot_{}.png'.format(col), dpi=300, bbox_inches='tight',
                pad_inches=0.01, facecolor='white')
    plt.show()