Feature Correlations

In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
In [29]:
# Read the training and test tables from CSV files located in the
# current working directory.
train_path, test_path = 'train5.csv', 'test5.csv'
train = pd.read_csv(train_path)
test = pd.read_csv(test_path)
In [30]:
# Preview the first five rows of the training frame (5 is the pandas default).
train.head(n=5)
Out[30]:
id spacegroup number_of_total_atoms percent_atom_al percent_atom_ga percent_atom_in lattice_vector_1_ang lattice_vector_2_ang lattice_vector_3_ang lattice_angle_alpha_degree ... cgain rgain cgao rgao calin ralin calo ralo cino rino
0 1 33 80.0 0.6250 0.3750 0.000 9.9523 8.5513 9.1775 90.0026 ... 0.000000 inf -68.138833 1.955995 0.000000 inf -126.380141 1.953003 0.000000 inf
1 2 194 80.0 0.6250 0.3750 0.000 6.1840 6.1838 23.6287 90.0186 ... 0.000000 inf -74.347543 1.936593 0.000000 inf -97.805858 1.959401 0.000000 inf
2 3 227 40.0 0.8125 0.1875 0.000 9.7510 5.6595 13.9630 90.9688 ... 0.000000 inf -13.810688 1.869581 0.000000 inf -57.104462 1.947915 0.000000 inf
3 4 167 30.0 0.7500 0.0000 0.250 5.0036 5.0034 13.5318 89.9888 ... 0.000000 inf 0.000000 inf 11.027235 3.316789 -42.805965 2.019747 -16.121344 2.026801
4 5 194 80.0 0.0000 0.6250 0.375 6.6614 6.6612 24.5813 89.9960 ... 30.715613 3.556476 -111.422149 2.016436 0.000000 inf 0.000000 inf -41.095532 2.195004

5 rows × 76 columns

In [31]:
# Show every column label of the training frame so the names used in the
# plotting cells can be checked against what is actually available.
train.columns
Out[31]:
Index(['id', 'spacegroup', 'number_of_total_atoms', 'percent_atom_al',
       'percent_atom_ga', 'percent_atom_in', 'lattice_vector_1_ang',
       'lattice_vector_2_ang', 'lattice_vector_3_ang',
       'lattice_angle_alpha_degree', 'lattice_angle_beta_degree',
       'lattice_angle_gamma_degree', 'formation_energy_ev_natom',
       'bandgap_energy_ev', 'vol', 'n_ga', 'n_al', 'n_in', 'n_o', 'mass_al',
       'mass_ga', 'mass_in', 'mass_o', 'density', 'z_al', 'z_ga', 'z_in',
       'z_o', 'z_tot', 'r_ene', 'k_ene', 'p_ene', 'eta', 'r_gao_ave',
       'r_gao_min', 'r_alo_ave', 'r_alo_min', 'r_ino_ave', 'r_ino_min',
       'inv_r_gao_sum', 'inv2_r_gao_sum', 'inv_r_alo_sum', 'inv2_r_alo_sum',
       'inv_r_ino_sum', 'inv2_r_ino_sum', 'mass_tot', 'inv2_r_sum',
       'inv_r_sum', 'r_min', 'r_mean', 'ngao_mean', 'nalo_mean', 'nino_mean',
       'n_mean', 'coul', 'rmean', 'cgaga', 'rgaga', 'calal', 'ralal', 'cinin',
       'rinin', 'coo', 'roo', 'cgaal', 'rgaal', 'cgain', 'rgain', 'cgao',
       'rgao', 'calin', 'ralin', 'calo', 'ralo', 'cino', 'rino'],
      dtype='object')
In [33]:
# Pair plot of the mass_* and percent_atom_* columns together with
# formation_energy_ev_natom and bandgap_energy_ev.
# kind='reg' draws a regression fit on each off-diagonal panel;
# diag_kind='kde' draws a kernel density estimate on the diagonal.
mass_percent_vars = [
    'mass_ga', 'mass_al', 'mass_in', 'mass_tot',
    'percent_atom_ga', 'percent_atom_al', 'percent_atom_in',
    'formation_energy_ev_natom', 'bandgap_energy_ev',
]
sns.pairplot(
    data=train,
    vars=mass_percent_vars,
    kind='reg',
    diag_kind='kde',
)
plt.show()
In [34]:
# Pair plot of spacegroup, vol, density and the z_* columns together with
# formation_energy_ev_natom and bandgap_energy_ev.
# kind='reg' draws a regression fit on each off-diagonal panel;
# diag_kind='kde' draws a kernel density estimate on the diagonal.
spacegroup_density_z_vars = [
    'spacegroup', 'vol', 'density',
    'z_ga', 'z_al', 'z_in', 'z_tot',
    'formation_energy_ev_natom', 'bandgap_energy_ev',
]
sns.pairplot(
    data=train,
    vars=spacegroup_density_z_vars,
    kind='reg',
    diag_kind='kde',
)
plt.show()
In [35]:
# Pair plot of r_min, rmean and the n*_mean columns together with
# formation_energy_ev_natom and bandgap_energy_ev.
# kind='reg' draws a regression fit on each off-diagonal panel;
# diag_kind='kde' draws a kernel density estimate on the diagonal.
r_n_mean_vars = [
    'r_min', 'rmean',
    'ngao_mean', 'nalo_mean', 'nino_mean', 'n_mean',
    'formation_energy_ev_natom', 'bandgap_energy_ev',
]
sns.pairplot(
    data=train,
    vars=r_n_mean_vars,
    kind='reg',
    diag_kind='kde',
)
plt.show()
In [38]:
# Pair plot of the inv_r_*_sum columns, cgao/calo/cino and coul together
# with formation_energy_ev_natom and bandgap_energy_ev.
# kind='reg' draws a regression fit on each off-diagonal panel;
# diag_kind='kde' draws a kernel density estimate on the diagonal.
inv_r_c_coul_vars = [
    'inv_r_gao_sum', 'inv_r_alo_sum', 'inv_r_ino_sum',
    'cgao', 'calo', 'cino', 'coul',
    'formation_energy_ev_natom', 'bandgap_energy_ev',
]
sns.pairplot(
    data=train,
    vars=inv_r_c_coul_vars,
    kind='reg',
    diag_kind='kde',
)
plt.show()
In [37]:
# Pair plot of cgaga, calal, cinin, cgaal, cgain and calin together with
# formation_energy_ev_natom and bandgap_energy_ev.
# kind='reg' draws a regression fit on each off-diagonal panel;
# diag_kind='kde' draws a kernel density estimate on the diagonal.
c_pair_vars = [
    'cgaga', 'calal', 'cinin',
    'cgaal', 'cgain', 'calin',
    'formation_energy_ev_natom', 'bandgap_energy_ev',
]
sns.pairplot(
    data=train,
    vars=c_pair_vars,
    kind='reg',
    diag_kind='kde',
)
plt.show()