Regression on Custom Data¶
# Upload the local Excel workbook into the Colab runtime (interactive picker).
from google.colab import files
uploaded = files.upload()
# IPython shell magic: list the working directory to confirm the upload landed.
!ls
dati.xlsx drive sample_data
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the uploaded workbook; per data.info() below it holds 7479 rows x 135
# int64 columns (var* regressors plus the 'costi' target as last column).
data = pd.read_excel('dati.xlsx')
data.head()
var1 | var2 | var6 | var7 | var8 | var9 | var10 | var11 | var12 | var13 | var14 | var15 | var16 | var17 | var18 | var19 | var20 | var21 | var22 | var23 | var27 | var28 | var29 | var30 | var31 | var32 | var33 | var34 | var35 | var39 | var40 | var41 | var42 | var43 | var44 | var45 | var46 | var47 | var48 | var49 | ... | var108 | var109 | var110 | var111 | var112 | var113 | var114 | var115 | var116 | var123 | var124 | var125 | var126 | var127 | var128 | var129 | var130 | var131 | var132 | var133 | var134 | var135 | var136 | var137 | var141 | var142 | var143 | var147 | var148 | var149 | var150 | var151 | var152 | var153 | var154 | var155 | var156 | var157 | var158 | costi | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 147823139 | 148193655 | 51344312 | 61401789 | 71387976 | 23064675 | 20882011 | 14244395 | 2794279 | 2677799 | 2833077 | 14562964 | 14352577 | 8073273 | 0 | 0 | 0 | 0 | 0 | 73273 | 9312470 | 7915379 | 7272270 | 35436524 | 37191289 | 32749686 | 32719851 | 34723462 | 30111298 | 84978 | 318158 | 318158 | 0 | 0 | 0 | 7834908 | 2178851 | 18788146 | 290899 | 188430 | ... | 13310780 | 13264428 | 12114005 | 14296757 | 13558421 | 13421133 | -823170 | -507756 | -530593 | 1228034 | 977093 | 1122097 | 783131 | 572136 | 267603 | 1588604 | 1604455 | 1856974 | 36310 | -66672 | 135970 | 0 | 0 | 0 | 1870101 | 3257494 | 4594656 | -3027241 | 1534078 | 3813249 | 229 | 214 | 207 | 84978 | 318158 | 318158 | 25290616 | 24108358 | 24535319 | 144205134 |
1 | 147562322 | 163251010 | 1037776 | 1391301 | 1602930 | 23503659 | 22973259 | 20946238 | 13368527 | 12404318 | 11085331 | 1850794 | 1693511 | 1060485 | 364872 | 1287356 | 811956 | 1485922 | 406155 | 248529 | 34529004 | 35683884 | 35340902 | 23391321 | 22915530 | 18886264 | 10795833 | 11017483 | 11177585 | 1583 | 1583 | 1588 | 355977 | 43180 | 471450 | 5293624 | 2889766 | 1616895 | 408806 | 551290 | ... | 16889643 | 15833127 | 15986500 | 2713441 | 2434423 | 2180861 | 3207072 | 1304536 | -3015558 | 912728 | 1042592 | 1058524 | 80006 | 47390 | 35171 | 769967 | 895916 | 1201286 | 11943 | 82820 | 1553908 | 0 | 0 | 0 | 1373420 | 1220932 | 1304576 | 1924469 | 2124721 | 2204557 | 454 | 411 | 456 | 1583 | 1583 | 1588 | 14133394 | 10404117 | 12898505 | 140165127 |
2 | 147562000 | 142652000 | 737000 | 707000 | 745000 | 51250000 | 51604000 | 50992000 | 23900000 | 25404000 | 26269000 | 69000 | 66000 | 56000 | 0 | 0 | 0 | 0 | 0 | 0 | 17498000 | 12270000 | 12954000 | 31359000 | 47731000 | 43044000 | 26022000 | 43298000 | 39653000 | 1801000 | 1761000 | 1171000 | 3000 | 3000 | 0 | 19082000 | 4224000 | 5958000 | 1596000 | 7000 | ... | 9901000 | 10029000 | 9581000 | 3832000 | 3762000 | 3497000 | -6933000 | 683000 | 2125000 | 2624000 | 2743000 | 1937000 | 53000 | 59000 | 59000 | 915000 | 1122000 | 1153000 | -1000 | 3000 | -10000 | 0 | 0 | -36000 | 9000 | 1166000 | 1450000 | -803000 | 3170000 | 2408000 | 116 | 165 | 168 | 1801000 | 1761000 | 1171000 | 31861000 | 22494000 | 24687000 | 145190000 |
3 | 147067625 | 140160124 | 39316 | 5715 | 8015 | 5258584 | 5491288 | 5735791 | 4183889 | 4354175 | 4510666 | 25917 | 25917 | 25917 | 25917 | 25917 | 25917 | 0 | 0 | 0 | 25923161 | 17921979 | 19627665 | 21948953 | 22868722 | 23881755 | 15584991 | 15576621 | 23831452 | 890646 | 0 | 0 | 0 | 2226616 | 0 | 185206 | 54173 | 1151195 | 643650 | 567191 | ... | 3075383 | 3296773 | 3290801 | 631775 | 638706 | 725484 | -1868447 | -306450 | 3770484 | 421274 | 885471 | 377168 | 217482 | 149383 | 155720 | 428722 | 439358 | 525316 | 34150 | 95776 | -114667 | 0 | 0 | 0 | 80381 | 185005 | 258220 | 478455 | 494325 | 473999 | 60 | 60 | 65 | 890646 | 0 | 0 | 1000000 | 0 | 1188439 | 112760368 |
4 | 145273739 | 127413305 | 6571906 | 3613836 | 3218188 | 65670888 | 56917531 | 51856050 | 20251827 | 20965972 | 20661743 | 2324116 | 2070401 | 179562 | 1754388 | 1625505 | 178366 | 0 | 1196 | 1196 | 18950865 | 16878270 | 18797216 | 8362491 | 16303624 | 14624193 | 4425910 | 11432236 | 11013435 | 0 | 0 | 0 | 0 | 0 | 0 | 14381920 | 10465984 | 10529721 | 1463431 | 707312 | ... | 6335628 | 5294076 | 4974163 | 5310610 | 4361914 | 3345706 | -2235901 | 3075212 | -2049351 | 1676826 | 1524379 | 2270501 | 31436 | 12726 | 60454 | 781590 | 787832 | 1051073 | -481 | -95 | -697 | 0 | 0 | 0 | 472994 | 795056 | 943092 | 966348 | 1243727 | 869544 | 171 | 141 | 132 | 0 | 0 | 0 | 27325115 | 24037285 | 24492237 | 171980718 |
5 rows × 135 columns
# Column dtypes and memory footprint (all 135 columns are int64, no nulls).
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7479 entries, 0 to 7478
Columns: 135 entries, var1 to costi
dtypes: int64(135)
memory usage: 7.7 MB
# Summary statistics (count/mean/std/quantiles) for every numeric column.
data.describe()
var1 | var2 | var6 | var7 | var8 | var9 | var10 | var11 | var12 | var13 | var14 | var15 | var16 | var17 | var18 | var19 | var20 | var21 | var22 | var23 | var27 | var28 | var29 | var30 | var31 | var32 | var33 | var34 | var35 | var39 | var40 | var41 | var42 | var43 | var44 | var45 | var46 | var47 | var48 | var49 | ... | var108 | var109 | var110 | var111 | var112 | var113 | var114 | var115 | var116 | var123 | var124 | var125 | var126 | var127 | var128 | var129 | var130 | var131 | var132 | var133 | var134 | var135 | var136 | var137 | var141 | var142 | var143 | var147 | var148 | var149 | var150 | var151 | var152 | var153 | var154 | var155 | var156 | var157 | var158 | costi | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | ... | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7479.000000 | 7479.000000 | 7479.000000 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 | 7.479000e+03 |
mean | 7.450131e+06 | 6.995936e+06 | 3.377709e+05 | 2.954539e+05 | 2.374321e+05 | 2.080609e+06 | 1.977554e+06 | 1.866474e+06 | 9.678114e+05 | 9.397979e+05 | 8.494891e+05 | 2.730416e+05 | 2.483007e+05 | 2.281703e+05 | 1.867172e+05 | 1.722472e+05 | 1.578191e+05 | 2.484367e+04 | 2.069652e+04 | 1.858893e+04 | 1.325843e+06 | 1.256179e+06 | 1.160876e+06 | 2.188183e+06 | 2.161061e+06 | 2.019932e+06 | 1.505207e+06 | 1.506826e+06 | 1.418052e+06 | 6.012760e+04 | 5.466102e+04 | 5.602265e+04 | 9.828008e+04 | 8.966333e+04 | 6.497756e+04 | 5.901713e+05 | 5.677180e+05 | 5.011291e+05 | 5.604334e+04 | 5.130161e+04 | ... | 8.240549e+05 | 7.776656e+05 | 7.208212e+05 | 2.562921e+05 | 2.444049e+05 | 2.225539e+05 | -2.048778e+04 | -2.213982e+04 | -2.373427e+04 | 9.113039e+04 | 8.943493e+04 | 8.721292e+04 | 1.517198e+04 | 1.664002e+04 | 1.264100e+04 | 5.208721e+04 | 4.878168e+04 | 5.321769e+04 | -2.687684e+02 | -2.391546e+03 | 3.375687e+02 | -6.536953e+03 | -4.234294e+03 | -4.462733e+03 | 8.603223e+04 | 8.922157e+04 | 9.986623e+04 | 1.745804e+05 | 1.562779e+05 | 1.578679e+05 | 21.127958 | 20.665062 | 19.299773 | 6.772403e+04 | 6.175881e+04 | 6.265347e+04 | 1.076427e+06 | 9.915747e+05 | 9.137636e+05 | 7.633431e+06 |
std | 1.646081e+07 | 1.588951e+07 | 6.690769e+06 | 5.441119e+06 | 2.622168e+06 | 5.869370e+06 | 5.642296e+06 | 5.481200e+06 | 3.769399e+06 | 3.708256e+06 | 3.013458e+06 | 1.941856e+06 | 1.788020e+06 | 1.698509e+06 | 1.564896e+06 | 1.488300e+06 | 1.409354e+06 | 4.182777e+05 | 3.335736e+05 | 2.810225e+05 | 4.068137e+06 | 3.848374e+06 | 3.627495e+06 | 4.826904e+06 | 4.757523e+06 | 4.640312e+06 | 3.604305e+06 | 3.616744e+06 | 3.454110e+06 | 4.522008e+05 | 3.798973e+05 | 3.431355e+05 | 1.457817e+06 | 1.281663e+06 | 9.854663e+05 | 2.142858e+06 | 2.123655e+06 | 1.775132e+06 | 2.349897e+05 | 2.311300e+05 | ... | 2.012542e+06 | 1.902859e+06 | 1.754317e+06 | 9.203786e+05 | 8.755274e+05 | 6.934450e+05 | 5.676930e+05 | 4.581167e+05 | 4.757960e+05 | 2.798760e+05 | 3.139860e+05 | 3.194439e+05 | 2.030893e+05 | 3.315507e+05 | 1.329318e+05 | 2.667476e+05 | 1.764451e+05 | 2.018601e+05 | 3.163845e+04 | 6.514876e+04 | 3.858767e+04 | 3.112814e+05 | 8.613143e+04 | 1.057758e+05 | 3.406345e+05 | 3.400689e+05 | 3.614035e+05 | 1.173476e+06 | 1.160223e+06 | 9.680606e+05 | 46.321574 | 46.255558 | 44.001820 | 4.963626e+05 | 4.301135e+05 | 3.964864e+05 | 4.537630e+06 | 3.720963e+06 | 3.141080e+06 | 1.683081e+07 |
min | 4.150000e+02 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | ... | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | -2.239386e+07 | -1.435085e+07 | -1.746800e+07 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | -1.448299e+06 | -1.556761e+06 | -1.277128e+06 | -2.626785e+07 | -3.700000e+06 | -7.439191e+06 | -5.930205e+06 | -2.872546e+06 | -1.594206e+06 | -3.335908e+07 | -6.080911e+07 | -3.185507e+07 | 0.000000 | 0.000000 | 0.000000 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 |
25% | 5.184700e+05 | 4.002950e+05 | 6.975000e+02 | 7.300000e+02 | 3.775000e+02 | 6.432050e+04 | 5.949900e+04 | 3.985650e+04 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 2.053550e+04 | 1.949950e+04 | 1.205200e+04 | 1.559340e+05 | 1.477175e+05 | 1.042130e+05 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.458000e+04 | 1.615450e+04 | 1.075200e+04 | 6.000000e+02 | 5.625000e+02 | ... | 9.125000e+04 | 8.197950e+04 | 5.863450e+04 | 1.444350e+04 | 1.248900e+04 | 9.404500e+03 | -1.659450e+04 | -2.148750e+04 | -1.365000e+04 | 7.652500e+03 | 6.483000e+03 | 5.034000e+03 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.503500e+03 | 1.203000e+03 | 7.910000e+02 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.754500e+03 | 1.750000e+03 | 8.245000e+02 | 1.013000e+03 | 1.045000e+03 | 0.000000e+00 | 4.000000 | 4.000000 | 3.000000 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 5.438740e+05 |
50% | 1.695363e+06 | 1.533985e+06 | 1.381500e+04 | 1.238800e+04 | 9.955000e+03 | 3.347110e+05 | 3.155580e+05 | 2.630320e+05 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 6.410000e+02 | 6.030000e+02 | 3.650000e+02 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.257350e+05 | 1.193750e+05 | 1.008450e+05 | 5.375490e+05 | 5.138090e+05 | 4.571890e+05 | 2.296820e+05 | 2.075290e+05 | 1.797610e+05 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 6.617500e+04 | 6.966300e+04 | 5.812100e+04 | 5.084000e+03 | 4.535000e+03 | ... | 2.699830e+05 | 2.525620e+05 | 2.204670e+05 | 4.847200e+04 | 4.608400e+04 | 4.158700e+04 | 0.000000e+00 | -5.000000e+01 | 0.000000e+00 | 2.294600e+04 | 2.142400e+04 | 1.997200e+04 | 1.400000e+01 | 2.000000e+01 | 2.400000e+01 | 8.236000e+03 | 7.691000e+03 | 7.702000e+03 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.053200e+04 | 1.050300e+04 | 1.029100e+04 | 1.601000e+04 | 1.603900e+04 | 1.249900e+04 | 10.000000 | 9.000000 | 9.000000 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.145440e+05 | 1.071490e+05 | 8.262500e+04 | 1.760361e+06 |
75% | 6.050798e+06 | 5.594102e+06 | 8.305000e+04 | 7.726200e+04 | 6.841050e+04 | 1.514472e+06 | 1.470158e+06 | 1.357036e+06 | 1.525510e+05 | 1.505230e+05 | 8.037500e+03 | 1.238250e+04 | 1.148500e+04 | 9.743000e+03 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 7.469065e+05 | 7.242070e+05 | 6.505790e+05 | 1.876297e+06 | 1.835515e+06 | 1.688428e+06 | 1.212872e+06 | 1.228060e+06 | 1.120250e+06 | 8.460000e+02 | 1.243500e+03 | 1.800000e+03 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 3.167980e+05 | 3.178475e+05 | 2.838085e+05 | 2.642000e+04 | 2.368550e+04 | ... | 6.861245e+05 | 6.513525e+05 | 6.123230e+05 | 1.769925e+05 | 1.651410e+05 | 1.541010e+05 | 6.946000e+03 | 4.335500e+03 | 4.780500e+03 | 7.280900e+04 | 6.995300e+04 | 6.879700e+04 | 5.370000e+02 | 7.200000e+02 | 6.890000e+02 | 3.309400e+04 | 3.361550e+04 | 3.634900e+04 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 4.810350e+04 | 4.970400e+04 | 5.255950e+04 | 9.653200e+04 | 9.412450e+04 | 8.194550e+04 | 20.000000 | 20.000000 | 19.000000 | 3.167000e+03 | 3.810500e+03 | 4.782000e+03 | 6.487320e+05 | 6.149110e+05 | 5.736580e+05 | 6.073618e+06 |
max | 1.478231e+08 | 1.954054e+08 | 3.949990e+08 | 4.236760e+08 | 1.218370e+08 | 1.822085e+08 | 1.819357e+08 | 1.819572e+08 | 1.814344e+08 | 1.814863e+08 | 5.161911e+07 | 5.580400e+07 | 5.208281e+07 | 5.203800e+07 | 5.255939e+07 | 5.207929e+07 | 5.203700e+07 | 2.095430e+07 | 1.845768e+07 | 1.460000e+07 | 9.040429e+07 | 8.979462e+07 | 8.604582e+07 | 8.066182e+07 | 7.479026e+07 | 9.178638e+07 | 7.180935e+07 | 6.493519e+07 | 5.631069e+07 | 1.943005e+07 | 1.777804e+07 | 1.030000e+07 | 9.020468e+07 | 7.449480e+07 | 6.477029e+07 | 5.985522e+07 | 6.518876e+07 | 4.206783e+07 | 8.187088e+06 | 9.092467e+06 | ... | 6.004269e+07 | 5.354036e+07 | 3.470024e+07 | 5.065200e+07 | 4.429233e+07 | 2.746300e+07 | 1.973944e+07 | 8.680905e+06 | 8.962435e+06 | 1.006186e+07 | 1.105028e+07 | 1.301970e+07 | 1.048585e+07 | 2.550890e+07 | 6.901368e+06 | 1.134100e+07 | 6.313221e+06 | 6.851000e+06 | 6.784740e+05 | 3.876515e+06 | 1.553908e+06 | 9.993120e+05 | 1.840159e+06 | 6.716100e+05 | 9.545512e+06 | 7.919978e+06 | 1.002997e+07 | 1.838250e+07 | 1.384342e+07 | 2.333264e+07 | 1384.000000 | 1307.000000 | 1244.000000 | 1.943005e+07 | 1.777804e+07 | 1.662483e+07 | 2.025610e+08 | 1.872810e+08 | 1.169022e+08 | 1.754113e+08 |
8 rows × 135 columns
# Features: every column except the last ('costi' is the final column).
X = data.iloc[:,0:-1]
# Target: the cost column to be predicted.
y = data['costi']
X.shape
(7479, 134)
y.shape
(7479,)
# Split the data into a training set and a test set (70% / 30%);
# random_state=0 makes the split reproducible.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
(5235, 134) (2244, 134) (5235,) (2244,)
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares model on the training split.
# NOTE: the original used LinearRegression(normalize=True).  That parameter
# was deprecated in scikit-learn 0.24 and removed in 1.2, so the original
# code crashes on current scikit-learn.  For plain OLS the internal
# rescaling did not change the fitted predictions, and coef_/intercept_ are
# reported on the original feature scale either way, so it is dropped here.
# (To scale explicitly, use make_pipeline(StandardScaler(), LinearRegression()).)
model = LinearRegression()
model.fit(X_train, y_train)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=True)
# Fitted bias term (value printed below).
model.intercept_
-3200.777457194403
# One coefficient per input column (134 values, printed below).
model.coef_
array([-2.30996462e-03, -1.50058663e-02, -2.81813068e-02, 1.84983370e-01,
-1.07535485e-01, -1.25291295e-02, 1.39249816e-01, -9.91288975e-02,
4.14284936e-03, -4.74053837e-03, -4.09265041e-04, -1.07988920e-02,
5.53436241e-02, -4.89814741e-02, -2.08352209e-02, 7.25301721e-02,
-3.77319133e-02, 8.76194678e-02, -7.65657544e-02, 1.17948143e-01,
-9.30513956e-01, 1.08357037e+00, -1.26653220e-01, 4.79202414e-03,
1.43007817e-01, -1.15888295e-01, 9.94973398e-03, 2.70744978e-04,
-7.92966224e-03, 2.69702385e-02, -4.17703102e-02, 1.29981403e-02,
-2.03380493e-02, 1.65893118e-01, -1.27604963e-01, -2.66074722e-02,
1.79488728e-01, -1.18126746e-01, -4.34200642e-02, 1.21637354e-02,
-1.04109145e-02, 5.60581055e-03, -6.01850048e-02, 4.47773476e-02,
2.20944502e-02, -1.13635756e-01, 7.64066351e-02, 1.19314776e-04,
9.93569282e-03, -1.59561082e-02, -7.39703203e-02, -1.36109362e-01,
1.99741967e-01, 1.20559367e-01, -6.27703586e-02, -1.04838456e-01,
2.16557279e-03, -9.92887862e-02, 7.54038176e-02, 2.07529998e-03,
4.41791060e-03, 9.55652299e-04, -5.91073869e-03, -4.61698720e-02,
-3.67701369e-03, 4.96002655e-02, 3.53159925e-03, -1.41144644e-02,
1.63627563e-02, 2.72654249e-02, 1.04871684e-02, -3.80283405e-02,
-1.08564554e-02, 1.39354551e-02, 1.32101117e-03, -4.13592564e-03,
-5.18201426e-03, -2.56661936e-02, -1.70815551e-01, 1.63681451e-01,
1.24688231e+00, -4.47986551e-02, -9.86953813e-02, -9.54673606e-01,
5.20125142e-02, -1.03908251e-01, -2.52766571e-01, 4.74436896e-02,
1.18334001e-01, -2.33639979e-01, 7.00241620e-02, 9.27074982e-02,
-3.03265154e-01, 8.86666687e-02, 8.77259902e-02, -2.27952770e-01,
-1.43269119e-02, 1.39954894e-01, -3.04086786e-01, 1.11272269e-01,
3.04783758e-02, -1.19847308e+00, 1.00332024e-02, 1.20315710e-01,
-1.96644059e-01, -9.27282850e-02, 1.83966606e-01, 1.92388403e-01,
1.04369772e-01, -1.97001878e-01, -8.36998547e-02, -1.21567478e-01,
8.16877455e-02, -5.26305994e-01, -3.52562902e-02, -1.94390763e-01,
2.07845429e-01, -8.20499899e-01, -1.30818522e-03, -3.24226028e-01,
1.05224108e-01, 5.12933641e-02, -2.29442019e-01, -2.68507534e-03,
1.74531598e-01, 6.62690395e+02, -2.30495434e+02, 2.22724695e+02,
-2.02134192e-02, 1.36489920e-02, 9.06889487e-03, 2.07529998e-03,
4.41791060e-03, 9.55652299e-04])
# Predict on the held-out test rows and plot actual vs. predicted costs.
y_predict = model.predict(X_test)
import numpy as np

n_points = y_predict.shape[0]
x = np.linspace(0, n_points, n_points)
plt.figure(figsize=(30, 15))
# Actual series first, predicted second (keeps the default color order).
for series, tag in ((y_test, 'costi_reali'), (y_predict, 'costi_predetti')):
    plt.plot(x, series, label=tag)
plt.legend(loc=2);
Export the coefficients of the regressor¶
# Export the fitted coefficients of the regressor to an Excel file.
coeff = model.coef_
coeff
coeff_frame = pd.DataFrame({'coeff': coeff})
coeff_frame.to_excel('formula.xlsx', index=False)
Export the predictions in a final column¶
# Predict over the whole dataset and store the result as a new final column.
data['costi_PRED']=model.predict(data.iloc[:,0:-1])
# Side-by-side view of actual vs. predicted cost.
data[['costi', 'costi_PRED']].head()
costi | costi_PRED | |
---|---|---|
0 | 144205134 | 1.443731e+08 |
1 | 140165127 | 1.379588e+08 |
2 | 145190000 | 1.444782e+08 |
3 | 112760368 | 1.130755e+08 |
4 | 171980718 | 1.734026e+08 |
# Save the enriched dataset (original columns plus costi_PRED) to Excel.
data.to_excel('result1.xlsx', index=False)
Export model¶
import pickle

# Serialize the fitted linear-regression model (all 134 regressors) to disk.
# Use a context manager so the file handle is closed deterministically; the
# original passed an inline open() to pickle.dump and never closed the file.
with open('model.p', 'wb') as model_file:
    pickle.dump(model, model_file)
Import the model (pickle) with new data¶
# Upload the new workbook (test_cost.xlsx) into the Colab runtime.
from google.colab import files
uploaded = files.upload()
Saving test_cost.xlsx to test_cost.xlsx
# Loading a new dataset with the same 134-regressor layout as the training data.
import pandas as pd
import numpy as np
df = pd.read_excel('test_cost.xlsx')
# Same convention as before: every column except the last is a feature,
# 'costi' is the target.
X = df.iloc[:,0:-1]
y = df['costi']
X.shape
(100, 134)
# Load the pickled model.
# SECURITY NOTE: pickle.load can execute arbitrary code while deserializing;
# only unpickle model.p if it comes from a trusted source.
# A context manager closes the file handle (the original leaked it).
with open("model.p", "rb") as model_file:
    model = pickle.load(model_file)
y_predict = model.predict(X)
# Reuse the predictions just computed instead of predicting a second time,
# as the original did with a redundant model.predict(X) call.
df['costi_PRED'] = y_predict
df[['costi', 'costi_PRED']].head()
costi | costi_PRED | |
---|---|---|
0 | 2508929000 | 2.522365e+09 |
1 | 2173873000 | 2.171454e+09 |
2 | 1526367000 | 1.572747e+09 |
3 | 1174142000 | 1.166190e+09 |
4 | 0 | -4.325780e+07 |
# Plot actual vs. predicted costs for the new dataset.
length = y.shape[0]  # number of rows in the new dataset
x = np.linspace(0,length,length)
plt.figure(figsize=(20,15))
plt.plot(x, y, label='costi_reali')
plt.plot(x, y_predict, label='costi_predetti')
plt.legend(loc=2);
# Save the new dataset together with its prediction column.
df.to_excel('result_new_dataset.xlsx', index=False)
Reduce the number of regressors to 8¶
# Work on a copy and remove the prediction column added earlier, so the
# correlation matrix covers only the original 135 columns.
data2 = data.copy()
del data2['costi_PRED']
# Full 135x135 Pearson correlation matrix.
corrmat = data2.corr()
corrmat
var1 | var2 | var6 | var7 | var8 | var9 | var10 | var11 | var12 | var13 | var14 | var15 | var16 | var17 | var18 | var19 | var20 | var21 | var22 | var23 | var27 | var28 | var29 | var30 | var31 | var32 | var33 | var34 | var35 | var39 | var40 | var41 | var42 | var43 | var44 | var45 | var46 | var47 | var48 | var49 | ... | var108 | var109 | var110 | var111 | var112 | var113 | var114 | var115 | var116 | var123 | var124 | var125 | var126 | var127 | var128 | var129 | var130 | var131 | var132 | var133 | var134 | var135 | var136 | var137 | var141 | var142 | var143 | var147 | var148 | var149 | var150 | var151 | var152 | var153 | var154 | var155 | var156 | var157 | var158 | costi | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
var1 | 1.000000 | 0.986345 | 0.135692 | 0.130403 | 0.235256 | 0.652578 | 0.639252 | 0.625818 | 0.522128 | 0.517458 | 0.624950 | 0.415115 | 0.386054 | 0.369241 | 0.381076 | 0.351668 | 0.337167 | 0.187145 | 0.182562 | 0.213661 | 0.643967 | 0.652575 | 0.653724 | 0.885616 | 0.901770 | 0.889494 | 0.849578 | 0.871460 | 0.873803 | 0.206912 | 0.217005 | 0.260878 | 0.152898 | 0.170960 | 0.167835 | 0.561514 | 0.529838 | 0.551337 | 0.342868 | 0.313105 | ... | 0.738153 | 0.747532 | 0.763650 | 0.557382 | 0.591085 | 0.674079 | -0.081304 | -0.108396 | -0.161343 | 0.458148 | 0.441197 | 0.424094 | 0.188005 | 0.141545 | 0.265174 | 0.387952 | 0.532638 | 0.535435 | -0.033266 | -0.075452 | 0.034593 | -0.030284 | -0.157753 | -0.136756 | 0.503319 | 0.566754 | 0.620623 | 0.374695 | 0.314028 | 0.392427 | 0.639018 | 0.626608 | 0.642236 | 0.188735 | 0.188945 | 0.223395 | 0.456726 | 0.488830 | 0.537188 | 0.986153 |
var2 | 0.986345 | 1.000000 | 0.183263 | 0.193421 | 0.267485 | 0.656802 | 0.644170 | 0.635073 | 0.529413 | 0.525425 | 0.638663 | 0.416329 | 0.389934 | 0.370164 | 0.378868 | 0.351583 | 0.336201 | 0.188228 | 0.182147 | 0.214706 | 0.637955 | 0.648195 | 0.653929 | 0.883733 | 0.895384 | 0.894533 | 0.852143 | 0.868651 | 0.883315 | 0.217426 | 0.220016 | 0.264968 | 0.152903 | 0.168143 | 0.169033 | 0.553340 | 0.528199 | 0.553966 | 0.330029 | 0.305480 | ... | 0.726053 | 0.733222 | 0.767513 | 0.594631 | 0.603129 | 0.712480 | -0.078030 | -0.105031 | -0.157499 | 0.464254 | 0.447246 | 0.434021 | 0.186468 | 0.152456 | 0.271204 | 0.413598 | 0.548888 | 0.565061 | -0.065981 | -0.084900 | 0.030522 | -0.042834 | -0.174988 | -0.136803 | 0.465343 | 0.548087 | 0.623318 | 0.341054 | 0.290481 | 0.381861 | 0.630293 | 0.620489 | 0.642667 | 0.198160 | 0.191570 | 0.226940 | 0.476820 | 0.518401 | 0.542348 | 0.976198 |
var6 | 0.135692 | 0.183263 | 1.000000 | 0.873993 | 0.813696 | 0.189718 | 0.182933 | 0.165854 | 0.131329 | 0.127274 | 0.139800 | 0.335777 | 0.130861 | 0.101365 | 0.259090 | 0.096747 | 0.087498 | 0.003349 | 0.003959 | 0.006180 | 0.113859 | 0.109382 | 0.099655 | 0.148542 | 0.155342 | 0.128628 | 0.128418 | 0.143798 | 0.123677 | 0.097007 | 0.055760 | 0.051068 | 0.005120 | 0.005173 | 0.008444 | 0.089825 | 0.176643 | 0.165656 | 0.055283 | 0.023465 | ... | 0.184832 | 0.138871 | 0.182232 | 0.674075 | 0.334837 | 0.517908 | 0.001620 | -0.048689 | -0.013749 | 0.107458 | 0.085651 | 0.087308 | 0.059003 | 0.078457 | 0.122742 | 0.622851 | 0.224365 | 0.521631 | -0.216426 | 0.005878 | -0.069827 | -0.004018 | 0.025614 | -0.184567 | -0.090820 | 0.202786 | 0.202446 | -0.125143 | 0.031223 | 0.106843 | 0.145932 | 0.139871 | 0.135043 | 0.087843 | 0.048679 | 0.043582 | 0.752713 | 0.479469 | 0.243378 | 0.176026 |
var7 | 0.130403 | 0.193421 | 0.873993 | 1.000000 | 0.725537 | 0.226065 | 0.216583 | 0.194307 | 0.151139 | 0.145181 | 0.156576 | 0.219797 | 0.143006 | 0.104691 | 0.167873 | 0.099393 | 0.089165 | 0.002658 | 0.002687 | 0.006144 | 0.125486 | 0.113749 | 0.104222 | 0.166999 | 0.175232 | 0.145167 | 0.156505 | 0.176031 | 0.147110 | 0.129380 | 0.052250 | 0.051619 | 0.007890 | 0.007631 | 0.012758 | 0.103887 | 0.144036 | 0.178280 | 0.040511 | 0.028061 | ... | 0.189389 | 0.127823 | 0.182980 | 0.748622 | 0.354451 | 0.591595 | -0.006842 | -0.050565 | -0.008803 | 0.114722 | 0.079594 | 0.102993 | 0.053131 | 0.101960 | 0.150623 | 0.498843 | 0.280969 | 0.427650 | -0.335108 | 0.048683 | -0.111728 | -0.003179 | 0.028650 | -0.200492 | -0.115681 | 0.134505 | 0.202609 | -0.158951 | -0.029327 | 0.084978 | 0.151873 | 0.147328 | 0.136093 | 0.117306 | 0.045500 | 0.044010 | 0.652591 | 0.635520 | 0.317137 | 0.184230 |
var8 | 0.235256 | 0.267485 | 0.813696 | 0.725537 | 1.000000 | 0.265146 | 0.266050 | 0.246782 | 0.206811 | 0.205262 | 0.207759 | 0.363357 | 0.211711 | 0.196459 | 0.298309 | 0.178568 | 0.184040 | 0.006438 | 0.006876 | 0.014061 | 0.157938 | 0.157350 | 0.144310 | 0.222544 | 0.228397 | 0.213953 | 0.195191 | 0.206778 | 0.197445 | 0.078609 | 0.065872 | 0.071788 | 0.020384 | 0.019720 | 0.025038 | 0.147141 | 0.210100 | 0.266617 | 0.084317 | 0.061866 | ... | 0.247220 | 0.235928 | 0.261570 | 0.617437 | 0.510830 | 0.597908 | -0.008968 | -0.097494 | -0.007816 | 0.167191 | 0.169458 | 0.136359 | 0.073326 | 0.214011 | 0.352343 | 0.600951 | 0.449527 | 0.642732 | -0.081671 | 0.264306 | -0.198528 | -0.002500 | 0.070876 | -0.465443 | 0.017319 | 0.243098 | 0.260698 | -0.111626 | 0.026316 | 0.082273 | 0.197056 | 0.190604 | 0.191515 | 0.070684 | 0.057094 | 0.061017 | 0.708195 | 0.452155 | 0.439678 | 0.243066 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
var155 | 0.223395 | 0.226940 | 0.043582 | 0.044010 | 0.061017 | 0.202353 | 0.202773 | 0.201791 | 0.166981 | 0.163899 | 0.199016 | 0.128268 | 0.126408 | 0.120588 | 0.118179 | 0.114071 | 0.110688 | 0.015444 | 0.016602 | 0.023159 | 0.138535 | 0.145335 | 0.156384 | 0.305291 | 0.327402 | 0.325720 | 0.251763 | 0.262242 | 0.250124 | 0.576255 | 0.675614 | 0.862762 | 0.004004 | 0.003298 | 0.012819 | 0.125483 | 0.102116 | 0.109212 | 0.134293 | 0.113861 | ... | 0.171965 | 0.176136 | 0.183390 | 0.177772 | 0.171289 | 0.206994 | -0.010671 | 0.017562 | -0.024172 | 0.152030 | 0.140095 | 0.179137 | 0.051656 | 0.034954 | 0.154785 | 0.188886 | 0.220122 | 0.223574 | -0.057395 | -0.054727 | 0.035122 | -0.017513 | -0.055084 | -0.011086 | 0.088607 | 0.100526 | 0.105844 | 0.019526 | 0.030046 | 0.055176 | 0.137069 | 0.136641 | 0.141692 | 0.722125 | 0.827372 | 1.000000 | 0.192632 | 0.232049 | 0.277538 | 0.219139 |
var156 | 0.456726 | 0.476820 | 0.752713 | 0.652591 | 0.708195 | 0.578031 | 0.564299 | 0.533198 | 0.458980 | 0.444540 | 0.504364 | 0.452596 | 0.297705 | 0.269472 | 0.397359 | 0.274203 | 0.262533 | 0.086206 | 0.092490 | 0.101815 | 0.435457 | 0.433399 | 0.409762 | 0.469106 | 0.483043 | 0.454719 | 0.410637 | 0.430182 | 0.412124 | 0.210692 | 0.154981 | 0.199667 | 0.042844 | 0.038677 | 0.016677 | 0.260620 | 0.313495 | 0.305936 | 0.227179 | 0.178598 | ... | 0.415474 | 0.386672 | 0.415266 | 0.730107 | 0.523319 | 0.671638 | -0.040482 | -0.092845 | -0.093796 | 0.289609 | 0.283453 | 0.313727 | 0.136388 | 0.151322 | 0.281111 | 0.693159 | 0.540685 | 0.713428 | -0.154815 | 0.045929 | -0.046221 | -0.024945 | -0.017496 | -0.296575 | 0.069369 | 0.319128 | 0.329185 | -0.012658 | 0.103984 | 0.169218 | 0.349209 | 0.335781 | 0.335507 | 0.207735 | 0.154641 | 0.192632 | 1.000000 | 0.800378 | 0.698905 | 0.482840 |
var157 | 0.488830 | 0.518401 | 0.479469 | 0.635520 | 0.452155 | 0.656210 | 0.645442 | 0.611144 | 0.515172 | 0.502036 | 0.573969 | 0.323258 | 0.336961 | 0.292983 | 0.285183 | 0.290790 | 0.266979 | 0.104310 | 0.115773 | 0.123223 | 0.454784 | 0.451243 | 0.440515 | 0.516594 | 0.539544 | 0.516462 | 0.474865 | 0.500620 | 0.481140 | 0.237844 | 0.181698 | 0.239888 | 0.041072 | 0.037496 | 0.023761 | 0.279626 | 0.299713 | 0.313378 | 0.232222 | 0.210832 | ... | 0.438662 | 0.400436 | 0.440466 | 0.748071 | 0.511556 | 0.704817 | -0.049675 | -0.079458 | -0.124970 | 0.330287 | 0.311307 | 0.359791 | 0.153271 | 0.247595 | 0.213060 | 0.546249 | 0.626313 | 0.583229 | -0.258613 | -0.054892 | -0.005612 | -0.066081 | -0.086703 | -0.165852 | 0.074250 | 0.241839 | 0.319580 | -0.000928 | 0.072383 | 0.144425 | 0.378126 | 0.368310 | 0.368484 | 0.236244 | 0.182645 | 0.232049 | 0.800378 | 1.000000 | 0.777121 | 0.524987 |
var158 | 0.537188 | 0.542348 | 0.243378 | 0.317137 | 0.439678 | 0.682216 | 0.684901 | 0.684083 | 0.570262 | 0.562681 | 0.652159 | 0.353212 | 0.377000 | 0.368508 | 0.333145 | 0.350430 | 0.352395 | 0.107082 | 0.116334 | 0.123803 | 0.483165 | 0.489269 | 0.490547 | 0.548446 | 0.567178 | 0.554446 | 0.483307 | 0.495304 | 0.490955 | 0.214167 | 0.222927 | 0.286564 | 0.045740 | 0.041128 | 0.024487 | 0.277906 | 0.272270 | 0.341752 | 0.245506 | 0.231290 | ... | 0.446247 | 0.449236 | 0.471581 | 0.555260 | 0.553331 | 0.643753 | -0.069377 | -0.074575 | -0.131576 | 0.353918 | 0.354668 | 0.399339 | 0.152856 | 0.081858 | 0.402450 | 0.427419 | 0.662039 | 0.669947 | -0.068658 | 0.173842 | -0.114673 | -0.078316 | 0.013429 | -0.420892 | 0.207181 | 0.285600 | 0.296679 | 0.049811 | 0.051782 | 0.095128 | 0.387896 | 0.380843 | 0.393833 | 0.217472 | 0.223513 | 0.277538 | 0.698905 | 0.777121 | 1.000000 | 0.535850 |
costi | 0.986153 | 0.976198 | 0.176026 | 0.184230 | 0.243066 | 0.663195 | 0.645689 | 0.623935 | 0.526338 | 0.519144 | 0.620732 | 0.412508 | 0.386070 | 0.362460 | 0.375275 | 0.349002 | 0.328834 | 0.185970 | 0.182333 | 0.213124 | 0.651736 | 0.654572 | 0.638381 | 0.892665 | 0.896231 | 0.874316 | 0.865041 | 0.873108 | 0.864293 | 0.205703 | 0.211874 | 0.256292 | 0.151247 | 0.170439 | 0.164481 | 0.562900 | 0.519217 | 0.545312 | 0.340801 | 0.312354 | ... | 0.758261 | 0.753295 | 0.765291 | 0.595142 | 0.576023 | 0.690973 | -0.104126 | -0.131195 | -0.162064 | 0.458831 | 0.423249 | 0.409725 | 0.188688 | 0.140951 | 0.248732 | 0.400310 | 0.515660 | 0.524363 | -0.062510 | -0.096260 | 0.036555 | -0.032482 | -0.157398 | -0.124410 | 0.507332 | 0.565530 | 0.625243 | 0.387224 | 0.337390 | 0.416265 | 0.654021 | 0.631454 | 0.640518 | 0.187614 | 0.184095 | 0.219139 | 0.482840 | 0.524987 | 0.535850 | 1.000000 |
135 rows × 135 columns
# Heatmap of the full correlation matrix (annotations are dense at 135x135).
fig, ax = plt.subplots(figsize = (18, 10))
sns.heatmap(corrmat, annot = True, annot_kws={'size': 12});
# Column labels of the correlation matrix (printed below).
corrmat.index.values
array(['var1', 'var2', 'var6', 'var7', 'var8', 'var9', 'var10', 'var11',
'var12', 'var13', 'var14', 'var15', 'var16', 'var17', 'var18',
'var19', 'var20', 'var21', 'var22', 'var23', 'var27', 'var28',
'var29', 'var30', 'var31', 'var32', 'var33', 'var34', 'var35',
'var39', 'var40', 'var41', 'var42', 'var43', 'var44', 'var45',
'var46', 'var47', 'var48', 'var49', 'var50', 'var51', 'var52',
'var53', 'var54', 'var55', 'var56', 'var57', 'var58', 'var59',
'var60', 'var61', 'var62', 'var63', 'var64', 'var65', 'var66',
'var67', 'var68', 'var69', 'var70', 'var71', 'var72', 'var73',
'var74', 'var75', 'var76', 'var77', 'var78', 'var79', 'var80',
'var81', 'var82', 'var83', 'var84', 'var85', 'var86', 'var90',
'var91', 'var92', 'var93', 'var94', 'var95', 'var96', 'var97',
'var98', 'var99', 'var100', 'var101', 'var102', 'var103', 'var104',
'var105', 'var106', 'var107', 'var108', 'var109', 'var110',
'var111', 'var112', 'var113', 'var114', 'var115', 'var116',
'var123', 'var124', 'var125', 'var126', 'var127', 'var128',
'var129', 'var130', 'var131', 'var132', 'var133', 'var134',
'var135', 'var136', 'var137', 'var141', 'var142', 'var143',
'var147', 'var148', 'var149', 'var150', 'var151', 'var152',
'var153', 'var154', 'var155', 'var156', 'var157', 'var158',
'costi'], dtype=object)
def getCorrelatedFeature(corrdata, threshold):
    """Select the entries of *corrdata* whose absolute value exceeds *threshold*.

    Parameters
    ----------
    corrdata : pd.Series
        One column of a correlation matrix (e.g. ``corrmat['costi']``).
    threshold : float
        Strict lower bound on ``abs(correlation)`` for a feature to be kept.

    Returns
    -------
    pd.DataFrame
        Indexed by the selected feature names, with a single column
        'Corr Value'.  Note the target itself (correlation 1.0) is kept too.
    """
    # Vectorized boolean mask replaces the original index loop with its
    # unused enumerate counter; selection order (= index order) is preserved.
    selected = corrdata[corrdata.abs() > threshold]
    return pd.DataFrame({'Corr Value': selected})
# Keep only features whose |correlation| with the target 'costi' exceeds 0.90.
threshold = 0.90
corr_value = getCorrelatedFeature(corrmat['costi'], threshold)
# Display the selected features; 'costi' itself is included (corr == 1.0).
corr_value
Corr Value | |
---|---|
var1 | 0.986153 |
var2 | 0.976198 |
var93 | 0.998334 |
var94 | 0.985817 |
var95 | 0.975280 |
var99 | 0.959306 |
var100 | 0.946764 |
var101 | 0.936186 |
costi | 1.000000 |
# Names of the retained features (plus the target).
corr_value.index.values
array(['var1', 'var2', 'var93', 'var94', 'var95', 'var99', 'var100',
'var101', 'costi'], dtype=object)
# Restrict the dataset to the highly correlated columns.
# NOTE(review): 'data2' is defined in an earlier (unseen) cell — presumably a
# cleaned/extended copy of 'data'; confirm it exists before this cell runs.
correlated_data = data2[corr_value.index]
correlated_data.head()
var1 | var2 | var93 | var94 | var95 | var99 | var100 | var101 | costi | |
---|---|---|---|---|---|---|---|---|---|
0 | 147823139 | 148193655 | 145914503 | 148992105 | 149021406 | 85824629 | 86758885 | 83651116 | 144205134 |
1 | 147562322 | 163251010 | 142727898 | 150222876 | 166417107 | 102237238 | 113037130 | 131392006 | 140165127 |
2 | 147562000 | 142652000 | 148757000 | 153783000 | 148906000 | 117422000 | 111227000 | 108035000 | 145190000 |
3 | 147067625 | 140160124 | 119349203 | 145528364 | 143260149 | 105779603 | 129982682 | 121831610 | 112760368 |
4 | 145273739 | 127413305 | 174796438 | 148011089 | 130682792 | 149110551 | 119058121 | 106955719 | 171980718 |
# Reduced dataset size: rows x (8 features + target).
correlated_data.shape
(7479, 9)
# Pairwise scatter plots of the retained features against each other.
sns.pairplot(correlated_data)
plt.tight_layout()
# Correlation heatmap restricted to the selected columns only.
sns.heatmap(correlated_data.corr(), annot=True, annot_kws={'size': 12});
# Separate the reduced frame into target y ('costi') and predictors X.
y = correlated_data['costi']
X = correlated_data.drop(columns=['costi'])
X.head()
var1 | var2 | var93 | var94 | var95 | var99 | var100 | var101 | |
---|---|---|---|---|---|---|---|---|
0 | 147823139 | 148193655 | 145914503 | 148992105 | 149021406 | 85824629 | 86758885 | 83651116 |
1 | 147562322 | 163251010 | 142727898 | 150222876 | 166417107 | 102237238 | 113037130 | 131392006 |
2 | 147562000 | 142652000 | 148757000 | 153783000 | 148906000 | 117422000 | 111227000 | 108035000 |
3 | 147067625 | 140160124 | 119349203 | 145528364 | 143260149 | 105779603 | 129982682 | 121831610 |
4 | 145273739 | 127413305 | 174796438 | 148011089 | 130682792 | 149110551 | 119058121 | 106955719 |
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
X_train.shape, X_test.shape
((5983, 8), (1496, 8))
# Fit an ordinary least squares model on the training split.
# FIX: the 'normalize' parameter was deprecated in scikit-learn 1.0 and
# removed in 1.2. For plain LinearRegression (exact OLS) the fitted
# predictions and reported coefficients are unchanged without it, since
# sklearn rescaled the coefficients back to the original feature scale.
model = LinearRegression(fit_intercept=True)
model.fit(X_train, y_train)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=True)
# Predict on the held-out set and tabulate predictions next to actuals.
y_predict = model.predict(X_test)
df = pd.DataFrame(data = [y_predict, y_test])
df = df.T
# BUG FIX: row 0 of the pre-transpose frame is y_predict and row 1 is y_test,
# so after .T the FIRST column holds the predictions. The original labels
# ['costi', 'costi_PRED'] were swapped (the pasted output shows float
# predictions under 'costi'); label the columns in the correct order.
df.columns = ['costi_PRED', 'costi']
df
costi | costi_PRED | |
---|---|---|
0 | 3.664867e+06 | 3534300.0 |
1 | 7.809564e+06 | 7874798.0 |
2 | 1.152198e+08 | 115850068.0 |
3 | 1.010573e+06 | 1048631.0 |
4 | 3.258395e+06 | 3348815.0 |
... | ... | ... |
1491 | 9.072874e+06 | 9303572.0 |
1492 | 2.374781e+06 | 2310607.0 |
1493 | 5.624134e+06 | 5696533.0 |
1494 | 5.119308e+06 | 5233938.0 |
1495 | 2.547442e+05 | 295468.0 |
1496 rows × 2 columns
# Overlay actual vs. predicted costs for every held-out observation.
n_points = y_predict.shape[0]
xs = np.linspace(0, n_points, n_points)
plt.figure(figsize=(20, 15))
plt.plot(xs, y_test, label='test')
plt.plot(xs, y_predict, label='predetto')
plt.legend(loc=2);
# Fitted intercept of the linear model.
model.intercept_
-27689.42884542793
# Fitted coefficients, in the same order as the columns of X.
coeff = model.coef_
coeff
array([ 0.10767113, 0.3805769 , 0.93862885, -0.09831021, -0.34587131,
0.00826232, 0.04751049, -0.04413217])
# Column names of the reduced dataset (features followed by the target 'costi').
label = np.asarray(correlated_data.columns)
label
array(['var1', 'var2', 'var93', 'var94', 'var95', 'var99', 'var100',
'var101', 'costi'], dtype=object)
# Build the export table from the fitted model itself rather than from
# hard-coded, rounded copies of the printed values, so it cannot drift out
# of sync if the data or the correlation threshold changes.
export = pd.DataFrame({
    'coeff': coeff,       # model.coef_, one value per predictor
    'label': X.columns }) # predictor names, same order as coeff
export[['coeff','label']]
coeff | label | |
---|---|---|
0 | 0.107671 | var1 |
1 | 0.380577 | var2 |
2 | 0.938629 | var93 |
3 | -0.098310 | var94 |
4 | -0.345871 | var95 |
5 | 0.008262 | var99 |
6 | 0.047510 | var100 |
7 | -0.044132 | var101 |
# Persist the reduced coefficient table to Excel for reuse elsewhere.
export.to_excel('ridotta.xlsx', index=False)
## test the formula with the test Excel file