svm.py

   1 #
   2 # Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
   3 # Angel Farguell (angel.farguell@gmail.com)
   4 #
   5 # to install:
   6 #       conda install scikit-learn
   7 #       conda install scikit-image
   8
   9 from sklearn import svm
  10 from sklearn.model_selection import GridSearchCV
  11 from scipy import interpolate, spatial
  12 import matplotlib.pyplot as plt
  13 import matplotlib.font_manager
  14 import matplotlib.colors as colors
  15 from mpl_toolkits.mplot3d import axes3d
  16 from mpl_toolkits.mplot3d.art3d import Poly3DCollection
  17 import numpy as np
  18 from time import time
  19 from infrared_perimeters import process_infrared_perimeters
  20 import sys
  21 import saveload as sl
  22
  23 def verify_inputs(params):
  24     required_args = [("search", False), ("norm", True),
  25         ("notnan", True), ("artil", False), ("hartil", 0.2),
  26         ("artiu", True), ("hartiu", 0.1), ("downarti", True),
  27         ("dminz", 0.1), ("confal", 100), ("toparti", False),
  28         ("dmaxz", 0.1), ("confau", 100), ("plot_data", False),
  29         ("plot_scaled", False), ("plot_decision", False),
  30         ("plot_poly", False), ("plot_supports", False),
  31         ("plot_result", False)]
  32     # check each argument that should exist
  33     for key, default in required_args:
  34         if key not in params:
  35             params.update({key: default})
  36     return params
  37
  38 def preprocess_data_svm(data, scale, minconf=80.):
  39     """
  40     Preprocess satellite data from JPSSD to use in Support Vector Machine directly
  41     (without any interpolation as space-time 3D points)
  42
  43     :param data: dictionary of satellite data from JPSSD
  44     :param scale: time scales
  45     :param minconf: optional, minim fire confidence level to take into account
  46     :return X: matrix of features for SVM
  47     :return y: vector of labels for SVM
  48     :return c: vector of confidence level for SVM
  49
  50     Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
  51     Angel Farguell (angel.farguell@gmail.com), 2019-09-24
  52     """
  53
  54     # confidence of ground detections
  55     gconf = 95.
  56     maxg = 1e5
  57
  58     # scale from seconds to days
  59     tscale = 24.*3600.
  60     # number of granules
  61     ngran = maxg//len(data.keys())
  62
  63     detlon = np.concatenate([d['lon_fire'] for d in data.itervalues()])
  64     detlat = np.concatenate([d['lat_fire'] for d in data.itervalues()])
  65     confs = np.concatenate([d['conf_fire'] for d in data.itervalues()])
  66     bb = (detlon[confs > minconf].min(),detlon[confs > minconf].max(),detlat[confs > minconf].min(),detlat[confs > minconf].max())
  67     dc = (bb[1]-bb[0],bb[3]-bb[2])
  68     bf = (bb[0]-dc[0]*.3,bb[1]+dc[0]*.3,bb[2]-dc[1]*.3,bb[3]+dc[1]*.3)
  69     print bf
  70
  71     # process all the points as space-time 3D nodes
  72     XX = [[],[]]
  73     cf = []
  74     for gran in data.items():
  75         print '> processing granule %s' % gran[0]
  76         tt = (gran[1]['time_num']-scale[0])/tscale
  77         conf = gran[1]['conf_fire']>=minconf
  78         xf = np.c_[(gran[1]['lon_fire'][conf],gran[1]['lat_fire'][conf],np.repeat(tt,conf.sum()))]
  79         print 'fire detections: %g' % len(xf)
  80         XX[0].append(xf)
  81         mask = np.logical_and(gran[1]['lon_nofire'] >= bf[0],
  82                 np.logical_and(gran[1]['lon_nofire'] <= bf[1],
  83                  np.logical_and(gran[1]['lat_nofire'] >= bf[2],
  84                                  gran[1]['lat_nofire'] <= bf[3])))
  85         xg = np.c_[(gran[1]['lon_nofire'][mask],gran[1]['lat_nofire'][mask],np.repeat(tt,mask.sum()))]
  86         print 'no fire detections: %g' % len(xg)
  87         coarsening = int(1 + max((len(xg)-len(xf))//ngran,0))
  88         print 'no fire coarsening: %d' % coarsening
  89         print 'no fire detections reduction: %g' % len(xg[::coarsening])
  90         XX[1].append(xg[::coarsening])
  91         cf.append(gran[1]['conf_fire'][conf])
  92
  93     Xf = np.concatenate(tuple(XX[0]))
  94     Xg = np.concatenate(tuple(XX[1]))
  95     X = np.concatenate((Xg,Xf))
  96     y = np.concatenate((-np.ones(len(Xg)),np.ones(len(Xf))))
  97     c = np.concatenate((gconf*np.ones(len(Xg)),np.concatenate(tuple(cf))))
  98     print 'shape X: ', X.shape
  99     print 'shape y: ', y.shape
 100     print 'shape c: ', c.shape
 101     print 'len fire: ', len(X[y==1])
 102     print 'len ground: ', len(X[y==-1])
 103
 104     return X,y,c
 105
 106 def preprocess_result_svm(lons, lats, U, L, T, scale, time_num_granules, C=None):
 107     """
 108     Preprocess satellite data from JPSSD and setup to use in Support Vector Machine
 109
 110     :param lons: longitud grid
 111     :param lats: latitde grid
 112     :param U: upper bound grid
 113     :param L: lower bound grid
 114     :param T: mask grid
 115     :param scale: time scales
 116     :param time_num_granules: times of the granules
 117     :return X: matrix of features for SVM
 118     :return y: vector of labels for SVM
 119     :return c: vector of confidence level for SVM
 120
 121     Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
 122     Angel Farguell (angel.farguell@gmail.com), 2019-04-01
 123     """
 124
 125     # Flatten coordinates
 126     lon = np.ravel(lons).astype(float)
 127     lat = np.ravel(lats).astype(float)
 128
 129     # Temporal scale to days
 130     tscale = 24*3600
 131     U = U/tscale
 132     L = L/tscale
 133
 134     # Ensuring U>=L always
 135     print 'U>L: ',(U>L).sum()
 136     print 'U<L: ',(U<L).sum()
 137     print 'U==L: ',(U==L).sum()
 138
 139     # Reshape to 1D
 140     uu = np.ravel(U)
 141     ll = np.ravel(L)
 142     tt = np.ravel(T)
 143
 144     # Maximum and minimums to NaN data
 145     uu[uu==uu.max()] = np.nan
 146     ll[ll==ll.min()] = np.nan
 147
 148     # Mask created during computation of lower and upper bounds
 149     mk = tt==scale[1]-scale[0]
 150     # Masking upper bounds outside the mask
 151     uu[mk] = np.nan
 152
 153     # Creating minimum value for the upper bounds
 154     muu = uu[~np.isnan(uu)].min()
 155     # Creating maximum value for the lower bounds
 156     mll = ll[~np.isnan(ll)].max()
 157
 158     ### Reduction of the density of lower bounds
 159     # Creation of low bounds mask (True values are going to turn Nan's in lower bound data)
 160     lmk = np.copy(mk)
 161     ## Reason: We do not care about lower bounds below the upper bounds which are far from the upper bounds
 162     # temporary lower mask, all False (values of the mask where the mask is False, inside the fire mask)
 163     tlmk = lmk[~mk]
 164     # set to True all the bounds less than floor of minimum of upper bounds in fire mask
 165     tlmk[~np.isnan(ll[~mk])] = (ll[~mk][~np.isnan(ll[~mk])] < np.floor(muu))
 166     # set lower mask from temporary mask
 167     lmk[~mk] = tlmk
 168     ## Reason: Coarsening of the lower bounds below the upper bounds to create balance
 169     # create coarsening of the lower bound data below the upper bounds to be similar amount that upper bounds
 170     kk = (~np.isnan(ll[~lmk])).sum()/(~np.isnan(uu)).sum()
 171     if kk:
 172         # temporary lower mask, all True (values of the lower mask where the lower mask is False, set to True)
 173         tlmk = ~lmk[~lmk]
 174         # only set a subset of the lower mask values to False (coarsening)
 175         tlmk[::kk] = False
 176         # set lower mask form temporary mask
 177         lmk[~lmk] = tlmk
 178     ## Reason: We care about the maximum lower bounds which are not below upper bounds
 179     # temporary lower mask, all True (values of the mask where the mask is True, outside the fire mask)
 180     tlmk = lmk[mk]
 181     # temporary lower mask 2, all True (subset of the previous mask where the lower bounds is not Nan)
 182     t2lmk = tlmk[~np.isnan(ll[mk])]
 183     # set to False in the temporary lower mask 2 where lower bounds have maximum value
 184     t2lmk[ll[mk][~np.isnan(ll[mk])] == mll] = False
 185     # set temporary lower mask from temporary lower mask 2
 186     tlmk[~np.isnan(ll[mk])] = t2lmk
 187     # set lower mask from temporary lower mask
 188     lmk[mk] = tlmk
 189     ## Reason: Coarsening of the not maximum lower bounds not below the upper bounds to create balance
 190     # set subset outside of the fire mask for the rest
 191     # create coarsening of the not maximum lower bounds not below the upper bounds to be similar amount that the current number of lower bounds
 192     kk = (ll[mk][~np.isnan(ll[mk])] < mll).sum()/(~np.isnan(ll[~lmk])).sum()
 193     if kk:
 194         # temporary lower mask, values of the current lower mask outside of the original fire mask
 195         tlmk = lmk[mk]
 196         # temporary lower mask 2, subset of the previous mask where the lower bound is not Nan
 197         t2lmk = tlmk[~np.isnan(ll[mk])]
 198         # temporary lower mask 3, subset of the previous mask where the lower bounds are not maximum
 199         t3lmk = t2lmk[ll[mk][~np.isnan(ll[mk])] < mll]
 200         # coarsening of the temporary lower mask 3
 201         t3lmk[::kk] = False
 202         # set the temporary lower mask 2 from the temporary lower mask 3
 203         t2lmk[ll[mk][~np.isnan(ll[mk])] < mll] = t3lmk
 204         # set the temporary lower mask from the temporary lower mask 2
 205         tlmk[~np.isnan(ll[mk])] = t2lmk
 206         # set the lower mask from the temporary lower mask
 207         lmk[mk] = tlmk
 208
 209     # Masking lower bounds from previous lower mask
 210     ll[lmk] = np.nan
 211
 212     # Values different than NaN in the upper and lower bounds
 213     um = np.array(~np.isnan(uu))
 214     lm = np.array(~np.isnan(ll))
 215     # Define all the x, y, and z components of upper and lower bounds
 216     ux = lon[um]
 217     uy = lat[um]
 218     uz = uu[um]
 219     lx = lon[lm]
 220     ly = lat[lm]
 221     lz = ll[lm]
 222
 223     # Create the data to call SVM3 function from svm.py
 224     X = np.c_[np.concatenate((lx,ux)),np.concatenate((ly,uy)),np.concatenate((lz,uz))]
 225     y = np.concatenate((-np.ones(len(lx)),np.ones(len(ux))))
 226     # Print the shape of the data
 227     print 'shape X: ', X.shape
 228     print 'shape y: ', y.shape
 229
 230     if C is None:
 231         c = 80*np.ones(y.shape)
 232     else:
 233         c = np.concatenate((np.ravel(C[0])[lm],np.ravel(C[1])[um]))
 234
 235     # Clean data if not in bounding box
 236     bbox = (lon.min(),lon.max(),lat.min(),lat.max(),time_num_granules)
 237     geo_mask = np.logical_and(np.logical_and(np.logical_and(X[:,0] >= bbox[0],X[:,0] <= bbox[1]), X[:,1] >= bbox[2]), X[:,1] <= bbox[3])
 238     btime = (0,(scale[1]-scale[0])/tscale)
 239     time_mask = np.logical_and(X[:,2] >= btime[0], X[:,2] <= btime[1])
 240     whole_mask = np.logical_and(geo_mask, time_mask)
 241     X = X[whole_mask,:]
 242     y = y[whole_mask]
 243     c = c[whole_mask]
 244
 245     return X,y,c
 246
 247 def make_fire_mesh(fxlon, fxlat, it, nt):
 248     """
 249     Create a mesh of points to evaluate the decision function
 250
 251     :param fxlon: data to base x-axis meshgrid on
 252     :param fxlat: data to base y-axis meshgrid on
 253     :param it: data to base z-axis meshgrid on
 254     :param nt: tuple of number of nodes at each direction, optional
 255     :param coarse: coarsening of the fire mesh
 256     :return xx, yy, zz: ndarray
 257
 258     Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
 259     Angel Farguell (angel.farguell@gmail.com), 2019-04-01
 260     """
 261
 262     xx = np.repeat(fxlon[:, :, np.newaxis], nt, axis=2)
 263     yy = np.repeat(fxlat[:, :, np.newaxis], nt, axis=2)
 264     tt = np.linspace(it[0],it[1],nt)
 265     zz = np.swapaxes(np.swapaxes(np.array([np.ones(fxlon.shape)*t for t in tt]),0,1),1,2)
 266
 267     return xx, yy, zz
 268
 269 def make_meshgrid(x, y, z, s=(50,50,50), exp=.1):
 270     """
 271     Create a mesh of points to evaluate the decision function
 272
 273     :param x: data to base x-axis meshgrid on
 274     :param y: data to base y-axis meshgrid on
 275     :param z: data to base z-axis meshgrid on
 276     :param s: tuple of number of nodes at each direction, optional
 277     :param exp: extra percentage of time steps in each direction (between 0 and 1), optional
 278     :return xx, yy, zz: ndarray
 279
 280     Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
 281     Angel Farguell (angel.farguell@gmail.com), 2019-02-20
 282     Modified version of:
 283     https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
 284     """
 285
 286     if not isinstance(s, tuple):
 287         print '>> FAILED <<'
 288         print 'The number of nodes at each direction is not a tuple: ', s
 289         sys.exit(1)
 290     # number of nodes in each direction
 291     sx, sy, sz = np.array(s).astype(int)
 292     # extra step sizes in each direction
 293     brx = int(sx * exp)
 294     bry = int(sy * exp)
 295     brz = int(sz * exp)
 296     # grid lengths in each directon
 297     lx = x.max() - x.min()
 298     ly = y.max() - y.min()
 299     lz = z.max() - z.min()
 300     # grid resolutions in each direction
 301     hx = lx / (sx - 2*brx - 1)
 302     hy = ly / (sy - 2*bry - 1)
 303     hz = lz / (sz - 2*brz - 1)
 304     # extrem values for each dimension
 305     x_min, x_max = x.min() - brx * hx, x.max() + brx * hx
 306     y_min, y_max = y.min() - bry * hy, y.max() + bry * hy
 307     z_min, z_max = z.min() - brz * hz, z.max() + brz * hz
 308     # generating the mesh grid
 309     xx, yy, zz = np.meshgrid(np.linspace(y_min, y_max, sy),
 310                              np.linspace(x_min, x_max, sx),
 311                              np.linspace(z_min, z_max, sz))
 312     return xx, yy, zz
 313
 314 def find_roots(Fx,Fy,zr,Z,plot_poly=False):
 315     """
 316     Find at each point where the roots of Z are.
 317
 318     :param Fx: meshgrid ndarray with x component
 319     :param Fy: meshgrid ndarray with y component
 320     :param zr: meshgrid ndarray with z levels
 321     :param plot_poly: boolean of plotting polynomial approximation
 322     :return Fz: 2D mesh with the hyperplane z which gives decision functon 0
 323
 324     Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
 325     Angel Farguell (angel.farguell@gmail.com), 2019-02-20
 326     Modified version of:
 327     https://www.semipol.de/2015/10/29/SVM-separating-hyperplane-3d-matplotlib.html
 328     """
 329     if plot_poly:
 330         fig = plt.figure()
 331     # Computing fire arrival time from previous decision function
 332     print '>> Computing fire arrival time...'
 333     sys.stdout.flush()
 334     t_1 = time()
 335     # Initializing fire arrival time
 336     Fz = np.zeros(Fx.shape)
 337     # For each x and y
 338     for k1 in range(Fx.shape[0]):
 339         for k2 in range(Fx.shape[1]):
 340             # Approximate the vertical decision function by a piecewise polynomial (cubic spline interpolation)
 341             pz = interpolate.CubicSpline(zr, Z[k1,k2])
 342             # Compute the real roots of the the piecewise polynomial
 343             rr = pz.roots()
 344             # Just take the real roots between min(zz) and max(zz)
 345             realr = rr.real[np.logical_and(abs(rr.imag) < 1e-5, np.logical_and(rr.real > zr.min(), rr.real < zr.max()))]
 346             if len(realr) > 0:
 347                 # Take the minimum root
 348                 Fz[k1,k2] = realr.min()
 349                 # Plotting the approximated polynomial with the decision function
 350                 if plot_poly:
 351                     try:
 352                         plt.ion()
 353                         plt.plot(pz(zr),zr)
 354                         plt.plot(Z[k1,k2],zr,'+')
 355                         plt.plot(np.zeros(len(realr)),realr,'o',c='g')
 356                         plt.plot(0,Fz[k1,k2],'o',markersize=3,c='r')
 357                         plt.title('Polynomial approximation of decision_function(%f,%f,z)' % (Fx[k1,k2],Fy[k1,k2]))
 358                         plt.xlabel('decision function value')
 359                         plt.ylabel('Z')
 360                         plt.legend(['polynomial','decision values','roots','fire arrival time'])
 361                         plt.xlim([Z.min(),Z.max()])
 362                         plt.ylim([zz.min(),zz.max()])
 363                         plt.show()
 364                         plt.pause(.001)
 365                         plt.cla()
 366                     except Exception as e:
 367                         print 'Warning: something went wrong when plotting...'
 368                         print e
 369             else:
 370                 # If there is not a real root of the polynomial between zz.min() and zz.max(), just define as a Nan
 371                 Fz[k1,k2] = np.nan
 372     t_2 = time()
 373     return Fz
 374
 375 def frontier(clf, xx, yy, zz, bal=.5, plot_decision=False, plot_poly=False, using_weights=False, save_decision=False):
 376     """
 377     Compute the surface decision frontier for a classifier.
 378
 379     :param clf: a classifier
 380     :param xx: meshgrid ndarray
 381     :param yy: meshgrid ndarray
 382     :param zz: meshgrid ndarray
 383     :param bal: number between 0 and 1, balance between lower and upper bounds in decision function (in case not level 0)
 384     :param plot_decision: boolean of plotting decision volume
 385     :param plot_poly: boolean of plotting polynomial approximation
 386     :return F: 2D meshes with xx, yy coordinates and the hyperplane z which gives decision functon 0
 387
 388     Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
 389     Angel Farguell (angel.farguell@gmail.com), 2019-02-20
 390     Modified version of:
 391     https://www.semipol.de/2015/10/29/SVM-separating-hyperplane-3d-matplotlib.html
 392     """
 393
 394     # Creating the 3D grid
 395     XX = np.c_[np.ravel(xx), np.ravel(yy), np.ravel(zz)]
 396
 397     # Evaluating the decision function
 398     print '>> Evaluating the decision function...'
 399     sys.stdout.flush()
 400     t_1 = time()
 401     if using_weights:
 402         from libsvm_weights.python.svmutil import svm_predict
 403         _, _, p_vals = svm_predict([], XX, clf, '-q')
 404         ZZ = np.array([p[0] for p in p_vals])
 405     else:
 406         ZZ = clf.decision_function(XX)
 407     t_2 = time()
 408     print 'elapsed time: %ss.' % str(abs(t_2-t_1))
 409     hist = np.histogram(ZZ)
 410     print 'counts: ', hist[0]
 411     print 'values: ', hist[1]
 412     print 'decision function range: ', ZZ.min(), '~', ZZ.max()
 413
 414     # Reshaping decision function volume
 415     Z = ZZ.reshape(xx.shape)
 416     print 'decision function shape: ', Z.shape
 417     if save_decision:
 418         sl.save((xx,yy,zz,Z),'decision')
 419
 420     if plot_decision:
 421         try:
 422             from skimage import measure
 423             from shiftcmap import shiftedColorMap
 424             verts, faces, normals, values = measure.marching_cubes_lewiner(Z, level=0, allow_degenerate=False)
 425             # Scale and transform to actual size of the interesting volume
 426             h = np.divide([xx.max()-xx.min(), yy.max() - yy.min(), zz.max() - zz.min()],np.array(xx.shape)-1)
 427             verts = verts * h
 428             verts = verts + [xx.min(), yy.min(), zz.min()]
 429             mesh = Poly3DCollection(verts[faces], facecolor='orange', alpha=.9)
 430             fig = plt.figure()
 431             ax = fig.gca(projection='3d')
 432             fig.suptitle("Decision volume")
 433             col = [(0, .5, 0), (.5, .5, .5), (.5, 0, 0)]
 434             cm = colors.LinearSegmentedColormap.from_list('GrRdD',col,N=50)
 435             midpoint = 1 - ZZ.max() / (ZZ.max() + abs(ZZ.min()))
 436             shiftedcmap = shiftedColorMap(cm, midpoint=midpoint, name='shifted')
 437             kk = 1+np.divide(xx.shape,50)
 438             X = np.ravel(xx[::kk[0],::kk[1],::kk[2]])
 439             Y = np.ravel(yy[::kk[0],::kk[1],::kk[2]])
 440             T = np.ravel(zz[::kk[0],::kk[1],::kk[2]])
 441             CC = np.ravel(Z[::kk[0],::kk[1],::kk[2]])
 442             p = ax.scatter(X, Y, T, c=CC, s=.1, alpha=.5, cmap=shiftedcmap)
 443             cbar = fig.colorbar(p)
 444             cbar.set_label('decision function value', rotation=270, labelpad=20)
 445             ax.add_collection3d(mesh)
 446             ax.set_zlim([xx.min(),xx.max()])
 447             ax.set_ylim([yy.min(),yy.max()])
 448             ax.set_zlim([zz.min(),zz.max()])
 449             ax.set_xlabel("Longitude normalized")
 450             ax.set_ylabel("Latitude normalized")
 451             ax.set_zlabel("Time normalized")
 452             plt.savefig('decision.png')
 453         except Exception as e:
 454             print 'Warning: something went wrong when plotting...'
 455             print e
 456
 457     # xx 2-dimensional array
 458     Fx = xx[:, :, 0]
 459     # yy 2-dimensional array
 460     Fy = yy[:, :, 0]
 461     # zz 1-dimensional array
 462     zr = zz[0, 0]
 463     # find roots
 464     Fz = find_roots(Fx,Fy,zr,Z,plot_poly=plot_poly)
 465
 466     print 'elapsed time: %ss.' % str(abs(t_2-t_1))
 467     F = [Fx,Fy,Fz]
 468
 469     return F
 470
 471 def SVM3(X, y, C=1., kgam=1., fire_grid=None, it=None, **params):
 472     """
 473     3D SuperVector Machine analysis and plot
 474
 475     :param X: Training vectors, where n_samples is the number of samples and n_features is the number of features.
 476     :param y: Target values
 477     :param C: Penalization (argument of svm.SVC class), optional
 478     :param kgam: Scalar multiplier for gamma (capture more details increasing it), optional
 479     :param fire_grid: The longitud and latitude grid where to have the fire arrival time, optional
 480     :return F: tuple with (longitude grid, latitude grid, fire arrival time grid)
 481
 482     Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
 483     Angel Farguell (angel.farguell@gmail.com), 2019-02-20
 484     Modified version of:
 485     https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
 486     """
 487     # add default values for parameters not specified
 488     params = verify_inputs(params)
 489     print 'svm params: ',params
 490
 491     t_init = time()
 492
 493     col = [(0, .5, 0), (.5, 0, 0)]
 494     cm_GR = colors.LinearSegmentedColormap.from_list('GrRd',col,N=2)
 495     col = [(1, 0, 0), (.25, 0, 0)]
 496     cm_Rds = colors.LinearSegmentedColormap.from_list('Rds',col,N=100)
 497
 498     # if fire_grid==None: creation of the grid options
 499     # number of vertical nodes per observation
 500     vN = 1
 501     # number of horizontal nodes per observation
 502     hN = 5
 503
 504     # using different weights for the data
 505     if isinstance(C,(list,tuple,np.ndarray)):
 506         using_weights = True
 507         from libsvm_weights.python.svm import svm_problem, svm_parameter
 508         from libsvm_weights.python.svmutil import svm_train
 509         from sklearn.utils import compute_class_weight
 510     else:
 511         using_weights = False
 512
 513     # Data inputs
 514     X = np.array(X).astype(float)
 515     y = np.array(y)
 516
 517     # Original data
 518     oX = np.array(X).astype(float)
 519     oy = np.array(y)
 520
 521     # Visualization of the data
 522     X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
 523     if params['plot_data']:
 524         try:
 525             fig = plt.figure()
 526             ax = fig.gca(projection='3d')
 527             fig.suptitle("Plotting the original data to fit")
 528             ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
 529             ax.set_xlabel("Longitude")
 530             ax.set_ylabel("Latitude")
 531             ax.set_zlabel("Time (days)")
 532             plt.savefig('original_data.png')
 533         except Exception as e:
 534             print 'Warning: something went wrong when plotting...'
 535             print e
 536
 537     # Normalization of the data into [0,1]^3
 538     if params['norm']:
 539         xmin = X0.min()
 540         xlen = X0.max() - X0.min()
 541         x0 = np.divide(X0 - xmin, xlen)
 542         ymin = X1.min()
 543         ylen = X1.max() - X1.min()
 544         x1 = np.divide(X1 - ymin, ylen)
 545         zmin = X2.min()
 546         zlen = X2.max() - X2.min()
 547         x2 = np.divide(X2 - zmin, zlen)
 548         X0, X1, X2 = x0, x1, x2
 549         X[:, 0] = X0
 550         X[:, 1] = X1
 551         X[:, 2] = X2
 552
 553     # Creation of fire and ground artificial detections
 554     if params['artil'] or params['artiu'] or params['toparti'] or params['downarti']:
 555         # Extreme values at z direction
 556         minz = X[:, 2].min()
 557         maxz = X[:, 2].max()
 558         # Division of lower and upper bounds for data and confidence level
 559         fl = X[y==np.unique(y)[0]]
 560         fu = X[y==np.unique(y)[1]]
 561
 562         # Artifitial extensions of the lower bounds
 563         if params['artil']:
 564             # Create artificial lower bounds
 565             flz = np.array([ np.unique(np.append(np.arange(f[2],minz,-params['hartil']),f[2])) for f in fl ])
 566             # Definition of new ground detections after artificial detections added
 567             Xg = np.concatenate([ np.c_[(np.repeat(fl[k][0],len(flz[k])),np.repeat(fl[k][1],len(flz[k])),flz[k])] for k in range(len(flz)) ])
 568             if using_weights:
 569                 cl = C[y==np.unique(y)[0]]
 570                 Cg = np.concatenate([ np.repeat(cl[k],len(flz[k])) for k in range(len(flz)) ])
 571         else:
 572             Xg = fl
 573             if using_weights:
 574                 cl = C[y==np.unique(y)[0]]
 575                 Cg = cl
 576
 577         # Artifitial extensions of the upper bounds
 578         if params['artiu']:
 579             # Create artificial upper bounds
 580             fuz = np.array([ np.unique(np.append(np.arange(f[2],maxz,params['hartiu']),f[2])) for f in fu ])
 581             # Definition of new fire detections after artificial detections added
 582             Xf = np.concatenate([ np.c_[(np.repeat(fu[k][0],len(fuz[k])),np.repeat(fu[k][1],len(fuz[k])),fuz[k])] for k in range(len(fuz)) ])
 583             # Define new confidence levels
 584             if using_weights:
 585                 cu = C[y==np.unique(y)[1]]
 586                 Cf = np.concatenate([ np.repeat(cu[k],len(fuz[k])) for k in range(len(fuz)) ])
 587         else:
 588             Xf = fu
 589             if using_weights:
 590                 cu = C[y==np.unique(y)[1]]
 591                 Cf = cu
 592
 593         # Bottom artificial lower bounds
 594         if params['downarti']:
 595             # Creation of the x,y new mesh of artificial lower bounds
 596             xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
 597                 np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
 598             # All the artificial new mesh are going to be below the data
 599             zng = np.repeat(minz-params['dminz'],len(np.ravel(xn)))
 600             # Artifitial lower bounds
 601             Xga = np.c_[np.ravel(xn),np.ravel(yn),np.ravel(zng)]
 602             # Definition of new ground detections after down artificial lower detections
 603             Xgn = np.concatenate((Xg,Xga))
 604             # Definition of new confidence level
 605             if using_weights:
 606                 Cga = np.ones(len(Xga))*params['confal']
 607                 Cgn = np.concatenate((Cg,Cga))
 608         else:
 609             Xgn = Xg
 610             if using_weights:
 611                 Cgn = Cg
 612
 613         # Top artificial upper bounds
 614         if params['toparti']:
 615             # Creation of the x,y new mesh of artificial upper bounds
 616             xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
 617                                 np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
 618             # All the artificial new mesh are going to be over the data
 619             znf = np.repeat(maxz+params['dmaxz'],len(np.ravel(xn)))
 620             # Artifitial upper bounds
 621             Xfa = np.c_[np.ravel(xn),np.ravel(yn),np.ravel(znf)]
 622             # Definition of new fire detections after top artificial upper detections
 623             Xfn = np.concatenate((Xf,Xfa))
 624             # Definition of new confidence level
 625             if using_weights:
 626                 Cfa = np.ones(len(Xfa))*params['confau']
 627                 Cfn = np.concatenate((Cf,Cfa))
 628         else:
 629             Xfn = Xf
 630             if using_weights:
 631                 Cfn = Cf
 632
 633         # New definition of the training vectors
 634         X = np.concatenate((Xgn, Xfn))
 635         # New definition of the target values
 636         y = np.concatenate((np.repeat(np.unique(y)[0],len(Xgn)),np.repeat(np.unique(y)[1],len(Xfn))))
 637         # New definition of the confidence level
 638         if using_weights:
 639             C = np.concatenate((Cgn, Cfn))
 640         # New definition of each feature vector
 641         X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
 642
 643     # Printing number of samples and features
 644     n0 = (y==np.unique(y)[0]).sum().astype(float)
 645     n1 = (y==np.unique(y)[1]).sum().astype(float)
 646     n_samples, n_features = X.shape
 647     print 'n_samples =', n_samples
 648     print 'n_samples_{-1} =', int(n0)
 649     print 'n_samples_{+1} =', int(n1)
 650     print 'n_features =', n_features
 651
 652     # Visualization of scaled data
 653     if params['plot_scaled']:
 654         try:
 655             fig = plt.figure()
 656             ax = fig.gca(projection='3d')
 657             fig.suptitle("Plotting the data scaled to fit")
 658             ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
 659             ax.set_xlabel("Longitude normalized")
 660             ax.set_ylabel("Latitude normalized")
 661             ax.set_zlabel("Time normalized")
 662             plt.savefig('scaled_data.png')
 663         except Exception as e:
 664             print 'Warning: something went wrong when plotting...'
 665             print e
 666
 667     # Reescaling gamma to include more detailed results
 668     gamma = 1. / (n_features * X.std())
 669     print 'gamma =', gamma
 670
 671     # Creating the SVM model and fitting the data using Super Vector Machine technique
 672     print '>> Creating the SVM model...'
 673     sys.stdout.flush()
 674     if using_weights:
 675         gamma = kgam*gamma
 676         # Compute class balanced weights
 677         cls, _ = np.unique(y, return_inverse=True)
 678         class_weight = compute_class_weight("balanced", cls, y)
 679         prob = svm_problem(C,y,X)
 680         arg = '-g %.15g -w%01d %.15g -w%01d %.15g -m 1000 -h 0' % (gamma, cls[0], class_weight[0],
 681                                             cls[1], class_weight[1])
 682         param = svm_parameter(arg)
 683         print '>> Fitting the SVM model...'
 684         t_1 = time()
 685         clf = svm_train(prob,param)
 686         t_2 = time()
 687     else:
 688         t_1 = time()
 689         if params['search']:
 690             print '>> Searching for best value of C and gamma...'
 691             # Grid Search
 692             # Parameter Grid
 693             param_grid = {'C': np.logspace(0,5,6), 'gamma': gamma*np.logspace(0,5,6)}
 694             # Make grid search classifier
 695             grid_search = GridSearchCV(svm.SVC(cache_size=2000,class_weight="balanced",probability=True), param_grid, n_jobs=-1, verbose=1, cv=5, iid=False)
 696             print '>> Fitting the SVM model...'
 697             # Train the classifier
 698             grid_search.fit(X, y)
 699             print "Best Parameters:\n", grid_search.best_params_
 700             clf = grid_search.best_estimator_
 701             print "Best Estimators:\n", clf
 702         else:
 703             gamma = kgam*gamma
 704             clf = svm.SVC(C=C, kernel="rbf", gamma=gamma, cache_size=2000, class_weight="balanced") # default kernel: exp(-gamma||x-x'||^2)
 705             print clf
 706             print '>> Fitting the SVM model...'
 707             # Fitting the data using Super Vector Machine technique
 708             clf.fit(X, y)
 709         t_2 = time()
 710     print 'elapsed time: %ss.' % str(abs(t_2-t_1))
 711
 712     if not using_weights:
 713         # Check if the classification failed
 714         if clf.fit_status_:
 715             print '>> FAILED <<'
 716             print 'Failed fitting the data'
 717             sys.exit(1)
 718         print 'number of support vectors: ', clf.n_support_
 719         print 'score of trained data: ', clf.score(X,y)
 720
 721     # Creating the mesh grid to evaluate the classification
 722     print '>> Creating mesh grid to evaluate the classification...'
 723     nnodes = np.ceil(np.power(n_samples,1./n_features))
 724     if fire_grid is None:
 725         # Number of necessary nodes
 726         hnodes = hN*nnodes
 727         vnodes = vN*nnodes
 728         print 'number of horizontal nodes (%d meshgrid nodes for each observation): %d' % (hN,hnodes)
 729         print 'number of vertical nodes (%d meshgrid nodes for each observation): %d' % (vN,vnodes)
 730         # Computing resolution of the mesh to evaluate
 731         sdim = (hnodes,hnodes,vnodes)
 732         print 'grid_size = %dx%dx%d = %d' % (sdim[0],sdim[1],sdim[2],np.prod(sdim))
 733         t_1 = time()
 734         xx, yy, zz = make_meshgrid(X0, X1, X2, s=sdim)
 735         t_2 = time()
 736     else:
 737         fxlon = np.divide(fire_grid[0] - xmin, xlen)
 738         fxlat = np.divide(fire_grid[1] - ymin, ylen)
 739         if it is None:
 740             it = (X2.min(),X2.max())
 741         else:
 742             it = np.divide(np.array(it) - zmin, zlen)
 743         vnodes = vN*nnodes
 744         sdim = (fxlon.shape[0],fxlon.shape[1],vnodes)
 745         print 'fire_grid_size = %dx%dx%d = %d' % (sdim + (np.prod(sdim),))
 746         t_1 = time()
 747         xx, yy, zz = make_fire_mesh(fxlon, fxlat, it, sdim[2])
 748         t_2 = time()
 749         print 'grid_created = %dx%dx%d = %d' % (zz.shape + (np.prod(zz.shape),))
 750     print 'elapsed time: %ss.' % str(abs(t_2-t_1))
 751
 752     # Computing the 2D fire arrival time, F
 753     print '>> Computing the 2D fire arrival time, F...'
 754     sys.stdout.flush()
 755     F = frontier(clf, xx, yy, zz, plot_decision=params['plot_decision'], plot_poly=params['plot_poly'], using_weights=using_weights)
 756
 757     print '>> Creating final results...'
 758     sys.stdout.flush()
 759     # Plotting the Separating Hyperplane of the SVM classification with the support vectors
 760     if params['plot_supports']:
 761         try:
 762             if using_weights:
 763                 supp_ind = np.sort(clf.get_sv_indices())-1
 764                 supp_vec = X[supp_ind]
 765             else:
 766                 supp_ind = clf.support_
 767                 supp_vec = clf.support_vectors_
 768             fig = plt.figure()
 769             ax = fig.gca(projection='3d')
 770             fig.suptitle("Plotting the 3D Separating Hyperplane of an SVM")
 771             # plotting the separating hyperplane
 772             ax.plot_surface(F[0], F[1], F[2], color='orange', alpha=.3)
 773             # computing the indeces where no support vectors
 774             rr = np.array(range(len(y)))
 775             ms = np.isin(rr,supp_ind)
 776             nsupp = rr[~ms]
 777             # plotting no-support vectors (smaller)
 778             ax.scatter(X0[nsupp], X1[nsupp], X2[nsupp], c=y[nsupp], cmap=cm_GR, s=.5, vmin=y.min(), vmax=y.max(), alpha=.1)
 779             # plotting support vectors (bigger)
 780             ax.scatter(supp_vec[:, 0], supp_vec[:, 1], supp_vec[:, 2], c=y[supp_ind], cmap=cm_GR, s=10, edgecolors='k', linewidth=.5, alpha=.5);
 781             ax.set_xlim(xx.min(),xx.max())
 782             ax.set_ylim(yy.min(),yy.max())
 783             ax.set_zlim(zz.min(),zz.max())
 784             ax.set_xlabel("Longitude normalized")
 785             ax.set_ylabel("Latitude normalized")
 786             ax.set_zlabel("Time normalized")
 787             plt.savefig('support.png')
 788         except Exception as e:
 789             print 'Warning: something went wrong when plotting...'
 790             print e
 791
 792     # Plot the fire arrival time resulting from the SVM classification normalized
 793     if params['plot_result']:
 794         try:
 795             Fx, Fy, Fz = np.array(F[0]), np.array(F[1]), np.array(F[2])
 796             with np.errstate(invalid='ignore'):
 797                 Fz[Fz > X2.max()] = np.nan
 798             if params['notnan']:
 799                 Fz[np.isnan(Fz)] = X2.max()
 800                 Fz = np.minimum(Fz, X2.max())
 801             fig = plt.figure()
 802             ax = fig.gca(projection='3d')
 803             fig.suptitle("Fire arrival time normalized")
 804             # plotting fire arrival time
 805             p = ax.plot_surface(Fx, Fy, Fz, cmap=cm_Rds,
 806                            linewidth=0, antialiased=False)
 807             ax.set_xlim(xx.min(),xx.max())
 808             ax.set_ylim(yy.min(),yy.max())
 809             ax.set_zlim(zz.min(),zz.max())
 810             cbar = fig.colorbar(p)
 811             cbar.set_label('Fire arrival time normalized', labelpad=20, rotation=270)
 812             ax.set_xlabel("Longitude normalized")
 813             ax.set_ylabel("Latitude normalized")
 814             ax.set_zlabel("Time normalized")
 815             plt.savefig('tign_g.png')
 816         except Exception as e:
 817             print 'Warning: something went wrong when plotting...'
 818             print e
 819
 820     # Translate the result again into initial data scale
 821     if params['norm']:
 822         f0 = F[0] * xlen + xmin
 823         f1 = F[1] * ylen + ymin
 824         f2 = F[2] * zlen + zmin
 825         FF = [f0,f1,f2]
 826
 827     # Set all the larger values at the end to be the same maximum value
 828     oX0, oX1, oX2 = oX[:, 0], oX[:, 1], oX[:, 2]
 829     FFx, FFy, FFz = FF[0], FF[1], FF[2]
 830
 831     if params['notnan']:
 832         with np.errstate(invalid='ignore'):
 833             FFz[np.isnan(FFz)] = np.nanmax(FFz)
 834     FF = [FFx,FFy,FFz]
 835
 836     # Plot the fire arrival time resulting from the SVM classification
 837     if params['plot_result']:
 838         try:
 839             # Plotting the result
 840             fig = plt.figure()
 841             ax = fig.gca(projection='3d')
 842             fig.suptitle("Plotting the 3D graph function of a SVM")
 843             FFx, FFy, FFz = np.array(FF[0]), np.array(FF[1]), np.array(FF[2])
 844             # plotting original data
 845             ax.scatter(oX0, oX1, oX2, c=oy, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
 846             # plotting fire arrival time
 847             ax.plot_wireframe(FFx, FFy, FFz, color='orange', alpha=.5)
 848             ax.set_xlabel("Longitude")
 849             ax.set_ylabel("Latitude")
 850             ax.set_zlabel("Time (days)")
 851             plt.savefig('result.png')
 852         except Exception as e:
 853             print 'Warning: something went wrong when plotting...'
 854             print e
 855
 856     print '>> SUCCESS <<'
 857     t_final = time()
 858     print 'TOTAL elapsed time: %ss.' % str(abs(t_final-t_init))
 859     plt.close()
 860
 861     return FF
 862
 863
 864 if __name__ == "__main__":
 865     # Experiment to do
 866     exp = 1
 867     search = True
 868
 869     # Defining ground and fire detections
 870     def exp1():
 871         Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0]]
 872         Xf = [[0, 0, 1], [1, 1, 0], [2, 2, 1], [2, 0, 1], [0, 2, 1]]
 873         C = 10.
 874         kgam = 1.
 875         return Xg, Xf, C, kgam
 876     def exp2():
 877         Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0],
 878             [4, 2, 0], [4, 0, 0], [2, 1, .5], [0, 1, .5],
 879             [4, 1, .5], [2, 0, .5], [2, 2, .5]]
 880         Xf = [[0, 0, 1], [1, 1, 0.25], [2, 2, 1], [2, 0, 1], [0, 2, 1], [3, 1, 0.25], [4, 2, 1], [4, 0, 1]]
 881         C = np.concatenate((np.array([50.,50.,50.,50.,50.,50.,
 882                         1000.,100.,100.,100.,100.]), 100.*np.ones(len(Xf))))
 883         kgam = 5.
 884         return Xg, Xf, C, kgam
 885
 886     # Creating the options
 887     options = {1 : exp1, 2 : exp2}
 888
 889     # Defining the option depending on the experiment
 890     Xg, Xf, C, kgam = options[exp]()
 891
 892     # Creating the data necessary to run SVM3 function
 893     X = np.concatenate((Xg, Xf))
 894     y = np.concatenate((-np.ones(len(Xg)), np.ones(len(Xf))))
 895
 896     # Running SVM classification
 897     SVM3(X,y,C=C,kgam=kgam,search=search,plot_result=True)