adding requests to conda install docs
[JPSSData.git] / svm.py
blobdae4fbb22f6938a1ef3acaf027c784260187019c
2 # Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
3 # Angel Farguell (angel.farguell@gmail.com)
5 # to install:
6 # conda install scikit-learn
7 # conda install scikit-image
9 from sklearn import svm
10 from sklearn.model_selection import GridSearchCV
11 from scipy import interpolate, spatial
12 import matplotlib.pyplot as plt
13 import matplotlib.font_manager
14 import matplotlib.colors as colors
15 from mpl_toolkits.mplot3d import axes3d
16 from mpl_toolkits.mplot3d.art3d import Poly3DCollection
17 import numpy as np
18 from time import time
19 from infrared_perimeters import process_infrared_perimeters
20 import sys
21 import saveload as sl
23 def verify_inputs(params):
24 required_args = [("search", False), ("norm", True),
25 ("notnan", True), ("artil", False), ("hartil", 0.2),
26 ("artiu", True), ("hartiu", 0.1), ("downarti", True),
27 ("dminz", 0.1), ("confal", 100), ("toparti", False),
28 ("dmaxz", 0.1), ("confau", 100), ("plot_data", False),
29 ("plot_scaled", False), ("plot_decision", False),
30 ("plot_poly", False), ("plot_supports", False),
31 ("plot_result", False)]
32 # check each argument that should exist
33 for key, default in required_args:
34 if key not in params:
35 params.update({key: default})
36 return params
38 def preprocess_data_svm(data, scale, minconf=80.):
39 """
40 Preprocess satellite data from JPSSD to use in Support Vector Machine directly
41 (without any interpolation as space-time 3D points)
43 :param data: dictionary of satellite data from JPSSD
44 :param scale: time scales
45 :param minconf: optional, minim fire confidence level to take into account
46 :return X: matrix of features for SVM
47 :return y: vector of labels for SVM
48 :return c: vector of confidence level for SVM
50 Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
51 Angel Farguell (angel.farguell@gmail.com), 2019-09-24
52 """
54 # confidence of ground detections
55 gconf = 95.
56 maxg = 1e5
58 # scale from seconds to days
59 tscale = 24.*3600.
60 # number of granules
61 ngran = maxg//len(data.keys())
63 detlon = np.concatenate([d['lon_fire'] for d in data.itervalues()])
64 detlat = np.concatenate([d['lat_fire'] for d in data.itervalues()])
65 confs = np.concatenate([d['conf_fire'] for d in data.itervalues()])
66 bb = (detlon[confs > minconf].min(),detlon[confs > minconf].max(),detlat[confs > minconf].min(),detlat[confs > minconf].max())
67 dc = (bb[1]-bb[0],bb[3]-bb[2])
68 bf = (bb[0]-dc[0]*.3,bb[1]+dc[0]*.3,bb[2]-dc[1]*.3,bb[3]+dc[1]*.3)
69 print bf
71 # process all the points as space-time 3D nodes
72 XX = [[],[]]
73 cf = []
74 for gran in data.items():
75 print '> processing granule %s' % gran[0]
76 tt = (gran[1]['time_num']-scale[0])/tscale
77 conf = gran[1]['conf_fire']>=minconf
78 xf = np.c_[(gran[1]['lon_fire'][conf],gran[1]['lat_fire'][conf],np.repeat(tt,conf.sum()))]
79 print 'fire detections: %g' % len(xf)
80 XX[0].append(xf)
81 mask = np.logical_and(gran[1]['lon_nofire'] >= bf[0],
82 np.logical_and(gran[1]['lon_nofire'] <= bf[1],
83 np.logical_and(gran[1]['lat_nofire'] >= bf[2],
84 gran[1]['lat_nofire'] <= bf[3])))
85 xg = np.c_[(gran[1]['lon_nofire'][mask],gran[1]['lat_nofire'][mask],np.repeat(tt,mask.sum()))]
86 print 'no fire detections: %g' % len(xg)
87 coarsening = int(1 + max((len(xg)-len(xf))//ngran,0))
88 print 'no fire coarsening: %d' % coarsening
89 print 'no fire detections reduction: %g' % len(xg[::coarsening])
90 XX[1].append(xg[::coarsening])
91 cf.append(gran[1]['conf_fire'][conf])
93 Xf = np.concatenate(tuple(XX[0]))
94 Xg = np.concatenate(tuple(XX[1]))
95 X = np.concatenate((Xg,Xf))
96 y = np.concatenate((-np.ones(len(Xg)),np.ones(len(Xf))))
97 c = np.concatenate((gconf*np.ones(len(Xg)),np.concatenate(tuple(cf))))
98 print 'shape X: ', X.shape
99 print 'shape y: ', y.shape
100 print 'shape c: ', c.shape
101 print 'len fire: ', len(X[y==1])
102 print 'len ground: ', len(X[y==-1])
104 return X,y,c
106 def preprocess_result_svm(lons, lats, U, L, T, scale, time_num_granules, C=None):
108 Preprocess satellite data from JPSSD and setup to use in Support Vector Machine
110 :param lons: longitud grid
111 :param lats: latitde grid
112 :param U: upper bound grid
113 :param L: lower bound grid
114 :param T: mask grid
115 :param scale: time scales
116 :param time_num_granules: times of the granules
117 :return X: matrix of features for SVM
118 :return y: vector of labels for SVM
119 :return c: vector of confidence level for SVM
121 Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
122 Angel Farguell (angel.farguell@gmail.com), 2019-04-01
125 # Flatten coordinates
126 lon = np.ravel(lons).astype(float)
127 lat = np.ravel(lats).astype(float)
129 # Temporal scale to days
130 tscale = 24*3600
131 U = U/tscale
132 L = L/tscale
134 # Ensuring U>=L always
135 print 'U>L: ',(U>L).sum()
136 print 'U<L: ',(U<L).sum()
137 print 'U==L: ',(U==L).sum()
139 # Reshape to 1D
140 uu = np.ravel(U)
141 ll = np.ravel(L)
142 tt = np.ravel(T)
144 # Maximum and minimums to NaN data
145 uu[uu==uu.max()] = np.nan
146 ll[ll==ll.min()] = np.nan
148 # Mask created during computation of lower and upper bounds
149 mk = tt==scale[1]-scale[0]
150 # Masking upper bounds outside the mask
151 uu[mk] = np.nan
153 # Creating minimum value for the upper bounds
154 muu = uu[~np.isnan(uu)].min()
155 # Creating maximum value for the lower bounds
156 mll = ll[~np.isnan(ll)].max()
158 ### Reduction of the density of lower bounds
159 # Creation of low bounds mask (True values are going to turn Nan's in lower bound data)
160 lmk = np.copy(mk)
161 ## Reason: We do not care about lower bounds below the upper bounds which are far from the upper bounds
162 # temporary lower mask, all False (values of the mask where the mask is False, inside the fire mask)
163 tlmk = lmk[~mk]
164 # set to True all the bounds less than floor of minimum of upper bounds in fire mask
165 tlmk[~np.isnan(ll[~mk])] = (ll[~mk][~np.isnan(ll[~mk])] < np.floor(muu))
166 # set lower mask from temporary mask
167 lmk[~mk] = tlmk
168 ## Reason: Coarsening of the lower bounds below the upper bounds to create balance
169 # create coarsening of the lower bound data below the upper bounds to be similar amount that upper bounds
170 kk = (~np.isnan(ll[~lmk])).sum()/(~np.isnan(uu)).sum()
171 if kk:
172 # temporary lower mask, all True (values of the lower mask where the lower mask is False, set to True)
173 tlmk = ~lmk[~lmk]
174 # only set a subset of the lower mask values to False (coarsening)
175 tlmk[::kk] = False
176 # set lower mask form temporary mask
177 lmk[~lmk] = tlmk
178 ## Reason: We care about the maximum lower bounds which are not below upper bounds
179 # temporary lower mask, all True (values of the mask where the mask is True, outside the fire mask)
180 tlmk = lmk[mk]
181 # temporary lower mask 2, all True (subset of the previous mask where the lower bounds is not Nan)
182 t2lmk = tlmk[~np.isnan(ll[mk])]
183 # set to False in the temporary lower mask 2 where lower bounds have maximum value
184 t2lmk[ll[mk][~np.isnan(ll[mk])] == mll] = False
185 # set temporary lower mask from temporary lower mask 2
186 tlmk[~np.isnan(ll[mk])] = t2lmk
187 # set lower mask from temporary lower mask
188 lmk[mk] = tlmk
189 ## Reason: Coarsening of the not maximum lower bounds not below the upper bounds to create balance
190 # set subset outside of the fire mask for the rest
191 # create coarsening of the not maximum lower bounds not below the upper bounds to be similar amount that the current number of lower bounds
192 kk = (ll[mk][~np.isnan(ll[mk])] < mll).sum()/(~np.isnan(ll[~lmk])).sum()
193 if kk:
194 # temporary lower mask, values of the current lower mask outside of the original fire mask
195 tlmk = lmk[mk]
196 # temporary lower mask 2, subset of the previous mask where the lower bound is not Nan
197 t2lmk = tlmk[~np.isnan(ll[mk])]
198 # temporary lower mask 3, subset of the previous mask where the lower bounds are not maximum
199 t3lmk = t2lmk[ll[mk][~np.isnan(ll[mk])] < mll]
200 # coarsening of the temporary lower mask 3
201 t3lmk[::kk] = False
202 # set the temporary lower mask 2 from the temporary lower mask 3
203 t2lmk[ll[mk][~np.isnan(ll[mk])] < mll] = t3lmk
204 # set the temporary lower mask from the temporary lower mask 2
205 tlmk[~np.isnan(ll[mk])] = t2lmk
206 # set the lower mask from the temporary lower mask
207 lmk[mk] = tlmk
209 # Masking lower bounds from previous lower mask
210 ll[lmk] = np.nan
212 # Values different than NaN in the upper and lower bounds
213 um = np.array(~np.isnan(uu))
214 lm = np.array(~np.isnan(ll))
215 # Define all the x, y, and z components of upper and lower bounds
216 ux = lon[um]
217 uy = lat[um]
218 uz = uu[um]
219 lx = lon[lm]
220 ly = lat[lm]
221 lz = ll[lm]
223 # Create the data to call SVM3 function from svm.py
224 X = np.c_[np.concatenate((lx,ux)),np.concatenate((ly,uy)),np.concatenate((lz,uz))]
225 y = np.concatenate((-np.ones(len(lx)),np.ones(len(ux))))
226 # Print the shape of the data
227 print 'shape X: ', X.shape
228 print 'shape y: ', y.shape
230 if C is None:
231 c = 80*np.ones(y.shape)
232 else:
233 c = np.concatenate((np.ravel(C[0])[lm],np.ravel(C[1])[um]))
235 # Clean data if not in bounding box
236 bbox = (lon.min(),lon.max(),lat.min(),lat.max(),time_num_granules)
237 geo_mask = np.logical_and(np.logical_and(np.logical_and(X[:,0] >= bbox[0],X[:,0] <= bbox[1]), X[:,1] >= bbox[2]), X[:,1] <= bbox[3])
238 btime = (0,(scale[1]-scale[0])/tscale)
239 time_mask = np.logical_and(X[:,2] >= btime[0], X[:,2] <= btime[1])
240 whole_mask = np.logical_and(geo_mask, time_mask)
241 X = X[whole_mask,:]
242 y = y[whole_mask]
243 c = c[whole_mask]
245 return X,y,c
247 def make_fire_mesh(fxlon, fxlat, it, nt):
249 Create a mesh of points to evaluate the decision function
251 :param fxlon: data to base x-axis meshgrid on
252 :param fxlat: data to base y-axis meshgrid on
253 :param it: data to base z-axis meshgrid on
254 :param nt: tuple of number of nodes at each direction, optional
255 :param coarse: coarsening of the fire mesh
256 :return xx, yy, zz: ndarray
258 Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
259 Angel Farguell (angel.farguell@gmail.com), 2019-04-01
262 xx = np.repeat(fxlon[:, :, np.newaxis], nt, axis=2)
263 yy = np.repeat(fxlat[:, :, np.newaxis], nt, axis=2)
264 tt = np.linspace(it[0],it[1],nt)
265 zz = np.swapaxes(np.swapaxes(np.array([np.ones(fxlon.shape)*t for t in tt]),0,1),1,2)
267 return xx, yy, zz
269 def make_meshgrid(x, y, z, s=(50,50,50), exp=.1):
271 Create a mesh of points to evaluate the decision function
273 :param x: data to base x-axis meshgrid on
274 :param y: data to base y-axis meshgrid on
275 :param z: data to base z-axis meshgrid on
276 :param s: tuple of number of nodes at each direction, optional
277 :param exp: extra percentage of time steps in each direction (between 0 and 1), optional
278 :return xx, yy, zz: ndarray
280 Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
281 Angel Farguell (angel.farguell@gmail.com), 2019-02-20
282 Modified version of:
283 https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
286 if not isinstance(s, tuple):
287 print '>> FAILED <<'
288 print 'The number of nodes at each direction is not a tuple: ', s
289 sys.exit(1)
290 # number of nodes in each direction
291 sx, sy, sz = np.array(s).astype(int)
292 # extra step sizes in each direction
293 brx = int(sx * exp)
294 bry = int(sy * exp)
295 brz = int(sz * exp)
296 # grid lengths in each directon
297 lx = x.max() - x.min()
298 ly = y.max() - y.min()
299 lz = z.max() - z.min()
300 # grid resolutions in each direction
301 hx = lx / (sx - 2*brx - 1)
302 hy = ly / (sy - 2*bry - 1)
303 hz = lz / (sz - 2*brz - 1)
304 # extrem values for each dimension
305 x_min, x_max = x.min() - brx * hx, x.max() + brx * hx
306 y_min, y_max = y.min() - bry * hy, y.max() + bry * hy
307 z_min, z_max = z.min() - brz * hz, z.max() + brz * hz
308 # generating the mesh grid
309 xx, yy, zz = np.meshgrid(np.linspace(y_min, y_max, sy),
310 np.linspace(x_min, x_max, sx),
311 np.linspace(z_min, z_max, sz))
312 return xx, yy, zz
314 def find_roots(Fx,Fy,zr,Z,plot_poly=False):
316 Find at each point where the roots of Z are.
318 :param Fx: meshgrid ndarray with x component
319 :param Fy: meshgrid ndarray with y component
320 :param zr: meshgrid ndarray with z levels
321 :param plot_poly: boolean of plotting polynomial approximation
322 :return Fz: 2D mesh with the hyperplane z which gives decision functon 0
324 Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
325 Angel Farguell (angel.farguell@gmail.com), 2019-02-20
326 Modified version of:
327 https://www.semipol.de/2015/10/29/SVM-separating-hyperplane-3d-matplotlib.html
329 if plot_poly:
330 fig = plt.figure()
331 # Computing fire arrival time from previous decision function
332 print '>> Computing fire arrival time...'
333 sys.stdout.flush()
334 t_1 = time()
335 # Initializing fire arrival time
336 Fz = np.zeros(Fx.shape)
337 # For each x and y
338 for k1 in range(Fx.shape[0]):
339 for k2 in range(Fx.shape[1]):
340 # Approximate the vertical decision function by a piecewise polynomial (cubic spline interpolation)
341 pz = interpolate.CubicSpline(zr, Z[k1,k2])
342 # Compute the real roots of the the piecewise polynomial
343 rr = pz.roots()
344 # Just take the real roots between min(zz) and max(zz)
345 realr = rr.real[np.logical_and(abs(rr.imag) < 1e-5, np.logical_and(rr.real > zr.min(), rr.real < zr.max()))]
346 if len(realr) > 0:
347 # Take the minimum root
348 Fz[k1,k2] = realr.min()
349 # Plotting the approximated polynomial with the decision function
350 if plot_poly:
351 try:
352 plt.ion()
353 plt.plot(pz(zr),zr)
354 plt.plot(Z[k1,k2],zr,'+')
355 plt.plot(np.zeros(len(realr)),realr,'o',c='g')
356 plt.plot(0,Fz[k1,k2],'o',markersize=3,c='r')
357 plt.title('Polynomial approximation of decision_function(%f,%f,z)' % (Fx[k1,k2],Fy[k1,k2]))
358 plt.xlabel('decision function value')
359 plt.ylabel('Z')
360 plt.legend(['polynomial','decision values','roots','fire arrival time'])
361 plt.xlim([Z.min(),Z.max()])
362 plt.ylim([zz.min(),zz.max()])
363 plt.show()
364 plt.pause(.001)
365 plt.cla()
366 except Exception as e:
367 print 'Warning: something went wrong when plotting...'
368 print e
369 else:
370 # If there is not a real root of the polynomial between zz.min() and zz.max(), just define as a Nan
371 Fz[k1,k2] = np.nan
372 t_2 = time()
373 return Fz
375 def frontier(clf, xx, yy, zz, bal=.5, plot_decision=False, plot_poly=False, using_weights=False, save_decision=False):
377 Compute the surface decision frontier for a classifier.
379 :param clf: a classifier
380 :param xx: meshgrid ndarray
381 :param yy: meshgrid ndarray
382 :param zz: meshgrid ndarray
383 :param bal: number between 0 and 1, balance between lower and upper bounds in decision function (in case not level 0)
384 :param plot_decision: boolean of plotting decision volume
385 :param plot_poly: boolean of plotting polynomial approximation
386 :return F: 2D meshes with xx, yy coordinates and the hyperplane z which gives decision functon 0
388 Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
389 Angel Farguell (angel.farguell@gmail.com), 2019-02-20
390 Modified version of:
391 https://www.semipol.de/2015/10/29/SVM-separating-hyperplane-3d-matplotlib.html
394 # Creating the 3D grid
395 XX = np.c_[np.ravel(xx), np.ravel(yy), np.ravel(zz)]
397 # Evaluating the decision function
398 print '>> Evaluating the decision function...'
399 sys.stdout.flush()
400 t_1 = time()
401 if using_weights:
402 from libsvm_weights.python.svmutil import svm_predict
403 _, _, p_vals = svm_predict([], XX, clf, '-q')
404 ZZ = np.array([p[0] for p in p_vals])
405 else:
406 ZZ = clf.decision_function(XX)
407 t_2 = time()
408 print 'elapsed time: %ss.' % str(abs(t_2-t_1))
409 hist = np.histogram(ZZ)
410 print 'counts: ', hist[0]
411 print 'values: ', hist[1]
412 print 'decision function range: ', ZZ.min(), '~', ZZ.max()
414 # Reshaping decision function volume
415 Z = ZZ.reshape(xx.shape)
416 print 'decision function shape: ', Z.shape
417 if save_decision:
418 sl.save((xx,yy,zz,Z),'decision')
420 if plot_decision:
421 try:
422 from skimage import measure
423 from shiftcmap import shiftedColorMap
424 verts, faces, normals, values = measure.marching_cubes_lewiner(Z, level=0, allow_degenerate=False)
425 # Scale and transform to actual size of the interesting volume
426 h = np.divide([xx.max()-xx.min(), yy.max() - yy.min(), zz.max() - zz.min()],np.array(xx.shape)-1)
427 verts = verts * h
428 verts = verts + [xx.min(), yy.min(), zz.min()]
429 mesh = Poly3DCollection(verts[faces], facecolor='orange', alpha=.9)
430 fig = plt.figure()
431 ax = fig.gca(projection='3d')
432 fig.suptitle("Decision volume")
433 col = [(0, .5, 0), (.5, .5, .5), (.5, 0, 0)]
434 cm = colors.LinearSegmentedColormap.from_list('GrRdD',col,N=50)
435 midpoint = 1 - ZZ.max() / (ZZ.max() + abs(ZZ.min()))
436 shiftedcmap = shiftedColorMap(cm, midpoint=midpoint, name='shifted')
437 kk = 1+np.divide(xx.shape,50)
438 X = np.ravel(xx[::kk[0],::kk[1],::kk[2]])
439 Y = np.ravel(yy[::kk[0],::kk[1],::kk[2]])
440 T = np.ravel(zz[::kk[0],::kk[1],::kk[2]])
441 CC = np.ravel(Z[::kk[0],::kk[1],::kk[2]])
442 p = ax.scatter(X, Y, T, c=CC, s=.1, alpha=.5, cmap=shiftedcmap)
443 cbar = fig.colorbar(p)
444 cbar.set_label('decision function value', rotation=270, labelpad=20)
445 ax.add_collection3d(mesh)
446 ax.set_zlim([xx.min(),xx.max()])
447 ax.set_ylim([yy.min(),yy.max()])
448 ax.set_zlim([zz.min(),zz.max()])
449 ax.set_xlabel("Longitude normalized")
450 ax.set_ylabel("Latitude normalized")
451 ax.set_zlabel("Time normalized")
452 plt.savefig('decision.png')
453 except Exception as e:
454 print 'Warning: something went wrong when plotting...'
455 print e
457 # xx 2-dimensional array
458 Fx = xx[:, :, 0]
459 # yy 2-dimensional array
460 Fy = yy[:, :, 0]
461 # zz 1-dimensional array
462 zr = zz[0, 0]
463 # find roots
464 Fz = find_roots(Fx,Fy,zr,Z,plot_poly=plot_poly)
466 print 'elapsed time: %ss.' % str(abs(t_2-t_1))
467 F = [Fx,Fy,Fz]
469 return F
471 def SVM3(X, y, C=1., kgam=1., fire_grid=None, it=None, **params):
473 3D SuperVector Machine analysis and plot
475 :param X: Training vectors, where n_samples is the number of samples and n_features is the number of features.
476 :param y: Target values
477 :param C: Penalization (argument of svm.SVC class), optional
478 :param kgam: Scalar multiplier for gamma (capture more details increasing it), optional
479 :param fire_grid: The longitud and latitude grid where to have the fire arrival time, optional
480 :return F: tuple with (longitude grid, latitude grid, fire arrival time grid)
482 Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
483 Angel Farguell (angel.farguell@gmail.com), 2019-02-20
484 Modified version of:
485 https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
487 # add default values for parameters not specified
488 params = verify_inputs(params)
489 print 'svm params: ',params
491 t_init = time()
493 col = [(0, .5, 0), (.5, 0, 0)]
494 cm_GR = colors.LinearSegmentedColormap.from_list('GrRd',col,N=2)
495 col = [(1, 0, 0), (.25, 0, 0)]
496 cm_Rds = colors.LinearSegmentedColormap.from_list('Rds',col,N=100)
498 # if fire_grid==None: creation of the grid options
499 # number of vertical nodes per observation
500 vN = 1
501 # number of horizontal nodes per observation
502 hN = 5
504 # using different weights for the data
505 if isinstance(C,(list,tuple,np.ndarray)):
506 using_weights = True
507 from libsvm_weights.python.svm import svm_problem, svm_parameter
508 from libsvm_weights.python.svmutil import svm_train
509 from sklearn.utils import compute_class_weight
510 else:
511 using_weights = False
513 # Data inputs
514 X = np.array(X).astype(float)
515 y = np.array(y)
517 # Original data
518 oX = np.array(X).astype(float)
519 oy = np.array(y)
521 # Visualization of the data
522 X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
523 if params['plot_data']:
524 try:
525 fig = plt.figure()
526 ax = fig.gca(projection='3d')
527 fig.suptitle("Plotting the original data to fit")
528 ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
529 ax.set_xlabel("Longitude")
530 ax.set_ylabel("Latitude")
531 ax.set_zlabel("Time (days)")
532 plt.savefig('original_data.png')
533 except Exception as e:
534 print 'Warning: something went wrong when plotting...'
535 print e
537 # Normalization of the data into [0,1]^3
538 if params['norm']:
539 xmin = X0.min()
540 xlen = X0.max() - X0.min()
541 x0 = np.divide(X0 - xmin, xlen)
542 ymin = X1.min()
543 ylen = X1.max() - X1.min()
544 x1 = np.divide(X1 - ymin, ylen)
545 zmin = X2.min()
546 zlen = X2.max() - X2.min()
547 x2 = np.divide(X2 - zmin, zlen)
548 X0, X1, X2 = x0, x1, x2
549 X[:, 0] = X0
550 X[:, 1] = X1
551 X[:, 2] = X2
553 # Creation of fire and ground artificial detections
554 if params['artil'] or params['artiu'] or params['toparti'] or params['downarti']:
555 # Extreme values at z direction
556 minz = X[:, 2].min()
557 maxz = X[:, 2].max()
558 # Division of lower and upper bounds for data and confidence level
559 fl = X[y==np.unique(y)[0]]
560 fu = X[y==np.unique(y)[1]]
562 # Artifitial extensions of the lower bounds
563 if params['artil']:
564 # Create artificial lower bounds
565 flz = np.array([ np.unique(np.append(np.arange(f[2],minz,-params['hartil']),f[2])) for f in fl ])
566 # Definition of new ground detections after artificial detections added
567 Xg = np.concatenate([ np.c_[(np.repeat(fl[k][0],len(flz[k])),np.repeat(fl[k][1],len(flz[k])),flz[k])] for k in range(len(flz)) ])
568 if using_weights:
569 cl = C[y==np.unique(y)[0]]
570 Cg = np.concatenate([ np.repeat(cl[k],len(flz[k])) for k in range(len(flz)) ])
571 else:
572 Xg = fl
573 if using_weights:
574 cl = C[y==np.unique(y)[0]]
575 Cg = cl
577 # Artifitial extensions of the upper bounds
578 if params['artiu']:
579 # Create artificial upper bounds
580 fuz = np.array([ np.unique(np.append(np.arange(f[2],maxz,params['hartiu']),f[2])) for f in fu ])
581 # Definition of new fire detections after artificial detections added
582 Xf = np.concatenate([ np.c_[(np.repeat(fu[k][0],len(fuz[k])),np.repeat(fu[k][1],len(fuz[k])),fuz[k])] for k in range(len(fuz)) ])
583 # Define new confidence levels
584 if using_weights:
585 cu = C[y==np.unique(y)[1]]
586 Cf = np.concatenate([ np.repeat(cu[k],len(fuz[k])) for k in range(len(fuz)) ])
587 else:
588 Xf = fu
589 if using_weights:
590 cu = C[y==np.unique(y)[1]]
591 Cf = cu
593 # Bottom artificial lower bounds
594 if params['downarti']:
595 # Creation of the x,y new mesh of artificial lower bounds
596 xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
597 np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
598 # All the artificial new mesh are going to be below the data
599 zng = np.repeat(minz-params['dminz'],len(np.ravel(xn)))
600 # Artifitial lower bounds
601 Xga = np.c_[np.ravel(xn),np.ravel(yn),np.ravel(zng)]
602 # Definition of new ground detections after down artificial lower detections
603 Xgn = np.concatenate((Xg,Xga))
604 # Definition of new confidence level
605 if using_weights:
606 Cga = np.ones(len(Xga))*params['confal']
607 Cgn = np.concatenate((Cg,Cga))
608 else:
609 Xgn = Xg
610 if using_weights:
611 Cgn = Cg
613 # Top artificial upper bounds
614 if params['toparti']:
615 # Creation of the x,y new mesh of artificial upper bounds
616 xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
617 np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
618 # All the artificial new mesh are going to be over the data
619 znf = np.repeat(maxz+params['dmaxz'],len(np.ravel(xn)))
620 # Artifitial upper bounds
621 Xfa = np.c_[np.ravel(xn),np.ravel(yn),np.ravel(znf)]
622 # Definition of new fire detections after top artificial upper detections
623 Xfn = np.concatenate((Xf,Xfa))
624 # Definition of new confidence level
625 if using_weights:
626 Cfa = np.ones(len(Xfa))*params['confau']
627 Cfn = np.concatenate((Cf,Cfa))
628 else:
629 Xfn = Xf
630 if using_weights:
631 Cfn = Cf
633 # New definition of the training vectors
634 X = np.concatenate((Xgn, Xfn))
635 # New definition of the target values
636 y = np.concatenate((np.repeat(np.unique(y)[0],len(Xgn)),np.repeat(np.unique(y)[1],len(Xfn))))
637 # New definition of the confidence level
638 if using_weights:
639 C = np.concatenate((Cgn, Cfn))
640 # New definition of each feature vector
641 X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
643 # Printing number of samples and features
644 n0 = (y==np.unique(y)[0]).sum().astype(float)
645 n1 = (y==np.unique(y)[1]).sum().astype(float)
646 n_samples, n_features = X.shape
647 print 'n_samples =', n_samples
648 print 'n_samples_{-1} =', int(n0)
649 print 'n_samples_{+1} =', int(n1)
650 print 'n_features =', n_features
652 # Visualization of scaled data
653 if params['plot_scaled']:
654 try:
655 fig = plt.figure()
656 ax = fig.gca(projection='3d')
657 fig.suptitle("Plotting the data scaled to fit")
658 ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
659 ax.set_xlabel("Longitude normalized")
660 ax.set_ylabel("Latitude normalized")
661 ax.set_zlabel("Time normalized")
662 plt.savefig('scaled_data.png')
663 except Exception as e:
664 print 'Warning: something went wrong when plotting...'
665 print e
667 # Reescaling gamma to include more detailed results
668 gamma = 1. / (n_features * X.std())
669 print 'gamma =', gamma
671 # Creating the SVM model and fitting the data using Super Vector Machine technique
672 print '>> Creating the SVM model...'
673 sys.stdout.flush()
674 if using_weights:
675 gamma = kgam*gamma
676 # Compute class balanced weights
677 cls, _ = np.unique(y, return_inverse=True)
678 class_weight = compute_class_weight("balanced", cls, y)
679 prob = svm_problem(C,y,X)
680 arg = '-g %.15g -w%01d %.15g -w%01d %.15g -m 1000 -h 0' % (gamma, cls[0], class_weight[0],
681 cls[1], class_weight[1])
682 param = svm_parameter(arg)
683 print '>> Fitting the SVM model...'
684 t_1 = time()
685 clf = svm_train(prob,param)
686 t_2 = time()
687 else:
688 t_1 = time()
689 if params['search']:
690 print '>> Searching for best value of C and gamma...'
691 # Grid Search
692 # Parameter Grid
693 param_grid = {'C': np.logspace(0,5,6), 'gamma': gamma*np.logspace(0,5,6)}
694 # Make grid search classifier
695 grid_search = GridSearchCV(svm.SVC(cache_size=2000,class_weight="balanced",probability=True), param_grid, n_jobs=-1, verbose=1, cv=5, iid=False)
696 print '>> Fitting the SVM model...'
697 # Train the classifier
698 grid_search.fit(X, y)
699 print "Best Parameters:\n", grid_search.best_params_
700 clf = grid_search.best_estimator_
701 print "Best Estimators:\n", clf
702 else:
703 gamma = kgam*gamma
704 clf = svm.SVC(C=C, kernel="rbf", gamma=gamma, cache_size=2000, class_weight="balanced") # default kernel: exp(-gamma||x-x'||^2)
705 print clf
706 print '>> Fitting the SVM model...'
707 # Fitting the data using Super Vector Machine technique
708 clf.fit(X, y)
709 t_2 = time()
710 print 'elapsed time: %ss.' % str(abs(t_2-t_1))
712 if not using_weights:
713 # Check if the classification failed
714 if clf.fit_status_:
715 print '>> FAILED <<'
716 print 'Failed fitting the data'
717 sys.exit(1)
718 print 'number of support vectors: ', clf.n_support_
719 print 'score of trained data: ', clf.score(X,y)
721 # Creating the mesh grid to evaluate the classification
722 print '>> Creating mesh grid to evaluate the classification...'
723 nnodes = np.ceil(np.power(n_samples,1./n_features))
724 if fire_grid is None:
725 # Number of necessary nodes
726 hnodes = hN*nnodes
727 vnodes = vN*nnodes
728 print 'number of horizontal nodes (%d meshgrid nodes for each observation): %d' % (hN,hnodes)
729 print 'number of vertical nodes (%d meshgrid nodes for each observation): %d' % (vN,vnodes)
730 # Computing resolution of the mesh to evaluate
731 sdim = (hnodes,hnodes,vnodes)
732 print 'grid_size = %dx%dx%d = %d' % (sdim[0],sdim[1],sdim[2],np.prod(sdim))
733 t_1 = time()
734 xx, yy, zz = make_meshgrid(X0, X1, X2, s=sdim)
735 t_2 = time()
736 else:
737 fxlon = np.divide(fire_grid[0] - xmin, xlen)
738 fxlat = np.divide(fire_grid[1] - ymin, ylen)
739 if it is None:
740 it = (X2.min(),X2.max())
741 else:
742 it = np.divide(np.array(it) - zmin, zlen)
743 vnodes = vN*nnodes
744 sdim = (fxlon.shape[0],fxlon.shape[1],vnodes)
745 print 'fire_grid_size = %dx%dx%d = %d' % (sdim + (np.prod(sdim),))
746 t_1 = time()
747 xx, yy, zz = make_fire_mesh(fxlon, fxlat, it, sdim[2])
748 t_2 = time()
749 print 'grid_created = %dx%dx%d = %d' % (zz.shape + (np.prod(zz.shape),))
750 print 'elapsed time: %ss.' % str(abs(t_2-t_1))
752 # Computing the 2D fire arrival time, F
753 print '>> Computing the 2D fire arrival time, F...'
754 sys.stdout.flush()
755 F = frontier(clf, xx, yy, zz, plot_decision=params['plot_decision'], plot_poly=params['plot_poly'], using_weights=using_weights)
757 print '>> Creating final results...'
758 sys.stdout.flush()
759 # Plotting the Separating Hyperplane of the SVM classification with the support vectors
760 if params['plot_supports']:
761 try:
762 if using_weights:
763 supp_ind = np.sort(clf.get_sv_indices())-1
764 supp_vec = X[supp_ind]
765 else:
766 supp_ind = clf.support_
767 supp_vec = clf.support_vectors_
768 fig = plt.figure()
769 ax = fig.gca(projection='3d')
770 fig.suptitle("Plotting the 3D Separating Hyperplane of an SVM")
771 # plotting the separating hyperplane
772 ax.plot_surface(F[0], F[1], F[2], color='orange', alpha=.3)
773 # computing the indeces where no support vectors
774 rr = np.array(range(len(y)))
775 ms = np.isin(rr,supp_ind)
776 nsupp = rr[~ms]
777 # plotting no-support vectors (smaller)
778 ax.scatter(X0[nsupp], X1[nsupp], X2[nsupp], c=y[nsupp], cmap=cm_GR, s=.5, vmin=y.min(), vmax=y.max(), alpha=.1)
779 # plotting support vectors (bigger)
780 ax.scatter(supp_vec[:, 0], supp_vec[:, 1], supp_vec[:, 2], c=y[supp_ind], cmap=cm_GR, s=10, edgecolors='k', linewidth=.5, alpha=.5);
781 ax.set_xlim(xx.min(),xx.max())
782 ax.set_ylim(yy.min(),yy.max())
783 ax.set_zlim(zz.min(),zz.max())
784 ax.set_xlabel("Longitude normalized")
785 ax.set_ylabel("Latitude normalized")
786 ax.set_zlabel("Time normalized")
787 plt.savefig('support.png')
788 except Exception as e:
789 print 'Warning: something went wrong when plotting...'
790 print e
792 # Plot the fire arrival time resulting from the SVM classification normalized
793 if params['plot_result']:
794 try:
795 Fx, Fy, Fz = np.array(F[0]), np.array(F[1]), np.array(F[2])
796 with np.errstate(invalid='ignore'):
797 Fz[Fz > X2.max()] = np.nan
798 if params['notnan']:
799 Fz[np.isnan(Fz)] = X2.max()
800 Fz = np.minimum(Fz, X2.max())
801 fig = plt.figure()
802 ax = fig.gca(projection='3d')
803 fig.suptitle("Fire arrival time normalized")
804 # plotting fire arrival time
805 p = ax.plot_surface(Fx, Fy, Fz, cmap=cm_Rds,
806 linewidth=0, antialiased=False)
807 ax.set_xlim(xx.min(),xx.max())
808 ax.set_ylim(yy.min(),yy.max())
809 ax.set_zlim(zz.min(),zz.max())
810 cbar = fig.colorbar(p)
811 cbar.set_label('Fire arrival time normalized', labelpad=20, rotation=270)
812 ax.set_xlabel("Longitude normalized")
813 ax.set_ylabel("Latitude normalized")
814 ax.set_zlabel("Time normalized")
815 plt.savefig('tign_g.png')
816 except Exception as e:
817 print 'Warning: something went wrong when plotting...'
818 print e
820 # Translate the result again into initial data scale
821 if params['norm']:
822 f0 = F[0] * xlen + xmin
823 f1 = F[1] * ylen + ymin
824 f2 = F[2] * zlen + zmin
825 FF = [f0,f1,f2]
827 # Set all the larger values at the end to be the same maximum value
828 oX0, oX1, oX2 = oX[:, 0], oX[:, 1], oX[:, 2]
829 FFx, FFy, FFz = FF[0], FF[1], FF[2]
831 if params['notnan']:
832 with np.errstate(invalid='ignore'):
833 FFz[np.isnan(FFz)] = np.nanmax(FFz)
834 FF = [FFx,FFy,FFz]
836 # Plot the fire arrival time resulting from the SVM classification
837 if params['plot_result']:
838 try:
839 # Plotting the result
840 fig = plt.figure()
841 ax = fig.gca(projection='3d')
842 fig.suptitle("Plotting the 3D graph function of a SVM")
843 FFx, FFy, FFz = np.array(FF[0]), np.array(FF[1]), np.array(FF[2])
844 # plotting original data
845 ax.scatter(oX0, oX1, oX2, c=oy, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
846 # plotting fire arrival time
847 ax.plot_wireframe(FFx, FFy, FFz, color='orange', alpha=.5)
848 ax.set_xlabel("Longitude")
849 ax.set_ylabel("Latitude")
850 ax.set_zlabel("Time (days)")
851 plt.savefig('result.png')
852 except Exception as e:
853 print 'Warning: something went wrong when plotting...'
854 print e
856 print '>> SUCCESS <<'
857 t_final = time()
858 print 'TOTAL elapsed time: %ss.' % str(abs(t_final-t_init))
859 plt.close()
861 return FF
864 if __name__ == "__main__":
865 # Experiment to do
866 exp = 1
867 search = True
869 # Defining ground and fire detections
870 def exp1():
871 Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0]]
872 Xf = [[0, 0, 1], [1, 1, 0], [2, 2, 1], [2, 0, 1], [0, 2, 1]]
873 C = 10.
874 kgam = 1.
875 return Xg, Xf, C, kgam
876 def exp2():
877 Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0],
878 [4, 2, 0], [4, 0, 0], [2, 1, .5], [0, 1, .5],
879 [4, 1, .5], [2, 0, .5], [2, 2, .5]]
880 Xf = [[0, 0, 1], [1, 1, 0.25], [2, 2, 1], [2, 0, 1], [0, 2, 1], [3, 1, 0.25], [4, 2, 1], [4, 0, 1]]
881 C = np.concatenate((np.array([50.,50.,50.,50.,50.,50.,
882 1000.,100.,100.,100.,100.]), 100.*np.ones(len(Xf))))
883 kgam = 5.
884 return Xg, Xf, C, kgam
886 # Creating the options
887 options = {1 : exp1, 2 : exp2}
889 # Defining the option depending on the experiment
890 Xg, Xf, C, kgam = options[exp]()
892 # Creating the data necessary to run SVM3 function
893 X = np.concatenate((Xg, Xf))
894 y = np.concatenate((-np.ones(len(Xg)), np.ones(len(Xf))))
896 # Running SVM classification
897 SVM3(X,y,C=C,kgam=kgam,search=search,plot_result=True)