"""
Created on Tue Aug 14 14:52:18 2018

@author: Jonathan
"""
from __future__ import print_function

import json
import random
import time

import keras
from keras import regularizers
from keras.layers import Dense
from keras.layers import Dropout
from keras.models import Sequential
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.preprocessing as preprocessing
import tensorflow as tf
from sklearn import svm
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import chi2
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
# Seed both RNG sources so repeated runs are reproducible.
# (The original ``global gdata, x_vectors, y_vectors`` statement was at
# module level, where ``global`` is a no-op, so it was dropped; those
# names become globals simply by being assigned in genclean().)
random.seed(1)
np.random.seed(2)
def failuremodes():
    """Plot two failure modes of the system on stacked subplots.

    Bottom panel: a pair of series whose covariance structure changes
    halfway through (independent -> correlation 0.9).
    Top panel: a series whose variance decreases halfway through
    (sigma 2.5 -> 1), with reference brackets.

    NOTE(review): the original docstring described a ``device_name``
    parameter, but the function takes no arguments.

    Returns
    -------
    None. Shows the figure and publishes ``y2vals`` and ``y3`` as module
    globals for later use by ``ECDFplotter``.
    """
    x1 = np.linspace(0, 1, 10**5)

    global y2vals, y3
    # First half: uncorrelated bivariate normal; second half: strongly correlated.
    initialy = np.random.multivariate_normal([0, 0], [[1, 0], [0, 1]], 5 * 10**4)
    finaly = np.random.multivariate_normal([0, 0], [[1, 0.9], [0.9, 1]], 5 * 10**4)
    y1vals = [i[0] for i in initialy] + [i[0] for i in finaly]
    y2vals = [i[1] for i in initialy] + [i[1] for i in finaly]

    # Reference brackets at +/-2 (restored '-' lost in transcription).
    uppery1bracket = np.zeros(10**5) + 2
    lowery1bracket = np.zeros(10**5) - 2

    # Variance drop at the halfway point; brackets at +/-5.
    y3 = np.append(np.random.normal(0, 2.5, 5 * 10**4),
                   np.random.normal(0, 1, 5 * 10**4))
    uppery3bracket = np.zeros(10**5) + 5
    lowery3bracket = np.zeros(10**5) - 5

    plt.subplot(2, 1, 2)
    plt.plot(x1, y1vals)
    plt.plot(x1, y2vals)
    # linestyle came through blank in the transcription; assuming dashed
    # ('--') brackets and dividers -- TODO confirm against the original.
    plt.plot(x1, uppery1bracket, color='k', linestyle='--')
    plt.plot(x1, lowery1bracket, color='k', linestyle='--')
    plt.tick_params(
        axis='both',
        direction='in',
        which='both',
        bottom=True,
        top=True,
        left=True,
        right=True,
        labelleft=True,
        labelbottom=True)
    plt.axvline(x=0.5, color='k', linestyle='--')
    plt.xlabel(r'$\tau$')

    plt.subplot(2, 1, 1)
    plt.rc('xtick', labelsize=8)
    plt.rc('ytick', labelsize=8)
    plt.rc('axes', labelsize=8)
    plt.plot(x1, y3)

    plt.plot(x1, uppery3bracket, color='k', linestyle='--')
    plt.plot(x1, lowery3bracket, color='k', linestyle='--')
    plt.tick_params(
        axis='both',
        direction='in',
        which='both',
        bottom=True,
        top=True,
        left=True,
        right=True,
        labelleft=True,
        labelbottom=True)
    plt.axvline(x=0.5, color='k', linestyle='--')
    plt.xlabel('time (s)')

    # NOTE(review): this second xlabel overwrites 'time (s)' set just above.
    plt.xlabel(r'$\tau$')

    plt.legend()
    plt.show()
def ECDFplotter():
    """Plot empirical CDFs of the two halves of ``y2vals``.

    Relies on ``failuremodes()`` having populated the module-global
    ``y2vals`` (first 5e4 samples: initial distribution; last 5e4:
    final distribution). Cumulative step histograms approximate the ECDF.
    """
    global y2vals, y3

    n_bins = 50
    plt.rc('xtick', labelsize=8)
    plt.rc('ytick', labelsize=8)
    plt.rc('axes', labelsize=8)

    n, bins, patches = plt.hist(y2vals[:5 * 10**4], n_bins, density=True,
                                histtype='step', cumulative=True,
                                label='Initial Distribution')
    n, bins, patches = plt.hist(y2vals[5 * 10**4:], n_bins, density=True,
                                histtype='step', cumulative=True,
                                label='Final Distribution')

    plt.legend(loc=2)
def genclean():
    """Generate clean data from the sullied data of '240818RUNLABEL.txt'.

    Populates module globals:
      gdata            raw JSON payload from the file
      x_vectors        first 52 entries of each record (scan settings)
      y_vectors        remaining entries of each record (readouts)
      x_vectors_clean / y_vectors_clean
                       records containing no sentinel readouts
      y_labels_clean   1 if the record is within one standard deviation of
                       the mean on intensity and both positions, else 0
    """
    global gdata, x_vectors, y_vectors, y_labels, x_vectors_clean, y_vectors_clean, y_labels_clean

    with open('240818RUNLABEL.txt') as json_data:
        gdata = json.load(json_data)

    data = gdata[0:1064]

    x_vectors = [i[0:52] for i in data]
    # NOTE(review): the transcription showed 'i[53:1]', which is an empty
    # slice; assuming a stripped '-' and restoring 'i[53:-1]' -- confirm.
    y_vectors = [i[53:-1] for i in data]
    y_labels = []

    x_vectors_clean = []
    y_vectors_clean = []
    y_labels_clean = []

    # Drop any record containing a sentinel readout (999999 or 0).
    for i in range(len(y_vectors)):
        baddata = False
        for readout in y_vectors[i]:
            if readout == 999999 or readout == 0:
                baddata = True
                break
        if not baddata:
            x_vectors_clean.append(x_vectors[i])
            y_vectors_clean.append(y_vectors[i])

    # Summary statistics of intensity (index 4), horizontal (13) and
    # vertical (27) readouts over the cleaned records.
    epintensities = [y_vector[4] for y_vector in y_vectors_clean]
    ephor = [y_vector[13] for y_vector in y_vectors_clean]
    epvert = [y_vector[27] for y_vector in y_vectors_clean]
    epim, epistd = np.mean(epintensities), np.std(epintensities)
    ephorm, ephorstd = np.mean(ephor), np.std(ephor)
    epvertm, epvertstd = np.mean(epvert), np.std(epvert)

    # Label 1 = "good": within one standard deviation of the mean on all
    # three monitored quantities (restored '-' lost in transcription).
    for i in range(len(epintensities)):
        label = 1
        if epintensities[i] < epim - epistd or epintensities[i] > epim + epistd:
            label = 0
        if ephor[i] < ephorm - ephorstd or ephor[i] > ephorm + ephorstd:
            label = 0
        if epvert[i] < epvertm - epvertstd or epvert[i] > epvertm + epvertstd:
            label = 0

        y_labels_clean.append(label)
def plotbpmdistold(n):
    """Scatter the distribution of values on BPM number ``n``.

    Uses the uncleaned ``y_vectors`` global and splits points into
    physical (< 99997) and unphysical (> 99997) groups. Indices restored
    from the transcription ('n1' -> 'n-1').
    """
    x_values1 = [i[n - 1] for i in y_vectors if i[n - 1] < 99997]
    y_values1 = [i[n + 13] for i in y_vectors if i[n + 13] < 99997]
    x_values2 = [i[n - 1] for i in y_vectors if i[n - 1] > 99997]
    y_values2 = [i[n + 13] for i in y_vectors if i[n + 13] > 99997]
    plt.scatter(x_values1, y_values1, label="Physical Data", linewidth=1.0)
    plt.scatter(x_values2, y_values2, label="Unphysical Data", linewidth=1.0)

    plt.title('Scan distribution in output space')
    # NOTE(review): both axes are labelled 'Horizontal' in the original;
    # the y axis presumably shows the vertical position -- confirm.
    plt.ylabel('BPM13 Horizontal Position')
    plt.xlabel('BPM13 Horizontal Position')

    plt.legend()
    plt.show()
def plotbpmdist(n, mstart, mend):
    """Scatter cleaned readouts of BPM ``n`` for records mstart..mend.

    Indices restored from the transcription ('n1' -> 'n-1').
    """
    x_values = [i[n - 1] for i in y_vectors_clean[mstart:mend]]
    y_values = [i[n + 13] for i in y_vectors_clean[mstart:mend]]

    plt.scatter(x_values, y_values, linewidth=2.0)

    plt.title('Scan distribution in output space')
    # NOTE(review): both axes labelled 'Horizontal' -- confirm intent.
    plt.ylabel('BPM2 Horizontal Position')
    plt.xlabel('BPM2 Horizontal Position')

    # NOTE(review): no artist here carries a label, so legend() warns.
    plt.legend()
    plt.show()
def plotscandist(n, mstart, mend):
    """Scatter the scan settings of corrector pair ``n`` for records
    mstart..mend of the cleaned input vectors.

    Indices restored from the transcription ('(n1)*2' -> '(n-1)*2').
    """
    x_values = [i[(n - 1) * 2] for i in x_vectors_clean[mstart:mend]]
    y_values = [i[(n - 1) * 2 + 1] for i in x_vectors_clean[mstart:mend]]

    plt.scatter(x_values, y_values, linewidth=2.0)

    plt.title('Scan distribution in parameter space')
    plt.xlabel('C13 LE Corrector')
    plt.ylabel('C14 LE Corrector')

    # NOTE(review): no artist here carries a label, so legend() warns.
    plt.legend()
    plt.show()
def ephisto():
    """Histogram the intensity (readout index 4) of the cleaned dataset."""
    epintensities = [y_vector[4] for y_vector in y_vectors_clean]
    plt.hist(epintensities, bins='auto')
    plt.title("Intensity distribution of dataset")
    plt.tick_params(
        axis='both',
        direction='in',
        which='both',
        bottom=True,
        top=True,
        left=True,
        right=True,
        labelleft=True,
        labelbottom=True)
    # Reference marker at intensity 2000 (dashed linestyle restored from
    # the transcription -- TODO confirm).
    plt.axvline(x=2000, color='r', linestyle='--')
    plt.show()
def cc():
    """Compare several classifiers on toy datasets plus the cleaned run data.

    Adapted from the scikit-learn "classifier comparison" example.
    # Code source: Gael Varoquaux
    #              Andreas Mueller
    # Modified for documentation by Jaques Grobler
    # License: BSD 3 clause
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.datasets import make_moons, make_circles, make_classification
    from sklearn.neural_network import MLPClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.gaussian_process.kernels import RBF
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    h = .02  # mesh step size for the decision-surface grid
    global X, y
    names = ["Nearest Neighbors", "RBF SVM", "Gaussian Process",
             "Decision Tree", "Neural Net",
             "Naive Bayes"]

    classifiers = [
        KNeighborsClassifier(3),
        SVC(gamma=2, C=1),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        DecisionTreeClassifier(max_depth=5),
        MLPClassifier(alpha=1),
        GaussianNB()]

    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               random_state=1, n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)

    global datasets, Xnew, Ynew

    # Third dataset: the cleaned machine data labelled by genclean().
    Xnew = np.array(x_vectors_clean)
    ynew = np.array(y_labels_clean)
    linearly_separable = (Xnew, ynew)

    datasets = [make_moons(noise=0.3, random_state=0),
                make_circles(noise=0.2, factor=0.5, random_state=1),
                linearly_separable
                ]

    figure = plt.figure(figsize=(18, 9))
    i = 1

    for ds_cnt, ds in enumerate(datasets):

        X, y = ds
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=.4, random_state=42)

        # Grid bounds padded by 0.5 (restored '-' lost in transcription).
        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))

        cm = plt.cm.RdBu
        cm_bright = ListedColormap(['#FF0000', '#0000FF'])
        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
        if ds_cnt == 0:
            ax.set_title("Input data", fontsize=15)

        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,
                   edgecolors='k')
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   alpha=0.6, edgecolors='k')
        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
        i += 1

        for name, clf in zip(names, classifiers):
            ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
            clf.fit(X_train, y_train)
            score = clf.score(X_test, y_test)

            # Decision surface: decision_function when available, otherwise
            # the positive-class probability.
            if hasattr(clf, "decision_function"):
                Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
            else:
                Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

            Z = Z.reshape(xx.shape)
            ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

            ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,
                       edgecolors='k')
            ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                       edgecolors='k', alpha=0.6)

            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_xticks(())
            ax.set_yticks(())
            if ds_cnt == 0:
                ax.set_title(name, fontsize=15)
            # Score annotation in the lower right (restored '-' lost in
            # transcription).
            ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
                    size=15, horizontalalignment='right')
            i += 1

    plt.tight_layout()
    plt.show()
def cc2():
    """Train and score several classifiers on the cleaned machine data.

    Prints accuracy, F1 and ROC-AUC for each model on a fixed 80/20
    train/test split. Feature scaling is applied only for the
    scale-sensitive models (Neural Net, Naive Bayes).
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.datasets import make_moons, make_circles, make_classification
    from sklearn.neural_network import MLPClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.gaussian_process.kernels import RBF
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    global X, y
    names = ["Nearest Neighbors", "RBF SVM", "Gaussian Process",
             "Boosting Decision Tree", "Neural Net",
             "Naive Bayes"]

    classifiers = [
        KNeighborsClassifier(10),
        SVC(gamma=8, C=1),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        GradientBoostingClassifier(n_estimators=200, learning_rate=1.0,
                                   max_depth=5, random_state=0),
        MLPClassifier(alpha=1),
        GaussianNB()]

    global datasets, Xnew, Ynew

    for name, clf in zip(names, classifiers):
        X = np.array(x_vectors_clean)
        y = np.array(y_labels_clean)
        print(str(name) + " Training started at " + str(time.ctime()))

        if name == "Neural Net" or name == "Naive Bayes":
            # Scale-sensitive models get standardized features.
            X = StandardScaler().fit_transform(X)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)
            clf.fit(X_train, y_train)
            print(str(name) + " Training ended at " + str(time.ctime()))
            # NOTE(review): refitting here duplicates the fit above.
            y_pred = clf.fit(X_train, y_train).predict(X_test)

        else:
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)
            clf.fit(X_train, y_train)
            print(str(name) + " Training ended at " + str(time.ctime()))
            y_pred = clf.fit(X_train, y_train).predict(X_test)

        print("Accuracy : " + str(name) + " is " + str(accuracy_score(y_test, y_pred)))
        print("F1 score : " + str(name) + " is " + str(f1_score(y_test, y_pred)))
        print("AUC : " + str(name) + " is " + str(roc_auc_score(y_test, y_pred)))
        # Bare ``print`` is a no-op expression under ``print_function``;
        # call it to emit the intended blank separator line.
        print()
def od():
    """Run several outlier detectors on the BPM position readouts and plot
    each detector's decision boundary.

    Adapted from the scikit-learn "outlier detection" example.

    NOTE(review): ``b``, ``c`` and ``n_errors`` referenced near the bottom
    are never defined in this file -- the inlier/outlier scatter plots and
    the error count from the original example appear to have been lost in
    transcription; the legend/xlabel calls will raise NameError until they
    are restored.
    """
    import numpy as np
    from scipy import stats
    import matplotlib.pyplot as plt
    import matplotlib.font_manager

    from sklearn import svm
    from sklearn.covariance import EllipticEnvelope
    from sklearn.ensemble import IsolationForest
    from sklearn.neighbors import LocalOutlierFactor

    rng = np.random.RandomState(42)

    n_samples = 200
    outliers_fraction = 0.1
    clusters_separation = [0, 1, 2]

    # Detector name restored to "One-Class SVM" (hyphen lost in transcription).
    classifiers = {
        "One-Class SVM": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,
                                         kernel="rbf", gamma=0.1),
        "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
        "Isolation Forest": IsolationForest(max_samples=n_samples,
                                            contamination=outliers_fraction,
                                            random_state=rng),
        "Local Outlier Factor": LocalOutlierFactor(
            n_neighbors=35,
            contamination=outliers_fraction)}

    # Grid for the decision surface (restored '-' lost in transcription).
    xx, yy = np.meshgrid(np.linspace(-10000, 10000, 200),
                         np.linspace(-10000, 10000, 200))

    global X

    # NOTE(review): this loop body appears truncated -- the original example
    # generated offset-dependent data here; only the seeding survives.
    for i, offset in enumerate(clusters_separation):
        np.random.seed(42)

    # Horizontal (index 13) vs vertical (index 27) BPM readouts.
    X = np.array(list(zip([y_vector[13] for y_vector in y_vectors],
                          [y_vector[27] for y_vector in y_vectors])))

    plt.figure(figsize=(9, 7))
    for i, (clf_name, clf) in enumerate(classifiers.items()):

        if clf_name == "Local Outlier Factor":
            y_pred = clf.fit_predict(X)
            scores_pred = clf.negative_outlier_factor_
        else:
            clf.fit(X)
            scores_pred = clf.decision_function(X)
            y_pred = clf.predict(X)
        threshold = stats.scoreatpercentile(scores_pred,
                                            100 * outliers_fraction)

        # LOF exposes no public decision_function; the example uses the
        # private one for the grid scores.
        if clf_name == "Local Outlier Factor":
            Z = clf._decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)
        subplot = plt.subplot(2, 2, i + 1)
        subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
                         cmap=plt.cm.Blues_r)
        a = subplot.contour(xx, yy, Z, levels=[threshold],
                            linewidths=2, colors='red')

        subplot.axis('tight')
        # NOTE(review): b, c and n_errors are undefined (see docstring).
        subplot.legend(
            [a.collections[0], b, c],
            ['learned decision function', 'true inliers', 'true outliers'],
            prop=matplotlib.font_manager.FontProperties(size=10),
            loc='lower right')
        subplot.set_xlabel("%d. %s (errors: %d)" % (i + 1, clf_name, n_errors))
        # Axis limits restored to (-7, 7) per the original example -- note
        # this conflicts with the +/-10000 grid above; confirm intent.
        subplot.set_xlim((-7, 7))
        subplot.set_ylim((-7, 7))
    plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)
    plt.suptitle("Outlier detection")

    plt.show()