"""

Created on Tue Aug 14 14:52:18 2018

@author: Jonathan

"""

from __future__ import print_function

import numpy as np

import matplotlib.pyplot as plt

import random

import time

import keras

from keras.models import Sequential

from keras.layers import Dense

from keras.layers import Dropout

from keras import regularizers

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

import pandas as pd

import sklearn.preprocessing as preprocessing

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import confusion_matrix

from sklearn.metrics import f1_score

from sklearn.metrics import accuracy_score

from sklearn.feature_selection import SelectKBest

from sklearn.feature_selection import chi2

from sklearn.feature_selection import VarianceThreshold

from sklearn.ensemble import GradientBoostingClassifier

from sklearn.metrics import roc_curve, auc, roc_auc_score

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn import svm

from sklearn.neighbors import KNeighborsClassifier

import tensorflow as tf

import json

# NOTE(review): a `global` statement at module level has no effect. Of the
# names listed, x_vectors and y_vectors are assigned inside genclean();
# gdata is never assigned in the visible code.
global gdata, x_vectors, y_vectors

# Seed the stdlib and the NumPy global generators so that repeated runs of
# the script draw the same pseudo-random numbers.
random.seed(1)

np.random.seed(2)

def failuremodes():
    """
    Plots two failure modes of the system: decreasing variance and change in
    covariance structure

    The figure has two stacked panels over a normalised axis tau in [0, 1],
    with the change point marked at tau = 0.5:

    * top panel: one signal whose standard deviation drops from 2.5 to 1,
      drawn between reference lines at +5 and -5;
    * bottom panel: two unit-variance signals whose covariance changes from
      0 to 0.9, drawn between reference lines at +2 and -2.

    Parameters

    none

    Returns

    none

    Side effects

    Assigns the module-level globals ``y2vals`` (list, second component of
    the bivariate signal) and ``y3`` (array, the variance-change signal),
    consumes numbers from the global NumPy random generator, and shows the
    figure with ``plt.show()``.
    """
    global y2vals, y3

    half = 5 * 10**4      # number of samples on each side of the change point
    total = 2 * half
    x1 = np.linspace(0, 1, total)

    # The order of the random draws is kept exactly as before so that a
    # seeded run reproduces the same data.
    initialy = np.random.multivariate_normal([0, 0], [[1, 0], [0, 1]], half)
    finaly = np.random.multivariate_normal([0, 0], [[1, 0.9], [0.9, 1]], half)
    y1vals = list(initialy[:, 0]) + list(finaly[:, 0])
    y2vals = list(initialy[:, 1]) + list(finaly[:, 1])

    # The source read "np.zeros(10**5)  2" (operator missing); the lower
    # bracket is restored as the mirror image of the upper one.
    uppery1bracket = np.full(total, 2.0)
    lowery1bracket = np.full(total, -2.0)

    y3 = np.append(np.random.normal(0, 2.5, half), np.random.normal(0, 1, half))
    uppery3bracket = np.full(total, 5.0)
    lowery3bracket = np.full(total, -5.0)

    # Inward ticks on all four sides, labels on the left and bottom only.
    tick_style = dict(axis='both', direction='in', which='both',
                      bottom=True, top=True, left=True, right=True,
                      labelleft=True, labelbottom=True)

    # An empty linestyle draws nothing at all; the reference lines and the
    # change-point marker are restored to dashed lines.
    guide = dict(color='k', linestyle='--')

    # Bottom panel: change in covariance structure.
    plt.subplot(2, 1, 2)
    plt.plot(x1, y1vals)
    plt.plot(x1, y2vals)
    plt.plot(x1, uppery1bracket, **guide)
    plt.plot(x1, lowery1bracket, **guide)
    plt.tick_params(**tick_style)
    plt.axvline(x=0.5, **guide)
    plt.xlabel(r'$\tau$')

    # Top panel: decreasing variance.
    plt.subplot(2, 1, 1)
    plt.rc('xtick', labelsize=8)
    plt.rc('ytick', labelsize=8)
    plt.rc('axes', labelsize=8)
    plt.plot(x1, y3)
    plt.plot(x1, uppery3bracket, **guide)
    plt.plot(x1, lowery3bracket, **guide)
    plt.tick_params(**tick_style)
    plt.axvline(x=0.5, **guide)
    # The earlier 'time (s)' label was immediately overwritten by this one,
    # and plt.legend() had no labelled artists to show; both were dropped.
    plt.xlabel(r'$\tau$')

    plt.show()

def ECDFplotter():
    """
    Plot the empirical CDFs of ``y2vals`` before and after the change point.

    Reads the module-level ``y2vals`` (assigned by ``failuremodes()``, which
    must therefore have been called first) and draws two normalised,
    cumulative step histograms on the current axes: one for the first
    5*10**4 samples and one for the remaining samples.

    Parameters

    none

    Returns

    none. The figure is not shown here; call ``plt.show()`` afterwards if
    the backend requires it.
    """
    n_bins = 50
    half = 5 * 10**4      # index of the change point inside y2vals

    plt.rc('xtick', labelsize=8)
    plt.rc('ytick', labelsize=8)
    plt.rc('axes', labelsize=8)

    # density + cumulative turns each histogram into an empirical CDF.
    ecdf_style = dict(density=True, histtype='step', cumulative=True)
    plt.hist(y2vals[:half], n_bins, label='Initial Distribution', **ecdf_style)
    plt.hist(y2vals[half:], n_bins, label='Final Distribution', **ecdf_style)

    plt.legend(loc=2)

def genclean():

"""

Generates clean data from the sullied data of '240818RUNLABEL.txt'

Each record of ``data`` is split into an input vector (columns 0..51) and an
output vector; records whose output vector contains a readout equal to
999999 or 0 are skipped, and the remaining vectors are appended to the
module-level lists ``x_vectors_clean`` and ``y_vectors_clean``.

NOTE(review): this block has lost its indentation and contains bare integer
lines (158, 159, ...) that look like residue of source lines dropped when
the file was extracted. ``data``, ``label`` and the initial values of the
``*_clean`` lists are not defined anywhere in the visible code, so the
function cannot run as shown. Recover the missing lines from the original
file before relying on it.

"""

global gdata,x_vectors,y_vectors, y_labels,x_vectors_clean,y_vectors_clean,y_labels_clean

# NOTE(review): presumably the lost lines here loaded ``data`` -- confirm.
158

159

160


162


# Input vector: the first 52 columns of every record.
x_vectors = [i[0:52]for i in data]

# NOTE(review): the slice 53:1 is always empty; presumably this was 53:-1 and
# the minus sign was lost -- confirm against the original file.
y_vectors = [i[53:1]for i in data]

y_labels = []

# NOTE(review): presumably the lost lines here created the empty
# x_vectors_clean / y_vectors_clean / y_labels_clean lists -- confirm.
167

168

169

170


# Keep only the records whose output vector has no 999999 and no 0 readout.
for i in range(len(y_vectors)):

baddata = False

for readout in y_vectors[i]:

if readout == 999999 or readout == 0:

baddata = True

break

if baddata == False:

x_vectors_clean.append(x_vectors[i])

y_vectors_clean.append(y_vectors[i])

# Columns 4, 13 and 27 of the cleaned output vectors, with their mean and
# standard deviation. None of these values is used in the visible code.
epintensities = [y_vector[4] for y_vector in y_vectors_clean]

ephor = [y_vector[13] for y_vector in y_vectors_clean]

epvert = [y_vector[27] for y_vector in y_vectors_clean]

epim, epistd = np.mean(epintensities), np.std(epintensities)

ephorm, ephorstd = np.mean(ephor), np.std(ephor)

epvertm, epvertstd = np.mean(epvert), np.std(epvert)

# NOTE(review): the code that computes ``label`` (and the loop it belongs to)
# is missing here -- only the append below survived.
189


191

192

193

194

195

196

197

198

199


y_labels_clean.append(label)

# NOTE(review): presumably the lost lines here drew the plot that the
# legend()/show() calls below refer to -- confirm.
202

203

204

205

206

207

208

209

210

211

212


214

215

216

217


plt.legend()

plt.show()

def plotbpmdist(n, mstart, mend):
    """
    Plot distribution of values on BPM number n

    Scatters column ``n - 1`` against column ``n + 13`` of the cleaned
    output vectors and shows the figure.

    Parameters

    n: int. 1-based BPM number.

    mstart, mend: int. Slice bounds selecting the rows
    ``y_vectors_clean[mstart:mend]`` to plot.

    Returns

    none

    Notes

    Reads the module-level ``y_vectors_clean`` (filled by ``genclean()``).
    ``genclean()`` names column 13 ``ephor`` and column 27 ``epvert``, which
    matches n = 14 here, so column ``n - 1`` is presumably the horizontal
    and column ``n + 13`` the vertical readout of BPM n -- confirm against
    the data format.
    """
    readouts = y_vectors_clean[mstart:mend]
    # The source read "i[n1]"; the minus sign of "n-1" had been lost.
    x_values = [row[n - 1] for row in readouts]
    y_values = [row[n + 13] for row in readouts]

    plt.scatter(x_values, y_values, linewidth=2.0)
    plt.title('Scan distribution in output space')
    # Both axes used to carry the same hard-coded 'BPM2 Horizontal Position'
    # label; the labels now follow n and tell the two axes apart.
    plt.xlabel('BPM%d Horizontal Position' % n)
    plt.ylabel('BPM%d Vertical Position' % n)
    # plt.legend() was dropped: the scatter has no label, so there was
    # nothing for the legend to show.
    plt.show()

def plotscandist(n,mstart,mend):

"""

Plot distribution of values

Collects two adjacent columns of the rows ``x_vectors_clean[mstart:mend]``
and sets up a figure titled 'Scan distribution in parameter space'.

NOTE(review): this block has lost its indentation, and the two bare integer
lines (248, 249) look like residue of dropped source lines. As shown,
``x_values`` and ``y_values`` are never passed to any plotting call;
presumably the missing lines did that -- confirm against the original file.

"""

# NOTE(review): ``n1`` is not defined in the visible code; presumably this
# was ``n-1`` with the minus sign lost, i.e. columns 2*(n-1) and 2*(n-1)+1.
x_values = [i[(n1)*2] for i in x_vectors_clean[mstart:mend]]

y_values = [i[(n1)*2+1] for i in x_vectors_clean[mstart:mend]]

248

249


plt.title('Scan distribution in parameter space')

plt.xlabel('C13 LE Corrector')

plt.ylabel('C14 LE Corrector')

# NOTE(review): no labelled artist is created in the visible code, so this
# legend has nothing to show unless the missing lines supplied one.
plt.legend()

plt.show()

def ephisto():
    """
    Plot a histogram of column 4 of the cleaned output vectors.

    Reads the module-level ``y_vectors_clean`` (filled by ``genclean()``),
    histograms column 4 of every vector with automatic binning, marks the
    value 2000 with a red vertical line and shows the figure.

    Parameters

    none

    Returns

    none
    """
    epintensities = [y_vector[4] for y_vector in y_vectors_clean]

    plt.hist(epintensities, bins='auto')
    plt.title("Intensity distribution of dataset")
    # Inward ticks on all four sides, labels on the left and bottom only.
    plt.tick_params(
        axis='both',
        direction='in',
        which='both',
        bottom=True,
        top=True,
        left=True,
        right=True,
        labelleft=True,
        labelbottom=True)
    # An empty linestyle draws nothing; the marker is restored to a dashed
    # line so it is actually visible.
    plt.axvline(x=2000, color='r', linestyle='--')
    plt.show()

def cc():

"""

Plots the graph

For each dataset in ``datasets`` the input points are scattered in the first
column of a subplot grid, and every classifier in ``classifiers`` is then
fitted and its decision surface and test score drawn in the following
columns.

# Code source: Gaël Varoquaux

# Andreas Müller

# Modified for documentation by Jaques Grobler

# License: BSD 3 clause

NOTE(review): this block has lost its indentation and contains bare integer
lines (307, 308, ...) that look like residue of dropped source lines.
``classifiers``, ``linearly_separable``, ``X_train``, ``X_test``,
``y_train`` and ``y_test`` are used below but never assigned in the visible
code, so the function cannot run as shown. Recover the missing lines from
the original file.

"""

import numpy as np

import matplotlib.pyplot as plt

from matplotlib.colors import ListedColormap

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

from sklearn.datasets import make_moons, make_circles, make_classification

from sklearn.neural_network import MLPClassifier

from sklearn.neighbors import KNeighborsClassifier

from sklearn.svm import SVC

from sklearn.gaussian_process import GaussianProcessClassifier

from sklearn.gaussian_process.kernels import RBF

from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

from sklearn.naive_bayes import GaussianNB

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Step size of the mesh built with np.arange further down.
h = .02

global X,y

# Column titles, paired with ``classifiers`` by zip() in the inner loop.
names = ["Nearest Neighbors", "RBF SVM", "Gaussian Process",

"Decision Tree", "Neural Net",

"Naive Bayes"]

# NOTE(review): presumably the ``classifiers`` list was defined on the lost
# lines here -- confirm.
307

308

309

310

311

312

313

314


X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,

random_state=1, n_clusters_per_class=1)

rng = np.random.RandomState(2)

# Shift every point by uniform noise drawn from [0, 2).
X += 2 * rng.uniform(size=X.shape)

# NOTE(review): presumably ``linearly_separable`` was defined on the lost
# lines here -- confirm.
320

321


323

324

325

326


datasets = [make_moons(noise=0.3, random_state=0),

make_circles(noise=0.2, factor=0.5, random_state=1),

linearly_separable

]

332

333

# Running subplot index: one cell per dataset for the input data plus one
# per classifier.
i = 1

335

for ds_cnt, ds in enumerate(datasets):

# NOTE(review): the result of train_test_split below is not assigned in the
# visible code; presumably the assignment targets (and the unpacking of
# ``ds`` into X, y) were on the lost lines -- confirm.
337

338

339

340

train_test_split(X, y, test_size=.4, random_state=42)

# NOTE(review): ".min()  .5" has no operator between the operands;
# presumably "- .5" with the minus sign lost -- confirm.
x_min, x_max = X[:, 0].min()  .5, X[:, 0].max() + .5

y_min, y_max = X[:, 1].min()  .5, X[:, 1].max() + .5

# Regular grid over the padded data range, used to evaluate each classifier.
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),

np.arange(y_min, y_max, h))

cm = plt.cm.RdBu

cm_bright = ListedColormap(['#FF0000', '#0000FF'])

ax = plt.subplot(len(datasets), len(classifiers) + 1, i)

if ds_cnt == 0:

ax.set_title("Input data", fontsize=15)

# Training points are drawn opaque, test points at alpha=0.6.
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,

edgecolors='k')

ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6,

edgecolors='k')

ax.set_xlim(xx.min(), xx.max())

ax.set_ylim(yy.min(), yy.max())

ax.set_xticks(())

ax.set_yticks(())

i += 1

for name, clf in zip(names, classifiers):

ax = plt.subplot(len(datasets), len(classifiers) + 1, i)

clf.fit(X_train, y_train)

score = clf.score(X_test, y_test)

# Use decision_function on the mesh when the classifier has one, otherwise
# the second column of predict_proba.
if hasattr(clf, "decision_function"):

Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])

else:

378

Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

Z = Z.reshape(xx.shape)

ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,

edgecolors='k')

ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,

edgecolors='k', alpha=0.6)

ax.set_xlim(xx.min(), xx.max())

ax.set_ylim(yy.min(), yy.max())

ax.set_xticks(())

ax.set_yticks(())

if ds_cnt == 0:

ax.set_title(name, fontsize=15)

# Test score, formatted to two decimals with the leading zero stripped.
# NOTE(review): "xx.max()  .3" has no operator; presumably "- .3" -- confirm.
ax.text(xx.max()  .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),

size=15, horizontalalignment='right')

i += 1

plt.tight_layout()

plt.show()

def cc2():
    """
    Train a panel of classifiers on the cleaned scan data and print scores.

    For every classifier the module-level ``x_vectors_clean`` /
    ``y_labels_clean`` are converted to arrays, split 80/20 into train and
    test sets (``random_state=42``), the model is fitted once, and the
    accuracy, F1 score and ROC AUC of its test-set predictions are printed
    together with start/end timestamps of the training.

    The "Neural Net" and "Naive Bayes" models are trained on standardised
    features; the other models see the raw features.

    Parameters

    none

    Returns

    none

    Side effects

    Assigns the module-level globals ``X`` and ``y`` (the arrays used for
    the most recently trained classifier) and prints to stdout.
    """
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.neural_network import MLPClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.gaussian_process.kernels import RBF
    from sklearn.naive_bayes import GaussianNB

    global X, y

    names = ["Nearest Neighbors", "RBF SVM", "Gaussian Process",
             "Boosting Decision Tree", "Neural Net",
             "Naive Bayes"]

    classifiers = [
        KNeighborsClassifier(10),
        SVC(gamma=8, C=1),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        GradientBoostingClassifier(n_estimators=200, learning_rate=1.0,
                                   max_depth=5, random_state=0),
        MLPClassifier(alpha=1),
        GaussianNB()]

    # Models that are trained on standardised features.
    scaled_models = ("Neural Net", "Naive Bayes")

    for name, clf in zip(names, classifiers):
        X = np.array(x_vectors_clean)
        y = np.array(y_labels_clean)

        print(str(name) + " Training started at " + str(time.ctime()))

        if name in scaled_models:
            # NOTE(review): the scaler is fitted on the full data set before
            # the split, so test-set statistics leak into training. Kept
            # as-is to preserve the reported numbers; consider fitting on
            # X_train only.
            X = StandardScaler().fit_transform(X)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=.2, random_state=42)

        # Fit exactly once. Previously every model was fitted a second time
        # after the "Training ended" message, doubling the run time and
        # making the printed timestamps misleading.
        clf.fit(X_train, y_train)
        print(str(name) + " Training ended at " + str(time.ctime()))
        y_pred = clf.predict(X_test)

        print("Accuracy : " + str(name) + " is " + str(accuracy_score(y_test, y_pred)))
        print("F1 score : " + str(name) + " is " + str(f1_score(y_test, y_pred)))
        # NOTE(review): AUC is computed from hard class predictions rather
        # than scores/probabilities -- confirm that this is intended.
        print("AUC : " + str(name) + " is " + str(roc_auc_score(y_test, y_pred)))
        # A bare `print` is a no-op under print_function; emit the intended
        # blank separator line.
        print()

def od():
    """
    Fit four outlier detectors to two readout columns and plot the results.

    Columns 13 and 27 of every vector in the module-level ``y_vectors`` form
    a 2-D data set. A One-Class SVM, a robust covariance estimate, an
    Isolation Forest and a Local Outlier Factor model are fitted to it. For
    each model one subplot shows the decision function evaluated on a
    200 x 200 mesh, the contour at the ``outliers_fraction`` percentile of
    the training scores, and the data points coloured by the model's own
    inlier/outlier prediction.

    Parameters

    none

    Returns

    none

    Side effects

    Assigns the module-level global ``X`` (the 2-D data set), reseeds the
    global NumPy random generator with 42 and shows the figure.

    Notes

    Changes relative to the previous version of this function:

    * the mesh bounds, written as ``linspace(10000, 10000, 200)`` (a single
      repeated point), are restored to -10000..10000;
    * ``b``, ``c`` and ``n_errors`` were never defined. As no ground truth
      labels exist for this data, the scatter handles now show predicted
      inliers/outliers and the axis label reports the number of predicted
      outliers;
    * the outer loop over ``clusters_separation`` never used its loop value
      and only repeated identical work three times; it was removed.
    """
    import numpy as np
    from scipy import stats
    import matplotlib.pyplot as plt
    import matplotlib.font_manager
    from sklearn import svm
    from sklearn.covariance import EllipticEnvelope
    from sklearn.ensemble import IsolationForest
    from sklearn.neighbors import LocalOutlierFactor

    global X

    rng = np.random.RandomState(42)
    n_samples = 200
    outliers_fraction = 0.1

    classifiers = {
        "OneClass SVM": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,
                                        kernel="rbf", gamma=0.1),
        "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
        "Isolation Forest": IsolationForest(max_samples=n_samples,
                                            contamination=outliers_fraction,
                                            random_state=rng),
        "Local Outlier Factor": LocalOutlierFactor(
            n_neighbors=35,
            contamination=outliers_fraction)}

    # Mesh on which every decision function is evaluated.
    xx, yy = np.meshgrid(np.linspace(-10000, 10000, 200),
                         np.linspace(-10000, 10000, 200))
    grid = np.c_[xx.ravel(), yy.ravel()]

    np.random.seed(42)
    X = np.array([[y_vector[13], y_vector[27]] for y_vector in y_vectors])

    plt.figure(figsize=(9, 7))
    for i, (clf_name, clf) in enumerate(classifiers.items()):
        is_lof = clf_name == "Local Outlier Factor"

        # Fit the model and tag outliers (+1 = inlier, -1 = outlier).
        if is_lof:
            y_pred = clf.fit_predict(X)
            scores_pred = clf.negative_outlier_factor_
        else:
            clf.fit(X)
            scores_pred = clf.decision_function(X)
            y_pred = clf.predict(X)
        threshold = stats.scoreatpercentile(scores_pred,
                                            100 * outliers_fraction)
        outlier_mask = y_pred == -1
        n_outliers = int(outlier_mask.sum())

        # NOTE(review): _decision_function is a private LocalOutlierFactor
        # method tied to the scikit-learn version this script was written
        # for -- confirm that it exists in the installed version.
        if is_lof:
            Z = clf._decision_function(grid)
        else:
            Z = clf.decision_function(grid)
        Z = Z.reshape(xx.shape)

        subplot = plt.subplot(2, 2, i + 1)
        subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
                         cmap=plt.cm.Blues_r)
        a = subplot.contour(xx, yy, Z, levels=[threshold],
                            linewidths=2, colors='red')
        b = subplot.scatter(X[~outlier_mask, 0], X[~outlier_mask, 1],
                            c='white', s=20, edgecolor='k')
        c = subplot.scatter(X[outlier_mask, 0], X[outlier_mask, 1],
                            c='black', s=20, edgecolor='k')
        subplot.axis('tight')
        subplot.legend(
            [a.collections[0], b, c],
            ['learned decision function', 'predicted inliers',
             'predicted outliers'],
            prop=matplotlib.font_manager.FontProperties(size=10),
            loc='lower right')
        subplot.set_xlabel("%d. %s (outliers: %d)" % (i + 1, clf_name, n_outliers))
        # NOTE(review): the limits were written "(7, 7)" and are restored to
        # (-7, 7); this is far narrower than the +/-10000 mesh -- confirm
        # the intended view range for this data.
        subplot.set_xlim((-7, 7))
        subplot.set_ylim((-7, 7))

    plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)
    plt.suptitle("Outlier detection")
    plt.show()
