Product Features
...
Machine Learning Models
Model Types
Machine Learning Anomaly Detection
1min
This use case is a time-series adaptation of the CNN anomaly detection model example from the TensorFlow website.
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# ---------------------------------------------------------------------------
# Load the CSV, keep the temperature column, split it chronologically and
# standardise it.  Names defined here (df, features, x, train, test, scaler,
# train_size, test_size) are used by the rest of the script.
# ---------------------------------------------------------------------------
sns.set(style='whitegrid', palette='muted')

csv_path = '20161003_085624.csv'
df = pd.read_csv(csv_path)
print("\ncolumns: ", df.columns, "Data Frame Length: ", len(df), " rows\n")

# Work on the single 'Temperature (C)' column, re-indexed 0..len(df)-1.
# .copy() gives an independent frame so the index assignment never touches
# the frame returned by read_csv.
features_considered = ['Temperature (C)']
features = df[features_considered].copy()
features.index = np.arange(start=0, stop=len(df), step=1)
print(features.head())

x = features.values
df = features
print(x)

# First 90% of the rows for training, remaining 10% for testing (row order
# is preserved; nothing is shuffled).
train_size = int(len(df) * 0.90)
test_size = len(df) - train_size
# Explicit copies: a 'close' column is added to each split below, and writing
# a new column into a plain slice of df raises pandas' SettingWithCopyWarning.
train = df.iloc[:train_size].copy()
test = df.iloc[train_size:].copy()
print(train, test)
print(train.shape, test.shape)

from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training rows only and then applied to both
# splits, so the test rows do not influence the scaling parameters.
scaler = StandardScaler()
scaler = scaler.fit(train[['Temperature (C)']])
# transform() returns an (n, 1) array; ravel() makes the column assignment
# unambiguous.
train['close'] = scaler.transform(train[['Temperature (C)']]).ravel()
test['close'] = scaler.transform(test[['Temperature (C)']]).ravel()
def create_dataset(X, y, time_steps=1):
    """Build sliding-window samples from a pandas frame and a target series.

    For every start position ``i`` in ``range(len(X) - time_steps)`` the rows
    ``X.iloc[i:i + time_steps]`` form one input window and
    ``y.iloc[i + time_steps]`` (the value right after the window) is its
    target.

    Args:
        X: pandas object supporting ``.iloc`` slicing and ``.values``
            (the script passes a one-column DataFrame).
        y: pandas Series aligned row-for-row with ``X``.
        time_steps: number of consecutive rows per window.

    Returns:
        A tuple ``(Xs, ys)`` of NumPy arrays. With a DataFrame input,
        ``Xs`` has shape ``(len(X) - time_steps, time_steps, n_columns)``
        and ``ys`` has shape ``(len(X) - time_steps,)``. When
        ``len(X) <= time_steps`` both arrays are empty.
    """
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        # Window covers rows i .. i + time_steps - 1.
        Xs.append(X.iloc[i:i + time_steps].values)
        # Target is the row immediately following the window.
        ys.append(y.iloc[i + time_steps])
    return np.array(Xs), np.array(ys)
# ---------------------------------------------------------------------------
# Window the series, then build and train a 1-D CNN that predicts the value
# following each window.
# ---------------------------------------------------------------------------
TIME_STEPS = 30  # rows per input window

# NOTE(review): the windows are now cut from the standardised 'close' column.
# Previously the inputs came from the raw 'Temperature (C)' column while the
# targets were standardised, so inputs and targets lived on different scales.
# Confirm this matches the intended experiment.
# reshape to [samples, time_steps, n_features]
X_train, y_train = create_dataset(train[['close']], train.close, TIME_STEPS)
X_test, y_test = create_dataset(test[['close']], test.close, TIME_STEPS)
print(X_train.shape)
print(y_test.shape)
print(X_train.shape[1], X_train.shape[2], y_train[1])

# (An LSTM autoencoder variant used to be kept here as commented-out code;
# the active model is the Conv1D network below.)

# Training settings actually passed to fit(); the values match what the
# script previously hard-coded in the fit() call.
epochs, batch_size = 2, 256

n_timesteps, n_features = X_train.shape[1], X_train.shape[2]
# One scalar is predicted per window.  The earlier `y_train[1]` was a target
# *value* (a float), not a valid unit count for the output layer.
n_outputs = 1

model = keras.Sequential()
model.add(keras.layers.Conv1D(
    filters=64,
    kernel_size=3,
    activation='relu',
    input_shape=(n_timesteps, n_features),
))
model.add(keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.MaxPooling1D(pool_size=2))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(100, activation='relu'))
# Linear output: softmax over a single unit always yields 1.0, which cannot
# fit a real-valued target.
model.add(keras.layers.Dense(n_outputs))
# MAE is both the loss and the quantity thresholded later; 'accuracy' is not
# meaningful for a regression target and was dropped.
model.compile(loss='mae', optimizer='adam')

# fit network
history = model.fit(
    X_train, y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
    shuffle=False,  # keep the windows in their original order
)
model.summary()
# ---------------------------------------------------------------------------
# Score the train/test windows, flag windows whose error exceeds THRESHOLD,
# and plot the results.
# ---------------------------------------------------------------------------
X_train_pred = model.predict(X_train)
# model.save("/anomalyModel/")

# Per-window absolute error between the prediction and its target.  The
# prediction is compared with y_* (one value per window); comparing it with
# the X_* windows, as before, mixes arrays of incompatible shape.
train_mae_loss = np.mean(
    np.abs(X_train_pred.reshape(len(y_train), -1) - y_train.reshape(-1, 1)),
    axis=1,
)
plt.figure()
sns.distplot(train_mae_loss, bins=50, kde=True)

X_test_pred = model.predict(X_test)
test_mae_loss = np.mean(
    np.abs(X_test_pred.reshape(len(y_test), -1) - y_test.reshape(-1, 1)),
    axis=1,
)
print(test_mae_loss)
plt.figure()
# Distribution of the *test* errors (the train errors were plotted above).
sns.distplot(test_mae_loss, bins=50, kde=True)

THRESHOLD = 0.65  # error above this marks a window as anomalous

# The first TIME_STEPS test rows have no preceding full window, so scores
# start at row TIME_STEPS of the test split.
test_score_df = pd.DataFrame(index=test[TIME_STEPS:].index)
test_score_df['loss'] = test_mae_loss
test_score_df['threshold'] = THRESHOLD
test_score_df['anomaly'] = test_score_df.loss > test_score_df.threshold
test_score_df['close'] = test[TIME_STEPS:].close

# Figure: error per window against the threshold.
plt.figure()
plt.plot(test_score_df.index, test_score_df.loss, label='loss')
plt.plot(test_score_df.index, test_score_df.threshold, label='threshold')
plt.xticks(rotation=25)
plt.legend()

anomalies = test_score_df[test_score_df.anomaly]
print(anomalies.head())

# Map the scaled values back to the original units once, for the whole
# scored range.  inverse_transform expects a 2-D array, hence the [['close']]
# selection; the anomaly points are then picked from this series, which also
# avoids calling the scaler on an empty selection when nothing is flagged.
temperature = pd.Series(
    scaler.inverse_transform(test_score_df[['close']].values).ravel(),
    index=test_score_df.index,
)
anomaly_temperature = temperature[test_score_df.anomaly]

# Figure: the series itself with flagged points highlighted (kept separate
# from the loss plot because the two are on different scales).
plt.figure()
plt.plot(temperature.index, temperature.values, label='temp')
if not anomaly_temperature.empty:
    sns.scatterplot(
        x=anomaly_temperature.index,
        y=anomaly_temperature.values,
        color=sns.color_palette()[3],
        s=52,
        label='anomaly',
    )
plt.xticks(rotation=25)
plt.legend()
plt.show()