Copyright CRC Press 2023 - present CRC Press
Author: Dr Stephen Lynch National Teaching Fellow FIMA SFHEA
Question 6.1: This is a one-dimensional discrete system:
$$c_{n+1}=f(c_n)=(1-a)c_n+bc_n^re^{-sc_n},$$where $c_n$ is blood cell count.
Fixed points of period one satisfy the equation: $c_{n+1}=c_n=c$, say.
In this case, solve the equation $f(c)-c=0$, to determine the $c^*$ fixed points.
Suppose that there is a fixed point at $c=c^*$. Then, fixed points are stable if:
$$\left| \frac{df(c^*)}{dc} \right|<1.$$# 6.1: Blood cell population model.
# Determining fixed points of a 1D discrete map and their stability.
from sympy import *
import numpy as np
import matplotlib.pyplot as plt
plt.figure(figsize=(8, 8))
a , b , r , s = 0.2 , 1.1 * 10**6 , 8 , 16
# Fixed points of period 1, c_{n+1} = c_n = c, say.
c = np.linspace(-0.1 , 1 , 100)
# Plot c against f(c)-c.
plt.plot(c , b * c**r *np.exp(-s * c) - a * c)
plt.axhline(y=0, color="r" , linestyle="-" , linewidth = 1)
plt.xlabel("c", fontsize=15)
plt.ylabel("f(c)-c", fontsize=15)
plt.show()
c = symbols("c")
# Solve the equations numerically in a given range.
# You can see the three fixed points in the figure below.
c1 = nsolve(b * c**r * exp(-s * c) - a * c , c , (0 , 1))
c2 = nsolve(b * c**r * exp(-s * c) - a * c , c , (0.1 , 0.2))
c3 = nsolve(b * c**r * exp(-s * c) - a * c , c , (0.5 , 1))
print("Three fixed points at, c1=",c1 , "c2=", c2 , "c3=", c3)
f = (1 - a) * c + b * c**r * exp(-s * c)
df = diff(f , c)
# Fixed point is stable if |df_c*| < 1.
print("df_c1=" , abs(df.subs(c , c1)) , "Stable")
print("df_c2=" , abs(df.subs(c , c2)) , "Unstable")
print("df_c3=" , abs(df.subs(c , c3)) , "Stable")
# Use similar arguments when a = 0.3.
Three fixed points at, c1= 0 c2= 0.155345314963557 c3= 0.945496383877335 df_c1= 0.800000000000000 Stable df_c2= 1.90289499211662 Unstable df_c3= 0.625588428407471 Stable
# 6.2: Determine the critical points and their stability using eigenvalues.
# Critical points: Solve dx/dt=dy/dt=0.
# There are three critical points in the first quadrant.
# Discard the point at (0,-1) as you can't have negative populations.
from sympy import *
x , y = symbols("x y")
solve([x * (4 - y - x) , y * (3 * x - 1 - y)])
[{x: 0, y: -1}, {x: 0, y: 0}, {x: 5/4, y: 11/4}, {x: 4, y: 0}]
Label the critical points: $O=(0,0)$, $A=(4,0)$ and $B=(\frac{5}{4},\frac{11}{4})$.
# 6.2: Compute the eigenvalues using the Jacobian matrix.
P , Q = x * (4 - y - x) , y * (3 * x - 1 - y)
J = Matrix([[diff(P , x) , diff(P , y)],[diff(Q , x) , diff(Q , y)]])
JO = J.subs([(x , 0) , (y , 0)])
eigJO = JO.eigenvals()
print("eigJO =" , eigJO)
JA = J.subs([(x , 4) , (y , 0)])
eigJA = JA.eigenvals()
print("eigJA = ", eigJA)
JB = J.subs([(x , 5 / 4) , (y , 11 / 4)])
eigJB = JB.eigenvals()
print("eigJB = ", eigJB)
eigJO = {4: 1, -1: 1} eigJA = {11: 1, -4: 1} eigJB = {-2.0 - 3.1224989991992*I: 1, -2.0 + 3.1224989991992*I: 1}
The eigenvalues are:
For critical point O: $\lambda_1=4$, $\lambda_2=-1$, and the critical point is unstable, since one eigenvalue is positive.
For critical point A: $\lambda_1=11$, $\lambda_2=-4$, and the critical point is unstable, since one eigenvalue is positive.
For critical point B: $\lambda_1=-2-3.1225i$, $\lambda_2=-2+3.1225i$, and the critical point is stable, as both eigenvalues have negative real part.
# 6.2: Phase portrait of a predator-prey system.
# The populations stabilize to constant values - the species co-exist.
# Note that the axes will be scaled by large numbers.
import matplotlib.pyplot as plt
import numpy as np
from scipy.integrate import odeint
plt.figure(figsize=(8, 8))
import pylab as pl
# The two-dimensional system.
def dx_dt(x, t):
return [x[0]*(4-x[1]-x[0]), x[1]*(3*x[0]-1-x[1])]
# Use 1000 points to get smooth trajectories.
ts = np.linspace(0, 20, 1000)
xs = odeint(dx_dt, [4,0.01], ts)
plt.plot(xs[:, 0], xs[:, 1], 'r-')
xs = odeint(dx_dt, [0.5,6], ts)
plt.plot(xs[:, 0], xs[:, 1], 'r-')
xs = odeint(dx_dt, [0.01,0], ts)
plt.plot(xs[:, 0], xs[:, 1], 'r-',linewidth=3)
xs = odeint(dx_dt, [6,2], ts)
plt.plot(xs[:, 0], xs[:, 1], 'r-')
xs = odeint(dx_dt, [0,6], ts)
plt.plot(xs[:, 0], xs[:, 1], 'r-',linewidth=3)
xs = odeint(dx_dt, [6,0], ts)
plt.plot(xs[:, 0], xs[:, 1], 'r-',linewidth=3)
# Label the axes and set fontsizes.
plt.xlabel("x", fontsize=15)
plt.ylabel("y", fontsize=15)
plt.tick_params(labelsize=15)
# Plot the vectorfield.
X, Y = np.mgrid[0:6:12j, 0:6:12j]
u = X*(4-X-Y)
v = Y*(3*X-1-Y)
# Plot a vector field with blue arrows.
pl.quiver(X, Y, u, v, color='b')
# Show the critical points in the phase portrait (red dots).
plt.plot([0], [0], marker="o", markersize=10,markeredgecolor="red", markerfacecolor="red")
plt.plot([4], [0], marker="o", markersize=10,markeredgecolor="red", markerfacecolor="red")
plt.plot([5 / 4], [11 / 4], marker="o", markersize=10,markeredgecolor="red", markerfacecolor="red")
plt.axis("square")
plt.show()
Phase portrait of a predator-prey system. The axes would be scaled by large numbers in applications and the species would co-exist with scaled populations of $x=\frac{5}{4}$ and $y=\frac{11}{4}$, assuming of course that neither of the populations are zero.
# 6.3: Compartmental model of Covid-19 in Manchester.
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from math import floor
beta , eta , xi ,ppi , eps , gamma = 10**(-4) , 0.2 , 0.1 , 0.1 ,0.01 , 0.15
S0,L10,L20,I10,I20,A10,A20,R0 = 2700000,0,0,1,0,0,0,0
# Maximum time point and total number of time points
tmax , n = 1000 , 1000
def Covid(X, t, beta , eta , xi ,ppi , eps , gamma):
#The Differential Equations
S,L1,L2,I1,I2,A1,A2,R = X
dS = -beta*S*(I1+I2+xi*(A1+A2)+eta*L2)
dL1 = beta*S*(I1+I2+xi*(A1+A2)+eta*L2)-eps*L1
dL2 = eps*(L1-L2)
dI1 = (1-ppi)*eps*L2-gamma*I1
dI2 = gamma*(I1-I2)
dA1 = ppi*eps*L2-gamma*A1
dA2 = gamma*(A1-A2)
dR = gamma*(I2+A2)
return (dS,dL1,dL2,dI1,dI2,dA1,dA2,dR)
# Integrate differential equations on the time grid t.
t = np.linspace(0, tmax, n)
odesol = odeint(Covid, (S0,L10,L20,I10,I20,A10,A20,R0), t, \
args=(beta , eta , xi ,ppi , eps , gamma))
S,L1,L2,I1,I2,A1,A2,R = odesol.T
plt.xlabel("Time")
plt.ylabel("Populations")
plt.title("Covid-19 Compartmental Model")
plt.plot(t, S, label="S")
plt.plot(t, L1, label="L1")
plt.plot(t, L2, label="L2")
plt.plot(t, I1, label="I1")
plt.plot(t, I2, label="I2")
plt.plot(t, A1, label="A1")
plt.plot(t, A2, label="A2")
plt.plot(t, R, label="R")
legend = plt.legend(loc='best')
plt.show()
# Plot graph of smaller population numbers.
plt.xlabel("Time")
plt.ylabel("Populations")
plt.title("Covid-19 Compartmental Model")
plt.plot(t, I1, label="I1")
plt.plot(t, I2, label="I2")
plt.plot(t, A1, label="A1")
plt.plot(t, A2, label="A2")
legend = plt.legend(loc='best')
plt.show()
print("A1 max=",floor(max(A1)))
print("I1 max=",floor(max(I1)))
A1 max= 6604 I1 max= 59440
# 6.4: Single fiber muscle model without holding.
import numpy as np
import matplotlib.pyplot as plt
Length , a , b = 800 , 380*0.098 , 0.325
F0 = a / 0.257 # Initial force.
vm , alpha , LSE0 , k = F0*b/a , F0/0.1 , 0.3 , a/25
t = [0+0.01*i for i in range(801)] # Time.
A = [1.001+0.001*i for i in range(100)]
B = [1.099-0.001*i for i in range(100)]
D = [0.999-0.001*i for i in range(100)]
E = [0.901+0.001*i for i in range(100)]
G = [1.001+0.001*i for i in range(100)]
H = [1.099-0.001*i for i in range(100)]
J = [0.999-0.001*i for i in range(100)]
K = [0.901+0.001*i for i in range(101)]
L = A+B+D+E+G+H+J+K
plt.figure()
plt.subplots(figsize=(12, 6))
plt.subplot(221)
plt.plot(L)
plt.xlabel('Time (s)', fontsize=15)
plt.ylabel('Fraction of Muscle Length (L)', fontsize=15)
plt.tick_params(labelsize=15)
LSE=np.zeros(800).tolist();
LCE=np.zeros(800).tolist();
# F is force and L represents length.
# Solve Hill's ordinary differential equations using Euler's method.
F=np.zeros(801).tolist();
for i in range(800):
LSE[i] = 0.3+F[i]/alpha;
LCE[i] = L[i]-LSE[i];
dt=t[i+1]-t[i];
dL=L[i+1]-L[i];
dF=alpha*((dL/dt)+b*((F0-F[i])/(a+F[i])))*dt;
F[i+1]=F[i]+dF;
F=np.array(F)
FF=(F0/100)*np.random.randn(801)
F=F+FF
F=F.tolist()
plt.subplot(222)
plt.plot(L,F)
plt.xlabel("Fraction of Muscle Length (mm)", fontsize=15)
plt.ylabel("Force (mN/mm$^2)$", fontsize=15)
plt.tick_params(labelsize=15)
plt.show()
<Figure size 432x288 with 0 Axes>
# 7.1: Balancing chemical-reaction equations.
from sympy import *
# Construct the augmented matrix.
ACCM=Matrix([[1,0,3,0,0],\
[1,8,5,2,0],\
[1,6,6,0,1],\
[3,7,7,1,2],\
[0,0,0,0,1]])
print(ACCM)
invACCM=ACCM.inv() # Find the inverse matrix.
print(invACCM)
Nullv=invACCM.col(4) / min(abs(invACCM.col(4)))
print(Nullv) # Scaled null-space vector.
# Na=sodium, H=hydrogen, C=carbon, and O=oxygen.
Matrix([[1, 0, 3, 0, 0], [1, 8, 5, 2, 0], [1, 6, 6, 0, 1], [3, 7, 7, 1, 2], [0, 0, 0, 0, 1]]) Matrix([[-1/3, -1/3, -1/3, 2/3, -1], [-7/18, -1/18, 1/9, 1/9, -1/3], [4/9, 1/9, 1/9, -2/9, 1/3], [11/18, 11/18, -5/9, -2/9, 1], [0, 0, 0, 0, 1]]) Matrix([[-3], [-1], [1], [3], [3]])
Thus, the balanced chemical reaction equation is:
$$3 \mathrm{NaHCO}_3+\mathrm{H}_3\mathrm{C}_6\mathrm{H}_5\mathrm{O}_7 \rightleftharpoons \mathrm{Na}_3\mathrm{C}_6\mathrm{H}_5\mathrm{O}_7+3\mathrm{H}_2\mathrm{O}+3\mathrm{C}\mathrm{O}_2.$$# 7.2: A catalytic reaction.
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
# Set the parameters.
k1 , k2 = 0.1, 0.05
a0, b0, x0 , y0 = 1, 1, 1, 0
tmax, n = 250, 1000
def Catalytic(X, t, k1 , k2):
a, b, x, y = X
da = -k1 * a * x
db = -k2 * b * y
dx = -k1 * a * x +k2 * b * y
dy = k1 * a * x - k2 * b * y
return(da, db, dx , dy)
t = np.linspace(0, tmax, n)
odesol = odeint(Catalytic, (a0, b0, x0 , y0), t, args = (k1, k2))
a , b , x , y = odesol.T
plt.xlabel("Time (s)")
plt.ylabel("Concentrations")
plt.title("Catalytic Reaction")
plt.plot(t, a, label = "|a|")
plt.plot(t, b, label = "|b|")
plt.plot(t, x, label = "|x|")
plt.plot(t, y, label = "|y|")
legend = plt.legend(loc = 'best')
plt.show()
# 7.3: Chapman cycle of ozone production.
# This is a stiff system of ODEs.
# There are large (concentrations) and small (rates) parameters involved.
import numpy as np
from scipy.integrate import odeint
# Set the parameters.
k1 , k2 , k3 , k4 = 3e-12,1.22e-33,5.5e-4,6.86e-16
x0 , y0 , z0 , M = 4e16 , 2e16 , 2e16 , 9e17
tmax, n = 1e8, 10001
# Let x=|O|, y=|O2|, z=|O3|.
def Chapman(X, t, k1 , k2 , k3 , k4 , M):
x , y , z = X
dx = 2*k1*y+k3*z-k2*x*y*M-k4*x*z
dy = k3*z+2*k4*x*z-k1*y-k2*x*y*M
dz = k2*x*y*M-k3*z-k4*x*z
return(dx, dy, dz)
t = np.linspace(0, tmax, n)
odesol = odeint(Chapman, (x0 , y0 , z0), t, args = (k1, k2, k3, k4 , M))
x , y , z = odesol.T
print("|O|=", x[10000], "molec/cm cubed")
print("|O2|=", y[10000], "molec/cm cubed")
print("|O3|=", z[10000], "molec/cm cubed")
|O|= 46805158.11585307 molec/cm cubed |O2|= 6.999019071196589e+16 molec/cm cubed |O3|= 6539509754323.823 molec/cm cubed
# 7.4: Common ion effect in solubility.
# Silver chloride in a potassium chloride solution.
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as opt
initAg , initAgCl2, initCl , initK , datapts = 0 ,0 , 0.1 , 0.1 , 100
logxrange = np.logspace(-5 , -1 , num = datapts)
initparams = (initAg, initAgCl2, initCl, initK)
def AgCl_sol2(concentrations):
(Ag_conc2, AgCl2_conc2, Cl_conc2, K_conc2) = concentrations
firstEq = Ag_conc2 * Cl_conc2 - 1.82E-10
secondEq = AgCl2_conc2 - Ag_conc2 * Cl_conc2 ** 2 * 1.78E5
thirdEq = Ag_conc2 + K_conc2 - Cl_conc2 - AgCl2_conc2
fourthEq = K_conc2 - K_conc2
return[firstEq, secondEq, thirdEq, fourthEq]
solution = opt.fsolve(AgCl_sol2,initparams)
solubility = "{:.2E}".format(solution[0] + solution[1])
print("At a KCl concentration of", initK, "AgCl solubility is", solubility)
datapts=100
guess_array2 = tuple(zip(np.zeros(datapts),np.zeros(datapts),logxrange,logxrange))
out_array2 = []
silver_conc2 = []
silverchloride_conc2 = []
chloride_conc2 = []
potassium_conc2 = []
for num in range(0,datapts):
out_array2.append(list(opt.fsolve(AgCl_sol2,guess_array2[num])))
silver_conc2.append(out_array2[num][0])
silverchloride_conc2.append(out_array2[num][1])
chloride_conc2.append(out_array2[num][2])
potassium_conc2.append(out_array2[num][3])
total_solubility = np.add(silver_conc2, silverchloride_conc2)
plt.plot(potassium_conc2,np.log10(total_solubility),'r.')
plt.title("Silver chloride concentration")
plt.xlabel('KCl concentration (Moles per litre)')
plt.ylabel('Log(solubility of AgCl)')
plt.show()
At a KCl concentration of 0.1 AgCl solubility is 3.24E-06
# 8.1: Create a DataFrame, Membership Satisfaction at a Gym.
import numpy as np
import pandas as pd
# pd.set_option('display.colheader_justify', 'center')
df=pd.DataFrame({
"Date": pd.date_range("20220109", periods=7),
"Day " : pd.Categorical(["Sunday","Monday","Tuesday","Wednesday",
"Thursday","Friday","Saturday"]),
"Member Numbers" : pd.Series([126,34,42,100,54,41,105],
dtype="int32"),
"Staff Numbers" : pd.Series([12,6,6,12,6,6,12],
dtype="int32"),
"Member/Staff Ratio" : pd.Series([10.5,5.7,7.0,8.3,9.0,6.8,8.8],dtype="float32"),
"Satisfaction (%)" : pd.Series([34,58,74,54,85,88,45],dtype="float32")
})
df = df.set_index("Date")
df
Day | Member Numbers | Staff Numbers | Member/Staff Ratio | Satisfaction (%) | |
---|---|---|---|---|---|
Date | |||||
2022-01-09 | Sunday | 126 | 12 | 10.5 | 34.0 |
2022-01-10 | Monday | 34 | 6 | 5.7 | 58.0 |
2022-01-11 | Tuesday | 42 | 6 | 7.0 | 74.0 |
2022-01-12 | Wednesday | 100 | 12 | 8.3 | 54.0 |
2022-01-13 | Thursday | 54 | 6 | 9.0 | 85.0 |
2022-01-14 | Friday | 41 | 6 | 6.8 | 88.0 |
2022-01-15 | Saturday | 105 | 12 | 8.8 | 45.0 |
# 8.2 (a): Feasibility region for Example 8.2.2.
# Minimize C = 20000x+25000y.
# Given 400x+300y>=25000, 300x+400y>=27000,200x+500y>=30000.
# x,y >= 0.
import numpy as np
import matplotlib.pyplot as plt
m = np.linspace(0,200,200)
x , y = np.meshgrid(m , m)
plt.imshow( ((x>=0) & (y>=0) & (400*x+300*y>=25000) & (300*x+400*y>=27000) & (200*x+500*y>=30000)).astype(int) ,
extent=(x.min(),x.max(),y.min(),y.max()),origin="lower", cmap="Greys", alpha = 0.3)
# Plot the constraint lines.
x = np.linspace(0, 200, 200)
y1 = (-400*x+25000) / 300
y2 = (-300*x+27000) / 400
y3 = (-200*x+30000) / 500
plt.rcParams["font.size"] = "14"
plt.plot(x, y1, label=r"$400x+300y \geq 25000$" , linewidth = 4)
plt.plot(x, y2, label=r"$300x+400y \geq 27000$" , linewidth = 4)
plt.plot(x, y3, label=r"$200x+500y \geq 30000$" , linewidth = 4)
plt.plot(x , (-20000 * x + 1750000) / 25000 , label = r"$C_{min}$")
plt.xlim(0,160)
plt.ylim(0,100)
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()
# 8.2(b): Simplex Method problem.
# References are given in the book for the Simplex Method.
from scipy.optimize import linprog
obj = [-20, -10, -15] # Minimize -z = -20*x1 - 10*x2 - 15*x3.
# Maximize z = 20*x1 + 10*x2 + 15*x3.
lhs_ineq = [[ 3, 2, 5], # 3*x1 + 2*x2 +5*x3.
[ 2, 1, 1], # 2*x1+x2+x3.
[ 1, 1, 3], # x1+x2+3*x3
[ 5, 2, 4]] # 5*x1+2*x2+4*x3.
rhs_ineq = [55 , 26 , 30 , 57]
# Bounds of x1 , x2 , x3.
bnd = [(0, float("inf")), (0, float("inf")) , (0, float("inf"))]
opt = linprog(c=obj, A_ub=lhs_ineq, b_ub=rhs_ineq, bounds=bnd,
method="revised simplex")
opt
con: array([], dtype=float64) fun: -268.0 message: 'Optimization terminated successfully.' nit: 3 slack: array([ 0.00000000e+00, -3.55271368e-15, 2.60000000e+00, 0.00000000e+00]) status: 0 success: True x: array([ 1.8, 20.8, 1.6])
The optimal solution is, $z=268$, with $\left(x_1,x_2,x_3 \right)=1.8,20.8,1.6$.
# 8.3: K-Means Clustering, Column 12 (Pupil to Teacher Ratio).
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
fs = 20
from keras.datasets import boston_housing
(x_train, y_train) = boston_housing.load_data(path='boston_housing.npz',test_split=0.2,seed=113)
num_houses , datacol = 100 , 12
plt.scatter(x_train[0][:,datacol][1:num_houses],y_train[1][1:num_houses],marker="+")
plt.xlabel("Pupil to Teacher Ratio (Column 12)",fontsize = fs)
plt.ylabel("Median Value",fontsize = fs)
plt.show()
X = np.vstack((x_train[0][:,datacol][1:num_houses] , y_train[1][1:num_houses])).T
import seaborn as sns
from sklearn.cluster import KMeans
elbow=[]
for i in range(1, 20):
kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 101)
kmeans.fit(X)
elbow.append(kmeans.inertia_)
sns.lineplot(range(1, 20), elbow,color='blue')
plt.title('ELBOW METHOD',fontsize = fs)
plt.xlabel("Numbers of Clusters",fontsize = fs)
plt.ylabel("Sum of Squared Errors",fontsize = fs)
plt.show()
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz 57026/57026 [==============================] - 0s 0us/step
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning
# Program_8f.py: Mean Shift Clustering Algorithm.
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.datasets import make_blobs
# The following bandwidth can be automatically detected using
X = np.vstack((x_train[0][:,datacol][1:num_houses] , y_train[1][1:num_houses])).T
bandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=100)
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
ms.fit(X)
labels = ms.labels_
cluster_centers = ms.cluster_centers_
labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)
print("number of estimated clusters : %d" % n_clusters_)
import matplotlib.pyplot as plt
from itertools import cycle
plt.figure(1)
plt.clf()
colors = cycle("bgrcmykbgrcmykbgrcmykbgrcmyk")
for k, col in zip(range(n_clusters_), colors):
my_members = labels == k
cluster_center = cluster_centers[k]
plt.plot(X[my_members, 0], X[my_members, 1], col + ".")
plt.plot(
cluster_center[0],
cluster_center[1],
"o",
markerfacecolor=col,
markeredgecolor="k",
markersize=14,
)
fs = 20
plt.title("Estimated number of clusters: %d" % n_clusters_,fontsize=fs)
plt.xlabel("Numbers of Clusters",fontsize = fs)
plt.ylabel("Sum of Squared Errors",fontsize = fs)
plt.show()
number of estimated clusters : 3
# 8.3: Outlier detection.
import numpy as np
from sklearn.covariance import EllipticEnvelope
from sklearn.svm import OneClassSVM # OCSVM: One-Class Support Vector Machine.
import matplotlib.pyplot as plt
import matplotlib.font_manager
from sklearn.datasets import load_wine
# Define "classifiers" to be used
classifiers = {
"Empirical Covariance": EllipticEnvelope(support_fraction=1.0, contamination=0.25),
"Robust Covariance (Minimum Covariance Determinant)": EllipticEnvelope(
contamination=0.25
),
"OCSVM": OneClassSVM(nu=0.25, gamma=0.35),
}
colors = ["r", "g", "b"]
legend1 = {}
legend2 = {}
# Get data
X1 = np.vstack((x_train[0][:,datacol][1:num_houses] , y_train[1][1:num_houses])).T
# Learn a frontier for outlier detection with several classifiers
xx1, yy1 = np.meshgrid(np.linspace(-5, 35, 500), np.linspace(0, 60, 500))
for i, (clf_name, clf) in enumerate(classifiers.items()):
plt.figure(1)
clf.fit(X1)
Z1 = clf.decision_function(np.c_[xx1.ravel(), yy1.ravel()])
Z1 = Z1.reshape(xx1.shape)
legend1[clf_name] = plt.contour(
xx1, yy1, Z1, levels=[0], linewidths=2, colors=colors[i]
)
legend1_values_list = list(legend1.values())
legend1_keys_list = list(legend1.keys())
# Plot the results (= shape of the data points cloud)
plt.figure(1)
plt.title("Outlier detection on a real data set (Boston Housing)")
plt.scatter(X1[:, 0], X1[:, 1], color="black")
bbox_args = dict(boxstyle="round", fc="0.8")
arrow_args = dict(arrowstyle="->")
plt.annotate(
"outlying points",
xy=(5, 2),
xycoords="data",
textcoords="data",
xytext=(5, 2),
bbox=bbox_args,
arrowprops=arrow_args,
)
plt.xlim((xx1.min(), xx1.max()))
plt.ylim((yy1.min(), yy1.max()))
plt.legend(
(
legend1_values_list[0].collections[0],
legend1_values_list[1].collections[0],
legend1_values_list[2].collections[0],
),
(legend1_keys_list[0], legend1_keys_list[1], legend1_keys_list[2]),
loc="upper left",
prop=matplotlib.font_manager.FontProperties(size=11),
)
fs = 20
plt.title("Outlier detection on a real data set", fontsize=fs)
plt.xlabel("Attribute 12, Pupil/Teacher Ratio",fontsize = fs)
plt.ylabel("Mean Value",fontsize = fs)
plt.show()