Code adapted from scikit-learn's demo: http://scikit-learn.org/stable/auto_examples/ensemble/plot_isolation_forest.html#sphx-glr-auto-examples-ensemble-plot-isolation-forest-py
"""Compare several outlier-detection algorithms on synthetic 2-D data.

Fits One-Class SVM, Robust covariance (EllipticEnvelope), Isolation
Forest and Local Outlier Factor on two Gaussian clusters contaminated
with uniform noise, then plots each learned decision boundary together
with the true inliers/outliers for three cluster separations.
"""
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib.font_manager

from sklearn import svm
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

rng = np.random.RandomState(42)

# Example settings
n_samples = 200
outliers_fraction = 0.25
clusters_separation = [0, 1, 2]

# Define the outlier-detection tools to be compared.
classifiers = {
    "One-Class SVM": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,
                                     kernel="rbf", gamma=0.1),
    "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
    "Isolation Forest": IsolationForest(max_samples=n_samples,
                                        contamination=outliers_fraction,
                                        random_state=rng),
    "Local Outlier Factor": LocalOutlierFactor(
        n_neighbors=35, contamination=outliers_fraction),
}

# Evaluation grid for contour plots, and inlier/outlier counts.
xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))
n_inliers = int((1.0 - outliers_fraction) * n_samples)
n_outliers = int(outliers_fraction * n_samples)
# Ground truth uses scikit-learn's convention: +1 inlier, -1 outlier.
ground_truth = np.ones(n_samples, dtype=int)
ground_truth[-n_outliers:] = -1

# Fit the problem with varying cluster separation.
for offset in clusters_separation:
    np.random.seed(42)
    # Data generation: two Gaussian blobs centred at -offset and +offset...
    X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset
    X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset
    X = np.r_[X1, X2]
    # ...plus uniformly distributed outliers appended at the end.
    X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]

    # Fit each model and draw its decision boundary in its own subplot.
    plt.figure(figsize=(9, 7))
    for plot_num, (clf_name, clf) in enumerate(classifiers.items()):
        # Fit the data and tag outliers.  LOF has no separate predict
        # step: fit_predict labels the training samples directly.
        if clf_name == "Local Outlier Factor":
            y_pred = clf.fit_predict(X)
            scores_pred = clf.negative_outlier_factor_
        else:
            clf.fit(X)
            scores_pred = clf.decision_function(X)
            y_pred = clf.predict(X)
        # Threshold at the score percentile matching the known
        # contamination: the lowest outliers_fraction of scores.
        threshold = stats.scoreatpercentile(scores_pred,
                                            100 * outliers_fraction)
        # Number of mislabelled samples.
        n_errors = (y_pred != ground_truth).sum()

        # Evaluate the decision function on the grid for contouring.
        if clf_name == "Local Outlier Factor":
            # decision_function is private for LOF in this sklearn version.
            Z = clf._decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)

        subplot = plt.subplot(2, 2, plot_num + 1)
        # Blue shading for the region scored below the threshold.
        subplot.contourf(xx, yy, Z,
                         levels=np.linspace(Z.min(), threshold, 7),
                         cmap=plt.cm.Blues_r)
        # Red line marks the learned decision boundary.
        a = subplot.contour(xx, yy, Z, levels=[threshold],
                            linewidths=2, colors='red')
        # Orange fill for the region scored above the threshold.
        subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
                         colors='orange')
        b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1],
                            c='white', s=20, edgecolor='k')
        c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1],
                            c='black', s=20, edgecolor='k')
        subplot.axis('tight')
        subplot.legend(
            [a.collections[0], b, c],
            ['learned decision function', 'true inliers', 'true outliers'],
            prop=matplotlib.font_manager.FontProperties(size=10),
            loc='lower right')
        subplot.set_xlabel("%d. %s (errors: %d)"
                           % (plot_num + 1, clf_name, n_errors))
        subplot.set_xlim((-7, 7))
        subplot.set_ylim((-7, 7))
    plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)
    plt.suptitle("Outlier detection")

plt.show()
A comparison of several algorithms for detecting anomalous (outlier) points.