AI LAB - Algorithms (Python)
KNN:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


# Load the Iris dataset from the UCI repository
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']
dataset = pd.read_csv(url, names=names)


# Inspect the first few rows
dataset.head()


# Split into features (the first four columns) and class labels (the last column)
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values


# Hold out 20% of the data for testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)


# Standardise the features: fit the scaler on the training set only,
# then apply the same transform to both splits
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
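
Because the scaler must be fitted on the training split only (as done above), the same two steps are often bundled into a single estimator. A minimal sketch of that alternative, not part of the original paste; it would replace the manual fit/transform calls above and be applied to the raw output of train_test_split:

# Assumed alternative: a Pipeline refits the scaler inside fit(), so the
# scaling and classification steps cannot get out of sync
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
pipe = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
pipe.fit(X_train, y_train)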

# Train a k-nearest-neighbours classifier with k = 5
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(X_train, y_train)


# Predict on the held-out test set
y_pred = classifier.predict(X_test)


# Evaluate the predictions
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
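
A natural extension, not part of the original lab sheet, is to check how sensitive the result is to the choice of k. The sketch below reuses the scaled X_train/X_test/y_train/y_test splits from above, retrains the classifier for k = 1..39, and plots the mean test error so a good k can be read off the curve:

# Assumed extension: sweep k and plot the mean error rate on the test set
error = []
for k in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    pred_k = knn.predict(X_test)
    error.append(np.mean(pred_k != y_test))

plt.plot(range(1, 40), error, marker='o')
plt.title('Error Rate vs. K Value')
plt.xlabel('K')
plt.ylabel('Mean Error')
plt.show()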

Decision Tree:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree


# Load the balance-scale dataset from a local CSV file
balance_data = pd.read_csv("C:/Users/test.LAB/Desktop/balance.csv")


# Inspect the first few rows
balance_data.head()


# Column 0 holds the class label; columns 1-4 hold the features
X = balance_data.values[:, 1:5]
Y = balance_data.values[:, 0]


# Hold out 30% of the data for testing
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=100)


# Train a decision tree using the entropy (information gain) criterion,
# capped at depth 3 with at least 5 samples per leaf to limit overfitting
clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=100,
                                     max_depth=3, min_samples_leaf=5)
clf_entropy.fit(X_train, y_train)


# Predict on the held-out test set
y_pred_en = clf_entropy.predict(X_test)
y_pred_en


# Report accuracy as a percentage
print("Accuracy is", accuracy_score(y_test, y_pred_en) * 100)
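
The "from sklearn import tree" import above is otherwise unused; a plausible use for it is rendering the fitted tree. A minimal sketch, assuming the clf_entropy model from the cell above and matplotlib for display (the figure size and styling are assumptions, not from the original paste):

# Assumed use of the otherwise-unused "tree" import: draw the fitted tree
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 6))
tree.plot_tree(clf_entropy, filled=True)
plt.show()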

K Means Clustering:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the mall dataset with pandas
dataset = pd.read_csv('Mall_Customers.csv')
X = dataset.iloc[:, [3, 4]].values  # Annual Income and Spending Score columns

# Using the elbow method to find the optimal number of clusters:
# run k-means for k = 1..10 and record the within-cluster sum of squares (WCSS)
from sklearn.cluster import KMeans
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

# Plot WCSS against k; the "elbow" of the curve marks the optimal number of clusters
plt.plot(range(1, 11), wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()
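
Reading the elbow off the plot is subjective, so a numeric cross-check can help. The sketch below is an assumed addition, not part of the original paste; it reuses X and KMeans from above and prints the mean silhouette coefficient (higher means tighter, better-separated clusters) for each candidate k:

# Assumed cross-check of the elbow: silhouette score for k = 2..10
from sklearn.metrics import silhouette_score
for k in range(2, 11):
    labels = KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=0).fit_predict(X)
    print(k, silhouette_score(X, labels))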

# Apply k-means with k = 5, the elbow point read from the plot above
kmeans = KMeans(n_clusters=5, init='k-means++', max_iter=300, n_init=10, random_state=0)
y_kmeans = kmeans.fit_predict(X)

# Visualising the clusters (the original used an undefined Y_Kmeans; the
# labels returned by fit_predict are in y_kmeans)
plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s=100, c='red', label='Cluster 1')
plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s=100, c='blue', label='Cluster 2')
plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s=100, c='green', label='Cluster 3')
plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], s=100, c='cyan', label='Cluster 4')
plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], s=100, c='magenta', label='Cluster 5')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=300, c='yellow', label='Centroids')

plt.title('Clusters of clients')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending score (1-100)')
plt.legend()
plt.show()
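
As a follow-up, the cluster assignments can be written back onto the customer records for further analysis. This is an assumed extension, not in the original paste; it reuses dataset and y_kmeans from above:

# Assumed follow-up: attach each customer's cluster label to the dataframe
dataset['Cluster'] = y_kmeans
print(dataset.head())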
