From ef5a0a9f9f621e0550dc05ebddbae3c3eac8f352 Mon Sep 17 00:00:00 2001 From: Navan Chauhan Date: Sat, 18 Jan 2020 19:47:54 +0530 Subject: Publish deploy 2020-01-18 19:47 --- .../index.html | 214 ++++--- posts/2019-12-08-Splitting-Zips/index.html | 12 +- .../index.html | 30 +- .../index.html | 674 +++++++++++---------- posts/2019-12-22-Fake-News-Detector/index.html | 289 +++++---- .../index.html | 32 +- posts/index.html | 2 +- 7 files changed, 696 insertions(+), 557 deletions(-) (limited to 'posts') diff --git a/posts/2019-12-08-Image-Classifier-Tensorflow/index.html b/posts/2019-12-08-Image-Classifier-Tensorflow/index.html index a98de67..c835a5e 100644 --- a/posts/2019-12-08-Image-Classifier-Tensorflow/index.html +++ b/posts/2019-12-08-Image-Classifier-Tensorflow/index.html @@ -1,101 +1,123 @@ -Creating a Custom Image Classifier using Tensorflow 2.x and Keras for Detecting Malaria | Navan Chauhan
🕑 3 minute read.

Creating a Custom Image Classifier using Tensorflow 2.x and Keras for Detecting Malaria

Done during Google Code-In. Org: Tensorflow.

Imports

%tensorflow_version 2.x #This is for telling Colab that you want to use TF 2.0, ignore if running on local machine
+Creating a Custom Image Classifier using Tensorflow 2.x and Keras for Detecting Malaria | Navan Chauhan
🕑 4 minute read.

Creating a Custom Image Classifier using Tensorflow 2.x and Keras for Detecting Malaria

Done during Google Code-In. Org: Tensorflow.

Imports

%tensorflow_version 2.x #This is for telling Colab that you want to use TF 2.0, ignore if running on local machine -from PIL import Image # We use the PIL Library to resize images -import numpy as np -import os -import cv2 -import tensorflow as tf -from tensorflow.keras import datasets, layers, models -import pandas as pd -import matplotlib.pyplot as plt -from keras.models import Sequential -from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout -

Dataset

Fetching the Data

!wget ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip
-!unzip cell_images.zip
-

Processing the Data

We resize all the images to 50x50 and append the NumPy array of each image, along with its label (infected or not), to common lists.

data = []
-labels = []
+from PIL import Image # We use the PIL Library to resize images
+import numpy as np
+import os
+import cv2
+import tensorflow as tf
+from tensorflow.keras import datasets, layers, models
+import pandas as pd
+import matplotlib.pyplot as plt
+from keras.models import Sequential
+from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
+
-Parasitized = os.listdir("./cell_images/Parasitized/") -for parasite in Parasitized: - try: - image=cv2.imread("./cell_images/Parasitized/"+parasite) - image_from_array = Image.fromarray(image, 'RGB') - size_image = image_from_array.resize((50, 50)) - data.append(np.array(size_image)) - labels.append(0) - except AttributeError: - print("") +

Dataset

Fetching the Data

!wget ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip +!unzip cell_images.zip +
-Uninfected = os.listdir("./cell_images/Uninfected/") -for uninfect in Uninfected: - try: - image=cv2.imread("./cell_images/Uninfected/"+uninfect) - image_from_array = Image.fromarray(image, 'RGB') - size_image = image_from_array.resize((50, 50)) - data.append(np.array(size_image)) - labels.append(1) - except AttributeError: - print("") -

Splitting Data

df = np.array(data)
-labels = np.array(labels)
-(X_train, X_test) = df[(int)(0.1*len(df)):],df[:(int)(0.1*len(df))]
-(y_train, y_test) = labels[(int)(0.1*len(labels)):],labels[:(int)(0.1*len(labels))]
-
s=np.arange(X_train.shape[0])
-np.random.shuffle(s)
-X_train=X_train[s]
-y_train=y_train[s]
-X_train = X_train/255.0
-

Model

Creating Model

By creating a sequential model, we create a linear stack of layers.

Note: The input shape for the first layer is (50, 50, 3), i.e. 50x50 pixels with 3 colour channels, which corresponds to the size of the resized images.

model = models.Sequential()
-model.add(layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu', input_shape=(50,50,3)))
-model.add(layers.MaxPooling2D(pool_size=2))
-model.add(layers.Conv2D(filters=32,kernel_size=2,padding='same',activation='relu'))
-model.add(layers.MaxPooling2D(pool_size=2))
-model.add(layers.Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
-model.add(layers.MaxPooling2D(pool_size=2))
-model.add(layers.Dropout(0.2))
-model.add(layers.Flatten())
-model.add(layers.Dense(500,activation="relu"))
-model.add(layers.Dropout(0.2))
-model.add(layers.Dense(2,activation="softmax"))#2 represent output layer neurons 
-model.summary()
-

Compiling Model

We use the Adam optimiser as it is an adaptive learning rate optimization algorithm that has been designed specifically for training deep neural networks, which means it changes its learning rate automatically to get the best results.

model.compile(optimizer="adam",
-              loss="sparse_categorical_crossentropy", 
-             metrics=["accuracy"])
-

Training Model

We train the model for 10 epochs on the training data and then validate it using the testing data

history = model.fit(X_train,y_train, epochs=10, validation_data=(X_test,y_test))
-
Train on 24803 samples, validate on 2755 samples
-Epoch 1/10
-24803/24803 [==============================] - 57s 2ms/sample - loss: 0.0786 - accuracy: 0.9729 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
-Epoch 2/10
-24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0746 - accuracy: 0.9731 - val_loss: 0.0290 - val_accuracy: 0.9996
-Epoch 3/10
-24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0672 - accuracy: 0.9764 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
-Epoch 4/10
-24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0601 - accuracy: 0.9789 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
-Epoch 5/10
-24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0558 - accuracy: 0.9804 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
-Epoch 6/10
-24803/24803 [==============================] - 57s 2ms/sample - loss: 0.0513 - accuracy: 0.9819 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
-Epoch 7/10
-24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0452 - accuracy: 0.9849 - val_loss: 0.3190 - val_accuracy: 0.9985
-Epoch 8/10
-24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0404 - accuracy: 0.9858 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
-Epoch 9/10
-24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0352 - accuracy: 0.9878 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
-Epoch 10/10
-24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0373 - accuracy: 0.9865 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
-

Results

accuracy = history.history['accuracy'][-1]*100
-loss = history.history['loss'][-1]*100
-val_accuracy = history.history['val_accuracy'][-1]*100
-val_loss = history.history['val_loss'][-1]*100
+

Processing the Data

We resize all the images to 50x50 and append the NumPy array of each image, along with its label (infected or not), to common lists.

data = [] +labels = [] + +Parasitized = os.listdir("./cell_images/Parasitized/") +for parasite in Parasitized: + try: + image=cv2.imread("./cell_images/Parasitized/"+parasite) + image_from_array = Image.fromarray(image, 'RGB') + size_image = image_from_array.resize((50, 50)) + data.append(np.array(size_image)) + labels.append(0) + except AttributeError: + print("") + +Uninfected = os.listdir("./cell_images/Uninfected/") +for uninfect in Uninfected: + try: + image=cv2.imread("./cell_images/Uninfected/"+uninfect) + image_from_array = Image.fromarray(image, 'RGB') + size_image = image_from_array.resize((50, 50)) + data.append(np.array(size_image)) + labels.append(1) + except AttributeError: + print("") +
+ +

Splitting Data

df = np.array(data) +labels = np.array(labels) +(X_train, X_test) = df[(int)(0.1*len(df)):],df[:(int)(0.1*len(df))] +(y_train, y_test) = labels[(int)(0.1*len(labels)):],labels[:(int)(0.1*len(labels))] +
+ +
s=np.arange(X_train.shape[0]) +np.random.shuffle(s) +X_train=X_train[s] +y_train=y_train[s] +X_train = X_train/255.0 +
+ +

Model

Creating Model

By creating a sequential model, we create a linear stack of layers.

Note: The input shape for the first layer is (50, 50, 3), i.e. 50x50 pixels with 3 colour channels, which corresponds to the size of the resized images.

model = models.Sequential() +model.add(layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu', input_shape=(50,50,3))) +model.add(layers.MaxPooling2D(pool_size=2)) +model.add(layers.Conv2D(filters=32,kernel_size=2,padding='same',activation='relu')) +model.add(layers.MaxPooling2D(pool_size=2)) +model.add(layers.Conv2D(filters=64,kernel_size=2,padding="same",activation="relu")) +model.add(layers.MaxPooling2D(pool_size=2)) +model.add(layers.Dropout(0.2)) +model.add(layers.Flatten()) +model.add(layers.Dense(500,activation="relu")) +model.add(layers.Dropout(0.2)) +model.add(layers.Dense(2,activation="softmax"))#2 represent output layer neurons +model.summary() +
+ +

Compiling Model

We use the Adam optimiser as it is an adaptive learning rate optimization algorithm that has been designed specifically for training deep neural networks, which means it changes its learning rate automatically to get the best results.

model.compile(optimizer="adam", + loss="sparse_categorical_crossentropy", + metrics=["accuracy"]) +
+ +

Training Model

We train the model for 10 epochs on the training data and then validate it using the testing data

history = model.fit(X_train,y_train, epochs=10, validation_data=(X_test,y_test)) +
+ +
Train on 24803 samples, validate on 2755 samples +Epoch 1/10 +24803/24803 [==============================] - 57s 2ms/sample - loss: 0.0786 - accuracy: 0.9729 - val_loss: 0.0000e+00 - val_accuracy: 1.0000 +Epoch 2/10 +24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0746 - accuracy: 0.9731 - val_loss: 0.0290 - val_accuracy: 0.9996 +Epoch 3/10 +24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0672 - accuracy: 0.9764 - val_loss: 0.0000e+00 - val_accuracy: 1.0000 +Epoch 4/10 +24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0601 - accuracy: 0.9789 - val_loss: 0.0000e+00 - val_accuracy: 1.0000 +Epoch 5/10 +24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0558 - accuracy: 0.9804 - val_loss: 0.0000e+00 - val_accuracy: 1.0000 +Epoch 6/10 +24803/24803 [==============================] - 57s 2ms/sample - loss: 0.0513 - accuracy: 0.9819 - val_loss: 0.0000e+00 - val_accuracy: 1.0000 +Epoch 7/10 +24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0452 - accuracy: 0.9849 - val_loss: 0.3190 - val_accuracy: 0.9985 +Epoch 8/10 +24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0404 - accuracy: 0.9858 - val_loss: 0.0000e+00 - val_accuracy: 1.0000 +Epoch 9/10 +24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0352 - accuracy: 0.9878 - val_loss: 0.0000e+00 - val_accuracy: 1.0000 +Epoch 10/10 +24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0373 - accuracy: 0.9865 - val_loss: 0.0000e+00 - val_accuracy: 1.0000 +
+ +

Results

accuracy = history.history['accuracy'][-1]*100 +loss = history.history['loss'][-1]*100 +val_accuracy = history.history['val_accuracy'][-1]*100 +val_loss = history.history['val_loss'][-1]*100 + +print( + 'Accuracy:', accuracy, + '\nLoss:', loss, + '\nValidation Accuracy:', val_accuracy, + '\nValidation Loss:', val_loss +) +
+ +
Accuracy: 98.64532351493835 +Loss: 3.732407123270176 +Validation Accuracy: 100.0 +Validation Loss: 0.0 +
-print( - 'Accuracy:', accuracy, - '\nLoss:', loss, - '\nValidation Accuracy:', val_accuracy, - '\nValidation Loss:', val_loss -) -
Accuracy: 98.64532351493835 
-Loss: 3.732407123270176 
-Validation Accuracy: 100.0 
-Validation Loss: 0.0
 

We have achieved 98% Accuracy!

Link to Colab Notebook

Tagged with:
\ No newline at end of file diff --git a/posts/2019-12-08-Splitting-Zips/index.html b/posts/2019-12-08-Splitting-Zips/index.html index be90fa6..35c3998 100644 --- a/posts/2019-12-08-Splitting-Zips/index.html +++ b/posts/2019-12-08-Splitting-Zips/index.html @@ -1,4 +1,10 @@ -Splitting ZIPs into Multiple Parts | Navan Chauhan
🕑 0 minute read.

Splitting ZIPs into Multiple Parts

Tested on macOS

Creating the archive:

zip -r -s 5 oodlesofnoodles.zip website/
-

5 is the size of each split file (in MB by default; KB and GB can also be specified).

For encrypting the zip:

zip -er -s 5 oodlesofnoodles.zip website
-

Extracting Files

First we need to collect all parts, then

zip -F oodlesofnoodles.zip --out merged.zip
+Splitting ZIPs into Multiple Parts | Navan Chauhan
🕑 0 minute read.

Splitting ZIPs into Multiple Parts

Tested on macOS

Creating the archive:

zip -r -s 5 oodlesofnoodles.zip website/ +
+ +

5 is the size of each split file (in MB by default; KB and GB can also be specified).

For encrypting the zip:

zip -er -s 5 oodlesofnoodles.zip website +
+ +

Extracting Files

First we need to collect all parts, then

zip -F oodlesofnoodles.zip --out merged.zip +
+
Tagged with:
\ No newline at end of file diff --git a/posts/2019-12-10-TensorFlow-Model-Prediction/index.html b/posts/2019-12-10-TensorFlow-Model-Prediction/index.html index aa51948..24da573 100644 --- a/posts/2019-12-10-TensorFlow-Model-Prediction/index.html +++ b/posts/2019-12-10-TensorFlow-Model-Prediction/index.html @@ -1,9 +1,23 @@ -Making Predictions using Image Classifier (TensorFlow) | Navan Chauhan
🕑 1 minute read.

Making Predictions using Image Classifier (TensorFlow)

This was tested on TF 2.x and works as of 2019-12-10

If you want to understand how to make your own custom image classifier, please refer to my previous post.

If you followed my last post, then you created a model which took an image of dimensions 50x50 as an input.

First we import the following if we have not imported these before

import cv2
-import os
-

Then we read the file using OpenCV.

image=cv2.imread(imagePath)
-

The cv2.imread() function returns a NumPy array representing the image. Therefore, we need to convert it to a PIL image before we can resize it.

image_from_array = Image.fromarray(image, 'RGB')
-

Then we resize the image

size_image = image_from_array.resize((50,50))
-

After this we create a batch consisting of only one image

p = np.expand_dims(size_image, 0)
-

We then convert this uint8 datatype to a float32 datatype

img = tf.cast(p, tf.float32)
-

Finally we make the prediction

print(['Infected','Uninfected'][np.argmax(model.predict(img))])
+Making Predictions using Image Classifier (TensorFlow) | Navan Chauhan
🕑 1 minute read.

Making Predictions using Image Classifier (TensorFlow)

This was tested on TF 2.x and works as of 2019-12-10

If you want to understand how to make your own custom image classifier, please refer to my previous post.

If you followed my last post, then you created a model which took an image of dimensions 50x50 as an input.

First we import the following if we have not imported these before

import cv2 +import os +
+ +

Then we read the file using OpenCV.

image=cv2.imread(imagePath) +
+ +

The cv2.imread() function returns a NumPy array representing the image. Therefore, we need to convert it to a PIL image before we can resize it.

image_from_array = Image.fromarray(image, 'RGB') +
+ +

Then we resize the image

size_image = image_from_array.resize((50,50)) +
+ +

After this we create a batch consisting of only one image

p = np.expand_dims(size_image, 0) +
+ +

We then convert this uint8 datatype to a float32 datatype

img = tf.cast(p, tf.float32) +
+ +

Finally we make the prediction

print(['Infected','Uninfected'][np.argmax(model.predict(img))]) +
+

Infected

Tagged with:
\ No newline at end of file diff --git a/posts/2019-12-16-TensorFlow-Polynomial-Regression/index.html b/posts/2019-12-16-TensorFlow-Polynomial-Regression/index.html index 37b0269..8872687 100644 --- a/posts/2019-12-16-TensorFlow-Polynomial-Regression/index.html +++ b/posts/2019-12-16-TensorFlow-Polynomial-Regression/index.html @@ -1,307 +1,369 @@ -Polynomial Regression Using TensorFlow | Navan Chauhan
🕑 15 minute read.

Polynomial Regression Using TensorFlow

In this tutorial you will learn about polynomial regression and how you can implement it in Tensorflow.

In this, we will be performing polynomial regression using 5 types of equations -

  • Linear
  • Quadratic
  • Cubic
  • Quartic
  • Quintic

Regression

What is Regression?

Regression is a statistical measurement that is used to try to determine the relationship between a dependent variable (often denoted by Y) and a series of varying variables (called independent variables, often denoted by X).

What is Polynomial Regression

This is a form of Regression Analysis where the relationship between Y and X is denoted as the nth degree/power of X. Polynomial regression can even fit a non-linear relationship (e.g., when the points don't form a straight line).

Imports

import tensorflow.compat.v1 as tf
-tf.disable_v2_behavior()
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-

Dataset

Creating Random Data

Even though in this tutorial we will use a Position vs Salary dataset, it is important to know how to create synthetic data.

To create 50 values spaced evenly between 0 and 50, we use NumPy's linspace function.

linspace(lower_limit, upper_limit, no_of_observations)

x = np.linspace(0, 50, 50)
-y = np.linspace(0, 50, 50)
-

We use the following function to add noise to the data, so that our values do not lie on a perfectly straight line.

x += np.random.uniform(-4, 4, 50)
-y += np.random.uniform(-4, 4, 50)
-

Position vs Salary Dataset

We will be using https://drive.google.com/file/d/1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9/view (Salary vs Position Dataset)

!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9' -O data.csv
-
df = pd.read_csv("data.csv")
-
df # this gives us a preview of the dataset we are working with
-
| Position          | Level | Salary  |
-|-------------------|-------|---------|
-| Business Analyst  | 1     | 45000   |
-| Junior Consultant | 2     | 50000   |
-| Senior Consultant | 3     | 60000   |
-| Manager           | 4     | 80000   |
-| Country Manager   | 5     | 110000  |
-| Region Manager    | 6     | 150000  |
-| Partner           | 7     | 200000  |
-| Senior Partner    | 8     | 300000  |
-| C-level           | 9     | 500000  |
-| CEO               | 10    | 1000000 |
-

We use the salary column as the ordinate (y-coordinate) and the level column as the abscissa (x-coordinate).

abscissa = df["Level"].to_list() # abscissa = [1,2,3,4,5,6,7,8,9,10]
-ordinate = df["Salary"].to_list() # ordinate = [45000,50000,60000,80000,110000,150000,200000,300000,500000,1000000]
-
n = len(abscissa) # no of observations
-plt.scatter(abscissa, ordinate)
-plt.ylabel('Salary')
-plt.xlabel('Position')
-plt.title("Salary vs Position")
-plt.show()
-

Defining Stuff

X = tf.placeholder("float")
-Y = tf.placeholder("float")
-

Defining Variables

We first define all the coefficients and the constant as TensorFlow variables having a random initial value.

a = tf.Variable(np.random.randn(), name = "a")
-b = tf.Variable(np.random.randn(), name = "b")
-c = tf.Variable(np.random.randn(), name = "c")
-d = tf.Variable(np.random.randn(), name = "d")
-e = tf.Variable(np.random.randn(), name = "e")
-f = tf.Variable(np.random.randn(), name = "f")
-

Model Configuration

learning_rate = 0.2
-no_of_epochs = 25000
-

Equations

deg1 = a*X + b
-deg2 = a*tf.pow(X,2) + b*X + c
-deg3 = a*tf.pow(X,3) + b*tf.pow(X,2) + c*X + d
-deg4 = a*tf.pow(X,4) + b*tf.pow(X,3) + c*tf.pow(X,2) + d*X + e
-deg5 = a*tf.pow(X,5) + b*tf.pow(X,4) + c*tf.pow(X,3) + d*tf.pow(X,2) + e*X + f
-

Cost Function

We use the Mean Squared Error Function

mse1 = tf.reduce_sum(tf.pow(deg1-Y,2))/(2*n)
-mse2 = tf.reduce_sum(tf.pow(deg2-Y,2))/(2*n)
-mse3 = tf.reduce_sum(tf.pow(deg3-Y,2))/(2*n)
-mse4 = tf.reduce_sum(tf.pow(deg4-Y,2))/(2*n)
-mse5 = tf.reduce_sum(tf.pow(deg5-Y,2))/(2*n)
-

Optimizer

We use the AdamOptimizer for the polynomial functions and GradientDescentOptimizer for the linear function

optimizer1 = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse1)
-optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(mse2)
-optimizer3 = tf.train.AdamOptimizer(learning_rate).minimize(mse3)
-optimizer4 = tf.train.AdamOptimizer(learning_rate).minimize(mse4)
-optimizer5 = tf.train.AdamOptimizer(learning_rate).minimize(mse5)
-
init=tf.global_variables_initializer()
-

Model Predictions

For each type of equation, we first make the model learn the values of the coefficient(s) and constant. Once we have these values, we use them to predict the Y values from the X values. We then plot the result to compare the actual data with the fitted line.

Linear Equation

with tf.Session() as sess:
-    sess.run(init)
-    for epoch in range(no_of_epochs):
-      for (x,y) in zip(abscissa, ordinate):
-        sess.run(optimizer1, feed_dict={X:x, Y:y})
-      if (epoch+1)%1000==0:
-        cost = sess.run(mse1,feed_dict={X:abscissa,Y:ordinate})
-        print("Epoch",(epoch+1), ": Training Cost:", cost," a,b:",sess.run(a),sess.run(b))
-
-        training_cost = sess.run(mse1,feed_dict={X:abscissa,Y:ordinate})
-        coefficient1 = sess.run(a)
-        constant = sess.run(b)
-
-print(training_cost, coefficient1, constant)
-
Epoch 1000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 2000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 3000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 4000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 5000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 6000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 7000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 8000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 9000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 10000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 11000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 12000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 13000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 14000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 15000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 16000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 17000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 18000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 19000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 20000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 21000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 22000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 23000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 24000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-Epoch 25000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
-88999125000.0 180396.42 -478869.12
-
predictions = []
-for x in abscissa:
-  predictions.append((coefficient1*x + constant))
-plt.plot(abscissa , ordinate, 'ro', label ='Original data')
-plt.plot(abscissa, predictions, label ='Fitted line')
-plt.title('Linear Regression Result')
-plt.legend()
-plt.show()
-

Quadratic Equation

with tf.Session() as sess:
-    sess.run(init)
-    for epoch in range(no_of_epochs):
-      for (x,y) in zip(abscissa, ordinate):
-        sess.run(optimizer2, feed_dict={X:x, Y:y})
-      if (epoch+1)%1000==0:
-        cost = sess.run(mse2,feed_dict={X:abscissa,Y:ordinate})
-        print("Epoch",(epoch+1), ": Training Cost:", cost," a,b,c:",sess.run(a),sess.run(b),sess.run(c))
-
-        training_cost = sess.run(mse2,feed_dict={X:abscissa,Y:ordinate})
-        coefficient1 = sess.run(a)
-        coefficient2 = sess.run(b)
-        constant = sess.run(c)
-
-print(training_cost, coefficient1, coefficient2, constant)
-
Epoch 1000 : Training Cost: 52571360000.0  a,b,c: 1002.4456 1097.0197 1276.6921
-Epoch 2000 : Training Cost: 37798890000.0  a,b,c: 1952.4263 2130.2825 2469.7756
-Epoch 3000 : Training Cost: 26751185000.0  a,b,c: 2839.5825 3081.6118 3554.351
-Epoch 4000 : Training Cost: 19020106000.0  a,b,c: 3644.56 3922.9563 4486.3135
-Epoch 5000 : Training Cost: 14060446000.0  a,b,c: 4345.042 4621.4233 5212.693
-Epoch 6000 : Training Cost: 11201084000.0  a,b,c: 4921.1855 5148.1504 5689.0713
-Epoch 7000 : Training Cost: 9732740000.0  a,b,c: 5364.764 5493.0156 5906.754
-Epoch 8000 : Training Cost: 9050918000.0  a,b,c: 5685.4067 5673.182 5902.0728
-Epoch 9000 : Training Cost: 8750394000.0  a,b,c: 5906.9814 5724.8906 5734.746
-Epoch 10000 : Training Cost: 8613128000.0  a,b,c: 6057.3677 5687.3364 5461.167
-Epoch 11000 : Training Cost: 8540034600.0  a,b,c: 6160.547 5592.3022 5122.8633
-Epoch 12000 : Training Cost: 8490983000.0  a,b,c: 6233.9175 5462.025 4747.111
-Epoch 13000 : Training Cost: 8450816500.0  a,b,c: 6289.048 5310.7583 4350.6997
-Epoch 14000 : Training Cost: 8414082000.0  a,b,c: 6333.199 5147.394 3943.9294
-Epoch 15000 : Training Cost: 8378841600.0  a,b,c: 6370.7944 4977.1704 3532.476
-Epoch 16000 : Training Cost: 8344471000.0  a,b,c: 6404.468 4803.542 3120.2087
-Epoch 17000 : Training Cost: 8310785500.0  a,b,c: 6435.365 4628.1523 2709.1445
-Epoch 18000 : Training Cost: 8277482000.0  a,b,c: 6465.5493 4451.833 2300.2783
-Epoch 19000 : Training Cost: 8244650000.0  a,b,c: 6494.609 4274.826 1894.3738
-Epoch 20000 : Training Cost: 8212349000.0  a,b,c: 6522.8247 4098.1733 1491.9915
-Epoch 21000 : Training Cost: 8180598300.0  a,b,c: 6550.6567 3922.7405 1093.3868
-Epoch 22000 : Training Cost: 8149257700.0  a,b,c: 6578.489 3747.8362 698.53357
-Epoch 23000 : Training Cost: 8118325000.0  a,b,c: 6606.1973 3573.2742 307.3541
-Epoch 24000 : Training Cost: 8088001000.0  a,b,c: 6632.96 3399.878 -79.89219
-Epoch 25000 : Training Cost: 8058094600.0  a,b,c: 6659.793 3227.2517 -463.03156
-8058094600.0 6659.793 3227.2517 -463.03156
-
predictions = []
-for x in abscissa:
-  predictions.append((coefficient1*pow(x,2) + coefficient2*x + constant))
-plt.plot(abscissa , ordinate, 'ro', label ='Original data')
-plt.plot(abscissa, predictions, label ='Fitted line')
-plt.title('Quadratic Regression Result')
-plt.legend()
-plt.show()
-

Cubic

with tf.Session() as sess:
-    sess.run(init)
-    for epoch in range(no_of_epochs):
-      for (x,y) in zip(abscissa, ordinate):
-        sess.run(optimizer3, feed_dict={X:x, Y:y})
-      if (epoch+1)%1000==0:
-        cost = sess.run(mse3,feed_dict={X:abscissa,Y:ordinate})
-        print("Epoch",(epoch+1), ": Training Cost:", cost," a,b,c,d:",sess.run(a),sess.run(b),sess.run(c),sess.run(d))
-
-        training_cost = sess.run(mse3,feed_dict={X:abscissa,Y:ordinate})
-        coefficient1 = sess.run(a)
-        coefficient2 = sess.run(b)
-        coefficient3 = sess.run(c)
-        constant = sess.run(d)
-
-print(training_cost, coefficient1, coefficient2, coefficient3, constant)
-
Epoch 1000 : Training Cost: 4279814000.0  a,b,c,d: 670.1527 694.4212 751.4653 903.9527
-Epoch 2000 : Training Cost: 3770950400.0  a,b,c,d: 742.6414 666.3489 636.94525 859.2088
-Epoch 3000 : Training Cost: 3717708300.0  a,b,c,d: 756.2582 569.3339 448.105 748.23956
-Epoch 4000 : Training Cost: 3667464000.0  a,b,c,d: 769.4476 474.0318 265.5761 654.75525
-Epoch 5000 : Training Cost: 3620040700.0  a,b,c,d: 782.32324 380.54272 89.39888 578.5136
-Epoch 6000 : Training Cost: 3575265800.0  a,b,c,d: 794.8898 288.83356 -80.5215 519.13654
-Epoch 7000 : Training Cost: 3532972000.0  a,b,c,d: 807.1608 198.87044 -244.31102 476.2061
-Epoch 8000 : Training Cost: 3493009200.0  a,b,c,d: 819.13513 110.64169 -402.0677 449.3291
-Epoch 9000 : Training Cost: 3455228400.0  a,b,c,d: 830.80255 24.0964 -553.92804 438.0652
-Epoch 10000 : Training Cost: 3419475500.0  a,b,c,d: 842.21594 -60.797424 -700.0123 441.983
-Epoch 11000 : Training Cost: 3385625300.0  a,b,c,d: 853.3363 -144.08699 -840.467 460.6356
-Epoch 12000 : Training Cost: 3353544700.0  a,b,c,d: 864.19135 -225.8125 -975.4196 493.57703
-Epoch 13000 : Training Cost: 3323125000.0  a,b,c,d: 874.778 -305.98932 -1104.9867 540.39465
-Epoch 14000 : Training Cost: 3294257000.0  a,b,c,d: 885.1007 -384.63474 -1229.277 600.65607
-Epoch 15000 : Training Cost: 3266820000.0  a,b,c,d: 895.18823 -461.819 -1348.4417 673.9051
-Epoch 16000 : Training Cost: 3240736000.0  a,b,c,d: 905.0128 -537.541 -1462.6171 759.7118
-Epoch 17000 : Training Cost: 3215895000.0  a,b,c,d: 914.60065 -611.8676 -1571.9058 857.6638
-Epoch 18000 : Training Cost: 3192216800.0  a,b,c,d: 923.9603 -684.8093 -1676.4642 967.30475
-Epoch 19000 : Training Cost: 3169632300.0  a,b,c,d: 933.08594 -756.3582 -1776.4275 1088.2198
-Epoch 20000 : Training Cost: 3148046300.0  a,b,c,d: 941.9928 -826.6257 -1871.9355 1219.9702
-Epoch 21000 : Training Cost: 3127394800.0  a,b,c,d: 950.67896 -895.6205 -1963.0989 1362.1665
-Epoch 22000 : Training Cost: 3107608600.0  a,b,c,d: 959.1487 -963.38116 -2050.0586 1514.4026
-Epoch 23000 : Training Cost: 3088618200.0  a,b,c,d: 967.4355 -1029.9625 -2132.961 1676.2717
-Epoch 24000 : Training Cost: 3070361300.0  a,b,c,d: 975.52875 -1095.4292 -2211.854 1847.4485
-Epoch 25000 : Training Cost: 3052791300.0  a,b,c,d: 983.4346 -1159.7922 -2286.9412 2027.4857
-3052791300.0 983.4346 -1159.7922 -2286.9412 2027.4857
-
predictions = []
-for x in abscissa:
-  predictions.append((coefficient1*pow(x,3) + coefficient2*pow(x,2) + coefficient3*x + constant))
-plt.plot(abscissa , ordinate, 'ro', label ='Original data')
-plt.plot(abscissa, predictions, label ='Fitted line')
-plt.title('Cubic Regression Result')
-plt.legend()
-plt.show()
-

Quartic

with tf.Session() as sess:
-    sess.run(init)
-    for epoch in range(no_of_epochs):
-      for (x,y) in zip(abscissa, ordinate):
-        sess.run(optimizer4, feed_dict={X:x, Y:y})
-      if (epoch+1)%1000==0:
-        cost = sess.run(mse4,feed_dict={X:abscissa,Y:ordinate})
-        print("Epoch",(epoch+1), ": Training Cost:", cost," a,b,c,d:",sess.run(a),sess.run(b),sess.run(c),sess.run(d),sess.run(e))
-
-        training_cost = sess.run(mse4,feed_dict={X:abscissa,Y:ordinate})
-        coefficient1 = sess.run(a)
-        coefficient2 = sess.run(b)
-        coefficient3 = sess.run(c)
-        coefficient4 = sess.run(d)
-        constant = sess.run(e)
-
-print(training_cost, coefficient1, coefficient2, coefficient3, coefficient4, constant)
-
Epoch 1000 : Training Cost: 1902632600.0  a,b,c,d: 84.48304 52.210594 54.791424 142.51952 512.0343
-Epoch 2000 : Training Cost: 1854316200.0  a,b,c,d: 88.998955 13.073557 14.276088 223.55667 1056.4655
-Epoch 3000 : Training Cost: 1812812400.0  a,b,c,d: 92.9462 -22.331177 -15.262934 327.41858 1634.9054
-Epoch 4000 : Training Cost: 1775716000.0  a,b,c,d: 96.42522 -54.64535 -35.829437 449.5028 2239.1392
-Epoch 5000 : Training Cost: 1741494100.0  a,b,c,d: 99.524734 -84.43976 -49.181057 585.85876 2862.4915
-Epoch 6000 : Training Cost: 1709199600.0  a,b,c,d: 102.31984 -112.19895 -56.808075 733.1876 3499.6199
-Epoch 7000 : Training Cost: 1678261800.0  a,b,c,d: 104.87324 -138.32709 -59.9442 888.79626 4146.2944
-Epoch 8000 : Training Cost: 1648340600.0  a,b,c,d: 107.23536 -163.15173 -59.58964 1050.524 4798.979
-Epoch 9000 : Training Cost: 1619243400.0  a,b,c,d: 109.44742 -186.9409 -56.53944 1216.6432 5454.9463
-Epoch 10000 : Training Cost: 1590821900.0  a,b,c,d: 111.54233 -209.91287 -51.423084 1385.8513 6113.5137
-Epoch 11000 : Training Cost: 1563042200.0  a,b,c,d: 113.54405 -232.21953 -44.73371 1557.1084 6771.7046
-Epoch 12000 : Training Cost: 1535855600.0  a,b,c,d: 115.471565 -253.9838 -36.851135 1729.535 7429.069
-Epoch 13000 : Training Cost: 1509255300.0  a,b,c,d: 117.33939 -275.29697 -28.0714 1902.5308 8083.9634
-Epoch 14000 : Training Cost: 1483227000.0  a,b,c,d: 119.1605 -296.2472 -18.618649 2075.6094 8735.381
-Epoch 15000 : Training Cost: 1457726700.0  a,b,c,d: 120.94584 -316.915 -8.650095 2248.3247 9384.197
-Epoch 16000 : Training Cost: 1432777300.0  a,b,c,d: 122.69806 -337.30704 1.7027153 2420.5771 10028.871
-Epoch 17000 : Training Cost: 1408365000.0  a,b,c,d: 124.42179 -357.45245 12.33499 2592.2983 10669.157
-Epoch 18000 : Training Cost: 1384480000.0  a,b,c,d: 126.12332 -377.39734 23.168756 2763.0933 11305.027
-Epoch 19000 : Training Cost: 1361116800.0  a,b,c,d: 127.80568 -397.16415 34.160156 2933.0452 11935.669
-Epoch 20000 : Training Cost: 1338288100.0  a,b,c,d: 129.4674 -416.72803 45.259155 3101.7727 12561.179
-Epoch 21000 : Training Cost: 1315959700.0  a,b,c,d: 131.11403 -436.14285 56.4436 3269.3142 13182.058
-Epoch 22000 : Training Cost: 1294164700.0  a,b,c,d: 132.74377 -455.3779 67.6757 3435.3833 13796.807
-Epoch 23000 : Training Cost: 1272863600.0  a,b,c,d: 134.35779 -474.45316 78.96117 3600.264 14406.58
-Epoch 24000 : Training Cost: 1252052600.0  a,b,c,d: 135.9583 -493.38254 90.268616 3764.0078 15010.481
-Epoch 25000 : Training Cost: 1231713700.0  a,b,c,d: 137.54753 -512.1876 101.59372 3926.4897 15609.368
-1231713700.0 137.54753 -512.1876 101.59372 3926.4897 15609.368
-
predictions = []
-for x in abscissa:
-  predictions.append((coefficient1*pow(x,4) + coefficient2*pow(x,3) + coefficient3*pow(x,2) + coefficient4*x + constant))
-plt.plot(abscissa , ordinate, 'ro', label ='Original data')
-plt.plot(abscissa, predictions, label ='Fitted line')
-plt.title('Quartic Regression Result')
-plt.legend()
-plt.show()
-

Quintic

with tf.Session() as sess:
-    sess.run(init)
-    for epoch in range(no_of_epochs):
-      for (x,y) in zip(abscissa, ordinate):
-        sess.run(optimizer5, feed_dict={X:x, Y:y})
-      if (epoch+1)%1000==0:
-        cost = sess.run(mse5,feed_dict={X:abscissa,Y:ordinate})
-        print("Epoch",(epoch+1), ": Training Cost:", cost," a,b,c,d,e,f:",sess.run(a),sess.run(b),sess.run(c),sess.run(d),sess.run(e),sess.run(f))
-
-        training_cost = sess.run(mse5,feed_dict={X:abscissa,Y:ordinate})
-        coefficient1 = sess.run(a)
-        coefficient2 = sess.run(b)
-        coefficient3 = sess.run(c)
-        coefficient4 = sess.run(d)
-        coefficient5 = sess.run(e)
-        constant = sess.run(f)
-
Epoch 1000 : Training Cost: 1409200100.0  a,b,c,d,e,f: 7.949472 7.46219 55.626034 184.29028 484.00223 1024.0083
-Epoch 2000 : Training Cost: 1306882400.0  a,b,c,d,e,f: 8.732181 -4.0085897 73.25298 315.90103 904.08887 2004.9749
-Epoch 3000 : Training Cost: 1212606000.0  a,b,c,d,e,f: 9.732249 -16.90125 86.28379 437.06552 1305.055 2966.2188
-Epoch 4000 : Training Cost: 1123640400.0  a,b,c,d,e,f: 10.74851 -29.82692 98.59997 555.331 1698.4631 3917.9155
-Epoch 5000 : Training Cost: 1039694300.0  a,b,c,d,e,f: 11.75426 -42.598194 110.698326 671.64355 2085.5513 4860.8535
-Epoch 6000 : Training Cost: 960663550.0  a,b,c,d,e,f: 12.745439 -55.18337 122.644936 786.00214 2466.1638 5794.3735
-Epoch 7000 : Training Cost: 886438340.0  a,b,c,d,e,f: 13.721028 -67.57168 134.43822 898.3691 2839.9958 6717.659
-Epoch 8000 : Training Cost: 816913100.0  a,b,c,d,e,f: 14.679965 -79.75113 146.07385 1008.66895 3206.6692 7629.812
-Epoch 9000 : Training Cost: 751971500.0  a,b,c,d,e,f: 15.62181 -91.71608 157.55713 1116.7715 3565.8323 8529.976
-Epoch 10000 : Training Cost: 691508740.0  a,b,c,d,e,f: 16.545347 -103.4531 168.88321 1222.6348 3916.9785 9416.236
-Epoch 11000 : Training Cost: 635382000.0  a,b,c,d,e,f: 17.450052 -114.954254 180.03932 1326.1565 4259.842 10287.99
-Epoch 12000 : Training Cost: 583477250.0  a,b,c,d,e,f: 18.334944 -126.20821 191.02948 1427.2095 4593.8 11143.449
-Epoch 13000 : Training Cost: 535640400.0  a,b,c,d,e,f: 19.198917 -137.20206 201.84718 1525.6926 4918.5327 11981.633
-Epoch 14000 : Training Cost: 491722240.0  a,b,c,d,e,f: 20.041153 -147.92719 212.49709 1621.5496 5233.627 12800.468
-Epoch 15000 : Training Cost: 451559520.0  a,b,c,d,e,f: 20.860966 -158.37456 222.97133 1714.7141 5538.676 13598.337
-Epoch 16000 : Training Cost: 414988960.0  a,b,c,d,e,f: 21.657421 -168.53406 233.27422 1805.0874 5833.1978 14373.658
-Epoch 17000 : Training Cost: 381837920.0  a,b,c,d,e,f: 22.429693 -178.39536 243.39914 1892.5883 6116.847 15124.394
-Epoch 18000 : Training Cost: 351931300.0  a,b,c,d,e,f: 23.176882 -187.94789 253.3445 1977.137 6389.117 15848.417
-Epoch 19000 : Training Cost: 325074400.0  a,b,c,d,e,f: 23.898485 -197.18741 263.12512 2058.6716 6649.8037 16543.95
-Epoch 20000 : Training Cost: 301073570.0  a,b,c,d,e,f: 24.593851 -206.10497 272.72385 2137.1797 6898.544 17209.367
-Epoch 21000 : Training Cost: 279727000.0  a,b,c,d,e,f: 25.262104 -214.69217 282.14642 2212.6372 7135.217 17842.854
-Epoch 22000 : Training Cost: 260845550.0  a,b,c,d,e,f: 25.903376 -222.94969 291.4003 2284.9844 7359.4644 18442.408
-Epoch 23000 : Training Cost: 244218030.0  a,b,c,d,e,f: 26.517094 -230.8697 300.45532 2354.3003 7571.261 19007.49
-Epoch 24000 : Training Cost: 229660080.0  a,b,c,d,e,f: 27.102589 -238.44817 309.35342 2420.4185 7770.5728 19536.19
-Epoch 25000 : Training Cost: 216972400.0  a,b,c,d,e,f: 27.660324 -245.69016 318.10062 2483.3608 7957.354 20027.707
-216972400.0 27.660324 -245.69016 318.10062 2483.3608 7957.354 20027.707
-
predictions = []
-for x in abscissa:
-  predictions.append((coefficient1*pow(x,5) + coefficient2*pow(x,4) + coefficient3*pow(x,3) + coefficient4*pow(x,2) + coefficient5*x + constant))
-plt.plot(abscissa , ordinate, 'ro', label ='Original data')
-plt.plot(abscissa, predictions, label ='Fitted line')
-plt.title('Quintic Regression Result')
-plt.legend()
-plt.show()
+Polynomial Regression Using TensorFlow | Navan Chauhan
🕑 16 minute read.

Polynomial Regression Using TensorFlow

In this tutorial you will learn about polynomial regression and how you can implement it in Tensorflow.

In this, we will be performing polynomial regression using 5 types of equations -

  • Linear
  • Quadratic
  • Cubic
  • Quartic
  • Quintic

Regression

What is Regression?

Regression is a statistical measurement that is used to try to determine the relationship between a dependent variable (often denoted by Y) and a series of varying variables (called independent variables, often denoted by X).

What is Polynomial Regression

This is a form of Regression Analysis where the relationship between Y and X is modelled as an nth-degree polynomial in X. Polynomial regression can even fit a non-linear relationship (e.g. when the points don't form a straight line).

Imports

import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +
+ +

Dataset

Creating Random Data

Even though in this tutorial we will use a Position vs Salary dataset, it is important to know how to create synthetic data.

To create 50 values spaced evenly between 0 and 50, we use NumPy's linspace function

linspace(lower_limit, upper_limit, no_of_observations)

x = np.linspace(0, 50, 50) +y = np.linspace(0, 50, 50) +
+ +

We use the following function to add noise to the data, so that our values do not lie on a perfectly straight line

x += np.random.uniform(-4, 4, 50) +y += np.random.uniform(-4, 4, 50) +
+ +

Position vs Salary Dataset

We will be using https://drive.google.com/file/d/1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9/view (Salary vs Position Dataset)

!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9' -O data.csv +
+ +
df = pd.read_csv("data.csv") +
+ +
df # this gives us a preview of the dataset we are working with +
+ +
| Position | Level | Salary | +|-------------------|-------|---------| +| Business Analyst | 1 | 45000 | +| Junior Consultant | 2 | 50000 | +| Senior Consultant | 3 | 60000 | +| Manager | 4 | 80000 | +| Country Manager | 5 | 110000 | +| Region Manager | 6 | 150000 | +| Partner | 7 | 200000 | +| Senior Partner | 8 | 300000 | +| C-level | 9 | 500000 | +| CEO | 10 | 1000000 | +
+ +

We use the Salary column as the ordinate (y-coordinate) and the Level column as the abscissa (x-coordinate)

abscissa = df["Level"].to_list() # abscissa = [1,2,3,4,5,6,7,8,9,10] +ordinate = df["Salary"].to_list() # ordinate = [45000,50000,60000,80000,110000,150000,200000,300000,500000,1000000] +
+ +
n = len(abscissa) # no of observations +plt.scatter(abscissa, ordinate) +plt.ylabel('Salary') +plt.xlabel('Position') +plt.title("Salary vs Position") +plt.show() +
+ +

Defining Stuff

X = tf.placeholder("float") +Y = tf.placeholder("float") +
+ +

Defining Variables

We first define all the coefficients and the constant as TensorFlow variables having a random initial value

a = tf.Variable(np.random.randn(), name = "a") +b = tf.Variable(np.random.randn(), name = "b") +c = tf.Variable(np.random.randn(), name = "c") +d = tf.Variable(np.random.randn(), name = "d") +e = tf.Variable(np.random.randn(), name = "e") +f = tf.Variable(np.random.randn(), name = "f") +
+ +

Model Configuration

learning_rate = 0.2 +no_of_epochs = 25000 +
+ +

Equations

deg1 = a*X + b +deg2 = a*tf.pow(X,2) + b*X + c +deg3 = a*tf.pow(X,3) + b*tf.pow(X,2) + c*X + d +deg4 = a*tf.pow(X,4) + b*tf.pow(X,3) + c*tf.pow(X,2) + d*X + e +deg5 = a*tf.pow(X,5) + b*tf.pow(X,4) + c*tf.pow(X,3) + d*tf.pow(X,2) + e*X + f +
+ +

Cost Function

We use the Mean Squared Error Function

mse1 = tf.reduce_sum(tf.pow(deg1-Y,2))/(2*n) +mse2 = tf.reduce_sum(tf.pow(deg2-Y,2))/(2*n) +mse3 = tf.reduce_sum(tf.pow(deg3-Y,2))/(2*n) +mse4 = tf.reduce_sum(tf.pow(deg4-Y,2))/(2*n) +mse5 = tf.reduce_sum(tf.pow(deg5-Y,2))/(2*n) +
+ +

Optimizer

We use the AdamOptimizer for the polynomial functions and GradientDescentOptimizer for the linear function

optimizer1 = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse1) +optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(mse2) +optimizer3 = tf.train.AdamOptimizer(learning_rate).minimize(mse3) +optimizer4 = tf.train.AdamOptimizer(learning_rate).minimize(mse4) +optimizer5 = tf.train.AdamOptimizer(learning_rate).minimize(mse5) +
+ +
init=tf.global_variables_initializer() +
+ +

Model Predictions

For each type of equation, we first train the model to find the values of the coefficient(s) and the constant. Once we have these values, we use them to predict the Y values from the X values. We then plot the result to compare the actual data with the fitted line.

Linear Equation

with tf.Session() as sess: + sess.run(init) + for epoch in range(no_of_epochs): + for (x,y) in zip(abscissa, ordinate): + sess.run(optimizer1, feed_dict={X:x, Y:y}) + if (epoch+1)%1000==0: + cost = sess.run(mse1,feed_dict={X:abscissa,Y:ordinate}) + print("Epoch",(epoch+1), ": Training Cost:", cost," a,b:",sess.run(a),sess.run(b)) + + training_cost = sess.run(mse1,feed_dict={X:abscissa,Y:ordinate}) + coefficient1 = sess.run(a) + constant = sess.run(b) + +print(training_cost, coefficient1, constant) +
+ +
Epoch 1000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 2000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 3000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 4000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 5000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 6000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 7000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 8000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 9000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 10000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 11000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 12000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 13000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 14000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 15000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 16000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 17000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 18000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 19000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 20000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 21000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 22000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 23000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 24000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +Epoch 25000 : Training Cost: 88999125000.0 a,b: 180396.42 -478869.12 +88999125000.0 180396.42 -478869.12 +
+ +
predictions = [] +for x in abscissa: + predictions.append((coefficient1*x + constant)) +plt.plot(abscissa , ordinate, 'ro', label ='Original data') +plt.plot(abscissa, predictions, label ='Fitted line') +plt.title('Linear Regression Result') +plt.legend() +plt.show() +
+ +

Quadratic Equation

with tf.Session() as sess: + sess.run(init) + for epoch in range(no_of_epochs): + for (x,y) in zip(abscissa, ordinate): + sess.run(optimizer2, feed_dict={X:x, Y:y}) + if (epoch+1)%1000==0: + cost = sess.run(mse2,feed_dict={X:abscissa,Y:ordinate}) + print("Epoch",(epoch+1), ": Training Cost:", cost," a,b,c:",sess.run(a),sess.run(b),sess.run(c)) + + training_cost = sess.run(mse2,feed_dict={X:abscissa,Y:ordinate}) + coefficient1 = sess.run(a) + coefficient2 = sess.run(b) + constant = sess.run(c) + +print(training_cost, coefficient1, coefficient2, constant) +
+ +
Epoch 1000 : Training Cost: 52571360000.0 a,b,c: 1002.4456 1097.0197 1276.6921 +Epoch 2000 : Training Cost: 37798890000.0 a,b,c: 1952.4263 2130.2825 2469.7756 +Epoch 3000 : Training Cost: 26751185000.0 a,b,c: 2839.5825 3081.6118 3554.351 +Epoch 4000 : Training Cost: 19020106000.0 a,b,c: 3644.56 3922.9563 4486.3135 +Epoch 5000 : Training Cost: 14060446000.0 a,b,c: 4345.042 4621.4233 5212.693 +Epoch 6000 : Training Cost: 11201084000.0 a,b,c: 4921.1855 5148.1504 5689.0713 +Epoch 7000 : Training Cost: 9732740000.0 a,b,c: 5364.764 5493.0156 5906.754 +Epoch 8000 : Training Cost: 9050918000.0 a,b,c: 5685.4067 5673.182 5902.0728 +Epoch 9000 : Training Cost: 8750394000.0 a,b,c: 5906.9814 5724.8906 5734.746 +Epoch 10000 : Training Cost: 8613128000.0 a,b,c: 6057.3677 5687.3364 5461.167 +Epoch 11000 : Training Cost: 8540034600.0 a,b,c: 6160.547 5592.3022 5122.8633 +Epoch 12000 : Training Cost: 8490983000.0 a,b,c: 6233.9175 5462.025 4747.111 +Epoch 13000 : Training Cost: 8450816500.0 a,b,c: 6289.048 5310.7583 4350.6997 +Epoch 14000 : Training Cost: 8414082000.0 a,b,c: 6333.199 5147.394 3943.9294 +Epoch 15000 : Training Cost: 8378841600.0 a,b,c: 6370.7944 4977.1704 3532.476 +Epoch 16000 : Training Cost: 8344471000.0 a,b,c: 6404.468 4803.542 3120.2087 +Epoch 17000 : Training Cost: 8310785500.0 a,b,c: 6435.365 4628.1523 2709.1445 +Epoch 18000 : Training Cost: 8277482000.0 a,b,c: 6465.5493 4451.833 2300.2783 +Epoch 19000 : Training Cost: 8244650000.0 a,b,c: 6494.609 4274.826 1894.3738 +Epoch 20000 : Training Cost: 8212349000.0 a,b,c: 6522.8247 4098.1733 1491.9915 +Epoch 21000 : Training Cost: 8180598300.0 a,b,c: 6550.6567 3922.7405 1093.3868 +Epoch 22000 : Training Cost: 8149257700.0 a,b,c: 6578.489 3747.8362 698.53357 +Epoch 23000 : Training Cost: 8118325000.0 a,b,c: 6606.1973 3573.2742 307.3541 +Epoch 24000 : Training Cost: 8088001000.0 a,b,c: 6632.96 3399.878 -79.89219 +Epoch 25000 : Training Cost: 8058094600.0 a,b,c: 6659.793 3227.2517 -463.03156 +8058094600.0 6659.793 
3227.2517 -463.03156 +
+ +
predictions = [] +for x in abscissa: + predictions.append((coefficient1*pow(x,2) + coefficient2*x + constant)) +plt.plot(abscissa , ordinate, 'ro', label ='Original data') +plt.plot(abscissa, predictions, label ='Fitted line') +plt.title('Quadratic Regression Result') +plt.legend() +plt.show() +
+ +

Cubic

with tf.Session() as sess: + sess.run(init) + for epoch in range(no_of_epochs): + for (x,y) in zip(abscissa, ordinate): + sess.run(optimizer3, feed_dict={X:x, Y:y}) + if (epoch+1)%1000==0: + cost = sess.run(mse3,feed_dict={X:abscissa,Y:ordinate}) + print("Epoch",(epoch+1), ": Training Cost:", cost," a,b,c,d:",sess.run(a),sess.run(b),sess.run(c),sess.run(d)) + + training_cost = sess.run(mse3,feed_dict={X:abscissa,Y:ordinate}) + coefficient1 = sess.run(a) + coefficient2 = sess.run(b) + coefficient3 = sess.run(c) + constant = sess.run(d) + +print(training_cost, coefficient1, coefficient2, coefficient3, constant) +
+ +
Epoch 1000 : Training Cost: 4279814000.0 a,b,c,d: 670.1527 694.4212 751.4653 903.9527 +Epoch 2000 : Training Cost: 3770950400.0 a,b,c,d: 742.6414 666.3489 636.94525 859.2088 +Epoch 3000 : Training Cost: 3717708300.0 a,b,c,d: 756.2582 569.3339 448.105 748.23956 +Epoch 4000 : Training Cost: 3667464000.0 a,b,c,d: 769.4476 474.0318 265.5761 654.75525 +Epoch 5000 : Training Cost: 3620040700.0 a,b,c,d: 782.32324 380.54272 89.39888 578.5136 +Epoch 6000 : Training Cost: 3575265800.0 a,b,c,d: 794.8898 288.83356 -80.5215 519.13654 +Epoch 7000 : Training Cost: 3532972000.0 a,b,c,d: 807.1608 198.87044 -244.31102 476.2061 +Epoch 8000 : Training Cost: 3493009200.0 a,b,c,d: 819.13513 110.64169 -402.0677 449.3291 +Epoch 9000 : Training Cost: 3455228400.0 a,b,c,d: 830.80255 24.0964 -553.92804 438.0652 +Epoch 10000 : Training Cost: 3419475500.0 a,b,c,d: 842.21594 -60.797424 -700.0123 441.983 +Epoch 11000 : Training Cost: 3385625300.0 a,b,c,d: 853.3363 -144.08699 -840.467 460.6356 +Epoch 12000 : Training Cost: 3353544700.0 a,b,c,d: 864.19135 -225.8125 -975.4196 493.57703 +Epoch 13000 : Training Cost: 3323125000.0 a,b,c,d: 874.778 -305.98932 -1104.9867 540.39465 +Epoch 14000 : Training Cost: 3294257000.0 a,b,c,d: 885.1007 -384.63474 -1229.277 600.65607 +Epoch 15000 : Training Cost: 3266820000.0 a,b,c,d: 895.18823 -461.819 -1348.4417 673.9051 +Epoch 16000 : Training Cost: 3240736000.0 a,b,c,d: 905.0128 -537.541 -1462.6171 759.7118 +Epoch 17000 : Training Cost: 3215895000.0 a,b,c,d: 914.60065 -611.8676 -1571.9058 857.6638 +Epoch 18000 : Training Cost: 3192216800.0 a,b,c,d: 923.9603 -684.8093 -1676.4642 967.30475 +Epoch 19000 : Training Cost: 3169632300.0 a,b,c,d: 933.08594 -756.3582 -1776.4275 1088.2198 +Epoch 20000 : Training Cost: 3148046300.0 a,b,c,d: 941.9928 -826.6257 -1871.9355 1219.9702 +Epoch 21000 : Training Cost: 3127394800.0 a,b,c,d: 950.67896 -895.6205 -1963.0989 1362.1665 +Epoch 22000 : Training Cost: 3107608600.0 a,b,c,d: 959.1487 -963.38116 -2050.0586 1514.4026 +Epoch 
23000 : Training Cost: 3088618200.0 a,b,c,d: 967.4355 -1029.9625 -2132.961 1676.2717 +Epoch 24000 : Training Cost: 3070361300.0 a,b,c,d: 975.52875 -1095.4292 -2211.854 1847.4485 +Epoch 25000 : Training Cost: 3052791300.0 a,b,c,d: 983.4346 -1159.7922 -2286.9412 2027.4857 +3052791300.0 983.4346 -1159.7922 -2286.9412 2027.4857 +
+ +
predictions = [] +for x in abscissa: + predictions.append((coefficient1*pow(x,3) + coefficient2*pow(x,2) + coefficient3*x + constant)) +plt.plot(abscissa , ordinate, 'ro', label ='Original data') +plt.plot(abscissa, predictions, label ='Fitted line') +plt.title('Cubic Regression Result') +plt.legend() +plt.show() +
+ +

Quartic

with tf.Session() as sess: + sess.run(init) + for epoch in range(no_of_epochs): + for (x,y) in zip(abscissa, ordinate): + sess.run(optimizer4, feed_dict={X:x, Y:y}) + if (epoch+1)%1000==0: + cost = sess.run(mse4,feed_dict={X:abscissa,Y:ordinate}) + print("Epoch",(epoch+1), ": Training Cost:", cost," a,b,c,d:",sess.run(a),sess.run(b),sess.run(c),sess.run(d),sess.run(e)) + + training_cost = sess.run(mse4,feed_dict={X:abscissa,Y:ordinate}) + coefficient1 = sess.run(a) + coefficient2 = sess.run(b) + coefficient3 = sess.run(c) + coefficient4 = sess.run(d) + constant = sess.run(e) + +print(training_cost, coefficient1, coefficient2, coefficient3, coefficient4, constant) +
+ +
Epoch 1000 : Training Cost: 1902632600.0 a,b,c,d: 84.48304 52.210594 54.791424 142.51952 512.0343 +Epoch 2000 : Training Cost: 1854316200.0 a,b,c,d: 88.998955 13.073557 14.276088 223.55667 1056.4655 +Epoch 3000 : Training Cost: 1812812400.0 a,b,c,d: 92.9462 -22.331177 -15.262934 327.41858 1634.9054 +Epoch 4000 : Training Cost: 1775716000.0 a,b,c,d: 96.42522 -54.64535 -35.829437 449.5028 2239.1392 +Epoch 5000 : Training Cost: 1741494100.0 a,b,c,d: 99.524734 -84.43976 -49.181057 585.85876 2862.4915 +Epoch 6000 : Training Cost: 1709199600.0 a,b,c,d: 102.31984 -112.19895 -56.808075 733.1876 3499.6199 +Epoch 7000 : Training Cost: 1678261800.0 a,b,c,d: 104.87324 -138.32709 -59.9442 888.79626 4146.2944 +Epoch 8000 : Training Cost: 1648340600.0 a,b,c,d: 107.23536 -163.15173 -59.58964 1050.524 4798.979 +Epoch 9000 : Training Cost: 1619243400.0 a,b,c,d: 109.44742 -186.9409 -56.53944 1216.6432 5454.9463 +Epoch 10000 : Training Cost: 1590821900.0 a,b,c,d: 111.54233 -209.91287 -51.423084 1385.8513 6113.5137 +Epoch 11000 : Training Cost: 1563042200.0 a,b,c,d: 113.54405 -232.21953 -44.73371 1557.1084 6771.7046 +Epoch 12000 : Training Cost: 1535855600.0 a,b,c,d: 115.471565 -253.9838 -36.851135 1729.535 7429.069 +Epoch 13000 : Training Cost: 1509255300.0 a,b,c,d: 117.33939 -275.29697 -28.0714 1902.5308 8083.9634 +Epoch 14000 : Training Cost: 1483227000.0 a,b,c,d: 119.1605 -296.2472 -18.618649 2075.6094 8735.381 +Epoch 15000 : Training Cost: 1457726700.0 a,b,c,d: 120.94584 -316.915 -8.650095 2248.3247 9384.197 +Epoch 16000 : Training Cost: 1432777300.0 a,b,c,d: 122.69806 -337.30704 1.7027153 2420.5771 10028.871 +Epoch 17000 : Training Cost: 1408365000.0 a,b,c,d: 124.42179 -357.45245 12.33499 2592.2983 10669.157 +Epoch 18000 : Training Cost: 1384480000.0 a,b,c,d: 126.12332 -377.39734 23.168756 2763.0933 11305.027 +Epoch 19000 : Training Cost: 1361116800.0 a,b,c,d: 127.80568 -397.16415 34.160156 2933.0452 11935.669 +Epoch 20000 : Training Cost: 1338288100.0 a,b,c,d: 129.4674 
-416.72803 45.259155 3101.7727 12561.179 +Epoch 21000 : Training Cost: 1315959700.0 a,b,c,d: 131.11403 -436.14285 56.4436 3269.3142 13182.058 +Epoch 22000 : Training Cost: 1294164700.0 a,b,c,d: 132.74377 -455.3779 67.6757 3435.3833 13796.807 +Epoch 23000 : Training Cost: 1272863600.0 a,b,c,d: 134.35779 -474.45316 78.96117 3600.264 14406.58 +Epoch 24000 : Training Cost: 1252052600.0 a,b,c,d: 135.9583 -493.38254 90.268616 3764.0078 15010.481 +Epoch 25000 : Training Cost: 1231713700.0 a,b,c,d: 137.54753 -512.1876 101.59372 3926.4897 15609.368 +1231713700.0 137.54753 -512.1876 101.59372 3926.4897 15609.368 +
+ +
predictions = [] +for x in abscissa: + predictions.append((coefficient1*pow(x,4) + coefficient2*pow(x,3) + coefficient3*pow(x,2) + coefficient4*x + constant)) +plt.plot(abscissa , ordinate, 'ro', label ='Original data') +plt.plot(abscissa, predictions, label ='Fitted line') +plt.title('Quartic Regression Result') +plt.legend() +plt.show() +
+ +

Quintic

with tf.Session() as sess: + sess.run(init) + for epoch in range(no_of_epochs): + for (x,y) in zip(abscissa, ordinate): + sess.run(optimizer5, feed_dict={X:x, Y:y}) + if (epoch+1)%1000==0: + cost = sess.run(mse5,feed_dict={X:abscissa,Y:ordinate}) + print("Epoch",(epoch+1), ": Training Cost:", cost," a,b,c,d,e,f:",sess.run(a),sess.run(b),sess.run(c),sess.run(d),sess.run(e),sess.run(f)) + + training_cost = sess.run(mse5,feed_dict={X:abscissa,Y:ordinate}) + coefficient1 = sess.run(a) + coefficient2 = sess.run(b) + coefficient3 = sess.run(c) + coefficient4 = sess.run(d) + coefficient5 = sess.run(e) + constant = sess.run(f) +
+ +
Epoch 1000 : Training Cost: 1409200100.0 a,b,c,d,e,f: 7.949472 7.46219 55.626034 184.29028 484.00223 1024.0083 +Epoch 2000 : Training Cost: 1306882400.0 a,b,c,d,e,f: 8.732181 -4.0085897 73.25298 315.90103 904.08887 2004.9749 +Epoch 3000 : Training Cost: 1212606000.0 a,b,c,d,e,f: 9.732249 -16.90125 86.28379 437.06552 1305.055 2966.2188 +Epoch 4000 : Training Cost: 1123640400.0 a,b,c,d,e,f: 10.74851 -29.82692 98.59997 555.331 1698.4631 3917.9155 +Epoch 5000 : Training Cost: 1039694300.0 a,b,c,d,e,f: 11.75426 -42.598194 110.698326 671.64355 2085.5513 4860.8535 +Epoch 6000 : Training Cost: 960663550.0 a,b,c,d,e,f: 12.745439 -55.18337 122.644936 786.00214 2466.1638 5794.3735 +Epoch 7000 : Training Cost: 886438340.0 a,b,c,d,e,f: 13.721028 -67.57168 134.43822 898.3691 2839.9958 6717.659 +Epoch 8000 : Training Cost: 816913100.0 a,b,c,d,e,f: 14.679965 -79.75113 146.07385 1008.66895 3206.6692 7629.812 +Epoch 9000 : Training Cost: 751971500.0 a,b,c,d,e,f: 15.62181 -91.71608 157.55713 1116.7715 3565.8323 8529.976 +Epoch 10000 : Training Cost: 691508740.0 a,b,c,d,e,f: 16.545347 -103.4531 168.88321 1222.6348 3916.9785 9416.236 +Epoch 11000 : Training Cost: 635382000.0 a,b,c,d,e,f: 17.450052 -114.954254 180.03932 1326.1565 4259.842 10287.99 +Epoch 12000 : Training Cost: 583477250.0 a,b,c,d,e,f: 18.334944 -126.20821 191.02948 1427.2095 4593.8 11143.449 +Epoch 13000 : Training Cost: 535640400.0 a,b,c,d,e,f: 19.198917 -137.20206 201.84718 1525.6926 4918.5327 11981.633 +Epoch 14000 : Training Cost: 491722240.0 a,b,c,d,e,f: 20.041153 -147.92719 212.49709 1621.5496 5233.627 12800.468 +Epoch 15000 : Training Cost: 451559520.0 a,b,c,d,e,f: 20.860966 -158.37456 222.97133 1714.7141 5538.676 13598.337 +Epoch 16000 : Training Cost: 414988960.0 a,b,c,d,e,f: 21.657421 -168.53406 233.27422 1805.0874 5833.1978 14373.658 +Epoch 17000 : Training Cost: 381837920.0 a,b,c,d,e,f: 22.429693 -178.39536 243.39914 1892.5883 6116.847 15124.394 +Epoch 18000 : Training Cost: 351931300.0 a,b,c,d,e,f: 
23.176882 -187.94789 253.3445 1977.137 6389.117 15848.417 +Epoch 19000 : Training Cost: 325074400.0 a,b,c,d,e,f: 23.898485 -197.18741 263.12512 2058.6716 6649.8037 16543.95 +Epoch 20000 : Training Cost: 301073570.0 a,b,c,d,e,f: 24.593851 -206.10497 272.72385 2137.1797 6898.544 17209.367 +Epoch 21000 : Training Cost: 279727000.0 a,b,c,d,e,f: 25.262104 -214.69217 282.14642 2212.6372 7135.217 17842.854 +Epoch 22000 : Training Cost: 260845550.0 a,b,c,d,e,f: 25.903376 -222.94969 291.4003 2284.9844 7359.4644 18442.408 +Epoch 23000 : Training Cost: 244218030.0 a,b,c,d,e,f: 26.517094 -230.8697 300.45532 2354.3003 7571.261 19007.49 +Epoch 24000 : Training Cost: 229660080.0 a,b,c,d,e,f: 27.102589 -238.44817 309.35342 2420.4185 7770.5728 19536.19 +Epoch 25000 : Training Cost: 216972400.0 a,b,c,d,e,f: 27.660324 -245.69016 318.10062 2483.3608 7957.354 20027.707 +216972400.0 27.660324 -245.69016 318.10062 2483.3608 7957.354 20027.707 +
+ +
predictions = [] +for x in abscissa: + predictions.append((coefficient1*pow(x,5) + coefficient2*pow(x,4) + coefficient3*pow(x,3) + coefficient4*pow(x,2) + coefficient5*x + constant)) +plt.plot(abscissa , ordinate, 'ro', label ='Original data') +plt.plot(abscissa, predictions, label ='Fitted line') +plt.title('Quintic Regression Result') +plt.legend() +plt.show() +
+

Results and Conclusion

You just learnt Polynomial Regression using TensorFlow!

Notes

Overfitting

> Overfitting refers to a model that models the training data too well. Overfitting happens when a model learns the detail and noise in the training data to the extent that it negatively impacts the performance of the model on new data. This means that the noise or random fluctuations in the training data is picked up and learned as concepts by the model. The problem is that these concepts do not apply to new data and negatively impact the models ability to generalize.

Source: Machine Learning Mastery

Basically, if you train your machine learning model on a small dataset for a really large number of epochs, the model will learn all the deformities/noise in the data and will actually think that they are a normal part of it. Therefore, when it sees some new data, it will discard that new data as noise, which will impact the accuracy of the model in a negative manner.

Tagged with:
\ No newline at end of file diff --git a/posts/2019-12-22-Fake-News-Detector/index.html b/posts/2019-12-22-Fake-News-Detector/index.html index 6eb49ff..5b46c95 100644 --- a/posts/2019-12-22-Fake-News-Detector/index.html +++ b/posts/2019-12-22-Fake-News-Detector/index.html @@ -1,142 +1,173 @@ -Building a Fake News Detector with Turicreate | Navan Chauhan
🕑 6 minute read.

Building a Fake News Detector with Turicreate

In this tutorial we will build a fake news detecting app from scratch, using Turicreate for the machine learning model and SwiftUI for building the app

Note: These commands are written as if you are running a jupyter notebook.

Building the Machine Learning Model

Data Gathering

To build a classifier, you need a lot of data. George McIntire (GH: @joolsa) has created a wonderful dataset containing the headline, body and whether it is fake or real. Whenever you are looking for a dataset, always try searching on Kaggle and GitHub before you start building your own.

Dependencies

I used a Google Colab instance for training my model. If you also plan on using Google Colab, then I recommend choosing a GPU instance (it is free). This allows you to train the model on the GPU. Turicreate is built on top of Apache's MXNet framework; for us to use the GPU, we need to install a CUDA-compatible MXNet package.

!pip install turicreate
-!pip uninstall -y mxnet
-!pip install mxnet-cu100==1.4.0.post0
-

If you do not wish to train on GPU or are running it on your computer, you can ignore the last two lines

Downloading the Dataset

!wget -q "https://github.com/joolsa/fake_real_news_dataset/raw/master/fake_or_real_news.csv.zip"
-!unzip fake_or_real_news.csv.zip
-

Model Creation

import turicreate as tc
-tc.config.set_num_gpus(-1) # If you do not wish to use GPUs, set it to 0
-
dataSFrame = tc.SFrame('fake_or_real_news.csv')
-

The dataset contains a column named "X1", which is of no use to us. Therefore, we simply drop it

dataSFrame.remove_column('X1')
-

Splitting Dataset

train, test = dataSFrame.random_split(.9)
-

Training

model = tc.text_classifier.create(
-    dataset=train,
-    target='label',
-    features=['title','text']
-)
-
+-----------+----------+-----------+--------------+-------------------+---------------------+
-| Iteration | Passes   | Step size | Elapsed Time | Training Accuracy | Validation Accuracy |
-+-----------+----------+-----------+--------------+-------------------+---------------------+
-| 0         | 2        | 1.000000  | 1.156349     | 0.889680          | 0.790036            |
-| 1         | 4        | 1.000000  | 1.359196     | 0.985952          | 0.918149            |
-| 2         | 6        | 0.820091  | 1.557205     | 0.990260          | 0.914591            |
-| 3         | 7        | 1.000000  | 1.684872     | 0.998689          | 0.925267            |
-| 4         | 8        | 1.000000  | 1.814194     | 0.999063          | 0.925267            |
-| 9         | 14       | 1.000000  | 2.507072     | 1.000000          | 0.911032            |
-+-----------+----------+-----------+--------------+-------------------+---------------------+
-

Testing the Model

est_predictions = model.predict(test)
-accuracy = tc.evaluation.accuracy(test['label'], test_predictions)
-print(f'Topic classifier model has a testing accuracy of {accuracy*100}% ', flush=True)
-
Topic classifier model has a testing accuracy of 92.3076923076923%
-

We have just created our own Fake News Detection Model which has an accuracy of 92%!

example_text = {"title": ["Middling ‘Rise Of Skywalker’ Review Leaves Fan On Fence About Whether To Threaten To Kill Critic"], "text": ["Expressing ambivalence toward the relatively balanced appraisal of the film, Star Wars fan Miles Ariely admitted Thursday that an online publication’s middling review of The Rise Of Skywalker had left him on the fence about whether he would still threaten to kill the critic who wrote it. “I’m really of two minds about this, because on the one hand, he said the new movie fails to live up to the original trilogy, which makes me at least want to throw a brick through his window with a note telling him to watch his back,” said Ariely, confirming he had already drafted an eight-page-long death threat to Stan Corimer of the website Screen-On Time, but had not yet decided whether to post it to the reviewer’s Facebook page. “On the other hand, though, he commended J.J. Abrams’ skillful pacing and faithfulness to George Lucas’ vision, which makes me wonder if I should just call the whole thing off. Now, I really don’t feel like camping outside his house for hours. Maybe I could go with a response that’s somewhere in between, like, threatening to kill his dog but not everyone in his whole family? I don’t know. This is a tough one.” At press time, sources reported that Ariely had resolved to wear his Ewok costume while he murdered the critic in his sleep."]}
-example_prediction = model.classify(tc.SFrame(example_text))
-print(example_prediction, flush=True)
-
+-------+--------------------+
-| class |    probability     |
-+-------+--------------------+
-|  FAKE | 0.9245648658345308 |
-+-------+--------------------+
-[1 rows x 2 columns]
-

Exporting the Model

model_name = 'FakeNews'
-coreml_model_name = model_name + '.mlmodel'
-exportedModel = model.export_coreml(coreml_model_name)
-

Note: To download files from Google Colab, simply click on the files section in the sidebar, right click on the filename and then click on download

Link to Colab Notebook

Building the App using SwiftUI

Initial Setup

First we create a single view app (make sure you check the use SwiftUI button)

Then we copy our .mlmodel file to our project (Just drag and drop the file in the XCode Files Sidebar)

Our ML model does not take a string directly as an input; rather, it takes a bag of words as an input. The bag-of-words model is a simplifying representation used in NLP: a text is represented as a bag of its words, without any regard for grammar or word order, but keeping track of multiplicity

We define our bag of words function

func bow(text: String) -> [String: Double] {
-        var bagOfWords = [String: Double]()
+Building a Fake News Detector with Turicreate | Navan Chauhan
🕑 6 minute read.

Building a Fake News Detector with Turicreate

In this tutorial we will build a fake news detecting app from scratch, using Turicreate for the machine learning model and SwiftUI for building the app

Note: These commands are written as if you are running a jupyter notebook.

Building the Machine Learning Model

Data Gathering

To build a classifier, you need a lot of data. George McIntire (GH: @joolsa) has created a wonderful dataset containing the headline, body and whether it is fake or real. Whenever you are looking for a dataset, always try searching on Kaggle and GitHub before you start building your own.

Dependencies

I used a Google Colab instance for training my model. If you also plan on using Google Colab, then I recommend choosing a GPU instance (it is free). This allows you to train the model on the GPU. Turicreate is built on top of Apache's MXNet framework; for us to use the GPU, we need to install a CUDA-compatible MXNet package.

!pip install turicreate +!pip uninstall -y mxnet +!pip install mxnet-cu100==1.4.0.post0 +
+ +

If you do not wish to train on GPU or are running it on your computer, you can ignore the last two lines

Downloading the Dataset

!wget -q "https://github.com/joolsa/fake_real_news_dataset/raw/master/fake_or_real_news.csv.zip" +!unzip fake_or_real_news.csv.zip +
+ +

Model Creation

import turicreate as tc +tc.config.set_num_gpus(-1) # If you do not wish to use GPUs, set it to 0 +
+ +
dataSFrame = tc.SFrame('fake_or_real_news.csv') +
+ +

The dataset contains a column named "X1", which is of no use to us. Therefore, we simply drop it

dataSFrame.remove_column('X1') +
+ +

Splitting Dataset

train, test = dataSFrame.random_split(.9) +
+ +

Training

model = tc.text_classifier.create( + dataset=train, + target='label', + features=['title','text'] +) +
+ +
+-----------+----------+-----------+--------------+-------------------+---------------------+ +| Iteration | Passes | Step size | Elapsed Time | Training Accuracy | Validation Accuracy | ++-----------+----------+-----------+--------------+-------------------+---------------------+ +| 0 | 2 | 1.000000 | 1.156349 | 0.889680 | 0.790036 | +| 1 | 4 | 1.000000 | 1.359196 | 0.985952 | 0.918149 | +| 2 | 6 | 0.820091 | 1.557205 | 0.990260 | 0.914591 | +| 3 | 7 | 1.000000 | 1.684872 | 0.998689 | 0.925267 | +| 4 | 8 | 1.000000 | 1.814194 | 0.999063 | 0.925267 | +| 9 | 14 | 1.000000 | 2.507072 | 1.000000 | 0.911032 | ++-----------+----------+-----------+--------------+-------------------+---------------------+ +
+ +

Testing the Model

est_predictions = model.predict(test) +accuracy = tc.evaluation.accuracy(test['label'], test_predictions) +print(f'Topic classifier model has a testing accuracy of {accuracy*100}% ', flush=True) +
+ +
Topic classifier model has a testing accuracy of 92.3076923076923% +
+ +

We have just created our own Fake News Detection Model which has an accuracy of 92%!

example_text = {"title": ["Middling ‘Rise Of Skywalker’ Review Leaves Fan On Fence About Whether To Threaten To Kill Critic"], "text": ["Expressing ambivalence toward the relatively balanced appraisal of the film, Star Wars fan Miles Ariely admitted Thursday that an online publication’s middling review of The Rise Of Skywalker had left him on the fence about whether he would still threaten to kill the critic who wrote it. “I’m really of two minds about this, because on the one hand, he said the new movie fails to live up to the original trilogy, which makes me at least want to throw a brick through his window with a note telling him to watch his back,” said Ariely, confirming he had already drafted an eight-page-long death threat to Stan Corimer of the website Screen-On Time, but had not yet decided whether to post it to the reviewer’s Facebook page. “On the other hand, though, he commended J.J. Abrams’ skillful pacing and faithfulness to George Lucas’ vision, which makes me wonder if I should just call the whole thing off. Now, I really don’t feel like camping outside his house for hours. Maybe I could go with a response that’s somewhere in between, like, threatening to kill his dog but not everyone in his whole family? I don’t know. This is a tough one.” At press time, sources reported that Ariely had resolved to wear his Ewok costume while he murdered the critic in his sleep."]} +example_prediction = model.classify(tc.SFrame(example_text)) +print(example_prediction, flush=True) +
+ +
+-------+--------------------+ +| class | probability | ++-------+--------------------+ +| FAKE | 0.9245648658345308 | ++-------+--------------------+ +[1 rows x 2 columns] +
+ +

Exporting the Model

model_name = 'FakeNews' +coreml_model_name = model_name + '.mlmodel' +exportedModel = model.export_coreml(coreml_model_name) +
+ +

Note: To download files from Google Colab, simply click on the files section in the sidebar, right-click on the filename and then click on Download.

Link to Colab Notebook

Building the App using SwiftUI

Initial Setup

First we create a single view app (make sure you check the use SwiftUI button)

Then we copy our .mlmodel file to our project (just drag and drop the file into the Xcode files sidebar)

Our ML model does not take a string directly as an input; rather, it takes a bag of words as an input. The bag-of-words model is a simplifying representation used in NLP: a text is represented as a bag of its words, without any regard for grammar or order, but noting multiplicity.

We define our bag of words function

func bow(text: String) -> [String: Double] { + var bagOfWords = [String: Double]() - let tagger = NSLinguisticTagger(tagSchemes: [.tokenType], options: 0) - let range = NSRange(location: 0, length: text.utf16.count) - let options: NSLinguisticTagger.Options = [.omitPunctuation, .omitWhitespace] - tagger.string = text + let tagger = NSLinguisticTagger(tagSchemes: [.tokenType], options: 0) + let range = NSRange(location: 0, length: text.utf16.count) + let options: NSLinguisticTagger.Options = [.omitPunctuation, .omitWhitespace] + tagger.string = text - tagger.enumerateTags(in: range, unit: .word, scheme: .tokenType, options: options) { _, tokenRange, _ in - let word = (text as NSString).substring(with: tokenRange) - if bagOfWords[word] != nil { - bagOfWords[word]! += 1 - } else { - bagOfWords[word] = 1 - } - } + tagger.enumerateTags(in: range, unit: .word, scheme: .tokenType, options: options) { _, tokenRange, _ in + let word = (text as NSString).substring(with: tokenRange) + if bagOfWords[word] != nil { + bagOfWords[word]! += 1 + } else { + bagOfWords[word] = 1 + } + } - return bagOfWords - } -

We also declare our variables

@State private var title: String = ""
-@State private var headline: String = ""
-@State private var alertTitle = ""
-@State private var alertText = ""
-@State private var showingAlert = false
-

Finally, we implement a simple function which reads the two text fields, creates their bag of words representation and displays an alert with the appropriate result

Complete Code

import SwiftUI
-
-struct ContentView: View {
-    @State private var title: String = ""
-    @State private var headline: String = ""
+        return bagOfWords
+    }
+
+ +

We also declare our variables

@State private var title: String = "" +@State private var headline: String = "" +@State private var alertTitle = "" +@State private var alertText = "" +@State private var showingAlert = false +
+ +

Finally, we implement a simple function which reads the two text fields, creates their bag of words representation and displays an alert with the appropriate result

Complete Code

import SwiftUI + +struct ContentView: View { + @State private var title: String = "" + @State private var headline: String = "" - @State private var alertTitle = "" - @State private var alertText = "" - @State private var showingAlert = false + @State private var alertTitle = "" + @State private var alertText = "" + @State private var showingAlert = false - var body: some View { - NavigationView { - VStack(alignment: .leading) { - Text("Headline").font(.headline) - TextField("Please Enter Headline", text: $title) - .lineLimit(nil) - Text("Body").font(.headline) - TextField("Please Enter the content", text: $headline) - .lineLimit(nil) - } - .navigationBarTitle("Fake News Checker") - .navigationBarItems(trailing: - Button(action: classifyFakeNews) { - Text("Check") - }) - .padding() - .alert(isPresented: $showingAlert){ - Alert(title: Text(alertTitle), message: Text(alertText), dismissButton: .default(Text("OK"))) - } - } + var body: some View { + NavigationView { + VStack(alignment: .leading) { + Text("Headline").font(.headline) + TextField("Please Enter Headline", text: $title) + .lineLimit(nil) + Text("Body").font(.headline) + TextField("Please Enter the content", text: $headline) + .lineLimit(nil) + } + .navigationBarTitle("Fake News Checker") + .navigationBarItems(trailing: + Button(action: classifyFakeNews) { + Text("Check") + }) + .padding() + .alert(isPresented: $showingAlert){ + Alert(title: Text(alertTitle), message: Text(alertText), dismissButton: .default(Text("OK"))) + } + } - } + } - func classifyFakeNews(){ - let model = FakeNews() - let myTitle = bow(text: title) - let myText = bow(text: headline) - do { - let prediction = try model.prediction(title: myTitle, text: myText) - alertTitle = prediction.label - alertText = "It is likely that this piece of news is \(prediction.label.lowercased())." - print(alertText) - } catch { - alertTitle = "Error" - alertText = "Sorry, could not classify if the input news was fake or not." 
- } + func classifyFakeNews(){ + let model = FakeNews() + let myTitle = bow(text: title) + let myText = bow(text: headline) + do { + let prediction = try model.prediction(title: myTitle, text: myText) + alertTitle = prediction.label + alertText = "It is likely that this piece of news is \(prediction.label.lowercased())." + print(alertText) + } catch { + alertTitle = "Error" + alertText = "Sorry, could not classify if the input news was fake or not." + } - showingAlert = true - } - func bow(text: String) -> [String: Double] { - var bagOfWords = [String: Double]() + showingAlert = true + } + func bow(text: String) -> [String: Double] { + var bagOfWords = [String: Double]() - let tagger = NSLinguisticTagger(tagSchemes: [.tokenType], options: 0) - let range = NSRange(location: 0, length: text.utf16.count) - let options: NSLinguisticTagger.Options = [.omitPunctuation, .omitWhitespace] - tagger.string = text + let tagger = NSLinguisticTagger(tagSchemes: [.tokenType], options: 0) + let range = NSRange(location: 0, length: text.utf16.count) + let options: NSLinguisticTagger.Options = [.omitPunctuation, .omitWhitespace] + tagger.string = text - tagger.enumerateTags(in: range, unit: .word, scheme: .tokenType, options: options) { _, tokenRange, _ in - let word = (text as NSString).substring(with: tokenRange) - if bagOfWords[word] != nil { - bagOfWords[word]! += 1 - } else { - bagOfWords[word] = 1 - } - } + tagger.enumerateTags(in: range, unit: .word, scheme: .tokenType, options: options) { _, tokenRange, _ in + let word = (text as NSString).substring(with: tokenRange) + if bagOfWords[word] != nil { + bagOfWords[word]! += 1 + } else { + bagOfWords[word] = 1 + } + } - return bagOfWords - } -} - -struct ContentView_Previews: PreviewProvider { - static var previews: some View { - ContentView() - } -} + return bagOfWords + } +} + +struct ContentView_Previews: PreviewProvider { + static var previews: some View { + ContentView() + } +} +
Tagged with:
\ No newline at end of file diff --git a/posts/2020-01-14-Converting-between-PIL-NumPy/index.html b/posts/2020-01-14-Converting-between-PIL-NumPy/index.html index 6c885f1..93b57d2 100644 --- a/posts/2020-01-14-Converting-between-PIL-NumPy/index.html +++ b/posts/2020-01-14-Converting-between-PIL-NumPy/index.html @@ -1,15 +1,19 @@ -Converting between image and NumPy array | Navan Chauhan
🕑 0 minute read.

Converting between image and NumPy array

import numpy
-import PIL
-
-# Convert PIL Image to NumPy array
-img = PIL.Image.open("foo.jpg")
-arr = numpy.array(img)
-
-# Convert array to Image
-img = PIL.Image.fromarray(arr)
-

Saving an Image

try:
-    img.save(destination, "JPEG", quality=80, optimize=True, progressive=True)
-except IOError:
-    PIL.ImageFile.MAXBLOCK = img.size[0] * img.size[1]
-    img.save(destination, "JPEG", quality=80, optimize=True, progressive=True)
+Converting between image and NumPy array | Navan Chauhan
🕑 0 minute read.

Converting between image and NumPy array

import numpy +import PIL + +# Convert PIL Image to NumPy array +img = PIL.Image.open("foo.jpg") +arr = numpy.array(img) + +# Convert array to Image +img = PIL.Image.fromarray(arr) +
+ +

Saving an Image

try: + img.save(destination, "JPEG", quality=80, optimize=True, progressive=True) +except IOError: + PIL.ImageFile.MAXBLOCK = img.size[0] * img.size[1] + img.save(destination, "JPEG", quality=80, optimize=True, progressive=True) +
+
Tagged with:
\ No newline at end of file diff --git a/posts/index.html b/posts/index.html index 2fafc8c..be42737 100644 --- a/posts/index.html +++ b/posts/index.html @@ -1 +1 @@ -Posts | Navan Chauhan

Posts

Tips, tricks and tutorials which I think might be useful.

\ No newline at end of file +Posts | Navan Chauhan

Posts

Tips, tricks and tutorials which I think might be useful.

\ No newline at end of file -- cgit v1.2.3