From d75527f7eecc4e2fcdd18ab157412506717c8adb Mon Sep 17 00:00:00 2001
From: navanchauhan
Date: Mon, 7 Nov 2022 23:36:11 -0700
Subject: add blog post
---
 ...019-12-16-TensorFlow-Polynomial-Regression.html | 186 ++++++++++++++-------
 1 file changed, 124 insertions(+), 62 deletions(-)

Polynomial regression even fits a non-linear relationship (e.g. when the points don't all lie on a straight line).

Imports

import tensorflow.compat.v1 as tf
 tf.disable_v2_behavior()
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd

Dataset

We first generate dummy data using np.linspace, whose signature is

linspace(lower_limit, upper_limit, no_of_observations)

x = np.linspace(0, 50, 50)
 y = np.linspace(0, 50, 50)

We use the following function to add noise to the data, so that our values do not lie on a perfectly straight line

x += np.random.uniform(-4, 4, 50)
 y += np.random.uniform(-4, 4, 50)
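
As a quick sanity check (this plot is not part of the original listing), we can scatter the noisy points to confirm they no longer fall on a perfect line:

plt.scatter(x, y)
plt.title("Noisy dummy data")
plt.show()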

Position vs Salary Dataset

We will be using the Salary vs Position dataset: https://drive.google.com/file/d/1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9/view

!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9' -O data.csv
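
The leading ! runs wget as a shell command inside a notebook. Outside a notebook, a plain-Python download of the same file works as well (a minimal sketch using only the standard library):

import urllib.request

url = "https://docs.google.com/uc?export=download&id=1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9"
urllib.request.urlretrieve(url, "data.csv")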
df = pd.read_csv("data.csv")
df # this gives us a preview of the dataset we are working with
| Position          | Level | Salary  |
 |-------------------|-------|---------|
 | Business Analyst  | 1     | 45000   |
 | Junior Consultant | 2     | 50000   |
 | Senior Consultant | 3     | 60000   |
 | Manager           | 4     | 80000   |
 | Country Manager   | 5     | 110000  |
 | Region Manager    | 6     | 150000  |
 | Partner           | 7     | 200000  |
 | Senior Partner    | 8     | 300000  |
 | C-level           | 9     | 500000  |
 | CEO               | 10    | 1000000 |

We use the Salary column as the ordinate (y-coordinate) and the Level column as the abscissa (x-coordinate)

abscissa = df["Level"].to_list() # abscissa = [1,2,3,4,5,6,7,8,9,10]
 ordinate = df["Salary"].to_list() # ordinate = [45000,50000,60000,80000,110000,150000,200000,300000,500000,1000000]

n = len(abscissa) # no of observations
 plt.scatter(abscissa, ordinate)
 plt.ylabel('Salary')
 plt.xlabel('Position')
 plt.title("Salary vs Position")
 plt.show()

Defining Stuff

X = tf.placeholder("float")
 Y = tf.placeholder("float")
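
A placeholder holds no value until a session feeds it one. As a minimal illustration (this snippet is not part of the original post), evaluating a node built from X supplies its value at run time through feed_dict:

with tf.Session() as s:
    print(s.run(X * 2.0, feed_dict={X: 3.0})) # prints 6.0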

Defining Variables

We first define all the coefficients and the constant as TensorFlow variables, each with a random initial value

a = tf.Variable(np.random.randn(), name = "a")
 b = tf.Variable(np.random.randn(), name = "b")
 c = tf.Variable(np.random.randn(), name = "c")
 d = tf.Variable(np.random.randn(), name = "d")
 e = tf.Variable(np.random.randn(), name = "e")
 f = tf.Variable(np.random.randn(), name = "f")

Model Configuration

learning_rate = 0.2
 no_of_epochs = 25000

Equations

deg1 = a*X + b
 deg2 = a*tf.pow(X,2) + b*X + c
 deg3 = a*tf.pow(X,3) + b*tf.pow(X,2) + c*X + d
 deg4 = a*tf.pow(X,4) + b*tf.pow(X,3) + c*tf.pow(X,2) + d*X + e
 deg5 = a*tf.pow(X,5) + b*tf.pow(X,4) + c*tf.pow(X,3) + d*tf.pow(X,2) + e*X + f
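
Written out, the degree-5 model is the standard quintic polynomial, and the lower-degree models are its truncations:

$$\hat{y} = a x^5 + b x^4 + c x^3 + d x^2 + e x + f$$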

Cost Function

We use the Mean Squared Error Function
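
Each cost below is the sum of squared residuals scaled by 1/(2n); the extra factor of 2 is a common convention that only rescales the gradient and does not change the best-fit coefficients:

$$\mathrm{cost}_k = \frac{1}{2n}\sum_{i=1}^{n}\bigl(\mathrm{deg}_k(x_i) - y_i\bigr)^2$$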

mse1 = tf.reduce_sum(tf.pow(deg1-Y,2))/(2*n)
 mse2 = tf.reduce_sum(tf.pow(deg2-Y,2))/(2*n)
 mse3 = tf.reduce_sum(tf.pow(deg3-Y,2))/(2*n)
 mse4 = tf.reduce_sum(tf.pow(deg4-Y,2))/(2*n)
 mse5 = tf.reduce_sum(tf.pow(deg5-Y,2))/(2*n)

Optimizer

We use the AdamOptimizer for the polynomial models and the GradientDescentOptimizer for the linear model

optimizer1 = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse1)
 optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(mse2)
 optimizer3 = tf.train.AdamOptimizer(learning_rate).minimize(mse3)
 optimizer4 = tf.train.AdamOptimizer(learning_rate).minimize(mse4)
 optimizer5 = tf.train.AdamOptimizer(learning_rate).minimize(mse5)

init=tf.global_variables_initializer()

Model Predictions

Using the final values obtained from the optimizers, we calculate the predicted values using the X values. We then plot them to compare the actual data and the predicted line.

Linear Equation

with tf.Session() as sess:
     sess.run(init)
     for epoch in range(no_of_epochs):
       for (x,y) in zip(abscissa, ordinate):
         sess.run(optimizer1, feed_dict={X: x, Y: y})
       if (epoch+1)%1000==0:
         training_cost = sess.run(mse1, feed_dict={X: abscissa, Y: ordinate})
         print("Epoch", (epoch+1), ": Training Cost:", training_cost, " a,b:", sess.run(a), sess.run(b))
         coefficient1 = sess.run(a)
         constant = sess.run(b)
 
 print(training_cost, coefficient1, constant)
Epoch 1000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
 Epoch 2000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
 Epoch 3000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
 Epoch 4000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
 ...
 Epoch 24000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
 Epoch 25000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
 88999125000.0 180396.42 -478869.12
predictions = []
 for x in abscissa:
   predictions.append((coefficient1*x + constant))
 plt.plot(abscissa , ordinate, 'ro', label ='Original data')
 plt.plot(abscissa, predictions, label ='Fitted line')
 plt.title('Linear Regression Result')
 plt.legend()
 plt.show()

Quadratic Equation

with tf.Session() as sess:
     sess.run(init)
     for epoch in range(no_of_epochs):
       for (x,y) in zip(abscissa, ordinate):
         sess.run(optimizer2, feed_dict={X: x, Y: y})
       if (epoch+1)%1000==0:
         training_cost = sess.run(mse2, feed_dict={X: abscissa, Y: ordinate})
         print("Epoch", (epoch+1), ": Training Cost:", training_cost, " a,b,c:", sess.run(a), sess.run(b), sess.run(c))
         coefficient1 = sess.run(a)
         coefficient2 = sess.run(b)
         constant = sess.run(c)
 
 print(training_cost, coefficient1, coefficient2, constant)
Epoch 1000 : Training Cost: 52571360000.0  a,b,c: 1002.4456 1097.0197 1276.6921
 Epoch 2000 : Training Cost: 37798890000.0  a,b,c: 1952.4263 2130.2825 2469.7756
 Epoch 3000 : Training Cost: 26751185000.0  a,b,c: 2839.5825 3081.6118 3554.351
 Epoch 4000 : Training Cost: 19020106000.0  a,b,c: 3644.56 3922.9563 4486.3135
 ...
 Epoch 24000 : Training Cost: 8088001000.0  a,b,c: 6632.96 3399.878 -79.89219
 Epoch 25000 : Training Cost: 8058094600.0  a,b,c: 6659.793 3227.2517 -463.03156
 8058094600.0 6659.793 3227.2517 -463.03156
predictions = []
 for x in abscissa:
   predictions.append((coefficient1*pow(x,2) + coefficient2*x + constant))
 plt.plot(abscissa , ordinate, 'ro', label ='Original data')
 plt.plot(abscissa, predictions, label ='Fitted line')
 plt.title('Quadratic Regression Result')
 plt.legend()
 plt.show()

Cubic

with tf.Session() as sess:
     sess.run(init)
     for epoch in range(no_of_epochs):
       for (x,y) in zip(abscissa, ordinate):
         sess.run(optimizer3, feed_dict={X: x, Y: y})
       if (epoch+1)%1000==0:
         training_cost = sess.run(mse3, feed_dict={X: abscissa, Y: ordinate})
         print("Epoch", (epoch+1), ": Training Cost:", training_cost, " a,b,c,d:", sess.run(a), sess.run(b), sess.run(c), sess.run(d))
         coefficient1 = sess.run(a)
         coefficient2 = sess.run(b)
         coefficient3 = sess.run(c)
         constant = sess.run(d)
 
 print(training_cost, coefficient1, coefficient2, coefficient3, constant)
Epoch 1000 : Training Cost: 4279814000.0  a,b,c,d: 670.1527 694.4212 751.4653 903.9527
 Epoch 2000 : Training Cost: 3770950400.0  a,b,c,d: 742.6414 666.3489 636.94525 859.2088
 Epoch 3000 : Training Cost: 3717708300.0  a,b,c,d: 756.2582 569.3339 448.105 748.23956
 Epoch 4000 : Training Cost: 3667464000.0  a,b,c,d: 769.4476 474.0318 265.5761 654.75525
 ...
 Epoch 24000 : Training Cost: 3070361300.0  a,b,c,d: 975.52875 -1095.4292 -2211.854 1847.4485
 Epoch 25000 : Training Cost: 3052791300.0  a,b,c,d: 983.4346 -1159.7922 -2286.9412 2027.4857
 3052791300.0 983.4346 -1159.7922 -2286.9412 2027.4857
predictions = []
 for x in abscissa:
   predictions.append((coefficient1*pow(x,3) + coefficient2*pow(x,2) + coefficient3*x + constant))
 plt.plot(abscissa , ordinate, 'ro', label ='Original data')
 plt.plot(abscissa, predictions, label ='Fitted line')
 plt.title('Cubic Regression Result')
 plt.legend()
 plt.show()

Quartic

with tf.Session() as sess:
     sess.run(init)
     for epoch in range(no_of_epochs):
       for (x,y) in zip(abscissa, ordinate):
         sess.run(optimizer4, feed_dict={X: x, Y: y})
       if (epoch+1)%1000==0:
         training_cost = sess.run(mse4, feed_dict={X: abscissa, Y: ordinate})
         print("Epoch", (epoch+1), ": Training Cost:", training_cost, " a,b,c,d:", sess.run(a), sess.run(b), sess.run(c), sess.run(d), sess.run(e))
         coefficient1 = sess.run(a)
         coefficient2 = sess.run(b)
         coefficient3 = sess.run(c)
         coefficient4 = sess.run(d)
         constant = sess.run(e)
 
 print(training_cost, coefficient1, coefficient2, coefficient3, coefficient4, constant)
Epoch 1000 : Training Cost: 1902632600.0  a,b,c,d: 84.48304 52.210594 54.791424 142.51952 512.0343
 Epoch 2000 : Training Cost: 1854316200.0  a,b,c,d: 88.998955 13.073557 14.276088 223.55667 1056.4655
 Epoch 3000 : Training Cost: 1812812400.0  a,b,c,d: 92.9462 -22.331177 -15.262934 327.41858 1634.9054
 Epoch 4000 : Training Cost: 1775716000.0  a,b,c,d: 96.42522 -54.64535 -35.829437 449.5028 2239.1392
 ...
 Epoch 24000 : Training Cost: 1252052600.0  a,b,c,d: 135.9583 -493.38254 90.268616 3764.0078 15010.481
 Epoch 25000 : Training Cost: 1231713700.0  a,b,c,d: 137.54753 -512.1876 101.59372 3926.4897 15609.368
 1231713700.0 137.54753 -512.1876 101.59372 3926.4897 15609.368
predictions = []
 for x in abscissa:
   predictions.append((coefficient1*pow(x,4) + coefficient2*pow(x,3) + coefficient3*pow(x,2) + coefficient4*x + constant))
 plt.plot(abscissa , ordinate, 'ro', label ='Original data')
 plt.plot(abscissa, predictions, label ='Fitted line')
 plt.title('Quartic Regression Result')
 plt.legend()
 plt.show()

Quintic

with tf.Session() as sess:
     sess.run(init)
     for epoch in range(no_of_epochs):
       for (x,y) in zip(abscissa, ordinate):
         sess.run(optimizer5, feed_dict={X: x, Y: y})
       if (epoch+1)%1000==0:
         training_cost = sess.run(mse5, feed_dict={X: abscissa, Y: ordinate})
         print("Epoch", (epoch+1), ": Training Cost:", training_cost, " a,b,c,d,e,f:", sess.run(a), sess.run(b), sess.run(c), sess.run(d), sess.run(e), sess.run(f))
         coefficient1 = sess.run(a)
         coefficient2 = sess.run(b)
         coefficient3 = sess.run(c)
         coefficient4 = sess.run(d)
         coefficient5 = sess.run(e)
         constant = sess.run(f)

print(training_cost, coefficient1, coefficient2, coefficient3, coefficient4, coefficient5, constant)
Epoch 1000 : Training Cost: 1409200100.0  a,b,c,d,e,f: 7.949472 7.46219 55.626034 184.29028 484.00223 1024.0083
 Epoch 2000 : Training Cost: 1306882400.0  a,b,c,d,e,f: 8.732181 -4.0085897 73.25298 315.90103 904.08887 2004.9749
 Epoch 3000 : Training Cost: 1212606000.0  a,b,c,d,e,f: 9.732249 -16.90125 86.28379 437.06552 1305.055 2966.2188
 Epoch 4000 : Training Cost: 1123640400.0  a,b,c,d,e,f: 10.74851 -29.82692 98.59997 555.331 1698.4631 3917.9155
 ...
 Epoch 24000 : Training Cost: 229660080.0  a,b,c,d,e,f: 27.102589 -238.44817 309.35342 2420.4185 7770.5728 19536.19
 Epoch 25000 : Training Cost: 216972400.0  a,b,c,d,e,f: 27.660324 -245.69016 318.10062 2483.3608 7957.354 20027.707
 216972400.0 27.660324 -245.69016 318.10062 2483.3608 7957.354 20027.707
predictions = []
 for x in abscissa:
   predictions.append((coefficient1*pow(x,5) + coefficient2*pow(x,4) + coefficient3*pow(x,3) + coefficient4*pow(x,2) + coefficient5*x + constant))
 plt.plot(abscissa , ordinate, 'ro', label ='Original data')
 plt.plot(abscissa, predictions, label ='Fitted line')
 plt.title('Quintic Regression Result')
 plt.legend()
 plt.show()

-- cgit v1.2.3