Polynomial Regression Using TensorFlow 2.x
I have a similar post titled Polynomial Regression Using Tensorflow that used tensorflow.compat.v1
(which still works as of TF 2.16), but I thought it would be nicer to redo it with newer TF versions.
I will be skipping all the introductions about polynomial regression and jumping straight to the code. Personally, I prefer using scikit-learn
for this task.
Position vs Salary Dataset
+ +Again, we will be using https://drive.google.com/file/d/1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9/view (Salary vs Position Dataset)
+ +If you are in a Python Notebook environment like Kaggle or Google Colaboratory, you can simply run:
+ +!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9' -O data.csv
+
+Code
If you just want to copy-paste the code, scroll to the bottom for the entire snippet. Here I will try to walk through setting up the code for a 3rd-degree (cubic) polynomial.
+ +Imports
+ +import pandas as pd
+import tensorflow as tf
+import matplotlib.pyplot as plt
+import numpy as np
+
+Reading the Dataset
# Load the dataset saved as data.csv into a DataFrame.
df = pd.read_csv("data.csv")
+
+Variables and Constants
+ +Here, we initialize the X and Y values as constants, since they are not going to change. The coefficients are defined as variables.
# Inputs and targets never change during training, so they are constants.
X = tf.constant(df["Level"], dtype=tf.float32)
Y = tf.constant(df["Salary"], dtype=tf.float32)

# One trainable coefficient per polynomial term (4 terms -> cubic), each
# started at a small random value.
coefficients = [tf.Variable(np.random.randn() * 0.01, dtype=tf.float32) for _ in range(4)]
+
Here, X and Y are the values from our dataset. We initialize the coefficients of the equation as small random values.
These coefficients are evaluated by TensorFlow's tf.math.polyval function, which evaluates a polynomial whose degree is one less than the number of coefficients passed; the first coefficient in the list multiplies the highest power of x. Since our list of coefficients contains 4 variables, it will be evaluated as:
y = (x**3)*coefficients[0] + (x**2)*coefficients[1] + (x**1)*coefficients[2] + (x**0)*coefficients[3]
+
+
+Which is equivalent to the general cubic equation:
$$
y = ax^3 + bx^2 + cx + d
$$

### Optimizer Selection & Training

optimizer = tf.keras.optimizers.Adam(learning_rate=0.3)
num_epochs = 10_000

for epoch in range(num_epochs):
    # Record the forward pass so the loss can be differentiated with
    # respect to each coefficient.
    with tf.GradientTape() as tape:
        y_pred = tf.math.polyval(coefficients, X)
        # Mean squared error against the dataset targets `Y`.
        loss = tf.reduce_mean(tf.square(Y - y_pred))
    grads = tape.gradient(loss, coefficients)
    optimizer.apply_gradients(zip(grads, coefficients))
    # Report progress every 1000 epochs.
    if (epoch + 1) % 1000 == 0:
        print(f"Epoch: {epoch+1}, Loss: {loss.numpy()}")


final_coefficients = [c.numpy() for c in coefficients]
print("Final Coefficients:", final_coefficients)

plt.plot(df["Level"], df["Salary"], label="Original Data")
# Evaluate the fitted polynomial on every input in a single vectorized call.
plt.plot(df["Level"], tf.math.polyval(final_coefficients, X).numpy(), label="Our Polynomial")
plt.ylabel('Salary')
plt.xlabel('Position')
plt.title("Salary vs Position")
# Without this call the labels given to plt.plot are never displayed.
plt.legend()
plt.show()
+
+
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("data.csv")

############################
## Change Parameters Here ##
############################
x_column = "Level"         #
y_column = "Salary"        #
degree = 2                 #
learning_rate = 0.3        #
num_epochs = 25_000        #
############################

# Inputs and targets are fixed for the whole run, so they are constants.
X = tf.constant(df[x_column], dtype=tf.float32)
Y = tf.constant(df[y_column], dtype=tf.float32)

# A degree-n polynomial has n + 1 coefficients; tf.math.polyval treats the
# first one as the coefficient of the highest power.
coefficients = [tf.Variable(np.random.randn() * 0.01, dtype=tf.float32) for _ in range(degree + 1)]

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

for epoch in range(num_epochs):
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        y_pred = tf.math.polyval(coefficients, X)
        # Mean squared error between targets and predictions.
        loss = tf.reduce_mean(tf.square(Y - y_pred))
    grads = tape.gradient(loss, coefficients)
    optimizer.apply_gradients(zip(grads, coefficients))
    # Report progress every 1000 epochs.
    if (epoch + 1) % 1000 == 0:
        print(f"Epoch: {epoch+1}, Loss: {loss.numpy()}")

final_coefficients = [c.numpy() for c in coefficients]
print("Final Coefficients:", final_coefficients)

# Print the fitted polynomial, highest power first (the order polyval uses).
print("Final Equation:", end=" ")
for i, coefficient in enumerate(final_coefficients):
    print(f"{coefficient} * x^{degree-i}", end=" + " if i < degree else "\n")

plt.plot(X, Y, label="Original Data")
# Evaluate the fitted polynomial on every input in a single vectorized call.
plt.plot(X, tf.math.polyval(final_coefficients, X).numpy(), label="Our Polynomial")
plt.ylabel(y_column)
plt.xlabel(x_column)
plt.title(f"{x_column} vs {y_column}")
plt.legend()
plt.show()
+
+
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("data.csv")

############################
## Change Parameters Here ##
############################
x_column = "Level"         #
y_column = "Salary"        #
degree = 2                 #
learning_rate = 0.3        #
num_epochs = 25_000        #
############################

# Inputs and targets are fixed for the whole run, so they are constants.
X = tf.constant(df[x_column], dtype=tf.float32)
Y = tf.constant(df[y_column], dtype=tf.float32)

# A degree-n polynomial has n + 1 coefficients; tf.math.polyval treats the
# first one as the coefficient of the highest power.
coefficients = [tf.Variable(np.random.randn() * 0.01, dtype=tf.float32) for _ in range(degree + 1)]

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)


def loss_function():
    """Return the mean squared error of the current polynomial fit.

    Reads the module-level `coefficients`, `X` and `Y`.
    """
    pred_y = tf.math.polyval(coefficients, X)
    return tf.reduce_mean(tf.square(pred_y - Y))


for epoch in range(num_epochs):
    # Take the gradient step explicitly instead of calling
    # `optimizer.minimize`: Keras 3 optimizers (bundled with TF >= 2.16) do
    # not provide `minimize`, while tape + apply_gradients works on both.
    with tf.GradientTape() as tape:
        loss = loss_function()
    grads = tape.gradient(loss, coefficients)
    optimizer.apply_gradients(zip(grads, coefficients))
    if (epoch + 1) % 1000 == 0:
        # Re-evaluate so the reported loss reflects the updated coefficients.
        current_loss = loss_function().numpy()
        print(f"Epoch {epoch+1}: Training Loss: {current_loss}")

# `coefficients` is a plain Python list, so convert each variable individually.
final_coefficients = [c.numpy() for c in coefficients]
print("Final Coefficients:", final_coefficients)

# Print the fitted polynomial, highest power first (the order polyval uses).
print("Final Equation:", end=" ")
for i, coefficient in enumerate(final_coefficients):
    print(f"{coefficient} * x^{degree-i}", end=" + " if i < degree else "\n")

plt.plot(X, Y, label="Original Data")
# Evaluate the fitted polynomial on every input in a single vectorized call.
plt.plot(X, tf.math.polyval(final_coefficients, X).numpy(), label="Our Polynomial")
plt.ylabel(y_column)
plt.xlabel(x_column)
plt.legend()
plt.title(f"{x_column} vs {y_column}")
plt.show()
+
+
# Exponential model: pred_y = coefficients[0] * coefficients[1] ** X.
# NOTE(review): relies on `coefficients`, `X` and `Y` defined elsewhere;
# presumably meant to replace the polyval-based loss — confirm.
bx = tf.pow(coefficients[1], X)
pred_y = tf.math.multiply(coefficients[0], bx)
# Mean squared error between predictions and targets.
loss = tf.reduce_mean(tf.square(pred_y - Y))
+
+
+