From d75527f7eecc4e2fcdd18ab157412506717c8adb Mon Sep 17 00:00:00 2001
From: navanchauhan
Date: Mon, 7 Nov 2022 23:36:11 -0700
Subject: add blog post

---
 .../2019-12-08-Image-Classifier-Tensorflow.html    |  60 ++++---
 docs/posts/2019-12-08-Splitting-Zips.html          |  18 +-
 .../2019-12-10-TensorFlow-Model-Prediction.html    |  42 +++--
 ...019-12-16-TensorFlow-Polynomial-Regression.html | 186 ++++++++++++++-------
 docs/posts/2019-12-22-Fake-News-Detector.html      |  96 +++++++----
 .../2020-01-14-Converting-between-PIL-NumPy.html   |  12 +-
 ...-01-15-Setting-up-Kaggle-to-use-with-Colab.html |  12 +-
 ...20-01-16-Image-Classifier-Using-Turicreate.html |  72 +++++---
 docs/posts/2020-07-01-Install-rdkit-colab.html     |   6 +-
 .../2020-08-01-Natural-Feature-Tracking-ARJS.html  |  24 ++-
 docs/posts/2020-10-11-macOS-Virtual-Cam-OBS.html   |   2 +-
 docs/posts/2020-11-17-Lets-Encrypt-DuckDns.html    |  30 ++--
 docs/posts/2020-12-1-HTML-JS-RSS-Feed.html         |   6 +-
 docs/posts/2021-06-25-Blog2Twitter-P1.html         |  24 ++-
 ...21-06-27-Crude-ML-AI-Powered-Chatbot-Swift.html |  30 ++--
 .../2022-05-21-Similar-Movies-Recommender.html     |  36 ++--
 docs/posts/2022-11-07-a-new-method-to-blog.html    |  90 ++++++++++
 docs/posts/index.html                              |  15 ++
 18 files changed, 542 insertions(+), 219 deletions(-)
 create mode 100644 docs/posts/2022-11-07-a-new-method-to-blog.html
(limited to 'docs/posts')

diff --git a/docs/posts/2019-12-08-Image-Classifier-Tensorflow.html b/docs/posts/2019-12-08-Image-Classifier-Tensorflow.html
index ac305ac..9ecfff0 100644
--- a/docs/posts/2019-12-08-Image-Classifier-Tensorflow.html
+++ b/docs/posts/2019-12-08-Image-Classifier-Tensorflow.html
@@ -47,7 +47,8 @@

Imports

-
%tensorflow_version 2.x #This is for telling Colab that you want to use TF 2.0, ignore if running on local machine
+
+
%tensorflow_version 2.x #This is for telling Colab that you want to use TF 2.0, ignore if running on local machine
 
 from PIL import Image # We use the PIL Library to resize images
 import numpy as np
@@ -59,21 +60,25 @@
 import matplotlib.pyplot as plt
 from keras.models import Sequential
 from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
-
+
+

Dataset

Fetching the Data

-
!wget ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip
+
+
!wget ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip
 !unzip cell_images.zip
-
+
+

Processing the Data

We resize all the images to 50x50 and add the NumPy array of each image, along with its label (Infected or Not), to common arrays.

-
data = []
+
+
data = []
 labels = []
 
 Parasitized = os.listdir("./cell_images/Parasitized/")
@@ -97,15 +102,18 @@
         labels.append(1)
     except AttributeError:
         print("")
-
+
+

Splitting Data

-
df = np.array(data)
+
+
df = np.array(data)
 labels = np.array(labels)
 (X_train, X_test) = df[(int)(0.1*len(df)):],df[:(int)(0.1*len(df))]
 (y_train, y_test) = labels[(int)(0.1*len(labels)):],labels[:(int)(0.1*len(labels))]
-
+
+
s=np.arange(X_train.shape[0])
 np.random.shuffle(s)
@@ -122,7 +130,8 @@ X_train = X_train/255.0
 
 

Note: The input shape for the first layer is 50,50, which corresponds to the size of the resized images

-
model = models.Sequential()
+
+
model = models.Sequential()
 model.add(layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu', input_shape=(50,50,3)))
 model.add(layers.MaxPooling2D(pool_size=2))
 model.add(layers.Conv2D(filters=32,kernel_size=2,padding='same',activation='relu'))
@@ -135,25 +144,31 @@ X_train = X_train/255.0
 model.add(layers.Dropout(0.2))
 model.add(layers.Dense(2,activation="softmax"))#2 represent output layer neurons 
 model.summary()
-
+
+

Compiling Model

We use the Adam optimiser because it is an adaptive learning-rate optimisation algorithm designed specifically for training deep neural networks: it adjusts its learning rate automatically during training to get the best results

-
model.compile(optimizer="adam",
+
+
model.compile(optimizer="adam",
               loss="sparse_categorical_crossentropy", 
              metrics=["accuracy"])
-
+
+

Training Model

We train the model for 10 epochs on the training data and then validate it using the testing data

-
history = model.fit(X_train,y_train, epochs=10, validation_data=(X_test,y_test))
-
+
+
history = model.fit(X_train,y_train, epochs=10, validation_data=(X_test,y_test))
+
+
-
Train on 24803 samples, validate on 2755 samples
+
+
Train on 24803 samples, validate on 2755 samples
 Epoch 1/10
 24803/24803 [==============================] - 57s 2ms/sample - loss: 0.0786 - accuracy: 0.9729 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
 Epoch 2/10
@@ -174,11 +189,13 @@ X_train = X_train/255.0
 24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0352 - accuracy: 0.9878 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
 Epoch 10/10
 24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0373 - accuracy: 0.9865 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
-
+
+

Results

-
accuracy = history.history['accuracy'][-1]*100
+
+
accuracy = history.history['accuracy'][-1]*100
 loss = history.history['loss'][-1]*100
 val_accuracy = history.history['val_accuracy'][-1]*100
 val_loss = history.history['val_loss'][-1]*100
@@ -189,13 +206,16 @@ X_train = X_train/255.0
     '\nValidation Accuracy:', val_accuracy,
     '\nValidation Loss:', val_loss
 )
-
+
+
-
Accuracy: 98.64532351493835 
+
+
Accuracy: 98.64532351493835 
 Loss: 3.732407123270176 
 Validation Accuracy: 100.0 
 Validation Loss: 0.0
-
+
+

We have achieved 98% Accuracy!

diff --git a/docs/posts/2019-12-08-Splitting-Zips.html b/docs/posts/2019-12-08-Splitting-Zips.html index ed9ecff..8464ca1 100644 --- a/docs/posts/2019-12-08-Splitting-Zips.html +++ b/docs/posts/2019-12-08-Splitting-Zips.html @@ -47,22 +47,28 @@

Creating the archive:

-
zip -r -s 5 oodlesofnoodles.zip website/
-
+
+
zip -r -s 5 oodlesofnoodles.zip website/
+
+

Here, 5 is the size of each split file in megabytes (KB and GB can also be specified)

For encrypting the zip:

-
zip -er -s 5 oodlesofnoodles.zip website
-
+
+
zip -er -s 5 oodlesofnoodles.zip website
+
+

Extracting Files

First we need to collect all parts, then

-
zip -F oodlesofnoodles.zip --out merged.zip
-
+
+
zip -F oodlesofnoodles.zip --out merged.zip
+
+
diff --git a/docs/posts/2019-12-10-TensorFlow-Model-Prediction.html b/docs/posts/2019-12-10-TensorFlow-Model-Prediction.html index 7187fe8..97ad373 100644 --- a/docs/posts/2019-12-10-TensorFlow-Model-Prediction.html +++ b/docs/posts/2019-12-10-TensorFlow-Model-Prediction.html @@ -51,39 +51,53 @@

First we import the following if we have not imported these before

-
import cv2
+
+
import cv2
 import os
-
+
+

Then we read the file using OpenCV.

-
image=cv2.imread(imagePath)
-
+
+
image=cv2.imread(imagePath)
+
+

The cv2.imread() function returns a NumPy array representing the image. Therefore, we need to convert it to a PIL Image before we can use it.

-
image_from_array = Image.fromarray(image, 'RGB')
-
+
+
image_from_array = Image.fromarray(image, 'RGB')
+
+

Then we resize the image

-
size_image = image_from_array.resize((50,50))
-
+
+
size_image = image_from_array.resize((50,50))
+
+

After this we create a batch consisting of only one image

-
p = np.expand_dims(size_image, 0)
-
+
+
p = np.expand_dims(size_image, 0)
+
+

We then convert this uint8 datatype to a float32 datatype

-
img = tf.cast(p, tf.float32)
-
+
+
img = tf.cast(p, tf.float32)
+
+

Finally we make the prediction

-
print(['Infected','Uninfected'][np.argmax(model.predict(img))])
-
+
+
print(['Infected','Uninfected'][np.argmax(model.predict(img))])
+
+

Infected

diff --git a/docs/posts/2019-12-16-TensorFlow-Polynomial-Regression.html b/docs/posts/2019-12-16-TensorFlow-Polynomial-Regression.html index 7bfe8d4..f0dad82 100644 --- a/docs/posts/2019-12-16-TensorFlow-Polynomial-Regression.html +++ b/docs/posts/2019-12-16-TensorFlow-Polynomial-Regression.html @@ -69,12 +69,14 @@ Polynomial regression even fits a non-linear relationship (e.g when the points d

Imports

-
import tensorflow.compat.v1 as tf
+
+
import tensorflow.compat.v1 as tf
 tf.disable_v2_behavior()
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-
+
+

Dataset

@@ -86,30 +88,41 @@ Polynomial regression even fits a non-linear relationship (e.g when the points d

linspace(lower_limit, upper_limit, no_of_observations)

-
x = np.linspace(0, 50, 50)
+
+
x = np.linspace(0, 50, 50)
 y = np.linspace(0, 50, 50)
-
+
+

We use the following function to add noise to the data, so that our values do not lie on a perfectly straight line

-
x += np.random.uniform(-4, 4, 50)
+
+
x += np.random.uniform(-4, 4, 50)
 y += np.random.uniform(-4, 4, 50)
-
+
+

Position vs Salary Dataset

We will be using https://drive.google.com/file/d/1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9/view (Salary vs Position Dataset)

-
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9' -O data.csv
-
+
+
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9' -O data.csv
+
+
-
df = pd.read_csv("data.csv")
-
+
+
df = pd.read_csv("data.csv")
+
+
-
df # this gives us a preview of the dataset we are working with
-
+
+
df # this gives us a preview of the dataset we are working with
+
+
-
| Position          | Level | Salary  |
+
+
| Position          | Level | Salary  |
 |-------------------|-------|---------|
 | Business Analyst  | 1     | 45000   |
 | Junior Consultant | 2     | 50000   |
@@ -121,81 +134,100 @@ Polynomial regression even fits a non-linear relationship (e.g when the points d
 | Senior Partner    | 8     | 300000  |
 | C-level           | 9     | 500000  |
 | CEO               | 10    | 1000000 |
-
+
+

We use the salary column as the ordinate (y-coordinate) and the level column as the abscissa (x-coordinate)

-
abscissa = df["Level"].to_list() # abscissa = [1,2,3,4,5,6,7,8,9,10]
+
+
abscissa = df["Level"].to_list() # abscissa = [1,2,3,4,5,6,7,8,9,10]
 ordinate = df["Salary"].to_list() # ordinate = [45000,50000,60000,80000,110000,150000,200000,300000,500000,1000000]
-
+
+
-
n = len(abscissa) # no of observations
+
+
n = len(abscissa) # no of observations
 plt.scatter(abscissa, ordinate)
 plt.ylabel('Salary')
 plt.xlabel('Position')
 plt.title("Salary vs Position")
 plt.show()
-
+
+

Defining Stuff

-
X = tf.placeholder("float")
+
+
X = tf.placeholder("float")
 Y = tf.placeholder("float")
-
+
+

Defining Variables

We first define all the coefficients and the constant as TensorFlow variables, each with a random initial value

-
a = tf.Variable(np.random.randn(), name = "a")
+
+
a = tf.Variable(np.random.randn(), name = "a")
 b = tf.Variable(np.random.randn(), name = "b")
 c = tf.Variable(np.random.randn(), name = "c")
 d = tf.Variable(np.random.randn(), name = "d")
 e = tf.Variable(np.random.randn(), name = "e")
 f = tf.Variable(np.random.randn(), name = "f")
-
+
+

Model Configuration

-
learning_rate = 0.2
+
+
learning_rate = 0.2
 no_of_epochs = 25000
-
+
+

Equations

-
deg1 = a*X + b
+
+
deg1 = a*X + b
 deg2 = a*tf.pow(X,2) + b*X + c
 deg3 = a*tf.pow(X,3) + b*tf.pow(X,2) + c*X + d
 deg4 = a*tf.pow(X,4) + b*tf.pow(X,3) + c*tf.pow(X,2) + d*X + e
 deg5 = a*tf.pow(X,5) + b*tf.pow(X,4) + c*tf.pow(X,3) + d*tf.pow(X,2) + e*X + f
-
+
+

Cost Function

We use the Mean Squared Error Function

-
mse1 = tf.reduce_sum(tf.pow(deg1-Y,2))/(2*n)
+
+
mse1 = tf.reduce_sum(tf.pow(deg1-Y,2))/(2*n)
 mse2 = tf.reduce_sum(tf.pow(deg2-Y,2))/(2*n)
 mse3 = tf.reduce_sum(tf.pow(deg3-Y,2))/(2*n)
 mse4 = tf.reduce_sum(tf.pow(deg4-Y,2))/(2*n)
 mse5 = tf.reduce_sum(tf.pow(deg5-Y,2))/(2*n)
-
+
+

Optimizer

We use the AdamOptimizer for the polynomial functions and GradientDescentOptimizer for the linear function

-
optimizer1 = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse1)
+
+
optimizer1 = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse1)
 optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(mse2)
 optimizer3 = tf.train.AdamOptimizer(learning_rate).minimize(mse3)
 optimizer4 = tf.train.AdamOptimizer(learning_rate).minimize(mse4)
 optimizer5 = tf.train.AdamOptimizer(learning_rate).minimize(mse5)
-
+
+
-
init=tf.global_variables_initializer()
-
+
+
init=tf.global_variables_initializer()
+
+

Model Predictions

@@ -204,7 +236,8 @@ values using the X values. We then plot it to compare the actual data and predic

Linear Equation

-
with tf.Session() as sess:
+
+
with tf.Session() as sess:
     sess.run(init)
     for epoch in range(no_of_epochs):
       for (x,y) in zip(abscissa, ordinate):
@@ -218,9 +251,11 @@ values using the X values. We then plot it to compare the actual data and predic
         constant = sess.run(b)
 
 print(training_cost, coefficient1, constant)
-
+
+
-
Epoch 1000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
+
+
Epoch 1000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
 Epoch 2000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
 Epoch 3000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
 Epoch 4000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
@@ -246,9 +281,11 @@ values using the X values. We then plot it to compare the actual data and predic
 Epoch 24000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
 Epoch 25000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
 88999125000.0 180396.42 -478869.12
-
+
+
-
predictions = []
+
+
predictions = []
 for x in abscissa:
   predictions.append((coefficient1*x + constant))
 plt.plot(abscissa , ordinate, 'ro', label ='Original data')
@@ -256,13 +293,15 @@ values using the X values. We then plot it to compare the actual data and predic
 plt.title('Linear Regression Result')
 plt.legend()
 plt.show()
-
+
+

Quadratic Equation

-
with tf.Session() as sess:
+
+
with tf.Session() as sess:
     sess.run(init)
     for epoch in range(no_of_epochs):
       for (x,y) in zip(abscissa, ordinate):
@@ -277,9 +316,11 @@ values using the X values. We then plot it to compare the actual data and predic
         constant = sess.run(c)
 
 print(training_cost, coefficient1, coefficient2, constant)
-
+
+
-
Epoch 1000 : Training Cost: 52571360000.0  a,b,c: 1002.4456 1097.0197 1276.6921
+
+
Epoch 1000 : Training Cost: 52571360000.0  a,b,c: 1002.4456 1097.0197 1276.6921
 Epoch 2000 : Training Cost: 37798890000.0  a,b,c: 1952.4263 2130.2825 2469.7756
 Epoch 3000 : Training Cost: 26751185000.0  a,b,c: 2839.5825 3081.6118 3554.351
 Epoch 4000 : Training Cost: 19020106000.0  a,b,c: 3644.56 3922.9563 4486.3135
@@ -305,9 +346,11 @@ values using the X values. We then plot it to compare the actual data and predic
 Epoch 24000 : Training Cost: 8088001000.0  a,b,c: 6632.96 3399.878 -79.89219
 Epoch 25000 : Training Cost: 8058094600.0  a,b,c: 6659.793 3227.2517 -463.03156
 8058094600.0 6659.793 3227.2517 -463.03156
-
+
+
-
predictions = []
+
+
predictions = []
 for x in abscissa:
   predictions.append((coefficient1*pow(x,2) + coefficient2*x + constant))
 plt.plot(abscissa , ordinate, 'ro', label ='Original data')
@@ -315,13 +358,15 @@ values using the X values. We then plot it to compare the actual data and predic
 plt.title('Quadratic Regression Result')
 plt.legend()
 plt.show()
-
+
+

Cubic

-
with tf.Session() as sess:
+
+
with tf.Session() as sess:
     sess.run(init)
     for epoch in range(no_of_epochs):
       for (x,y) in zip(abscissa, ordinate):
@@ -337,9 +382,11 @@ values using the X values. We then plot it to compare the actual data and predic
         constant = sess.run(d)
 
 print(training_cost, coefficient1, coefficient2, coefficient3, constant)
-
+
+
-
Epoch 1000 : Training Cost: 4279814000.0  a,b,c,d: 670.1527 694.4212 751.4653 903.9527
+
+
Epoch 1000 : Training Cost: 4279814000.0  a,b,c,d: 670.1527 694.4212 751.4653 903.9527
 Epoch 2000 : Training Cost: 3770950400.0  a,b,c,d: 742.6414 666.3489 636.94525 859.2088
 Epoch 3000 : Training Cost: 3717708300.0  a,b,c,d: 756.2582 569.3339 448.105 748.23956
 Epoch 4000 : Training Cost: 3667464000.0  a,b,c,d: 769.4476 474.0318 265.5761 654.75525
@@ -365,9 +412,11 @@ values using the X values. We then plot it to compare the actual data and predic
 Epoch 24000 : Training Cost: 3070361300.0  a,b,c,d: 975.52875 -1095.4292 -2211.854 1847.4485
 Epoch 25000 : Training Cost: 3052791300.0  a,b,c,d: 983.4346 -1159.7922 -2286.9412 2027.4857
 3052791300.0 983.4346 -1159.7922 -2286.9412 2027.4857
-
+
+
-
predictions = []
+
+
predictions = []
 for x in abscissa:
   predictions.append((coefficient1*pow(x,3) + coefficient2*pow(x,2) + coefficient3*x + constant))
 plt.plot(abscissa , ordinate, 'ro', label ='Original data')
@@ -375,13 +424,15 @@ values using the X values. We then plot it to compare the actual data and predic
 plt.title('Cubic Regression Result')
 plt.legend()
 plt.show()
-
+
+

Quartic

-
with tf.Session() as sess:
+
+
with tf.Session() as sess:
     sess.run(init)
     for epoch in range(no_of_epochs):
       for (x,y) in zip(abscissa, ordinate):
@@ -398,9 +449,11 @@ values using the X values. We then plot it to compare the actual data and predic
         constant = sess.run(e)
 
 print(training_cost, coefficient1, coefficient2, coefficient3, coefficient4, constant)
-
+
+
-
Epoch 1000 : Training Cost: 1902632600.0  a,b,c,d: 84.48304 52.210594 54.791424 142.51952 512.0343
+
+
Epoch 1000 : Training Cost: 1902632600.0  a,b,c,d: 84.48304 52.210594 54.791424 142.51952 512.0343
 Epoch 2000 : Training Cost: 1854316200.0  a,b,c,d: 88.998955 13.073557 14.276088 223.55667 1056.4655
 Epoch 3000 : Training Cost: 1812812400.0  a,b,c,d: 92.9462 -22.331177 -15.262934 327.41858 1634.9054
 Epoch 4000 : Training Cost: 1775716000.0  a,b,c,d: 96.42522 -54.64535 -35.829437 449.5028 2239.1392
@@ -426,9 +479,11 @@ values using the X values. We then plot it to compare the actual data and predic
 Epoch 24000 : Training Cost: 1252052600.0  a,b,c,d: 135.9583 -493.38254 90.268616 3764.0078 15010.481
 Epoch 25000 : Training Cost: 1231713700.0  a,b,c,d: 137.54753 -512.1876 101.59372 3926.4897 15609.368
 1231713700.0 137.54753 -512.1876 101.59372 3926.4897 15609.368
-
+
+
-
predictions = []
+
+
predictions = []
 for x in abscissa:
   predictions.append((coefficient1*pow(x,4) + coefficient2*pow(x,3) + coefficient3*pow(x,2) + coefficient4*x + constant))
 plt.plot(abscissa , ordinate, 'ro', label ='Original data')
@@ -436,13 +491,15 @@ values using the X values. We then plot it to compare the actual data and predic
 plt.title('Quartic Regression Result')
 plt.legend()
 plt.show()
-
+
+

Quintic

-
with tf.Session() as sess:
+
+
with tf.Session() as sess:
     sess.run(init)
     for epoch in range(no_of_epochs):
       for (x,y) in zip(abscissa, ordinate):
@@ -458,9 +515,11 @@ values using the X values. We then plot it to compare the actual data and predic
         coefficient4 = sess.run(d)
         coefficient5 = sess.run(e)
         constant = sess.run(f)
-
+
+
-
Epoch 1000 : Training Cost: 1409200100.0  a,b,c,d,e,f: 7.949472 7.46219 55.626034 184.29028 484.00223 1024.0083
+
+
Epoch 1000 : Training Cost: 1409200100.0  a,b,c,d,e,f: 7.949472 7.46219 55.626034 184.29028 484.00223 1024.0083
 Epoch 2000 : Training Cost: 1306882400.0  a,b,c,d,e,f: 8.732181 -4.0085897 73.25298 315.90103 904.08887 2004.9749
 Epoch 3000 : Training Cost: 1212606000.0  a,b,c,d,e,f: 9.732249 -16.90125 86.28379 437.06552 1305.055 2966.2188
 Epoch 4000 : Training Cost: 1123640400.0  a,b,c,d,e,f: 10.74851 -29.82692 98.59997 555.331 1698.4631 3917.9155
@@ -486,9 +545,11 @@ values using the X values. We then plot it to compare the actual data and predic
 Epoch 24000 : Training Cost: 229660080.0  a,b,c,d,e,f: 27.102589 -238.44817 309.35342 2420.4185 7770.5728 19536.19
 Epoch 25000 : Training Cost: 216972400.0  a,b,c,d,e,f: 27.660324 -245.69016 318.10062 2483.3608 7957.354 20027.707
 216972400.0 27.660324 -245.69016 318.10062 2483.3608 7957.354 20027.707
-
+
+
-
predictions = []
+
+
predictions = []
 for x in abscissa:
   predictions.append((coefficient1*pow(x,5) + coefficient2*pow(x,4) + coefficient3*pow(x,3) + coefficient4*pow(x,2) + coefficient5*x + constant))
 plt.plot(abscissa , ordinate, 'ro', label ='Original data')
@@ -496,7 +557,8 @@ values using the X values. We then plot it to compare the actual data and predic
 plt.title('Quintic Regression Result')
 plt.legend()
 plt.show()
-
+
+

diff --git a/docs/posts/2019-12-22-Fake-News-Detector.html b/docs/posts/2019-12-22-Fake-News-Detector.html
index 46297b0..9b62b00 100644
--- a/docs/posts/2019-12-22-Fake-News-Detector.html
+++ b/docs/posts/2019-12-22-Fake-News-Detector.html
@@ -60,48 +60,63 @@ Whenever you are looking for a dataset, always try searching on Kaggle and GitHub.

This allows you to train the model on the GPU. Turicreate is built on top of Apache's MXNet framework; for us to use the GPU, we need to install a CUDA-compatible MXNet package.

-
!pip install turicreate
+
+
!pip install turicreate
 !pip uninstall -y mxnet
 !pip install mxnet-cu100==1.4.0.post0
-
+
+

If you do not wish to train on GPU or are running it on your computer, you can ignore the last two lines

Downloading the Dataset

-
!wget -q "https://github.com/joolsa/fake_real_news_dataset/raw/master/fake_or_real_news.csv.zip"
+
+
!wget -q "https://github.com/joolsa/fake_real_news_dataset/raw/master/fake_or_real_news.csv.zip"
 !unzip fake_or_real_news.csv.zip
-
+
+

Model Creation

-
import turicreate as tc
+
+
import turicreate as tc
 tc.config.set_num_gpus(-1) # If you do not wish to use GPUs, set it to 0
-
+
+
-
dataSFrame = tc.SFrame('fake_or_real_news.csv')
-
+
+
dataSFrame = tc.SFrame('fake_or_real_news.csv')
+
+

The dataset contains a column named "X1", which is of no use to us. Therefore, we simply drop it

-
dataSFrame.remove_column('X1')
-
+
+
dataSFrame.remove_column('X1')
+
+

Splitting Dataset

-
train, test = dataSFrame.random_split(.9)
-
+
+
train, test = dataSFrame.random_split(.9)
+
+

Training

-
model = tc.text_classifier.create(
+
+
model = tc.text_classifier.create(
     dataset=train,
     target='label',
     features=['title','text']
 )
-
+
+
-
+-----------+----------+-----------+--------------+-------------------+---------------------+
+
+
+-----------+----------+-----------+--------------+-------------------+---------------------+
 | Iteration | Passes   | Step size | Elapsed Time | Training Accuracy | Validation Accuracy |
 +-----------+----------+-----------+--------------+-------------------+---------------------+
 | 0         | 2        | 1.000000  | 1.156349     | 0.889680          | 0.790036            |
@@ -111,39 +126,50 @@ a CUDA compatible MXNet package.

| 4         | 8        | 1.000000  | 1.814194     | 0.999063          | 0.925267            |
| 9         | 14       | 1.000000  | 2.507072     | 1.000000          | 0.911032            |
+-----------+----------+-----------+--------------+-------------------+---------------------+
-
+
+

Testing the Model

-
test_predictions = model.predict(test)
+
+
test_predictions = model.predict(test)
 accuracy = tc.evaluation.accuracy(test['label'], test_predictions)
 print(f'Topic classifier model has a testing accuracy of {accuracy*100}% ', flush=True)
-
+
+
-
Topic classifier model has a testing accuracy of 92.3076923076923%
-
+
+
Topic classifier model has a testing accuracy of 92.3076923076923%
+
+

We have just created our own Fake News Detection Model which has an accuracy of 92%!

-
example_text = {"title": ["Middling ‘Rise Of Skywalker’ Review Leaves Fan On Fence About Whether To Threaten To Kill Critic"], "text": ["Expressing ambivalence toward the relatively balanced appraisal of the film, Star Wars fan Miles Ariely admitted Thursday that an online publication’s middling review of The Rise Of Skywalker had left him on the fence about whether he would still threaten to kill the critic who wrote it. “I’m really of two minds about this, because on the one hand, he said the new movie fails to live up to the original trilogy, which makes me at least want to throw a brick through his window with a note telling him to watch his back,” said Ariely, confirming he had already drafted an eight-page-long death threat to Stan Corimer of the website Screen-On Time, but had not yet decided whether to post it to the reviewer’s Facebook page. “On the other hand, though, he commended J.J. Abrams’ skillful pacing and faithfulness to George Lucas’ vision, which makes me wonder if I should just call the whole thing off. Now, I really don’t feel like camping outside his house for hours. Maybe I could go with a response that’s somewhere in between, like, threatening to kill his dog but not everyone in his whole family? I don’t know. This is a tough one.” At press time, sources reported that Ariely had resolved to wear his Ewok costume while he murdered the critic in his sleep."]}
+
+
example_text = {"title": ["Middling ‘Rise Of Skywalker’ Review Leaves Fan On Fence About Whether To Threaten To Kill Critic"], "text": ["Expressing ambivalence toward the relatively balanced appraisal of the film, Star Wars fan Miles Ariely admitted Thursday that an online publication’s middling review of The Rise Of Skywalker had left him on the fence about whether he would still threaten to kill the critic who wrote it. “I’m really of two minds about this, because on the one hand, he said the new movie fails to live up to the original trilogy, which makes me at least want to throw a brick through his window with a note telling him to watch his back,” said Ariely, confirming he had already drafted an eight-page-long death threat to Stan Corimer of the website Screen-On Time, but had not yet decided whether to post it to the reviewer’s Facebook page. “On the other hand, though, he commended J.J. Abrams’ skillful pacing and faithfulness to George Lucas’ vision, which makes me wonder if I should just call the whole thing off. Now, I really don’t feel like camping outside his house for hours. Maybe I could go with a response that’s somewhere in between, like, threatening to kill his dog but not everyone in his whole family? I don’t know. This is a tough one.” At press time, sources reported that Ariely had resolved to wear his Ewok costume while he murdered the critic in his sleep."]}
 example_prediction = model.classify(tc.SFrame(example_text))
 print(example_prediction, flush=True)
-
+
+
-
+-------+--------------------+
+
+
+-------+--------------------+
 | class |    probability     |
 +-------+--------------------+
 |  FAKE | 0.9245648658345308 |
 +-------+--------------------+
 [1 rows x 2 columns]
-
+
+

Exporting the Model

-
model_name = 'FakeNews'
+
+
model_name = 'FakeNews'
 coreml_model_name = model_name + '.mlmodel'
 exportedModel = model.export_coreml(coreml_model_name)
-
+
+

Note: To download files from Google Colab, simply click on the files section in the sidebar, right click on filename and then click on download

@@ -162,7 +188,8 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i

We define our bag of words function

-
func bow(text: String) -> [String: Double] {
+
+
func bow(text: String) -> [String: Double] {
         var bagOfWords = [String: Double]()
 
         let tagger = NSLinguisticTagger(tagSchemes: [.tokenType], options: 0)
@@ -181,22 +208,26 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i
 
         return bagOfWords
     }
-
+
+

We also declare our variables

-
@State private var title: String = ""
+
+
@State private var title: String = ""
 @State private var headline: String = ""
 @State private var alertTitle = ""
 @State private var alertText = ""
 @State private var showingAlert = false
-
+
+

Finally, we implement a simple function which reads the two text fields, creates their bag of words representation and displays an alert with the appropriate result

Complete Code

-
import SwiftUI
+
+
import SwiftUI
 
 struct ContentView: View {
     @State private var title: String = ""
@@ -271,7 +302,8 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i
         ContentView()
     }
 }
-
+
+
diff --git a/docs/posts/2020-01-14-Converting-between-PIL-NumPy.html b/docs/posts/2020-01-14-Converting-between-PIL-NumPy.html index 293da91..1db31be 100644 --- a/docs/posts/2020-01-14-Converting-between-PIL-NumPy.html +++ b/docs/posts/2020-01-14-Converting-between-PIL-NumPy.html @@ -43,7 +43,8 @@

Converting between image and NumPy array

-
import numpy
+
+
import numpy
 import PIL
 
 # Convert PIL Image to NumPy array
@@ -52,16 +53,19 @@
 
 # Convert array to Image
 img = PIL.Image.fromarray(arr)
-
+
+

Saving an Image

-
try:
+
+
try:
     img.save(destination, "JPEG", quality=80, optimize=True, progressive=True)
 except IOError:
     PIL.ImageFile.MAXBLOCK = img.size[0] * img.size[1]
     img.save(destination, "JPEG", quality=80, optimize=True, progressive=True)
-
+
+
diff --git a/docs/posts/2020-01-15-Setting-up-Kaggle-to-use-with-Colab.html b/docs/posts/2020-01-15-Setting-up-Kaggle-to-use-with-Colab.html index 9a7faef..d1c88d4 100644 --- a/docs/posts/2020-01-15-Setting-up-Kaggle-to-use-with-Colab.html +++ b/docs/posts/2020-01-15-Setting-up-Kaggle-to-use-with-Colab.html @@ -69,17 +69,21 @@

Mounting Google Drive

-
import os
+
+
import os
 from google.colab import drive
 drive.mount('/content/drive')
-
+
+

After this click on the URL in the output section, login and then paste the Auth Code

Configuring Kaggle

-
os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/My Drive/"
-
+
+
os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/My Drive/"
+
+

Voila! You can now download Kaggle datasets

diff --git a/docs/posts/2020-01-16-Image-Classifier-Using-Turicreate.html b/docs/posts/2020-01-16-Image-Classifier-Using-Turicreate.html index 4235b29..5056a82 100644 --- a/docs/posts/2020-01-16-Image-Classifier-Using-Turicreate.html +++ b/docs/posts/2020-01-16-Image-Classifier-Using-Turicreate.html @@ -49,31 +49,40 @@

Mounting Google Drive

-
import os
+
+
import os
 from google.colab import drive
 drive.mount('/content/drive')
-
+
+

Downloading Dataset from Kaggle

-
os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/My Drive/"
+
+
os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/My Drive/"
 !kaggle datasets download ashutosh69/fire-and-smoke-dataset
 !unzip "fire-and-smoke-dataset.zip"
-
+
+

Pre-Processing

-
!mkdir default smoke fire
-
+
+
!mkdir default smoke fire
+
+

\

-
!ls data/data/img_data/train/default/*.jpg
-
+
+
!ls data/data/img_data/train/default/*.jpg
+
+

\

-
img_1002.jpg   img_20.jpg     img_519.jpg     img_604.jpg       img_80.jpg
+
+
img_1002.jpg   img_20.jpg     img_519.jpg     img_604.jpg       img_80.jpg
 img_1003.jpg   img_21.jpg     img_51.jpg     img_60.jpg       img_8.jpg
 img_1007.jpg   img_22.jpg     img_520.jpg     img_61.jpg       img_900.jpg
 img_100.jpg    img_23.jpg     img_521.jpg    'img_62 (2).jpg'   img_920.jpg
@@ -106,11 +115,13 @@
 img_204.jpg    img_501.jpg    img_601.jpg     img_78.jpg
 img_205.jpg    img_502.jpg    img_602.jpg     img_79.jpg
 img_206.jpg    img_50.jpg     img_603.jpg     img_7.jpg
-
+
+

The image files are not actually JPEG, thus we first need to save them in the correct format for Turicreate

-
from PIL import Image
+
+
from PIL import Image
 import glob
 
 
@@ -127,26 +138,32 @@
     rgb_im = im.convert('RGB')
     rgb_im.save((folder + "/" + str(n) + ".jpg"), quality=100)
     n +=1
-
+
+

\

-
!mkdir train
+
+
!mkdir train
 !mv default ./train
 !mv smoke ./train
 !mv fire ./train
-
+
+

Making the Image Classifier

Making an SFrame

-
!pip install turicreate
-
+
+
!pip install turicreate
+
+

\

-
import turicreate as tc
+
+
import turicreate as tc
 import os
 
 data = tc.image_analysis.load_images("./train", with_path=True)
@@ -156,11 +173,13 @@
 print(data)
 
 data.save('fire-smoke.sframe')
-
+
+

\

-
+-------------------------+------------------------+
+
+
+-------------------------+------------------------+
 |           path          |         image          |
 +-------------------------+------------------------+
 |  ./train/default/1.jpg  | Height: 224 Width: 224 |
@@ -194,11 +213,13 @@
 [2028 rows x 3 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
-
+
+

Making the Model

-
import turicreate as tc
+
+
import turicreate as tc
 
 # Load the data
 data =  tc.SFrame('fire-smoke.sframe')
@@ -221,11 +242,13 @@
 
 # Export for use in Core ML
 model.export_coreml('fire-smoke.mlmodel')
-
+
+

\

-
Performing feature extraction on resized images...
+
+
Performing feature extraction on resized images...
 Completed   64/1633
 Completed  128/1633
 Completed  192/1633
@@ -283,7 +306,8 @@
 Completed 384/395
 Completed 395/395
 0.9316455696202531
-
+
+

We just got an accuracy of 94% on Training Data and 97% on Validation Data!

diff --git a/docs/posts/2020-07-01-Install-rdkit-colab.html b/docs/posts/2020-07-01-Install-rdkit-colab.html index 56e2f21..4b5c4e7 100644 --- a/docs/posts/2020-07-01-Install-rdkit-colab.html +++ b/docs/posts/2020-07-01-Install-rdkit-colab.html @@ -55,7 +55,8 @@

Just copy and paste this in a Colab cell and it will install it 👍

-
import sys
+
+
import sys
 import os
 import requests
 import subprocess
@@ -78,7 +79,8 @@
         force=False):
     """install rdkit from miniconda
    
-
+
+
import rdkit_installer
 rdkit_installer.install()
diff --git a/docs/posts/2020-08-01-Natural-Feature-Tracking-ARJS.html b/docs/posts/2020-08-01-Natural-Feature-Tracking-ARJS.html
index 6b28206..560996e 100644
--- a/docs/posts/2020-08-01-Natural-Feature-Tracking-ARJS.html
+++ b/docs/posts/2020-08-01-Natural-Feature-Tracking-ARJS.html
@@ -184,7 +184,8 @@ me.fset  me.fset3 me.iset
 
 

Create a new file called index.html in your project folder. This is the basic template we are going to use. Replace me with the root filename of your image, for example NeverGonnaGiveYouUp.png will become NeverGonnaGiveYouUp. Make sure you have copied all three files from the output folder in the previous step to the root of your project folder.

-
<script src="https://cdn.jsdelivr.net/gh/aframevr/aframe@1c2407b26c61958baa93967b5412487cd94b290b/dist/aframe-master.min.js"></script>
+
+
<script src="https://cdn.jsdelivr.net/gh/aframevr/aframe@1c2407b26c61958baa93967b5412487cd94b290b/dist/aframe-master.min.js"></script>
 <script src="https://raw.githack.com/AR-js-org/AR.js/master/aframe/build/aframe-ar-nft.js"></script>
 
 <style>
@@ -231,7 +232,8 @@ me.fset  me.fset3 me.iset
     <a-entity camera></a-entity>
   </a-scene>
 </body>
-
+
+

In this, we are creating an AFrame scene and telling it that we want to use NFT Tracking. The amazing part about using AFrame is that we are able to use all AFrame objects!

@@ -239,10 +241,12 @@ me.fset me.fset3 me.iset

Let us add a simple box!

-
<a-nft .....>
+
+
<a-nft .....>
     <a-box position='100 0.5 -180' material='opacity: 0.5; side: double' scale="100 100 100"></a-box>
 </a-nft>
-
+
+

Now, to test it out, we will need to create a simple server. I use Python's inbuilt SimpleHTTPServer alongside ngrok

@@ -277,12 +281,14 @@ Serving HTTP on 0.0.0.0 port 8000 ...

Edit your index.html

-
<a-nft ..>
+
+
<a-nft ..>
     <a-box ..>
         <a-torus-knot radius='0.26' radius-tubular='0.05' ></a-torus-knot>
     </ a-box>
 </ a-nft>
-
+
+

@@ -298,9 +304,11 @@ Serving HTTP on 0.0.0.0 port 8000 ...

Change the box's material to add the GIF shader

-
...
+
+
...
 <a-box position='100 0.5 -180' material="shader:gif;src:url(https://media.tenor.com/images/412b1aa9149d98d561df62db221e0789/tenor.gif);opacity:.5" .....>
-
+
+

diff --git a/docs/posts/2020-10-11-macOS-Virtual-Cam-OBS.html b/docs/posts/2020-10-11-macOS-Virtual-Cam-OBS.html index 06951dc..d99f7b8 100644 --- a/docs/posts/2020-10-11-macOS-Virtual-Cam-OBS.html +++ b/docs/posts/2020-10-11-macOS-Virtual-Cam-OBS.html @@ -43,7 +43,7 @@

Trying Different Camera Setups

-
    +
    1. Animated Overlays
    2. Using a modern camera as your webcam
    3. Using your phone's camera as your webcam
   4.

diff --git a/docs/posts/2020-11-17-Lets-Encrypt-DuckDns.html b/docs/posts/2020-11-17-Lets-Encrypt-DuckDns.html
index f8e7b6c..fdde2b8 100644
--- a/docs/posts/2020-11-17-Lets-Encrypt-DuckDns.html
+++ b/docs/posts/2020-11-17-Lets-Encrypt-DuckDns.html
@@ -47,13 +47,17 @@

      Dependencies

      -
      sudo apt update && sudo apt install certbot -y
      -
      +
      +
      sudo apt update && sudo apt install certbot -y
      +
      +

      Get the Certificate

      -
      sudo certbot certonly --manual --preferred-challenges dns-01 --email senpai@email.com -d mydomain.duckdns.org
      -
      +
      +
      sudo certbot certonly --manual --preferred-challenges dns-01 --email senpai@email.com -d mydomain.duckdns.org
      +
      +

      After you accept that you are okay with your IP address being logged, it will prompt you to update your DNS record. You need to create a new TXT record in the DNS settings for your domain.
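
      As an aside, DuckDNS can also set that TXT record programmatically through its HTTP update endpoint rather than the dashboard. A minimal sketch, not part of the original post; the domain and token values are placeholders:

import requests

resp = requests.get(
    "https://www.duckdns.org/update",
    params={
        "domains": "mydomain",                    # your DuckDNS subdomain, without .duckdns.org
        "token": "your-duckdns-token",            # from the DuckDNS dashboard
        "txt": "challenge-value-from-certbot",    # the value certbot asks you to publish
        "verbose": "true",
    },
)
print(resp.text)  # starts with "OK" on success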

      @@ -66,7 +70,8 @@

      You can check if the TXT records have been updated by using the dig command:

      -
      dig navanspi.duckdns.org TXT
      +
      +
      dig navanspi.duckdns.org TXT
       ; <<>> DiG 9.16.1-Ubuntu <<>> navanspi.duckdns.org TXT
       ;; global options: +cmd
       ;; Got answer:
      @@ -85,7 +90,8 @@ navanspi.duckdns.org.    60    IN    TXT    ;; SERVER: 127.0.0.53#53(127.0.0.53)
       ;; WHEN: Tue Nov 17 15:23:15 IST 2020
       ;; MSG SIZE  rcvd: 105
      -
      +
      +

      DuckDNS almost instantly propagates the changes but for other domain hosts, it could take a while.

      @@ -99,13 +105,17 @@ navanspi.duckdns.org. 60 IN TXT
      gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:7589
      -
      +
      +
      gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:7589
      +
      +

      To use the certificate with it, simply copy the cert.pem and privkey.pem to your working directory ( change the appropriate permissions ) and include them in the command

      -
      gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:7589 --certfile=cert.pem --keyfile=privkey.pem
      -
      +
      +
      gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:7589 --certfile=cert.pem --keyfile=privkey.pem
      +
      +

      Caveats with copying the certificate: If you renew the certificate you will have to re-copy the files

      diff --git a/docs/posts/2020-12-1-HTML-JS-RSS-Feed.html b/docs/posts/2020-12-1-HTML-JS-RSS-Feed.html index 4fdb015..8acc446 100644 --- a/docs/posts/2020-12-1-HTML-JS-RSS-Feed.html +++ b/docs/posts/2020-12-1-HTML-JS-RSS-Feed.html @@ -45,7 +45,8 @@

      If you want to directly open the HTML file in your browser after saving, don't forget to set CORS_PROXY=""

      -
      <!doctype html>
      +
      +
      <!doctype html>
       <html lang="en">
       <head>
         <meta charset="utf-8">
      @@ -240,7 +241,8 @@
       </script>
       <noscript>Uh Oh! Your browser does not support JavaScript or JavaScript is currently disabled. Please enable JavaScript or switch to a different browser.</noscript>
       </body></html>
      -
      +
      +
      diff --git a/docs/posts/2021-06-25-Blog2Twitter-P1.html b/docs/posts/2021-06-25-Blog2Twitter-P1.html index ada9666..62233ab 100644 --- a/docs/posts/2021-06-25-Blog2Twitter-P1.html +++ b/docs/posts/2021-06-25-Blog2Twitter-P1.html @@ -57,7 +57,8 @@ I am not handling lists or images right now.

      pip install tweepy

      -
      import os
      +
      +
      import os
       import tweepy
       
       consumer_key = os.environ["consumer_key"]
      @@ -70,13 +71,15 @@ I am not handling lists or images right now.

auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)
-
      +
      +

The program needs to convert the blog post into text fragments.

      It reads the markdown file, removes the top YAML content, checks for headers and splits the content.

      -
      tweets = []
      +
      +
      tweets = []
       
       first___n = 0
       
      @@ -103,13 +106,15 @@ I am not handling lists or images right now.

      print("ERROR") else: tweets.append(line) -
      +
      +

Every status update made with tweepy has an ID attached to it; for the next tweet in the thread, the program passes that ID when calling the function.

      For every tweet fragment, it also appends 1/n.

      -
      for idx, tweet in enumerate(tweets):
      +
      +
      for idx, tweet in enumerate(tweets):
           tweet += " {}/{}".format(idx+1,len(tweets))
           if idx == 0:
               a = None
      @@ -118,12 +123,15 @@ I am not handling lists or images right now.

        a = api.update_status(tweet,in_reply_to_status_id=a.id)
    print(len(tweet),end=" ")
    print("{}/{}\n".format(idx+1,len(tweets)))
-
      +
      +

      Finally, it replies to the last tweet in the thread with the link of the post.

      -
      api.update_status("Web Version: {}".format(post_link))
      -
      +
      +
      api.update_status("Web Version: {}".format(post_link))
      +
      +

      Result

      diff --git a/docs/posts/2021-06-27-Crude-ML-AI-Powered-Chatbot-Swift.html b/docs/posts/2021-06-27-Crude-ML-AI-Powered-Chatbot-Swift.html index 0b307fd..cdae911 100644 --- a/docs/posts/2021-06-27-Crude-ML-AI-Powered-Chatbot-Swift.html +++ b/docs/posts/2021-06-27-Crude-ML-AI-Powered-Chatbot-Swift.html @@ -89,7 +89,8 @@ I created a sample JSON with only 3 examples (I know, very less, but works for a

      Screenshot of Sample Dataset

      -
      [
      +
      +
      [
           {
               "tokens": ["Tell","me","about","the","drug","Aspirin","."],
               "labels": ["NONE","NONE","NONE","NONE","NONE","COMPOUND","NONE"]
      @@ -103,7 +104,8 @@ I created a sample JSON with only 3 examples (I know, very less, but works for a
               "labels": ["NONE","NONE","NONE","NONE","COMPOUND","NONE","NONE"]
           }
       ]
      -
      +
      +

      Screenshot of Create ML Text Classifier

      @@ -113,7 +115,8 @@ I created a sample JSON with only 3 examples (I know, very less, but works for a

      Screenshot

      -
      import CoreML
      +
      +
      import CoreML
       import NaturalLanguage
       
       let mlModelClassifier = try IntentDetection_1(configuration:  MLModelConfiguration()).model
      @@ -124,7 +127,8 @@ I created a sample JSON with only 3 examples (I know, very less, but works for a
       
       let tagger = NLTagger(tagSchemes: [.nameType, NLTagScheme("Apple")])
       tagger.setModels([tagPredictor], forTagScheme: NLTagScheme("Apple"))
      -
      +
      +

Now, we define a simple structure which the custom function(s) can use to access the provided input. It can also be used to hold additional variables.

@@ -134,7 +138,8 @@ The latter can be replaced with a function which asks the user for the input.

Screenshot

      -
      struct User {
      +
      +
      struct User {
           static var message = ""
       }
       
      @@ -158,14 +163,16 @@ The latter can be replaced with a function which asks the user for the input. }
       
       }
      -
      +
      +

      Sometimes, no action needs to be performed, and the bot can use a predefined set of responses. Otherwise, if an action is required, it can call the custom action.

      Screenshot

      -
      let defaultResponses = [
      +
      +
      let defaultResponses = [
           "greetings": "Hello",
           "banter": "no, plix no"
       ]
      @@ -173,14 +180,16 @@ Otherwise, if an action is required, it can call the custom action.

      let customActions = [ "deez-drug": customAction ] -
      +
      +

      In the sample input, the program is updating the User.message and checking if it has a default response. Otherwise, it calls the custom action.

      Screenshot

      -
      let sampleMessages = [
      +
      +
      let sampleMessages = [
           "Hey there, how is it going",
           "hello, there",
           "Who let the dogs out",
      @@ -200,7 +209,8 @@ Otherwise, it calls the custom action.

        print(customActions[prediction!]!())
    }
}
-
      +
      +

      Output

      diff --git a/docs/posts/2022-05-21-Similar-Movies-Recommender.html b/docs/posts/2022-05-21-Similar-Movies-Recommender.html index 5d2d6fe..f45b45e 100644 --- a/docs/posts/2022-05-21-Similar-Movies-Recommender.html +++ b/docs/posts/2022-05-21-Similar-Movies-Recommender.html @@ -63,7 +63,8 @@

      First, I needed to check the total number of records in Trakt’s database.

      -
      import requests
      +
      +
      import requests
       import os
       
       trakt_id = os.getenv("TRAKT_ID")
      @@ -87,14 +88,16 @@
       res = requests.get(f"{api_base}/search/movie",headers=headers,params=params)
       total_items = res.headers["x-pagination-item-count"]
       print(f"There are {total_items} movies")
      -
      +
      +
      There are 333946 movies
       

      First, I needed to declare the database schema in (database.py):

      -
      import sqlalchemy
      +
      +
      import sqlalchemy
       from sqlalchemy import create_engine
       from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey, PickleType
       from sqlalchemy import insert
      @@ -129,13 +132,15 @@
           meta.create_all(engine)
           Session = sessionmaker(bind=engine)
           return engine, Session
      -
      +
      +

      In the end, I could have dropped the embeddings field from the table schema as I never got around to using it.

      Scripting Time

      -
      from database import *
      +
      +
      from database import *
       from tqdm import tqdm
       import requests
       import os
      @@ -228,7 +233,8 @@
                       except IntegrityError:
                           trans.rollback()
           req_count += 1
      -
      +
      +

      (Note: I was well within the rate-limit so I did not have to slow down or implement any other measures)

      @@ -263,7 +269,8 @@ As of writing this post, I did not include any other database except Trakt.

    5. Installing the Python module (pinecone-client)

    6. -
      import pandas as pd
      +
      +
      import pandas as pd
       import pinecone
       from sentence_transformers import SentenceTransformer
       from tqdm import tqdm 
      @@ -293,7 +300,8 @@ As of writing this post, I did not include any other database except Trakt. 

        str(value), embeddings[idx].tolist()
    ))
index.upsert(to_send)
-
      +
      +

      That's it!

      @@ -304,7 +312,8 @@ As of writing this post, I did not include any other database except Trakt.

      To find similar items, we will first have to map the name of the movie to its trakt_id, get the embeddings we have for that id and then perform a similarity search. It is possible that this additional step of mapping could be avoided by storing information as metadata in the index.

      -
      def get_trakt_id(df, title: str):
      +
      +
      def get_trakt_id(df, title: str):
         rec = df[df["title"].str.lower()==movie_name.lower()]
         if len(rec.trakt_id.values.tolist()) > 1:
           print(f"multiple values found... {len(rec.trakt_id.values)}")
      @@ -344,11 +353,13 @@ It is possible that this additional step of mapping could be avoided by storing
             "runtime": df.runtime.values[0],
             "year": df.year.values[0]
         }
      -
      +
      +

      Testing it Out

      -
      movie_name = "Now You See Me"
      +
      +
      movie_name = "Now You See Me"
       
       movie_trakt_id = get_trakt_id(df, movie_name)
       print(movie_trakt_id)
      @@ -360,7 +371,8 @@ It is possible that this additional step of mapping could be avoided by storing
       for trakt_id in movie_ids:
         deets = get_deets_by_trakt_id(df, trakt_id)
         print(f"{deets['title']} ({deets['year']}): {deets['overview']}")
      -
      +
      +

      Output:

diff --git a/docs/posts/2022-11-07-a-new-method-to-blog.html b/docs/posts/2022-11-07-a-new-method-to-blog.html
new file mode 100644
index 0000000..aa209b2
--- /dev/null
+++ b/docs/posts/2022-11-07-a-new-method-to-blog.html
@@ -0,0 +1,90 @@
+ Hey - Post - A new method to blog
      +

      A new method to blog

      + +

      Paper Website is a service that lets you build a website with just pen and paper. I am going to try and replicate the process.

      + +

      The Plan

      + +

The Continuity feature on macOS + iOS lets you scan PDFs directly from your iPhone. I want to be able to scan these pages and automatically run an Automator script that takes the PDF and OCRs the text. Then I can further clean the text and convert it from Markdown.

      + +

      Challenges

      + +

I quickly realised that the OCR software I planned on using could not detect my shitty handwriting accurately. I tried using ABBYY FineReader, Prizmo and OCRmyPDF. (ABBYY FineReader and Prizmo support being automated by Automator.)

      + +

      Now, I could either write neater, or use an external API like Microsoft Azure

      + +

      Solution

      + +

      OCR

      + +

      In the PDFs, all the scans are saved as images on a page. I extract the image and then send it to Azure's API.

      + +
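
A minimal sketch of how that step could look. This is not the author's script: the PyMuPDF library, the Read API version (v3.2) and the environment-variable names are my assumptions.

import os
import time

import fitz  # PyMuPDF
import requests

AZURE_ENDPOINT = os.environ["AZURE_VISION_ENDPOINT"]  # e.g. https://<resource>.cognitiveservices.azure.com
AZURE_KEY = os.environ["AZURE_VISION_KEY"]


def ocr_image_bytes(image_bytes: bytes) -> str:
    """Send one image to the Read API and return the recognised text."""
    resp = requests.post(
        f"{AZURE_ENDPOINT}/vision/v3.2/read/analyze",
        headers={
            "Ocp-Apim-Subscription-Key": AZURE_KEY,
            "Content-Type": "application/octet-stream",
        },
        data=image_bytes,
    )
    resp.raise_for_status()
    operation_url = resp.headers["Operation-Location"]  # poll this URL for the result

    while True:
        result = requests.get(
            operation_url, headers={"Ocp-Apim-Subscription-Key": AZURE_KEY}
        ).json()
        if result["status"] in ("succeeded", "failed"):
            break
        time.sleep(1)

    lines = []
    for page in result.get("analyzeResult", {}).get("readResults", []):
        lines.extend(line["text"] for line in page["lines"])
    return "\n".join(lines)


def ocr_pdf(path: str) -> str:
    """Pull every embedded image out of the scanned PDF and OCR it."""
    doc = fitz.open(path)
    chunks = []
    for page in doc:
        for img in page.get_images(full=True):
            image_bytes = doc.extract_image(img[0])["image"]
            chunks.append(ocr_image_bytes(image_bytes))
    return "\n".join(chunks)


if __name__ == "__main__":
    print(ocr_pdf("scan.pdf"))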

      Paragraph Breaks

      + +

The recognised text had line breaks in the middle of sentences. Therefore, I use what is called a pilcrow to mark paragraph breaks. Rather than trying to draw a proper pilcrow, I just write the HTML entity &#182;, which is the pilcrow character.

      + +
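
A minimal sketch of that clean-up step, assuming the OCR output arrives as one string:

def rebuild_paragraphs(ocr_text: str) -> str:
    # OCR output breaks lines mid-sentence, so flatten everything to one string first
    flattened = " ".join(line.strip() for line in ocr_text.splitlines() if line.strip())
    # ...then treat each pilcrow as a real paragraph boundary
    paragraphs = [p.strip() for p in flattened.split("¶") if p.strip()]
    return "\n\n".join(paragraphs)


sample = "This sentence was split\nacross two lines.¶And this starts a new paragraph."
print(rebuild_paragraphs(sample))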

      Where is the code?

      + +

I created a GitHub Gist with a sample Python script that takes the PDF and prints the text

      + +
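
The Gist itself is not reproduced here. As a rough idea, a small driver tying the two sketches above together could look like this; the pdf_to_markdown name and the .md output path are my own choices, not necessarily what the Gist uses. An Automator Quick Action could simply call this with the path of the freshly scanned PDF.

import pathlib
import sys


def pdf_to_markdown(pdf_path: str) -> pathlib.Path:
    raw_text = ocr_pdf(pdf_path)          # from the OCR sketch above
    body = rebuild_paragraphs(raw_text)   # from the pilcrow sketch above
    out_path = pathlib.Path(pdf_path).with_suffix(".md")
    out_path.write_text(body, encoding="utf-8")
    return out_path


if __name__ == "__main__":
    print(pdf_to_markdown(sys.argv[1]))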

A more complete version with Automator scripts and an entire publishing pipeline will be available as a GitHub and Gitea repo soon.

      + +

      * In Part 2, I will discuss some more features *

      + + +
      + +
      + +
\ No newline at end of file

diff --git a/docs/posts/index.html b/docs/posts/index.html
index 1698150..f4fab83 100644
--- a/docs/posts/index.html
+++ b/docs/posts/index.html
@@ -50,6 +50,21 @@
        +
      • A new method to blog
      • +
          +
        • Writing posts in markdown using pen and paper
        • +
        • Published On: 2022-11-07 23:29
        • +
        • Tags: + + Python, + + OCR, + + Microsoft Azure, + +
        + +
      • Why You No Host?
        • Why you should self-host with YunoHost
-- cgit v1.2.3