From d75527f7eecc4e2fcdd18ab157412506717c8adb Mon Sep 17 00:00:00 2001 From: navanchauhan Date: Mon, 7 Nov 2022 23:36:11 -0700 Subject: add blog post --- Content/posts/2022-11-07-a-new-method-to-blog.md | 34 + docs/assets/pdfs/2022-11-07-a-new-way-to-blog.pdf | Bin 0 -> 2087354 bytes docs/feed.rss | 706 ++++++++++++++------- docs/index.html | 15 + .../2019-12-08-Image-Classifier-Tensorflow.html | 60 +- docs/posts/2019-12-08-Splitting-Zips.html | 18 +- .../2019-12-10-TensorFlow-Model-Prediction.html | 42 +- ...019-12-16-TensorFlow-Polynomial-Regression.html | 186 ++++-- docs/posts/2019-12-22-Fake-News-Detector.html | 96 ++- .../2020-01-14-Converting-between-PIL-NumPy.html | 12 +- ...-01-15-Setting-up-Kaggle-to-use-with-Colab.html | 12 +- ...20-01-16-Image-Classifier-Using-Turicreate.html | 72 ++- docs/posts/2020-07-01-Install-rdkit-colab.html | 6 +- .../2020-08-01-Natural-Feature-Tracking-ARJS.html | 24 +- docs/posts/2020-10-11-macOS-Virtual-Cam-OBS.html | 2 +- docs/posts/2020-11-17-Lets-Encrypt-DuckDns.html | 30 +- docs/posts/2020-12-1-HTML-JS-RSS-Feed.html | 6 +- docs/posts/2021-06-25-Blog2Twitter-P1.html | 24 +- ...21-06-27-Crude-ML-AI-Powered-Chatbot-Swift.html | 30 +- .../2022-05-21-Similar-Movies-Recommender.html | 36 +- docs/posts/2022-11-07-a-new-method-to-blog.html | 90 +++ docs/posts/index.html | 15 + poetry.lock | 17 +- 23 files changed, 1087 insertions(+), 446 deletions(-) create mode 100644 Content/posts/2022-11-07-a-new-method-to-blog.md create mode 100644 docs/assets/pdfs/2022-11-07-a-new-way-to-blog.pdf create mode 100644 docs/posts/2022-11-07-a-new-method-to-blog.html diff --git a/Content/posts/2022-11-07-a-new-method-to-blog.md b/Content/posts/2022-11-07-a-new-method-to-blog.md new file mode 100644 index 0000000..d419a0f --- /dev/null +++ b/Content/posts/2022-11-07-a-new-method-to-blog.md @@ -0,0 +1,34 @@ +--- +date: 2022-11-07 23:29 +description: Writing posts in markdown using pen and paper +tags: Python, OCR, Microsoft Azure +--- + +# A new 
method to blog + +[Paper Website](https://paperwebsite.com) is a service that lets you build a website with just pen and paper. I am going to try and replicate the process. + +## The Plan +The continuity feature on macOS + iOS lets you scan PDFs directly from your iPhone. I want to be able to scan these pages and automatically run an Automator script that takes the PDF and OCRs the text. Then I can further clean the text and convert from markdown. + +## Challenges + +I quickly realised that the OCR software I planned on using could not detect my shitty handwriting accurately. I tried using ABBYY FineReader, Prizmo and OCRmyPDF. (ABBYY FineReader and Prizmo support being automated by Automator). + +Now, I could either write neater, or use an external API like Microsoft Azure. + +## Solution + +### OCR + +In the PDFs, all the scans are saved as images on a page. I extract the image and then send it to Azure's API. + +### Paragraph Breaks +The recognised text had multiple lines breaking in the middle of the sentence. Therefore, I use what is called a [pilcrow](https://en.wikipedia.org/wiki/Pilcrow) to specify paragraph breaks. But, rather than trying to draw the normal pilcrow, I just use the HTML entity `¶` which is the pilcrow character. + +## Where is the code? +I created a [GitHub Gist](https://gist.github.com/navanchauhan/5fc602b1e023b60a66bc63bd4eecd4f8) for a sample Python script to take the PDF and print the text. + +A more complete version with Automator scripts and an entire publishing pipeline will be available as a GitHub and Gitea repo soon. 
+ +**In Part 2, I will discuss some more features** diff --git a/docs/assets/pdfs/2022-11-07-a-new-way-to-blog.pdf b/docs/assets/pdfs/2022-11-07-a-new-way-to-blog.pdf new file mode 100644 index 0000000..00d563d Binary files /dev/null and b/docs/assets/pdfs/2022-11-07-a-new-way-to-blog.pdf differ diff --git a/docs/feed.rss b/docs/feed.rss index 1cb2662..17ea0a5 100644 --- a/docs/feed.rss +++ b/docs/feed.rss @@ -4,8 +4,8 @@ Navan's Archive Rare Tips, Tricks and Posts https://web.navan.dev/en - Sun, 07 Aug 2022 22:53:57 -0000 - Sun, 07 Aug 2022 22:53:57 -0000 + Mon, 07 Nov 2022 23:34:29 -0000 + Mon, 07 Nov 2022 23:34:29 -0000 250 @@ -83,7 +83,8 @@ I am not handling lists or images right now.

pip install tweepy

-
import os
+
+
import os
 import tweepy
 
 consumer_key = os.environ["consumer_key"]
@@ -96,13 +97,15 @@ I am not handling lists or images right now.

auth.set_access_token(access_token, access_token_secret) api = tweepy.API(auth) -
+
+

The program needs to convert the blog post into text fragments.

It reads the markdown file, removes the top YAML content, checks for headers and splits the content.

-
tweets = []
+
+
tweets = []
 
 first___n = 0
 
@@ -129,13 +132,15 @@ I am not handling lists or images right now.

print("ERROR") else: tweets.append(line) -
+
+

Every status update using tweepy has an id attached to it, for the next tweet in the thread, it adds that ID while calling the function.

For every tweet fragment, it also appends 1/n.

-
for idx, tweet in enumerate(tweets):
+
+
for idx, tweet in enumerate(tweets):
     tweet += " {}/{}".format(idx+1,len(tweets))
     if idx == 0:
         a = None
@@ -144,12 +149,15 @@ I am not handling lists or images right now.

a = api.update_status(tweet,in_reply_to_status_id=a.id) print(len(tweet),end=" ") print("{}/{}\n".format(idx+1,len(tweets))) -
+
+

Finally, it replies to the last tweet in the thread with the link of the post.

-
api.update_status("Web Version: {}".format(post_link))
-
+
+
api.update_status("Web Version: {}".format(post_link))
+
+

Result

@@ -244,17 +252,21 @@ I actually added the code to this post after running the program.

Mounting Google Drive

-
import os
+
+
import os
 from google.colab import drive
 drive.mount('/content/drive')
-
+
+

After this click on the URL in the output section, login and then paste the Auth Code

Configuring Kaggle

-
os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/My Drive/"
-
+
+
os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/My Drive/"
+
+

Voila! You can now download Kaggle datasets

]]> @@ -278,7 +290,8 @@ I actually added the code to this post after running the program.

Imports

-
%tensorflow_version 2.x #This is for telling Colab that you want to use TF 2.0, ignore if running on local machine
+
+
%tensorflow_version 2.x #This is for telling Colab that you want to use TF 2.0, ignore if running on local machine
 
 from PIL import Image # We use the PIL Library to resize images
 import numpy as np
@@ -290,21 +303,25 @@ I actually added the code to this post after running the program.

import matplotlib.pyplot as plt from keras.models import Sequential from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout -
+
+

Dataset

Fetching the Data

-
!wget ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip
+
+
!wget ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip
 !unzip cell_images.zip
-
+
+

Processing the Data

We resize all the images to 50x50 and add the numpy array of that image as well as their label names (Infected or Not) to common arrays.

-
data = []
+
+
data = []
 labels = []
 
 Parasitized = os.listdir("./cell_images/Parasitized/")
@@ -328,15 +345,18 @@ I actually added the code to this post after running the program.

labels.append(1) except AttributeError: print("") -
+
+

Splitting Data

-
df = np.array(data)
+
+
df = np.array(data)
 labels = np.array(labels)
 (X_train, X_test) = df[(int)(0.1*len(df)):],df[:(int)(0.1*len(df))]
 (y_train, y_test) = labels[(int)(0.1*len(labels)):],labels[:(int)(0.1*len(labels))]
-
+
+
s=np.arange(X_train.shape[0])
 np.random.shuffle(s)
@@ -353,7 +373,8 @@ X_train = X_train/255.0
 
 

Note: The input shape for the first layer is 50,50 which corresponds with the sizes of the resized images

-
model = models.Sequential()
+
+
model = models.Sequential()
 model.add(layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu', input_shape=(50,50,3)))
 model.add(layers.MaxPooling2D(pool_size=2))
 model.add(layers.Conv2D(filters=32,kernel_size=2,padding='same',activation='relu'))
@@ -366,25 +387,31 @@ X_train = X_train/255.0
 model.add(layers.Dropout(0.2))
 model.add(layers.Dense(2,activation="softmax"))#2 represent output layer neurons 
 model.summary()
-
+
+

Compiling Model

We use the Adam optimiser as it is an adaptive learning rate optimisation algorithm that's been designed specifically for training deep neural networks, which means it changes its learning rate automatically to get the best results

-
model.compile(optimizer="adam",
+
+
model.compile(optimizer="adam",
               loss="sparse_categorical_crossentropy", 
              metrics=["accuracy"])
-
+
+

Training Model

We train the model for 10 epochs on the training data and then validate it using the testing data

-
history = model.fit(X_train,y_train, epochs=10, validation_data=(X_test,y_test))
-
+
+
history = model.fit(X_train,y_train, epochs=10, validation_data=(X_test,y_test))
+
+
-
Train on 24803 samples, validate on 2755 samples
+
+
Train on 24803 samples, validate on 2755 samples
 Epoch 1/10
 24803/24803 [==============================] - 57s 2ms/sample - loss: 0.0786 - accuracy: 0.9729 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
 Epoch 2/10
@@ -405,11 +432,13 @@ X_train = X_train/255.0
 24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0352 - accuracy: 0.9878 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
 Epoch 10/10
 24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0373 - accuracy: 0.9865 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
-
+
+

Results

-
accuracy = history.history['accuracy'][-1]*100
+
+
accuracy = history.history['accuracy'][-1]*100
 loss = history.history['loss'][-1]*100
 val_accuracy = history.history['val_accuracy'][-1]*100
 val_loss = history.history['val_loss'][-1]*100
@@ -420,13 +449,16 @@ X_train = X_train/255.0
     '\nValidation Accuracy:', val_accuracy,
     '\nValidation Loss:', val_loss
 )
-
+
+
-
Accuracy: 98.64532351493835 
+
+
Accuracy: 98.64532351493835 
 Loss: 3.732407123270176 
 Validation Accuracy: 100.0 
 Validation Loss: 0.0
-
+
+

We have achieved 98% Accuracy!

@@ -599,7 +631,8 @@ export BABEL_LIBDIR="/usr/lib/openbabel/3.1.0"

First, I needed to check the total number of records in Trakt’s database.

-
import requests
+
+
import requests
 import os
 
 trakt_id = os.getenv("TRAKT_ID")
@@ -623,14 +656,16 @@ export BABEL_LIBDIR="/usr/lib/openbabel/3.1.0"
 res = requests.get(f"{api_base}/search/movie",headers=headers,params=params)
 total_items = res.headers["x-pagination-item-count"]
 print(f"There are {total_items} movies")
-
+
+
There are 333946 movies
 

First, I needed to declare the database schema in (database.py):

-
import sqlalchemy
+
+
import sqlalchemy
 from sqlalchemy import create_engine
 from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey, PickleType
 from sqlalchemy import insert
@@ -665,13 +700,15 @@ export BABEL_LIBDIR="/usr/lib/openbabel/3.1.0"
     meta.create_all(engine)
     Session = sessionmaker(bind=engine)
     return engine, Session
-
+
+

In the end, I could have dropped the embeddings field from the table schema as I never got around to using it.

Scripting Time

-
from database import *
+
+
from database import *
 from tqdm import tqdm
 import requests
 import os
@@ -764,7 +801,8 @@ export BABEL_LIBDIR="/usr/lib/openbabel/3.1.0"
                 except IntegrityError:
                     trans.rollback()
     req_count += 1
-
+
+

(Note: I was well within the rate-limit so I did not have to slow down or implement any other measures)

@@ -799,7 +837,8 @@ As of writing this post, I did not include any other database except Trakt.

  • Installing the Python module (pinecone-client)

  • -
    import pandas as pd
    +
    +
    import pandas as pd
     import pinecone
     from sentence_transformers import SentenceTransformer
     from tqdm import tqdm 
    @@ -829,7 +868,8 @@ As of writing this post, I did not include any other database except Trakt. 

    str(value), embeddings[idx].tolist() )) index.upsert(to_send) -
    +
    +

    That's it!

    @@ -840,7 +880,8 @@ As of writing this post, I did not include any other database except Trakt.

    To find similar items, we will first have to map the name of the movie to its trakt_id, get the embeddings we have for that id and then perform a similarity search. It is possible that this additional step of mapping could be avoided by storing information as metadata in the index.

    -
    def get_trakt_id(df, title: str):
    +
    +
    def get_trakt_id(df, title: str):
       rec = df[df["title"].str.lower()==movie_name.lower()]
       if len(rec.trakt_id.values.tolist()) > 1:
         print(f"multiple values found... {len(rec.trakt_id.values)}")
    @@ -880,11 +921,13 @@ It is possible that this additional step of mapping could be avoided by storing
           "runtime": df.runtime.values[0],
           "year": df.year.values[0]
       }
    -
    +
    +

    Testing it Out

    -
    movie_name = "Now You See Me"
    +
    +
    movie_name = "Now You See Me"
     
     movie_trakt_id = get_trakt_id(df, movie_name)
     print(movie_trakt_id)
    @@ -896,7 +939,8 @@ It is possible that this additional step of mapping could be avoided by storing
     for trakt_id in movie_ids:
       deets = get_deets_by_trakt_id(df, trakt_id)
       print(f"{deets['title']} ({deets['year']}): {deets['overview']}")
    -
    +
    +

    Output:

    @@ -1128,7 +1172,8 @@ me.fset me.fset3 me.iset

    Create a new file called index.html in your project folder. This is the basic template we are going to use. Replace me with the root filename of your image, for example NeverGonnaGiveYouUp.png will become NeverGonnaGiveYouUp. Make sure you have copied all three files from the output folder in the previous step to the root of your project folder.

    -
    <script src="https://cdn.jsdelivr.net/gh/aframevr/aframe@1c2407b26c61958baa93967b5412487cd94b290b/dist/aframe-master.min.js"></script>
    +
    +
    <script src="https://cdn.jsdelivr.net/gh/aframevr/aframe@1c2407b26c61958baa93967b5412487cd94b290b/dist/aframe-master.min.js"></script>
     <script src="https://raw.githack.com/AR-js-org/AR.js/master/aframe/build/aframe-ar-nft.js"></script>
     
     <style>
    @@ -1175,7 +1220,8 @@ me.fset  me.fset3 me.iset
         <a-entity camera></a-entity>
       </a-scene>
     </body>
    -
    +
    +

    In this we are creating an AFrame scene and we are telling it that we want to use NFT Tracking. The amazing part about using AFrame is that we are able to use all AFrame objects!

    @@ -1183,10 +1229,12 @@ me.fset me.fset3 me.iset

    Let us add a simple box!

    -
    <a-nft .....>
    +
    +
    <a-nft .....>
         <a-box position='100 0.5 -180' material='opacity: 0.5; side: double' scale="100 100 100"></a-box>
     </a-nft>
    -
    +
    +

    Now to test it out we will need to create a simple server, I use Python's inbuilt SimpleHTTPServer alongside ngrok

    @@ -1221,12 +1269,14 @@ Serving HTTP on 0.0.0.0 port 8000 ...

    Edit your index.html

    -
    <a-nft ..>
    +
    +
    <a-nft ..>
         <a-box ..>
             <a-torus-knot radius='0.26' radius-tubular='0.05' ></a-torus-knot>
         </ a-box>
     </ a-nft>
    -
    +
    +

    @@ -1242,9 +1292,11 @@ Serving HTTP on 0.0.0.0 port 8000 ...

    Change the box's material to add the GIF shader

    -
    ...
    +
    +
    ...
     <a-box position='100 0.5 -180' material="shader:gif;src:url(https://media.tenor.com/images/412b1aa9149d98d561df62db221e0789/tenor.gif);opacity:.5" .....>
    -
    +
    +

    @@ -1264,6 +1316,52 @@ Serving HTTP on 0.0.0.0 port 8000 ... ]]> + + + https://web.navan.dev/posts/2022-11-07-a-new-method-to-blog.html + + + A new method to blog + + + Writing posts in markdown using pen and paper + + https://web.navan.dev/posts/2022-11-07-a-new-method-to-blog.html + Mon, 07 Nov 2022 23:29:00 -0000 + A new method to blog + +

    Paper Website is a service that lets you build a website with just pen and paper. I am going to try and replicate the process.

    + +

    The Plan

    + +

    The continuity feature on macOS + iOS lets you scan PDFs directly from your iPhone. I want to be able to scan these pages and automatically run an Automator script that takes the PDF and OCRs the text. Then I can further clean the text and convert from markdown.

    + +

    Challenges

    + +

    I quickly realised that the OCR software I planned on using could not detect my shitty handwriting accurately. I tried using ABBYY FineReader, Prizmo and OCRmyPDF. (ABBYY FineReader and Prizmo support being automated by Automator).

    + +

    Now, I could either write neater, or use an external API like Microsoft Azure.

    + +

    Solution

    + +

    OCR

    + +

    In the PDFs, all the scans are saved as images on a page. I extract the image and then send it to Azure's API.

    + +

    Paragraph Breaks

    + +

    The recognised text had multiple lines breaking in the middle of the sentence. Therefore, I use what is called a pilcrow to specify paragraph breaks. But, rather than trying to draw the normal pilcrow, I just use the HTML entity &#182; which is the pilcrow character.

    + +

    Where is the code?

    + +

    I created a GitHub Gist for a sample Python script to take the PDF and print the text.

    + +

    A more complete version with Automator scripts and an entire publishing pipeline will be available as a GitHub and Gitea repo soon.

    + +

    In Part 2, I will discuss some more features

    +]]>
    +
    + https://web.navan.dev/posts/2020-03-03-Playing-With-Android-TV.html @@ -1391,12 +1489,14 @@ Polynomial regression even fits a non-linear relationship (e.g when the points d

    Imports

    -
    import tensorflow.compat.v1 as tf
    +
    +
    import tensorflow.compat.v1 as tf
     tf.disable_v2_behavior()
     import matplotlib.pyplot as plt
     import numpy as np
     import pandas as pd
    -
    +
    +

    Dataset

    @@ -1408,30 +1508,41 @@ Polynomial regression even fits a non-linear relationship (e.g when the points d

    linspace(lower_limit, upper_limit, no_of_observations)

    -
    x = np.linspace(0, 50, 50)
    +
    +
    x = np.linspace(0, 50, 50)
     y = np.linspace(0, 50, 50)
    -
    +
    +

    We use the following function to add noise to the data, so that our values

    -
    x += np.random.uniform(-4, 4, 50)
    +
    +
    x += np.random.uniform(-4, 4, 50)
     y += np.random.uniform(-4, 4, 50)
    -
    +
    +

    Position vs Salary Dataset

    We will be using https://drive.google.com/file/d/1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9/view (Salary vs Position Dataset)

    -
    !wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9' -O data.csv
    -
    +
    +
    !wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9' -O data.csv
    +
    +
    -
    df = pd.read_csv("data.csv")
    -
    +
    +
    df = pd.read_csv("data.csv")
    +
    +
    -
    df # this gives us a preview of the dataset we are working with
    -
    +
    +
    df # this gives us a preview of the dataset we are working with
    +
    +
    -
    | Position          | Level | Salary  |
    +
    +
    | Position          | Level | Salary  |
     |-------------------|-------|---------|
     | Business Analyst  | 1     | 45000   |
     | Junior Consultant | 2     | 50000   |
    @@ -1443,81 +1554,100 @@ Polynomial regression even fits a non-linear relationship (e.g when the points d
     | Senior Partner    | 8     | 300000  |
     | C-level           | 9     | 500000  |
     | CEO               | 10    | 1000000 |
    -
    +
    +

    We convert the salary column as the ordinate (y-coordinate) and level column as the abscissa

    -
    abscissa = df["Level"].to_list() # abscissa = [1,2,3,4,5,6,7,8,9,10]
    +
    +
    abscissa = df["Level"].to_list() # abscissa = [1,2,3,4,5,6,7,8,9,10]
     ordinate = df["Salary"].to_list() # ordinate = [45000,50000,60000,80000,110000,150000,200000,300000,500000,1000000]
    -
    +
    +
    -
    n = len(abscissa) # no of observations
    +
    +
    n = len(abscissa) # no of observations
     plt.scatter(abscissa, ordinate)
     plt.ylabel('Salary')
     plt.xlabel('Position')
     plt.title("Salary vs Position")
     plt.show()
    -
    +
    +

    Defining Stuff

    -
    X = tf.placeholder("float")
    +
    +
    X = tf.placeholder("float")
     Y = tf.placeholder("float")
    -
    +
    +

    Defining Variables

    We first define all the coefficients and constant as tensorflow variables having a random initial value

    -
    a = tf.Variable(np.random.randn(), name = "a")
    +
    +
    a = tf.Variable(np.random.randn(), name = "a")
     b = tf.Variable(np.random.randn(), name = "b")
     c = tf.Variable(np.random.randn(), name = "c")
     d = tf.Variable(np.random.randn(), name = "d")
     e = tf.Variable(np.random.randn(), name = "e")
     f = tf.Variable(np.random.randn(), name = "f")
    -
    +
    +

    Model Configuration

    -
    learning_rate = 0.2
    +
    +
    learning_rate = 0.2
     no_of_epochs = 25000
    -
    +
    +

    Equations

    -
    deg1 = a*X + b
    +
    +
    deg1 = a*X + b
     deg2 = a*tf.pow(X,2) + b*X + c
     deg3 = a*tf.pow(X,3) + b*tf.pow(X,2) + c*X + d
     deg4 = a*tf.pow(X,4) + b*tf.pow(X,3) + c*tf.pow(X,2) + d*X + e
     deg5 = a*tf.pow(X,5) + b*tf.pow(X,4) + c*tf.pow(X,3) + d*tf.pow(X,2) + e*X + f
    -
    +
    +

    Cost Function

    We use the Mean Squared Error Function

    -
    mse1 = tf.reduce_sum(tf.pow(deg1-Y,2))/(2*n)
    +
    +
    mse1 = tf.reduce_sum(tf.pow(deg1-Y,2))/(2*n)
     mse2 = tf.reduce_sum(tf.pow(deg2-Y,2))/(2*n)
     mse3 = tf.reduce_sum(tf.pow(deg3-Y,2))/(2*n)
     mse4 = tf.reduce_sum(tf.pow(deg4-Y,2))/(2*n)
     mse5 = tf.reduce_sum(tf.pow(deg5-Y,2))/(2*n)
    -
    +
    +

    Optimizer

    We use the AdamOptimizer for the polynomial functions and GradientDescentOptimizer for the linear function

    -
    optimizer1 = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse1)
    +
    +
    optimizer1 = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse1)
     optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(mse2)
     optimizer3 = tf.train.AdamOptimizer(learning_rate).minimize(mse3)
     optimizer4 = tf.train.AdamOptimizer(learning_rate).minimize(mse4)
     optimizer5 = tf.train.AdamOptimizer(learning_rate).minimize(mse5)
    -
    +
    +
    -
    init=tf.global_variables_initializer()
    -
    +
    +
    init=tf.global_variables_initializer()
    +
    +

    Model Predictions

    @@ -1526,7 +1656,8 @@ values using the X values. We then plot it to compare the actual data and predic

    Linear Equation

    -
    with tf.Session() as sess:
    +
    +
    with tf.Session() as sess:
         sess.run(init)
         for epoch in range(no_of_epochs):
           for (x,y) in zip(abscissa, ordinate):
    @@ -1540,9 +1671,11 @@ values using the X values. We then plot it to compare the actual data and predic
             constant = sess.run(b)
     
     print(training_cost, coefficient1, constant)
    -
    +
    +
    -
    Epoch 1000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
    +
    +
    Epoch 1000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
     Epoch 2000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
     Epoch 3000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
     Epoch 4000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
    @@ -1568,9 +1701,11 @@ values using the X values. We then plot it to compare the actual data and predic
     Epoch 24000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
     Epoch 25000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
     88999125000.0 180396.42 -478869.12
    -
    +
    +
    -
    predictions = []
    +
    +
    predictions = []
     for x in abscissa:
       predictions.append((coefficient1*x + constant))
     plt.plot(abscissa , ordinate, 'ro', label ='Original data')
    @@ -1578,13 +1713,15 @@ values using the X values. We then plot it to compare the actual data and predic
     plt.title('Linear Regression Result')
     plt.legend()
     plt.show()
    -
    +
    +

    Quadratic Equation

    -
    with tf.Session() as sess:
    +
    +
    with tf.Session() as sess:
         sess.run(init)
         for epoch in range(no_of_epochs):
           for (x,y) in zip(abscissa, ordinate):
    @@ -1599,9 +1736,11 @@ values using the X values. We then plot it to compare the actual data and predic
             constant = sess.run(c)
     
     print(training_cost, coefficient1, coefficient2, constant)
    -
    +
    +
    -
    Epoch 1000 : Training Cost: 52571360000.0  a,b,c: 1002.4456 1097.0197 1276.6921
    +
    +
    Epoch 1000 : Training Cost: 52571360000.0  a,b,c: 1002.4456 1097.0197 1276.6921
     Epoch 2000 : Training Cost: 37798890000.0  a,b,c: 1952.4263 2130.2825 2469.7756
     Epoch 3000 : Training Cost: 26751185000.0  a,b,c: 2839.5825 3081.6118 3554.351
     Epoch 4000 : Training Cost: 19020106000.0  a,b,c: 3644.56 3922.9563 4486.3135
    @@ -1627,9 +1766,11 @@ values using the X values. We then plot it to compare the actual data and predic
     Epoch 24000 : Training Cost: 8088001000.0  a,b,c: 6632.96 3399.878 -79.89219
     Epoch 25000 : Training Cost: 8058094600.0  a,b,c: 6659.793 3227.2517 -463.03156
     8058094600.0 6659.793 3227.2517 -463.03156
    -
    +
    +
    -
    predictions = []
    +
    +
    predictions = []
     for x in abscissa:
       predictions.append((coefficient1*pow(x,2) + coefficient2*x + constant))
     plt.plot(abscissa , ordinate, 'ro', label ='Original data')
    @@ -1637,13 +1778,15 @@ values using the X values. We then plot it to compare the actual data and predic
     plt.title('Quadratic Regression Result')
     plt.legend()
     plt.show()
    -
    +
    +

    Cubic

    -
    with tf.Session() as sess:
    +
    +
    with tf.Session() as sess:
         sess.run(init)
         for epoch in range(no_of_epochs):
           for (x,y) in zip(abscissa, ordinate):
    @@ -1659,9 +1802,11 @@ values using the X values. We then plot it to compare the actual data and predic
             constant = sess.run(d)
     
     print(training_cost, coefficient1, coefficient2, coefficient3, constant)
    -
    +
    +
    -
    Epoch 1000 : Training Cost: 4279814000.0  a,b,c,d: 670.1527 694.4212 751.4653 903.9527
    +
    +
    Epoch 1000 : Training Cost: 4279814000.0  a,b,c,d: 670.1527 694.4212 751.4653 903.9527
     Epoch 2000 : Training Cost: 3770950400.0  a,b,c,d: 742.6414 666.3489 636.94525 859.2088
     Epoch 3000 : Training Cost: 3717708300.0  a,b,c,d: 756.2582 569.3339 448.105 748.23956
     Epoch 4000 : Training Cost: 3667464000.0  a,b,c,d: 769.4476 474.0318 265.5761 654.75525
    @@ -1687,9 +1832,11 @@ values using the X values. We then plot it to compare the actual data and predic
     Epoch 24000 : Training Cost: 3070361300.0  a,b,c,d: 975.52875 -1095.4292 -2211.854 1847.4485
     Epoch 25000 : Training Cost: 3052791300.0  a,b,c,d: 983.4346 -1159.7922 -2286.9412 2027.4857
     3052791300.0 983.4346 -1159.7922 -2286.9412 2027.4857
    -
    +
    +
    -
    predictions = []
    +
    +
    predictions = []
     for x in abscissa:
       predictions.append((coefficient1*pow(x,3) + coefficient2*pow(x,2) + coefficient3*x + constant))
     plt.plot(abscissa , ordinate, 'ro', label ='Original data')
    @@ -1697,13 +1844,15 @@ values using the X values. We then plot it to compare the actual data and predic
     plt.title('Cubic Regression Result')
     plt.legend()
     plt.show()
    -
    +
    +

    Quartic

    -
    with tf.Session() as sess:
    +
    +
    with tf.Session() as sess:
         sess.run(init)
         for epoch in range(no_of_epochs):
           for (x,y) in zip(abscissa, ordinate):
    @@ -1720,9 +1869,11 @@ values using the X values. We then plot it to compare the actual data and predic
             constant = sess.run(e)
     
     print(training_cost, coefficient1, coefficient2, coefficient3, coefficient4, constant)
    -
    +
    +
    -
    Epoch 1000 : Training Cost: 1902632600.0  a,b,c,d: 84.48304 52.210594 54.791424 142.51952 512.0343
    +
    +
    Epoch 1000 : Training Cost: 1902632600.0  a,b,c,d: 84.48304 52.210594 54.791424 142.51952 512.0343
     Epoch 2000 : Training Cost: 1854316200.0  a,b,c,d: 88.998955 13.073557 14.276088 223.55667 1056.4655
     Epoch 3000 : Training Cost: 1812812400.0  a,b,c,d: 92.9462 -22.331177 -15.262934 327.41858 1634.9054
     Epoch 4000 : Training Cost: 1775716000.0  a,b,c,d: 96.42522 -54.64535 -35.829437 449.5028 2239.1392
    @@ -1748,9 +1899,11 @@ values using the X values. We then plot it to compare the actual data and predic
     Epoch 24000 : Training Cost: 1252052600.0  a,b,c,d: 135.9583 -493.38254 90.268616 3764.0078 15010.481
     Epoch 25000 : Training Cost: 1231713700.0  a,b,c,d: 137.54753 -512.1876 101.59372 3926.4897 15609.368
     1231713700.0 137.54753 -512.1876 101.59372 3926.4897 15609.368
    -
    +
    +
    -
    predictions = []
    +
    +
    predictions = []
     for x in abscissa:
       predictions.append((coefficient1*pow(x,4) + coefficient2*pow(x,3) + coefficient3*pow(x,2) + coefficient4*x + constant))
     plt.plot(abscissa , ordinate, 'ro', label ='Original data')
    @@ -1758,13 +1911,15 @@ values using the X values. We then plot it to compare the actual data and predic
     plt.title('Quartic Regression Result')
     plt.legend()
     plt.show()
    -
    +
    +

    Quintic

    -
    with tf.Session() as sess:
    +
    +
    with tf.Session() as sess:
         sess.run(init)
         for epoch in range(no_of_epochs):
           for (x,y) in zip(abscissa, ordinate):
    @@ -1780,9 +1935,11 @@ values using the X values. We then plot it to compare the actual data and predic
             coefficient4 = sess.run(d)
             coefficient5 = sess.run(e)
             constant = sess.run(f)
    -
    +
    +
    -
    Epoch 1000 : Training Cost: 1409200100.0  a,b,c,d,e,f: 7.949472 7.46219 55.626034 184.29028 484.00223 1024.0083
    +
    +
    Epoch 1000 : Training Cost: 1409200100.0  a,b,c,d,e,f: 7.949472 7.46219 55.626034 184.29028 484.00223 1024.0083
     Epoch 2000 : Training Cost: 1306882400.0  a,b,c,d,e,f: 8.732181 -4.0085897 73.25298 315.90103 904.08887 2004.9749
     Epoch 3000 : Training Cost: 1212606000.0  a,b,c,d,e,f: 9.732249 -16.90125 86.28379 437.06552 1305.055 2966.2188
     Epoch 4000 : Training Cost: 1123640400.0  a,b,c,d,e,f: 10.74851 -29.82692 98.59997 555.331 1698.4631 3917.9155
    @@ -1808,9 +1965,11 @@ values using the X values. We then plot it to compare the actual data and predic
     Epoch 24000 : Training Cost: 229660080.0  a,b,c,d,e,f: 27.102589 -238.44817 309.35342 2420.4185 7770.5728 19536.19
     Epoch 25000 : Training Cost: 216972400.0  a,b,c,d,e,f: 27.660324 -245.69016 318.10062 2483.3608 7957.354 20027.707
     216972400.0 27.660324 -245.69016 318.10062 2483.3608 7957.354 20027.707
    -
    +
    +
    -
    predictions = []
    +
    +
    predictions = []
     for x in abscissa:
       predictions.append((coefficient1*pow(x,5) + coefficient2*pow(x,4) + coefficient3*pow(x,3) + coefficient4*pow(x,2) + coefficient5*x + constant))
     plt.plot(abscissa , ordinate, 'ro', label ='Original data')
    @@ -1818,7 +1977,8 @@ values using the X values. We then plot it to compare the actual data and predic
     plt.title('Quintic Regression Result')
     plt.legend()
     plt.show()
    -
    +
    +

    @@ -2682,7 +2842,8 @@ I created a sample JSON with only 3 examples (I know, very less, but works for a

    Screenshot of Sample Dataset

    -
    [
    +
    +
    [
         {
             "tokens": ["Tell","me","about","the","drug","Aspirin","."],
             "labels": ["NONE","NONE","NONE","NONE","NONE","COMPOUND","NONE"]
    @@ -2696,7 +2857,8 @@ I created a sample JSON with only 3 examples (I know, very less, but works for a
             "labels": ["NONE","NONE","NONE","NONE","COMPOUND","NONE","NONE"]
         }
     ]
    -
    +
    +

    Screenshot of Create ML Text Classifier

    @@ -2706,7 +2868,8 @@ I created a sample JSON with only 3 examples (I know, very less, but works for a

    Screenshot

    -
    import CoreML
    +
    +
    import CoreML
     import NaturalLanguage
     
     let mlModelClassifier = try IntentDetection_1(configuration:  MLModelConfiguration()).model
    @@ -2717,7 +2880,8 @@ I created a sample JSON with only 3 examples (I know, very less, but works for a
     
     let tagger = NLTagger(tagSchemes: [.nameType, NLTagScheme("Apple")])
     tagger.setModels([tagPredictor], forTagScheme: NLTagScheme("Apple"))
    -
    +
    +

    Now, we define a simple structure which the custom function(s) can use to access the provided input. It can also be used to hold additional variables. @@ -2727,7 +2891,8 @@ The latter can be replaced with a function which asks the user for the input. Screenshot

    -
    struct User {
    +
    +
    struct User {
         static var message = ""
     }
     
    @@ -2751,14 +2916,16 @@ The latter can be replaced with a function which asks the user for the input. }
     
     }
    -
    +
    +

    Sometimes, no action needs to be performed, and the bot can use a predefined set of responses. Otherwise, if an action is required, it can call the custom action.

    Screenshot

    -
    let defaultResponses = [
    +
    +
    let defaultResponses = [
         "greetings": "Hello",
         "banter": "no, plix no"
     ]
    @@ -2766,14 +2933,16 @@ Otherwise, if an action is required, it can call the custom action.

    let customActions = [ "deez-drug": customAction ] -
    +
    +

    In the sample input, the program is updating the User.message and checking if it has a default response. Otherwise, it calls the custom action.

    Screenshot

    -
    let sampleMessages = [
    +
    +
    let sampleMessages = [
         "Hey there, how is it going",
         "hello, there",
         "Who let the dogs out",
    @@ -2793,7 +2962,8 @@ Otherwise, it calls the custom action.

    print(customActions[prediction!]!()) } } -
    +
    +

    Output

    @@ -2825,39 +2995,53 @@ Otherwise, it calls the custom action.

    First we import the following if we have not imported these before

    -
    import cv2
    +
    +
    import cv2
     import os
    -
    +
    +

    Then we read the file using OpenCV.

    -
    image=cv2.imread(imagePath)
    -
    +
    +
    image=cv2.imread(imagePath)
    +
    +

    The cv2. imread() function returns a NumPy array representing the image. Therefore, we need to convert it before we can use it.

    -
    image_from_array = Image.fromarray(image, 'RGB')
    -
    +
    +
    image_from_array = Image.fromarray(image, 'RGB')
    +
    +

    Then we resize the image

    -
    size_image = image_from_array.resize((50,50))
    -
    +
    +
    size_image = image_from_array.resize((50,50))
    +
    +

    After this we create a batch consisting of only one image

    -
    p = np.expand_dims(size_image, 0)
    -
    +
    +
    p = np.expand_dims(size_image, 0)
    +
    +

    We then convert this uint8 datatype to a float32 datatype

    -
    img = tf.cast(p, tf.float32)
    -
    +
    +
    img = tf.cast(p, tf.float32)
    +
    +

    Finally we make the prediction

    -
    print(['Infected','Uninfected'][np.argmax(model.predict(img))])
    -
    +
    +
    print(['Infected','Uninfected'][np.argmax(model.predict(img))])
    +
    +

    Infected

    ]]> @@ -2889,7 +3073,8 @@ Otherwise, it calls the custom action.

    Just copy and paste this in a Colab cell and it will install it 👍

    -
    import sys
    +
    +
    import sys
     import os
     import requests
     import subprocess
    @@ -2912,7 +3097,8 @@ Otherwise, it calls the custom action.

    force=False): """install rdkit from miniconda -
    +
    +
    import rdkit_installer
     rdkit_installer.install()
    @@ -2999,13 +3185,17 @@ logger.info("rdkit-{} installation finished!".format(rdkit.__version__))
     
     

    Dependencies

    -
    sudo apt update && sudo apt install certbot -y
    -
    +
    +
    sudo apt update && sudo apt install certbot -y
    +
    +

    Get the Certificate

    -
    sudo certbot certonly --manual --preferred-challenges dns-01 --email senpai@email.com -d mydomain.duckdns.org
    -
    +
    +
    sudo certbot certonly --manual --preferred-challenges dns-01 --email senpai@email.com -d mydomain.duckdns.org
    +
    +

    After you accept that you are okay with you IP address being logged, it will prompt you with updating your dns record. You need to create a new TXT record in the DNS settings for your domain.

    @@ -3018,7 +3208,8 @@ logger.info("rdkit-{} installation finished!".format(rdkit.__version__))

    You can check if the TXT records have been updated by using the dig command:

    -
    dig navanspi.duckdns.org TXT
    +
    +
    dig navanspi.duckdns.org TXT
     ; <<>> DiG 9.16.1-Ubuntu <<>> navanspi.duckdns.org TXT
     ;; global options: +cmd
     ;; Got answer:
    @@ -3037,7 +3228,8 @@ navanspi.duckdns.org.    60    IN    TXT    ;; SERVER: 127.0.0.53#53(127.0.0.53)
     ;; WHEN: Tue Nov 17 15:23:15 IST 2020
     ;; MSG SIZE  rcvd: 105
    -
    +
    +

    DuckDNS almost instantly propagates the changes but for other domain hosts, it could take a while.

    @@ -3051,13 +3243,17 @@ navanspi.duckdns.org. 60 IN TXT
    gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:7589
    -
    +
    +
    gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:7589
    +
    +

    To use the certificate with it, simply copy the cert.pem and privkey.pem to your working directory ( change the appropriate permissions ) and include them in the command

    -
    gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:7589 --certfile=cert.pem --keyfile=privkey.pem
    -
    +
    +
    gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:7589 --certfile=cert.pem --keyfile=privkey.pem
    +
    +

    Caveats with copying the certificate: If you renew the certificate you will have to re-copy the files

    ]]> @@ -3094,48 +3290,63 @@ Whenever you are looking for a dataset, always try searching on Kaggle and GitHu This allows you to train the model on the GPU. Turicreate is built on top of Apache's MXNet Framework, for us to use GPU we need to install a CUDA compatible MXNet package.

    -
    !pip install turicreate
    +
    +
    !pip install turicreate
     !pip uninstall -y mxnet
     !pip install mxnet-cu100==1.4.0.post0
    -
    +
    +

    If you do not wish to train on GPU or are running it on your computer, you can ignore the last two lines

    Downloading the Dataset

    -
    !wget -q "https://github.com/joolsa/fake_real_news_dataset/raw/master/fake_or_real_news.csv.zip"
    +
    +
    !wget -q "https://github.com/joolsa/fake_real_news_dataset/raw/master/fake_or_real_news.csv.zip"
     !unzip fake_or_real_news.csv.zip
    -
    +
    +

    Model Creation

    -
    import turicreate as tc
    +
    +
    import turicreate as tc
     tc.config.set_num_gpus(-1) # If you do not wish to use GPUs, set it to 0
    -
    +
    +
    -
    dataSFrame = tc.SFrame('fake_or_real_news.csv')
    -
    +
    +
    dataSFrame = tc.SFrame('fake_or_real_news.csv')
    +
    +

    The dataset contains a column named "X1", which is of no use to us. Therefore, we simply drop it

    -
    dataSFrame.remove_column('X1')
    -
    +
    +
    dataSFrame.remove_column('X1')
    +
    +

    Splitting Dataset

    -
    train, test = dataSFrame.random_split(.9)
    -
    +
    +
    train, test = dataSFrame.random_split(.9)
    +
    +

    Training

    -
    model = tc.text_classifier.create(
    +
    +
    model = tc.text_classifier.create(
         dataset=train,
         target='label',
         features=['title','text']
     )
    -
    +
    +
    -
    +-----------+----------+-----------+--------------+-------------------+---------------------+
    +
    +
    +-----------+----------+-----------+--------------+-------------------+---------------------+
     | Iteration | Passes   | Step size | Elapsed Time | Training Accuracy | Validation Accuracy |
     +-----------+----------+-----------+--------------+-------------------+---------------------+
     | 0         | 2        | 1.000000  | 1.156349     | 0.889680          | 0.790036            |
    @@ -3145,39 +3356,50 @@ a CUDA compatible MXNet package.

    | 4 | 8 | 1.000000 | 1.814194 | 0.999063 | 0.925267 | | 9 | 14 | 1.000000 | 2.507072 | 1.000000 | 0.911032 | +-----------+----------+-----------+--------------+-------------------+---------------------+ -
    +
    +

    Testing the Model

    -
    est_predictions = model.predict(test)
    +
    +
    est_predictions = model.predict(test)
     accuracy = tc.evaluation.accuracy(test['label'], test_predictions)
     print(f'Topic classifier model has a testing accuracy of {accuracy*100}% ', flush=True)
    -
    +
    +
    -
    Topic classifier model has a testing accuracy of 92.3076923076923%
    -
    +
    +
    Topic classifier model has a testing accuracy of 92.3076923076923%
    +
    +

    We have just created our own Fake News Detection Model which has an accuracy of 92%!

    -
    example_text = {"title": ["Middling ‘Rise Of Skywalker’ Review Leaves Fan On Fence About Whether To Threaten To Kill Critic"], "text": ["Expressing ambivalence toward the relatively balanced appraisal of the film, Star Wars fan Miles Ariely admitted Thursday that an online publication’s middling review of The Rise Of Skywalker had left him on the fence about whether he would still threaten to kill the critic who wrote it. “I’m really of two minds about this, because on the one hand, he said the new movie fails to live up to the original trilogy, which makes me at least want to throw a brick through his window with a note telling him to watch his back,” said Ariely, confirming he had already drafted an eight-page-long death threat to Stan Corimer of the website Screen-On Time, but had not yet decided whether to post it to the reviewer’s Facebook page. “On the other hand, though, he commended J.J. Abrams’ skillful pacing and faithfulness to George Lucas’ vision, which makes me wonder if I should just call the whole thing off. Now, I really don’t feel like camping outside his house for hours. Maybe I could go with a response that’s somewhere in between, like, threatening to kill his dog but not everyone in his whole family? I don’t know. This is a tough one.” At press time, sources reported that Ariely had resolved to wear his Ewok costume while he murdered the critic in his sleep."]}
    +
    +
    example_text = {"title": ["Middling ‘Rise Of Skywalker’ Review Leaves Fan On Fence About Whether To Threaten To Kill Critic"], "text": ["Expressing ambivalence toward the relatively balanced appraisal of the film, Star Wars fan Miles Ariely admitted Thursday that an online publication’s middling review of The Rise Of Skywalker had left him on the fence about whether he would still threaten to kill the critic who wrote it. “I’m really of two minds about this, because on the one hand, he said the new movie fails to live up to the original trilogy, which makes me at least want to throw a brick through his window with a note telling him to watch his back,” said Ariely, confirming he had already drafted an eight-page-long death threat to Stan Corimer of the website Screen-On Time, but had not yet decided whether to post it to the reviewer’s Facebook page. “On the other hand, though, he commended J.J. Abrams’ skillful pacing and faithfulness to George Lucas’ vision, which makes me wonder if I should just call the whole thing off. Now, I really don’t feel like camping outside his house for hours. Maybe I could go with a response that’s somewhere in between, like, threatening to kill his dog but not everyone in his whole family? I don’t know. This is a tough one.” At press time, sources reported that Ariely had resolved to wear his Ewok costume while he murdered the critic in his sleep."]}
     example_prediction = model.classify(tc.SFrame(example_text))
     print(example_prediction, flush=True)
    -
    +
    +
    -
    +-------+--------------------+
    +
    +
    +-------+--------------------+
     | class |    probability     |
     +-------+--------------------+
     |  FAKE | 0.9245648658345308 |
     +-------+--------------------+
     [1 rows x 2 columns]
    -
    +
    +

    Exporting the Model

    -
    model_name = 'FakeNews'
    +
    +
    model_name = 'FakeNews'
     coreml_model_name = model_name + '.mlmodel'
     exportedModel = model.export_coreml(coreml_model_name)
    -
    +
    +

    Note: To download files from Google Colab, simply click on the files section in the sidebar, right click on filename and then click on download

    @@ -3196,7 +3418,8 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i

    We define our bag of words function

    -
    func bow(text: String) -> [String: Double] {
    +
    +
    func bow(text: String) -> [String: Double] {
             var bagOfWords = [String: Double]()
     
             let tagger = NSLinguisticTagger(tagSchemes: [.tokenType], options: 0)
    @@ -3215,22 +3438,26 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i
     
             return bagOfWords
         }
    -
    +
    +

    We also declare our variables

    -
    @State private var title: String = ""
    +
    +
    @State private var title: String = ""
     @State private var headline: String = ""
     @State private var alertTitle = ""
     @State private var alertText = ""
     @State private var showingAlert = false
    -
    +
    +

    Finally, we implement a simple function which reads the two text fields, creates their bag of words representation and displays an alert with the appropriate result

    Complete Code

    -
    import SwiftUI
    +
    +
    import SwiftUI
     
     struct ContentView: View {
         @State private var title: String = ""
    @@ -3305,7 +3532,8 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i
             ContentView()
         }
     }
    -
    +
    +
    ]]> @@ -3325,7 +3553,8 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i

    If you want to directly open the HTML file in your browser after saving, don't forget to set CORS_PROXY=""

    -
    <!doctype html>
    +
    +
    <!doctype html>
     <html lang="en">
     <head>
       <meta charset="utf-8">
    @@ -3520,7 +3749,8 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i
     </script>
     <noscript>Uh Oh! Your browser does not support JavaScript or JavaScript is currently disabled. Please enable JavaScript or switch to a different browser.</noscript>
     </body></html>
    -
    +
    +
    ]]> @@ -3574,7 +3804,8 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i Tue, 14 Jan 2020 00:10:00 -0000 Converting between image and NumPy array -
    import numpy
    +
    +
    import numpy
     import PIL
     
     # Convert PIL Image to NumPy array
    @@ -3583,16 +3814,19 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i
     
     # Convert array to Image
     img = PIL.Image.fromarray(arr)
    -
    +
    +

    Saving an Image

    -
    try:
    +
    +
    try:
         img.save(destination, "JPEG", quality=80, optimize=True, progressive=True)
     except IOError:
         PIL.ImageFile.MAXBLOCK = img.size[0] * img.size[1]
         img.save(destination, "JPEG", quality=80, optimize=True, progressive=True)
    -
    +
    +
    ]]>
    @@ -3614,22 +3848,28 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i

    Creating the archive:

    -
    zip -r -s 5 oodlesofnoodles.zip website/
    -
    +
    +
    zip -r -s 5 oodlesofnoodles.zip website/
    +
    +

    5 stands for each split files' size (in mb, kb and gb can also be specified)

    For encrypting the zip:

    -
    zip -er -s 5 oodlesofnoodles.zip website
    -
    +
    +
    zip -er -s 5 oodlesofnoodles.zip website
    +
    +

    Extracting Files

    First we need to collect all parts, then

    -
    zip -F oodlesofnoodles.zip --out merged.zip
    -
    +
    +
    zip -F oodlesofnoodles.zip --out merged.zip
    +
    +
    ]]> @@ -3750,31 +3990,40 @@ return path(str, boost::filesystem::native);

    Mounting Google Drive

    -
    import os
    +
    +
    import os
     from google.colab import drive
     drive.mount('/content/drive')
    -
    +
    +

    Downloading Dataset from Kaggle

    -
    os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/My Drive/"
    +
    +
    os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/My Drive/"
     !kaggle datasets download ashutosh69/fire-and-smoke-dataset
     !unzip "fire-and-smoke-dataset.zip"
    -
    +
    +

    Pre-Processing

    -
    !mkdir default smoke fire
    -
    +
    +
    !mkdir default smoke fire
    +
    +

    \

    -
    !ls data/data/img_data/train/default/*.jpg
    -
    +
    +
    !ls data/data/img_data/train/default/*.jpg
    +
    +

    \

    -
    img_1002.jpg   img_20.jpg     img_519.jpg     img_604.jpg       img_80.jpg
    +
    +
    img_1002.jpg   img_20.jpg     img_519.jpg     img_604.jpg       img_80.jpg
     img_1003.jpg   img_21.jpg     img_51.jpg     img_60.jpg       img_8.jpg
     img_1007.jpg   img_22.jpg     img_520.jpg     img_61.jpg       img_900.jpg
     img_100.jpg    img_23.jpg     img_521.jpg    'img_62 (2).jpg'   img_920.jpg
    @@ -3807,11 +4056,13 @@ return path(str, boost::filesystem::native);
     img_204.jpg    img_501.jpg    img_601.jpg     img_78.jpg
     img_205.jpg    img_502.jpg    img_602.jpg     img_79.jpg
     img_206.jpg    img_50.jpg     img_603.jpg     img_7.jpg
    -
    +
    +

    The image files are not actually JPEG, thus we first need to save them in the correct format for Turicreate

    -
    from PIL import Image
    +
    +
    from PIL import Image
     import glob
     
     
    @@ -3828,26 +4079,32 @@ return path(str, boost::filesystem::native);
         rgb_im = im.convert('RGB')
         rgb_im.save((folder + "/" + str(n) + ".jpg"), quality=100)
         n +=1
    -
    +
    +

    \

    -
    !mkdir train
    +
    +
    !mkdir train
     !mv default ./train
     !mv smoke ./train
     !mv fire ./train
    -
    +
    +

    Making the Image Classifier

    Making an SFrame

    -
    !pip install turicreate
    -
    +
    +
    !pip install turicreate
    +
    +

    \

    -
    import turicreate as tc
    +
    +
    import turicreate as tc
     import os
     
     data = tc.image_analysis.load_images("./train", with_path=True)
    @@ -3857,11 +4114,13 @@ return path(str, boost::filesystem::native);
     print(data)
     
     data.save('fire-smoke.sframe')
    -
    +
    +

    \

    -
    +-------------------------+------------------------+
    +
    +
    +-------------------------+------------------------+
     |           path          |         image          |
     +-------------------------+------------------------+
     |  ./train/default/1.jpg  | Height: 224 Width: 224 |
    @@ -3895,11 +4154,13 @@ return path(str, boost::filesystem::native);
     [2028 rows x 3 columns]
     Note: Only the head of the SFrame is printed.
     You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
    -
    +
    +

    Making the Model

    -
    import turicreate as tc
    +
    +
    import turicreate as tc
     
     # Load the data
     data =  tc.SFrame('fire-smoke.sframe')
    @@ -3922,11 +4183,13 @@ return path(str, boost::filesystem::native);
     
     # Export for use in Core ML
     model.export_coreml('fire-smoke.mlmodel')
    -
    +
    +

    \

    -
    Performing feature extraction on resized images...
    +
    +
    Performing feature extraction on resized images...
     Completed   64/1633
     Completed  128/1633
     Completed  192/1633
    @@ -3984,7 +4247,8 @@ return path(str, boost::filesystem::native);
     Completed 384/395
     Completed 395/395
     0.9316455696202531
    -
    +
    +

    We just got an accuracy of 94% on Training Data and 97% on Validation Data!

    ]]> @@ -4004,7 +4268,7 @@ return path(str, boost::filesystem::native); Sun, 11 Oct 2020 16:12:00 -0000 Trying Different Camera Setups -
      +
      1. Animated Overlays
      2. Using a modern camera as your webcam
      3. Using your phone's camera as your webcam
      4. diff --git a/docs/index.html b/docs/index.html index f6285ba..8a9bb3d 100644 --- a/docs/index.html +++ b/docs/index.html @@ -47,6 +47,21 @@
          +
        • A new method to blog
        • +
            +
          • Writing posts in markdown using pen and paper
          • +
          • Published On: 2022-11-07 23:29
          • +
          • Tags: + + Python, + + OCR, + + Microsoft Azure + +
          + +
        • Why You No Host?
          • Why you should self-host with YunoHost
          • diff --git a/docs/posts/2019-12-08-Image-Classifier-Tensorflow.html b/docs/posts/2019-12-08-Image-Classifier-Tensorflow.html index ac305ac..9ecfff0 100644 --- a/docs/posts/2019-12-08-Image-Classifier-Tensorflow.html +++ b/docs/posts/2019-12-08-Image-Classifier-Tensorflow.html @@ -47,7 +47,8 @@

            Imports

            -
            %tensorflow_version 2.x #This is for telling Colab that you want to use TF 2.0, ignore if running on local machine
            +
            +
            %tensorflow_version 2.x #This is for telling Colab that you want to use TF 2.0, ignore if running on local machine
             
             from PIL import Image # We use the PIL Library to resize images
             import numpy as np
            @@ -59,21 +60,25 @@
             import matplotlib.pyplot as plt
             from keras.models import Sequential
             from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
            -
            +
            +

            Dataset

            Fetching the Data

            -
            !wget ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip
            +
            +
            !wget ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip
             !unzip cell_images.zip
            -
            +
            +

            Processing the Data

            We resize all the images as 50x50 and add the numpy array of that image as well as their label names (Infected or Not) to common arrays.

            -
            data = []
            +
            +
            data = []
             labels = []
             
             Parasitized = os.listdir("./cell_images/Parasitized/")
            @@ -97,15 +102,18 @@
                     labels.append(1)
                 except AttributeError:
                     print("")
            -
            +
            +

            Splitting Data

            -
            df = np.array(data)
            +
            +
            df = np.array(data)
             labels = np.array(labels)
             (X_train, X_test) = df[(int)(0.1*len(df)):],df[:(int)(0.1*len(df))]
             (y_train, y_test) = labels[(int)(0.1*len(labels)):],labels[:(int)(0.1*len(labels))]
            -
            +
            +
            s=np.arange(X_train.shape[0])
             np.random.shuffle(s)
            @@ -122,7 +130,8 @@ X_train = X_train/255.0
             
             

            Note: The input shape for the first layer is 50,50 which corresponds with the sizes of the resized images

            -
            model = models.Sequential()
            +
            +
            model = models.Sequential()
             model.add(layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu', input_shape=(50,50,3)))
             model.add(layers.MaxPooling2D(pool_size=2))
             model.add(layers.Conv2D(filters=32,kernel_size=2,padding='same',activation='relu'))
            @@ -135,25 +144,31 @@ X_train = X_train/255.0
             model.add(layers.Dropout(0.2))
             model.add(layers.Dense(2,activation="softmax"))#2 represent output layer neurons 
             model.summary()
            -
            +
            +

            Compiling Model

            We use the Adam optimiser as it is an adaptive learning rate optimisation algorithm that's been designed specifically for training deep neural networks, which means it changes its learning rate automatically to get the best results

            -
            model.compile(optimizer="adam",
            +
            +
            model.compile(optimizer="adam",
                           loss="sparse_categorical_crossentropy", 
                          metrics=["accuracy"])
            -
            +
            +

            Training Model

            We train the model for 10 epochs on the training data and then validate it using the testing data

            -
            history = model.fit(X_train,y_train, epochs=10, validation_data=(X_test,y_test))
            -
            +
            +
            history = model.fit(X_train,y_train, epochs=10, validation_data=(X_test,y_test))
            +
            +
            -
            Train on 24803 samples, validate on 2755 samples
            +
            +
            Train on 24803 samples, validate on 2755 samples
             Epoch 1/10
             24803/24803 [==============================] - 57s 2ms/sample - loss: 0.0786 - accuracy: 0.9729 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
             Epoch 2/10
            @@ -174,11 +189,13 @@ X_train = X_train/255.0
             24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0352 - accuracy: 0.9878 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
             Epoch 10/10
             24803/24803 [==============================] - 58s 2ms/sample - loss: 0.0373 - accuracy: 0.9865 - val_loss: 0.0000e+00 - val_accuracy: 1.0000
            -
            +
            +

            Results

            -
            accuracy = history.history['accuracy'][-1]*100
            +
            +
            accuracy = history.history['accuracy'][-1]*100
             loss = history.history['loss'][-1]*100
             val_accuracy = history.history['val_accuracy'][-1]*100
             val_loss = history.history['val_loss'][-1]*100
            @@ -189,13 +206,16 @@ X_train = X_train/255.0
                 '\nValidation Accuracy:', val_accuracy,
                 '\nValidation Loss:', val_loss
             )
            -
            +
            +
            -
            Accuracy: 98.64532351493835 
            +
            +
            Accuracy: 98.64532351493835 
             Loss: 3.732407123270176 
             Validation Accuracy: 100.0 
             Validation Loss: 0.0
            -
            +
            +

            We have achieved 98% Accuracy!

            diff --git a/docs/posts/2019-12-08-Splitting-Zips.html b/docs/posts/2019-12-08-Splitting-Zips.html index ed9ecff..8464ca1 100644 --- a/docs/posts/2019-12-08-Splitting-Zips.html +++ b/docs/posts/2019-12-08-Splitting-Zips.html @@ -47,22 +47,28 @@

            Creating the archive:

            -
            zip -r -s 5 oodlesofnoodles.zip website/
            -
            +
            +
            zip -r -s 5 oodlesofnoodles.zip website/
            +
            +

            5 stands for each split files' size (in mb, kb and gb can also be specified)

            For encrypting the zip:

            -
            zip -er -s 5 oodlesofnoodles.zip website
            -
            +
            +
            zip -er -s 5 oodlesofnoodles.zip website
            +
            +

            Extracting Files

            First we need to collect all parts, then

            -
            zip -F oodlesofnoodles.zip --out merged.zip
            -
            +
            +
            zip -F oodlesofnoodles.zip --out merged.zip
            +
            +
            diff --git a/docs/posts/2019-12-10-TensorFlow-Model-Prediction.html b/docs/posts/2019-12-10-TensorFlow-Model-Prediction.html index 7187fe8..97ad373 100644 --- a/docs/posts/2019-12-10-TensorFlow-Model-Prediction.html +++ b/docs/posts/2019-12-10-TensorFlow-Model-Prediction.html @@ -51,39 +51,53 @@

            First we import the following if we have not imported these before

            -
            import cv2
            +
            +
            import cv2
             import os
            -
            +
            +

            Then we read the file using OpenCV.

            -
            image=cv2.imread(imagePath)
            -
            +
            +
            image=cv2.imread(imagePath)
            +
            +

            The cv2. imread() function returns a NumPy array representing the image. Therefore, we need to convert it before we can use it.

            -
            image_from_array = Image.fromarray(image, 'RGB')
            -
            +
            +
            image_from_array = Image.fromarray(image, 'RGB')
            +
            +

            Then we resize the image

            -
            size_image = image_from_array.resize((50,50))
            -
            +
            +
            size_image = image_from_array.resize((50,50))
            +
            +

            After this we create a batch consisting of only one image

            -
            p = np.expand_dims(size_image, 0)
            -
            +
            +
            p = np.expand_dims(size_image, 0)
            +
            +

            We then convert this uint8 datatype to a float32 datatype

            -
            img = tf.cast(p, tf.float32)
            -
            +
            +
            img = tf.cast(p, tf.float32)
            +
            +

            Finally we make the prediction

            -
            print(['Infected','Uninfected'][np.argmax(model.predict(img))])
            -
            +
            +
            print(['Infected','Uninfected'][np.argmax(model.predict(img))])
            +
            +

            Infected

            diff --git a/docs/posts/2019-12-16-TensorFlow-Polynomial-Regression.html b/docs/posts/2019-12-16-TensorFlow-Polynomial-Regression.html index 7bfe8d4..f0dad82 100644 --- a/docs/posts/2019-12-16-TensorFlow-Polynomial-Regression.html +++ b/docs/posts/2019-12-16-TensorFlow-Polynomial-Regression.html @@ -69,12 +69,14 @@ Polynomial regression even fits a non-linear relationship (e.g when the points d

            Imports

            -
            import tensorflow.compat.v1 as tf
            +
            +
            import tensorflow.compat.v1 as tf
             tf.disable_v2_behavior()
             import matplotlib.pyplot as plt
             import numpy as np
             import pandas as pd
            -
            +
            +

            Dataset

            @@ -86,30 +88,41 @@ Polynomial regression even fits a non-linear relationship (e.g when the points d

            linspace(lower_limit, upper_limit, no_of_observations)

            -
            x = np.linspace(0, 50, 50)
            +
            +
            x = np.linspace(0, 50, 50)
             y = np.linspace(0, 50, 50)
            -
            +
            +

            We use the following function to add noise to the data, so that our values

            -
            x += np.random.uniform(-4, 4, 50)
            +
            +
            x += np.random.uniform(-4, 4, 50)
             y += np.random.uniform(-4, 4, 50)
            -
            +
            +

            Position vs Salary Dataset

            We will be using https://drive.google.com/file/d/1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9/view (Salary vs Position Dataset)

            -
            !wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9' -O data.csv
            -
            +
            +
            !wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1tNL4jxZEfpaP4oflfSn6pIHJX7Pachm9' -O data.csv
            +
            +
            -
            df = pd.read_csv("data.csv")
            -
            +
            +
            df = pd.read_csv("data.csv")
            +
            +
            -
            df # this gives us a preview of the dataset we are working with
            -
            +
            +
            df # this gives us a preview of the dataset we are working with
            +
            +
            -
            | Position          | Level | Salary  |
            +
            +
            | Position          | Level | Salary  |
             |-------------------|-------|---------|
             | Business Analyst  | 1     | 45000   |
             | Junior Consultant | 2     | 50000   |
            @@ -121,81 +134,100 @@ Polynomial regression even fits a non-linear relationship (e.g when the points d
             | Senior Partner    | 8     | 300000  |
             | C-level           | 9     | 500000  |
             | CEO               | 10    | 1000000 |
            -
            +
            +

            We convert the salary column as the ordinate (y-coordinate) and level column as the abscissa

            -
            abscissa = df["Level"].to_list() # abscissa = [1,2,3,4,5,6,7,8,9,10]
            +
            +
            abscissa = df["Level"].to_list() # abscissa = [1,2,3,4,5,6,7,8,9,10]
             ordinate = df["Salary"].to_list() # ordinate = [45000,50000,60000,80000,110000,150000,200000,300000,500000,1000000]
            -
            +
            +
            -
            n = len(abscissa) # no of observations
            +
            +
            n = len(abscissa) # no of observations
             plt.scatter(abscissa, ordinate)
             plt.ylabel('Salary')
             plt.xlabel('Position')
             plt.title("Salary vs Position")
             plt.show()
            -
            +
            +

            Defining Stuff

            -
            X = tf.placeholder("float")
            +
            +
            X = tf.placeholder("float")
             Y = tf.placeholder("float")
            -
            +
            +

            Defining Variables

            We first define all the coefficients and constant as tensorflow variables having a random initial value

            -
            a = tf.Variable(np.random.randn(), name = "a")
            +
            +
            a = tf.Variable(np.random.randn(), name = "a")
             b = tf.Variable(np.random.randn(), name = "b")
             c = tf.Variable(np.random.randn(), name = "c")
             d = tf.Variable(np.random.randn(), name = "d")
             e = tf.Variable(np.random.randn(), name = "e")
             f = tf.Variable(np.random.randn(), name = "f")
            -
            +
            +

            Model Configuration

            -
            learning_rate = 0.2
            +
            +
            learning_rate = 0.2
             no_of_epochs = 25000
            -
            +
            +

            Equations

            -
            deg1 = a*X + b
            +
            +
            deg1 = a*X + b
             deg2 = a*tf.pow(X,2) + b*X + c
             deg3 = a*tf.pow(X,3) + b*tf.pow(X,2) + c*X + d
             deg4 = a*tf.pow(X,4) + b*tf.pow(X,3) + c*tf.pow(X,2) + d*X + e
             deg5 = a*tf.pow(X,5) + b*tf.pow(X,4) + c*tf.pow(X,3) + d*tf.pow(X,2) + e*X + f
            -
            +
            +

            Cost Function

            We use the Mean Squared Error Function

            -
            mse1 = tf.reduce_sum(tf.pow(deg1-Y,2))/(2*n)
            +
            +
            mse1 = tf.reduce_sum(tf.pow(deg1-Y,2))/(2*n)
             mse2 = tf.reduce_sum(tf.pow(deg2-Y,2))/(2*n)
             mse3 = tf.reduce_sum(tf.pow(deg3-Y,2))/(2*n)
             mse4 = tf.reduce_sum(tf.pow(deg4-Y,2))/(2*n)
             mse5 = tf.reduce_sum(tf.pow(deg5-Y,2))/(2*n)
            -
            +
            +

            Optimizer

            We use the AdamOptimizer for the polynomial functions and GradientDescentOptimizer for the linear function

            -
            optimizer1 = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse1)
            +
            +
            optimizer1 = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse1)
             optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(mse2)
             optimizer3 = tf.train.AdamOptimizer(learning_rate).minimize(mse3)
             optimizer4 = tf.train.AdamOptimizer(learning_rate).minimize(mse4)
             optimizer5 = tf.train.AdamOptimizer(learning_rate).minimize(mse5)
            -
            +
            +
            -
            init=tf.global_variables_initializer()
            -
            +
            +
            init=tf.global_variables_initializer()
            +
            +

            Model Predictions

            @@ -204,7 +236,8 @@ values using the X values. We then plot it to compare the actual data and predic

            Linear Equation

            -
            with tf.Session() as sess:
            +
            +
            with tf.Session() as sess:
                 sess.run(init)
                 for epoch in range(no_of_epochs):
                   for (x,y) in zip(abscissa, ordinate):
            @@ -218,9 +251,11 @@ values using the X values. We then plot it to compare the actual data and predic
                     constant = sess.run(b)
             
             print(training_cost, coefficient1, constant)
            -
            +
            +
            -
            Epoch 1000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
            +
            +
            Epoch 1000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
             Epoch 2000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
             Epoch 3000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
             Epoch 4000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
            @@ -246,9 +281,11 @@ values using the X values. We then plot it to compare the actual data and predic
             Epoch 24000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
             Epoch 25000 : Training Cost: 88999125000.0  a,b: 180396.42 -478869.12
             88999125000.0 180396.42 -478869.12
            -
            +
            +
            -
            predictions = []
            +
            +
            predictions = []
             for x in abscissa:
               predictions.append((coefficient1*x + constant))
             plt.plot(abscissa , ordinate, 'ro', label ='Original data')
            @@ -256,13 +293,15 @@ values using the X values. We then plot it to compare the actual data and predic
             plt.title('Linear Regression Result')
             plt.legend()
             plt.show()
            -
            +
            +

            Quadratic Equation

            -
            with tf.Session() as sess:
            +
            +
            with tf.Session() as sess:
                 sess.run(init)
                 for epoch in range(no_of_epochs):
                   for (x,y) in zip(abscissa, ordinate):
            @@ -277,9 +316,11 @@ values using the X values. We then plot it to compare the actual data and predic
                     constant = sess.run(c)
             
             print(training_cost, coefficient1, coefficient2, constant)
            -
            +
            +
            -
            Epoch 1000 : Training Cost: 52571360000.0  a,b,c: 1002.4456 1097.0197 1276.6921
            +
            +
            Epoch 1000 : Training Cost: 52571360000.0  a,b,c: 1002.4456 1097.0197 1276.6921
             Epoch 2000 : Training Cost: 37798890000.0  a,b,c: 1952.4263 2130.2825 2469.7756
             Epoch 3000 : Training Cost: 26751185000.0  a,b,c: 2839.5825 3081.6118 3554.351
             Epoch 4000 : Training Cost: 19020106000.0  a,b,c: 3644.56 3922.9563 4486.3135
            @@ -305,9 +346,11 @@ values using the X values. We then plot it to compare the actual data and predic
             Epoch 24000 : Training Cost: 8088001000.0  a,b,c: 6632.96 3399.878 -79.89219
             Epoch 25000 : Training Cost: 8058094600.0  a,b,c: 6659.793 3227.2517 -463.03156
             8058094600.0 6659.793 3227.2517 -463.03156
            -
            +
            +
            -
            predictions = []
            +
            +
            predictions = []
             for x in abscissa:
               predictions.append((coefficient1*pow(x,2) + coefficient2*x + constant))
             plt.plot(abscissa , ordinate, 'ro', label ='Original data')
            @@ -315,13 +358,15 @@ values using the X values. We then plot it to compare the actual data and predic
             plt.title('Quadratic Regression Result')
             plt.legend()
             plt.show()
            -
            +
            +

            Cubic

            -
            with tf.Session() as sess:
            +
            +
            with tf.Session() as sess:
                 sess.run(init)
                 for epoch in range(no_of_epochs):
                   for (x,y) in zip(abscissa, ordinate):
            @@ -337,9 +382,11 @@ values using the X values. We then plot it to compare the actual data and predic
                     constant = sess.run(d)
             
             print(training_cost, coefficient1, coefficient2, coefficient3, constant)
            -
            +
            +
            -
            Epoch 1000 : Training Cost: 4279814000.0  a,b,c,d: 670.1527 694.4212 751.4653 903.9527
            +
            +
            Epoch 1000 : Training Cost: 4279814000.0  a,b,c,d: 670.1527 694.4212 751.4653 903.9527
             Epoch 2000 : Training Cost: 3770950400.0  a,b,c,d: 742.6414 666.3489 636.94525 859.2088
             Epoch 3000 : Training Cost: 3717708300.0  a,b,c,d: 756.2582 569.3339 448.105 748.23956
             Epoch 4000 : Training Cost: 3667464000.0  a,b,c,d: 769.4476 474.0318 265.5761 654.75525
            @@ -365,9 +412,11 @@ values using the X values. We then plot it to compare the actual data and predic
             Epoch 24000 : Training Cost: 3070361300.0  a,b,c,d: 975.52875 -1095.4292 -2211.854 1847.4485
             Epoch 25000 : Training Cost: 3052791300.0  a,b,c,d: 983.4346 -1159.7922 -2286.9412 2027.4857
             3052791300.0 983.4346 -1159.7922 -2286.9412 2027.4857
            -
            +
            +
            -
            predictions = []
            +
            +
            predictions = []
             for x in abscissa:
               predictions.append((coefficient1*pow(x,3) + coefficient2*pow(x,2) + coefficient3*x + constant))
             plt.plot(abscissa , ordinate, 'ro', label ='Original data')
            @@ -375,13 +424,15 @@ values using the X values. We then plot it to compare the actual data and predic
             plt.title('Cubic Regression Result')
             plt.legend()
             plt.show()
            -
            +
            +

            Quartic

            -
            with tf.Session() as sess:
            +
            +
            with tf.Session() as sess:
                 sess.run(init)
                 for epoch in range(no_of_epochs):
                   for (x,y) in zip(abscissa, ordinate):
            @@ -398,9 +449,11 @@ values using the X values. We then plot it to compare the actual data and predic
                     constant = sess.run(e)
             
             print(training_cost, coefficient1, coefficient2, coefficient3, coefficient4, constant)
            -
            +
            +
            -
            Epoch 1000 : Training Cost: 1902632600.0  a,b,c,d: 84.48304 52.210594 54.791424 142.51952 512.0343
            +
            +
            Epoch 1000 : Training Cost: 1902632600.0  a,b,c,d: 84.48304 52.210594 54.791424 142.51952 512.0343
             Epoch 2000 : Training Cost: 1854316200.0  a,b,c,d: 88.998955 13.073557 14.276088 223.55667 1056.4655
             Epoch 3000 : Training Cost: 1812812400.0  a,b,c,d: 92.9462 -22.331177 -15.262934 327.41858 1634.9054
             Epoch 4000 : Training Cost: 1775716000.0  a,b,c,d: 96.42522 -54.64535 -35.829437 449.5028 2239.1392
            @@ -426,9 +479,11 @@ values using the X values. We then plot it to compare the actual data and predic
             Epoch 24000 : Training Cost: 1252052600.0  a,b,c,d: 135.9583 -493.38254 90.268616 3764.0078 15010.481
             Epoch 25000 : Training Cost: 1231713700.0  a,b,c,d: 137.54753 -512.1876 101.59372 3926.4897 15609.368
             1231713700.0 137.54753 -512.1876 101.59372 3926.4897 15609.368
            -
            +
            +
            -
            predictions = []
            +
            +
            predictions = []
             for x in abscissa:
               predictions.append((coefficient1*pow(x,4) + coefficient2*pow(x,3) + coefficient3*pow(x,2) + coefficient4*x + constant))
             plt.plot(abscissa , ordinate, 'ro', label ='Original data')
            @@ -436,13 +491,15 @@ values using the X values. We then plot it to compare the actual data and predic
             plt.title('Quartic Regression Result')
             plt.legend()
             plt.show()
            -
            +
            +

            Quintic

            -
            with tf.Session() as sess:
            +
            +
            with tf.Session() as sess:
                 sess.run(init)
                 for epoch in range(no_of_epochs):
                   for (x,y) in zip(abscissa, ordinate):
            @@ -458,9 +515,11 @@ values using the X values. We then plot it to compare the actual data and predic
                     coefficient4 = sess.run(d)
                     coefficient5 = sess.run(e)
                     constant = sess.run(f)
            -
            +
            +
            -
            Epoch 1000 : Training Cost: 1409200100.0  a,b,c,d,e,f: 7.949472 7.46219 55.626034 184.29028 484.00223 1024.0083
            +
            +
            Epoch 1000 : Training Cost: 1409200100.0  a,b,c,d,e,f: 7.949472 7.46219 55.626034 184.29028 484.00223 1024.0083
             Epoch 2000 : Training Cost: 1306882400.0  a,b,c,d,e,f: 8.732181 -4.0085897 73.25298 315.90103 904.08887 2004.9749
             Epoch 3000 : Training Cost: 1212606000.0  a,b,c,d,e,f: 9.732249 -16.90125 86.28379 437.06552 1305.055 2966.2188
             Epoch 4000 : Training Cost: 1123640400.0  a,b,c,d,e,f: 10.74851 -29.82692 98.59997 555.331 1698.4631 3917.9155
            @@ -486,9 +545,11 @@ values using the X values. We then plot it to compare the actual data and predic
             Epoch 24000 : Training Cost: 229660080.0  a,b,c,d,e,f: 27.102589 -238.44817 309.35342 2420.4185 7770.5728 19536.19
             Epoch 25000 : Training Cost: 216972400.0  a,b,c,d,e,f: 27.660324 -245.69016 318.10062 2483.3608 7957.354 20027.707
             216972400.0 27.660324 -245.69016 318.10062 2483.3608 7957.354 20027.707
            -
            +
            +
            -
            predictions = []
            +
            +
            predictions = []
             for x in abscissa:
               predictions.append((coefficient1*pow(x,5) + coefficient2*pow(x,4) + coefficient3*pow(x,3) + coefficient4*pow(x,2) + coefficient5*x + constant))
             plt.plot(abscissa , ordinate, 'ro', label ='Original data')
            @@ -496,7 +557,8 @@ values using the X values. We then plot it to compare the actual data and predic
             plt.title('Quintic Regression Result')
             plt.legend()
             plt.show()
            -
            +
            +

            diff --git a/docs/posts/2019-12-22-Fake-News-Detector.html b/docs/posts/2019-12-22-Fake-News-Detector.html index 46297b0..9b62b00 100644 --- a/docs/posts/2019-12-22-Fake-News-Detector.html +++ b/docs/posts/2019-12-22-Fake-News-Detector.html @@ -60,48 +60,63 @@ Whenever you are looking for a dataset, always try searching on Kaggle and GitHu This allows you to train the model on the GPU. Turicreate is built on top of Apache's MXNet Framework, for us to use GPU we need to install a CUDA compatible MXNet package.

            -
            !pip install turicreate
            +
            +
            !pip install turicreate
             !pip uninstall -y mxnet
             !pip install mxnet-cu100==1.4.0.post0
            -
            +
            +

            If you do not wish to train on GPU or are running it on your computer, you can ignore the last two lines

            Downloading the Dataset

            -
            !wget -q "https://github.com/joolsa/fake_real_news_dataset/raw/master/fake_or_real_news.csv.zip"
            +
            +
            !wget -q "https://github.com/joolsa/fake_real_news_dataset/raw/master/fake_or_real_news.csv.zip"
             !unzip fake_or_real_news.csv.zip
            -
            +
            +

            Model Creation

            -
            import turicreate as tc
            +
            +
            import turicreate as tc
             tc.config.set_num_gpus(-1) # If you do not wish to use GPUs, set it to 0
            -
            +
            +
            -
            dataSFrame = tc.SFrame('fake_or_real_news.csv')
            -
            +
            +
            dataSFrame = tc.SFrame('fake_or_real_news.csv')
            +
            +

            The dataset contains a column named "X1", which is of no use to us. Therefore, we simply drop it

            -
            dataSFrame.remove_column('X1')
            -
            +
            +
            dataSFrame.remove_column('X1')
            +
            +

            Splitting Dataset

            -
            train, test = dataSFrame.random_split(.9)
            -
            +
            +
            train, test = dataSFrame.random_split(.9)
            +
            +

            Training

            -
            model = tc.text_classifier.create(
            +
            +
            model = tc.text_classifier.create(
                 dataset=train,
                 target='label',
                 features=['title','text']
             )
            -
            +
            +
            -
            +-----------+----------+-----------+--------------+-------------------+---------------------+
            +
            +
            +-----------+----------+-----------+--------------+-------------------+---------------------+
             | Iteration | Passes   | Step size | Elapsed Time | Training Accuracy | Validation Accuracy |
             +-----------+----------+-----------+--------------+-------------------+---------------------+
             | 0         | 2        | 1.000000  | 1.156349     | 0.889680          | 0.790036            |
            @@ -111,39 +126,50 @@ a CUDA compatible MXNet package.

            | 4 | 8 | 1.000000 | 1.814194 | 0.999063 | 0.925267 | | 9 | 14 | 1.000000 | 2.507072 | 1.000000 | 0.911032 | +-----------+----------+-----------+--------------+-------------------+---------------------+ -
            +
            +

            Testing the Model

            -
            est_predictions = model.predict(test)
            +
            +
            est_predictions = model.predict(test)
             accuracy = tc.evaluation.accuracy(test['label'], test_predictions)
             print(f'Topic classifier model has a testing accuracy of {accuracy*100}% ', flush=True)
            -
            +
            +
            -
            Topic classifier model has a testing accuracy of 92.3076923076923%
            -
            +
            +
            Topic classifier model has a testing accuracy of 92.3076923076923%
            +
            +

            We have just created our own Fake News Detection Model which has an accuracy of 92%!

            -
            example_text = {"title": ["Middling ‘Rise Of Skywalker’ Review Leaves Fan On Fence About Whether To Threaten To Kill Critic"], "text": ["Expressing ambivalence toward the relatively balanced appraisal of the film, Star Wars fan Miles Ariely admitted Thursday that an online publication’s middling review of The Rise Of Skywalker had left him on the fence about whether he would still threaten to kill the critic who wrote it. “I’m really of two minds about this, because on the one hand, he said the new movie fails to live up to the original trilogy, which makes me at least want to throw a brick through his window with a note telling him to watch his back,” said Ariely, confirming he had already drafted an eight-page-long death threat to Stan Corimer of the website Screen-On Time, but had not yet decided whether to post it to the reviewer’s Facebook page. “On the other hand, though, he commended J.J. Abrams’ skillful pacing and faithfulness to George Lucas’ vision, which makes me wonder if I should just call the whole thing off. Now, I really don’t feel like camping outside his house for hours. Maybe I could go with a response that’s somewhere in between, like, threatening to kill his dog but not everyone in his whole family? I don’t know. This is a tough one.” At press time, sources reported that Ariely had resolved to wear his Ewok costume while he murdered the critic in his sleep."]}
            +
            +
            example_text = {"title": ["Middling ‘Rise Of Skywalker’ Review Leaves Fan On Fence About Whether To Threaten To Kill Critic"], "text": ["Expressing ambivalence toward the relatively balanced appraisal of the film, Star Wars fan Miles Ariely admitted Thursday that an online publication’s middling review of The Rise Of Skywalker had left him on the fence about whether he would still threaten to kill the critic who wrote it. “I’m really of two minds about this, because on the one hand, he said the new movie fails to live up to the original trilogy, which makes me at least want to throw a brick through his window with a note telling him to watch his back,” said Ariely, confirming he had already drafted an eight-page-long death threat to Stan Corimer of the website Screen-On Time, but had not yet decided whether to post it to the reviewer’s Facebook page. “On the other hand, though, he commended J.J. Abrams’ skillful pacing and faithfulness to George Lucas’ vision, which makes me wonder if I should just call the whole thing off. Now, I really don’t feel like camping outside his house for hours. Maybe I could go with a response that’s somewhere in between, like, threatening to kill his dog but not everyone in his whole family? I don’t know. This is a tough one.” At press time, sources reported that Ariely had resolved to wear his Ewok costume while he murdered the critic in his sleep."]}
             example_prediction = model.classify(tc.SFrame(example_text))
             print(example_prediction, flush=True)
            -
            +
            +
            -
            +-------+--------------------+
            +
            +
            +-------+--------------------+
             | class |    probability     |
             +-------+--------------------+
             |  FAKE | 0.9245648658345308 |
             +-------+--------------------+
             [1 rows x 2 columns]
            -
            +
            +

            Exporting the Model

            -
            model_name = 'FakeNews'
            +
            +
            model_name = 'FakeNews'
             coreml_model_name = model_name + '.mlmodel'
             exportedModel = model.export_coreml(coreml_model_name)
            -
            +
            +

            Note: To download files from Google Colab, simply click on the files section in the sidebar, right click on filename and then click on download

            @@ -162,7 +188,8 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i

            We define our bag of words function

            -
            func bow(text: String) -> [String: Double] {
            +
            +
            func bow(text: String) -> [String: Double] {
                     var bagOfWords = [String: Double]()
             
                     let tagger = NSLinguisticTagger(tagSchemes: [.tokenType], options: 0)
            @@ -181,22 +208,26 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i
             
                     return bagOfWords
                 }
            -
            +
            +

            We also declare our variables

            -
            @State private var title: String = ""
            +
            +
            @State private var title: String = ""
             @State private var headline: String = ""
             @State private var alertTitle = ""
             @State private var alertText = ""
             @State private var showingAlert = false
            -
            +
            +

            Finally, we implement a simple function which reads the two text fields, creates their bag of words representation and displays an alert with the appropriate result

            Complete Code

            -
            import SwiftUI
            +
            +
            import SwiftUI
             
             struct ContentView: View {
                 @State private var title: String = ""
            @@ -271,7 +302,8 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i
                     ContentView()
                 }
             }
            -
            +
            +
            diff --git a/docs/posts/2020-01-14-Converting-between-PIL-NumPy.html b/docs/posts/2020-01-14-Converting-between-PIL-NumPy.html index 293da91..1db31be 100644 --- a/docs/posts/2020-01-14-Converting-between-PIL-NumPy.html +++ b/docs/posts/2020-01-14-Converting-between-PIL-NumPy.html @@ -43,7 +43,8 @@

            Converting between image and NumPy array

            -
            import numpy
            +
            +
            import numpy
             import PIL
             
             # Convert PIL Image to NumPy array
            @@ -52,16 +53,19 @@
             
             # Convert array to Image
             img = PIL.Image.fromarray(arr)
            -
            +
            +

            Saving an Image

            -
            try:
            +
            +
            try:
                 img.save(destination, "JPEG", quality=80, optimize=True, progressive=True)
             except IOError:
                 PIL.ImageFile.MAXBLOCK = img.size[0] * img.size[1]
                 img.save(destination, "JPEG", quality=80, optimize=True, progressive=True)
            -
            +
            +
            diff --git a/docs/posts/2020-01-15-Setting-up-Kaggle-to-use-with-Colab.html b/docs/posts/2020-01-15-Setting-up-Kaggle-to-use-with-Colab.html index 9a7faef..d1c88d4 100644 --- a/docs/posts/2020-01-15-Setting-up-Kaggle-to-use-with-Colab.html +++ b/docs/posts/2020-01-15-Setting-up-Kaggle-to-use-with-Colab.html @@ -69,17 +69,21 @@

            Mounting Google Drive

            -
            import os
            +
            +
            import os
             from google.colab import drive
             drive.mount('/content/drive')
            -
            +
            +

            After this click on the URL in the output section, login and then paste the Auth Code

            Configuring Kaggle

            -
            os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/My Drive/"
            -
            +
            +
            os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/My Drive/"
            +
            +

            Voila! You can now download Kaggle datasets

            diff --git a/docs/posts/2020-01-16-Image-Classifier-Using-Turicreate.html b/docs/posts/2020-01-16-Image-Classifier-Using-Turicreate.html index 4235b29..5056a82 100644 --- a/docs/posts/2020-01-16-Image-Classifier-Using-Turicreate.html +++ b/docs/posts/2020-01-16-Image-Classifier-Using-Turicreate.html @@ -49,31 +49,40 @@

            Mounting Google Drive

            -
            import os
            +
            +
            import os
             from google.colab import drive
             drive.mount('/content/drive')
            -
            +
            +

            Downloading Dataset from Kaggle

            -
            os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/My Drive/"
            +
            +
            os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/My Drive/"
             !kaggle datasets download ashutosh69/fire-and-smoke-dataset
             !unzip "fire-and-smoke-dataset.zip"
            -
            +
            +

            Pre-Processing

            -
            !mkdir default smoke fire
            -
            +
            +
            !mkdir default smoke fire
            +
            +

            \

            -
            !ls data/data/img_data/train/default/*.jpg
            -
            +
            +
            !ls data/data/img_data/train/default/*.jpg
            +
            +

            \

            -
            img_1002.jpg   img_20.jpg     img_519.jpg     img_604.jpg       img_80.jpg
            +
            +
            img_1002.jpg   img_20.jpg     img_519.jpg     img_604.jpg       img_80.jpg
             img_1003.jpg   img_21.jpg     img_51.jpg     img_60.jpg       img_8.jpg
             img_1007.jpg   img_22.jpg     img_520.jpg     img_61.jpg       img_900.jpg
             img_100.jpg    img_23.jpg     img_521.jpg    'img_62 (2).jpg'   img_920.jpg
            @@ -106,11 +115,13 @@
             img_204.jpg    img_501.jpg    img_601.jpg     img_78.jpg
             img_205.jpg    img_502.jpg    img_602.jpg     img_79.jpg
             img_206.jpg    img_50.jpg     img_603.jpg     img_7.jpg
            -
            +
            +

            The image files are not actually JPEG, thus we first need to save them in the correct format for Turicreate

            -
            from PIL import Image
            +
            +
            from PIL import Image
             import glob
             
             
            @@ -127,26 +138,32 @@
                 rgb_im = im.convert('RGB')
                 rgb_im.save((folder + "/" + str(n) + ".jpg"), quality=100)
                 n +=1
            -
            +
            +

            \

            -
            !mkdir train
            +
            +
            !mkdir train
             !mv default ./train
             !mv smoke ./train
             !mv fire ./train
            -
            +
            +

            Making the Image Classifier

            Making an SFrame

            -
            !pip install turicreate
            -
            +
            +
            !pip install turicreate
            +
            +

            \

            -
            import turicreate as tc
            +
            +
            import turicreate as tc
             import os
             
             data = tc.image_analysis.load_images("./train", with_path=True)
            @@ -156,11 +173,13 @@
             print(data)
             
             data.save('fire-smoke.sframe')
            -
            +
            +

            \

            -
            +-------------------------+------------------------+
            +
            +
            +-------------------------+------------------------+
             |           path          |         image          |
             +-------------------------+------------------------+
             |  ./train/default/1.jpg  | Height: 224 Width: 224 |
            @@ -194,11 +213,13 @@
             [2028 rows x 3 columns]
             Note: Only the head of the SFrame is printed.
             You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
            -
            +
            +

            Making the Model

            -
            import turicreate as tc
            +
            +
            import turicreate as tc
             
             # Load the data
             data =  tc.SFrame('fire-smoke.sframe')
            @@ -221,11 +242,13 @@
             
             # Export for use in Core ML
             model.export_coreml('fire-smoke.mlmodel')
            -
            +
            +

            \

            -
            Performing feature extraction on resized images...
            +
            +
            Performing feature extraction on resized images...
             Completed   64/1633
             Completed  128/1633
             Completed  192/1633
            @@ -283,7 +306,8 @@
             Completed 384/395
             Completed 395/395
             0.9316455696202531
            -
            +
            +

            We just got an accuracy of 94% on Training Data and 97% on Validation Data!

            diff --git a/docs/posts/2020-07-01-Install-rdkit-colab.html b/docs/posts/2020-07-01-Install-rdkit-colab.html index 56e2f21..4b5c4e7 100644 --- a/docs/posts/2020-07-01-Install-rdkit-colab.html +++ b/docs/posts/2020-07-01-Install-rdkit-colab.html @@ -55,7 +55,8 @@

            Just copy and paste this in a Colab cell and it will install it 👍

            -
            import sys
            +
            +
            import sys
             import os
             import requests
             import subprocess
            @@ -78,7 +79,8 @@
                     force=False):
                 """install rdkit from miniconda
                
            -
            +
            +
            import rdkit_installer
             rdkit_installer.install()
            diff --git a/docs/posts/2020-08-01-Natural-Feature-Tracking-ARJS.html b/docs/posts/2020-08-01-Natural-Feature-Tracking-ARJS.html
            index 6b28206..560996e 100644
            --- a/docs/posts/2020-08-01-Natural-Feature-Tracking-ARJS.html
            +++ b/docs/posts/2020-08-01-Natural-Feature-Tracking-ARJS.html
            @@ -184,7 +184,8 @@ me.fset  me.fset3 me.iset
             
             

            Create a new file called index.html in your project folder. This is the basic template we are going to use. Replace me with the root filename of your image, for example NeverGonnaGiveYouUp.png will become NeverGonnaGiveYouUp. Make sure you have copied all three files from the output folder in the previous step to the root of your project folder.

            -
            <script src="https://cdn.jsdelivr.net/gh/aframevr/aframe@1c2407b26c61958baa93967b5412487cd94b290b/dist/aframe-master.min.js"></script>
            +
            +
            <script src="https://cdn.jsdelivr.net/gh/aframevr/aframe@1c2407b26c61958baa93967b5412487cd94b290b/dist/aframe-master.min.js"></script>
             <script src="https://raw.githack.com/AR-js-org/AR.js/master/aframe/build/aframe-ar-nft.js"></script>
             
             <style>
            @@ -231,7 +232,8 @@ me.fset  me.fset3 me.iset
                 <a-entity camera></a-entity>
               </a-scene>
             </body>
            -
            +
            +

            In this we are creating an AFrame scene and we are telling it that we want to use NFT Tracking. The amazing part about using AFrame is that we are able to use all AFrame objects!

            @@ -239,10 +241,12 @@ me.fset me.fset3 me.iset

            Let us add a simple box!

            -
            <a-nft .....>
            +
            +
            <a-nft .....>
                 <a-box position='100 0.5 -180' material='opacity: 0.5; side: double' scale="100 100 100"></a-box>
             </a-nft>
            -
            +
            +

            Now, to test it out, we will need to create a simple server; I use Python's built-in SimpleHTTPServer alongside ngrok

            @@ -277,12 +281,14 @@ Serving HTTP on 0.0.0.0 port 8000 ...

            Edit your index.html

            -
            <a-nft ..>
            +
            +
            <a-nft ..>
                 <a-box ..>
                     <a-torus-knot radius='0.26' radius-tubular='0.05' ></a-torus-knot>
                 </ a-box>
             </ a-nft>
            -
            +
            +

            @@ -298,9 +304,11 @@ Serving HTTP on 0.0.0.0 port 8000 ...

            Change the box's material to add the GIF shader

            -
            ...
            +
            +
            ...
             <a-box position='100 0.5 -180' material="shader:gif;src:url(https://media.tenor.com/images/412b1aa9149d98d561df62db221e0789/tenor.gif);opacity:.5" .....>
            -
            +
            +

            diff --git a/docs/posts/2020-10-11-macOS-Virtual-Cam-OBS.html b/docs/posts/2020-10-11-macOS-Virtual-Cam-OBS.html index 06951dc..d99f7b8 100644 --- a/docs/posts/2020-10-11-macOS-Virtual-Cam-OBS.html +++ b/docs/posts/2020-10-11-macOS-Virtual-Cam-OBS.html @@ -43,7 +43,7 @@

            Trying Different Camera Setups

            -
              +
              1. Animated Overlays
              2. Using a modern camera as your webcam
              3. Using your phone's camera as your webcam
              4. diff --git a/docs/posts/2020-11-17-Lets-Encrypt-DuckDns.html b/docs/posts/2020-11-17-Lets-Encrypt-DuckDns.html index f8e7b6c..fdde2b8 100644 --- a/docs/posts/2020-11-17-Lets-Encrypt-DuckDns.html +++ b/docs/posts/2020-11-17-Lets-Encrypt-DuckDns.html @@ -47,13 +47,17 @@

                Dependencies

                -
                sudo apt update && sudo apt install certbot -y
                -
                +
                +
                sudo apt update && sudo apt install certbot -y
                +
                +

                Get the Certificate

                -
                sudo certbot certonly --manual --preferred-challenges dns-01 --email senpai@email.com -d mydomain.duckdns.org
                -
                +
                +
                sudo certbot certonly --manual --preferred-challenges dns-01 --email senpai@email.com -d mydomain.duckdns.org
                +
                +

                After you accept that you are okay with your IP address being logged, it will prompt you with updating your DNS record. You need to create a new TXT record in the DNS settings for your domain.

                @@ -66,7 +70,8 @@

                You can check if the TXT records have been updated by using the dig command:

                -
                dig navanspi.duckdns.org TXT
                +
                +
                dig navanspi.duckdns.org TXT
                 ; <<>> DiG 9.16.1-Ubuntu <<>> navanspi.duckdns.org TXT
                 ;; global options: +cmd
                 ;; Got answer:
                @@ -85,7 +90,8 @@ navanspi.duckdns.org.    60    IN    TXT    ;; SERVER: 127.0.0.53#53(127.0.0.53)
                 ;; WHEN: Tue Nov 17 15:23:15 IST 2020
                 ;; MSG SIZE  rcvd: 105
                -
                +
                +

                DuckDNS almost instantly propagates the changes but for other domain hosts, it could take a while.

                @@ -99,13 +105,17 @@ navanspi.duckdns.org. 60 IN TXT
                gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:7589
                -
                +
                +
                gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:7589
                +
                +

                To use the certificate with it, simply copy the cert.pem and privkey.pem to your working directory ( change the appropriate permissions ) and include them in the command

                -
                gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:7589 --certfile=cert.pem --keyfile=privkey.pem
                -
                +
                +
                gunicorn api:app -k uvicorn.workers.UvicornWorker -b 0.0.0.0:7589 --certfile=cert.pem --keyfile=privkey.pem
                +
                +

                Caveats with copying the certificate: If you renew the certificate you will have to re-copy the files

                diff --git a/docs/posts/2020-12-1-HTML-JS-RSS-Feed.html b/docs/posts/2020-12-1-HTML-JS-RSS-Feed.html index 4fdb015..8acc446 100644 --- a/docs/posts/2020-12-1-HTML-JS-RSS-Feed.html +++ b/docs/posts/2020-12-1-HTML-JS-RSS-Feed.html @@ -45,7 +45,8 @@

                If you want to directly open the HTML file in your browser after saving, don't forget to set CORS_PROXY=""

                -
                <!doctype html>
                +
                +
                <!doctype html>
                 <html lang="en">
                 <head>
                   <meta charset="utf-8">
                @@ -240,7 +241,8 @@
                 </script>
                 <noscript>Uh Oh! Your browser does not support JavaScript or JavaScript is currently disabled. Please enable JavaScript or switch to a different browser.</noscript>
                 </body></html>
                -
                +
                +
                diff --git a/docs/posts/2021-06-25-Blog2Twitter-P1.html b/docs/posts/2021-06-25-Blog2Twitter-P1.html index ada9666..62233ab 100644 --- a/docs/posts/2021-06-25-Blog2Twitter-P1.html +++ b/docs/posts/2021-06-25-Blog2Twitter-P1.html @@ -57,7 +57,8 @@ I am not handling lists or images right now.

                pip install tweepy

                -
                import os
                +
                +
                import os
                 import tweepy
                 
                 consumer_key = os.environ["consumer_key"]
                @@ -70,13 +71,15 @@ I am not handling lists or images right now.

                auth.set_access_token(access_token, access_token_secret) api = tweepy.API(auth) -
                +
                +

                The program needs to convert the blog post into text fragments.

                It reads the markdown file, removes the top YAML content, checks for headers and splits the content.

                -
                tweets = []
                +
                +
                tweets = []
                 
                 first___n = 0
                 
                @@ -103,13 +106,15 @@ I am not handling lists or images right now.

                print("ERROR") else: tweets.append(line) -
                +
                +

                Every status update using tweepy has an id attached to it, for the next tweet in the thread, it adds that ID while calling the function.

                For every tweet fragment, it also appends 1/n.

                -
                for idx, tweet in enumerate(tweets):
                +
                +
                for idx, tweet in enumerate(tweets):
                     tweet += " {}/{}".format(idx+1,len(tweets))
                     if idx == 0:
                         a = None
                @@ -118,12 +123,15 @@ I am not handling lists or images right now.

                a = api.update_status(tweet,in_reply_to_status_id=a.id) print(len(tweet),end=" ") print("{}/{}\n".format(idx+1,len(tweets))) -
                +
                +

                Finally, it replies to the last tweet in the thread with the link of the post.

                -
                api.update_status("Web Version: {}".format(post_link))
                -
                +
                +
                api.update_status("Web Version: {}".format(post_link))
                +
                +

                Result

                diff --git a/docs/posts/2021-06-27-Crude-ML-AI-Powered-Chatbot-Swift.html b/docs/posts/2021-06-27-Crude-ML-AI-Powered-Chatbot-Swift.html index 0b307fd..cdae911 100644 --- a/docs/posts/2021-06-27-Crude-ML-AI-Powered-Chatbot-Swift.html +++ b/docs/posts/2021-06-27-Crude-ML-AI-Powered-Chatbot-Swift.html @@ -89,7 +89,8 @@ I created a sample JSON with only 3 examples (I know, very less, but works for a

                Screenshot of Sample Dataset

                -
                [
                +
                +
                [
                     {
                         "tokens": ["Tell","me","about","the","drug","Aspirin","."],
                         "labels": ["NONE","NONE","NONE","NONE","NONE","COMPOUND","NONE"]
                @@ -103,7 +104,8 @@ I created a sample JSON with only 3 examples (I know, very less, but works for a
                         "labels": ["NONE","NONE","NONE","NONE","COMPOUND","NONE","NONE"]
                     }
                 ]
                -
                +
                +

                Screenshot of Create ML Text Classifier

                @@ -113,7 +115,8 @@ I created a sample JSON with only 3 examples (I know, very less, but works for a

                Screenshot

                -
                import CoreML
                +
                +
                import CoreML
                 import NaturalLanguage
                 
                 let mlModelClassifier = try IntentDetection_1(configuration:  MLModelConfiguration()).model
                @@ -124,7 +127,8 @@ I created a sample JSON with only 3 examples (I know, very less, but works for a
                 
                 let tagger = NLTagger(tagSchemes: [.nameType, NLTagScheme("Apple")])
                 tagger.setModels([tagPredictor], forTagScheme: NLTagScheme("Apple"))
                -
                +
                +

                Now, we define a simple structure which the custom function(s) can use to access the provided input. It can also be used to hold additional variables. @@ -134,7 +138,8 @@ The latter can be replaced with a function which asks the user for the input. Screenshot

                -
                struct User {
                +
                +
                struct User {
                     static var message = ""
                 }
                 
                @@ -158,14 +163,16 @@ The latter can be replaced with a function which asks the user for the input. }
                 
                 }
                -
                +
                +

                Sometimes, no action needs to be performed, and the bot can use a predefined set of responses. Otherwise, if an action is required, it can call the custom action.

                Screenshot

                -
                let defaultResponses = [
                +
                +
                let defaultResponses = [
                     "greetings": "Hello",
                     "banter": "no, plix no"
                 ]
                @@ -173,14 +180,16 @@ Otherwise, if an action is required, it can call the custom action.

                let customActions = [ "deez-drug": customAction ] -
                +
                +

                In the sample input, the program is updating the User.message and checking if it has a default response. Otherwise, it calls the custom action.

                Screenshot

                -
                let sampleMessages = [
                +
                +
                let sampleMessages = [
                     "Hey there, how is it going",
                     "hello, there",
                     "Who let the dogs out",
                @@ -200,7 +209,8 @@ Otherwise, it calls the custom action.

                print(customActions[prediction!]!()) } } -
                +
                +

                Output

                diff --git a/docs/posts/2022-05-21-Similar-Movies-Recommender.html b/docs/posts/2022-05-21-Similar-Movies-Recommender.html index 5d2d6fe..f45b45e 100644 --- a/docs/posts/2022-05-21-Similar-Movies-Recommender.html +++ b/docs/posts/2022-05-21-Similar-Movies-Recommender.html @@ -63,7 +63,8 @@

                First, I needed to check the total number of records in Trakt’s database.

                -
                import requests
                +
                +
                import requests
                 import os
                 
                 trakt_id = os.getenv("TRAKT_ID")
                @@ -87,14 +88,16 @@
                 res = requests.get(f"{api_base}/search/movie",headers=headers,params=params)
                 total_items = res.headers["x-pagination-item-count"]
                 print(f"There are {total_items} movies")
                -
                +
                +
                There are 333946 movies
                 

                First, I needed to declare the database schema in (database.py):

                -
                import sqlalchemy
                +
                +
                import sqlalchemy
                 from sqlalchemy import create_engine
                 from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey, PickleType
                 from sqlalchemy import insert
                @@ -129,13 +132,15 @@
                     meta.create_all(engine)
                     Session = sessionmaker(bind=engine)
                     return engine, Session
                -
                +
                +

                In the end, I could have dropped the embeddings field from the table schema as I never got around to using it.

                Scripting Time

                -
                from database import *
                +
                +
                from database import *
                 from tqdm import tqdm
                 import requests
                 import os
                @@ -228,7 +233,8 @@
                                 except IntegrityError:
                                     trans.rollback()
                     req_count += 1
                -
                +
                +

                (Note: I was well within the rate-limit so I did not have to slow down or implement any other measures)

                @@ -263,7 +269,8 @@ As of writing this post, I did not include any other database except Trakt.

              5. Installing the Python module (pinecone-client)

          -
          import pandas as pd
          +
          +
          import pandas as pd
           import pinecone
           from sentence_transformers import SentenceTransformer
           from tqdm import tqdm 
          @@ -293,7 +300,8 @@ As of writing this post, I did not include any other database except Trakt. 

          str(value), embeddings[idx].tolist() )) index.upsert(to_send) -
          +
          +

          That's it!

          @@ -304,7 +312,8 @@ As of writing this post, I did not include any other database except Trakt.

          To find similar items, we will first have to map the name of the movie to its trakt_id, get the embeddings we have for that id and then perform a similarity search. It is possible that this additional step of mapping could be avoided by storing information as metadata in the index.

          -
          def get_trakt_id(df, title: str):
          +
          +
          def get_trakt_id(df, title: str):
             rec = df[df["title"].str.lower()==movie_name.lower()]
             if len(rec.trakt_id.values.tolist()) > 1:
               print(f"multiple values found... {len(rec.trakt_id.values)}")
          @@ -344,11 +353,13 @@ It is possible that this additional step of mapping could be avoided by storing
                 "runtime": df.runtime.values[0],
                 "year": df.year.values[0]
             }
          -
          +
          +

          Testing it Out

          -
          movie_name = "Now You See Me"
          +
          +
          movie_name = "Now You See Me"
           
           movie_trakt_id = get_trakt_id(df, movie_name)
           print(movie_trakt_id)
          @@ -360,7 +371,8 @@ It is possible that this additional step of mapping could be avoided by storing
           for trakt_id in movie_ids:
             deets = get_deets_by_trakt_id(df, trakt_id)
             print(f"{deets['title']} ({deets['year']}): {deets['overview']}")
          -
          +
          +

          Output:

          diff --git a/docs/posts/2022-11-07-a-new-method-to-blog.html b/docs/posts/2022-11-07-a-new-method-to-blog.html new file mode 100644 index 0000000..aa209b2 --- /dev/null +++ b/docs/posts/2022-11-07-a-new-method-to-blog.html @@ -0,0 +1,90 @@ + + + + + + + + + Hey - Post - A new method to blog + + + + + + + + + + + + + + + + + + + + + + + + + +
          +

          A new method to blog

          + +

          Paper Website is a service that lets you build a website with just pen and paper. I am going to try and replicate the process.

          + +

          The Plan

          + +

          The continuity feature on macOS + iOS lets you scan PDFs directly from your iPhone. I want to be able to scan these pages and automatically run an Automator script that takes the PDF and OCRs the text. Then I can further clean the text and convert from markdown.

          + +

          Challenges

          + +

          I quickly realised that the OCR software I planned on using could not detect my shitty handwriting accurately. I tried using ABBYY FineReader, Prizmo and OCRMyPDF. (ABBYY FineReader and Prizmo support being automated by Automator).

          + +

          Now, I could either write neater, or use an external API like Microsoft Azure

          + +

          Solution

          + +

          OCR

          + +

          In the PDFs, all the scans are saved as images on a page. I extract the image and then send it to Azure's API.

          + +

          Paragraph Breaks

          + +

          The recognised text had multiple lines breaking in the middle of the sentence. Therefore, I use what is called a pilcrow to specify paragraph breaks. But, rather than trying to draw the normal pilcrow, I just use the HTML entity &#182; which is the pilcrow character.

          + +

          Where is the code?

          + +

          I created a GitHub Gist for a sample Python script to take the PDF and print the text

          + +

          A more complete version with Automator scripts and an entire publishing pipeline will be available as a GitHub and Gitea repo soon.

          + +

          * In Part 2, I will discuss some more features *

          + + +
          + +
          + +
          + + + + + + \ No newline at end of file diff --git a/docs/posts/index.html b/docs/posts/index.html index 1698150..f4fab83 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -50,6 +50,21 @@
            +
          • A new method to blog
          • +
              +
            • Writing posts in markdown using pen and paper
            • +
            • Published On: 2022-11-07 23:29
            • +
            • Tags: + + Python, + + OCR, + + Microsoft Azure, + +
            + +
          • Why You No Host?
            • Why you should self-host with YunoHost
            • diff --git a/poetry.lock b/poetry.lock index ac8c4bd..64be20b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -14,12 +14,17 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "markdown2" -version = "2.4.3" +version = "2.4.6" description = "A fast and complete Python implementation of Markdown" category = "main" optional = false python-versions = ">=3.5, <4" +[package.extras] +all = ["pygments (>=2.7.3)", "wavedrom"] +code_syntax_highlighting = ["pygments (>=2.7.3)"] +wavedrom = ["wavedrom"] + [[package]] name = "markupsafe" version = "2.1.1" @@ -30,12 +35,15 @@ python-versions = ">=3.7" [[package]] name = "pygments" -version = "2.12.0" +version = "2.13.0" description = "Pygments is a syntax highlighting package written in Python." category = "main" optional = false python-versions = ">=3.6" +[package.extras] +plugins = ["importlib-metadata"] + [metadata] lock-version = "1.1" python-versions = "^3.9" @@ -89,7 +97,4 @@ markupsafe = [ {file = "MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247"}, {file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"}, ] -pygments = [ - {file = "Pygments-2.12.0-py3-none-any.whl", hash = "sha256:dc9c10fb40944260f6ed4c688ece0cd2048414940f1cea51b8b226318411c519"}, - {file = "Pygments-2.12.0.tar.gz", hash = "sha256:5eb116118f9612ff1ee89ac96437bb6b49e8f04d8a13b514ba26f620208e26eb"}, -] +pygments = [] -- cgit v1.2.3