From 84b1d539f71e1a7ccfa8749170f3349e6fc73545 Mon Sep 17 00:00:00 2001 From: navanchauhan Date: Sun, 25 Dec 2022 20:20:12 -0500 Subject: ignore pycache --- docs/feed.rss | 300 +++++++++++++++++++++++++++- docs/index.html | 13 ++ docs/posts/2022-12-25-blog-to-toot.html | 340 ++++++++++++++++++++++++++++++++ docs/posts/index.html | 13 ++ 4 files changed, 664 insertions(+), 2 deletions(-) create mode 100644 docs/posts/2022-12-25-blog-to-toot.html (limited to 'docs') diff --git a/docs/feed.rss b/docs/feed.rss index 185227c..67e0987 100644 --- a/docs/feed.rss +++ b/docs/feed.rss @@ -4,8 +4,8 @@ Navan's Archive Rare Tips, Tricks and Posts https://web.navan.dev/en - Sat, 17 Dec 2022 20:25:31 -0000 - Sat, 17 Dec 2022 20:25:31 -0000 + Sun, 25 Dec 2022 20:18:51 -0000 + Sun, 25 Dec 2022 20:18:51 -0000 250 @@ -3792,6 +3792,302 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i ]]> + + + https://web.navan.dev/posts/2022-12-25-blog-to-toot.html + + + Posting blogs as Mastodon Toots + + + Cross posting blog posts to Mastodon + + https://web.navan.dev/posts/2022-12-25-blog-to-toot.html + Sun, 25 Dec 2022 17:32:00 -0000 + Posting blogs as Mastodon Toots + +

What is better than posting a blog post? Posting about your posting pipeline. I did this previously with Twitter.

+ +

the elephant in the room

+ +

mastodon.social does not support any formatting in the status posts. +Yes, there are other instances which have patches to enable features such as markdown formatting, but there is no upstream support.

+ +

time to code

+ +

My website is built using a really simple static site generator I wrote in Python. +Therefore, each post is self-contained in a Markdown file with the necessary metadata.

+ +

I am going to specify the path to the blog post, parse it and then publish it.

+ +

I initially planned on having a command line parser and some more flags.

+ +

interacting with mastodon

+ +

I ended up using Mastodon.py rather than crafting requests by hand. Each status_post/toot call returns a status whose id can then be passed as the in_reply_to_id parameter of the next call.

+ +

For the code snippets, seeing that mastodon does not support native formatting, I am resorting to using ray-so.

+ +

reading markdown

+ +

I am using a bunch of regex hacks, and reading the blog post line by line. +Because there is no markdown support, I append all the links to the end of the toot. +For images, I upload them and attach them to the toot. +The initial toot is generated based off the title and the tags associated with the post.

+ +
+
# Regexes I am using
+
+# Matches ![alt](src): group 1 is the alt text, group 2 the image source.
+markdown_image = r'(?:!\[(.*?)\]\((.*?)\))'
+# Matches [text](target): group 1 is the link text, group 2 the target.
+# It also matches the "[alt](src)" tail of an image, so images have to be
+# removed from a line before links are searched for.
+markdown_links = r'(?:\[(.*?)\]\((.*?)\))'
+# Captures the run of word characters, commas and whitespace after "tags: ".
+tags_within_metadata = r"tags: ([\w,\s]+)"
+# Captures the text between two "---" fence lines; meant to be used with
+# re.DOTALL so that "." can span the lines in between.
+metadata_regex = r"---\s*\n(.*?)\n---\s*\n"
+
+
+ +

Regular expressions are useful for pulling out exactly the data I need. +In this case, I can extract the tags from the front matter.

+ +
+
metadata = re.search(metadata_regex, markdown_content, re.DOTALL)
+if metadata:
+    tags_match = re.search(r"tags: ([\w,\s]+)", metadata.group(1))
+    if tags_match:
+        tags = tags_match.group(1).split(",")
+
+
+ +

code snippet support

+ +

I am running akashrchandran/Rayso-API.

+ +
+
import requests
+
+def get_image(code, language: str = "python", title: str = "Code Snippet"):
+    params = (
+        ('code', code),
+        ('language', language),
+        ('title', title),
+    )
+
+    response = requests.get('http://localhost:3000/api', params=params)
+
+    return response.content
+
+
+ +

threads! threads! threads!

+ +

Even though Mastodon officially has a higher character limit than Twitter, +I prefer the way threads look.

+ +

result

+ +

Everything does seem to work, +seeing that you are reading this on Mastodon and that I have updated this section.

+ +

+ +

what's next?

+ +

Here is the current code:

+ +
+
from mastodon import Mastodon
+from mastodon.errors import MastodonAPIError
+import requests
+import re
+
+# API client used for every upload and status post below.
+mastodon = Mastodon(
+    # NOTE(review): looks like a redacted placeholder; a real access token
+    # has to be supplied before running.
+    access_token='reeeeee',
+    api_base_url="https://mastodon.social"
+    )
+
+# Prefixed to links that do not already start with "h" (see TootContent.add_link).
+url_base = "https://web.navan.dev"
+# Markdown source of the post that gets cross-posted.
+sample_markdown_file = "Content/posts/2022-12-25-blog-to-toot.md"
+
+tags = []
+toots = []
+# Running counter used to name the rendered code-snippet image files.
+image_idx = 0
+# Matches ![alt](src): group 1 is the alt text, group 2 the image source.
+markdown_image = r'(?:!\[(.*?)\]\((.*?)\))'
+# Matches [text](target): group 1 is the link text, group 2 the target.
+markdown_links = r'(?:\[(.*?)\]\((.*?)\))'
+
+def get_image(code, language: str = "python", title: str = "Code Snippet"):
+    params = (
+        ('code', code),
+        ('language', language),
+        ('title', title),
+    )
+
+    response = requests.get('http://localhost:3000/api', params=params)
+
+    return response.content
+
+class TootContent:
+    def __init__(self, text: str = ""):
+        self.text = text
+        self.images = []
+        self.links = []
+        self.image_count = len(images)
+
+    def __str__(self):
+        toot_text = self.text
+        for link in self.links:
+            toot_text += " " + link
+        return toot_text
+
+    def get_text(self):
+        toot_text = self.text
+        for link in self.links:
+            toot_text += " " + link
+        return toot_text
+
+    def get_length(self):
+        length = len(self.text)
+        for link in self.links:
+            length += 23
+        return length
+
+    def add_link(self, link):
+        if len(self.text) + 23 < 498:
+            if link[0].lower() != 'h':
+                link = url_base + link
+            self.links.append(link)
+            return True
+        return False
+
+    def add_image(self, image):
+
+        if len(self.images) == 4:
+            # will handle in future
+            print("cannot upload more than 4 images per toot") 
+            exit(1)
+        # upload image and get id
+        self.images.append(image)
+        self.image_count = len(self.images)
+
+    def add_text(self, text):
+        if len(self.text + text) > 400:
+            return False
+        else:
+            self.text += f" {text}"
+            return True
+
+    def get_links(self):
+        print(len(self.links))
+
+
+in_metadata = False
+in_code_block = False
+
+my_toots = []
+text = ""
+images = []
+image_links = []
+extra_links = []
+tags = []
+
+code_block = ""
+language = "bash"
+
+current_toot = TootContent()
+
+metadata_regex = r"---\s*\n(.*?)\n---\s*\n"
+
+
+with open(sample_markdown_file) as f:
+    markdown_content = f.read()
+
+
+metadata = re.search(metadata_regex, markdown_content, re.DOTALL)
+if metadata:
+    tags_match = re.search(r"tags: ([\w,\s]+)", metadata.group(1))
+    if tags_match:
+        tags = tags_match.group(1).split(",")
+
+
+markdown_content = markdown_content.rsplit("---\n",1)[-1].strip()
+
+for line in markdown_content.split("\n"):
+    if current_toot.get_length() < 400:
+        if line.strip() == '':
+            continue
+        if line[0] == '#':
+            line = line.replace("#","".strip())
+            if len(my_toots) == 0:
+                current_toot.add_text(
+                    f"{line}: a cross-posted blog post \n"
+                    )
+                hashtags = ""
+                for tag in tags:
+                    hashtags += f"#{tag.strip()},"
+                current_toot.add_text(hashtags[:-1])
+                my_toots.append(current_toot)
+                current_toot = TootContent()
+            else:
+                my_toots.append(current_toot)
+                current_toot = TootContent(text=f"{line.title()}:")
+            continue
+        else:
+            if "```" in line:
+                in_code_block = not in_code_block
+                if in_code_block:
+                    language = line.strip().replace("```",'')
+                    continue
+                else:
+                    with open(f"code-snipped_{image_idx}.png","wb") as f:
+                        f.write(get_image(code_block, language))
+                    current_toot.add_image(f"code-snipped_{image_idx}.png")
+                    image_idx += 1
+                    code_block = ""
+                continue
+            if in_code_block:
+                line = line.replace("   ","\t")
+                code_block += line + "\n"
+                continue
+            if len(re.findall(markdown_image,line)) > 0:
+                for image_link in re.findall(markdown_links, line):
+                    image_link.append(image_link[1])
+                    # not handled yet
+                line = re.sub(markdown_image,"",line)
+            if len(re.findall(markdown_links,line)) > 0:
+                for link in re.findall(markdown_links, line):
+                    if not (current_toot.add_link(link[1])):
+                        extra_links.append(link[1])
+                    line = line.replace(f'[{link[0]}]({link[1]})',link[0])
+            if not current_toot.add_text(line):
+                my_toots.append(current_toot)
+                current_toot = TootContent(line)
+    else:
+        my_toots.append(current_toot)
+        current_toot = TootContent()
+
+my_toots.append(current_toot)
+
+in_reply_to_id = None
+for toot in my_toots:
+    image_ids = []
+    for image in toot.images:
+        print(f"uploading image, {image}")
+        try:
+            image_id = mastodon.media_post(image)
+            image_ids.append(image_id.id)
+        except MastodonAPIError:
+            print("failed to upload. Continuing...")
+    if image_ids == []:
+        image_ids = None
+
+    in_reply_to_id = mastodon.status_post(
+        toot.get_text(), in_reply_to_id=in_reply_to_id, media_ids=image_ids
+        ).id
+
+
+ +

Not the best thing I have ever written, but it works!

+]]>
+
+ https://web.navan.dev/posts/2020-01-14-Converting-between-PIL-NumPy.html diff --git a/docs/index.html b/docs/index.html index 8a9bb3d..2952918 100644 --- a/docs/index.html +++ b/docs/index.html @@ -47,6 +47,19 @@
    +
  • Posting blogs as Mastodon Toots
  • +
      +
    • Cross posting blog posts to Mastodon
    • +
    • Published On: 2022-12-25 17:32
    • +
    • Tags: + + Python, + + Mastodon + +
    + +
  • A new method to blog
    • Writing posts in markdown using pen and paper
    • diff --git a/docs/posts/2022-12-25-blog-to-toot.html b/docs/posts/2022-12-25-blog-to-toot.html new file mode 100644 index 0000000..297baad --- /dev/null +++ b/docs/posts/2022-12-25-blog-to-toot.html @@ -0,0 +1,340 @@ + + + + + + + + + Hey - Post - Posting blogs as Mastodon Toots + + + + + + + + + + + + + + + + + + + + + + + + + +
      +

      Posting blogs as Mastodon Toots

      + +

      What is better than posting a blog post? Posting about your posting pipeline. I did this previously with Twitter.

      + +

      the elephant in the room

      + +

      mastodon.social does not support any formatting in the status posts. +Yes, there are other instances which have patches to enable features such as markdown formatting, but there is no upstream support.

      + +

      time to code

      + +

      My website is built using a really simple static site generator I wrote in Python. +Therefore, each post is self-contained in a Markdown file with the necessary metadata.

      + +

      I am going to specify the path to the blog post, parse it and then publish it.

      + +

      I initially planned on having a command line parser and some more flags.

      + +

      interacting with mastodon

      + +

      I ended up using Mastodon.py rather than crafting requests by hand. Each status_post/toot call returns a status whose id can then be passed as the in_reply_to_id parameter of the next call.

      + +

      For the code snippets, seeing that mastodon does not support native formatting, I am resorting to using ray-so.

      + +

      reading markdown

      + +

      I am using a bunch of regex hacks, and reading the blog post line by line. +Because there is no markdown support, I append all the links to the end of the toot. +For images, I upload them and attach them to the toot. +The initial toot is generated based off the title and the tags associated with the post.

      + +
      +
      # Regexes I am using
      +
      +# Matches ![alt](src): group 1 is the alt text, group 2 the image source.
      +markdown_image = r'(?:!\[(.*?)\]\((.*?)\))'
      +# Matches [text](target): group 1 is the link text, group 2 the target.
      +# It also matches the "[alt](src)" tail of an image, so images have to be
      +# removed from a line before links are searched for.
      +markdown_links = r'(?:\[(.*?)\]\((.*?)\))'
      +# Captures the run of word characters, commas and whitespace after "tags: ".
      +tags_within_metadata = r"tags: ([\w,\s]+)"
      +# Captures the text between two "---" fence lines; meant to be used with
      +# re.DOTALL so that "." can span the lines in between.
      +metadata_regex = r"---\s*\n(.*?)\n---\s*\n"
      +
      +
      + +

      Regular expressions are useful for pulling out exactly the data I need. +In this case, I can extract the tags from the front matter.

      + +
      +
      metadata = re.search(metadata_regex, markdown_content, re.DOTALL)
      +if metadata:
      +    tags_match = re.search(r"tags: ([\w,\s]+)", metadata.group(1))
      +    if tags_match:
      +        tags = tags_match.group(1).split(",")
      +
      +
      + +

      code snippet support

      + +

      I am running akashrchandran/Rayso-API.

      + +
      +
      import requests
      +
      +def get_image(code, language: str = "python", title: str = "Code Snippet"):
      +    params = (
      +        ('code', code),
      +        ('language', language),
      +        ('title', title),
      +    )
      +
      +    response = requests.get('http://localhost:3000/api', params=params)
      +
      +    return response.content
      +
      +
      + +

      threads! threads! threads!

      + +

      Even though Mastodon officially has a higher character limit than Twitter, +I prefer the way threads look.

      + +

      result

      + +

      Everything does seem to work, +seeing that you are reading this on Mastodon and that I have updated this section.

      + +

      + +

      what's next?

      + +

      Here is the current code:

      + +
      +
      from mastodon import Mastodon
      +from mastodon.errors import MastodonAPIError
      +import requests
      +import re
      +
      +# API client used for every upload and status post below.
      +mastodon = Mastodon(
      +    # NOTE(review): looks like a redacted placeholder; a real access token
      +    # has to be supplied before running.
      +    access_token='reeeeee',
      +    api_base_url="https://mastodon.social"
      +    )
      +
      +# Prefixed to links that do not already start with "h" (see TootContent.add_link).
      +url_base = "https://web.navan.dev"
      +# Markdown source of the post that gets cross-posted.
      +sample_markdown_file = "Content/posts/2022-12-25-blog-to-toot.md"
      +
      +tags = []
      +toots = []
      +# Running counter used to name the rendered code-snippet image files.
      +image_idx = 0
      +# Matches ![alt](src): group 1 is the alt text, group 2 the image source.
      +markdown_image = r'(?:!\[(.*?)\]\((.*?)\))'
      +# Matches [text](target): group 1 is the link text, group 2 the target.
      +markdown_links = r'(?:\[(.*?)\]\((.*?)\))'
      +
      +def get_image(code, language: str = "python", title: str = "Code Snippet"):
      +    params = (
      +        ('code', code),
      +        ('language', language),
      +        ('title', title),
      +    )
      +
      +    response = requests.get('http://localhost:3000/api', params=params)
      +
      +    return response.content
      +
      +class TootContent:
      +    def __init__(self, text: str = ""):
      +        self.text = text
      +        self.images = []
      +        self.links = []
      +        self.image_count = len(images)
      +
      +    def __str__(self):
      +        toot_text = self.text
      +        for link in self.links:
      +            toot_text += " " + link
      +        return toot_text
      +
      +    def get_text(self):
      +        toot_text = self.text
      +        for link in self.links:
      +            toot_text += " " + link
      +        return toot_text
      +
      +    def get_length(self):
      +        length = len(self.text)
      +        for link in self.links:
      +            length += 23
      +        return length
      +
      +    def add_link(self, link):
      +        if len(self.text) + 23 < 498:
      +            if link[0].lower() != 'h':
      +                link = url_base + link
      +            self.links.append(link)
      +            return True
      +        return False
      +
      +    def add_image(self, image):
      +
      +        if len(self.images) == 4:
      +            # will handle in future
      +            print("cannot upload more than 4 images per toot") 
      +            exit(1)
      +        # upload image and get id
      +        self.images.append(image)
      +        self.image_count = len(self.images)
      +
      +    def add_text(self, text):
      +        if len(self.text + text) > 400:
      +            return False
      +        else:
      +            self.text += f" {text}"
      +            return True
      +
      +    def get_links(self):
      +        print(len(self.links))
      +
      +
      +in_metadata = False
      +in_code_block = False
      +
      +my_toots = []
      +text = ""
      +images = []
      +image_links = []
      +extra_links = []
      +tags = []
      +
      +code_block = ""
      +language = "bash"
      +
      +current_toot = TootContent()
      +
      +metadata_regex = r"---\s*\n(.*?)\n---\s*\n"
      +
      +
      +with open(sample_markdown_file) as f:
      +    markdown_content = f.read()
      +
      +
      +metadata = re.search(metadata_regex, markdown_content, re.DOTALL)
      +if metadata:
      +    tags_match = re.search(r"tags: ([\w,\s]+)", metadata.group(1))
      +    if tags_match:
      +        tags = tags_match.group(1).split(",")
      +
      +
      +markdown_content = markdown_content.rsplit("---\n",1)[-1].strip()
      +
      +for line in markdown_content.split("\n"):
      +    if current_toot.get_length() < 400:
      +        if line.strip() == '':
      +            continue
      +        if line[0] == '#':
      +            line = line.replace("#","".strip())
      +            if len(my_toots) == 0:
      +                current_toot.add_text(
      +                    f"{line}: a cross-posted blog post \n"
      +                    )
      +                hashtags = ""
      +                for tag in tags:
      +                    hashtags += f"#{tag.strip()},"
      +                current_toot.add_text(hashtags[:-1])
      +                my_toots.append(current_toot)
      +                current_toot = TootContent()
      +            else:
      +                my_toots.append(current_toot)
      +                current_toot = TootContent(text=f"{line.title()}:")
      +            continue
      +        else:
      +            if "```" in line:
      +                in_code_block = not in_code_block
      +                if in_code_block:
      +                    language = line.strip().replace("```",'')
      +                    continue
      +                else:
      +                    with open(f"code-snipped_{image_idx}.png","wb") as f:
      +                        f.write(get_image(code_block, language))
      +                    current_toot.add_image(f"code-snipped_{image_idx}.png")
      +                    image_idx += 1
      +                    code_block = ""
      +                continue
      +            if in_code_block:
      +                line = line.replace("   ","\t")
      +                code_block += line + "\n"
      +                continue
      +            if len(re.findall(markdown_image,line)) > 0:
      +                for image_link in re.findall(markdown_links, line):
      +                    image_link.append(image_link[1])
      +                    # not handled yet
      +                line = re.sub(markdown_image,"",line)
      +            if len(re.findall(markdown_links,line)) > 0:
      +                for link in re.findall(markdown_links, line):
      +                    if not (current_toot.add_link(link[1])):
      +                        extra_links.append(link[1])
      +                    line = line.replace(f'[{link[0]}]({link[1]})',link[0])
      +            if not current_toot.add_text(line):
      +                my_toots.append(current_toot)
      +                current_toot = TootContent(line)
      +    else:
      +        my_toots.append(current_toot)
      +        current_toot = TootContent()
      +
      +my_toots.append(current_toot)
      +
      +in_reply_to_id = None
      +for toot in my_toots:
      +    image_ids = []
      +    for image in toot.images:
      +        print(f"uploading image, {image}")
      +        try:
      +            image_id = mastodon.media_post(image)
      +            image_ids.append(image_id.id)
      +        except MastodonAPIError:
      +            print("failed to upload. Continuing...")
      +    if image_ids == []:
      +        image_ids = None
      +
      +    in_reply_to_id = mastodon.status_post(
      +        toot.get_text(), in_reply_to_id=in_reply_to_id, media_ids=image_ids
      +        ).id
      +
      +
      + +

      Not the best thing I have ever written, but it works!

      + + +
      + +
      + +
      + + + + + + \ No newline at end of file diff --git a/docs/posts/index.html b/docs/posts/index.html index f4fab83..fd6d5f7 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -50,6 +50,19 @@