From 84b1d539f71e1a7ccfa8749170f3349e6fc73545 Mon Sep 17 00:00:00 2001 From: navanchauhan Date: Sun, 25 Dec 2022 20:20:12 -0500 Subject: ignore pycache --- docs/feed.rss | 300 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 298 insertions(+), 2 deletions(-) (limited to 'docs/feed.rss') diff --git a/docs/feed.rss b/docs/feed.rss index 185227c..67e0987 100644 --- a/docs/feed.rss +++ b/docs/feed.rss @@ -4,8 +4,8 @@ Navan's Archive Rare Tips, Tricks and Posts https://web.navan.dev/en - Sat, 17 Dec 2022 20:25:31 -0000 - Sat, 17 Dec 2022 20:25:31 -0000 + Sun, 25 Dec 2022 20:18:51 -0000 + Sun, 25 Dec 2022 20:18:51 -0000 250 @@ -3792,6 +3792,302 @@ DescriptionThe bag-of-words model is a simplifying representation used in NLP, i ]]> + + + https://web.navan.dev/posts/2022-12-25-blog-to-toot.html + + + Posting blogs as Mastodon Toots + + + Cross posting blog posts to Mastodon + + https://web.navan.dev/posts/2022-12-25-blog-to-toot.html + Sun, 25 Dec 2022 17:32:00 -0000 + Posting blogs as Mastodon Toots + +

What is better than posting a blog post? Posting about your posting pipeline. I did this previously with Twitter.

+ +

the elephant in the room

+ +

mastodon.social does not support any formatting in the status posts. +Yes, there are other instances which have patches to enable features such as markdown formatting, but there is no upstream support.

+ +

time to code

+ +

My website is built using a really simple static site generator I wrote in Python. +Therefore, each post is self-contained in a Markdown file with the necessary metadata.

+ +

I am going to specify the path to the blog post, parse it and then publish it.

+ +

I initially planned on having a command line parser and some more flags.

+ +

interacting with mastodon

+ +

I ended up using mastodon.py rather than crafting requests by hand. Each status_post/toot call returns a status id that can then be used as the in_reply_to_id parameter of the next call.

+ +

For the code snippets, seeing that mastodon does not support native formatting, I am resorting to using ray-so.

+ +

reading markdown

+ +

I am using a bunch of regex hacks, and reading the blog post line by line. +Because there is no markdown support, I append all the links to the end of the toot. +For images, I upload them and attach them to the toot. +The initial toot is generated based off the title and the tags associated with the post.

+ +
+
# Regexes I am using

# ![alt text](path) -> groups are (alt text, path)
markdown_image = r'(?:!\[(.*?)\]\((.*?)\))'
# [label](target) -> groups are (label, target).
# This also matches the "[alt](path)" tail of an image, so strip images first.
markdown_links = r'(?:\[(.*?)\]\((.*?)\))'
# Only spaces and tabs are accepted as whitespace: with \s the match would
# run across the newline and swallow the start of the next front-matter key.
tags_within_metadata = r"tags: ([\w, \t]+)"
# Front matter fenced by '---' lines; use with re.DOTALL so group 1 can span lines.
metadata_regex = r"---\s*\n(.*?)\n---\s*\n"
+
+
+ +

This is useful when I want to get the exact data I want. +In this case, I can extract the tags from the front matter.

+ +
+
# Pull the front matter out of the post, then the comma-separated tag list.
metadata = re.search(metadata_regex, markdown_content, re.DOTALL)
if metadata:
    # Spaces/tabs only (not \s) so the capture stops at the end of the
    # "tags:" line instead of running into the following key.
    tags_match = re.search(r"tags: ([\w, \t]+)", metadata.group(1))
    if tags_match:
        # Strip padding around each tag and drop empty entries ("a,,b").
        tags = [tag.strip() for tag in tags_match.group(1).split(",") if tag.strip()]
+
+
+ +

code snippet support

+ +

I am running akashrchandran/Rayso-API.

+ +
+
import requests
+
def get_image(code, language: str = "python", title: str = "Code Snippet",
              timeout: float = 30):
    """Render a code snippet as an image through the local Rayso-API server.

    Args:
        code: Source text to render.
        language: Language name passed to the API.
        title: Title passed to the API.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The raw response body as bytes.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    params = (
        ('code', code),
        ('language', language),
        ('title', title),
    )

    response = requests.get(
        'http://localhost:3000/api', params=params, timeout=timeout
    )
    # Fail loudly rather than hand an error page back to the caller as image data.
    response.raise_for_status()

    return response.content
+
+
+ +

threads! threads! threads!

+ +

Even though Mastodon does officially have a higher character limit than Twitter, I prefer the way threads look.

+ +

result

+ +

Everything does seem to work, seeing that you are reading this on Mastodon and that I have updated this section.

+ +

+ +

what's next?

+ +

Here is the current code:

+ +
+
from mastodon import Mastodon
+from mastodon.errors import MastodonAPIError
+import requests
+import re
+
import os

# Read the access token from the environment so a real token never has to be
# committed; the original placeholder literal stays as the fallback.
mastodon = Mastodon(
    access_token=os.environ.get("MASTODON_ACCESS_TOKEN", 'reeeeee'),
    api_base_url="https://mastodon.social"
    )

# Prefix applied to site-relative links found in a post.
url_base = "https://web.navan.dev"
# Markdown source of the post to cross-post.
sample_markdown_file = "Content/posts/2022-12-25-blog-to-toot.md"

tags = []
toots = []
# Running index used to name the generated code-snippet images.
image_idx = 0
# ![alt text](path) -> groups are (alt text, path)
markdown_image = r'(?:!\[(.*?)\]\((.*?)\))'
# [label](target) -> groups are (label, target); also matches the tail of an image
markdown_links = r'(?:\[(.*?)\]\((.*?)\))'
+
def get_image(code, language: str = "python", title: str = "Code Snippet",
              timeout: float = 30):
    """Render a code snippet as an image through the local Rayso-API server.

    Args:
        code: Source text to render.
        language: Language name passed to the API.
        title: Title passed to the API.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The raw response body as bytes.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    params = (
        ('code', code),
        ('language', language),
        ('title', title),
    )

    response = requests.get(
        'http://localhost:3000/api', params=params, timeout=timeout
    )
    # Fail loudly rather than hand an error page back to the caller as image data.
    response.raise_for_status()

    return response.content
+
class TootContent:
    """Text, links and image paths that make up a single toot.

    Links are stored apart from the text and appended, space-separated, when
    the toot is rendered with ``get_text()`` / ``str()``.
    """

    MAX_TEXT_LENGTH = 400   # add_text refuses to grow the text beyond this
    MAX_TOTAL_LENGTH = 498  # add_link budget for text plus links
    LINK_LENGTH = 23        # every link is budgeted as this many characters
    MAX_IMAGES = 4          # add_image aborts beyond this many attachments

    def __init__(self, text: str = ""):
        self.text = text
        self.images = []
        self.links = []
        # Count this toot's own images, not an unrelated module-level list.
        self.image_count = len(self.images)

    def __str__(self):
        return self.get_text()

    def get_text(self):
        """Return the text followed by every link, separated by spaces."""
        return " ".join([self.text, *self.links])

    def get_length(self):
        """Return the budgeted length: text plus LINK_LENGTH per link."""
        return len(self.text) + self.LINK_LENGTH * len(self.links)

    def add_link(self, link):
        """Attach *link* if it fits the budget; return whether it was added.

        Links that are not absolute http(s) URLs are prefixed with the
        module-level ``url_base``.
        """
        # Measure against get_length() so links added earlier count too.
        if self.get_length() + self.LINK_LENGTH >= self.MAX_TOTAL_LENGTH:
            return False
        # Test the scheme rather than the first letter: "hello.html" is
        # relative, and an empty link must not raise IndexError.
        if not link.lower().startswith(("http://", "https://")):
            link = url_base + link
        self.links.append(link)
        return True

    def add_image(self, image):
        """Attach an image path; exit with status 1 once MAX_IMAGES are held."""
        if len(self.images) == self.MAX_IMAGES:
            # will handle in future
            print("cannot upload more than 4 images per toot")
            raise SystemExit(1)
        # upload image and get id
        self.images.append(image)
        self.image_count = len(self.images)

    def add_text(self, text):
        """Append *text* after a space; return False if it would not fit."""
        if len(self.text + text) > self.MAX_TEXT_LENGTH:
            return False
        self.text += f" {text}"
        return True

    def get_links(self):
        """Print the number of links attached so far."""
        print(len(self.links))
+
+
# --- parser state -----------------------------------------------------------
in_metadata = False
in_code_block = False

my_toots = []       # finished toots, in thread order
text = ""
images = []
image_links = []    # image paths found in the post (not uploaded yet)
extra_links = []    # links that did not fit into their toot
tags = []

code_block = ""     # lines of the fenced block currently being read
language = "bash"   # language of that block, taken from the opening fence

current_toot = TootContent()

# Front matter fenced by '---' lines; group 1 is everything between the fences.
metadata_regex = r"---\s*\n(.*?)\n---\s*\n"


# Explicit encoding: the platform default varies and posts may contain
# non-ASCII text (assumes the post is saved as UTF-8).
with open(sample_markdown_file, encoding="utf-8") as f:
    markdown_content = f.read()


metadata = re.search(metadata_regex, markdown_content, re.DOTALL)
if metadata:
    # Spaces/tabs only (not \s) so the capture stops at the end of the
    # "tags:" line instead of running into the following key.
    tags_match = re.search(r"tags: ([\w, \t]+)", metadata.group(1))
    if tags_match:
        tags = [tag.strip() for tag in tags_match.group(1).split(",") if tag.strip()]
    # Cut exactly the matched front matter. Splitting on the last "---\n"
    # would also discard any body text above a horizontal rule in the post.
    markdown_content = markdown_content[metadata.end():].strip()
else:
    markdown_content = markdown_content.rsplit("---\n", 1)[-1].strip()
+
# Walk the post line by line and split it into a thread of toots:
# the first heading becomes the title toot (with hashtags), later headings
# start a new toot, fenced code becomes an attached image, and markdown links
# are moved out of the text onto the toot's link list.
for line in markdown_content.split("\n"):
    # Flush a full toot, then keep processing this line. Previously the line
    # that arrived while the toot was full was silently dropped.
    if current_toot.get_length() >= 400:
        my_toots.append(current_toot)
        current_toot = TootContent()

    # Fences and code lines come first so that '#' comments and blank lines
    # inside a code block are kept as code, not treated as headings or skipped.
    if "```" in line:
        in_code_block = not in_code_block
        if in_code_block:
            language = line.strip().replace("```", '')
        else:
            snippet_path = f"code-snipped_{image_idx}.png"
            with open(snippet_path, "wb") as snippet_file:
                snippet_file.write(get_image(code_block, language))
            current_toot.add_image(snippet_path)
            image_idx += 1
            code_block = ""
        continue
    if in_code_block:
        # NOTE(review): replaces runs of three spaces, not four - confirm intent.
        line = line.replace("   ", "\t")
        code_block += line + "\n"
        continue

    if line.strip() == '':
        continue

    if line.startswith('#'):
        # Drop only the leading markers and the padding around the heading;
        # the old `"".strip()` stripped the empty replacement string instead.
        heading = line.lstrip("#").strip()
        if len(my_toots) == 0:
            current_toot.add_text(
                f"{heading}: a cross-posted blog post \n"
                )
            hashtags = ",".join(f"#{tag.strip()}" for tag in tags)
            current_toot.add_text(hashtags)
            my_toots.append(current_toot)
            current_toot = TootContent()
        else:
            my_toots.append(current_toot)
            current_toot = TootContent(text=f"{heading.title()}:")
        continue

    # Remember image paths (not handled yet) and remove the image markup.
    # The old code iterated the link regex and called .append on a tuple.
    for image_link in re.findall(markdown_image, line):
        image_links.append(image_link[1])
    line = re.sub(markdown_image, "", line)

    # Move each link out of the text, leaving only its label behind.
    for link in re.findall(markdown_links, line):
        if not current_toot.add_link(link[1]):
            extra_links.append(link[1])
        line = line.replace(f'[{link[0]}]({link[1]})', link[0])

    if not current_toot.add_text(line):
        my_toots.append(current_toot)
        current_toot = TootContent(line)

my_toots.append(current_toot)
+
# Post the toots as one thread: each status replies to the previous one.
in_reply_to_id = None
for toot in my_toots:
    image_ids = []
    for image in toot.images:
        print(f"uploading image, {image}")
        try:
            image_id = mastodon.media_post(image)
        except MastodonAPIError:
            # Best effort: a failed upload only loses that attachment.
            print("failed to upload. Continuing...")
        else:
            image_ids.append(image_id.id)

    status_text = toot.get_text()
    # The parser can leave empty placeholder toots behind; skip them instead
    # of posting a status with neither text nor media.
    if not status_text.strip() and not image_ids:
        continue

    in_reply_to_id = mastodon.status_post(
        status_text, in_reply_to_id=in_reply_to_id, media_ids=image_ids or None
        ).id
+
+
+ +

Not the best thing I have ever written, but it works!

+]]>
+
+ https://web.navan.dev/posts/2020-01-14-Converting-between-PIL-NumPy.html -- cgit v1.2.3