- Joined
- Apr 22, 2022
Here's a tool for quoting threads on Bluesky. I've found including the text of Twitter threads in posts helpful because you can find it later with the (forum) thread search, so here's the same but for Bluesky. It's not a replacement for screenshots.
Given a bsky.app post URL, it outputs that post and replies by the original author as a BBCode quote block. It restores full inline links (not truncated), and shows the details of "embeds" or "cards": images, video, external links, and quoted posts.
You'll need Python 3.9 or newer, but you don't need anything except the script itself (no dependencies). You don't need a Bluesky account or any credentials.
Limitations: It doesn't retrieve conversations between different people (it could be adapted to do so). "Detached" quote posts will likely cause an error; reply here or PM me if you find a thread that contains one. Threads are only fetched up to 100 posts; change the depth number in "DEFAULT_API_PARAMS" if you find a thread longer than that (the limit is 1,000).
Usage example:
Example output with https://bsky.app/profile/erininthemorning.com/post/3leuq2hskis2x:
Code below.
Given a bsky.app post URL, it outputs that post and replies by the original author as a BBCode quote block. It restores full inline links (not truncated), and shows the details of "embeds" or "cards": images, video, external links, and quoted posts.
You'll need Python 3.9 or newer, but you don't need anything except the script itself (no dependencies). You don't need a Bluesky account or any credentials.
Limitations: It doesn't retrieve conversations between different people (it could be adapted to do so). "Detached" quote posts will likely cause an error; reply here or PM me if you find a thread that contains one. Threads are only fetched up to 100 posts; change the depth number in "DEFAULT_API_PARAMS" if you find a thread longer than that (the limit is 1,000).
Usage example:
Bash:
python3 quote-bsky-thread.py.txt YOUR_BSKY.APP_URL
Example output with https://bsky.app/profile/erininthemorning.com/post/3leuq2hskis2x:
Erin Reed (@erininthemorning.com) at 23:18 UTC on Friday January 3 2025:
I just landed in Missoula and the airplane in front of us is literally stuck in the snow and at this rate we may be as well.
• Image: (No alt text)
• Image: (No alt text)
Erin Reed (@erininthemorning.com) at 23:52 UTC on Friday January 3 2025:
The workers are literally throwing snowballs at each other
• Video: Watch via hlsplayer.org
• Video stream URL for yt-dlp: https://video.bsky.app/watch/did%3Aplc%3Am65ifh7vn5zdgs7izcmht4gy/bafkreidcagqucq3s4ls5lrac6ry4ao3df2fyik3ku5bsp3sausu5poybre/playlist.m3u8
Code below.
Python:
from __future__ import annotations
from collections import deque
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
import json
import re
import sys
import urllib.parse
import urllib.request
# XRPC endpoint that is queried (without credentials) for a post's thread.
THREAD_ENDPOINT = "https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread"
# Query parameters sent with every request. "depth" limits how far down the
# reply chain the API returns; raise it for threads longer than this.
DEFAULT_API_PARAMS = {"depth": 100}
def web_url_to_at_uri(url: str) -> str:
    """Translate a bsky.app post URL into the matching ``at://`` URI.

    The handle and record key are lifted out of *url* and placed into an
    ``app.bsky.feed.post`` URI. When *url* does not start with the expected
    ``https://bsky.app/profile/{handle}/post/{rkey}`` shape, a message is
    written to stderr and the process exits with status 1.
    """
    pattern = r"https://bsky\.app/profile/(?P<handle>[^/]+)/post/(?P<rkey>\w+)"
    found = re.match(pattern, url)
    if found is not None:
        return "at://{handle}/app.bsky.feed.post/{rkey}".format(**found.groupdict())

    print(
        "URL didn't match expected format https://bsky.app/profile/{handle}/post/{rkey}",
        file=sys.stderr,
    )
    sys.exit(1)
def build_query_string(at_uri: str) -> str:
    """Return the URL-encoded query: the default API parameters plus ``uri``."""
    # A later key wins, so "uri" would override any "uri" in the defaults.
    params = {**DEFAULT_API_PARAMS, "uri": at_uri}
    return urllib.parse.urlencode(params)
def web_url_to_api_query_url(url: str) -> str:
    """Build the full thread-API request URL for a bsky.app post URL."""
    query = build_query_string(web_url_to_at_uri(url))
    return THREAD_ENDPOINT + "?" + query
def fetch_thread(url: str, timeout: float = 30.0) -> dict:
    """Download and decode the thread JSON for a bsky.app post URL.

    Args:
        url: Web URL of the post, as accepted by ``web_url_to_api_query_url``.
        timeout: Seconds to wait on the network before giving up. Without a
            timeout ``urlopen`` blocks indefinitely on a stalled connection.

    Returns:
        The parsed JSON response body.

    Network failures and timeouts propagate as raised by ``urllib``.
    """
    api_url = web_url_to_api_query_url(url)
    with urllib.request.urlopen(api_url, timeout=timeout) as response:
        body = response.read().decode("utf-8")
    return json.loads(body)
@dataclass(frozen=True)
class Author:
    """The account that wrote a post."""

    # Name shown in the output; equals the handle when the profile has none.
    display_name: str
    handle: str
    did: str

    @staticmethod
    def from_json(post_dict: dict) -> Author:
        """Build an Author from the ``author`` object of a post view.

        ``displayName`` may be absent or empty for accounts that never set
        one, so the handle is used as the display name in that case instead
        of raising ``KeyError``.
        """
        handle = post_dict["handle"]
        return Author(
            display_name=post_dict.get("displayName") or handle,
            handle=handle,
            did=post_dict["did"],
        )
@dataclass(frozen=True)
class Embed:
    """Base class for things attached to a post (link card, image, video, quote)."""

    @staticmethod
    def from_json(json_dict) -> list[Embed]:
        """Return the embeds described by ``json_dict["embed"]``.

        An absent embed, or an embed of an unrecognised type, yields an
        empty list. The input dictionary is never modified.
        """
        embed_dict = json_dict.get("embed")
        # No embeds
        if embed_dict is None:
            return []
        return Embed._from_view(embed_dict)

    @staticmethod
    def _from_view(embed_dict: dict) -> list[Embed]:
        """Convert one embed view object into zero or more Embed instances."""
        embed_type = embed_dict.get("$type", "")

        if embed_type.startswith("app.bsky.embed.external"):
            return [
                EmbeddedLink(
                    title=embed_dict["external"]["title"],
                    url=embed_dict["external"]["uri"],
                )
            ]

        if embed_type.startswith("app.bsky.embed.images"):
            return [
                Image(alt_text=image_dict["alt"], url=image_dict["fullsize"])
                for image_dict in embed_dict["images"]
            ]

        if embed_type.startswith("app.bsky.embed.video"):
            return [Video(playlist_url=embed_dict["playlist"])]

        # Must be tested before the plain "record" prefix below, which it
        # shares. Here the quoted record sits one level deeper, next to a
        # separate "media" embed.
        if embed_type.startswith("app.bsky.embed.recordWithMedia"):
            media = Embed._from_view(embed_dict.get("media") or {})
            record_view = (embed_dict.get("record") or {}).get("record") or {}
            return media + Embed._quoted_post(record_view)

        if embed_type.startswith("app.bsky.embed.record"):
            return Embed._quoted_post(embed_dict.get("record") or {})

        # Other embed type
        return []

    @staticmethod
    def _quoted_post(record_view: dict) -> list[Embed]:
        """Turn a quoted-record view into a ``[QuotedPost]``, or ``[]``.

        Views without a "value" (e.g. a detached or not-found quote carries
        no record content) cannot be rendered as a post and are skipped
        rather than crashing the whole thread.
        """
        if "value" not in record_view:
            return []
        # A quoted post doesn't quite have the same representation as a
        # normal post, so reshape a shallow copy into the normal form and
        # leave the caller's JSON untouched.
        post = dict(record_view)
        post["record"] = post["value"]
        nested_embeds = post.get("embeds")
        if nested_embeds:
            # Only the first embed of the quoted post is carried over.
            post["embed"] = nested_embeds[0]
        return [QuotedPost(Post.from_json({"post": post}))]
@dataclass(frozen=True)
class QuotedPost(Embed):
    """Embed that wraps another post quoted by this one."""

    post: Post

    def __str__(self):
        # Horizontal rules fence the quoted post off from the quoting one.
        rendered_post = str(self.post)
        return f"• Quoted post:\n[HR][/HR]\n{rendered_post}\n[HR][/HR]"
@dataclass(frozen=True)
class EmbeddedLink(Embed):
    """External link card attached to a post."""

    title: str
    url: str

    def __str__(self):
        # A card with an empty title would render as a link with no visible
        # text, so show the URL itself in that case.
        label = self.title or self.url
        return f'• Link card: [URL="{self.url}"]{label}[/URL]'
@dataclass(frozen=True)
class Image(Embed):
    """Image attached to a post: its alt text and the image URL."""

    alt_text: str
    url: str

    def __str__(self):
        # The alt text doubles as the link text; use a placeholder if empty.
        if self.alt_text:
            label = self.alt_text
        else:
            label = "(No alt text)"
        return f'• Image: [URL="{self.url}"]{label}[/URL]'
@dataclass(frozen=True)
class Video(Embed):
    """Video attached to a post, identified by its playlist URL."""

    playlist_url: str

    def __str__(self):
        # The web player receives the playlist location as its "url" parameter.
        player_query = urllib.parse.urlencode({"url": self.playlist_url})
        watch_online = "https://www.hlsplayer.org/play?" + player_query
        return f"""\
• Video: [URL="{watch_online}"]Watch via hlsplayer.org[/URL]
• Video stream URL for yt-dlp: [ICODE]{self.playlist_url}[/ICODE]"""
@dataclass(frozen=True)
class InlineLink:
    """A link inside post text: the byte range it covers and its target URL."""

    start_byte: int
    end_byte: int
    url: str

    @staticmethod
    def from_facet_segment_json(json_dict: dict) -> Optional[InlineLink]:
        """Build an InlineLink from one facet, or return None.

        The first feature of type ``app.bsky.richtext.facet#link`` supplies
        the URL; a facet with no such feature yields None.
        """
        span = json_dict["index"]
        first, last = span["byteStart"], span["byteEnd"]
        link_feature = next(
            (
                candidate
                for candidate in json_dict["features"]
                if candidate["$type"] == "app.bsky.richtext.facet#link"
            ),
            None,
        )
        if link_feature is None:
            return None
        return InlineLink(start_byte=first, end_byte=last, url=link_feature["uri"])

    def insert_bbcode_link_in_bytes(self, text_bytes: bytearray):
        """Replace this link's byte range in *text_bytes* with a BBCode URL tag.

        The buffer is modified in place and normally grows, because the full
        URL is used both as the target and as the visible text.
        """
        markup = f"""[URL="{self.url}"]{self.url}[/URL]"""
        text_bytes[slice(self.start_byte, self.end_byte)] = markup.encode("utf-8")
@dataclass(frozen=True)
class Post:
    """One post of a thread together with its (optionally filtered) replies."""

    author: Author
    created_at: datetime  # timezone-aware, normalised to UTC by from_json
    text: str
    url: str
    inline_links: list[InlineLink]
    embeds: list[Embed]
    replies: list[Post]

    @staticmethod
    def _parse_created_at(raw: str) -> datetime:
        """Parse an ISO-8601 timestamp into an aware UTC datetime.

        Handles a trailing "Z", explicit UTC offsets, and fractional seconds
        of any length. A timestamp without zone information is taken as UTC.
        """
        from datetime import timezone

        text = raw.strip()
        if text.endswith("Z"):
            text = text[:-1] + "+00:00"
        # datetime.fromisoformat before Python 3.11 only accepts 3 or 6
        # fractional digits, so normalise the fraction to exactly 6.
        text = re.sub(
            r"\.(\d+)",
            lambda m: "." + m.group(1)[:6].ljust(6, "0"),
            text,
            count=1,
        )
        parsed = datetime.fromisoformat(text)
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        # The rendered date is labelled "UTC", so convert any other offset.
        return parsed.astimezone(timezone.utc)

    @staticmethod
    def from_json(json_dict: dict, filter_same: bool = True) -> Post:
        """Build a Post (and, recursively, its replies) from a thread node.

        Args:
            json_dict: Thread node with a "post" object and optional "replies".
            filter_same: When true, keep only replies written by the same
                author (matched by DID) as the post being built.
        """
        post_dict = json_dict["post"]
        record = post_dict["record"]
        author = Author.from_json(post_dict["author"])
        created_at = Post._parse_created_at(record["createdAt"])
        url = re.sub(
            r"at://(?P<did>[^/]+)/app\.bsky\.feed\.post/(?P<rkey>\w+)",
            r"https://bsky.app/profile/\g<did>/post/\g<rkey>",
            post_dict["uri"],
        )
        text = record["text"]

        # Facets other than links yield None and are dropped.
        inline_links = [
            link
            for link in (
                InlineLink.from_facet_segment_json(segment)
                for segment in record.get("facets") or []
            )
            if link is not None
        ]

        # Reply entries without a "post" object (placeholders for replies
        # that cannot be shown) carry nothing to render; skip them instead
        # of failing with KeyError.
        replies_json = [d for d in json_dict.get("replies") or [] if "post" in d]
        if filter_same:
            replies_json = [
                d for d in replies_json if d["post"]["author"]["did"] == author.did
            ]
        replies = [Post.from_json(d, filter_same) for d in replies_json]

        embeds = Embed.from_json(post_dict)
        return Post(
            author=author,
            created_at=created_at,
            text=text,
            url=url,
            inline_links=inline_links,
            embeds=embeds,
            replies=replies,
        )

    def to_single_post(self) -> Post:
        """Return a copy of this post with its replies removed."""
        return Post(
            author=self.author,
            created_at=self.created_at,
            text=self.text,
            url=self.url,
            inline_links=self.inline_links,
            embeds=self.embeds,
            replies=[],
        )

    def __repr__(self):
        # Replies are summarised by count to keep the repr of a thread short.
        fields = [
            f"author={self.author!r}",
            f"created_at={self.created_at!r}",
            f"text={self.text!r}",
            f"url={self.url!r}",
            f"inline_links={self.inline_links!r}",
            f"embeds={self.embeds!r}",
            f"replies=[{len(self.replies)} replies]",
        ]
        return "\n".join(["Post(", *fields, ")"])

    def __str__(self):
        """Render the post as BBCode: header line, indented text, then embeds."""
        when = self.created_at
        # The day is formatted by hand: the "%-d" strftime code (day without
        # zero padding) is a platform extension and not available everywhere.
        date = f"{when:%H:%M} UTC on {when:%A %B} {when.day} {when:%Y}"

        text_utf_bytes = bytearray(self.text.encode("utf-8"))
        # Link offsets refer to the ORIGINAL text. Each replacement changes
        # the length of the buffer, so apply them from the last link to the
        # first; earlier offsets then stay valid.
        for link in sorted(
            self.inline_links, key=lambda item: item.start_byte, reverse=True
        ):
            link.insert_bbcode_link_in_bytes(text_utf_bytes)
        expanded_text = text_utf_bytes.decode("utf-8")

        lines = [
            f'{self.author.display_name} (@{self.author.handle}) at [URL="{self.url}"]{date}[/URL]:',
            "[INDENT]",
            expanded_text,
            "[/INDENT]",
        ]
        lines.extend(str(embed) for embed in self.embeds)
        return "\n".join(lines).strip()
def main(url: str):
    """Fetch the thread at *url* and print it as a single BBCode quote block."""
    thread_json = fetch_thread(url)["thread"]
    root = Post.from_json(thread_json)

    # Breadth-first walk that flattens the reply tree into reply-less posts.
    pending = deque([root])
    flattened = []
    while pending:
        node = pending.popleft()
        flattened.append(node.to_single_post())
        pending.extend(node.replies)

    # Present the posts in the order they were written.
    flattened.sort(key=lambda p: p.created_at)
    separator = "\n[HR][/HR]\n\n"
    thread_content = separator.join(str(p) for p in flattened).strip()
    print(f"[QUOTE]\n{thread_content}\n[/QUOTE]")
if __name__ == "__main__":
    # The first command-line argument is the bsky.app post URL. Show a usage
    # hint instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} BSKY_APP_POST_URL", file=sys.stderr)
        sys.exit(2)
    main(sys.argv[1])
Attachments
Last edited: