import requests
from sys import argv
from time import sleep
def ap_get_doc(session, url, timeout=30) -> dict:
    """Fetch *url* through *session* and return the decoded JSON object.

    Args:
        session: Object with a requests-style ``get(url, timeout=...)``
            method, e.g. a ``requests.Session``.
        url: Address of the document to fetch.
        timeout: Seconds to wait for the server, forwarded to
            ``session.get`` so a stalled server cannot hang the caller.

    Returns:
        The decoded JSON object, or an empty dict when the response is not
        OK, the body is not valid JSON, or the JSON is not an object.
    """
    resp = session.get(url, timeout=timeout)
    if not resp.ok:
        print(resp)
        return {}
    try:
        document = resp.json()
    except ValueError:
        # A body that is not JSON surfaces as a ValueError subclass; treat it
        # like any other failed fetch instead of crashing the crawl.
        print(resp)
        return {}
    # Callers use .get() on the result, so anything but an object is unusable.
    if not isinstance(document, dict):
        return {}
    return document
def webfinger(handle, timeout=30):
    """Resolve a ``user@domain`` handle to its ActivityPub ID via WebFinger.

    Args:
        handle: Account in ``user@domain`` form. Surrounding whitespace and
            a leading ``@`` are ignored.
        timeout: Seconds to wait for the WebFinger endpoint.

    Returns:
        The ``href`` of the first link whose type is
        ``application/activity+json``.

    Raises:
        RuntimeError: The endpoint answered with a non-OK status; the
            message is the response body.
        LookupError: The response lists no ActivityPub link.
    """
    handle = handle.strip().lstrip("@")
    domain = handle.split("@")[-1]
    webfinger_url = (
        f"https://{domain}/.well-known/webfinger?resource=acct:{handle}"
    )
    resp = requests.get(webfinger_url, timeout=timeout)
    if not resp.ok:
        raise RuntimeError(resp.text)
    document = resp.json()
    for link in document.get("links", []):
        # "type" and "href" are optional members of a WebFinger link, so look
        # them up defensively rather than indexing.
        if link.get("type") == "application/activity+json" and link.get("href"):
            # This link is the AP ID
            return link["href"]
    raise LookupError(
        f"no application/activity+json link in WebFinger response for {handle}"
    )
def page_callback(page):
    """Handle one page of results; the stock behavior prints it to stdout.

    Replace the body with whatever processing is needed.
    """
    rendered = str(page)
    print(rendered)
def _extract_notes(document):
    """Return the non-empty Notes created in one outbox page as plain dicts."""
    page = []
    for activity in document.get("orderedItems") or []:
        if not isinstance(activity, dict) or activity.get("type") != "Create":
            continue
        note = activity.get("object")
        # "object" is only usable when it is an embedded Note, not a bare value
        if not isinstance(note, dict) or note.get("type") != "Note":
            continue
        source = note.get("source")
        if isinstance(source, dict):
            content = source.get("content")
        # OLD PLEROMA WHY (source arrives as a plain string)
        elif isinstance(source, str):
            content = source
        else:
            # No usable source: skip rather than crash or reuse the text of
            # the previously processed note.
            continue
        if not isinstance(content, str) or content in ("", " "):
            continue
        page.append({
            "id": note.get("id"),
            "published": note.get("published"),
            "content": content,
        })
    return page


def download(user_url):
    """Walk an actor's outbox and hand every page of notes to page_callback.

    Fetches the actor document at *user_url*, follows its ``outbox`` to the
    ``first`` page, then follows ``next`` links until a page has none or a
    fetch fails. Every fetched page is processed, including the last one.
    A dot is printed per page as a progress marker, followed by a final
    newline once the walk ends.

    Args:
        user_url: ActivityPub ID (URL) of the actor.

    Returns:
        None. Returns early, printing nothing, when the actor has no
        ``outbox`` or the outbox has no ``first`` page.
    """
    # The context manager closes the session on every exit path.
    with requests.Session() as s:
        s.headers.update({
            "Accept": "application/activity+json",
            "User-Agent": "frank 0.0",
        })
        outbox = ap_get_doc(s, user_url).get("outbox")
        if outbox is None:
            return
        first = ap_get_doc(s, outbox).get("first")
        if first is None:
            return
        document = ap_get_doc(s, first)
        # An empty document means the fetch failed, so stop there.
        while document:
            page_callback(_extract_notes(document))
            # progress marker
            print('.', end='', flush=True)
            next_page = document.get("next")
            if next_page is None:
                break
            # be polite to the remote server between page requests
            sleep(0.3)
            document = ap_get_doc(s, next_page)
    print()
if __name__ == '__main__':
    # call with user@domain.com as the username, script handles the rest
    if len(argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        raise SystemExit(f"usage: {argv[0]} user@domain.com")
    handle = argv[1]
    ap_id = webfinger(handle)
    download(ap_id)