'''
Lolcow Archiver 1.1
This script can archive the following:
1) Kick/Twitch/YouTube Live Streams
2) YouTube Community Posts
This script requires yt-dlp to be installed for
video generation, in the same directory as this script.
You can get all the options for downloading yt-dlp
from its GitHub page:
https://github.com/yt-dlp/yt-dlp/wiki/Installation
1.1: Replaced Playwright usage with Firefox and Selenium
due to Playwright memory leaks unaddressed by their devs
python -m pip install selenium
'''
import subprocess
import sys
import time
import os
from pathlib import Path
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException, WebDriverException
# ---------------- SETTINGS ----------------
# Running totals for the current process. Both start at zero and are
# incremented (through `global`) by check_for_new_post() and
# check_for_livestream() respectively; main() reads them to decide whether
# to print its "new posts" / "new streams" banners.
NEW_POST_COUNT = 0
NEW_STREAM_COUNT = 0
# ---------------- HELPERS ----------------
def read_last_seen(last_seen_file):
    """Return the text stored in *last_seen_file*, or None if it is missing.

    The file contents are returned with leading and trailing whitespace
    removed.
    """
    if not os.path.exists(last_seen_file):
        return None
    stored = Path(last_seen_file).read_text()
    return stored.strip()
def save_last_seen(last_seen_file, url):
    """Overwrite *last_seen_file* so that it contains exactly *url*."""
    target = Path(last_seen_file)
    target.write_text(url)
def timestamp():
    """Return the current local time formatted as ``YYYY-MM-DD_HH-MM-SS``."""
    now = time.localtime()
    return time.strftime("%Y-%m-%d_%H-%M-%S", now)
def usage():
    """Print the command-line syntax followed by three example invocations."""
    help_lines = (
        "Usage: python lolcow_archiver.py Kick/Twitch/YouTube_URL Lolcow_Name [YouTube Community Post URL]",
        "\nFor Example:",
        "python lolcow_archiver.py https://www.youtube.com/@GlassWindow11/live Glass https://www.youtube.com/@GlassWindow11/posts",
        "python lolcow_archiver.py https://kick.com/clavicular/ Clavicular",
        "python lolcow_archiver.py https://kick.com/pwrworld/ PWR https://www.youtube.com/@PWRWorldOfficial/posts",
        "\n\n",
    )
    # One print() per entry so each line keeps its own trailing newline.
    for line in help_lines:
        print(line)
def check_for_livestream(live_url):
    """Try to record the stream at *live_url* with a yt-dlp binary next to this script.

    The yt-dlp executable is looked up in the directory containing this
    script. On Windows only ``yt-dlp.exe`` is considered; on other platforms
    ``yt-dlp`` is preferred and ``yt-dlp.exe`` is kept as a fallback so that
    setups which previously relied on that filename keep working.

    The call blocks until yt-dlp exits. An exit status of 0 is counted as one
    archived stream in the module-level ``NEW_STREAM_COUNT``. Errors raised
    while launching yt-dlp are printed and otherwise ignored so the caller's
    polling loop can continue.

    Exits the process with status 1 if no yt-dlp executable is found.
    """
    global NEW_STREAM_COUNT

    script_dir = Path(__file__).resolve().parent

    # Pick the binary name by platform instead of assuming Windows.
    if os.name == "nt":
        binary_names = ("yt-dlp.exe",)
    else:
        binary_names = ("yt-dlp", "yt-dlp.exe")

    # is_file() rather than exists(): a directory called "yt-dlp" (for
    # example a source checkout) must not be mistaken for the executable.
    ytdlp_path = next(
        (script_dir / name for name in binary_names if (script_dir / name).is_file()),
        None,
    )
    if ytdlp_path is None:
        print(
            time.strftime("%H:%M:%S", time.localtime()),
            f": Error: yt-dlp not found in {script_dir} (looked for: {', '.join(binary_names)})"
        )
        sys.exit(1)

    command = [
        str(ytdlp_path),
        "-o",
        "%(title).50B [%(id)s].%(ext)s",  # Limit filename length if stream title too long
        "-t",
        "mp4",
        live_url,
    ]
    print(time.strftime("%H:%M:%S", time.localtime()), ": Launching yt-dlp...")
    try:
        # check=False: a non-zero exit status is inspected below, not raised.
        result = subprocess.run(command, check=False)
    except Exception as e:
        # Best effort: a launch failure must not stop the monitoring loop.
        print(time.strftime("%H:%M:%S", time.localtime()), f": Error while running yt-dlp: {e}")
    else:
        if result.returncode == 0:
            NEW_STREAM_COUNT += 1
def make_firefox_driver():
    """Create and return a headless Firefox WebDriver configured for YouTube.

    The browser is started headless with the "eager" page load strategy, an
    English Accept-Language preference, a 1920x1080 window and a 20 second
    page load timeout. The caller is responsible for calling ``quit()`` on
    the returned driver.

    If configuring the freshly started browser fails, the browser is shut
    down before the exception is re-raised, so no Firefox process is left
    behind.
    """
    options = FirefoxOptions()
    options.add_argument("-headless")

    # Do not wait for every YouTube resource to finish loading.
    # Selenium supports page load strategies:
    # normal = complete, eager = interactive, none = return immediately.
    options.page_load_strategy = "eager"

    # Keep YouTube UI text predictable so "Read more" selector works.
    options.set_preference("intl.accept_languages", "en-US,en")

    driver = webdriver.Firefox(options=options)
    try:
        driver.set_window_size(1920, 1080)
        # Keep this relatively short because callers recover from timeouts.
        driver.set_page_load_timeout(20)
    except Exception:
        # The caller never receives the driver on failure, so it could not
        # quit it; clean up here to avoid leaking a browser process.
        driver.quit()
        raise
    return driver
def safe_get(driver, url, label="page"):
    """Navigate *driver* to *url*, tolerating a page-load timeout.

    When the navigation times out, a message naming *label* is printed and
    the page is asked to stop loading so that the partially loaded DOM can
    still be used. A WebDriver error raised by that stop request is ignored.
    """
    try:
        driver.get(url)
        return
    except TimeoutException:
        clock = time.strftime("%H:%M:%S", time.localtime())
        print(
            clock,
            f": Navigation timed out while loading {label}. Continuing with partially loaded page."
        )

    # Only reached after a timeout: halt any remaining loading so Selenium
    # can keep interacting with whatever part of the DOM already exists.
    try:
        driver.execute_script("window.stop();")
    except WebDriverException:
        pass
def click_read_more_if_present(driver):
    """Click the first element whose visible text contains "read more".

    Buttons, spans and YouTube's custom text elements are scanned in document
    order. The first match is scrolled into view and clicked, falling back to
    a JavaScript click if the native click raises. Any exception raised while
    inspecting or clicking a candidate makes the scan move on to the next one.

    Returns True once a candidate has been clicked, otherwise False.
    """
    # YouTube can render "Read more" in different custom elements depending on layout.
    selector = "tp-yt-paper-button, yt-formatted-string, button, span"

    for candidate in driver.find_elements(By.CSS_SELECTOR, selector):
        try:
            label = (candidate.text or "").strip().lower()
            # An exact match is also a substring match, so one test suffices.
            if "read more" not in label:
                continue

            driver.execute_script(
                "arguments[0].scrollIntoView({block: 'center'});",
                candidate
            )
            time.sleep(0.5)

            try:
                candidate.click()
            except Exception:
                # Native click refused; trigger the click from JavaScript.
                driver.execute_script("arguments[0].click();", candidate)

            time.sleep(2)
            print(
                time.strftime("%H:%M:%S", time.localtime()),
                ": Large community post detected. Expanding the full text of the community post."
            )
            return True
        except Exception:
            continue

    return False
def check_for_new_post(lolcow_name, channel_posts_url):
    """Screenshot the newest YouTube community post if it has not been seen yet.

    A fresh headless Firefox session is started for every call and always
    quit on exit. The first post link found on *channel_posts_url* is compared
    with the URL stored in ``<lolcow_name>_last_seen.txt``. When it differs,
    the post page is opened, its text is expanded if a "Read more" control is
    present, and a full-page screenshot is written to
    ``<lolcow_name>_community_post_<timestamp>.png`` in the current directory.

    The post is recorded as seen, and the module-level ``NEW_POST_COUNT`` is
    incremented, only after the screenshot has been written successfully, so
    a failed capture is attempted again on the next call instead of being
    lost.
    """
    global NEW_POST_COUNT
    screenshot_prefix = f"{lolcow_name}_community_post_"
    last_seen_file = f"{lolcow_name}_last_seen.txt"

    driver = make_firefox_driver()
    try:
        print("\n")
        print(time.strftime("%H:%M:%S", time.localtime()), f": Checking community posts for {lolcow_name}...")

        # Navigate to Community Posts
        safe_get(driver, channel_posts_url, "YouTube community posts page")
        time.sleep(5)

        # Grab newest post link. WebDriverWait.until() raises TimeoutException
        # instead of returning an empty list, so translate that into "no posts"
        # to keep the branch below reachable.
        try:
            post_links = WebDriverWait(driver, 20).until(
                lambda d: d.find_elements(By.CSS_SELECTOR, "a[href*='/post/'], a[href*='post/']")
            )
        except TimeoutException:
            post_links = []

        if not post_links:
            print(time.strftime("%H:%M:%S", time.localtime()), ": No posts located.")
            return

        latest_url = post_links[0].get_attribute("href")
        if not latest_url:
            print(time.strftime("%H:%M:%S", time.localtime()), ": No post URL found.")
            return
        if not latest_url.startswith("http"):
            latest_url = "https://www.youtube.com" + latest_url
        print(time.strftime("%H:%M:%S", time.localtime()), ": Latest post found:", latest_url)

        # Compare to last seen
        last_seen = read_last_seen(last_seen_file)
        if latest_url == last_seen:
            print(time.strftime("%H:%M:%S", time.localtime()), ": No new posts detected.")
            return

        print(time.strftime("%H:%M:%S", time.localtime()), ": NEW POST DETECTED!")

        # Open post page
        safe_get(driver, latest_url, "YouTube community post page")
        time.sleep(4)

        # Expand text if needed
        click_read_more_if_present(driver)

        # Screenshot
        screenshot_name = f"{screenshot_prefix}{timestamp()}.png"
        screenshot_path = str(Path(screenshot_name).resolve())
        ok = driver.get_full_page_screenshot_as_file(screenshot_path)
        if ok:
            print(time.strftime("%H:%M:%S", time.localtime()), ": Screenshot saved:", screenshot_name)
            # Mark the post as handled only now that it has been captured.
            NEW_POST_COUNT += 1
            save_last_seen(last_seen_file, latest_url)
        else:
            # Leave the last-seen marker untouched so the next call retries.
            print(time.strftime("%H:%M:%S", time.localtime()), ": Screenshot failed:", screenshot_name)
    finally:
        driver.quit()
# ---------------- MAIN ----------------
def main():
    """Parse the command line and poll for posts and streams until interrupted.

    Expected arguments: the livestream URL, a short name used in messages and
    output filenames, and optionally a YouTube community posts URL. Any other
    argument count prints the usage text and exits with status 1.

    Each cycle checks community posts (when a posts URL was given) and then
    the livestream, prints summary banners for anything archived so far, and
    sleeps for 30 seconds. The two checks are guarded separately so that a
    failure in one does not prevent the other from running in the same cycle.
    """
    # Reject both too few and too many arguments; a stray extra argument
    # would otherwise silently turn community-post capture off.
    if not 3 <= len(sys.argv) <= 4:
        usage()
        sys.exit(1)

    live_url = sys.argv[1]
    lolcow_name = sys.argv[2]
    channel_posts_url = sys.argv[3] if len(sys.argv) == 4 else None
    capture_community_posts = channel_posts_url is not None

    print("\n")
    print("__________________________________________________")
    print(f"Starting Lolcow Archiver for {lolcow_name}")
    print(f"Checking for livestream at {live_url}")
    if capture_community_posts:
        print(f"Checking for YouTube Community Posts at {channel_posts_url}")
    print("--------------------------------------------------")

    while True:
        if capture_community_posts:
            try:
                check_for_new_post(lolcow_name, channel_posts_url)
            except Exception as e:
                # Top-level boundary: report and keep the loop alive.
                print("ERROR:", e)

        try:
            check_for_livestream(live_url)
        except Exception as e:
            print("ERROR:", e)

        if capture_community_posts and NEW_POST_COUNT > 0:
            print("\n")
            print("**************************************************")
            print(f"THERE ARE {NEW_POST_COUNT} NEW COMMUNITY POSTS FOR YOU TO ARCHIVE")
            print("**************************************************")
        if NEW_STREAM_COUNT > 0:
            print("\n")
            print("**************************************************")
            print(f"THERE ARE {NEW_STREAM_COUNT} NEW STREAMS FOR YOU TO ARCHIVE")
            print("**************************************************")
        print("\n")
        print(time.strftime("%H:%M:%S", time.localtime()), ": Sleeping for 30 seconds before checking again...\n")
        time.sleep(30)
# Start the archiver only when this file is executed as a script, so that
# importing the module does not begin the polling loop.
if __name__ == "__main__":
    main()