rss2podcast/episode-tool.py

216 lines
8.5 KiB
Python
Raw Permalink Normal View History

2024-11-05 14:45:19 +01:00
#!/usr/bin/env python3
import argparse
import sys
import json
from datetime import datetime
from utils import get_engine, create_tables, Episode, slugify
from sqlalchemy.orm import sessionmaker
from content_processing import clean_and_convert_content
def list_episodes(db_url, list_type, exclude_options, podcast_id):
    """Print one attribute of each episode of a podcast, one value per line.

    Args:
        db_url: Database filename or connection string passed to ``get_engine``.
        list_type: Name of the ``Episode`` attribute to print for every row
            (the callers in this file pass 'article_guid', 'link' or 'title').
        exclude_options: Mapping of option name to a truthy/falsy flag.
            Recognised keys are 'exclude_skipped', 'exclude_processed',
            'exclude_pending' and 'exclude_reprocess'; missing keys count as
            disabled.
        podcast_id: Only episodes with this ``podcast_id`` are listed.
    """
    # Option name -> processing_status value that the option filters out.
    status_exclusions = (
        ('exclude_processed', 'processed'),
        ('exclude_pending', 'pending'),
        ('exclude_reprocess', 'reprocess'),
    )

    session = sessionmaker(bind=get_engine(db_url))()
    try:
        selection = session.query(Episode).filter(Episode.podcast_id == podcast_id)
        if exclude_options.get('exclude_skipped'):
            # SQLAlchemy column comparison: builds SQL, so "== False" is intended.
            selection = selection.filter(Episode.skipped == False)
        for option_name, status in status_exclusions:
            if exclude_options.get(option_name):
                selection = selection.filter(Episode.processing_status != status)
        rows = selection.all()
    finally:
        session.close()

    # Rows were fully loaded by .all(), so they can be printed after the
    # session has been closed.
    for row in rows:
        print(getattr(row, list_type))
def select_episode(session, podcast_id, guid=None, title=None, link=None):
    """Return the first episode of a podcast matching one identifier, or None.

    Only one identifier is used, chosen in this order of precedence:

    1. ``guid``  - exact match on ``Episode.article_guid``
    2. ``title`` - substring match (SQL ``LIKE '%title%'``) on ``Episode.title``
    3. ``link``  - substring match (SQL ``LIKE '%link%'``) on ``Episode.link``

    Args:
        session: Open SQLAlchemy session used to run the query.
        podcast_id: Restricts the search to episodes of this podcast.
        guid: Article GUID to match exactly.
        title: Text that must occur in the episode title.
        link: Text that must occur in the episode link.

    Returns:
        The first matching ``Episode`` as returned by the database (no
        ordering is applied, so a substring that matches several episodes
        yields an unspecified one), or ``None`` when nothing matches or when
        no identifier was supplied.
    """
    if not (guid or title or link):
        # Without an identifier the link branch would search for the literal
        # text "None" (or, for an empty string, match every link and return
        # an arbitrary episode). Treat that as "nothing to look up".
        return None

    query = session.query(Episode).filter(Episode.podcast_id == podcast_id)
    if guid:
        query = query.filter(Episode.article_guid == guid)
    elif title:
        query = query.filter(Episode.title.like(f"%{title}%"))
    else:
        query = query.filter(Episode.link.like(f"%{link}%"))
    return query.first()
def mark_episode_skipped(db_url, podcast_id, guid=None, title=None, link=None, reprocess=False):
    """Flag one episode as skipped, or queue it for reprocessing.

    The episode is located with ``select_episode`` using ``guid``, ``title``
    or ``link``. The process exits with status 1 if no identifier is given or
    no episode is found.

    Args:
        db_url: Database filename or connection string passed to ``get_engine``.
        podcast_id: Podcast the episode belongs to.
        guid: Article GUID identifying the episode.
        title: Title text identifying the episode.
        link: Link text identifying the episode.
        reprocess: When truthy, set ``processing_status`` to 'reprocess' and
            clear ``skipped``; otherwise set the status to 'skipped' and set
            ``skipped`` to True.
    """
    if not (guid or title or link):
        print("You must provide either an article GUID, title, or link to identify the episode.")
        sys.exit(1)

    engine = get_engine(db_url)
    session = sessionmaker(bind=engine)()
    create_tables(engine)
    try:
        target = select_episode(session, podcast_id, guid, title, link)
        if not target:
            print("Episode not found.")
            sys.exit(1)

        # The status label and the skipped flag are both derived from the
        # requested mode.
        status = 'reprocess' if reprocess else 'skipped'
        target.processing_status = status
        target.skipped = not reprocess
        session.commit()
        print(f"Episode '{target.title}' status set to '{status}'.")
    finally:
        # Runs on the sys.exit() paths as well.
        session.close()
def delete_episode(db_url, podcast_id, guid=None, title=None, link=None):
    """Delete one episode from the database.

    The episode is located with ``select_episode`` using ``guid``, ``title``
    or ``link``. The process exits with status 1 if no identifier is given or
    no episode is found.

    Args:
        db_url: Database filename or connection string passed to ``get_engine``.
        podcast_id: Podcast the episode belongs to.
        guid: Article GUID identifying the episode.
        title: Title text identifying the episode.
        link: Link text identifying the episode.
    """
    has_identifier = bool(guid or title or link)
    if not has_identifier:
        print("You must provide either an article GUID, title, or link to identify the episode.")
        sys.exit(1)

    session_factory = sessionmaker(bind=get_engine(db_url))
    session = session_factory()
    try:
        doomed = select_episode(session, podcast_id, guid, title, link)
        if not doomed:
            print("Episode not found.")
            sys.exit(1)

        session.delete(doomed)
        session.commit()
        print(f"Episode '{doomed.title}' has been deleted from the database.")
    finally:
        # Runs on the sys.exit() path as well.
        session.close()
def add_new_episode(db_url, args, config):
    """Create a pending episode from content read on stdin.

    The article GUID is taken from ``args.guid`` if given, otherwise derived
    from ``args.link`` via ``slugify``, otherwise built as
    ``<UTC date YYYYMMDD>-<slugified title>``. The content is passed through
    ``clean_and_convert_content`` (as Markdown unless ``args.html`` is set)
    and stored with ``processing_status='pending'`` and ``skipped=False``.

    The process exits with status 1 when stdin is empty or whitespace-only,
    or when none of ``args.guid``, ``args.link`` and ``args.title`` is given
    (no GUID could be derived).

    Args:
        db_url: Database filename or connection string passed to ``get_engine``.
        args: Parsed command-line namespace; reads ``guid``, ``link``,
            ``title``, ``date``, ``description`` and ``html``.
        config: Loaded configuration mapping. ``podcast_id`` is used if
            present and truthy; otherwise
            ``output_rss_feed.atom_link.href``, falling back to
            'default_podcast_id'.
    """
    # Local import keeps this block self-contained; the module only imports
    # ``datetime`` from the datetime package.
    from datetime import timezone

    content = sys.stdin.read()
    if not content.strip():
        print("No content provided. Please provide content via stdin.")
        sys.exit(1)

    # Fail before touching the database: with no GUID, link or title the GUID
    # fallback below would call slugify(None).
    if not (args.guid or args.link or args.title):
        print("You must provide a title, link, or GUID for the new episode.")
        sys.exit(1)

    podcast_id = config.get('podcast_id')
    if not podcast_id:
        podcast_id = config.get('output_rss_feed', {}).get('atom_link', {}).get('href', 'default_podcast_id')

    engine = get_engine(db_url)
    Session = sessionmaker(bind=engine)
    session = Session()
    create_tables(engine)
    try:
        # One timezone-aware timestamp feeds both the GUID date and the
        # default pub_date (datetime.utcnow() is deprecated since 3.12).
        now_utc = datetime.now(timezone.utc)

        # Generate GUID
        if args.guid:
            article_guid = args.guid
        elif args.link:
            article_guid = slugify(args.link)
        else:
            date_str = now_utc.strftime('%Y%m%d')
            title_slug = slugify(args.title)
            article_guid = f"{date_str}-{title_slug}"

        pub_date = args.date or now_utc.strftime('%a, %d %b %Y %H:%M:%S +0000')

        # Markdown is the default input format; --html switches it off.
        is_markdown = not args.html
        content = clean_and_convert_content(content, is_markdown=is_markdown)

        episode = Episode(
            podcast_id=podcast_id,
            article_guid=article_guid,
            title=args.title,
            link=args.link or '',
            pub_date=pub_date,
            description=args.description or '',
            content=content,
            processing_status='pending',
            skipped=False,
        )
        session.add(episode)
        session.commit()
        print(f"New episode '{args.title}' added to the database.")
    finally:
        session.close()
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, load the JSON
    # configuration, then dispatch to exactly one episode command.
    parser = argparse.ArgumentParser(description="Manage episodes in the database.")
    parser.add_argument("--config", default="config.json", help="Path to configuration file")

    # Commands are mutually exclusive: at most one per invocation.
    command_group = parser.add_mutually_exclusive_group()
    for flag, help_text in (
        ("--new-episode", "Add a new episode to the database"),
        ("--list-guids", "List all episode GUIDs"),
        ("--list-links", "List all episode links"),
        ("--list-titles", "List all episode titles"),
        ("--skip", "Mark an episode as skipped"),
        ("--reprocess", "Set processing_status to 'reprocess'"),
        ("--delete", "Delete an episode from the database"),
    ):
        command_group.add_argument(flag, action="store_true", help=help_text)

    # Filters that only affect the --list-* commands.
    for flag, help_text in (
        ("--exclude-skipped", "Exclude episodes marked for skipping from the list"),
        ("--exclude-processed", "Exclude processed episodes from the list"),
        ("--exclude-pending", "Exclude pending episodes from the list"),
        ("--exclude-reprocess", "Exclude episodes marked for reprocessing from the list"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)

    # Episode identification / metadata and database selection.
    for flag, help_text in (
        ("--guid", "GUID of the episode to update, add, or delete"),
        ("--title", "Title of the episode to update, add, or delete"),
        ("--link", "Link of the episode to update, add, or delete"),
        ("--description", "Description of the new episode"),
        ("--date", "Publication date of the new episode"),
        ("--db", "Database filename or connection string"),
    ):
        parser.add_argument(flag, help=help_text)

    # Input format of the content read from stdin by --new-episode.
    format_group = parser.add_mutually_exclusive_group()
    format_group.add_argument("--markdown", action="store_true", help="Content is in Markdown format (default)")
    format_group.add_argument("--html", action="store_true", help="Content is in HTML format")

    args = parser.parse_args()

    # Load configuration
    try:
        with open(args.config, 'r') as config_file:
            config = json.load(config_file)
        config['config_file_path'] = args.config
    except Exception as err:
        print(f"Error loading configuration file: {err}")
        sys.exit(1)

    # The command line wins over the config file; 'episodes.db' is the last resort.
    db_url = args.db or config.get('database', 'episodes.db')

    # Fall back to the feed's atom link when no explicit podcast_id is configured.
    podcast_id = config.get('podcast_id') or (
        config.get('output_rss_feed', {}).get('atom_link', {}).get('href', 'default_podcast_id')
    )

    exclude_options = {
        option: getattr(args, option)
        for option in ('exclude_skipped', 'exclude_processed', 'exclude_pending', 'exclude_reprocess')
    }

    # --list-* flag (as an argparse attribute) -> Episode attribute to print.
    list_attributes = {
        'list_guids': 'article_guid',
        'list_links': 'link',
        'list_titles': 'title',
    }
    requested = [attr for flag_name, attr in list_attributes.items() if getattr(args, flag_name)]

    if requested:
        list_episodes(db_url, requested[0], exclude_options, podcast_id)
    elif args.new_episode:
        add_new_episode(db_url, args, config)
    elif args.skip or args.reprocess:
        mark_episode_skipped(db_url, podcast_id, guid=args.guid, title=args.title, link=args.link, reprocess=args.reprocess)
    elif args.delete:
        delete_episode(db_url, podcast_id, guid=args.guid, title=args.title, link=args.link)
    else:
        print("Error: No command supplied. Please use one of the available options.")
        parser.print_help()
        sys.exit(1)