rss2podcast/episode-tool.py

#!/usr/bin/env python3
import argparse
import sys
import json
from datetime import datetime
from utils import get_engine, create_tables, Episode, slugify
from sqlalchemy.orm import sessionmaker
from content_processing import clean_and_convert_content

def list_episodes(db_url, list_type, exclude_options, podcast_id):
    """Print one attribute of every matching episode, one value per line.

    Args:
        db_url: Database filename or connection string, handed to
            ``get_engine``.
        list_type: Name of the ``Episode`` attribute to print (the CLI
            passes ``'article_guid'``, ``'link'`` or ``'title'``).
        exclude_options: Mapping of optional boolean switches:
            ``exclude_skipped``, ``exclude_processed``, ``exclude_pending``
            and ``exclude_reprocess``. A truthy value removes the
            corresponding episodes from the listing.
        podcast_id: Only episodes whose ``podcast_id`` equals this value
            are listed.
    """
    # Option key -> processing_status value that the option filters out.
    status_switches = (
        ('exclude_processed', 'processed'),
        ('exclude_pending', 'pending'),
        ('exclude_reprocess', 'reprocess'),
    )

    db_session = sessionmaker(bind=get_engine(db_url))()
    try:
        selection = db_session.query(Episode).filter(Episode.podcast_id == podcast_id)
        if exclude_options.get('exclude_skipped'):
            selection = selection.filter(Episode.skipped == False)
        for option_key, status in status_switches:
            if exclude_options.get(option_key):
                selection = selection.filter(Episode.processing_status != status)
        rows = selection.all()
    finally:
        db_session.close()

    # Rows are printed only after the session has been closed.
    for row in rows:
        print(getattr(row, list_type))

def select_episode(session, podcast_id, guid=None, title=None, link=None):
    """Return the first episode of a podcast matching one identifier.

    Exactly one identifier is used, chosen in this order of precedence:
    ``guid`` (exact match on ``article_guid``), then ``title`` (substring
    match), then ``link`` (substring match). Substring matches are done
    with SQL ``LIKE``, so ``%`` and ``_`` inside ``title``/``link`` act as
    wildcards.

    Args:
        session: Open SQLAlchemy session used to run the query.
        podcast_id: Restrict the search to episodes of this podcast.
        guid: Article GUID to match exactly.
        title: Text that must appear in the episode title.
        link: Text that must appear in the episode link.

    Returns:
        The first matching ``Episode``, or ``None`` when nothing matches
        or when no identifier was supplied at all.
    """
    # Without any identifier the link branch would search for the literal
    # text "None" and could return an unrelated episode; report "no match"
    # instead.
    if not (guid or title or link):
        return None

    query = session.query(Episode).filter(Episode.podcast_id == podcast_id)
    if guid:
        query = query.filter(Episode.article_guid == guid)
    elif title:
        query = query.filter(Episode.title.like(f"%{title}%"))
    else:
        query = query.filter(Episode.link.like(f"%{link}%"))
    return query.first()

def mark_episode_skipped(db_url, podcast_id, guid=None, title=None, link=None, reprocess=False):
    """Flag one episode as skipped, or queue it for reprocessing.

    The episode is located with ``select_episode`` using whichever of
    ``guid``, ``title`` or ``link`` is supplied. The process exits with
    status 1 when no identifier is given or when no episode matches.

    Args:
        db_url: Database filename or connection string, handed to
            ``get_engine``.
        podcast_id: Podcast the episode belongs to.
        guid: Article GUID of the episode.
        title: Text contained in the episode title.
        link: Text contained in the episode link.
        reprocess: When truthy, set ``processing_status`` to
            ``'reprocess'`` and clear ``skipped``; otherwise set the
            status to ``'skipped'`` and set ``skipped`` to ``True``.
    """
    if not (guid or title or link):
        print("You must provide either an article GUID, title, or link to identify the episode.")
        sys.exit(1)

    engine = get_engine(db_url)
    db_session = sessionmaker(bind=engine)()
    create_tables(engine)

    try:
        target = select_episode(db_session, podcast_id, guid, title, link)
        if not target:
            print("Episode not found.")
            sys.exit(1)

        # The two flags always move together: 'reprocess' clears the
        # skipped marker, 'skipped' sets it.
        new_status = 'reprocess' if reprocess else 'skipped'
        target.processing_status = new_status
        target.skipped = not reprocess
        db_session.commit()
        print(f"Episode '{target.title}' status set to '{new_status}'.")
    finally:
        db_session.close()

def delete_episode(db_url, podcast_id, guid=None, title=None, link=None):
    """Remove one episode from the database.

    The episode is located with ``select_episode`` using whichever of
    ``guid``, ``title`` or ``link`` is supplied. The process exits with
    status 1 when no identifier is given or when no episode matches.

    Args:
        db_url: Database filename or connection string, handed to
            ``get_engine``.
        podcast_id: Podcast the episode belongs to.
        guid: Article GUID of the episode.
        title: Text contained in the episode title.
        link: Text contained in the episode link.
    """
    if not (guid or title or link):
        print("You must provide either an article GUID, title, or link to identify the episode.")
        sys.exit(1)

    db_session = sessionmaker(bind=get_engine(db_url))()
    try:
        doomed = select_episode(db_session, podcast_id, guid, title, link)
        if not doomed:
            print("Episode not found.")
            sys.exit(1)

        db_session.delete(doomed)
        db_session.commit()
        print(f"Episode '{doomed.title}' has been deleted from the database.")
    finally:
        db_session.close()

def add_new_episode(db_url, args, config):
    """Create a pending episode from content supplied on stdin.

    The episode GUID is taken from ``args.guid`` when given; otherwise it
    is the slugified ``args.link``; otherwise it is today's UTC date
    (``YYYYMMDD``) followed by the slugified ``args.title``. The content is
    treated as Markdown unless ``args.html`` is set, and is passed through
    ``clean_and_convert_content`` before being stored.

    The process exits with status 1 when none of GUID, link or title is
    available to build a GUID from, or when stdin carries no content.

    Args:
        db_url: Database filename or connection string, handed to
            ``get_engine``.
        args: Parsed command-line namespace; reads ``guid``, ``link``,
            ``title``, ``date``, ``description`` and ``html``.
        config: Loaded configuration mapping; ``podcast_id`` is read from
            it, falling back to ``output_rss_feed.atom_link.href`` and
            finally to ``'default_podcast_id'``.
    """
    # Imported locally so the block does not depend on a change to the
    # module-level imports.
    from datetime import timezone

    # Fail early, before consuming stdin: without any of these there is
    # nothing to derive a GUID from.
    if not (args.guid or args.link or args.title):
        print("You must provide a title, link, or GUID for the new episode.")
        sys.exit(1)

    content = sys.stdin.read()
    if not content.strip():
        print("No content provided. Please provide content via stdin.")
        sys.exit(1)

    podcast_id = config.get('podcast_id')
    if not podcast_id:
        podcast_id = config.get('output_rss_feed', {}).get('atom_link', {}).get('href', 'default_podcast_id')

    engine = get_engine(db_url)
    Session = sessionmaker(bind=engine)
    session = Session()
    create_tables(engine)

    try:
        # datetime.utcnow() is deprecated; an aware UTC timestamp formats
        # to the same strings with the patterns used below.
        now_utc = datetime.now(timezone.utc)

        # Generate GUID
        if args.guid:
            article_guid = args.guid
        elif args.link:
            article_guid = slugify(args.link)
        else:
            article_guid = f"{now_utc.strftime('%Y%m%d')}-{slugify(args.title)}"

        pub_date = args.date or now_utc.strftime('%a, %d %b %Y %H:%M:%S +0000')

        # Markdown is the default input format; --html switches it off.
        is_markdown = not args.html
        content = clean_and_convert_content(content, is_markdown=is_markdown)

        episode = Episode(
            podcast_id=podcast_id,
            article_guid=article_guid,
            title=args.title,
            link=args.link or '',
            pub_date=pub_date,
            description=args.description or '',
            content=content,
            processing_status='pending',
            skipped=False
        )
        session.add(episode)
        session.commit()
        print(f"New episode '{args.title}' added to the database.")
    finally:
        session.close()

if __name__ == "__main__":
    # Command-line entry point: parse the arguments, load the JSON
    # configuration, then run the single command that was requested.
    parser = argparse.ArgumentParser(description="Manage episodes in the database.")
    parser.add_argument("--config", default="config.json", help="Path to configuration file")

    # Commands; at most one of them may be given per invocation.
    command_flags = (
        ("--new-episode", "Add a new episode to the database"),
        ("--list-guids", "List all episode GUIDs"),
        ("--list-links", "List all episode links"),
        ("--list-titles", "List all episode titles"),
        ("--skip", "Mark an episode as skipped"),
        ("--reprocess", "Set processing_status to 'reprocess'"),
        ("--delete", "Delete an episode from the database"),
    )
    commands = parser.add_mutually_exclusive_group()
    for flag, help_text in command_flags:
        commands.add_argument(flag, action="store_true", help=help_text)

    # Filters applied by the --list-* commands.
    exclude_flags = (
        ("--exclude-skipped", "Exclude episodes marked for skipping from the list"),
        ("--exclude-processed", "Exclude processed episodes from the list"),
        ("--exclude-pending", "Exclude pending episodes from the list"),
        ("--exclude-reprocess", "Exclude episodes marked for reprocessing from the list"),
    )
    for flag, help_text in exclude_flags:
        parser.add_argument(flag, action="store_true", help=help_text)

    # Options that carry a value.
    value_flags = (
        ("--guid", "GUID of the episode to update, add, or delete"),
        ("--title", "Title of the episode to update, add, or delete"),
        ("--link", "Link of the episode to update, add, or delete"),
        ("--description", "Description of the new episode"),
        ("--date", "Publication date of the new episode"),
        ("--db", "Database filename or connection string"),
    )
    for flag, help_text in value_flags:
        parser.add_argument(flag, help=help_text)

    # Input format of the content read by --new-episode.
    content_format = parser.add_mutually_exclusive_group()
    content_format.add_argument("--markdown", action="store_true", help="Content is in Markdown format (default)")
    content_format.add_argument("--html", action="store_true", help="Content is in HTML format")

    args = parser.parse_args()

    # Load configuration
    try:
        with open(args.config, 'r') as config_file:
            config = json.load(config_file)
            config['config_file_path'] = args.config
    except Exception as e:
        print(f"Error loading configuration file: {e}")
        sys.exit(1)

    # The --db option wins over the configured database, which in turn
    # wins over the built-in default filename.
    db_url = args.db or config.get('database', 'episodes.db')

    podcast_id = (
        config.get('podcast_id')
        or config.get('output_rss_feed', {}).get('atom_link', {}).get('href', 'default_podcast_id')
    )

    exclude_options = {
        option: getattr(args, option)
        for option in ('exclude_skipped', 'exclude_processed', 'exclude_pending', 'exclude_reprocess')
    }

    # Episode attribute printed by whichever --list-* command was chosen.
    if args.list_guids:
        list_attribute = 'article_guid'
    elif args.list_links:
        list_attribute = 'link'
    elif args.list_titles:
        list_attribute = 'title'
    else:
        list_attribute = None

    if list_attribute is not None:
        list_episodes(db_url, list_attribute, exclude_options, podcast_id)
    elif args.new_episode:
        add_new_episode(db_url, args, config)
    elif args.skip or args.reprocess:
        mark_episode_skipped(db_url, podcast_id, guid=args.guid, title=args.title, link=args.link, reprocess=args.reprocess)
    elif args.delete:
        delete_episode(db_url, podcast_id, guid=args.guid, title=args.title, link=args.link)
    else:
        print("Error: No command supplied. Please use one of the available options.")
        parser.print_help()
        sys.exit(1)