rss2podcast/episode-tool.py

#!/usr/bin/env python3
import argparse
import sys
import json
from datetime import datetime
from utils import get_engine, create_tables, Episode, slugify
from sqlalchemy.orm import sessionmaker
from content_processing import clean_and_convert_content

def list_episodes(db_url, list_type, exclude_options, podcast_id):
    """Print one attribute of every matching episode, one value per line.

    Args:
        db_url: Database filename or connection string, handed to get_engine().
        list_type: Name of the Episode attribute to print for each row
            (the CLI passes 'article_guid', 'link' or 'title').
        exclude_options: Mapping of optional boolean flags
            ('exclude_skipped', 'exclude_processed', 'exclude_pending',
            'exclude_reprocess'); each truthy flag narrows the result set.
        podcast_id: Only episodes with this podcast_id are considered.
    """
    # Each flag is paired with the criterion a row must satisfy to stay listed.
    exclusion_criteria = (
        ('exclude_skipped', Episode.skipped == False),  # noqa: E712 - SQL expression, not a Python comparison
        ('exclude_processed', Episode.processing_status != 'processed'),
        ('exclude_pending', Episode.processing_status != 'pending'),
        ('exclude_reprocess', Episode.processing_status != 'reprocess'),
    )

    db_session = sessionmaker(bind=get_engine(db_url))()
    try:
        selection = db_session.query(Episode).filter(Episode.podcast_id == podcast_id)
        for option_name, criterion in exclusion_criteria:
            if exclude_options.get(option_name):
                selection = selection.filter(criterion)
        matches = selection.all()
    finally:
        db_session.close()

    # Printing happens after the session is closed; the rows were already
    # fetched by .all() above.
    for match in matches:
        print(getattr(match, list_type))

def select_episode(session, podcast_id, guid=None, title=None, link=None):
    """Return the first episode of a podcast that matches one identifier.

    Exactly one criterion is applied, chosen by precedence:

    1. ``guid``  - exact match on ``Episode.article_guid``
    2. ``title`` - substring match (SQL ``LIKE '%title%'``)
    3. ``link``  - substring match (SQL ``LIKE '%link%'``)

    Because the title and link are interpolated into a LIKE pattern, any
    ``%`` or ``_`` they contain acts as a wildcard. No ordering is applied,
    so when several rows match, which one comes back is up to the database.

    Args:
        session: Open SQLAlchemy session used to run the query.
        podcast_id: Restricts the search to episodes of this podcast.
        guid: Article GUID to match exactly.
        title: Text that must appear in the episode title.
        link: Text that must appear in the episode link.

    Returns:
        The first matching Episode, or None when nothing matches or when no
        identifier was supplied at all.
    """
    if not (guid or title or link):
        # Without this guard the link branch would build LIKE '%None%' and
        # could hand back an unrelated episode whose link contains "None".
        return None

    query = session.query(Episode).filter(Episode.podcast_id == podcast_id)
    if guid:
        query = query.filter(Episode.article_guid == guid)
    elif title:
        query = query.filter(Episode.title.like(f"%{title}%"))
    else:
        query = query.filter(Episode.link.like(f"%{link}%"))
    return query.first()

def mark_episode_skipped(db_url, podcast_id, guid=None, title=None, link=None, reprocess=False):
    """Flag one episode as skipped, or queue it for reprocessing.

    The episode is located with select_episode() using whichever of
    guid/title/link was given. The process exits with status 1 when no
    identifier is supplied or when no episode is found.

    Args:
        db_url: Database filename or connection string, handed to get_engine().
        podcast_id: Podcast the episode belongs to.
        guid: Article GUID identifying the episode.
        title: Title text identifying the episode.
        link: Link text identifying the episode.
        reprocess: When true, set processing_status to 'reprocess' and clear
            the skipped flag; otherwise set 'skipped' and raise the flag.
    """
    have_identifier = guid or title or link
    if not have_identifier:
        print("You must provide either an article GUID, title, or link to identify the episode.")
        sys.exit(1)

    engine = get_engine(db_url)
    db_session = sessionmaker(bind=engine)()
    create_tables(engine)

    try:
        target = select_episode(db_session, podcast_id, guid, title, link)
        if not target:
            print("Episode not found.")
            sys.exit(1)

        # The status string and the boolean flag always move together.
        status = 'reprocess' if reprocess else 'skipped'
        target.processing_status = status
        target.skipped = not reprocess
        db_session.commit()
        print(f"Episode '{target.title}' status set to '{status}'.")
    finally:
        db_session.close()

def delete_episode(db_url, podcast_id, guid=None, title=None, link=None):
    """Remove one episode from the database.

    The episode is located with select_episode() using whichever of
    guid/title/link was given. The process exits with status 1 when no
    identifier is supplied or when no episode is found.

    Args:
        db_url: Database filename or connection string, handed to get_engine().
        podcast_id: Podcast the episode belongs to.
        guid: Article GUID identifying the episode.
        title: Title text identifying the episode.
        link: Link text identifying the episode.
    """
    have_identifier = guid or title or link
    if not have_identifier:
        print("You must provide either an article GUID, title, or link to identify the episode.")
        sys.exit(1)

    db_session = sessionmaker(bind=get_engine(db_url))()
    try:
        doomed = select_episode(db_session, podcast_id, guid, title, link)
        if not doomed:
            print("Episode not found.")
            sys.exit(1)

        db_session.delete(doomed)
        db_session.commit()
        print(f"Episode '{doomed.title}' has been deleted from the database.")
    finally:
        db_session.close()

def add_new_episode(db_url, args, config):
    """Insert a new pending episode whose body is read from stdin.

    The article GUID is taken from ``args.guid`` when given, otherwise
    derived from ``args.link`` via slugify(), otherwise built as
    ``<UTC date YYYYMMDD>-<slugified title>``. The content is passed through
    clean_and_convert_content(), treated as Markdown unless ``args.html`` is
    set. The process exits with status 1 when the episode cannot be
    identified or when stdin carries no content.

    Args:
        db_url: Database filename or connection string, handed to get_engine().
        args: Parsed command-line namespace; reads guid, link, title, date,
            description and html.
        config: Configuration dict; 'podcast_id' is used when present,
            otherwise output_rss_feed.atom_link.href, otherwise the literal
            'default_podcast_id'.
    """
    # Only `datetime` is imported at file level, so bring timezone in locally.
    from datetime import timezone

    # Fail fast, before blocking on stdin: with none of these set, the GUID
    # would have to be derived from slugify(None).
    if not (args.guid or args.link or args.title):
        print("You must provide either an article GUID, title, or link to identify the new episode.")
        sys.exit(1)

    content = sys.stdin.read()
    if not content.strip():
        print("No content provided. Please provide content via stdin.")
        sys.exit(1)

    podcast_id = config.get('podcast_id')
    if not podcast_id:
        podcast_id = config.get('output_rss_feed', {}).get('atom_link', {}).get('href', 'default_podcast_id')

    engine = get_engine(db_url)
    Session = sessionmaker(bind=engine)
    session = Session()
    create_tables(engine)

    try:
        # One timezone-aware snapshot keeps the GUID date and the publication
        # date consistent and avoids the deprecated datetime.utcnow().
        now = datetime.now(timezone.utc)

        # Generate GUID
        if args.guid:
            article_guid = args.guid
        elif args.link:
            article_guid = slugify(args.link)
        else:
            article_guid = f"{now.strftime('%Y%m%d')}-{slugify(args.title)}"

        pub_date = args.date or now.strftime('%a, %d %b %Y %H:%M:%S +0000')

        # Markdown is the default input format; --html switches it off.
        content = clean_and_convert_content(content, is_markdown=not args.html)

        episode = Episode(
            podcast_id=podcast_id,
            article_guid=article_guid,
            title=args.title,
            link=args.link or '',
            pub_date=pub_date,
            description=args.description or '',
            content=content,
            processing_status='pending',
            skipped=False
        )
        session.add(episode)
        session.commit()
        print(f"New episode '{args.title}' added to the database.")
    finally:
        session.close()

def _build_parser():
    """Create the argument parser describing every command and option."""
    parser = argparse.ArgumentParser(description="Manage episodes in the database.")
    parser.add_argument("--config", default="config.json", help="Path to configuration file")

    # Commands: at most one may be given per invocation.
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--new-episode", action="store_true", help="Add a new episode to the database")
    group.add_argument("--list-guids", action="store_true", help="List all episode GUIDs")
    group.add_argument("--list-links", action="store_true", help="List all episode links")
    group.add_argument("--list-titles", action="store_true", help="List all episode titles")
    group.add_argument("--skip", action="store_true", help="Mark an episode as skipped")
    group.add_argument("--reprocess", action="store_true", help="Set processing_status to 'reprocess'")
    group.add_argument("--delete", action="store_true", help="Delete an episode from the database")

    # Exclude for list
    parser.add_argument("--exclude-skipped", action="store_true", help="Exclude episodes marked for skipping from the list")
    parser.add_argument("--exclude-processed", action="store_true", help="Exclude processed episodes from the list")
    parser.add_argument("--exclude-pending", action="store_true", help="Exclude pending episodes from the list")
    parser.add_argument("--exclude-reprocess", action="store_true", help="Exclude episodes marked for reprocessing from the list")

    parser.add_argument("--guid", help="GUID of the episode to update, add, or delete")
    parser.add_argument("--title", help="Title of the episode to update, add, or delete")
    parser.add_argument("--link", help="Link of the episode to update, add, or delete")
    parser.add_argument("--description", help="Description of the new episode")
    parser.add_argument("--date", help="Publication date of the new episode")
    parser.add_argument("--db", help="Database filename or connection string")

    format_group = parser.add_mutually_exclusive_group()
    format_group.add_argument("--markdown", action="store_true", help="Content is in Markdown format (default)")
    format_group.add_argument("--html", action="store_true", help="Content is in HTML format")
    return parser


def _load_config(config_path):
    """Read the JSON configuration file, exiting with status 1 on failure.

    The returned dict additionally carries the path it was loaded from under
    the 'config_file_path' key.
    """
    try:
        # JSON is UTF-8; do not depend on the platform's default encoding.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both malformed JSON and undecodable bytes.
        print(f"Error loading configuration file: {e}")
        sys.exit(1)

    if not isinstance(config, dict):
        # Everything below uses config.get(); reject lists/scalars explicitly
        # instead of failing later with an unrelated-looking error.
        print("Error loading configuration file: top-level JSON value must be an object")
        sys.exit(1)

    config['config_file_path'] = config_path
    return config


def main(argv=None):
    """Parse the command line and run the requested episode command.

    Args:
        argv: Argument list to parse; None means use sys.argv[1:].

    Exits with status 1 when the configuration cannot be loaded or when no
    command was supplied.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    config = _load_config(args.config)

    # Set default db filename or connection string from config if not provided
    db_url = args.db or config.get('database', 'episodes.db')

    # Fall back to the feed's atom link when no explicit podcast id is set.
    podcast_id = config.get('podcast_id')
    if not podcast_id:
        podcast_id = config.get('output_rss_feed', {}).get('atom_link', {}).get('href', 'default_podcast_id')

    exclude_options = {
        'exclude_skipped': args.exclude_skipped,
        'exclude_processed': args.exclude_processed,
        'exclude_pending': args.exclude_pending,
        'exclude_reprocess': args.exclude_reprocess
    }

    if args.list_guids:
        list_episodes(db_url, 'article_guid', exclude_options, podcast_id)
    elif args.list_links:
        list_episodes(db_url, 'link', exclude_options, podcast_id)
    elif args.list_titles:
        list_episodes(db_url, 'title', exclude_options, podcast_id)
    elif args.new_episode:
        add_new_episode(db_url, args, config)
    elif args.skip or args.reprocess:
        mark_episode_skipped(db_url, podcast_id, guid=args.guid, title=args.title, link=args.link, reprocess=args.reprocess)
    elif args.delete:
        delete_episode(db_url, podcast_id, guid=args.guid, title=args.title, link=args.link)
    else:
        print("Error: No command supplied. Please use one of the available options.")
        parser.print_help()
        sys.exit(1)


if __name__ == "__main__":
    main()
initial commit 2024-11-05 14:45:19 +01:00			`#!/usr/bin/env python3`
			`import argparse`
			`import sys`
			`import json`
			`from datetime import datetime`
			`from utils import get_engine, create_tables, Episode, slugify`
			`from sqlalchemy.orm import sessionmaker`
			`from content_processing import clean_and_convert_content`

			`def list_episodes(db_url, list_type, exclude_options, podcast_id):`
			`engine = get_engine(db_url)`
			`Session = sessionmaker(bind=engine)`
			`session = Session()`

			`try:`
			`query = session.query(Episode).filter(Episode.podcast_id == podcast_id)`
			`if exclude_options.get('exclude_skipped'):`
			`query = query.filter(Episode.skipped == False)`
			`if exclude_options.get('exclude_processed'):`
			`query = query.filter(Episode.processing_status != 'processed')`
			`if exclude_options.get('exclude_pending'):`
			`query = query.filter(Episode.processing_status != 'pending')`
			`if exclude_options.get('exclude_reprocess'):`
			`query = query.filter(Episode.processing_status != 'reprocess')`

			`episodes = query.all()`
			`finally:`
			`session.close()`

			`for episode in episodes:`
			`print(getattr(episode, list_type))`

			`def select_episode(session, podcast_id, guid=None, title=None, link=None):`
			`query = session.query(Episode).filter(Episode.podcast_id == podcast_id)`
			`if guid:`
			`query = query.filter(Episode.article_guid == guid)`
			`elif title:`
			`query = query.filter(Episode.title.like(f"%{title}%"))`
			`else:`
			`query = query.filter(Episode.link.like(f"%{link}%"))`
			`return query.first()`

			`def mark_episode_skipped(db_url, podcast_id, guid=None, title=None, link=None, reprocess=False):`
			`if not guid and not title and not link:`
			`print("You must provide either an article GUID, title, or link to identify the episode.")`
			`sys.exit(1)`

			`engine = get_engine(db_url)`
			`Session = sessionmaker(bind=engine)`
			`session = Session()`
			`create_tables(engine)`

			`try:`
			`episode = select_episode(session, podcast_id, guid, title, link)`
			`if not episode:`
			`print("Episode not found.")`
			`sys.exit(1)`

			`if reprocess:`
			`episode.processing_status = 'reprocess'`
			`episode.skipped = False`
			`status = 'reprocess'`
			`else:`
			`episode.processing_status = 'skipped'`
			`episode.skipped = True`
			`status = 'skipped'`
			`session.commit()`
			`print(f"Episode '{episode.title}' status set to '{status}'.")`
			`finally:`
			`session.close()`

			`def delete_episode(db_url, podcast_id, guid=None, title=None, link=None):`
			`if not guid and not title and not link:`
			`print("You must provide either an article GUID, title, or link to identify the episode.")`
			`sys.exit(1)`

			`engine = get_engine(db_url)`
			`Session = sessionmaker(bind=engine)`
			`session = Session()`

			`try:`
			`episode = select_episode(session, podcast_id, guid, title, link)`
			`if not episode:`
			`print("Episode not found.")`
			`sys.exit(1)`
			`session.delete(episode)`
			`session.commit()`
			`print(f"Episode '{episode.title}' has been deleted from the database.")`
			`finally:`
			`session.close()`

			`def add_new_episode(db_url, args, config):`
			`content = sys.stdin.read()`
			`if not content.strip():`
			`print("No content provided. Please provide content via stdin.")`
			`sys.exit(1)`

			`podcast_id = config.get('podcast_id')`
			`if not podcast_id:`
			`podcast_id = config.get('output_rss_feed', {}).get('atom_link', {}).get('href', 'default_podcast_id')`

			`engine = get_engine(db_url)`
			`Session = sessionmaker(bind=engine)`
			`session = Session()`
			`create_tables(engine)`

			`try:`
			`# Generate GUID`
			`if args.guid:`
			`article_guid = args.guid`
			`elif args.link:`
			`article_guid = slugify(args.link)`
			`else:`
			`date_str = datetime.utcnow().strftime('%Y%m%d')`
			`title_slug = slugify(args.title)`
			`article_guid = f"{date_str}-{title_slug}"`

			`pub_date = args.date or datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S +0000')`

			`# Determine content format`
			`if args.html:`
			`is_markdown = False`
			`else:`
			`is_markdown = True # default is markdown`

			`content = clean_and_convert_content(content, is_markdown=is_markdown)`

			`episode = Episode(`
			`podcast_id=podcast_id,`
			`article_guid=article_guid,`
			`title=args.title,`
			`link=args.link or '',`
			`pub_date=pub_date,`
			`description=args.description or '',`
			`content=content,`
			`processing_status='pending',`
			`skipped=False`
			`)`
			`session.add(episode)`
			`session.commit()`
			`print(f"New episode '{args.title}' added to the database.")`
			`finally:`
			`session.close()`

			`if __name__ == "__main__":`
			`parser = argparse.ArgumentParser(description="Manage episodes in the database.")`
			`parser.add_argument("--config", default="config.json", help="Path to configuration file")`

			`group = parser.add_mutually_exclusive_group()`
			`group.add_argument("--new-episode", action="store_true", help="Add a new episode to the database")`
			`group.add_argument("--list-guids", action="store_true", help="List all episode GUIDs")`
			`group.add_argument("--list-links", action="store_true", help="List all episode links")`
			`group.add_argument("--list-titles", action="store_true", help="List all episode titles")`
			`group.add_argument("--skip", action="store_true", help="Mark an episode as skipped")`
			`group.add_argument("--reprocess", action="store_true", help="Set processing_status to 'reprocess'")`
			`group.add_argument("--delete", action="store_true", help="Delete an episode from the database")`

			`# Exclude for list`
			`parser.add_argument("--exclude-skipped", action="store_true", help="Exclude episodes marked for skipping from the list")`
			`parser.add_argument("--exclude-processed", action="store_true", help="Exclude processed episodes from the list")`
			`parser.add_argument("--exclude-pending", action="store_true", help="Exclude pending episodes from the list")`
			`parser.add_argument("--exclude-reprocess", action="store_true", help="Exclude episodes marked for reprocessing from the list")`

			`parser.add_argument("--guid", help="GUID of the episode to update, add, or delete")`
			`parser.add_argument("--title", help="Title of the episode to update, add, or delete")`
			`parser.add_argument("--link", help="Link of the episode to update, add, or delete")`
			`parser.add_argument("--description", help="Description of the new episode")`
			`parser.add_argument("--date", help="Publication date of the new episode")`
			`parser.add_argument("--db", help="Database filename or connection string")`

			`format_group = parser.add_mutually_exclusive_group()`
			`format_group.add_argument("--markdown", action="store_true", help="Content is in Markdown format (default)")`
			`format_group.add_argument("--html", action="store_true", help="Content is in HTML format")`

			`args = parser.parse_args()`

			`# Load configuration`
			`try:`
			`with open(args.config, 'r') as f:`
			`config = json.load(f)`
			`config['config_file_path'] = args.config`
			`except Exception as e:`
			`print(f"Error loading configuration file: {e}")`
			`sys.exit(1)`

			`# Set default db filename or connection string from config if not provided`
			`db_url = args.db or config.get('database', 'episodes.db')`

			`podcast_id = config.get('podcast_id')`
			`if not podcast_id:`
			`podcast_id = config.get('output_rss_feed', {}).get('atom_link', {}).get('href', 'default_podcast_id')`

			`exclude_options = {`
			`'exclude_skipped': args.exclude_skipped,`
			`'exclude_processed': args.exclude_processed,`
			`'exclude_pending': args.exclude_pending,`
			`'exclude_reprocess': args.exclude_reprocess`
			`}`

			`if args.list_guids:`
			`list_episodes(db_url, 'article_guid', exclude_options, podcast_id)`
			`elif args.list_links:`
			`list_episodes(db_url, 'link', exclude_options, podcast_id)`
			`elif args.list_titles:`
			`list_episodes(db_url, 'title', exclude_options, podcast_id)`
			`elif args.new_episode:`
			`add_new_episode(db_url, args, config)`
			`elif args.skip or args.reprocess:`
			`mark_episode_skipped(db_url, podcast_id, guid=args.guid, title=args.title, link=args.link, reprocess=args.reprocess)`
			`elif args.delete:`
			`delete_episode(db_url, podcast_id, guid=args.guid, title=args.title, link=args.link)`
			`else:`
			`print("Error: No command supplied. Please use one of the available options.")`
			`parser.print_help()`
			`sys.exit(1)`