mirror of
https://github.com/jooray/rss2podcast.git
synced 2025-05-23 16:02:00 +00:00
216 lines
8.5 KiB
Python
Executable File
216 lines
8.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import argparse
|
|
import sys
|
|
import json
|
|
from datetime import datetime
|
|
from utils import get_engine, create_tables, Episode, slugify
|
|
from sqlalchemy.orm import sessionmaker
|
|
from content_processing import clean_and_convert_content
|
|
|
|
def list_episodes(db_url, list_type, exclude_options, podcast_id):
    """Print one attribute of every matching episode, one value per line.

    Args:
        db_url: Value handed to ``get_engine`` to obtain the database engine.
        list_type: Name of the ``Episode`` attribute to print; the script's
            entry point passes ``'article_guid'``, ``'link'`` or ``'title'``.
        exclude_options: Mapping of option name to flag. The recognised keys
            are ``exclude_skipped``, ``exclude_processed``,
            ``exclude_pending`` and ``exclude_reprocess``; a truthy value
            removes the corresponding episodes from the listing.
        podcast_id: Only episodes whose ``podcast_id`` equals this value are
            listed.
    """
    # Option name -> processing_status value that is filtered out when set.
    status_exclusions = (
        ('exclude_processed', 'processed'),
        ('exclude_pending', 'pending'),
        ('exclude_reprocess', 'reprocess'),
    )

    session_factory = sessionmaker(bind=get_engine(db_url))
    db_session = session_factory()

    try:
        selection = db_session.query(Episode).filter(Episode.podcast_id == podcast_id)
        if exclude_options.get('exclude_skipped'):
            # "== False" is intentional: it is passed to filter() as a query
            # criterion, not evaluated as a Python truth test.
            selection = selection.filter(Episode.skipped == False)  # noqa: E712
        for option_name, status in status_exclusions:
            if exclude_options.get(option_name):
                selection = selection.filter(Episode.processing_status != status)

        matches = selection.all()
    finally:
        db_session.close()

    # Printing happens after the session is closed; the rows were fully
    # fetched by .all() above.
    for match in matches:
        print(getattr(match, list_type))
|
|
|
|
def select_episode(session, podcast_id, guid=None, title=None, link=None):
    """Return the first episode of a podcast matching one identifier.

    Only one identifier is used, chosen in priority order:

    1. ``guid``  - exact match on ``Episode.article_guid``
    2. ``title`` - substring match (SQL ``LIKE '%title%'``)
    3. ``link``  - substring match (SQL ``LIKE '%link%'``)

    Args:
        session: Session object used to run the query.
        podcast_id: Restricts the search to episodes with this ``podcast_id``.
        guid: Article GUID to match exactly.
        title: Text that must appear in the episode title.
        link: Text that must appear in the episode link.

    Returns:
        The first matching ``Episode``, or ``None`` when nothing matches or
        when no non-empty identifier was supplied.
    """
    if not (guid or title or link):
        # Without this guard the link branch would search for the pattern
        # "%None%" (or "%%" for an empty link, which matches every row) and
        # could hand an unrelated episode to a caller that deletes or
        # modifies it.
        return None

    query = session.query(Episode).filter(Episode.podcast_id == podcast_id)
    if guid:
        query = query.filter(Episode.article_guid == guid)
    elif title:
        query = query.filter(Episode.title.like(f"%{title}%"))
    else:
        query = query.filter(Episode.link.like(f"%{link}%"))
    return query.first()
|
|
|
|
def mark_episode_skipped(db_url, podcast_id, guid=None, title=None, link=None, reprocess=False):
    """Flag one episode as skipped, or queue it for reprocessing.

    The episode is located with ``select_episode`` using ``guid``, ``title``
    or ``link``. With ``reprocess`` false the episode gets
    ``processing_status='skipped'`` and ``skipped=True``; with ``reprocess``
    true it gets ``processing_status='reprocess'`` and ``skipped=False``.
    The change is committed and a confirmation line is printed.

    Args:
        db_url: Value handed to ``get_engine`` to obtain the database engine.
        podcast_id: Podcast the episode belongs to.
        guid: Article GUID identifying the episode.
        title: Title text identifying the episode.
        link: Link text identifying the episode.
        reprocess: Selects the "reprocess" outcome instead of "skipped".

    Raises:
        SystemExit: With exit code 1 when no identifier is given or when no
            episode is found.
    """
    if not (guid or title or link):
        print("You must provide either an article GUID, title, or link to identify the episode.")
        sys.exit(1)

    engine = get_engine(db_url)
    db_session = sessionmaker(bind=engine)()
    # Presumably makes sure the schema exists before querying - see
    # utils.create_tables for the exact behaviour.
    create_tables(engine)

    try:
        target = select_episode(db_session, podcast_id, guid, title, link)
        if not target:
            print("Episode not found.")
            sys.exit(1)

        # Both stored fields follow directly from the requested outcome.
        status = 'reprocess' if reprocess else 'skipped'
        target.processing_status = status
        target.skipped = not reprocess

        db_session.commit()
        print(f"Episode '{target.title}' status set to '{status}'.")
    finally:
        # Runs on the sys.exit() path as well, so the session is always closed.
        db_session.close()
|
|
|
|
def delete_episode(db_url, podcast_id, guid=None, title=None, link=None):
    """Delete one episode from the database and print a confirmation.

    The episode is located with ``select_episode`` using ``guid``, ``title``
    or ``link``.

    Args:
        db_url: Value handed to ``get_engine`` to obtain the database engine.
        podcast_id: Podcast the episode belongs to.
        guid: Article GUID identifying the episode.
        title: Title text identifying the episode.
        link: Link text identifying the episode.

    Raises:
        SystemExit: With exit code 1 when no identifier is given or when no
            episode is found.
    """
    if not guid and not title and not link:
        print("You must provide either an article GUID, title, or link to identify the episode.")
        sys.exit(1)

    engine = get_engine(db_url)
    Session = sessionmaker(bind=engine)
    session = Session()

    try:
        episode = select_episode(session, podcast_id, guid, title, link)
        if not episode:
            print("Episode not found.")
            sys.exit(1)

        # Capture the title before the delete is committed, so the
        # confirmation message does not depend on reading attributes from an
        # object whose row is gone and which no longer belongs to the session.
        deleted_title = episode.title
        session.delete(episode)
        session.commit()
        print(f"Episode '{deleted_title}' has been deleted from the database.")
    finally:
        # Runs on the sys.exit() path as well, so the session is always closed.
        session.close()
|
|
|
|
def add_new_episode(db_url, args, config):
    """Create a pending episode from content read on standard input.

    The episode GUID is ``args.guid`` when given, otherwise a slug of
    ``args.link``, otherwise ``<UTC date YYYYMMDD>-<slug of args.title>``.
    The publication date is ``args.date`` when given, otherwise the current
    UTC time formatted as ``'%a, %d %b %Y %H:%M:%S +0000'``. The content is
    passed through ``clean_and_convert_content`` (treated as Markdown unless
    ``args.html`` is set) and stored with ``processing_status='pending'`` and
    ``skipped=False``.

    Args:
        db_url: Value handed to ``get_engine`` to obtain the database engine.
        args: Parsed command-line namespace; reads ``guid``, ``link``,
            ``title``, ``date``, ``description`` and ``html``.
        config: Configuration mapping; ``podcast_id`` is used when present,
            else ``output_rss_feed.atom_link.href``, else the literal
            ``'default_podcast_id'``.

    Raises:
        SystemExit: With exit code 1 when stdin is empty or whitespace-only,
            or when none of GUID, link and title is available to derive the
            episode GUID.
    """
    # Function-scope import: only this function needs timezone, and it keeps
    # the file's top-level import block untouched.
    from datetime import timezone

    content = sys.stdin.read()
    if not content.strip():
        print("No content provided. Please provide content via stdin.")
        sys.exit(1)

    if not (args.guid or args.link or args.title):
        # Fail with a clear message before touching the database, instead of
        # passing a missing title to slugify().
        print("You must provide a GUID, link, or title to identify the new episode.")
        sys.exit(1)

    podcast_id = config.get('podcast_id')
    if not podcast_id:
        podcast_id = config.get('output_rss_feed', {}).get('atom_link', {}).get('href', 'default_podcast_id')

    engine = get_engine(db_url)
    Session = sessionmaker(bind=engine)
    session = Session()
    create_tables(engine)

    try:
        # One timezone-aware UTC timestamp serves both the GUID date and the
        # default pub_date, so the two can never disagree across midnight.
        # datetime.utcnow() is deprecated since Python 3.12.
        now = datetime.now(timezone.utc)

        # Generate GUID
        if args.guid:
            article_guid = args.guid
        elif args.link:
            article_guid = slugify(args.link)
        else:
            date_str = now.strftime('%Y%m%d')
            title_slug = slugify(args.title)
            article_guid = f"{date_str}-{title_slug}"

        pub_date = args.date or now.strftime('%a, %d %b %Y %H:%M:%S +0000')

        # Markdown is the default input format; --html switches it off.
        is_markdown = not args.html

        content = clean_and_convert_content(content, is_markdown=is_markdown)

        episode = Episode(
            podcast_id=podcast_id,
            article_guid=article_guid,
            title=args.title,
            link=args.link or '',
            pub_date=pub_date,
            description=args.description or '',
            content=content,
            processing_status='pending',
            skipped=False,
        )
        session.add(episode)
        session.commit()
        print(f"New episode '{args.title}' added to the database.")
    finally:
        session.close()
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, load the JSON
    # configuration, then run exactly one episode-management command.
    cli = argparse.ArgumentParser(description="Manage episodes in the database.")
    cli.add_argument("--config", default="config.json", help="Path to configuration file")

    # Commands; argparse rejects an invocation that combines two of them.
    commands = cli.add_mutually_exclusive_group()
    for flag, help_text in (
        ("--new-episode", "Add a new episode to the database"),
        ("--list-guids", "List all episode GUIDs"),
        ("--list-links", "List all episode links"),
        ("--list-titles", "List all episode titles"),
        ("--skip", "Mark an episode as skipped"),
        ("--reprocess", "Set processing_status to 'reprocess'"),
        ("--delete", "Delete an episode from the database"),
    ):
        commands.add_argument(flag, action="store_true", help=help_text)

    # Exclude for list
    for flag, help_text in (
        ("--exclude-skipped", "Exclude episodes marked for skipping from the list"),
        ("--exclude-processed", "Exclude processed episodes from the list"),
        ("--exclude-pending", "Exclude pending episodes from the list"),
        ("--exclude-reprocess", "Exclude episodes marked for reprocessing from the list"),
    ):
        cli.add_argument(flag, action="store_true", help=help_text)

    # Value-taking options: episode identifiers, new-episode metadata, database.
    for flag, help_text in (
        ("--guid", "GUID of the episode to update, add, or delete"),
        ("--title", "Title of the episode to update, add, or delete"),
        ("--link", "Link of the episode to update, add, or delete"),
        ("--description", "Description of the new episode"),
        ("--date", "Publication date of the new episode"),
        ("--db", "Database filename or connection string"),
    ):
        cli.add_argument(flag, help=help_text)

    content_format = cli.add_mutually_exclusive_group()
    content_format.add_argument("--markdown", action="store_true", help="Content is in Markdown format (default)")
    content_format.add_argument("--html", action="store_true", help="Content is in HTML format")

    args = cli.parse_args()

    # Load configuration
    try:
        with open(args.config, 'r') as config_file:
            config = json.load(config_file)
        # Record where the configuration came from inside the mapping itself.
        config['config_file_path'] = args.config
    except Exception as load_error:
        print(f"Error loading configuration file: {load_error}")
        sys.exit(1)

    # Set default db filename or connection string from config if not provided
    db_url = args.db or config.get('database', 'episodes.db')

    # Explicit podcast_id wins; otherwise fall back to the feed's atom link
    # href, and finally to a fixed placeholder.
    podcast_id = config.get('podcast_id') or config.get('output_rss_feed', {}).get('atom_link', {}).get('href', 'default_podcast_id')

    exclude_options = {
        option: getattr(args, option)
        for option in ('exclude_skipped', 'exclude_processed', 'exclude_pending', 'exclude_reprocess')
    }

    # Each --list-* command prints a different Episode attribute.
    if args.list_guids:
        listed_attribute = 'article_guid'
    elif args.list_links:
        listed_attribute = 'link'
    elif args.list_titles:
        listed_attribute = 'title'
    else:
        listed_attribute = None

    if listed_attribute is not None:
        list_episodes(db_url, listed_attribute, exclude_options, podcast_id)
    elif args.new_episode:
        add_new_episode(db_url, args, config)
    elif args.skip or args.reprocess:
        mark_episode_skipped(db_url, podcast_id, guid=args.guid, title=args.title, link=args.link, reprocess=args.reprocess)
    elif args.delete:
        delete_episode(db_url, podcast_id, guid=args.guid, title=args.title, link=args.link)
    else:
        print("Error: No command supplied. Please use one of the available options.")
        cli.print_help()
        sys.exit(1)
|