Using YouTube Transcripts for AI-Powered Summaries

Using YouTube Transcripts for AI-Powered Summaries

By Mohamed Messaad · 5 min read
tutorial
summaries
openai
python
use-cases

Learn how to create accurate, insightful summaries from YouTube videos using CleanScript's transcript API and LLMs

Using YouTube Transcripts for AI-Powered Summaries

Creating summaries of YouTube videos has traditionally been a tedious, manual process. With CleanScript's high-quality transcript API and modern LLMs, you can now automate this process to create accurate, insightful summaries in seconds.

The Challenge of YouTube Summaries

Traditional YouTube transcript methods face several challenges:

  1. Poor formatting: Raw YouTube transcripts lack proper sentence structure and punctuation
  2. Missing context: No semantic chunking or chapter information
  3. Inconsistent quality: Automated captions contain errors and are difficult to parse

CleanScript solves these issues, making it possible to create high-quality summaries with minimal effort.

Setting Up Your Summary Pipeline

Let's create a Python script that takes a YouTube video ID and generates different types of summaries.

First, install the necessary packages:

pip install requests openai

Basic Summary Generator

Here's a complete example using CleanScript and OpenAI to create summaries:

import requests
import openai
import json
import sys

# Configure API keys
# NOTE(review): hard-coded keys are placeholders for the tutorial only —
# in real code load them from environment variables or a secrets manager.
CLEANSCRIPT_API_KEY = "your-cleanscript-api-key"
openai.api_key = "your-openai-api-key"

def get_transcript(video_id):
    """Retrieve the cleaned transcript for a YouTube video from CleanScript.

    On any non-200 response, prints the API's error detail (when present)
    and exits the process with status 1.
    """
    url = f"https://api.cleanscript.ai/v1/transcript?videoId={video_id}"
    auth_header = {"Authorization": f"Bearer {CLEANSCRIPT_API_KEY}"}

    response = requests.get(url, headers=auth_header)

    if response.status_code == 200:
        return response.json()

    # Surface the upstream error detail, then abort the script.
    print(f"Error: {response.json().get('detail', 'Unknown error')}")
    sys.exit(1)

def create_summary(transcript_data, summary_type="concise"):
    """Generate a summary of a video transcript using OpenAI.

    Args:
        transcript_data: CleanScript API response dict; reads "title",
            "channel", and "chapters" (each chapter must have "title"
            and "transcript" keys).
        summary_type: One of "concise", "detailed", or "bullet-points".

    Returns:
        The generated summary text, stripped of surrounding whitespace.

    Raises:
        ValueError: If summary_type is not one of the supported styles.
    """
    # Extract video metadata to give the model context.
    video_title = transcript_data.get("title", "Untitled")
    channel = transcript_data.get("channel", "Unknown")

    # Combine all chapter transcripts into one markdown-style document.
    # join() avoids the quadratic cost of repeated string concatenation.
    full_transcript = "".join(
        f"## {chapter['title']}\n{chapter['transcript']}\n\n"
        for chapter in transcript_data.get("chapters", [])
    )

    # Build the prompt for the requested summary style.
    if summary_type == "concise":
        prompt = f"""
        Create a concise 3-5 sentence summary of this YouTube video:

        Title: {video_title}
        Channel: {channel}

        Transcript:
        {full_transcript[:4000]}...

        Summary:
        """
    elif summary_type == "detailed":
        prompt = f"""
        Create a detailed summary of this YouTube video with key points and insights:

        Title: {video_title}
        Channel: {channel}

        Transcript:
        {full_transcript[:4000]}...

        Detailed Summary:
        """
    elif summary_type == "bullet-points":
        prompt = f"""
        Create a bullet-point summary of the main points from this YouTube video:

        Title: {video_title}
        Channel: {channel}

        Transcript:
        {full_transcript[:4000]}...

        Bullet-Point Summary:
        """
    else:
        # BUG FIX: previously an unknown summary_type fell through every
        # branch, leaving `prompt` unbound and crashing later with an
        # UnboundLocalError. Fail fast with a clear message instead.
        raise ValueError(
            f"Unknown summary_type {summary_type!r}; "
            "expected 'concise', 'detailed', or 'bullet-points'"
        )

    # Generate the summary with OpenAI (legacy Completions API).
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=500,
        temperature=0.5
    )

    return response.choices[0].text.strip()

def main():
    """Command-line entry point: fetch a transcript and print its summary."""
    cli_args = sys.argv[1:]

    # A video ID is required; the summary type is optional.
    if not cli_args:
        print("Usage: python summarize.py <youtube_video_id> [summary_type]")
        sys.exit(1)

    video_id = cli_args[0]
    summary_type = cli_args[1] if len(cli_args) > 1 else "concise"

    # Fetch the cleaned transcript from CleanScript.
    print(f"Fetching transcript for video ID: {video_id}")
    transcript_data = get_transcript(video_id)

    # Hand the transcript to the LLM for summarization.
    print(f"Generating {summary_type} summary...")
    summary = create_summary(transcript_data, summary_type)

    # Print a banner with the video metadata, then the summary itself.
    divider = "=" * 50
    print("\n" + divider)
    print(f"Summary of: {transcript_data.get('title', 'Untitled')}")
    print(f"Channel: {transcript_data.get('channel', 'Unknown')}")
    print(divider)
    print(summary)

if __name__ == "__main__":
    main()

Run the script with:

python summarize.py dQw4w9WgXcQ concise

Advanced: Chapter-Based Summaries

One of the unique benefits of CleanScript is its automatic chapter detection. Let's leverage this to create chapter-by-chapter summaries:

def create_chapter_summaries(transcript_data):
    """Produce a short AI-generated summary for each chapter of a video.

    Returns a list of dicts with "title", "start_time" (seconds), and
    "summary" keys, skipping chapters too short to be worth summarizing.
    """
    video_title = transcript_data.get("title", "Untitled")
    results = []

    for entry in transcript_data.get("chapters", []):
        chapter_name = entry.get("title", "Untitled Chapter")
        chapter_text = entry.get("transcript", "")

        # Chapters under 20 words are too thin to summarize meaningfully.
        if len(chapter_text.split()) < 20:
            continue

        prompt = f"""
        Summarize this chapter from a YouTube video in 2-3 sentences:

        Video: {video_title}
        Chapter: {chapter_name}

        Transcript:
        {chapter_text}

        Summary:
        """

        completion = openai.Completion.create(
            engine="text-davinci-003",
            prompt=prompt,
            max_tokens=150,
            temperature=0.5
        )

        results.append({
            "title": chapter_name,
            "start_time": entry.get("startTime", 0),
            "summary": completion.choices[0].text.strip()
        })

    return results

# Then in your main function:
# Generate a per-chapter breakdown using the chapter metadata that
# CleanScript returns alongside the transcript, then print each one.
chapter_summaries = create_chapter_summaries(transcript_data)
print("\nCHAPTER SUMMARIES:")
for chapter in chapter_summaries:
    # start_time is the chapter's offset in seconds from the video start.
    print(f"\n## {chapter['title']} (starts at {chapter['start_time']} seconds)")
    print(chapter['summary'])

Sample Output: Before and After

Here's an example of what raw YouTube captions look like versus a CleanScript-powered summary:

Raw YouTube Captions:

hey everyone welcome to another video
today we're going to talk about machine
learning and its applications in
everyday life so machine learning is
basically a subset of artificial
intelligence that focuses on developing
algorithms that enable computers to
learn from data

CleanScript + OpenAI Summary:

CONCISE SUMMARY:
This video provides an accessible introduction to machine learning, explaining it as a subset of artificial intelligence that enables computers to learn from data without explicit programming. The presenter covers fundamental concepts including supervised and unsupervised learning, and demonstrates practical applications in everyday technology like recommendation systems, voice assistants, and medical diagnostics. The video emphasizes that while machine learning is transforming numerous industries, understanding its basic principles is increasingly important for everyone regardless of technical background.

Best Practices for AI Summaries

When generating summaries from YouTube videos, consider these best practices:

  1. Use semantic chunking: CleanScript's chunking option improves context understanding
  2. Leverage chapter information: Chapter-based summaries provide better structure
  3. Match summary type to content: Technical videos benefit from detailed summaries, while entertainment content works better with concise ones
  4. Include metadata: Video title, channel, and publication date add important context
  5. Consider multiple summary formats: Different users may prefer bullet points vs. prose

Building a Summary API

For those looking to implement this at scale, here's a simple Flask API that provides YouTube summaries:

from flask import Flask, request, jsonify
import requests
import openai
import os

app = Flask(__name__)

# Configure API keys from the environment (keeps secrets out of source).
# os.environ.get returns None if a variable is unset, so requests made
# without CLEANSCRIPT_API_KEY configured will fail upstream.
CLEANSCRIPT_API_KEY = os.environ.get("CLEANSCRIPT_API_KEY")
openai.api_key = os.environ.get("OPENAI_API_KEY")

@app.route('/summarize', methods=['GET'])
def summarize_video():
    """Return an AI-generated summary of a YouTube video as JSON.

    Query parameters:
        video_id: required YouTube video ID.
        type: optional summary style (defaults to "concise").

    Responses:
        200 with the summary payload on success;
        400 when video_id is missing;
        500 when the transcript fetch or summarization fails.
    """
    video_id = request.args.get('video_id')
    summary_type = request.args.get('type', 'concise')

    if not video_id:
        return jsonify({"error": "Missing video_id parameter"}), 400

    # Get transcript from CleanScript
    headers = {"Authorization": f"Bearer {CLEANSCRIPT_API_KEY}"}
    response = requests.get(
        f"https://api.cleanscript.ai/v1/transcript?videoId={video_id}",
        headers=headers
    )

    if response.status_code != 200:
        # BUG FIX: the upstream error body may not be JSON, in which case
        # response.json() raises and Flask returned an unhandled-exception
        # page instead of the intended JSON error payload.
        try:
            detail = response.json().get('detail', 'Failed to fetch transcript')
        except ValueError:
            detail = 'Failed to fetch transcript'
        return jsonify({"error": detail}), 500

    transcript_data = response.json()

    # Generate the summary; surface any failure as a JSON 500 so API
    # clients always receive a structured response.
    try:
        summary = create_summary(transcript_data, summary_type)
        return jsonify({
            "video_id": video_id,
            "title": transcript_data.get("title", ""),
            "channel": transcript_data.get("channel", ""),
            "summary_type": summary_type,
            "summary": summary
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500

if __name__ == '__main__':
    # debug=True enables the reloader and interactive debugger — suitable
    # for local development only; never run with debug enabled in production.
    app.run(debug=True)

Conclusion

Creating high-quality summaries from YouTube content has never been easier. By combining CleanScript's clean transcripts with modern LLMs, you can generate accurate, insightful summaries for research, content creation, or personal knowledge management.

In our next post, we'll explore how to combine these summaries with vector embeddings to create searchable knowledge bases from YouTube content.

Happy summarizing!