//
// HTMLImageExtractor.swift
// readeck
//
// Created by Ilyas Hallak on 30.11.25.
//
import Foundation
/// Utility for extracting image URLs from HTML content.
///
/// Extraction is regex-based rather than a full HTML parse, so only `<img>` tags whose
/// `src` attribute is written with double quotes are recognised.
struct HTMLImageExtractor {

    /// Extracts all image URLs from HTML using regex
    /// - Parameter html: The HTML string to parse
    /// - Returns: Array of absolute image URLs (http/https only), in document order.
    ///   Returns an empty array when the pattern cannot be compiled or nothing matches.
    func extract(from html: String) -> [String] {
        // Matches an `<img ...>` tag and captures the double-quoted `src` value in group 1.
        let pattern = #"<img[^>]+src="([^"]+)""#

        // HTML tag and attribute names are case-insensitive, so `<IMG SRC="...">` must match too.
        guard let regex = try? NSRegularExpression(pattern: pattern, options: [.caseInsensitive]) else {
            return []
        }

        // NSRegularExpression reports UTF-16 based NSRanges, so slice through NSString.
        let nsHTML = html as NSString
        let fullRange = NSRange(location: 0, length: nsHTML.length)

        let imageURLs = regex.matches(in: html, options: [], range: fullRange).compactMap { match -> String? in
            guard match.numberOfRanges >= 2 else { return nil }

            let urlRange = match.range(at: 1)
            guard urlRange.location != NSNotFound else { return nil }

            let url = nsHTML.substring(with: urlRange)
            // Only include absolute URLs; relative paths, data: URIs etc. are skipped.
            return Self.isAbsoluteHTTPURL(url) ? url : nil
        }

        Logger.sync.debug("Extracted \(imageURLs.count) image URLs from HTML")
        return imageURLs
    }

    /// Extracts image URLs from HTML and optionally prepends hero/thumbnail image
    /// - Parameters:
    ///   - html: The HTML string to parse
    ///   - heroImageURL: Optional hero image URL to prepend
    ///   - thumbnailURL: Optional thumbnail URL to prepend if no hero image
    /// - Returns: Array of image URLs with hero/thumbnail first if provided.
    ///   The hero image takes precedence; the thumbnail is used only when no hero image is given.
    ///   The prepended URL is inserted as-is (it is not filtered or de-duplicated).
    func extract(from html: String, heroImageURL: String? = nil, thumbnailURL: String? = nil) -> [String] {
        // Resolves to the single-argument overload above.
        var imageURLs = extract(from: html)

        // Prepend hero or thumbnail image if available
        if let heroURL = heroImageURL {
            imageURLs.insert(heroURL, at: 0)
            Logger.sync.debug("Added hero image: \(heroURL)")
        } else if let thumbURL = thumbnailURL {
            imageURLs.insert(thumbURL, at: 0)
            Logger.sync.debug("Added thumbnail image: \(thumbURL)")
        }

        return imageURLs
    }

    /// Returns `true` when `url` starts with an `http://` or `https://` scheme (case-insensitive).
    private static func isAbsoluteHTTPURL(_ url: String) -> Bool {
        let lowercased = url.lowercased()
        return lowercased.hasPrefix("http://") || lowercased.hasPrefix("https://")
    }
}