ReadKeep/readeck/Data/Utils/HTMLImageExtractor.swift
Ilyas Hallak b9f8e11782 Refactor offline sync to enforce Clean Architecture
Refactorings:
- Extract HTMLImageEmbedder and HTMLImageExtractor utilities
- Create UseCases for cached data access (GetCachedBookmarksUseCase, GetCachedArticleUseCase)
- Create CreateAnnotationUseCase to remove API dependency from ViewModel
- Simplify CachedAsyncImage by extracting helper methods
- Fix Kingfisher API compatibility (Source types, Result handling)
- Add documentation to OfflineCacheSyncUseCase
- Remove unused TestView from production code

Enforces Clean Architecture:
- ViewModels now only use UseCases, no direct Repository or API access
- All data layer access goes through Domain layer
2025-11-30 19:12:51 +01:00

64 lines
2.2 KiB
Swift

//
// HTMLImageExtractor.swift
// readeck
//
// Created by Ilyas Hallak on 30.11.25.
//
import Foundation
/// Utility for extracting image URLs from HTML content.
///
/// Extraction is regex-based rather than a full HTML parse, so it is a
/// best-effort scan of `<img>` tags and their `src` attributes.
struct HTMLImageExtractor {

    /// Matches the `src` attribute of an `<img>` tag.
    ///
    /// - `\ssrc` requires whitespace directly before the attribute name so that
    ///   look-alike attributes such as `data-src` are not captured.
    /// - `[^>]*?` is lazy so the first real `src` in the tag wins.
    /// - Capture group 1 holds a double-quoted value, group 2 a single-quoted one.
    ///
    /// Compiled once because the pattern never changes between calls.
    private static let imageSourceRegex: NSRegularExpression? = {
        let pattern = #"<img\b[^>]*?\ssrc\s*=\s*(?:"([^"]*)"|'([^']*)')"#
        return try? NSRegularExpression(pattern: pattern, options: [.caseInsensitive])
    }()

    /// Returns `true` when `url` starts with an `http://` or `https://` scheme
    /// (compared case-insensitively).
    private static func isAbsoluteHTTPURL(_ url: String) -> Bool {
        let lowercased = url.lowercased()
        return lowercased.hasPrefix("http://") || lowercased.hasPrefix("https://")
    }

    /// Extracts all image URLs from HTML using regex
    /// - Parameter html: The HTML string to parse
    /// - Returns: Array of absolute image URLs (http/https only), in document
    ///   order. Relative URLs, `data:` URIs and other schemes are skipped.
    func extract(from html: String) -> [String] {
        guard let regex = HTMLImageExtractor.imageSourceRegex else {
            return []
        }

        // NSRegularExpression reports UTF-16 based ranges, so slice via NSString.
        let nsHTML = html as NSString
        let fullRange = NSRange(location: 0, length: nsHTML.length)

        let imageURLs: [String] = regex
            .matches(in: html, options: [], range: fullRange)
            .compactMap { match -> String? in
                // Exactly one of the two quote-style groups participates in a match.
                let valueRange = (1..<match.numberOfRanges)
                    .map { match.range(at: $0) }
                    .first { $0.location != NSNotFound }
                guard let range = valueRange else { return nil }

                // Surrounding whitespace inside the quotes is not part of the URL.
                let url = nsHTML.substring(with: range)
                    .trimmingCharacters(in: .whitespacesAndNewlines)
                return HTMLImageExtractor.isAbsoluteHTTPURL(url) ? url : nil
            }

        Logger.sync.debug("Extracted \(imageURLs.count) image URLs from HTML")
        return imageURLs
    }

    /// Extracts image URLs from HTML and optionally prepends hero/thumbnail image
    /// - Parameters:
    ///   - html: The HTML string to parse
    ///   - heroImageURL: Optional hero image URL to prepend
    ///   - thumbnailURL: Optional thumbnail URL to prepend; used only when
    ///     `heroImageURL` is `nil`
    /// - Returns: Array of image URLs with hero/thumbnail first if provided,
    ///   followed by the URLs found in `html`. The prepended URL is inserted
    ///   as given (no scheme filtering, no de-duplication against the HTML).
    func extract(from html: String, heroImageURL: String? = nil, thumbnailURL: String? = nil) -> [String] {
        // With only `from:` supplied, this call is expected to bind to the
        // single-parameter overload above rather than recursing into this one.
        var imageURLs = extract(from: html)

        // Prepend hero or thumbnail image if available
        if let heroURL = heroImageURL {
            imageURLs.insert(heroURL, at: 0)
            Logger.sync.debug("Added hero image: \(heroURL)")
        } else if let thumbURL = thumbnailURL {
            imageURLs.insert(thumbURL, at: 0)
            Logger.sync.debug("Added thumbnail image: \(thumbURL)")
        }

        return imageURLs
    }
}