//
//  HTMLImageExtractorTests.swift
//  readeckTests
//
//  Created by Ilyas Hallak on 30.11.25.
//

import Testing
import Foundation
@testable import readeck

/// Tests for `HTMLImageExtractor.extract(from:)` and
/// `HTMLImageExtractor.extract(from:heroImageURL:thumbnailURL:)`.
///
/// The suite pins three expectations:
/// - only absolute `http`/`https` image URLs are returned,
/// - relative paths and `data:` URIs are skipped,
/// - a hero image (or, failing that, a thumbnail) is placed first in the result.
@Suite("HTMLImageExtractor Tests")
struct HTMLImageExtractorTests {

    // MARK: - Test Data

    // NOTE(review): the markup below is written to contain exactly the URLs the
    // assertions in this suite reference. Confirm the attribute layout matches
    // what `HTMLImageExtractor` is expected to parse in production HTML.

    /// Three `<img>` elements, all with absolute `https` sources.
    private let htmlWithImages = """
    <html>
    <body>
        <p>Image 1</p>
        <img src="https://example.com/image1.jpg" alt="Image 1">
        <img src="https://example.com/image2.png" alt="Image 2">
        <img src="https://example.com/image3.gif" alt="Image 3">
    </body>
    </html>
    """

    /// Two absolute sources mixed with one relative path and one `data:` URI.
    private let htmlWithMixedURLs = """
    <html>
    <body>
        <img src="https://absolute.com/img.jpg" alt="Absolute">
        <img src="/relative/path.jpg" alt="Relative">
        <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk" alt="Inline">
        <img src="https://another.com/photo.png" alt="Another">
    </body>
    </html>
    """

    /// Plain text content without any `<img>` element.
    private let htmlWithoutImages = """
    <html>
    <body>
        <p>This is just text content with no images.</p>
        <div>Some more content</div>
    </body>
    </html>
    """

    /// The empty document.
    private let htmlEmpty = ""

    // MARK: - Basic Functionality Tests

    @Test("Extract finds all absolute image URLs from HTML")
    func testExtractFindsAllImageURLs() {
        let extractor = HTMLImageExtractor()

        let imageURLs = extractor.extract(from: htmlWithImages)

        #expect(imageURLs.count == 3)
        #expect(imageURLs.contains("https://example.com/image1.jpg"))
        #expect(imageURLs.contains("https://example.com/image2.png"))
        #expect(imageURLs.contains("https://example.com/image3.gif"))
    }

    @Test("Extract only includes absolute URLs with http or https")
    func testExtractOnlyIncludesAbsoluteURLs() {
        let extractor = HTMLImageExtractor()

        let imageURLs = extractor.extract(from: htmlWithMixedURLs)

        #expect(imageURLs.count == 2)
        #expect(imageURLs.contains("https://absolute.com/img.jpg"))
        #expect(imageURLs.contains("https://another.com/photo.png"))

        // Verify relative and data URIs are NOT included
        #expect(!imageURLs.contains("/relative/path.jpg"))
        #expect(!imageURLs.contains(where: { $0.hasPrefix("data:") }))
    }

    @Test("Extract returns empty array when HTML has no images")
    func testExtractReturnsEmptyArrayWhenNoImages() {
        let extractor = HTMLImageExtractor()

        let imageURLs = extractor.extract(from: htmlWithoutImages)

        #expect(imageURLs.isEmpty)
    }

    // MARK: - Edge Case Tests

    @Test("Extract ignores relative URLs without http prefix")
    func testExtractIgnoresRelativeURLs() {
        // One absolute source surrounded by several flavours of relative path.
        let htmlWithRelative = """
        <html>
        <body>
            <img src="/images/local.jpg" alt="Root relative">
            <img src="../images/parent.jpg" alt="Parent relative">
            <img src="images/sibling.jpg" alt="Sibling relative">
            <img src="https://valid.com/image.jpg" alt="Valid">
        </body>
        </html>
        """
        let extractor = HTMLImageExtractor()

        let imageURLs = extractor.extract(from: htmlWithRelative)

        #expect(imageURLs.count == 1)
        #expect(imageURLs.first == "https://valid.com/image.jpg")
    }

    @Test("Extract handles empty HTML string")
    func testExtractHandlesEmptyHTML() {
        let extractor = HTMLImageExtractor()

        let imageURLs = extractor.extract(from: htmlEmpty)

        #expect(imageURLs.isEmpty)
    }

    @Test("Extract ignores data URI images")
    func testExtractIgnoresDataURIs() {
        // One inline `data:` image next to one real, absolute image.
        let htmlWithDataURI = """
        <html>
        <body>
            <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk" alt="Inline">
            <img src="https://example.com/real-image.jpg" alt="Real">
        </body>
        </html>
        """
        let extractor = HTMLImageExtractor()

        let imageURLs = extractor.extract(from: htmlWithDataURI)

        #expect(imageURLs.count == 1)
        #expect(imageURLs.first == "https://example.com/real-image.jpg")

        // Verify no data URIs are included
        for url in imageURLs {
            #expect(!url.hasPrefix("data:"))
        }
    }

    // MARK: - Hero/Thumbnail Tests

    @Test("Extract with hero image prepends it to array")
    func testExtractWithHeroImagePrependsToArray() {
        let extractor = HTMLImageExtractor()
        let heroURL = "https://example.com/hero.jpg"

        let imageURLs = extractor.extract(
            from: htmlWithImages,
            heroImageURL: heroURL,
            thumbnailURL: nil
        )

        #expect(imageURLs.count == 4) // 3 from HTML + 1 hero
        #expect(imageURLs.first == heroURL) // Hero should be at position 0
        #expect(imageURLs.contains("https://example.com/image1.jpg"))
    }

    @Test("Extract with thumbnail prepends it when no hero image")
    func testExtractWithThumbnailPrependsWhenNoHero() {
        let extractor = HTMLImageExtractor()
        let thumbnailURL = "https://example.com/thumbnail.jpg"

        let imageURLs = extractor.extract(
            from: htmlWithImages,
            heroImageURL: nil,
            thumbnailURL: thumbnailURL
        )

        #expect(imageURLs.count == 4) // 3 from HTML + 1 thumbnail
        #expect(imageURLs.first == thumbnailURL) // Thumbnail should be at position 0
    }

    @Test("Extract prefers hero image over thumbnail when both provided")
    func testExtractPrefersHeroOverThumbnail() {
        let extractor = HTMLImageExtractor()
        let heroURL = "https://example.com/hero.jpg"
        let thumbnailURL = "https://example.com/thumbnail.jpg"

        let imageURLs = extractor.extract(
            from: htmlWithImages,
            heroImageURL: heroURL,
            thumbnailURL: thumbnailURL
        )

        #expect(imageURLs.count == 4) // 3 from HTML + 1 hero (thumbnail ignored)
        #expect(imageURLs.first == heroURL) // Hero takes precedence
        #expect(!imageURLs.contains(thumbnailURL)) // Thumbnail should NOT be added
    }

    // Only a hero URL is supplied here (thumbnail is nil), so the display name
    // describes the hero-only case.
    @Test("Extract with hero image but no HTML images")
    func testExtractWithHeroAndNoHTMLImages() {
        let extractor = HTMLImageExtractor()
        let heroURL = "https://example.com/hero.jpg"

        let imageURLs = extractor.extract(
            from: htmlWithoutImages,
            heroImageURL: heroURL,
            thumbnailURL: nil
        )

        #expect(imageURLs.count == 1)
        #expect(imageURLs.first == heroURL)
    }
}