Coverage for apps / recipes / services / image_cache.py: 89%
108 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-14 19:13 +0000
1"""
2Search result image caching service for iOS 9 compatibility.
4Implements fire-and-forget batch downloads to cache external recipe images
5locally, avoiding CORS and security issues on older Safari browsers.
6"""
8import asyncio
9import hashlib
10import io
11import logging
12from pathlib import Path
13from urllib.parse import urlparse
15from asgiref.sync import sync_to_async
16from curl_cffi.requests import AsyncSession
17from django.core.files.base import ContentFile
18from PIL import Image
20from apps.recipes.services.fingerprint import BROWSER_PROFILES
22logger = logging.getLogger(__name__)
25class SearchImageCache:
26 """
27 Service for caching search result images to local storage.
29 Enables iOS 9 Safari compatibility by downloading external recipe images
30 to the server immediately (fire-and-forget), then returning local URLs
31 that don't trigger CORS restrictions.
33 Browser profiles are centralized in fingerprint.py for maintainability.
34 """
36 MAX_CONCURRENT = 5
37 DOWNLOAD_TIMEOUT = 15
39 async def cache_images(self, image_urls: list) -> None:
40 """
41 Fire-and-forget batch download of search result images.
43 Args:
44 image_urls: List of external image URLs to cache
46 Returns:
47 None (errors logged but not raised)
48 """
49 if not image_urls:
50 return
52 # Create semaphore to limit concurrent downloads
53 semaphore = asyncio.Semaphore(self.MAX_CONCURRENT)
55 # Create download tasks
56 tasks = [self._download_and_save(None, semaphore, url) for url in image_urls]
58 # Run concurrently without awaiting completion
59 if tasks:
60 await asyncio.gather(*tasks, return_exceptions=True)
62 async def _download_and_save(self, session: AsyncSession, semaphore: asyncio.Semaphore, url: str) -> None:
63 """
64 Download and cache a single image with status tracking.
66 Args:
67 session: AsyncSession (can be None, will create if needed)
68 semaphore: Semaphore to limit concurrent downloads
69 url: External image URL to cache
70 """
71 # Import here to avoid circular imports
72 from apps.recipes.models import CachedSearchImage
74 async with semaphore:
75 try:
76 # Get or create cache record
77 cached, created = await sync_to_async(CachedSearchImage.objects.get_or_create)(
78 external_url=url, defaults={"status": CachedSearchImage.STATUS_PENDING}
79 )
81 # Skip if already successfully cached
82 if cached.status == CachedSearchImage.STATUS_SUCCESS and cached.image:
83 return
85 # Download image
86 image_data = await self._fetch_image(url)
87 if not image_data:
88 cached.status = CachedSearchImage.STATUS_FAILED
89 await sync_to_async(cached.save)(update_fields=["status"])
90 return
92 # Convert to JPEG for iOS 9 compatibility (no WebP support)
93 converted_data = self._convert_to_jpeg(image_data)
94 if not converted_data:
95 cached.status = CachedSearchImage.STATUS_FAILED
96 await sync_to_async(cached.save)(update_fields=["status"])
97 return
99 # Generate filename and save
100 filename = self._generate_filename(url)
101 cached.image = ContentFile(converted_data, name=filename)
102 cached.status = CachedSearchImage.STATUS_SUCCESS
103 await sync_to_async(cached.save)(update_fields=["image", "status"])
104 logger.info(f"Cached image from {url}")
106 except Exception as e:
107 logger.error(f"Failed to cache image from {url}: {e}")
108 # Try to mark as failed if we have a record
109 try:
110 from apps.recipes.models import CachedSearchImage
112 cached = await sync_to_async(CachedSearchImage.objects.get)(external_url=url)
113 cached.status = CachedSearchImage.STATUS_FAILED
114 await sync_to_async(cached.save)(update_fields=["status"])
115 except Exception:
116 pass
118 async def _fetch_image(self, url: str) -> bytes | None:
119 """
120 Fetch image content from URL with browser profile fallback.
122 Tries multiple browser profiles if initial request fails.
123 Browser profiles are configured in fingerprint.py.
125 Args:
126 url: Image URL to fetch
128 Returns:
129 Image bytes or None if fetch fails
130 """
131 if not self._is_image_url(url):
132 return None
134 # Try each browser profile until one succeeds
135 for profile in BROWSER_PROFILES:
136 try:
137 async with AsyncSession(impersonate=profile) as session:
138 response = await session.get(
139 url,
140 timeout=self.DOWNLOAD_TIMEOUT,
141 allow_redirects=True,
142 )
144 if response.status_code == 200:
145 content_type = response.headers.get("content-type", "")
146 if "image" in content_type:
147 return response.content
149 except Exception as e:
150 logger.debug(f"Failed to fetch image {url} with {profile}: {e}")
151 continue
153 return None
155 async def get_cached_urls_batch(self, urls: list) -> dict:
156 """
157 Batch lookup of cached image URLs for API response.
159 Args:
160 urls: List of external image URLs to check
162 Returns:
163 Dict mapping external_url → cached_image_url (or None if not cached)
164 """
165 if not urls:
166 return {}
168 # Import here to avoid circular imports
169 from apps.recipes.models import CachedSearchImage
171 # Query all at once
172 cached_images = await sync_to_async(
173 lambda: list(
174 CachedSearchImage.objects.filter(
175 external_url__in=urls,
176 status=CachedSearchImage.STATUS_SUCCESS,
177 image__isnull=False,
178 ).exclude(image="")
179 )
180 )()
182 # Build result dict
183 result = {}
184 for cached in cached_images:
185 if cached.image:
186 result[cached.external_url] = cached.image.url
187 # Update access time without saving to DB unnecessarily
188 # (updated via auto_now on next modification)
190 return result
192 def _generate_filename(self, image_url: str) -> str:
193 """
194 Generate unique hash-based filename for cached image.
196 Args:
197 image_url: External image URL
199 Returns:
200 Filename like 'search_{hash}.{ext}'
201 """
202 # Create hash from URL for uniqueness (not for security)
203 url_hash = hashlib.md5(image_url.encode(), usedforsecurity=False).hexdigest()[:12]
205 # Get extension from image URL
206 ext = ".jpg" # default
207 if image_url:
208 parsed = urlparse(image_url)
209 path_ext = Path(parsed.path).suffix.lower()
210 if path_ext in (".jpg", ".jpeg", ".png", ".gif", ".webp"):
211 ext = path_ext
213 return f"search_{url_hash}{ext}"
215 def _convert_to_jpeg(self, image_data: bytes) -> bytes | None:
216 """
217 Convert image to JPEG format for iOS 9 compatibility.
219 iOS 9 Safari doesn't support WebP (added in Safari 14/iOS 14).
220 This converts any image format (WebP, PNG, etc.) to JPEG.
222 Args:
223 image_data: Raw image bytes in any format
225 Returns:
226 JPEG image bytes, or None if conversion fails
227 """
228 try:
229 # Open image from bytes
230 img = Image.open(io.BytesIO(image_data))
232 # Convert RGBA to RGB (JPEG doesn't support transparency)
233 if img.mode in ("RGBA", "LA", "P"):
234 # Create white background
235 background = Image.new("RGB", img.size, (255, 255, 255))
236 if img.mode == "P":
237 img = img.convert("RGBA")
238 background.paste(img, mask=img.split()[-1] if img.mode in ("RGBA", "LA") else None)
239 img = background
240 elif img.mode != "RGB":
241 img = img.convert("RGB")
243 # Save as JPEG
244 output = io.BytesIO()
245 img.save(output, format="JPEG", quality=92, optimize=True)
246 return output.getvalue()
248 except Exception as e:
249 logger.error(f"Failed to convert image to JPEG: {e}")
250 return None
252 def _is_image_url(self, url: str) -> bool:
253 """
254 Check if URL looks like an image based on extension.
256 Args:
257 url: URL to check
259 Returns:
260 True if URL has image extension, False otherwise
261 """
262 image_extensions = (".jpg", ".jpeg", ".png", ".gif", ".webp")
263 parsed = urlparse(url)
264 return parsed.path.lower().endswith(image_extensions)