Coverage for apps/recipes/services/image_cache.py: 95% (108 statements)
1"""
2Search result image caching service for iOS 9 compatibility.
4Implements fire-and-forget batch downloads to cache external recipe images
5locally, avoiding CORS and security issues on older Safari browsers.
6"""
8import asyncio
9import hashlib
10import io
11import logging
from urllib.parse import urlparse

from asgiref.sync import sync_to_async
from curl_cffi.requests import AsyncSession
from django.core.files.base import ContentFile
from PIL import Image

from apps.recipes.services.fingerprint import BROWSER_PROFILES

logger = logging.getLogger(__name__)


class SearchImageCache:
    """
    Service for caching search result images to local storage.

    Enables iOS 9 Safari compatibility by downloading external recipe images
    to the server immediately (fire-and-forget), then returning local URLs
    that don't trigger CORS restrictions.

    Browser profiles are centralized in fingerprint.py for maintainability.
    """

    MAX_CONCURRENT = 5
    DOWNLOAD_TIMEOUT = 15

    async def cache_images(self, image_urls: list) -> None:
        """
        Batch download of search result images.

        Intended to be scheduled fire-and-forget by the caller; this method
        itself awaits every download and swallows their errors.

        Args:
            image_urls: List of external image URLs to cache

        Returns:
            None (errors are logged but never raised)
        """
        if not image_urls:
            return

        # Limit the number of concurrent downloads
        semaphore = asyncio.Semaphore(self.MAX_CONCURRENT)

        # One download task per URL
        tasks = [
            self._download_and_save(None, semaphore, url)
            for url in image_urls
        ]

        # Run downloads concurrently; per-task exceptions are captured, not raised
        await asyncio.gather(*tasks, return_exceptions=True)
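
    # Illustrative usage (a sketch, not part of this module): a caller such as
    # an async search view would typically schedule the batch download without
    # awaiting it, so the HTTP response is not delayed by image fetching:
    #
    #     cache = SearchImageCache()
    #     asyncio.create_task(cache.cache_images([r["image_url"] for r in results]))
    #
    # `results` and its "image_url" key are assumptions used for illustration.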

    async def _download_and_save(self, session: AsyncSession, semaphore: asyncio.Semaphore, url: str) -> None:
        """
        Download and cache a single image with status tracking.

        Args:
            session: Currently unused; each fetch opens its own AsyncSession
            semaphore: Semaphore to limit concurrent downloads
            url: External image URL to cache
        """
        # Import here to avoid circular imports
        from apps.recipes.models import CachedSearchImage

        async with semaphore:
            try:
                # Get or create cache record
                cached, created = await sync_to_async(
                    CachedSearchImage.objects.get_or_create
                )(
                    external_url=url,
                    defaults={'status': CachedSearchImage.STATUS_PENDING}
                )

                # Skip if already successfully cached
                if cached.status == CachedSearchImage.STATUS_SUCCESS and cached.image:
                    return

                # Download image
                image_data = await self._fetch_image(url)
                if not image_data:
                    cached.status = CachedSearchImage.STATUS_FAILED
                    await sync_to_async(cached.save)(update_fields=['status'])
                    return

                # Convert to JPEG for iOS 9 compatibility (no WebP support)
                converted_data = self._convert_to_jpeg(image_data)
                if not converted_data:
                    cached.status = CachedSearchImage.STATUS_FAILED
                    await sync_to_async(cached.save)(update_fields=['status'])
                    return

                # Generate filename and save
                filename = self._generate_filename(url)
                cached.image = ContentFile(converted_data, name=filename)
                cached.status = CachedSearchImage.STATUS_SUCCESS
                await sync_to_async(cached.save)(update_fields=['image', 'status'])
                logger.info(f"Cached image from {url}")

            except Exception as e:
                logger.error(f"Failed to cache image from {url}: {e}")
                # Mark the record as failed if one exists
                try:
                    cached = await sync_to_async(
                        CachedSearchImage.objects.get
                    )(external_url=url)
                    cached.status = CachedSearchImage.STATUS_FAILED
                    await sync_to_async(cached.save)(update_fields=['status'])
                except Exception:
                    pass
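
    # Status lifecycle of the CachedSearchImage records handled above:
    #   PENDING (created) -> SUCCESS (downloaded, converted and saved)
    #                     -> FAILED  (fetch/conversion failed or an exception)
    # Records already marked SUCCESS are skipped, so repeating a search is cheap.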

    async def _fetch_image(self, url: str) -> bytes | None:
        """
        Fetch image content from URL with browser profile fallback.

        Tries multiple browser profiles if the initial request fails.
        Browser profiles are configured in fingerprint.py.

        Args:
            url: Image URL to fetch

        Returns:
            Image bytes or None if fetch fails
        """
        if not self._is_image_url(url):
            return None

        # Try each browser profile until one succeeds
        for profile in BROWSER_PROFILES:
            try:
                async with AsyncSession(impersonate=profile) as session:
                    response = await session.get(
                        url,
                        timeout=self.DOWNLOAD_TIMEOUT,
                        allow_redirects=True,
                    )

                    if response.status_code == 200:
                        content_type = response.headers.get('content-type', '')
                        if 'image' in content_type:
                            return response.content

            except Exception as e:
                logger.debug(f"Failed to fetch image {url} with {profile}: {e}")
                continue

        return None
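
    # Note on the fallback above: BROWSER_PROFILES (defined in fingerprint.py,
    # not shown here) is assumed to be an ordered iterable of curl_cffi
    # impersonation targets, e.g. strings such as "chrome110"; the exact
    # profile names used by this project are not visible in this module.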

    async def get_cached_urls_batch(self, urls: list) -> dict:
        """
        Batch lookup of cached image URLs for API response.

        Args:
            urls: List of external image URLs to check

        Returns:
            Dict mapping external_url to cached image URL; URLs that are not
            cached yet are omitted from the result
        """
        if not urls:
            return {}

        # Import here to avoid circular imports
        from apps.recipes.models import CachedSearchImage

        # Query all successfully cached images in one round trip
        cached_images = await sync_to_async(
            lambda: list(
                CachedSearchImage.objects.filter(
                    external_url__in=urls,
                    status=CachedSearchImage.STATUS_SUCCESS,
                    image__isnull=False,
                ).exclude(image='')
            )
        )()

        # Build result dict; access times are not written here to avoid
        # unnecessary DB writes (they update via auto_now on the next save)
        result = {}
        for cached in cached_images:
            if cached.image:
                result[cached.external_url] = cached.image.url

        return result
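
    # Illustrative usage (a sketch under assumed names, not part of this
    # module): an API layer could swap in local URLs before serialization:
    #
    #     cached_map = await SearchImageCache().get_cached_urls_batch(urls)
    #     for item in search_results:
    #         item["image_url"] = cached_map.get(item["image_url"], item["image_url"])
    #
    # `search_results` and its "image_url" key are assumptions for illustration.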

    def _generate_filename(self, image_url: str) -> str:
        """
        Generate unique hash-based filename for cached image.

        Args:
            image_url: External image URL

        Returns:
            Filename like 'search_{hash}.jpg'
        """
        # Create hash from URL for uniqueness
        url_hash = hashlib.md5(image_url.encode()).hexdigest()[:12]

        # Cached images are always converted to JPEG (see _convert_to_jpeg),
        # so the filename always uses a .jpg extension regardless of the
        # source URL's original format
        return f"search_{url_hash}.jpg"

    def _convert_to_jpeg(self, image_data: bytes) -> bytes | None:
        """
        Convert image to JPEG format for iOS 9 compatibility.

        iOS 9 Safari doesn't support WebP (added in Safari 14 / iOS 14).
        This converts any image format (WebP, PNG, etc.) to JPEG.

        Args:
            image_data: Raw image bytes in any format

        Returns:
            JPEG image bytes, or None if conversion fails
        """
        try:
            # Open image from bytes
            img = Image.open(io.BytesIO(image_data))

            # Convert RGBA/LA/P to RGB (JPEG doesn't support transparency)
            if img.mode in ('RGBA', 'LA', 'P'):
                # Paste onto a white background
                background = Image.new('RGB', img.size, (255, 255, 255))
                if img.mode == 'P':
                    img = img.convert('RGBA')
                background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
                img = background
            elif img.mode != 'RGB':
                img = img.convert('RGB')

            # Save as JPEG
            output = io.BytesIO()
            img.save(output, format='JPEG', quality=92, optimize=True)
            return output.getvalue()

        except Exception as e:
            logger.error(f"Failed to convert image to JPEG: {e}")
            return None
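
    # Illustrative check (a sketch, not part of the service): the converted
    # bytes should reopen as a JPEG:
    #
    #     jpeg_bytes = SearchImageCache()._convert_to_jpeg(raw_bytes)
    #     Image.open(io.BytesIO(jpeg_bytes)).format  # "JPEG"
    #
    # `raw_bytes` is a placeholder for any image payload (WebP, PNG, ...).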

    def _is_image_url(self, url: str) -> bool:
        """
        Check if URL looks like an image based on extension.

        Args:
            url: URL to check

        Returns:
            True if URL has an image extension, False otherwise
        """
        image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.webp')
        parsed = urlparse(url)
        return parsed.path.lower().endswith(image_extensions)