Coverage for apps / recipes / services / image_cache.py: 89%

108 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-14 19:13 +0000

1""" 

2Search result image caching service for iOS 9 compatibility. 

3 

4Implements fire-and-forget batch downloads to cache external recipe images 

5locally, avoiding CORS and security issues on older Safari browsers. 

6""" 

7 

8import asyncio 

9import hashlib 

10import io 

11import logging 

12from pathlib import Path 

13from urllib.parse import urlparse 

14 

15from asgiref.sync import sync_to_async 

16from curl_cffi.requests import AsyncSession 

17from django.core.files.base import ContentFile 

18from PIL import Image 

19 

20from apps.recipes.services.fingerprint import BROWSER_PROFILES 

21 

22logger = logging.getLogger(__name__) 

23 

24 

25class SearchImageCache: 

26 """ 

27 Service for caching search result images to local storage. 

28 

29 Enables iOS 9 Safari compatibility by downloading external recipe images 

30 to the server immediately (fire-and-forget), then returning local URLs 

31 that don't trigger CORS restrictions. 

32 

33 Browser profiles are centralized in fingerprint.py for maintainability. 

34 """ 

35 

36 MAX_CONCURRENT = 5 

37 DOWNLOAD_TIMEOUT = 15 

38 

39 async def cache_images(self, image_urls: list) -> None: 

40 """ 

41 Fire-and-forget batch download of search result images. 

42 

43 Args: 

44 image_urls: List of external image URLs to cache 

45 

46 Returns: 

47 None (errors logged but not raised) 

48 """ 

49 if not image_urls: 

50 return 

51 

52 # Create semaphore to limit concurrent downloads 

53 semaphore = asyncio.Semaphore(self.MAX_CONCURRENT) 

54 

55 # Create download tasks 

56 tasks = [self._download_and_save(None, semaphore, url) for url in image_urls] 

57 

58 # Run concurrently without awaiting completion 

59 if tasks: 

60 await asyncio.gather(*tasks, return_exceptions=True) 

61 

62 async def _download_and_save(self, session: AsyncSession, semaphore: asyncio.Semaphore, url: str) -> None: 

63 """ 

64 Download and cache a single image with status tracking. 

65 

66 Args: 

67 session: AsyncSession (can be None, will create if needed) 

68 semaphore: Semaphore to limit concurrent downloads 

69 url: External image URL to cache 

70 """ 

71 # Import here to avoid circular imports 

72 from apps.recipes.models import CachedSearchImage 

73 

74 async with semaphore: 

75 try: 

76 # Get or create cache record 

77 cached, created = await sync_to_async(CachedSearchImage.objects.get_or_create)( 

78 external_url=url, defaults={"status": CachedSearchImage.STATUS_PENDING} 

79 ) 

80 

81 # Skip if already successfully cached 

82 if cached.status == CachedSearchImage.STATUS_SUCCESS and cached.image: 

83 return 

84 

85 # Download image 

86 image_data = await self._fetch_image(url) 

87 if not image_data: 

88 cached.status = CachedSearchImage.STATUS_FAILED 

89 await sync_to_async(cached.save)(update_fields=["status"]) 

90 return 

91 

92 # Convert to JPEG for iOS 9 compatibility (no WebP support) 

93 converted_data = self._convert_to_jpeg(image_data) 

94 if not converted_data: 

95 cached.status = CachedSearchImage.STATUS_FAILED 

96 await sync_to_async(cached.save)(update_fields=["status"]) 

97 return 

98 

99 # Generate filename and save 

100 filename = self._generate_filename(url) 

101 cached.image = ContentFile(converted_data, name=filename) 

102 cached.status = CachedSearchImage.STATUS_SUCCESS 

103 await sync_to_async(cached.save)(update_fields=["image", "status"]) 

104 logger.info(f"Cached image from {url}") 

105 

106 except Exception as e: 

107 logger.error(f"Failed to cache image from {url}: {e}") 

108 # Try to mark as failed if we have a record 

109 try: 

110 from apps.recipes.models import CachedSearchImage 

111 

112 cached = await sync_to_async(CachedSearchImage.objects.get)(external_url=url) 

113 cached.status = CachedSearchImage.STATUS_FAILED 

114 await sync_to_async(cached.save)(update_fields=["status"]) 

115 except Exception: 

116 pass 

117 

118 async def _fetch_image(self, url: str) -> bytes | None: 

119 """ 

120 Fetch image content from URL with browser profile fallback. 

121 

122 Tries multiple browser profiles if initial request fails. 

123 Browser profiles are configured in fingerprint.py. 

124 

125 Args: 

126 url: Image URL to fetch 

127 

128 Returns: 

129 Image bytes or None if fetch fails 

130 """ 

131 if not self._is_image_url(url): 

132 return None 

133 

134 # Try each browser profile until one succeeds 

135 for profile in BROWSER_PROFILES: 

136 try: 

137 async with AsyncSession(impersonate=profile) as session: 

138 response = await session.get( 

139 url, 

140 timeout=self.DOWNLOAD_TIMEOUT, 

141 allow_redirects=True, 

142 ) 

143 

144 if response.status_code == 200: 

145 content_type = response.headers.get("content-type", "") 

146 if "image" in content_type: 

147 return response.content 

148 

149 except Exception as e: 

150 logger.debug(f"Failed to fetch image {url} with {profile}: {e}") 

151 continue 

152 

153 return None 

154 

155 async def get_cached_urls_batch(self, urls: list) -> dict: 

156 """ 

157 Batch lookup of cached image URLs for API response. 

158 

159 Args: 

160 urls: List of external image URLs to check 

161 

162 Returns: 

163 Dict mapping external_url → cached_image_url (or None if not cached) 

164 """ 

165 if not urls: 

166 return {} 

167 

168 # Import here to avoid circular imports 

169 from apps.recipes.models import CachedSearchImage 

170 

171 # Query all at once 

172 cached_images = await sync_to_async( 

173 lambda: list( 

174 CachedSearchImage.objects.filter( 

175 external_url__in=urls, 

176 status=CachedSearchImage.STATUS_SUCCESS, 

177 image__isnull=False, 

178 ).exclude(image="") 

179 ) 

180 )() 

181 

182 # Build result dict 

183 result = {} 

184 for cached in cached_images: 

185 if cached.image: 

186 result[cached.external_url] = cached.image.url 

187 # Update access time without saving to DB unnecessarily 

188 # (updated via auto_now on next modification) 

189 

190 return result 

191 

192 def _generate_filename(self, image_url: str) -> str: 

193 """ 

194 Generate unique hash-based filename for cached image. 

195 

196 Args: 

197 image_url: External image URL 

198 

199 Returns: 

200 Filename like 'search_{hash}.{ext}' 

201 """ 

202 # Create hash from URL for uniqueness (not for security) 

203 url_hash = hashlib.md5(image_url.encode(), usedforsecurity=False).hexdigest()[:12] 

204 

205 # Get extension from image URL 

206 ext = ".jpg" # default 

207 if image_url: 

208 parsed = urlparse(image_url) 

209 path_ext = Path(parsed.path).suffix.lower() 

210 if path_ext in (".jpg", ".jpeg", ".png", ".gif", ".webp"): 

211 ext = path_ext 

212 

213 return f"search_{url_hash}{ext}" 

214 

215 def _convert_to_jpeg(self, image_data: bytes) -> bytes | None: 

216 """ 

217 Convert image to JPEG format for iOS 9 compatibility. 

218 

219 iOS 9 Safari doesn't support WebP (added in Safari 14/iOS 14). 

220 This converts any image format (WebP, PNG, etc.) to JPEG. 

221 

222 Args: 

223 image_data: Raw image bytes in any format 

224 

225 Returns: 

226 JPEG image bytes, or None if conversion fails 

227 """ 

228 try: 

229 # Open image from bytes 

230 img = Image.open(io.BytesIO(image_data)) 

231 

232 # Convert RGBA to RGB (JPEG doesn't support transparency) 

233 if img.mode in ("RGBA", "LA", "P"): 

234 # Create white background 

235 background = Image.new("RGB", img.size, (255, 255, 255)) 

236 if img.mode == "P": 

237 img = img.convert("RGBA") 

238 background.paste(img, mask=img.split()[-1] if img.mode in ("RGBA", "LA") else None) 

239 img = background 

240 elif img.mode != "RGB": 

241 img = img.convert("RGB") 

242 

243 # Save as JPEG 

244 output = io.BytesIO() 

245 img.save(output, format="JPEG", quality=92, optimize=True) 

246 return output.getvalue() 

247 

248 except Exception as e: 

249 logger.error(f"Failed to convert image to JPEG: {e}") 

250 return None 

251 

252 def _is_image_url(self, url: str) -> bool: 

253 """ 

254 Check if URL looks like an image based on extension. 

255 

256 Args: 

257 url: URL to check 

258 

259 Returns: 

260 True if URL has image extension, False otherwise 

261 """ 

262 image_extensions = (".jpg", ".jpeg", ".png", ".gif", ".webp") 

263 parsed = urlparse(url) 

264 return parsed.path.lower().endswith(image_extensions) 

← Back to Dashboard