Coverage for apps/recipes/services/image_cache.py: 95%

108 statements  

coverage.py v7.13.1, created at 2026-01-11 00:40 +0000

"""
Search result image caching service for iOS 9 compatibility.

Implements fire-and-forget batch downloads to cache external recipe images
locally, avoiding CORS and security issues on older Safari browsers.
"""

import asyncio
import hashlib
import io
import logging
from pathlib import Path
from urllib.parse import urlparse

from asgiref.sync import sync_to_async
from curl_cffi.requests import AsyncSession
from django.core.files.base import ContentFile
from PIL import Image

from apps.recipes.services.fingerprint import BROWSER_PROFILES
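# BROWSER_PROFILES (defined in fingerprint.py) is iterated below as a sequence
# of curl_cffi impersonation targets passed to AsyncSession(impersonate=...);
# its exact entries are not shown here.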

logger = logging.getLogger(__name__)


class SearchImageCache:
    """
    Service for caching search result images to local storage.

    Enables iOS 9 Safari compatibility by downloading external recipe images
    to the server immediately (fire-and-forget), then returning local URLs
    that don't trigger CORS restrictions.

    Browser profiles are centralized in fingerprint.py for maintainability.
    """

    MAX_CONCURRENT = 5
    DOWNLOAD_TIMEOUT = 15

    async def cache_images(self, image_urls: list) -> None:
        """
        Fire-and-forget batch download of search result images.

        The coroutine awaits all downloads internally; "fire-and-forget"
        refers to the caller, which typically schedules this without awaiting
        it and relies on errors being logged rather than raised.

        Args:
            image_urls: List of external image URLs to cache

        Returns:
            None (errors logged but not raised)
        """
        if not image_urls:
            return

        # Create semaphore to limit concurrent downloads
        semaphore = asyncio.Semaphore(self.MAX_CONCURRENT)

        # Create download tasks
        tasks = [
            self._download_and_save(None, semaphore, url)
            for url in image_urls
        ]

        # Run downloads concurrently; return_exceptions captures per-task
        # errors instead of letting them propagate
        if tasks:
            await asyncio.gather(*tasks, return_exceptions=True)

    async def _download_and_save(self, session: AsyncSession | None, semaphore: asyncio.Semaphore, url: str) -> None:
        """
        Download and cache a single image with status tracking.

        Args:
            session: Unused; callers pass None and each fetch opens its own
                AsyncSession in _fetch_image
            semaphore: Semaphore to limit concurrent downloads
            url: External image URL to cache
        """
        # Import here to avoid circular imports
        from apps.recipes.models import CachedSearchImage

        async with semaphore:
            try:
                # Get or create cache record
                cached, created = await sync_to_async(
                    CachedSearchImage.objects.get_or_create
                )(
                    external_url=url,
                    defaults={'status': CachedSearchImage.STATUS_PENDING}
                )

                # Skip if already successfully cached
                if cached.status == CachedSearchImage.STATUS_SUCCESS and cached.image:
                    return

                # Download image
                image_data = await self._fetch_image(url)
                if not image_data:
                    cached.status = CachedSearchImage.STATUS_FAILED
                    await sync_to_async(cached.save)(update_fields=['status'])
                    return

                # Convert to JPEG for iOS 9 compatibility (no WebP support)
                converted_data = self._convert_to_jpeg(image_data)
                if not converted_data:
                    cached.status = CachedSearchImage.STATUS_FAILED
                    await sync_to_async(cached.save)(update_fields=['status'])
                    return

                # Generate filename and save
                filename = self._generate_filename(url)
                cached.image = ContentFile(converted_data, name=filename)
                cached.status = CachedSearchImage.STATUS_SUCCESS
                await sync_to_async(cached.save)(update_fields=['image', 'status'])
                logger.info(f"Cached image from {url}")

            except Exception as e:
                logger.error(f"Failed to cache image from {url}: {e}")
                # Try to mark the record as failed if one exists
                try:
                    cached = await sync_to_async(
                        CachedSearchImage.objects.get
                    )(external_url=url)
                    cached.status = CachedSearchImage.STATUS_FAILED
                    await sync_to_async(cached.save)(update_fields=['status'])
                except Exception:
                    pass
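
    # Status lifecycle used above: a CachedSearchImage record is created as
    # STATUS_PENDING the first time a URL is seen, moved to STATUS_SUCCESS once
    # the converted JPEG is stored, or to STATUS_FAILED if the download or
    # conversion fails.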

    async def _fetch_image(self, url: str) -> bytes | None:
        """
        Fetch image content from URL with browser profile fallback.

        Tries multiple browser profiles if the initial request fails.
        Browser profiles are configured in fingerprint.py.

        Args:
            url: Image URL to fetch

        Returns:
            Image bytes or None if fetch fails
        """
        if not self._is_image_url(url):
            return None

        # Try each browser profile until one succeeds
        for profile in BROWSER_PROFILES:
            try:
                async with AsyncSession(impersonate=profile) as session:
                    response = await session.get(
                        url,
                        timeout=self.DOWNLOAD_TIMEOUT,
                        allow_redirects=True,
                    )

                    if response.status_code == 200:
                        content_type = response.headers.get('content-type', '')
                        if 'image' in content_type:
                            return response.content

            except Exception as e:
                logger.debug(f"Failed to fetch image {url} with {profile}: {e}")
                continue

        return None

    async def get_cached_urls_batch(self, urls: list) -> dict:
        """
        Batch lookup of cached image URLs for API response.

        Args:
            urls: List of external image URLs to check

        Returns:
            Dict mapping external_url to the cached image URL. URLs that have
            not been cached successfully are omitted from the result.
        """
        if not urls:
            return {}

        # Import here to avoid circular imports
        from apps.recipes.models import CachedSearchImage

        # Query all URLs in a single database round trip
        cached_images = await sync_to_async(
            lambda: list(
                CachedSearchImage.objects.filter(
                    external_url__in=urls,
                    status=CachedSearchImage.STATUS_SUCCESS,
                    image__isnull=False,
                ).exclude(image='')
            )
        )()

        # Build result dict
        result = {}
        for cached in cached_images:
            if cached.image:
                result[cached.external_url] = cached.image.url
                # Intentionally no save() here: the auto_now timestamp is only
                # refreshed the next time the record is modified and saved.

        return result
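
    # Illustrative return value (the path prefix depends on MEDIA_URL and the
    # image field's upload_to; the hash here is made up):
    #   {"https://example.com/pie.jpg": "/media/search_ab12cd34ef56.jpg"}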

    def _generate_filename(self, image_url: str) -> str:
        """
        Generate unique hash-based filename for cached image.

        Args:
            image_url: External image URL

        Returns:
            Filename like 'search_{hash}.{ext}'
        """
        # Create hash from URL for uniqueness
        url_hash = hashlib.md5(image_url.encode()).hexdigest()[:12]

        # Get extension from image URL
        ext = '.jpg'  # default
        if image_url:
            parsed = urlparse(image_url)
            path_ext = Path(parsed.path).suffix.lower()
            if path_ext in ('.jpg', '.jpeg', '.png', '.gif', '.webp'):
                ext = path_ext

        return f"search_{url_hash}{ext}"
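
    # Example (illustrative values): a URL ending in '.png' yields a name like
    # 'search_3f2a9c1d7b8e.png' (12 hex chars of the URL's md5). Note that the
    # stored bytes are always JPEG after _convert_to_jpeg, even when the source
    # extension is kept in the filename.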

    def _convert_to_jpeg(self, image_data: bytes) -> bytes | None:
        """
        Convert image to JPEG format for iOS 9 compatibility.

        iOS 9 Safari doesn't support WebP (added in Safari 14/iOS 14).
        This converts any image format (WebP, PNG, etc.) to JPEG.

        Args:
            image_data: Raw image bytes in any format

        Returns:
            JPEG image bytes, or None if conversion fails
        """
        try:
            # Open image from bytes
            img = Image.open(io.BytesIO(image_data))

            # Convert RGBA to RGB (JPEG doesn't support transparency)
            if img.mode in ('RGBA', 'LA', 'P'):
                # Create white background
                background = Image.new('RGB', img.size, (255, 255, 255))
                if img.mode == 'P':
                    img = img.convert('RGBA')
                background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
                img = background
            elif img.mode != 'RGB':
                img = img.convert('RGB')

            # Save as JPEG
            output = io.BytesIO()
            img.save(output, format='JPEG', quality=92, optimize=True)
            return output.getvalue()

        except Exception as e:
            logger.error(f"Failed to convert image to JPEG: {e}")
            return None

    def _is_image_url(self, url: str) -> bool:
        """
        Check if URL looks like an image based on extension.

        Args:
            url: URL to check

        Returns:
            True if URL has image extension, False otherwise
        """
        image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.webp')
        parsed = urlparse(url)
        return parsed.path.lower().endswith(image_extensions)
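
The caller-side pattern the module docstring describes (fire-and-forget download plus later URL substitution) is sketched below. This is an illustrative example, not part of image_cache.py: the attach_cached_images helper, the search_results structure, and its "image_url" key are assumptions.

# Hypothetical calling pattern: fire the batch download in the background and
# rewrite image URLs from whatever the cache already holds.
import asyncio

from apps.recipes.services.image_cache import SearchImageCache


async def attach_cached_images(search_results: list[dict]) -> list[dict]:
    """Hypothetical helper showing how an async search view might use the cache."""
    cache = SearchImageCache()
    urls = [r["image_url"] for r in search_results if r.get("image_url")]

    # Fire-and-forget: schedule the batch download without awaiting it so the
    # search response is not delayed by slow external image hosts. In real
    # code, keep a reference to the task so it is not garbage-collected early.
    task = asyncio.create_task(cache.cache_images(urls))

    # Substitute any URLs that were cached by earlier searches.
    cached = await cache.get_cached_urls_batch(urls)
    for result in search_results:
        local_url = cached.get(result.get("image_url"))
        if local_url:
            result["image_url"] = local_url
    return search_results

This keeps the slow network work off the request path while letting repeat searches benefit from images downloaded on earlier requests.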
