Coverage for apps / recipes / services / fingerprint.py: 93%
15 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-12 10:49 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-12 10:49 +0000
1"""
2Browser fingerprint configuration for web scraping.
4Uses curl_cffi's browser impersonation feature to bypass anti-bot detection.
5This module centralizes all fingerprint-related configuration for maintainability.
7Maintenance Notes:
8- Browser profiles should be updated periodically as curl_cffi releases new versions
9- Current profiles are based on curl_cffi >= 0.11
10- Check https://curl-cffi.readthedocs.io/en/latest/impersonate/targets.html for updates
11- If a browser version becomes unavailable, remove it from the fallback list
12"""
14import random
# Primary browser profiles to impersonate.
# These are the most common browsers that recipe sites expect.
# Auto-latest aliases are used where available so profiles stay current.
# Order matters: auto-latest Chrome first (most compatible), then auto-latest
# Safari, then the pinned Chrome and Safari versions as fallbacks.
# NOTE: get_random_profile() assigns per-profile selection weights; review
# them whenever an entry is added to or removed from this list.
BROWSER_PROFILES = [
    "chrome",  # Auto-latest Chrome (most compatible with majority of sites)
    "safari",  # Auto-latest Safari desktop
    "chrome136",  # Specific Chrome version as fallback
    "safari184",  # Specific Safari version as fallback
]
# Mobile profiles for sites that serve different content to mobile clients.
# NOTE(review): none of the helper functions in this part of the module read
# this list; presumably it is consumed by callers elsewhere - confirm.
MOBILE_PROFILES = [
    "safari_ios",  # Auto-latest iOS Safari
    "chrome_android",  # Auto-latest Android Chrome
]
# Request timing configuration (in seconds).
# Randomizing delays helps avoid bot detection patterns.
# get_random_delay() draws its value from the range bounded by these two.
MIN_DELAY = 0.5  # Minimum delay between requests to same domain
MAX_DELAY = 2.5  # Maximum delay between requests to same domain
def get_random_profile() -> str:
    """
    Get a random browser profile from the pool.

    Returns a weighted random choice from BROWSER_PROFILES, favoring the
    auto-latest Chrome alias (most compatible).

    Weights are looked up by profile name rather than by list position, so
    adding or removing an entry in BROWSER_PROFILES cannot leave the weights
    out of step with the population (which would make ``random.choices``
    raise ``ValueError``).

    Returns:
        The name of the selected profile.

    Raises:
        IndexError: If BROWSER_PROFILES is empty.
    """
    # Relative selection weights; for the default pool they sum to 100.
    profile_weights = {
        "chrome": 45,  # Chrome latest
        "safari": 30,  # Safari latest
        "chrome136": 15,  # Chrome 136
        "safari184": 10,  # Safari 184
    }
    # A profile without an explicit entry gets the lowest configured weight:
    # it stays selectable without dominating the rotation.
    default_weight = min(profile_weights.values())
    weights = [profile_weights.get(name, default_weight) for name in BROWSER_PROFILES]
    return random.choices(BROWSER_PROFILES, weights=weights)[0]  # nosec B311 - not cryptographic, used for browser fingerprint rotation
def get_random_delay() -> float:
    """
    Pick how long to pause before the next request (rate limiting).

    The pause is sampled uniformly between the module constants MIN_DELAY
    and MAX_DELAY, so consecutive requests are not evenly spaced and do not
    form a predictable pattern.
    """
    lower_bound, upper_bound = MIN_DELAY, MAX_DELAY
    jitter = random.uniform(lower_bound, upper_bound)  # nosec B311 - not cryptographic, used for request delay jitter
    return jitter
59def get_fallback_profiles(exclude: str = None) -> list[str]:
60 """
61 Get list of fallback profiles, optionally excluding one.
63 Args:
64 exclude: Profile to exclude (e.g., if it just failed)
66 Returns:
67 List of profile names to try
68 """
69 profiles = BROWSER_PROFILES.copy()
70 if exclude and exclude in profiles:
71 profiles.remove(exclude)
72 return profiles