Coverage for apps / recipes / services / fingerprint.py: 62%
16 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 00:40 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 00:40 +0000
1"""
2Browser fingerprint configuration for web scraping.
4Uses curl_cffi's browser impersonation feature to bypass anti-bot detection.
5This module centralizes all fingerprint-related configuration for maintainability.
7Maintenance Notes:
8- Browser profiles should be updated periodically as curl_cffi releases new versions
9- Current profiles are based on curl_cffi >= 0.11
10- Check https://curl-cffi.readthedocs.io/en/latest/impersonate/targets.html for updates
11- If a browser version becomes unavailable, remove it from the fallback list
12"""
14import random
# Desktop browser fingerprints passed to curl_cffi for impersonation.
# The list is ordered by preference: Chrome leads because it is the most
# widely accepted, Safari follows, then Firefox.
BROWSER_PROFILES = [
    "chrome136",   # newest Chrome; best compatibility with most sites
    "safari184",   # newest desktop Safari
    "firefox133",  # newest Firefox
    "chrome131",   # older Chrome retained as a fallback
]

# Version-less names for callers who would rather let curl_cffi pick the
# newest fingerprint on their behalf.
BROWSER_ALIASES = {
    "chrome": "chrome",       # resolved by curl_cffi to its latest Chrome
    "safari": "safari",       # resolved by curl_cffi to its latest Safari
    "firefox": "firefox133",  # pinned version: no Firefox alias in curl_cffi
}

# Mobile fingerprints, for sites whose mobile responses differ from desktop.
MOBILE_PROFILES = [
    "safari184_ios",      # newest iOS Safari
    "chrome131_android",  # Chrome on Android
]

# Bounds (in seconds) for the pause between requests to the same domain.
# Varying the pause makes the request cadence look less like a bot.
MIN_DELAY = 0.5  # shortest pause
MAX_DELAY = 2.5  # longest pause
# Relative selection weight for each known profile. Keyed by profile name
# (rather than by list position) so the weighting stays correct when
# BROWSER_PROFILES is reordered or a profile is removed from it.
_PROFILE_WEIGHTS = {
    'chrome136': 50,
    'safari184': 25,
    'firefox133': 15,
    'chrome131': 10,
}

# Weight applied to any profile in BROWSER_PROFILES with no entry above.
_DEFAULT_PROFILE_WEIGHT = 10


def get_random_profile() -> str:
    """
    Get a random browser profile from the pool.

    Returns a weighted random choice favoring Chrome (most compatible).

    Weights are looked up by profile name, so the number of weights always
    matches the number of entries in BROWSER_PROFILES. Profiles without an
    explicit weight fall back to _DEFAULT_PROFILE_WEIGHT instead of causing
    random.choices to raise ValueError for a length mismatch.
    """
    weights = [
        _PROFILE_WEIGHTS.get(profile, _DEFAULT_PROFILE_WEIGHT)
        for profile in BROWSER_PROFILES
    ]
    return random.choices(BROWSER_PROFILES, weights=weights)[0]
def get_random_delay() -> float:
    """
    Pick how long to pause before the next request.

    Returns:
        A float drawn with random.uniform from the range bounded by
        MIN_DELAY and MAX_DELAY, so consecutive pauses do not form a
        predictable pattern.
    """
    lower_bound, upper_bound = MIN_DELAY, MAX_DELAY
    return random.uniform(lower_bound, upper_bound)
def get_fallback_profiles(exclude: str = None) -> list[str]:
    """
    Build the list of profiles to try next.

    Args:
        exclude: Profile name to leave out, such as one that has just
            failed. Falsy values and names not present in
            BROWSER_PROFILES leave the list untouched.

    Returns:
        A new list holding the entries of BROWSER_PROFILES in their
        original order, minus the first occurrence of ``exclude``.
    """
    # Work on a fresh list so the module-level pool is never mutated.
    candidates = list(BROWSER_PROFILES)
    if not exclude or exclude not in candidates:
        return candidates
    del candidates[candidates.index(exclude)]
    return candidates