Coverage for apps / recipes / services / fingerprint.py: 93%

15 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-12 10:49 +0000

1""" 

2Browser fingerprint configuration for web scraping. 

3 

4Uses curl_cffi's browser impersonation feature to bypass anti-bot detection. 

5This module centralizes all fingerprint-related configuration for maintainability. 

6 

7Maintenance Notes: 

8- Browser profiles should be updated periodically as curl_cffi releases new versions 

9- Current profiles are based on curl_cffi >= 0.11 

10- Check https://curl-cffi.readthedocs.io/en/latest/impersonate/targets.html for updates 

11- If a browser version becomes unavailable, remove it from the fallback list 

12""" 

13 

14import random 

15 

# Desktop browser profiles offered for impersonation.
# These cover the browsers that recipe sites most commonly expect.
# Auto-latest aliases are used where available so the profiles stay current.
# Ordering: auto-latest aliases first (Chrome ahead of Safari), followed by
# the pinned versions in the same browser order.
BROWSER_PROFILES = [
    # Auto-latest aliases
    "chrome",  # most compatible with the majority of sites
    "safari",  # desktop Safari
    # Pinned versions kept as fallbacks
    "chrome136",
    "safari184",
]

26 

# Mobile browser profiles, for sites that serve different content to mobile
# clients. Both entries are auto-latest aliases: iOS Safari, then Android Chrome.
MOBILE_PROFILES = ["safari_ios", "chrome_android"]

32 

# Bounds, in seconds, for the pause between requests to the same domain.
# The pause is randomized within these bounds so that request timing does not
# form a pattern that bot detection can pick up.
MIN_DELAY, MAX_DELAY = 0.5, 2.5  # (shortest pause, longest pause)

37 

38 

def get_random_profile() -> str:
    """
    Pick one profile from BROWSER_PROFILES using a weighted random draw.

    Weights are looked up by profile name rather than by list position, so
    adding or removing an entry in BROWSER_PROFILES cannot make
    ``random.choices`` raise ``ValueError`` for a weights/population length
    mismatch. For the four profiles weighted below the distribution is
    45/30/15/10, favoring auto-latest Chrome (most compatible).

    Returns:
        The name of the selected profile.
    """
    weight_by_profile = {
        "chrome": 45,  # Chrome latest
        "safari": 30,  # Safari latest
        "chrome136": 15,  # Chrome 136
        "safari184": 10,  # Safari 184
    }
    # A profile with no explicit weight still gets a modest chance of selection
    # instead of breaking the draw.
    default_weight = 10
    weights = [weight_by_profile.get(name, default_weight) for name in BROWSER_PROFILES]
    return random.choices(BROWSER_PROFILES, weights=weights)[0]  # nosec B311 - not cryptographic, used for browser fingerprint rotation

47 

48 

def get_random_delay() -> float:
    """
    Draw a randomized pause length for rate limiting.

    Returns:
        A float sampled uniformly between MIN_DELAY and MAX_DELAY, so that
        the spacing between requests is not predictable.
    """
    shortest, longest = MIN_DELAY, MAX_DELAY
    pause = random.uniform(shortest, longest)  # nosec B311 - not cryptographic, used for request delay jitter
    return pause

57 

58 

def get_fallback_profiles(exclude: str | None = None) -> list[str]:
    """
    Build the list of fallback profiles, optionally leaving one out.

    Args:
        exclude: Profile name to omit (e.g., the one that just failed).
            ``None`` or an empty string omits nothing; a name that is not
            in BROWSER_PROFILES is ignored.

    Returns:
        A new list of profile names to try, in BROWSER_PROFILES order. The
        module-level list is never modified.
    """
    if not exclude:
        return BROWSER_PROFILES.copy()
    # Filtering builds a fresh list and drops every occurrence of the name.
    return [name for name in BROWSER_PROFILES if name != exclude]

← Back to Dashboard