Coverage for apps / recipes / services / fingerprint.py: 62%

16 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 00:40 +0000

1""" 

2Browser fingerprint configuration for web scraping. 

3 

4Uses curl_cffi's browser impersonation feature to bypass anti-bot detection. 

5This module centralizes all fingerprint-related configuration for maintainability. 

6 

7Maintenance Notes: 

8- Browser profiles should be updated periodically as curl_cffi releases new versions 

9- Current profiles are based on curl_cffi >= 0.11 

10- Check https://curl-cffi.readthedocs.io/en/latest/impersonate/targets.html for updates 

11- If a browser version becomes unavailable, remove it from the fallback list 

12""" 

13 

14import random 

15 

# Desktop browser fingerprints offered for impersonation.
# The list is ordered by expected compatibility with recipe sites:
# Chrome leads, Safari follows, Firefox comes after, and an older
# Chrome build closes the list as a fallback.
BROWSER_PROFILES: list[str] = [
    # Latest Chrome (most compatible with the majority of sites)
    'chrome136',
    # Latest desktop Safari
    'safari184',
    # Latest Firefox
    'firefox133',
    # Older Chrome kept as a fallback
    'chrome131',
]

# Short names for callers that prefer curl_cffi's auto-updating targets
# over a pinned browser version.
BROWSER_ALIASES: dict[str, str] = {
    # Resolves to the newest Chrome fingerprint
    'chrome': 'chrome',
    # Resolves to the newest Safari fingerprint
    'safari': 'safari',
    # Pinned to a version: no Firefox alias in curl_cffi
    'firefox': 'firefox133',
}

# Mobile fingerprints, for sites that serve different content to mobile
# clients.
MOBILE_PROFILES: list[str] = [
    # Latest iOS Safari
    'safari184_ios',
    # Android Chrome
    'chrome131_android',
]

# Bounds, in seconds, for the pause between requests to the same domain.
# Drawing the pause at random helps avoid bot-detection timing patterns.
MIN_DELAY: float = 0.5  # shortest allowed pause
MAX_DELAY: float = 2.5  # longest allowed pause

44 

45 

def get_random_profile() -> str:
    """
    Get a random browser profile from the pool.

    The choice is weighted so that Chrome (most compatible) is favored.

    Weights are looked up by profile name instead of by list position, so
    adding or removing an entry in BROWSER_PROFILES cannot leave the weights
    out of step with the pool (random.choices raises ValueError when the two
    lengths differ). A profile with no explicit weight gets the smallest
    configured weight.

    Returns:
        One profile name taken from BROWSER_PROFILES.
    """
    preferred_weights = {
        'chrome136': 50,
        'safari184': 25,
        'firefox133': 15,
        'chrome131': 10,
    }
    # Unlisted profiles stay selectable but are never favored.
    default_weight = min(preferred_weights.values())
    weights = [
        preferred_weights.get(profile, default_weight)
        for profile in BROWSER_PROFILES
    ]
    return random.choices(BROWSER_PROFILES, weights=weights)[0]

54 

55 

def get_random_delay() -> float:
    """
    Pick how long to pause before the next request.

    The pause is randomized so that request timing does not follow a
    predictable pattern.

    Returns:
        A float drawn by random.uniform from the range bounded by
        MIN_DELAY and MAX_DELAY.
    """
    shortest, longest = MIN_DELAY, MAX_DELAY
    return random.uniform(shortest, longest)

64 

65 

def get_fallback_profiles(exclude: str | None = None) -> list[str]:
    """
    Get list of fallback profiles, optionally excluding one.

    A new list is built on every call, so callers may modify the result
    without affecting BROWSER_PROFILES.

    Args:
        exclude: Profile to exclude (e.g., if it just failed). None, an
            empty string, or a name that is not in the pool leaves the
            result unfiltered.

    Returns:
        List of profile names to try, in the same order as BROWSER_PROFILES.
    """
    # Filtering (instead of copy + remove) drops every occurrence of the
    # excluded name and needs no separate membership check.
    return [profile for profile in BROWSER_PROFILES if profile != exclude]

← Back to Dashboard