Coverage for apps / ai / services / selector.py: 20%

54 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 00:40 +0000

1"""CSS selector repair service using AI.""" 

2 

3import logging 

4from typing import Optional 

5 

6from apps.recipes.models import SearchSource 

7 

8from ..models import AIPrompt 

9from .openrouter import OpenRouterService, AIUnavailableError, AIResponseError 

10from .validator import AIResponseValidator, ValidationError 

11 

12logger = logging.getLogger(__name__) 

13 

14# Default confidence threshold for auto-updating selectors 

15DEFAULT_CONFIDENCE_THRESHOLD = 0.8 

16 

17 

def repair_selector(
    source: SearchSource,
    html_sample: str,
    target: str = 'recipe search result',
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
    auto_update: bool = True,
) -> dict:
    """Ask the AI for replacement CSS selectors and optionally apply the best one.

    The ``selector_repair`` prompt is rendered with the source's current
    ``result_selector``, the target description and a slice of the HTML
    sample, sent through ``OpenRouterService`` and the reply is checked by
    ``AIResponseValidator``. When auto-update is allowed, at least one
    suggestion came back and the reported confidence reaches the threshold,
    the first suggestion is written to the source and its
    ``needs_attention`` flag is cleared.

    Args:
        source: The SearchSource whose ``result_selector`` should be repaired.
        html_sample: HTML of the search page; only the first 50,000
            characters are forwarded to the model.
        target: Human-readable description of the element being selected.
        confidence_threshold: Minimum confidence required before the source
            is modified.
        auto_update: When False the source is never modified, regardless of
            confidence.

    Returns:
        Dict with keys:
            - suggestions: The ``suggestions`` entry of the validated reply
              (empty list when absent)
            - confidence: The ``confidence`` entry of the validated reply
              (0 when absent)
            - original_selector: The selector the source had on entry
            - updated: True only if the source was saved with a new selector
            - new_selector: The selector that was saved, else None

    Raises:
        Nothing is raised directly here; errors from the prompt lookup, the
        OpenRouter call, the response validator or ``source.save`` propagate
        to the caller.
        NOTE(review): the module imports AIUnavailableError, AIResponseError
        and ValidationError, presumably the types those collaborators
        raise - confirm against their implementations.
    """
    original_selector = source.result_selector
    repair_prompt = AIPrompt.get_prompt('selector_repair')

    # Cap the sample size so the request stays within the model's token limits.
    html_excerpt = html_sample[:50000]
    rendered_prompt = repair_prompt.format_user_prompt(
        selector=original_selector or '(none)',
        target=target,
        html_sample=html_excerpt,
    )

    ai_reply = OpenRouterService().complete(
        system_prompt=repair_prompt.system_prompt,
        user_prompt=rendered_prompt,
        model=repair_prompt.model,
        json_response=True,
    )
    checked_reply = AIResponseValidator().validate('selector_repair', ai_reply)

    suggestions = checked_reply.get('suggestions', [])
    confidence = checked_reply.get('confidence', 0)

    should_apply = auto_update and suggestions and confidence >= confidence_threshold
    if not should_apply:
        # Report-only path: the source is left untouched.
        logger.info(
            f'Selector repair suggestions for {source.host} '
            f'(confidence: {confidence:.2f}, threshold: {confidence_threshold}): '
            f'{suggestions}'
        )
        return {
            'suggestions': suggestions,
            'confidence': confidence,
            'original_selector': original_selector,
            'updated': False,
            'new_selector': None,
        }

    # The first suggestion is the one that gets applied.
    new_selector = suggestions[0]
    source.result_selector = new_selector
    source.needs_attention = False  # Clear the attention flag
    source.save(update_fields=['result_selector', 'needs_attention'])

    logger.info(
        f'Auto-updated selector for {source.host}: '
        f'"{original_selector}" -> "{new_selector}" (confidence: {confidence:.2f})'
    )
    return {
        'suggestions': suggestions,
        'confidence': confidence,
        'original_selector': original_selector,
        'updated': True,
        'new_selector': new_selector,
    }

111 

112 

def get_sources_needing_attention() -> list[SearchSource]:
    """Return the enabled SearchSources whose ``needs_attention`` flag is set.

    The query filters on ``needs_attention=True`` and ``is_enabled=True``
    only; failure counters are not consulted here, so any failure
    threshold must already be reflected in the flag by whatever sets it.

    Returns:
        The matching sources, materialised into a list.
    """
    flagged_sources = SearchSource.objects.filter(
        needs_attention=True,
        is_enabled=True,
    )
    return list(flagged_sources)

123 

124 

def repair_all_broken_selectors(
    html_samples: dict[str, str],
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
) -> dict:
    """Run selector repair for every source returned by get_sources_needing_attention.

    Sources whose host has no entry in ``html_samples`` are skipped. For the
    rest, ``repair_selector`` is called with its default target and
    auto-update behaviour. AI and validation errors are logged and recorded
    per host instead of aborting the batch; any other exception propagates.

    Args:
        html_samples: Dict mapping a source's host to its HTML sample.
        confidence_threshold: Minimum confidence passed through to
            ``repair_selector`` for auto-updating.

    Returns:
        Dict with:
            - repaired: Hosts whose repair result reported ``updated``
            - failed: Hosts that were attempted but not updated, including
              those that raised a handled error
            - skipped: Hosts with no HTML sample provided
            - results: Dict mapping each attempted host to its repair result,
              or to ``{'error': <message>}`` when a handled error occurred
    """
    pending_sources = get_sources_needing_attention()

    summary = {
        'repaired': [],
        'failed': [],
        'skipped': [],
        'results': {},
    }

    for source in pending_sources:
        host = source.host

        if host not in html_samples:
            summary['skipped'].append(host)
            continue

        try:
            outcome = repair_selector(
                source=source,
                html_sample=html_samples[host],
                confidence_threshold=confidence_threshold,
            )
        except (AIUnavailableError, AIResponseError, ValidationError) as exc:
            logger.error(f'Failed to repair selector for {host}: {exc}')
            summary['failed'].append(host)
            summary['results'][host] = {'error': str(exc)}
        else:
            summary['results'][host] = outcome
            # A result that was not auto-applied counts as a failed repair.
            bucket = 'repaired' if outcome['updated'] else 'failed'
            summary[bucket].append(host)

    return summary

← Back to Dashboard