Coverage for apps / ai / services / selector.py: 61%

54 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-12 10:49 +0000

1"""CSS selector repair service using AI.""" 

2 

3import logging 

4from typing import Optional 

5 

6from apps.recipes.models import SearchSource 

7 

8from ..models import AIPrompt 

9from .openrouter import OpenRouterService, AIUnavailableError, AIResponseError 

10from .validator import AIResponseValidator, ValidationError 

11 

12logger = logging.getLogger(__name__) 

13 

14# Default confidence threshold for auto-updating selectors 

15DEFAULT_CONFIDENCE_THRESHOLD = 0.8 

16 

17 

def repair_selector(
    source: SearchSource,
    html_sample: str,
    target: str = "recipe search result",
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
    auto_update: bool = True,
) -> dict:
    """Ask the AI service for replacement CSS selectors for a search source.

    The stored ``selector_repair`` prompt is filled in with the source's
    current selector, the target description and a truncated HTML sample,
    sent to the AI service, and the reply is validated. When permitted, the
    first suggestion is written back to the source.

    Args:
        source: The SearchSource whose ``result_selector`` is being repaired.
        html_sample: HTML from the search page; only the first 50,000
            characters are forwarded to the AI service.
        target: Human-readable description of the element to select.
        confidence_threshold: Minimum confidence required before the source
            is modified.
        auto_update: When False, the source is never modified and only
            suggestions are reported.

    Returns:
        Dict with keys:
            - suggestions: Suggested selectors from the validated reply
            - confidence: Confidence value from the validated reply
            - original_selector: The selector the source had on entry
            - updated: True if the source was saved with a new selector
            - new_selector: The selector that was saved, otherwise None

    Raises:
        AIUnavailableError, AIResponseError, ValidationError: Not raised
            directly here; presumably propagated from the AI service and
            the response validator.
    """
    original_selector = source.result_selector

    # Build the request from the stored prompt template.
    repair_prompt = AIPrompt.get_prompt("selector_repair")
    html_excerpt = html_sample[:50000]  # cap the payload to limit token usage
    filled_prompt = repair_prompt.format_user_prompt(
        selector=original_selector or "(none)",
        target=target,
        html_sample=html_excerpt,
    )

    # Query the AI service and validate the reply's structure.
    raw_reply = OpenRouterService().complete(
        system_prompt=repair_prompt.system_prompt,
        user_prompt=filled_prompt,
        model=repair_prompt.model,
        json_response=True,
        timeout=10,
    )
    payload = AIResponseValidator().validate("selector_repair", raw_reply)

    suggestions = payload.get("suggestions", [])
    confidence = payload.get("confidence", 0)

    outcome = {
        "suggestions": suggestions,
        "confidence": confidence,
        "original_selector": original_selector,
        "updated": False,
        "new_selector": None,
    }

    # Guard clause: report suggestions only, leaving the source untouched.
    if not (auto_update and suggestions and confidence >= confidence_threshold):
        logger.info(
            f"Selector repair suggestions for {source.host} "
            f"(confidence: {confidence:.2f}, threshold: {confidence_threshold}): "
            f"{suggestions}"
        )
        return outcome

    # Confident enough: persist the first suggestion and clear the flag.
    new_selector = suggestions[0]
    source.result_selector = new_selector
    source.needs_attention = False
    source.save(update_fields=["result_selector", "needs_attention"])

    outcome.update(updated=True, new_selector=new_selector)

    logger.info(
        f"Auto-updated selector for {source.host}: "
        f'"{original_selector}" -> "{new_selector}" (confidence: {confidence:.2f})'
    )
    return outcome

112 

113 

def get_sources_needing_attention() -> list[SearchSource]:
    """Return the enabled SearchSources flagged as needing attention.

    The query selects rows with ``needs_attention=True`` and
    ``is_enabled=True`` and materializes them into a list. No failure-count
    threshold is applied here; presumably the flag itself is maintained
    elsewhere when failures accumulate (confirm against the model code).
    """
    flagged_sources = SearchSource.objects.filter(
        needs_attention=True,
        is_enabled=True,
    )
    return list(flagged_sources)

126 

127 

def repair_all_broken_selectors(
    html_samples: dict[str, str],
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
) -> dict:
    """Run selector repair for every source that needs attention.

    Args:
        html_samples: Mapping of source host to an HTML sample for that host.
        confidence_threshold: Minimum confidence passed through to
            ``repair_selector`` for auto-updating.

    Returns:
        Dict with:
            - repaired: Hosts whose selector was auto-updated
            - failed: Hosts where the repair raised a handled AI/validation
              error or did not result in an update
            - skipped: Hosts with no entry in ``html_samples``
            - results: Mapping of host to the repair result, or to
              ``{"error": <message>}`` when a handled error occurred
    """
    summary = {
        "repaired": [],
        "failed": [],
        "skipped": [],
        "results": {},
    }

    for source in get_sources_needing_attention():
        host = source.host

        # Nothing to analyze without a sample for this host.
        if host not in html_samples:
            summary["skipped"].append(host)
            continue

        try:
            outcome = repair_selector(
                source=source,
                html_sample=html_samples[host],
                confidence_threshold=confidence_threshold,
            )
        except (AIUnavailableError, AIResponseError, ValidationError) as e:
            # One host failing must not abort the rest of the batch.
            logger.error(f"Failed to repair selector for {host}: {e}")
            summary["failed"].append(host)
            summary["results"][host] = {"error": str(e)}
        else:
            summary["results"][host] = outcome
            # A result without an update counts as a failed repair.
            bucket = "repaired" if outcome["updated"] else "failed"
            summary[bucket].append(host)

    return summary

← Back to Dashboard