Coverage for apps / ai / services / selector.py: 20%

54 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-14 19:13 +0000

1"""CSS selector repair service using AI.""" 

2 

3import logging 

4from typing import Optional 

5 

6from apps.recipes.models import SearchSource 

7 

8from ..models import AIPrompt 

9from .openrouter import OpenRouterService, AIUnavailableError, AIResponseError 

10from .validator import AIResponseValidator, ValidationError 

11 

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Minimum AI confidence (on a 0-1 scale) at which a suggested selector is
# applied to the source automatically instead of only being reported.
DEFAULT_CONFIDENCE_THRESHOLD: float = 0.8

16 

17 

def repair_selector(
    source: SearchSource,
    html_sample: str,
    target: str = "recipe search result",
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
    auto_update: bool = True,
) -> dict:
    """Attempt to repair a broken CSS selector using AI.

    Sends the source's current selector, a description of the target element
    and a truncated HTML sample to the AI service, validates the reply, and
    optionally writes the best suggestion back to ``source``.

    Args:
        source: The SearchSource with the broken selector. Its
            ``result_selector`` and ``needs_attention`` fields are modified
            and saved when an auto-update happens.
        html_sample: Sample HTML from the search page. Only the first
            50,000 characters are sent to the AI.
        target: Description of the target element type.
        confidence_threshold: Minimum confidence to auto-update (0-1).
        auto_update: If True and confidence reaches the threshold, update
            the source with the first usable suggestion.

    Returns:
        Dict with keys:
            - suggestions: List of suggested CSS selectors
            - confidence: AI's confidence score (0-1)
            - original_selector: The original broken selector
            - updated: Whether the source was auto-updated
            - new_selector: The new selector if updated, else None

    Raises:
        AIUnavailableError: If AI service is not available.
        AIResponseError: If AI returns invalid response.
        ValidationError: If response doesn't match expected schema.
    """
    original_selector = source.result_selector

    prompt = AIPrompt.get_prompt("selector_repair")

    # Truncate the HTML to avoid token limits. The cut is by characters,
    # not bytes, so the encoded size may exceed 50 KB for non-ASCII pages.
    user_prompt = prompt.format_user_prompt(
        selector=original_selector or "(none)",
        target=target,
        html_sample=html_sample[:50000],
    )

    response = OpenRouterService().complete(
        system_prompt=prompt.system_prompt,
        user_prompt=user_prompt,
        model=prompt.model,
        json_response=True,
    )

    validated = AIResponseValidator().validate("selector_repair", response)
    suggestions = validated.get("suggestions", [])
    confidence = validated.get("confidence", 0)

    result = {
        "suggestions": suggestions,
        "confidence": confidence,
        "original_selector": original_selector,
        "updated": False,
        "new_selector": None,
    }

    new_selector = None
    if auto_update and suggestions and confidence >= confidence_threshold:
        # Never persist a blank or non-string selector: that would leave the
        # source broken while also clearing its attention flag.
        new_selector = next(
            (s for s in suggestions if isinstance(s, str) and s.strip()),
            None,
        )

    if new_selector is None:
        # Nothing applied: auto-update disabled, confidence too low, or no
        # usable suggestion. Report what the AI proposed.
        logger.info(
            "Selector repair suggestions for %s (confidence: %.2f, threshold: %s): %s",
            source.host,
            confidence,
            confidence_threshold,
            suggestions,
        )
        return result

    source.result_selector = new_selector
    source.needs_attention = False  # Clear the attention flag
    source.save(update_fields=["result_selector", "needs_attention"])

    result["updated"] = True
    result["new_selector"] = new_selector

    # Lazy %-style arguments: a formatting problem is handled by the logging
    # machinery instead of raising after the source has already been saved.
    logger.info(
        'Auto-updated selector for %s: "%s" -> "%s" (confidence: %.2f)',
        source.host,
        original_selector,
        new_selector,
        confidence,
    )
    return result

111 

112 

def get_sources_needing_attention() -> list[SearchSource]:
    """Return the enabled SearchSources that are flagged as needing attention.

    The query filters only on ``needs_attention=True`` and ``is_enabled=True``.
    Failure counts are not inspected here; any "consecutive failures" rule
    has to be applied by whatever code sets the ``needs_attention`` flag
    (NOTE(review): presumably the search/scraping code - confirm).

    Returns:
        A list of the matching SearchSource instances.
    """
    flagged = SearchSource.objects.filter(
        needs_attention=True,
        is_enabled=True,
    )
    # Convert to a plain list so the return value matches the annotation.
    return list(flagged)

125 

126 

def repair_all_broken_selectors(
    html_samples: dict[str, str],
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
) -> dict:
    """Attempt to repair all sources needing attention.

    Iterates over the sources returned by ``get_sources_needing_attention``
    and calls ``repair_selector`` for each one that has an HTML sample.

    Args:
        html_samples: Dict mapping host to HTML sample. Hosts that are
            missing, or whose sample is empty or None, are skipped.
        confidence_threshold: Minimum confidence to auto-update.

    Returns:
        Dict with:
            - repaired: List of hosts that were successfully repaired
            - failed: List of hosts that could not be repaired (the AI call
              or validation raised, or no selector was auto-applied)
            - skipped: List of hosts with no HTML sample provided
            - results: Dict mapping host to repair result, or to
              ``{"error": <message>}`` when the repair raised
    """
    repaired: list[str] = []
    failed: list[str] = []
    skipped: list[str] = []
    results: dict[str, dict] = {}

    for source in get_sources_needing_attention():
        host = source.host

        # An empty or None sample gives the AI nothing to analyse, and None
        # would raise an uncaught TypeError downstream and abort the batch.
        html_sample = html_samples.get(host)
        if not html_sample:
            skipped.append(host)
            continue

        try:
            result = repair_selector(
                source=source,
                html_sample=html_sample,
                confidence_threshold=confidence_threshold,
            )
        except (AIUnavailableError, AIResponseError, ValidationError) as e:
            logger.error("Failed to repair selector for %s: %s", host, e)
            failed.append(host)
            results[host] = {"error": str(e)}
            continue

        results[host] = result
        if result["updated"]:
            repaired.append(host)
        else:
            # Suggestions came back but were not applied.
            failed.append(host)

    return {
        "repaired": repaired,
        "failed": failed,
        "skipped": skipped,
        "results": results,
    }

← Back to Dashboard