Coverage for apps/ai/services/selector.py: 20%

1"""CSS selector repair service using AI."""

3import logging

4from typing import Optional

6from apps.recipes.models import SearchSource

8from ..models import AIPrompt

9from .openrouter import OpenRouterService, AIUnavailableError, AIResponseError

10from .validator import AIResponseValidator, ValidationError

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default confidence threshold for auto-updating selectors: a suggestion is
# applied automatically only when the AI's confidence is at least this value.
DEFAULT_CONFIDENCE_THRESHOLD = 0.8

def repair_selector(
    source: SearchSource,
    html_sample: str,
    target: str = "recipe search result",
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
    auto_update: bool = True,
    max_html_chars: int = 50000,
) -> dict:
    """Attempt to repair a broken CSS selector using AI.

    Sends the source's current selector and a sample of the search page's
    HTML to the AI service, validates the reply, and optionally writes the
    best suggestion back to the source.

    Args:
        source: The SearchSource with the broken selector.
        html_sample: Sample HTML from the search page. Only the first
            ``max_html_chars`` characters are sent to the AI.
        target: Description of the target element type.
        confidence_threshold: Minimum confidence to auto-update (0-1).
        auto_update: If True and confidence reaches the threshold, update
            the source.
        max_html_chars: Maximum number of characters (not bytes) of
            ``html_sample`` included in the prompt, to stay within token
            limits. Expected to be non-negative.

    Returns:
        Dict with keys:
            - suggestions: List of suggested CSS selectors
            - confidence: AI's confidence score (0-1)
            - original_selector: The original broken selector
            - updated: Whether the source was auto-updated
            - new_selector: The new selector if updated, else None

    Raises:
        AIUnavailableError: If AI service is not available.
        AIResponseError: If AI returns invalid response.
        ValidationError: If response doesn't match expected schema.
    """
    original_selector = source.result_selector

    # Get the selector_repair prompt
    prompt = AIPrompt.get_prompt("selector_repair")

    # Truncate HTML to avoid token limits
    truncated_html = html_sample[:max_html_chars]

    # Format the user prompt
    user_prompt = prompt.format_user_prompt(
        selector=original_selector or "(none)",
        target=target,
        html_sample=truncated_html,
    )

    # Call AI service
    service = OpenRouterService()
    response = service.complete(
        system_prompt=prompt.system_prompt,
        user_prompt=user_prompt,
        model=prompt.model,
        json_response=True,
    )

    # Validate response
    validator = AIResponseValidator()
    validated = validator.validate("selector_repair", response)

    suggestions = validated.get("suggestions", [])
    confidence = validated.get("confidence", 0)

    result = {
        "suggestions": suggestions,
        "confidence": confidence,
        "original_selector": original_selector,
        "updated": False,
        "new_selector": None,
    }

    # Only a non-blank string is usable as a selector. Taking suggestions[0]
    # blindly could persist an empty value and clear the attention flag, so
    # pick the first usable suggestion (stripped of surrounding whitespace).
    candidate = next(
        (
            item.strip()
            for item in (suggestions or ())
            if isinstance(item, str) and item.strip()
        ),
        None,
    )

    # Auto-update if confidence is high enough and we have a usable suggestion
    if auto_update and candidate is not None and confidence >= confidence_threshold:
        new_selector = candidate
        source.result_selector = new_selector
        source.needs_attention = False  # Clear the attention flag
        # Persist only the two fields changed here.
        source.save(update_fields=["result_selector", "needs_attention"])

        result["updated"] = True
        result["new_selector"] = new_selector

        logger.info(
            f"Auto-updated selector for {source.host}: "
            f'"{original_selector}" -> "{new_selector}" (confidence: {confidence:.2f})'
        )
    else:
        logger.info(
            f"Selector repair suggestions for {source.host} "
            f"(confidence: {confidence:.2f}, threshold: {confidence_threshold}): "
            f"{suggestions}"
        )

    return result

111

112

def get_sources_needing_attention() -> list[SearchSource]:
    """Return the enabled SearchSources flagged as needing attention.

    The query selects rows with ``needs_attention=True`` and
    ``is_enabled=True``; it does not inspect failure counts itself.
    NOTE(review): the flag is presumably raised elsewhere (e.g. after
    repeated consecutive failures) -- confirm against the model code.
    """
    flagged = SearchSource.objects.filter(needs_attention=True, is_enabled=True)
    # Materialize the queryset so callers receive a plain list.
    return list(flagged)

125

126

def repair_all_broken_selectors(
    html_samples: dict[str, str],
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
) -> dict:
    """Run selector repair for every source currently needing attention.

    Args:
        html_samples: Dict mapping host to HTML sample.
        confidence_threshold: Minimum confidence to auto-update.

    Returns:
        Dict with:
            - repaired: List of hosts whose selector was auto-updated
            - failed: List of hosts that were not updated (low confidence,
              no suggestions, or an AI/validation error)
            - skipped: List of hosts with no HTML sample provided
            - results: Dict mapping host to repair result, or to
              ``{"error": message}`` when the repair raised
    """
    summary = {"repaired": [], "failed": [], "skipped": [], "results": {}}

    for source in get_sources_needing_attention():
        host = source.host

        # Without an HTML sample there is nothing to send to the AI.
        if host not in html_samples:
            summary["skipped"].append(host)
            continue

        try:
            outcome = repair_selector(
                source=source,
                html_sample=html_samples[host],
                confidence_threshold=confidence_threshold,
            )
        except (AIUnavailableError, AIResponseError, ValidationError) as e:
            # One failing host must not abort the rest of the batch.
            logger.error(f"Failed to repair selector for {host}: {e}")
            summary["failed"].append(host)
            summary["results"][host] = {"error": str(e)}
            continue

        summary["results"][host] = outcome
        # A repair that did not update the source counts as a failure.
        bucket = "repaired" if outcome["updated"] else "failed"
        summary[bucket].append(host)

    return summary

Coverage for apps/ai/services/selector.py: 20%

54 statements