Coverage for apps / ai / services / selector.py: 61%
54 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-12 10:49 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-12 10:49 +0000
1"""CSS selector repair service using AI."""
3import logging
4from typing import Optional
6from apps.recipes.models import SearchSource
8from ..models import AIPrompt
9from .openrouter import OpenRouterService, AIUnavailableError, AIResponseError
10from .validator import AIResponseValidator, ValidationError
# Module-level logger, named after this module's import path
logger = logging.getLogger(__name__)

# Default confidence threshold for auto-updating selectors
DEFAULT_CONFIDENCE_THRESHOLD = 0.8
def repair_selector(
    source: SearchSource,
    html_sample: str,
    target: str = "recipe search result",
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
    auto_update: bool = True,
) -> dict:
    """Attempt to repair a broken CSS selector using AI.

    Sends the source's current ``result_selector`` together with a sample
    of the page HTML to the AI service, validates the reply, and optionally
    writes the top suggestion back to ``source``.

    Args:
        source: The SearchSource with the broken selector.
        html_sample: Sample HTML from the search page. Only the first
            50,000 characters are sent to the AI.
        target: Description of the target element type.
        confidence_threshold: Minimum confidence to auto-update (0-1).
        auto_update: If True, the confidence reaches the threshold, and the
            top suggestion is usable (a non-blank string that differs from
            the original selector), update the source.

    Returns:
        Dict with keys:
        - suggestions: List of suggested CSS selectors
        - confidence: AI's confidence score (0-1)
        - original_selector: The original broken selector
        - updated: Whether the source was auto-updated
        - new_selector: The new selector if updated, else None

    Raises:
        AIUnavailableError: If AI service is not available.
        AIResponseError: If AI returns invalid response.
        ValidationError: If response doesn't match expected schema.
    """
    original_selector = source.result_selector

    # Get the selector_repair prompt
    prompt = AIPrompt.get_prompt("selector_repair")

    # Truncate HTML to avoid token limits (keep the first 50,000 characters)
    truncated_html = html_sample[:50000]

    # Format the user prompt
    user_prompt = prompt.format_user_prompt(
        selector=original_selector or "(none)",
        target=target,
        html_sample=truncated_html,
    )

    # Call AI service
    service = OpenRouterService()
    response = service.complete(
        system_prompt=prompt.system_prompt,
        user_prompt=user_prompt,
        model=prompt.model,
        json_response=True,
        timeout=10,
    )

    # Validate response
    validator = AIResponseValidator()
    validated = validator.validate("selector_repair", response)

    suggestions = validated.get("suggestions", [])
    confidence = validated.get("confidence", 0)

    result = {
        "suggestions": suggestions,
        "confidence": confidence,
        "original_selector": original_selector,
        "updated": False,
        "new_selector": None,
    }

    # Only the top suggestion is ever applied. Saving a blank selector, or
    # re-saving the selector that is already known to be broken, would clear
    # the attention flag without repairing anything, so neither counts as
    # a usable candidate.
    candidate = suggestions[0] if suggestions else None
    candidate_is_usable = (
        isinstance(candidate, str)
        and candidate.strip() != ""
        and candidate != original_selector
    )

    # Auto-update if confidence is high enough and we have a usable suggestion
    if auto_update and candidate_is_usable and confidence >= confidence_threshold:
        new_selector = candidate
        source.result_selector = new_selector
        source.needs_attention = False  # Clear the attention flag
        source.save(update_fields=["result_selector", "needs_attention"])

        result["updated"] = True
        result["new_selector"] = new_selector

        logger.info(
            'Auto-updated selector for %s: "%s" -> "%s" (confidence: %.2f)',
            source.host,
            original_selector,
            new_selector,
            confidence,
        )
    else:
        logger.info(
            "Selector repair suggestions for %s "
            "(confidence: %.2f, threshold: %s): %s",
            source.host,
            confidence,
            confidence_threshold,
            suggestions,
        )

    return result
def get_sources_needing_attention() -> list[SearchSource]:
    """Return the enabled SearchSources that are flagged for attention.

    Only rows with ``needs_attention=True`` and ``is_enabled=True`` are
    selected; disabled sources are excluded even if they are flagged.

    NOTE(review): no failure counter is consulted here. Presumably the
    code that sets ``needs_attention`` applies a rule such as
    "consecutive failures >= 3" -- confirm against the place the flag is set.

    Returns:
        A list (fully evaluated, not a lazy queryset) of matching sources.
    """
    flagged_sources = SearchSource.objects.filter(
        needs_attention=True,
        is_enabled=True,
    )
    return list(flagged_sources)
def repair_all_broken_selectors(
    html_samples: dict[str, str],
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
) -> dict:
    """Attempt to repair all sources needing attention.

    Iterates over the sources returned by ``get_sources_needing_attention()``
    and calls ``repair_selector()`` for each one that has a usable HTML
    sample.

    Args:
        html_samples: Dict mapping host to HTML sample.
        confidence_threshold: Minimum confidence to auto-update.

    Returns:
        Dict with:
        - repaired: List of hosts that were successfully repaired
        - failed: List of hosts that could not be repaired (the AI call
          or validation raised, or the source was not auto-updated)
        - skipped: List of hosts with no HTML sample provided (missing,
          ``None``, or empty)
        - results: Dict mapping host to repair result, or to
          ``{"error": <message>}`` when the repair raised
    """
    repaired = []
    failed = []
    skipped = []
    results = {}

    for source in get_sources_needing_attention():
        host = source.host

        # A missing, None, or empty sample gives the AI nothing to analyze,
        # and a None sample would otherwise raise an uncaught TypeError that
        # aborts the whole batch.
        html_sample = html_samples.get(host)
        if not html_sample:
            skipped.append(host)
            continue

        try:
            result = repair_selector(
                source=source,
                html_sample=html_sample,
                confidence_threshold=confidence_threshold,
            )
        except (AIUnavailableError, AIResponseError, ValidationError) as e:
            logger.error("Failed to repair selector for %s: %s", host, e)
            failed.append(host)
            results[host] = {"error": str(e)}
            continue

        results[host] = result

        # Suggestions that were not applied still count as a failed repair
        if result["updated"]:
            repaired.append(host)
        else:
            failed.append(host)

    return {
        "repaired": repaired,
        "failed": failed,
        "skipped": skipped,
        "results": results,
    }