Coverage for apps / ai / services / selector.py: 20%
54 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-14 19:13 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-14 19:13 +0000
1"""CSS selector repair service using AI."""
3import logging
4from typing import Optional
6from apps.recipes.models import SearchSource
8from ..models import AIPrompt
9from .openrouter import OpenRouterService, AIUnavailableError, AIResponseError
10from .validator import AIResponseValidator, ValidationError
# Module-level logger named after this module; the repair functions below
# use it for their info/error messages.
12logger = logging.getLogger(__name__)
14# Default confidence threshold for auto-updating selectors
# Used as the default `confidence_threshold` argument of repair_selector and
# repair_all_broken_selectors; the AI confidence is compared against it
# with `>=`.
15DEFAULT_CONFIDENCE_THRESHOLD = 0.8
def repair_selector(
    source: SearchSource,
    html_sample: str,
    target: str = "recipe search result",
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
    auto_update: bool = True,
) -> dict:
    """Ask the AI service for replacement CSS selectors for a source.

    The stored ``selector_repair`` prompt is filled in with the source's
    current ``result_selector``, the target description and a prefix of
    the HTML sample, sent to OpenRouter requesting a JSON response, and
    the response is validated against the ``selector_repair`` schema.

    Args:
        source: The SearchSource whose ``result_selector`` is broken.
        html_sample: HTML from the search page; only the first 50,000
            characters are sent to the AI.
        target: Description of the element type the selector should match.
        confidence_threshold: Minimum confidence (inclusive) required
            before the source is updated automatically.
        auto_update: When True, and at least one suggestion came back with
            confidence at or above the threshold, the first suggestion is
            saved onto ``source``.

    Returns:
        Dict with keys:
        - suggestions: List of suggested CSS selectors
        - confidence: AI's confidence score (0 when the response has none)
        - original_selector: The selector the source had on entry
        - updated: Whether the source was auto-updated
        - new_selector: The saved selector if updated, else None

    Raises:
        AIUnavailableError, AIResponseError, ValidationError: not caught
            here; presumably raised by the AI service and the response
            validator respectively, so callers should be ready for them.
    """
    original_selector = source.result_selector
    repair_prompt = AIPrompt.get_prompt("selector_repair")

    # Cap the sample so the request stays within the model's token limits.
    html_prefix = html_sample[:50000]
    user_message = repair_prompt.format_user_prompt(
        selector=original_selector or "(none)",
        target=target,
        html_sample=html_prefix,
    )

    raw_reply = OpenRouterService().complete(
        system_prompt=repair_prompt.system_prompt,
        user_prompt=user_message,
        model=repair_prompt.model,
        json_response=True,
    )
    checked = AIResponseValidator().validate("selector_repair", raw_reply)

    suggestions = checked.get("suggestions", [])
    confidence = checked.get("confidence", 0)

    outcome = {
        "suggestions": suggestions,
        "confidence": confidence,
        "original_selector": original_selector,
        "updated": False,
        "new_selector": None,
    }

    # Guard clause: report the suggestions only, leaving the source untouched,
    # unless auto-update is on, there is a suggestion, and the AI is confident.
    if not (auto_update and suggestions and confidence >= confidence_threshold):
        logger.info(
            f"Selector repair suggestions for {source.host} "
            f"(confidence: {confidence:.2f}, threshold: {confidence_threshold}): "
            f"{suggestions}"
        )
        return outcome

    # The first suggestion is the one that gets applied.
    new_selector = suggestions[0]
    source.result_selector = new_selector
    source.needs_attention = False  # Clear the attention flag
    source.save(update_fields=["result_selector", "needs_attention"])

    outcome.update(updated=True, new_selector=new_selector)

    logger.info(
        f"Auto-updated selector for {source.host}: "
        f'"{original_selector}" -> "{new_selector}" (confidence: {confidence:.2f})'
    )
    return outcome
def get_sources_needing_attention() -> list[SearchSource]:
    """Return the enabled SearchSources that are flagged for attention.

    Selects rows with ``needs_attention=True`` and ``is_enabled=True`` and
    materializes the queryset into a list.

    NOTE(review): no failure-count filter is applied here; presumably the
    ``needs_attention`` flag is set elsewhere after repeated failures --
    confirm against the code that maintains it.
    """
    flagged = SearchSource.objects.filter(
        needs_attention=True,
        is_enabled=True,
    )
    return list(flagged)
def repair_all_broken_selectors(
    html_samples: dict[str, str],
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
) -> dict:
    """Run selector repair for every source that needs attention.

    Sources come from ``get_sources_needing_attention()``. Each one whose
    host has an entry in ``html_samples`` is passed to ``repair_selector``
    (with its default ``auto_update``); the others are skipped.

    Args:
        html_samples: Dict mapping host to HTML sample.
        confidence_threshold: Minimum confidence to auto-update.

    Returns:
        Dict with:
        - repaired: Hosts whose selector was auto-updated
        - failed: Hosts where the repair ran but did not update the
          source, or where an AI/validation error was raised
        - skipped: Hosts with no HTML sample provided
        - results: Dict mapping host to the ``repair_selector`` result,
          or to ``{"error": <message>}`` when an error was caught

    Only AIUnavailableError, AIResponseError and ValidationError are
    handled per host; any other exception propagates to the caller.
    """
    summary = {
        "repaired": [],
        "failed": [],
        "skipped": [],
        "results": {},
    }

    for source in get_sources_needing_attention():
        host = source.host

        if host not in html_samples:
            summary["skipped"].append(host)
            continue

        try:
            outcome = repair_selector(
                source=source,
                html_sample=html_samples[host],
                confidence_threshold=confidence_threshold,
            )
        except (AIUnavailableError, AIResponseError, ValidationError) as e:
            logger.error(f"Failed to repair selector for {host}: {e}")
            summary["failed"].append(host)
            summary["results"][host] = {"error": str(e)}
        else:
            summary["results"][host] = outcome
            # A run that produced suggestions but did not update the source
            # is still reported under "failed".
            bucket = "repaired" if outcome["updated"] else "failed"
            summary[bucket].append(host)

    return summary