Coverage for apps/ai/services/selector.py: 20%
54 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 00:40 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 00:40 +0000
1"""CSS selector repair service using AI."""
3import logging
4from typing import Optional
6from apps.recipes.models import SearchSource
8from ..models import AIPrompt
9from .openrouter import OpenRouterService, AIUnavailableError, AIResponseError
10from .validator import AIResponseValidator, ValidationError
12logger = logging.getLogger(__name__)
14# Default confidence threshold for auto-updating selectors
15DEFAULT_CONFIDENCE_THRESHOLD = 0.8
def repair_selector(
    source: SearchSource,
    html_sample: str,
    target: str = 'recipe search result',
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
    auto_update: bool = True,
) -> dict:
    """Ask the AI for replacement CSS selectors for a source's result selector.

    The 'selector_repair' prompt is filled in with the source's current
    selector, the target description and a prefix of the HTML sample, sent
    to the OpenRouter service, and the reply is validated. When allowed and
    the reply is confident enough, the first suggestion is written back to
    the source and its attention flag is cleared.

    Args:
        source: The SearchSource whose ``result_selector`` should be repaired.
        html_sample: HTML from the search page; only the first 50,000
            characters are sent to the AI.
        target: Description of the kind of element the selector should match.
        confidence_threshold: Confidence the reply must reach (``>=``) before
            the source is updated automatically.
        auto_update: If False, the source is never modified; suggestions are
            only reported.

    Returns:
        Dict with keys:
            - suggestions: Suggested selectors from the validated reply
              (empty list if the reply has none).
            - confidence: Confidence from the validated reply (0 if absent).
            - original_selector: The selector the source had on entry.
            - updated: True only if the source was saved with a new selector.
            - new_selector: The selector that was saved, otherwise None.

    Raises:
        AIUnavailableError, AIResponseError, ValidationError: Not caught
            here; presumably raised by the AI service call and the response
            validator respectively, and propagated to the caller.
    """
    original_selector = source.result_selector
    repair_prompt = AIPrompt.get_prompt('selector_repair')

    # Cap the HTML at 50,000 characters to stay within model token limits;
    # a missing/empty selector is shown to the model as '(none)'.
    rendered_prompt = repair_prompt.format_user_prompt(
        selector=original_selector or '(none)',
        target=target,
        html_sample=html_sample[:50000],
    )

    ai_reply = OpenRouterService().complete(
        system_prompt=repair_prompt.system_prompt,
        user_prompt=rendered_prompt,
        model=repair_prompt.model,
        json_response=True,
    )
    checked = AIResponseValidator().validate('selector_repair', ai_reply)

    suggestions = checked.get('suggestions', [])
    confidence = checked.get('confidence', 0)

    # Report-only path: auto-update disabled, nothing suggested, or the
    # confidence is below the threshold.
    if not (auto_update and suggestions and confidence >= confidence_threshold):
        logger.info(
            f'Selector repair suggestions for {source.host} '
            f'(confidence: {confidence:.2f}, threshold: {confidence_threshold}): '
            f'{suggestions}'
        )
        return {
            'suggestions': suggestions,
            'confidence': confidence,
            'original_selector': original_selector,
            'updated': False,
            'new_selector': None,
        }

    # Apply path: the first suggestion wins, and the attention flag is
    # cleared in the same save.
    new_selector = suggestions[0]
    source.result_selector = new_selector
    source.needs_attention = False
    source.save(update_fields=['result_selector', 'needs_attention'])

    logger.info(
        f'Auto-updated selector for {source.host}: '
        f'"{original_selector}" -> "{new_selector}" (confidence: {confidence:.2f})'
    )
    return {
        'suggestions': suggestions,
        'confidence': confidence,
        'original_selector': original_selector,
        'updated': True,
        'new_selector': new_selector,
    }
def get_sources_needing_attention() -> list[SearchSource]:
    """Return the enabled SearchSources that are flagged as needing attention.

    Only rows with ``needs_attention=True`` and ``is_enabled=True`` are
    selected; the queryset is evaluated into a plain list before returning.

    NOTE(review): this function only reads the ``needs_attention`` flag. How
    that flag gets set (e.g. after repeated failures) is decided elsewhere —
    confirm against the code that writes it.
    """
    flagged_sources = SearchSource.objects.filter(
        needs_attention=True,
        is_enabled=True,
    )
    return list(flagged_sources)
def repair_all_broken_selectors(
    html_samples: dict[str, str],
    confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
) -> dict:
    """Run selector repair for every source currently needing attention.

    Args:
        html_samples: Mapping of source host to an HTML sample for that host.
        confidence_threshold: Confidence required for a source to be
            auto-updated; passed through to ``repair_selector``.

    Returns:
        Dict with:
            - repaired: Hosts whose source was updated with a new selector.
            - failed: Hosts where the repair raised one of the handled AI /
              validation errors, or completed without updating the source.
            - skipped: Hosts that had no entry in ``html_samples``.
            - results: Per-host repair result, or ``{'error': message}`` when
              a handled error occurred. Skipped hosts have no entry.
    """
    repaired = []
    failed = []
    skipped = []
    results = {}

    for source in get_sources_needing_attention():
        host = source.host

        # Without a sample there is nothing to send to the AI.
        if host not in html_samples:
            skipped.append(host)
            continue

        try:
            outcome = repair_selector(
                source=source,
                html_sample=html_samples[host],
                confidence_threshold=confidence_threshold,
            )
        except (AIUnavailableError, AIResponseError, ValidationError) as e:
            # One host failing must not stop the rest of the batch.
            logger.error(f'Failed to repair selector for {host}: {e}')
            failed.append(host)
            results[host] = {'error': str(e)}
        else:
            results[host] = outcome
            # A repair that returned suggestions but did not update the
            # source still counts as failed.
            bucket = repaired if outcome['updated'] else failed
            bucket.append(host)

    return {
        'repaired': repaired,
        'failed': failed,
        'skipped': skipped,
        'results': results,
    }