Coverage for apps / recipes / services / sanitizer.py: 81%
16 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-12 10:49 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-12 10:49 +0000
1"""HTML sanitization for scraped recipe content.
3Strips all HTML tags from recipe text fields before storage.
4Defense-in-depth against stored XSS, regardless of frontend escaping.
5"""
7import nh3
def _strip_html(value):
    """Return *value* with all HTML tags removed if it is a ``str``; otherwise return it unchanged."""
    if isinstance(value, str):
        # An empty allow-list of tags means no tag survives cleaning.
        return nh3.clean(value, tags=set())
    return value


def _strip_html_list(values: list) -> list:
    """Return a new list with every ``str`` item cleaned and every other item kept as-is."""
    return [_strip_html(item) for item in values]


def _dict_entries(value) -> list:
    """Return the ``dict`` entries of *value* when it is a list/tuple, else an empty list.

    Scraped payloads may carry ``None`` (or another unexpected type) where a
    list of dicts is expected, and may contain stray non-dict entries.
    """
    if not isinstance(value, (list, tuple)):
        return []
    return [entry for entry in value if isinstance(entry, dict)]


def sanitize_recipe_data(data: dict) -> None:
    """Strip HTML tags from scraped recipe text fields in-place.

    Recipe content should be plain text. Any HTML from upstream sites
    is stripped to prevent stored XSS.

    Only ``str`` values are cleaned; values of any other type (including
    ``None``) are left untouched. ``ingredient_groups`` and ``links`` are
    skipped when they are not a list/tuple, and entries inside them that
    are not dicts are ignored, so partially populated scraper output does
    not raise.

    Args:
        data: Scraped recipe mapping. It is mutated in place; nested group
            and link dicts are mutated in place as well.

    Returns:
        None.
    """
    # Simple text fields: strip all HTML
    text_fields = (
        "title",
        "author",
        "description",
        "site_name",
        "yields",
        "category",
        "cuisine",
        "cooking_method",
        "language",
        "instructions_text",
    )
    for key in text_fields:
        if isinstance(data.get(key), str):
            data[key] = _strip_html(data[key])

    # List of strings fields
    list_fields = (
        "ingredients",
        "instructions",
        "keywords",
        "dietary_restrictions",
        "equipment",
    )
    for key in list_fields:
        if isinstance(data.get(key), list):
            data[key] = _strip_html_list(data[key])

    # Ingredient groups: list of dicts with "purpose" and "ingredients"
    for group in _dict_entries(data.get("ingredient_groups")):
        if isinstance(group.get("purpose"), str):
            group["purpose"] = _strip_html(group["purpose"])
        if isinstance(group.get("ingredients"), list):
            group["ingredients"] = _strip_html_list(group["ingredients"])

    # Links: list of dicts — sanitize display text only, not URLs
    for link in _dict_entries(data.get("links")):
        if isinstance(link.get("text"), str):
            link["text"] = _strip_html(link["text"])