Coverage for apps / recipes / services / sanitizer.py: 81%

16 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-12 10:49 +0000

1"""HTML sanitization for scraped recipe content. 

2 

3Strips all HTML tags from recipe text fields before storage. 

4Defense-in-depth against stored XSS, regardless of frontend escaping. 

5""" 

6 

7import nh3 

8 

9 

def sanitize_recipe_data(data: dict) -> None:
    """Strip HTML tags from scraped recipe text fields in-place.

    Recipe content should be plain text. Any HTML from upstream sites
    is stripped to prevent stored XSS.

    Only ``str`` values are cleaned. Missing keys, ``None`` and other
    non-string values are left untouched, so partially populated or
    oddly shaped scraper output never makes sanitization raise.

    Args:
        data: Scraped recipe fields keyed by field name. Mutated in place.
    """
    # Simple text fields: strip all HTML
    for key in (
        "title",
        "author",
        "description",
        "site_name",
        "yields",
        "category",
        "cuisine",
        "cooking_method",
        "language",
        "instructions_text",
    ):
        value = data.get(key)
        if isinstance(value, str):
            data[key] = _strip_tags(value)

    # List of strings fields
    for key in (
        "ingredients",
        "instructions",
        "keywords",
        "dietary_restrictions",
        "equipment",
    ):
        value = data.get(key)
        if isinstance(value, list):
            data[key] = _strip_tags_in_list(value)

    # Ingredient groups: list of dicts with "purpose" and "ingredients"
    for group in _dict_entries(data.get("ingredient_groups")):
        purpose = group.get("purpose")
        if isinstance(purpose, str):
            group["purpose"] = _strip_tags(purpose)
        ingredients = group.get("ingredients")
        if isinstance(ingredients, list):
            group["ingredients"] = _strip_tags_in_list(ingredients)

    # Links: list of dicts — sanitize display text only, not URLs
    for link in _dict_entries(data.get("links")):
        text = link.get("text")
        if isinstance(text, str):
            link["text"] = _strip_tags(text)


def _strip_tags(value: object) -> object:
    """Return *value* with every HTML tag removed if it is a string, else unchanged."""
    if isinstance(value, str):
        # An empty allow-list means no tag is permitted in the output.
        return nh3.clean(value, tags=set())
    return value


def _strip_tags_in_list(items: list) -> list:
    """Return a new list with string items stripped of HTML and other items kept as-is."""
    return [_strip_tags(item) for item in items]


def _dict_entries(value: object) -> list:
    """Return the dict entries of *value* when it is a list or tuple, else an empty list.

    ``dict.get(key, [])`` only falls back to ``[]`` when the key is absent;
    a key that is present with ``None`` (or holds non-dict entries) would
    otherwise crash the caller's loop with TypeError/AttributeError.
    """
    if not isinstance(value, (list, tuple)):
        return []
    return [entry for entry in value if isinstance(entry, dict)]

← Back to Dashboard