File size: 765 Bytes
c6be992
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import re

def extract_user_provided_tags_upto_3_words(prompt_in: str) -> list[str]:
    """Extract short, de-duplicated tag candidates from a free-text prompt.

    The prompt is cut into fragments at every run of '.' and/or ','
    characters. Each fragment is stripped of surrounding whitespace; empty
    fragments are discarded, as are fragments containing more than three
    whitespace-separated words.

    Duplicates are detected case-insensitively (via ``str.lower``). The
    first spelling encountered is the one returned, and fragments keep the
    order in which they first appear in the prompt.

    Args:
        prompt_in: Raw prompt text. Any falsy value (for example ``""`` or
            ``None``) produces an empty list.

    Returns:
        The surviving fragments, in order of first appearance.
    """
    if not prompt_in:
        return []

    # Dicts preserve insertion order, so a single mapping from lowercase key
    # to first-seen spelling handles both de-duplication and ordering.
    first_spelling: dict[str, str] = {}

    for fragment in re.split(r"[.,]+", prompt_in):
        candidate = fragment.strip()
        # Skip blanks (e.g. from ",," or a trailing '.') and long phrases.
        if not candidate or len(candidate.split()) > 3:
            continue
        # setdefault leaves an existing entry alone: the first spelling wins.
        first_spelling.setdefault(candidate.lower(), candidate)

    return list(first_spelling.values())


# Smoke check: when this file is executed directly (not imported), print a
# confirmation line. Reaching this point means the module-level imports and
# definitions above loaded without raising.
if __name__ == "__main__":
    print("preproc.py imports ok")