Spaces:
Running
Running
| import re | |
| from lark import Lark, Token | |
# Parser
# Lark grammar for prompt text. As written below:
#   * `start` is any mix of `prompt` runs and stray bracket/paren/colon
#     characters matched by the anonymous regex /[][():]/.
#   * `prompt` is a sequence of emphasized groups, plain text runs, commas
#     and whitespace.
#   * `emphasized` is a parenthesised `prompt`, optionally followed by ":"
#     and a signed number (presumably an emphasis weight - confirm with callers).
#   * `plain` matches runs of characters other than , \ [ ] ( ) : | and also
#     accepts any backslash-escaped character.
# The leading "!" on `start` and `emphasized` tells Lark to keep the literal
# punctuation tokens in the resulting tree instead of filtering them out.
grammar=r"""
!start: (prompt | /[][():]/+)*
prompt: (emphasized | plain | comma | WHITESPACE)*
!emphasized: "(" prompt ")"
| "(" prompt ":" [WHITESPACE] NUMBER [WHITESPACE] ")"
comma: ","
WHITESPACE: /\s+/
plain: /([^,\\\[\]():|]|\\.)+/
%import common.SIGNED_NUMBER -> NUMBER
"""
# Initialize the parser
# Built once at import time; no `parser=` option is passed, so Lark's default
# parsing algorithm is used.
parser = Lark(grammar, start='start')
# Function to extract tags
def extract_tags(tree):
    """Collect every ``__ANON_1`` token from a Lark parse tree.

    The tree is walked depth-first, left to right, so the result is in
    document order.

    Args:
        tree: Root of the parse result; either a Lark tree node (anything
            exposing ``children``) or a single ``Token``.

    Returns:
        A list of ``(text, start_pos, "tag")`` tuples, where ``text`` is the
        token's value and ``start_pos`` is its offset as reported by Lark.
    """
    # NOTE(review): '__ANON_1' is presumably the name Lark auto-assigns to the
    # anonymous regex inside the `plain` rule; that name depends on the order
    # of anonymous terminals in the grammar - confirm if the grammar changes.
    tags_with_positions = []

    # An explicit stack replaces recursion so deeply nested parentheses cannot
    # exhaust Python's recursion limit.
    pending = [tree]
    while pending:
        node = pending.pop()
        if isinstance(node, Token):
            if node.type == '__ANON_1':
                tags_with_positions.append((node.value, node.start_pos, "tag"))
            continue
        # Optional grammar items written as `[X]` can show up as None children
        # when Lark's maybe_placeholders option is on; such entries have no
        # `children` attribute and are skipped instead of raising.
        children = getattr(node, "children", None)
        if children:
            # Reversed so that pop() yields children in left-to-right order.
            pending.extend(reversed(children))
    return tags_with_positions
def build_tag_offsets_dicts(new_image_tags_with_positions):
    """Turn ``(tag_text, start_pos, node_type)`` triples into tag records.

    Structures the data for HighlightedText.

    Args:
        new_image_tags_with_positions: Iterable of
            ``(tag_text, start_pos, node_type)`` tuples.

    Returns:
        A list with one dict per input tuple, holding:

        * ``original_tag``: the text exactly as given.
        * ``start_pos`` / ``end_pos``: the given start offset and that
          offset plus the length of the original text.
        * ``modified_tag``: underscores turned into spaces, backslash-escaped
          parentheses unescaped, surrounding whitespace stripped.
        * ``artist_matrix_tag``: underscores turned into spaces and
          surrounding whitespace stripped; escaped parentheses are kept.
        * ``tf_idf_matrix_tag``: stripped text with a leading ``by `` /
          ``by_`` removed, spaces turned into underscores and
          backslash-escaped parentheses unescaped.
        * ``node_type``: the given node type, passed through.
    """
    tag_data = []
    for tag_text, start_pos, nodetype in new_image_tags_with_positions:
        # Shared first step for the two space-separated variants.
        spaced = tag_text.replace('_', ' ')
        modified_tag = spaced.replace('\\(', '(').replace('\\)', ')').strip()
        # Escaped parentheses stay escaped here. The previous code replaced
        # each escape with itself via invalid '\(' literals, which only
        # produced a SyntaxWarning on newer Python versions.
        artist_matrix_tag = spaced.strip()

        without_prefix = tag_text.strip().removeprefix('by ').removeprefix('by_')
        tf_idf_matrix_tag = re.sub(
            r'\\([()])', r'\1', without_prefix.replace(' ', '_')
        )

        tag_data.append({
            "original_tag": tag_text,
            "start_pos": start_pos,
            # End offset is based on the original (unmodified) tag length.
            "end_pos": start_pos + len(tag_text),
            "modified_tag": modified_tag,
            "artist_matrix_tag": artist_matrix_tag,
            "tf_idf_matrix_tag": tf_idf_matrix_tag,
            "node_type": nodetype,
        })
    return tag_data
# Smoke check when run as a script: reaching this line means the imports and
# the module-level parser construction above completed without raising.
if __name__ == "__main__":
    print("prompt_grammar.py imports ok")