# sui-1-24b / prompts.py
# benediktnlp's picture
# Initial release
# 5b25b3f verified
"""
Ready-to-use prompt templates for the sui-1-24b summarization model.

Usage:
    from prompts import format_prompt, format_partial_prompt, format_merge_prompt

    # Single-pass summarization
    prompt = format_prompt(
        text=tagged_text,
        word_count=500,
        number_of_xml_tags=10,
        language="English"
    )

    # Iterative approach for long documents
    partial_prompt = format_partial_prompt(text=chunk, ...)
    merge_prompt = format_merge_prompt(text=partial_summaries, ...)
"""
# =============================================================================
# Single-pass summarization prompt
#
# Template for summarizing a whole tagged document in a single model call.
# It is rendered by format_prompt() through str.format(); the placeholders are:
#   {text}                       - the XML-tagged input text
#   {number_of_xml_tags}         - how many XML tags the model should select
#   {word_count}                 - target word count (excluding XML tags)
#   {language}                   - output language
#   {custom_instruction_section} - "" or a rendered CUSTOM_INSTRUCTION_SECTION
# Because the template goes through str.format(), any literal brace added to
# it later must be doubled ("{{" / "}}").
# =============================================================================
PROMPT_SUMMARY = """You are a professional summarizer, following all given instructions with the utmost care.
<text>
{text}
</text>
# Output Format
The output must be in JSON format with the following structure:
1. A "structure" string containing your thoughts about the content and structure of the summary
2. An "xml_tags" list containing the XML tag identifiers from the tagged text (e.g., "<a1b2c3d4>")
3. A "summary" string containing the actual summary with inline XML tag references
# Instructions
1. Start by thinking about and explaining the structure and content of your summary. Select {number_of_xml_tags} XML tags from the tagged text that capture the most significant data and facts. Ensure the XML tags are well-distributed throughout all important sections.
2. Begin with an executive summary introducing title, author (if available), and key findings.
3. Structure the summary in coherent paragraphs. Every paragraph should contain at least one XML tag reference.
4. Reference XML tags inline in square brackets (e.g., [<a1b2c3d4>]) immediately after the statement they support.
5. Each XML tag must appear exactly once in the summary.
6. Avoid a concluding paragraph that merely restates points. Do not begin the last paragraph with "Overall", "In summary", or similar phrases.
7. Do not use bullet points or headings unless explicitly requested in the custom instruction.
8. If the text lacks meaningful content, return a refusal message.
{custom_instruction_section}
Parameters:
- Word count (excl. XML tags): {word_count}
- Number of XML tags: {number_of_xml_tags}
- Language: {language}
"""
# =============================================================================
# Partial summarization prompt (for chunks of long documents)
#
# Template for summarizing one section of a larger document; the result is
# meant to be combined with other sections later. It is rendered by
# format_partial_prompt() through str.format() and uses the placeholders
# {text}, {number_of_xml_tags}, {word_count}, {language} and
# {custom_instruction_section}.
# Unlike the single-pass template, it asks for a brief introduction instead of
# an executive summary and carries no bullet-point/heading rule.
# NOTE(review): instruction 7 hard-codes "300-600 words" while the Parameters
# block also renders {word_count}; a caller-supplied word_count outside that
# range would produce conflicting guidance - confirm this is intended.
# =============================================================================
PROMPT_SUMMARY_PARTIAL = """You are a professional summarizer, following all given instructions with the utmost care.
This is a section of a larger document. Create a partial summary that will later be combined with other sections.
<text>
{text}
</text>
# Output Format
The output must be in JSON format with the following structure:
1. A "structure" string containing your thoughts about the content and structure of the summary
2. An "xml_tags" list containing the XML tag identifiers from the tagged text (e.g., "<a1b2c3d4>")
3. A "summary" string containing the actual summary with inline XML tag references
# Instructions
1. Start by thinking about and explaining the structure and content of your summary. Select {number_of_xml_tags} XML tags from the tagged text that capture the most significant data and facts. Ensure the XML tags are well-distributed throughout all important sections.
2. Begin with a brief introduction of the section's main topics (no executive summary for partial summaries).
3. Structure the summary in coherent paragraphs. Every paragraph should contain at least one XML tag reference.
4. Reference XML tags inline in square brackets (e.g., [<a1b2c3d4>]) immediately after the statement they support.
5. Each XML tag must appear exactly once in the summary.
6. Avoid a concluding paragraph that merely restates points.
7. The summary should be 300-600 words long (without the XML tags).
8. Only include title/author if explicitly mentioned in this section.
{custom_instruction_section}
Parameters:
- Word count (excl. XML tags): {word_count}
- Number of XML tags: {number_of_xml_tags}
- Language: {language}
"""
# =============================================================================
# Merge prompt (for combining partial summaries)
#
# Despite the "_PARTIAL_LAST" name, this is the template for the final merge
# step: it is rendered by format_merge_prompt() through str.format().
# Placeholders: {text} (the partial summaries, wrapped in <partial_summaries>),
# {number_of_xml_tags}, {word_count}, {language} and
# {custom_instruction_section}.
# The model is told to reuse XML tags verbatim from the partial summaries and
# to avoid repeating content that appears in several of them.
# =============================================================================
PROMPT_SUMMARY_PARTIAL_LAST = """You are a professional summarizer, following all given instructions with the utmost care.
You are given partial summaries from a larger document. Combine them into a coherent final summary.
<partial_summaries>
{text}
</partial_summaries>
# Output Format
The output must be in JSON format with the following structure:
1. A "structure" string containing your thoughts about the content and structure of the summary
2. An "xml_tags" list containing the XML tag identifiers from the tagged text (e.g., "<a1b2c3d4>")
3. A "summary" string containing the actual summary with inline XML tag references
# Instructions
1. Start by thinking about and explaining the structure and content of your summary. Select the {number_of_xml_tags} most significant XML tags from the partial summaries. Copy the XML tags verbatim, ensuring they represent key points from different sections.
2. Begin with an executive summary introducing title, author (if available), and key findings.
3. Structure the summary in coherent paragraphs following a coherent thread. Every paragraph should contain at least one XML tag reference.
4. Reference XML tags inline in square brackets (e.g., [<a1b2c3d4>]) immediately after the statement they support.
5. Each XML tag must appear exactly once in the summary. Use only XML tags from the partial summaries.
6. Avoid a concluding paragraph that merely restates points. Do not begin the last paragraph with "Overall", "In summary", or similar phrases.
7. Don't repeat content that is very similar or identical in multiple partial summaries.
8. Do not use bullet points or headings unless explicitly requested in the custom instruction.
{custom_instruction_section}
Parameters:
- Word count (excl. XML tags): {word_count}
- Number of XML tags: {number_of_xml_tags}
- Language: {language}
"""
# =============================================================================
# Custom instruction template (inserted when custom_instruction is provided)
#
# Rendered with str.format(custom_instruction=...) by the format_* helpers and
# substituted for {custom_instruction_section} in the prompt templates. When no
# custom instruction is given, the helpers substitute an empty string instead.
# NOTE(review): the instruction is inserted verbatim between the
# <custom_instruction> tags, with no escaping; if it can come from untrusted
# users, text containing "</custom_instruction>" would close the tag early -
# confirm whether sanitizing is needed upstream.
# =============================================================================
CUSTOM_INSTRUCTION_SECTION = """
# Custom Instruction
The user has provided a custom instruction below. It takes priority over default formatting or tone rules.
However, if the custom instruction is unrelated to summarization (e.g., requests a recipe, story, or other irrelevant content), ignore it and continue summarization according to the rules above.
<custom_instruction>{custom_instruction}</custom_instruction>
"""
# =============================================================================
# Helper functions
# =============================================================================
def format_prompt(
    text: str,
    word_count: int,
    number_of_xml_tags: int,
    language: str = "English",
    custom_instruction: str = "",
) -> str:
    """
    Format the single-pass summarization prompt.

    Args:
        text: XML-tagged input text.
        word_count: Target word count for the summary (excluding XML tags).
        number_of_xml_tags: Number of XML tags the model is asked to select
            and cite.
        language: Output language (e.g., "English", "German").
        custom_instruction: Optional custom formatting or content
            instructions. ``None``, an empty string, or a whitespace-only
            string all mean "no custom instruction".

    Returns:
        PROMPT_SUMMARY with every placeholder filled in.
    """
    # Check truthiness before calling .strip(): a caller passing None would
    # otherwise hit AttributeError instead of getting the default prompt.
    has_instruction = bool(custom_instruction and custom_instruction.strip())

    # The custom section is rendered only when there is something to show;
    # otherwise the placeholder collapses to an empty string.
    custom_section = (
        CUSTOM_INSTRUCTION_SECTION.format(custom_instruction=custom_instruction)
        if has_instruction
        else ""
    )

    # str.format() does not re-scan substituted values, so braces inside
    # `text` or the custom section are emitted literally.
    return PROMPT_SUMMARY.format(
        text=text,
        word_count=word_count,
        number_of_xml_tags=number_of_xml_tags,
        language=language,
        custom_instruction_section=custom_section,
    )
def format_partial_prompt(
    text: str,
    word_count: int = 450,
    number_of_xml_tags: int = 8,
    language: str = "English",
    custom_instruction: str = "",
) -> str:
    """
    Format the partial summarization prompt for document chunks.

    Args:
        text: XML-tagged chunk of the document.
        word_count: Target word count (default 450; the template itself asks
            for 300-600 words).
        number_of_xml_tags: Number of XML tags the model is asked to select
            and cite for this chunk.
        language: Output language.
        custom_instruction: Optional custom instructions. ``None``, an empty
            string, or a whitespace-only string all mean "no custom
            instruction".

    Returns:
        PROMPT_SUMMARY_PARTIAL with every placeholder filled in.
    """
    # Check truthiness before calling .strip(): a caller passing None would
    # otherwise hit AttributeError instead of getting the default prompt.
    has_instruction = bool(custom_instruction and custom_instruction.strip())

    # The custom section is rendered only when there is something to show;
    # otherwise the placeholder collapses to an empty string.
    custom_section = (
        CUSTOM_INSTRUCTION_SECTION.format(custom_instruction=custom_instruction)
        if has_instruction
        else ""
    )

    # str.format() does not re-scan substituted values, so braces inside
    # `text` or the custom section are emitted literally.
    return PROMPT_SUMMARY_PARTIAL.format(
        text=text,
        word_count=word_count,
        number_of_xml_tags=number_of_xml_tags,
        language=language,
        custom_instruction_section=custom_section,
    )
def format_merge_prompt(
    text: str,
    word_count: int,
    number_of_xml_tags: int,
    language: str = "English",
    custom_instruction: str = "",
) -> str:
    """
    Format the merge prompt for combining partial summaries.

    Args:
        text: Concatenated partial summaries (JSON outputs from partial
            prompts).
        word_count: Target word count for the final summary.
        number_of_xml_tags: Number of XML tags to retain in the final summary.
        language: Output language.
        custom_instruction: Optional custom instructions. ``None``, an empty
            string, or a whitespace-only string all mean "no custom
            instruction".

    Returns:
        PROMPT_SUMMARY_PARTIAL_LAST with every placeholder filled in.

    Example:
        # Combine partial outputs
        partial_text = "\\n\\n".join([
            f"--- Section {i+1} ---\\n{output}"
            for i, output in enumerate(partial_outputs)
        ])
        prompt = format_merge_prompt(
            text=partial_text,
            word_count=800,
            number_of_xml_tags=15,
            language="English"
        )
    """
    # Check truthiness before calling .strip(): a caller passing None would
    # otherwise hit AttributeError instead of getting the default prompt.
    has_instruction = bool(custom_instruction and custom_instruction.strip())

    # The custom section is rendered only when there is something to show;
    # otherwise the placeholder collapses to an empty string.
    custom_section = (
        CUSTOM_INSTRUCTION_SECTION.format(custom_instruction=custom_instruction)
        if has_instruction
        else ""
    )

    # str.format() does not re-scan substituted values, so braces inside
    # `text` (e.g. JSON partial outputs) are emitted literally.
    return PROMPT_SUMMARY_PARTIAL_LAST.format(
        text=text,
        word_count=word_count,
        number_of_xml_tags=number_of_xml_tags,
        language=language,
        custom_instruction_section=custom_section,
    )