Specification Generation
Learn how to generate safety specifications from rule sets.
Overview
In this tutorial, you will learn:
How SpecAlign organizes safety rules
How to generate specifications from classified rules
How to customize specification parameters
How to generate seed prompts for testing
Prerequisites
SpecAlign installed
Input rules file (e.g., Stage_classified.md)
Basic familiarity with the CLI
Complete Example
"""
Specification Generation Example
This script demonstrates the complete specification generation workflow,
from loading rules to generating testable seed prompts.
"""
import json
from pathlib import Path
from specalign.specgen import SpecSampler, InstructionGeneratorV2C
from specalign.specgen.tag_rules import RuleTagger
from specalign.core.seed_generator import SeedGenerator
from specalign.config import load_config
def load_and_tag_rules(rules_path: str) -> dict:
    """
    Tag the rules found in a markdown rules file and report a short summary.

    The file is handed to ``RuleTagger.tag_from_file``; the rule count and the
    set of distinct ``'category'`` values are then printed.

    Args:
        rules_path: Path to the rules file (e.g. Stage_classified.md)

    Returns:
        The tagged rules as returned by the tagger. This function treats the
        result as a mapping whose values each carry a ``'category'`` key.
    """
    print("Step 1: Loading and tagging rules...")
    rules = RuleTagger().tag_from_file(rules_path)

    print(f"✓ Loaded {len(rules)} rules")

    # Collapse the per-rule categories into the distinct set for the summary.
    categories = {entry['category'] for entry in rules.values()}
    print(f"✓ Categories: {categories}")

    return rules
def generate_specifications(tagged_rules: dict, num_specs: int = 10) -> list:
    """
    Sample specifications from tagged rules.

    A ``SpecSampler`` is built over ``tagged_rules`` with ``min_rules=3``,
    ``max_rules=5`` and ``ensure_coverage=True``, and asked for ``num_specs``
    samples. The number of specifications and the mean number of rules per
    specification are printed.

    Args:
        tagged_rules: Dictionary of tagged rules
        num_specs: Number of specifications to generate

    Returns:
        List of specification dictionaries; each is expected to expose its
        rules under the ``'rules'`` key (used here for the average).
    """
    print("\nStep 2: Generating specifications...")
    sampler = SpecSampler(
        rules=tagged_rules,
        min_rules=3,
        max_rules=5,
        ensure_coverage=True
    )
    specs = sampler.sample(num_specs)
    print(f"✓ Generated {len(specs)} specifications")

    # Guard the mean: an empty sample (e.g. num_specs=0) would otherwise
    # raise ZeroDivisionError while only printing a summary line.
    if specs:
        average_rules = sum(len(s['rules']) for s in specs) / len(specs)
    else:
        average_rules = 0.0
    print(f"✓ Average rules per spec: {average_rules:.1f}")
    return specs
def generate_instructions(specs: list, config: dict) -> list:
    """
    Attach a generated instruction to every specification.

    Each specification's ``'rules'`` entry is passed to an
    ``InstructionGeneratorV2C`` and the result is stored on that same
    specification under ``'instruction'``, so the input list is modified
    in place. One progress line is printed per specification.

    Args:
        specs: List of specifications
        config: Configuration dictionary; its ``'api'`` entry is forwarded
            to the generator as ``api_config``

    Returns:
        The same ``specs`` list, with instructions added
    """
    print("\nStep 3: Generating instructions...")
    instruction_generator = InstructionGeneratorV2C(
        api_config=config['api'],
        topics_per_spec=3
    )

    total = len(specs)
    for position, current in enumerate(specs, start=1):
        current['instruction'] = instruction_generator.generate(current['rules'])
        print(f"✓ Spec {position}/{total}: Generated instruction")

    return specs
def generate_seeds(specs: list, per_spec: int = 10) -> list:
    """
    Build the combined list of seed prompts for all specifications.

    A single ``SeedGenerator`` is asked for ``per_spec`` seeds for each
    specification in turn; the per-specification results are concatenated
    in specification order and the total count is printed.

    Args:
        specs: List of specifications with instructions
        per_spec: Number of seeds requested per specification

    Returns:
        Flat list containing every generated seed
    """
    print("\nStep 4: Generating seed prompts...")
    seed_generator = SeedGenerator()

    # Flatten the per-specification batches into one list, preserving order.
    all_seeds = [
        seed
        for spec in specs
        for seed in seed_generator.generate(spec=spec, num_seeds=per_spec)
    ]

    print(f"✓ Generated {len(all_seeds)} total seeds")
    return all_seeds
def save_outputs(specs: list, seeds: list, output_dir: str):
    """
    Write the specifications and seeds as JSON files under ``output_dir``.

    The directory (including missing parents) is created if needed. ``specs``
    goes to ``specs.json`` and ``seeds`` to ``seeds.json``, both indented by
    two spaces; a confirmation line is printed after each file is written.

    Args:
        specs: JSON-serializable list of specifications
        seeds: JSON-serializable list of seeds
        output_dir: Directory that receives both files
    """
    print("\nStep 5: Saving outputs...")
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Specifications are written first, then seeds.
    specs_file = target_dir.joinpath("specs.json")
    with specs_file.open('w') as handle:
        json.dump(specs, handle, indent=2)
    print(f"✓ Saved specifications to {specs_file}")

    seeds_file = target_dir.joinpath("seeds.json")
    with seeds_file.open('w') as handle:
        json.dump(seeds, handle, indent=2)
    print(f"✓ Saved seeds to {seeds_file}")
def main():
    """
    Drive the five workflow steps end to end.

    Loads ``config.json``, tags the rules in ``data/Stage_classified.md``,
    samples 10 specifications, adds instructions, generates 10 seeds per
    specification and saves everything under ``output``. A banner is printed
    before and after the run.
    """
    divider = "=" * 50

    print(divider)
    print("Specification Generation Example")
    print(divider)

    # Fixed inputs and outputs for this example run.
    config = load_config("config.json")
    rules_path = "data/Stage_classified.md"
    output_dir = "output"

    # Each step feeds the next one.
    rules = load_and_tag_rules(rules_path)
    sampled = generate_specifications(rules, num_specs=10)
    with_instructions = generate_instructions(sampled, config)
    seeds = generate_seeds(with_instructions, per_spec=10)
    save_outputs(with_instructions, seeds, output_dir)

    print("\n" + divider)
    print("✓ Specification generation complete!")
    print(divider)
# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Expected Output
==================================================
Specification Generation Example
==================================================
Step 1: Loading and tagging rules...
✓ Loaded 45 rules
✓ Categories: {'safety', 'privacy', 'content', 'behavior'}
Step 2: Generating specifications...
✓ Generated 10 specifications
✓ Average rules per spec: 4.2
Step 3: Generating instructions...
✓ Spec 1/10: Generated instruction
✓ Spec 2/10: Generated instruction
...
✓ Spec 10/10: Generated instruction
Step 4: Generating seed prompts...
✓ Generated 100 total seeds
Step 5: Saving outputs...
✓ Saved specifications to output/specs.json
✓ Saved seeds to output/seeds.json
==================================================
✓ Specification generation complete!
==================================================
CLI Alternative
The same workflow can be run using the CLI:
# Generate everything in one command
specalign generate all data/Stage_classified.md \
--num-specs 10 \
--seeds-per-spec 10 \
--output output/
# Or step by step
specalign generate specs data/Stage_classified.md --num-specs 10
specalign generate instructions output/specs.json
specalign generate seeds output/specs.json --per-spec 10
Key Takeaways
Rule tagging categorizes safety rules for balanced specification sampling
SpecSampler ensures diverse rule combinations with coverage guarantees
InstructionGeneratorV2C creates natural language instructions for specifications
SeedGenerator produces diverse test prompts targeting specification rules
CLI commands provide convenient access to the same functionality
Next Steps
Red Team Pipeline - Use generated seeds for adversarial testing
Configuration Reference - Customize generation parameters
Specgen Module - API reference for the specgen module