Quickstart Tutorial¶
This guide walks through UCEF's core workflow: initializing the system, storing documents, querying, and interpreting results. All code examples are runnable end-to-end.
Prerequisites¶
Install UCEF first (see the Installation page). You'll also need a model client that implements the ModelClient protocol. UCEF provides a base adapter pattern you can implement for any LLM API.
Step 1: Define a Model Client¶
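UCEF is model-agnostic. To plug in a model, implement the ModelClient protocol as the examples below do: the model_name and context_window properties plus the async generate and count_tokens methods. In the skeleton below, call_your_api is a placeholder for your own API call: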
from ucef.core.types import ModelClient
class MyModelClient:
    """Bare-bones model client skeleton for demonstration.

    Keeps the API credentials it is given and reports a fixed model name
    and context window. ``generate`` hands the prompt to ``call_your_api``,
    a placeholder to replace with a real LLM call.
    """

    def __init__(self, api_key: str, base_url: str):
        self._base_url = base_url
        self._api_key = api_key

    @property
    def model_name(self) -> str:
        """Name reported for this model."""
        return "my-model"

    @property
    def context_window(self) -> int:
        """Native context window of your model, in tokens."""
        return 128_000

    async def generate(
        self,
        prompt: str,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        **kwargs,
    ) -> str:
        """Return the completion for ``prompt``.

        Extra keyword arguments are accepted but not used by this skeleton.
        """
        # Swap this placeholder for the call to your LLM API.
        return await call_your_api(prompt, max_tokens, temperature)

    async def count_tokens(self, text: str) -> int:
        """Return a rough token count for ``text``."""
        # Whitespace-separated words stand in for tokens; use your real
        # tokenizer here for accurate counts.
        words = text.split()
        return len(words)
Using with OpenAI-compatible APIs¶
from typing import Optional

import openai
class OpenAIModelClient:
    """Model client for OpenAI-compatible APIs.

    Sends each prompt as a single user message through the chat completions
    endpoint of ``openai.AsyncOpenAI`` and counts tokens with ``tiktoken``.

    Args:
        model: Model name sent with every request.
        api_key: API key passed to ``openai.AsyncOpenAI``. When ``None`` the
            SDK falls back to the ``OPENAI_API_KEY`` environment variable.
    """

    # Native context windows (in tokens) for the models this example knows.
    _CONTEXT_WINDOWS = {
        "gpt-4o": 128_000,
        "gpt-4o-mini": 128_000,
        "gpt-3.5-turbo": 16_385,
    }
    # Window assumed for any model missing from the table above.
    _DEFAULT_CONTEXT_WINDOW = 128_000
    # Encoding used when tiktoken has no mapping for the model name, which is
    # common with OpenAI-compatible servers that host other models.
    _FALLBACK_ENCODING = "cl100k_base"

    def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
        self._client = openai.AsyncOpenAI(api_key=api_key)
        self._model = model

    @property
    def model_name(self) -> str:
        """Model name this client was constructed with."""
        return self._model

    @property
    def context_window(self) -> int:
        """Context window for the model, or the default for unknown names."""
        return self._CONTEXT_WINDOWS.get(self._model, self._DEFAULT_CONTEXT_WINDOW)

    async def generate(self, prompt: str, max_tokens: int = 4096,
                       temperature: float = 0.7, **kwargs) -> str:
        """Return the first choice's text for ``prompt``.

        Extra keyword arguments are accepted for call compatibility but are
        not forwarded to the API.

        Returns:
            The message content, or ``""`` when the API returns no content.
        """
        response = await self._client.chat.completions.create(
            model=self._model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
        )
        # ``content`` is optional in the API response; honour the ``str``
        # return type instead of leaking ``None`` to callers.
        return response.choices[0].message.content or ""

    async def count_tokens(self, text: str) -> int:
        """Return the number of tiktoken tokens in ``text``."""
        # Use tiktoken for accurate counts
        import tiktoken

        try:
            enc = tiktoken.encoding_for_model(self._model)
        except KeyError:
            # tiktoken does not recognise this model name.
            enc = tiktoken.get_encoding(self._FALLBACK_ENCODING)
        return len(enc.encode(text))
Step 2: Initialize the System¶
import asyncio
from ucef import UniversalContextSystem, UCEFConfig
async def setup():
    """Create, configure and initialize a ``UniversalContextSystem``.

    Builds an ``OpenAIModelClient`` for ``gpt-4o``, a ``UCEFConfig`` with
    hyperbolic, quantum and quality sections, then awaits
    ``system.initialize()``.

    Returns:
        The initialized ``UniversalContextSystem``.
    """
    import os

    # NOTE(review): assumes the section config classes are exported from the
    # top-level ``ucef`` package like ``UCEFConfig`` -- confirm the import path.
    from ucef import HyperbolicConfig, QualityConfig, QuantumConfig

    # Create model client. Read the key from the environment instead of
    # pasting a secret into source code.
    client = OpenAIModelClient(
        model="gpt-4o",
        api_key=os.environ.get("OPENAI_API_KEY"),
    )

    # Create configuration (use defaults or customize)
    config = UCEFConfig(
        target_extended_context=1_000_000,  # Extend to 1M tokens
        hyperbolic=HyperbolicConfig(
            embedding_dim=128,
            n_neighbors=50,
        ),
        quantum=QuantumConfig(
            enabled=True,
            top_k_measurements=10,
        ),
        quality=QualityConfig(
            quality_threshold=0.75,
        ),
    )

    # Initialize system; take the model name from the client so the two
    # cannot drift apart.
    system = UniversalContextSystem(
        model_client=client,
        model_name=client.model_name,
        config=config,
    )
    await system.initialize()
    return system
The initialize() call performs the following steps:
- Profiles the model — Detects context window, measures performance curve, assesses quality retention
- Creates directories — Sets up data, cache, warm, and cold storage paths
- Initializes compression engine — Sets up the adaptive compressor with model-aware strategies
- Starts quality monitor — Begins tracking query quality in a rolling window
- Prepares feedback loop — Ready to refine low-quality results automatically
Step 3: Store Documents¶
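UCEF can store documents from various sources. The snippets in Steps 3–7 use await, so run them inside an async function (or an asyncio-aware REPL such as `python -m asyncio`); they assume `system` is the object returned by setup() in Step 2: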
Store individual documents¶
from ucef import Document
# Store a single document
doc = await system.store_text(
text="The Eiffel Tower is a wrought-iron lattice tower on the "
"Champ de Mars in Paris, France. It is named after the "
"engineer Gustave Eiffel, whose company designed and built "
"the tower from 1887 to 1889.",
doc_id="eiffel_001",
metadata={
"source": "wikipedia",
"category": "landmarks",
"language": "en",
},
)
print(f"Stored document: {doc.id} ({doc.estimate_tokens()} tokens)")
Store multiple documents at once¶
documents = [
Document(
id="paper_001",
text="Hyperbolic embeddings provide exponentially more capacity "
"than Euclidean space for representing hierarchical data...",
metadata={"source": "neurips_2017", "type": "research_paper"},
),
Document(
id="paper_002",
text="The Minimum Description Length principle provides a "
"rigorous framework for model selection based on "
"information theory...",
metadata={"source": "mit_press_2007", "type": "book"},
),
Document(
id="report_001",
text="Quarterly financial report Q3 2025: Revenue increased "
"by 15% year-over-year...",
metadata={"source": "finance", "type": "report", "quarter": "Q3"},
),
]
stored_count = await system.store_documents(documents)
print(f"Stored {stored_count} documents")
Store with pre-computed embeddings¶
import numpy as np
from ucef import HyperbolicPoint
# If you have pre-computed embeddings
doc_with_embedding = Document(
id="embedded_001",
text="Document with known embedding...",
euclidean_embedding=np.random.randn(128).astype(np.float64),
hyperbolic_embedding=HyperbolicPoint.random(dim=128, max_norm=0.9),
)
await system.store_documents([doc_with_embedding])
Step 4: Query for Context¶
Basic query¶
result = await system.query(
"What is the Eiffel Tower and when was it built?"
)
print(f"Query: {result.query}")
print(f"Overall quality: {result.overall_quality:.3f}")
print(f"Context blocks selected: {len(result.context_blocks)}")
print(f"Total tokens: {result.total_tokens}")
print(f"Retrieval time: {result.retrieval_time_ms:.1f}ms")
# Print each context block
for i, block in enumerate(result.context_blocks, 1):
print(f"\n[Block {i}] (score: {block.relevance_score:.3f})")
print(f" Tokens: {block.token_count}")
print(f" Measurement prob: {block.measurement_probability:.3f}")
print(f" Text: {block.text[:100]}...")
Query with quality threshold override¶
result = await system.query(
"Explain the financial performance in Q3",
top_k=20, # Retrieve more candidates
quality_threshold=0.85, # Higher quality bar
)
Query with full model response¶
# One-shot: query + generate response with extended context
response = await system.query_with_response(
"Summarize the key findings from all documents"
)
print(response)
Step 5: Interpret Results¶
Quality Metrics¶
UCEF evaluates context quality across four dimensions:
result = await system.query("What are the main topics?")
print(f"Relevance: {result.relevance_score:.3f} # How well blocks match query")
print(f"Completeness: {result.completeness_score:.3f} # Coverage of query terms")
print(f"Coherence: {result.coherence_score:.3f} # Context consistency")
print(f"Accuracy: {result.accuracy_score:.3f} # Confidence in information")
print(f"Overall: {result.overall_quality:.3f} # Weighted combination")
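The overall quality is computed as a weighted combination of the four scores:

\[ Q_{overall} = w_R \, R + w_{complete} \, C_{complete} + w_{coherent} \, C_{coherent} + w_A \, A \]

where \(R\) = relevance, \(C_{complete}\) = completeness, \(C_{coherent}\) = coherence, \(A\) = accuracy, and each \(w\) is the weight given to the corresponding dimension.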
QueryResult Fields¶
| Field | Type | Description |
|---|---|---|
| `query` | `str` | The original query string |
| `context_blocks` | `List[ContextBlock]` | Selected context segments |
| `total_tokens` | `int` | Total tokens in selected context |
| `relevance_score` | `float` | Average block relevance (0-1) |
| `completeness_score` | `float` | Query term coverage (0-1) |
| `coherence_score` | `float` | Context consistency estimate (0-1) |
| `accuracy_score` | `float` | Information confidence (0-1) |
| `overall_quality` | `float` | Weighted quality score (0-1) |
| `retrieval_strategy` | `str` | Strategy used (adaptive/aggressive/etc.) |
| `compression_used` | `CompressionStrategy` | Compression level applied |
| `retrieval_time_ms` | `float` | Total pipeline latency |
ContextBlock Fields¶
| Field | Type | Description |
|---|---|---|
| `document_id` | `str` | Source document identifier |
| `text` | `str` | Block text content |
| `relevance_score` | `float` | Relevance to query |
| `token_count` | `int` | Tokens in this block |
| `quantum_amplitude` | `complex` | Quantum state amplitude |
| `measurement_probability` | `float` | Born rule probability |
Step 6: Monitor Quality¶
UCEF provides real-time quality monitoring:
# Fetch the quality statistics
stats = system.get_quality_stats()

# (label, key, format spec) for each reported line; an empty spec prints the
# value with its default formatting
report_rows = (
    ("Total queries", "total_queries", ""),
    ("Mean quality", "mean_quality", ".3f"),
    ("P95 quality", "p95_quality", ".3f"),
    ("Quality degraded", "quality_degraded", ""),
)
for label, key, spec in report_rows:
    print(f"{label}: {stats[key]:{spec}}")
Query with feedback loop details¶
# Get detailed feedback loop results
feedback = await system.query_with_feedback(
"Explain the key concepts",
quality_threshold=0.75,
)
print(f"Converged: {feedback.converged}")
print(f"Iterations: {feedback.iterations}")
print(f"Total improvement: {feedback.total_improvement:.3f}")
for step in feedback.steps:
print(f" Iteration {step.iteration}: "
f"{step.quality_before:.3f} -> {step.quality_after:.3f} "
f"({step.action.name}, {step.elapsed_ms:.1f}ms)")
Step 7: System Statistics¶
stats = await system.get_stats()
print(f"Model: {stats['model']}")
print(f"Context category: {stats['model_category']}")
print(f"Native window: {stats['native_context_window']} tokens")
print(f"Target context: {stats['target_context']} tokens")
print(f"Documents stored: {stats['documents_stored']}")
print(f"Quantum selection: {stats['quantum_selection_enabled']}")
Complete Example¶
Here's a full runnable example that ties everything together:
import asyncio
import numpy as np
from ucef import (
UniversalContextSystem,
UCEFConfig,
Document,
HyperbolicPoint,
)
# --- Mock model client for demo ---
class DemoClient:
    """Offline stand-in model client for the demo.

    Reports a fixed model name and context window and returns a canned
    response, so the example runs without calling any LLM API.
    """

    @property
    def model_name(self):
        """Name reported for the demo model."""
        return "demo-model"

    @property
    def context_window(self):
        """Context window reported for the demo model, in tokens."""
        return 4096

    async def generate(self, prompt, max_tokens=4096, **kw):
        """Return a fixed response regardless of the prompt."""
        return "This is a generated response."

    async def count_tokens(self, text):
        """Return a rough token count for ``text``."""
        # One token per whitespace-separated word. Dividing the word count
        # by 4 confuses words with characters and reports 0 tokens for any
        # text shorter than four words.
        return len(text.split())
async def main():
    """Run the demo: initialize, store three documents, query, print results."""
    # Initialize; take the model name from the client so the two cannot
    # drift apart.
    client = DemoClient()
    system = UniversalContextSystem(
        model_client=client,
        model_name=client.model_name,
    )
    await system.initialize()

    # Store documents
    docs = [
        Document(id="doc1", text="Machine learning is a subset of AI..."),
        Document(id="doc2", text="Deep learning uses neural networks..."),
        Document(id="doc3", text="Natural language processing enables..."),
    ]
    await system.store_documents(docs)

    # Query
    result = await system.query("What is deep learning?")
    print(f"Quality: {result.overall_quality:.3f}")
    print(f"Blocks: {len(result.context_blocks)}")
    print(f"Latency: {result.retrieval_time_ms:.1f}ms")

    # Format context for model
    context = result.format_context()
    print(f"\nFormatted context:\n{context}")


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())
What's Next?¶
- Configuration Reference — Customize every aspect of UCEF
- Architecture Overview — Understand the mathematical foundations
- API Reference — Full class and method documentation
Previous: Installation | Next: Configuration