API Documentation#
This page provides an overview of Stringy's public API. For complete API documentation, run cargo doc --open in the project directory.
Core Types#
FoundString#
The primary data structure representing an extracted string with metadata.
/// One extracted string plus the metadata recorded alongside it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FoundString {
    /// Text of the extracted string.
    pub text: String,
    /// Encoding the string was stored in.
    pub encoding: Encoding,
    /// Offset within the file at which the string was found.
    pub offset: u64,
    /// Relative Virtual Address, when one is available.
    pub rva: Option<u64>,
    /// Name of the section the string was found in.
    pub section: Option<String>,
    /// String length, measured in bytes.
    pub length: u32,
    /// Semantic tags attached to this string.
    pub tags: Vec<Tag>,
    /// Relevance score used for ranking.
    pub score: i32,
    /// Where the string came from (section data, import, etc.).
    pub source: StringSource,
}
Encoding#
Supported string encodings.
/// String encodings the extractor supports.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Encoding {
    Ascii,
    Utf8,
    Utf16Le,
    Utf16Be,
}
Tag#
Semantic classification tags.
/// Semantic classification tags that can be attached to a string.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Tag {
    Url,
    Domain,
    IPv4,
    IPv6,
    FilePath,
    RegistryPath,
    Guid,
    Email,
    Base64,
    FormatString,
    UserAgent,
    Import,
    Export,
    Version,
    Manifest,
    Resource,
}
Main API Functions#
extract_strings#
Extract strings from binary data.
/// Extract strings from binary data.
///
/// `data` is the binary data to analyze and `config` holds the extraction
/// configuration options. On success, returns the extracted strings with
/// their metadata.
pub fn extract_strings(
data: &[u8],
config: &ExtractionConfig
) -> Result<Vec<FoundString>>
Parameters:
data: Binary data to analyze
config: Extraction configuration options
Returns:
Result<Vec<FoundString>>: Extracted strings with metadata
Example:
use stringy::{extract_strings, ExtractionConfig};

// Read the binary and run extraction with the default configuration.
let data = std::fs::read("binary.exe")?;
let strings = extract_strings(&data, &ExtractionConfig::default())?;

// One output line per string: score first, then the text.
for found in strings {
    println!("{}: {}", found.score, found.text);
}
detect_format#
Detect the binary format of the given data.
/// Detect the binary format of the given data.
///
/// `data` is the binary data to analyze. Returns the detected format
/// (ELF, PE, MachO, or Unknown).
pub fn detect_format(data: &[u8]) -> BinaryFormat
Parameters:
data: Binary data to analyze
Returns:
BinaryFormat: Detected format (ELF, PE, MachO, or Unknown)
Example:
use stringy::detect_format;

// Load the whole file, then report the format detected for it.
let data = std::fs::read("binary")?;
println!("Detected format: {:?}", detect_format(&data));
Configuration#
ExtractionConfig#
Configuration options for string extraction.
/// Options that control string extraction.
pub struct ExtractionConfig {
    /// Shortest ASCII string that is kept.
    pub min_ascii_len: usize,
    /// Shortest UTF-16 string that is kept.
    pub min_utf16_len: usize,
    /// Upper bound on string length.
    pub max_string_len: usize,
    /// Which encodings to extract.
    pub encodings: Vec<Encoding>,
    /// Sections to include; `None` means all sections.
    pub include_sections: Option<Vec<String>>,
    /// Sections to leave out.
    pub exclude_sections: Vec<String>,
    /// Whether debug sections are included.
    pub include_debug: bool,
    /// Whether import/export names are included.
    pub include_symbols: bool,
}
impl Default for ExtractionConfig {
    /// Default extraction settings: ASCII and UTF-16LE strings from every
    /// section, with symbols included and debug sections left out.
    fn default() -> Self {
        // No section filtering: `None` includes everything, nothing is excluded.
        let include_sections = None;
        let exclude_sections = Vec::new();

        Self {
            min_ascii_len: 4,
            min_utf16_len: 3,
            max_string_len: 1024,
            encodings: vec![Encoding::Ascii, Encoding::Utf16Le],
            include_sections,
            exclude_sections,
            include_debug: false,
            include_symbols: true,
        }
    }
}
ClassificationConfig#
Configuration for semantic classification.
/// Switches and threshold for semantic classification.
pub struct ClassificationConfig {
    /// Turn URL detection on or off.
    pub detect_urls: bool,
    /// Turn domain detection on or off.
    pub detect_domains: bool,
    /// Turn IP address detection on or off.
    pub detect_ips: bool,
    /// Turn file path detection on or off.
    pub detect_paths: bool,
    /// Turn GUID detection on or off.
    pub detect_guids: bool,
    /// Turn email detection on or off.
    pub detect_emails: bool,
    /// Turn Base64 detection on or off.
    pub detect_base64: bool,
    /// Turn format string detection on or off.
    pub detect_format_strings: bool,
    /// Minimum confidence threshold.
    pub min_confidence: f32,
}
Container Parsing#
ContainerParser Trait#
Trait for implementing binary format parsers.
/// Interface implemented by binary format parsers.
pub trait ContainerParser {
    /// Report whether this parser can handle `data`.
    fn detect(data: &[u8]) -> bool
    where
        Self: Sized;

    /// Parse the container in `data` and return its extracted metadata.
    fn parse(&self, data: &[u8]) -> Result<ContainerInfo>;
}
ContainerInfo#
Information about a parsed binary container.
/// Metadata describing a parsed binary container.
pub struct ContainerInfo {
    /// Binary format that was detected.
    pub format: BinaryFormat,
    /// Sections present in the binary.
    pub sections: Vec<SectionInfo>,
    /// Import information.
    pub imports: Vec<ImportInfo>,
    /// Export information.
    pub exports: Vec<ExportInfo>,
}
SectionInfo#
Information about a section within the binary.
/// Metadata describing one section of the binary.
pub struct SectionInfo {
    /// Name of the section.
    pub name: String,
    /// Offset of the section within the file.
    pub offset: u64,
    /// Section size in bytes.
    pub size: u64,
    /// Relative Virtual Address, when one is available.
    pub rva: Option<u64>,
    /// Classification of the section's type.
    pub section_type: SectionType,
    /// `true` when the section is executable.
    pub is_executable: bool,
    /// `true` when the section is writable.
    pub is_writable: bool,
}
Output Formatting#
OutputFormatter Trait#
Trait for implementing output formatters.
/// Interface implemented by output formatters.
pub trait OutputFormatter {
    /// Render `strings` as output text according to `config`.
    fn format(&self, strings: &[FoundString], config: &OutputConfig) -> Result<String>;
}
Built-in Formatters#
/// Human-readable table format.
pub struct HumanFormatter;

/// JSON Lines format.
pub struct JsonFormatter;

/// YARA rule format.
pub struct YaraFormatter;
Example:
use stringy::output::{JsonFormatter, OutputFormatter, OutputConfig};

// Assumes `strings` holds previously extracted results (not defined in this snippet).
let output = JsonFormatter::new().format(&strings, &OutputConfig::default())?;
println!("{}", output);
Error Handling#
StringyError#
Comprehensive error type for the library.
/// Error type covering the failures the library reports.
#[derive(Debug, thiserror::Error)]
pub enum StringyError {
    /// The file format is not supported.
    #[error("Unsupported file format")]
    UnsupportedFormat,

    /// An underlying I/O error; converted automatically from `std::io::Error`.
    #[error("File I/O error: {0}")]
    IoError(#[from] std::io::Error),

    /// Binary parsing failed; carries a description.
    #[error("Binary parsing error: {0}")]
    ParseError(String),

    /// A string at `offset` has an invalid encoding.
    #[error("Invalid encoding in string at offset {offset}")]
    EncodingError { offset: u64 },

    /// A configuration problem; carries a description.
    #[error("Configuration error: {0}")]
    ConfigError(String),

    /// Memory mapping failed; carries a description.
    #[error("Memory mapping error: {0}")]
    MemoryMapError(String),
}
Result Type#
Convenient result type alias.
/// Convenience alias: a `std::result::Result` whose error type is `StringyError`.
pub type Result<T> = std::result::Result<T, StringyError>;
Advanced Usage#
Custom Classification#
Implement custom semantic classifiers:
use stringy::classification::{ClassificationResult, Classifier};

/// Skeleton for a user-defined semantic classifier.
pub struct CustomClassifier {
    // Custom implementation
}

impl Classifier for CustomClassifier {
    // NOTE(review): `StringContext` is not covered by the `use` line above;
    // it must be brought into scope separately — confirm its module path.
    fn classify(&self, text: &str, context: &StringContext) -> Vec<ClassificationResult> {
        // Custom classification logic
        Vec::new()
    }
}
Memory-Mapped Files#
For large files, use memory mapping:
use memmap2::Mmap;
use std::fs::File;
// Open the file so it can be mapped rather than read into a buffer.
let file = File::open("large_binary.exe")?;
// SAFETY: NOTE(review) — `Mmap::map` is an unsafe call; presumably the mapping is
// only sound while the underlying file is not modified or truncated by another
// process. Confirm that guarantee holds for this file before copying the snippet.
let mmap = unsafe { Mmap::map(&file)? };
// `&mmap[..]` borrows the mapping as a byte slice. `extract_strings` and `config`
// are not defined in this snippet — assumed to be in scope from earlier examples.
let strings = extract_strings(&mmap[..], &config)?;
Parallel Processing#
Process multiple files in parallel:
use rayon::prelude::*;

// Assumes `extract_strings` and `config` are already in scope (not defined in this snippet).
let files = vec!["file1.exe", "file2.dll", "file3.so"];

// Each file produces its own `Result`, so `results` holds one entry per input file.
let results: Vec<_> = files
    .par_iter()
    .map(|file_path| {
        let bytes = std::fs::read(file_path)?;
        extract_strings(&bytes, &config)
    })
    .collect();
Feature Flags#
Optional features can be enabled in Cargo.toml:
[dependencies]
stringy = { version = "0.1", features = ["pe-resources", "dwarf-debug"] }
Available features:
pe-resources: Enhanced PE resource extraction
dwarf-debug: DWARF debugging information support
capstone: Disassembly support for reference analysis
parallel: Parallel processing support
Examples#
Basic String Extraction#
use stringy::{ExtractionConfig, extract_strings};

/// Extract strings from `binary.exe` with the default configuration and
/// print the first ten results as `score | text`.
fn main() -> stringy::Result<()> {
    let data = std::fs::read("binary.exe")?;
    let strings = extract_strings(&data, &ExtractionConfig::default())?;

    // Print the first ten strings returned
    strings
        .iter()
        .take(10)
        .for_each(|found| println!("{:3} | {}", found.score, found.text));

    Ok(())
}
Filtered Extraction#
use stringy::{Encoding, ExtractionConfig, Tag, extract_strings};

/// Return the text of every extracted string that carries a URL, domain,
/// IPv4 or IPv6 tag and has a score of at least 70.
///
/// Extraction is limited to ASCII and UTF-8 strings with a minimum ASCII
/// length of 6; all other settings come from `ExtractionConfig::default()`.
fn extract_network_indicators(data: &[u8]) -> stringy::Result<Vec<String>> {
    let config = ExtractionConfig {
        min_ascii_len: 6,
        encodings: vec![Encoding::Ascii, Encoding::Utf8],
        ..Default::default()
    };

    let mut network_strings = Vec::new();
    for found in extract_strings(data, &config)? {
        // The tag test runs first; the score is only compared for network-tagged strings.
        let has_network_tag = found
            .tags
            .iter()
            .any(|tag| matches!(tag, Tag::Url | Tag::Domain | Tag::IPv4 | Tag::IPv6));
        if has_network_tag && found.score >= 70 {
            network_strings.push(found.text);
        }
    }

    Ok(network_strings)
}
Custom Output Format#
use serde_json::json;
use stringy::output::{OutputConfig, OutputFormatter};
pub struct CustomFormatter;
impl OutputFormatter for CustomFormatter {
fn format(&self, strings: &[FoundString], _config: &OutputConfig) -> stringy::Result<String> {
let output = json!({
"total_strings": strings.len(),
"high_confidence": strings.iter().filter(|s| s.score >= 80).count(),
"strings": strings.iter().take(20).collect::<Vec<_>>()
});
Ok(serde_json::to_string_pretty(&output)?)
}
}
For complete API documentation with all methods and implementation details, run:
cargo doc --open