In [1]:
# Import necessary libraries
import os
import re
import subprocess
from typing import Optional
In [2]:
# Define constants
OPT_EXECUTABLE = "opt"
LLC_EXECUTABLE = "llc"
CLANG_EXECUTABLE = "clang++"
CPP_CODE_FILE = "original_code.cpp"
ORIGINAL_IR_FILE = "original_code.ll"
OPTIMIZED_IR_FILE = "optimized_code.ll"
COUNTER_PASS_FILE = "counter_pass.so"
OBJ_FILE = "program.o"
EXECUTABLE_FILE = "program.out" 
In [3]:
# Example: C++ code to be compiled
cpp_code = """
#include <iostream>

void print_message(const char* message, int value) {
    if (value > 10) {
        std::cout << message << ": " << value << " is greater than 10." << std::endl;
    } else {
        std::cout << message << ": " << value << " is not greater than 10." << std::endl;
    }
}

void process_data(int a, int b, int c) {
    print_message("Data A", a);
    print_message("Data B", b);
    print_message("Data C", c);
}

int main() {
    process_data(5, 15, 8);
    return 0;
}
"""
In [4]:
def compile_cpp_to_llvm_ir(cpp_code_str, input_code_file=CPP_CODE_FILE, output_ir_file=ORIGINAL_IR_FILE, passes="-O3"):
    with open(input_code_file, "w") as f:
        f.write(cpp_code_str)

    try:
        subprocess.run(
            [CLANG_EXECUTABLE, "-S", "-emit-llvm", passes, input_code_file, "-o", output_ir_file],
            check=True,
            capture_output=True,
            text=True
        )
        print(f"C++ successfully compiled to LLVM IR -> {output_ir_file}")
        return output_ir_file
    except subprocess.CalledProcessError as e:
        print(f"Error during C++ to LLVM IR compilation. {e.stderr}")
        return None
    except FileNotFoundError:
        print("Error: clang++ not found. Please ensure Clang is installed and in your PATH.")
        return None
In [5]:
def compile_llvm_ir_to_executable(ir_file=ORIGINAL_IR_FILE, obj_file=OBJ_FILE, executable_file=EXECUTABLE_FILE):
    try:
        subprocess.run(
            [LLC_EXECUTABLE, ir_file, "-filetype=obj", "-relocation-model=pic", "-o", obj_file],
            check=True,
            capture_output=True,
            text=True
        )
        print(f"LLVM IR successfully compiled to position-independent object file -> {obj_file}")

        # Link the object file into an executable using a C++ compiler
        # This step is needed especially if your C++ code uses standard libraries
        subprocess.run(
            ["clang++", obj_file, "-o", executable_file],
            check=True,
            capture_output=True,
            text=True
        )
        print(f"Object file successfully linked into executable -> {executable_file}")
        return executable_file
    except subprocess.CalledProcessError as e:
        print(f"Error during LLVM IR to executable compilation/linking: {e.stderr}")
        return None
    except FileNotFoundError:
        print("Error: llc or clang++/g++ not found. Please ensure LLVM and a C++ compiler are installed and in your PATH.")
        return None
In [6]:
def get_instruction_count(input_ir_file: str, pass_plugin_path: str, pass_name: str = "counter") -> Optional[int]:
    # 1. Pre-flight checks for file existence
    if not os.path.exists(input_ir_file):
        print(f"Error: Input IR file not found at '{input_ir_file}'")
        return None
    if not os.path.exists(pass_plugin_path):
        print(f"Error: Pass plugin not found at '{pass_plugin_path}'")
        return None

    # 2. Construct the command
    command = [
        OPT_EXECUTABLE,
        f"-load-pass-plugin=./{pass_plugin_path}",
        f"-passes={pass_name}",
        "-disable-output",
        input_ir_file,
    ]

    # 3. Execute the command and handle potential errors
    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
            encoding='utf-8'
        )
    except FileNotFoundError:
        print(f"Error: The command '{OPT_EXECUTABLE}' was not found.")
        print("Please ensure LLVM's bin directory is in your system's PATH.")
        return None
    except subprocess.CalledProcessError as e:
        print(f"Error: The 'opt' command failed with return code {e.returncode}.")
        print("--- Stderr from opt ---")
        print(e.stderr.strip())
        return None
    
    # 4. Parse the output from stderr to find the instruction count
    output_text = result.stderr
    
    # 5. Parse the output from stderr to find the instruction count
    pattern = r"^Total instructions: (\d+)$"
    matches = re.findall(pattern, output_text, re.MULTILINE)
    if matches:
        return int(matches[0])
    else:
        return None
In [7]:
def run_executable(executable_file=EXECUTABLE_FILE):
    if not os.path.exists(executable_file):
        print(f"Executable file not found: {executable_file}")
        return None, None

    try:
        executable_path = f"./{executable_file}" if not executable_file.startswith(("./", "/")) else executable_file
        result = subprocess.run(
            [executable_path],
            check=True,
            capture_output=True,
            text=True
        )
        print(f"Successfully ran executable: {executable_file}")
        return result.stdout, result.stderr
    except subprocess.CalledProcessError as e:
        print(f"Error running executable {executable_file}:")
        print(f"Return code: {e.returncode}")
        print(f"Stdout: {e.stdout}")
        print(f"Stderr: {e.stderr}")
        return e.stdout, e.stderr
    except FileNotFoundError:
        print(f"Error: Could not execute {executable_file}. Make sure it has execute permissions and is in the correct path.")
        return None, None
In [8]:
# Step 1: Compile C++ code to LLVM IR
original_ir_file = compile_cpp_to_llvm_ir(cpp_code_str=cpp_code, input_code_file=CPP_CODE_FILE, output_ir_file=ORIGINAL_IR_FILE, passes="")
optimized_ir_file = compile_cpp_to_llvm_ir(cpp_code_str=cpp_code, input_code_file=CPP_CODE_FILE, output_ir_file=OPTIMIZED_IR_FILE, passes="-Oz")
print(f"[+] Compiled C++ code to LLVM IR")
C++ successfully compiled to LLVM IR -> original_code.ll
C++ successfully compiled to LLVM IR -> optimized_code.ll
[+] Compiled C++ code to LLVM IR
In [9]:
# Step 2: Get instruction count
# clang++ -fPIC -shared counter_pass.cpp -o counter_pass.so $(llvm-config --cxxflags --ldflags --libs)
original_instruction_count = get_instruction_count(input_ir_file=original_ir_file, pass_plugin_path=COUNTER_PASS_FILE, pass_name="counter")
optimized_instruction_count = get_instruction_count(input_ir_file=optimized_ir_file, pass_plugin_path=COUNTER_PASS_FILE, pass_name="counter")
print(f"[+] Original: {original_instruction_count} -> Optimized: {optimized_instruction_count}")
[+] Original: 46 -> Optimized: 17
In [10]:
# Step 3: Execute the original and optimized code
executable_original_file = compile_llvm_ir_to_executable(ir_file=original_ir_file, obj_file=OBJ_FILE, executable_file=EXECUTABLE_FILE)
executable_optimized_file = compile_llvm_ir_to_executable(ir_file=optimized_ir_file, obj_file=OBJ_FILE, executable_file=EXECUTABLE_FILE)

executable_original_output, executable_original_error = run_executable(executable_file=executable_original_file)
executable_optimized_output, executable_optimized_error = run_executable(executable_file=executable_optimized_file)

assert executable_original_output == executable_optimized_output
assert executable_original_error == executable_optimized_error
print('[+] Bingo!')
LLVM IR successfully compiled to position-independent object file -> program.o
Object file successfully linked into executable -> program.out
LLVM IR successfully compiled to position-independent object file -> program.o
Object file successfully linked into executable -> program.out
Successfully ran executable: program.out
Successfully ran executable: program.out
[+] Bingo!