16 """Detect the pointer width and byte order of a parsed ``lief`` binary.
19 binary: A ``lief.Binary`` instance (ELF, PE, or MachO).
22 A ``(pointer_width, endian)`` tuple where *pointer_width* is ``4`` or ``8``
23 bytes and *endian* is ``"<"`` (little-endian) or ``">"`` (big-endian).
24 Falls back to ``(8, "<")`` if detection fails.
29 if fmt == lief.Binary.FORMATS.ELF:
30 cls = binary.header.identity_class
31 data = binary.header.identity_data
32 bits = 64
if cls == lief.ELF.ELF_CLASS.CLASS64
else 32
33 end =
"<" if data == lief.ELF.ELF_DATA.ELFDATA2LSB
else ">"
34 return (8
if bits == 64
else 4), end
35 if fmt == lief.Binary.FORMATS.PE:
36 magic = binary.optional_header.magic
37 bits = 64
if magic == lief.PE.PE_TYPE.PE32_PLUS
else 32
38 return (8
if bits == 64
else 4),
"<"
39 if fmt == lief.Binary.FORMATS.MACHO:
40 cpu = binary.header.cpu_type
42 lief.MachO.CPU_TYPES.ARM64,
43 lief.MachO.CPU_TYPES.x86_64,
45 return (8
if bits == 64
else 4),
"<"
101 """Heuristically determine the bytecode payload size encoded in the ``.phsb`` section.
103 Scans *sec_data* for an integer field that plausibly encodes the length of a
104 contiguous non-zero region — the strategy used by the Phasor linker to store
105 the bytecode size alongside the payload.
108 sec_data: Raw bytes of the ``.phsb`` binary section.
109 sz_width: Width of the size field in bytes (``4`` or ``8``, from :func:`_arch_info`).
110 endian: Struct endian character (``"<"`` or ``">"``, from :func:`_arch_info`).
113 The detected payload length in bytes, or ``None`` if no consistent size
114 field could be located.
116 fmt = endian + (
"Q" if sz_width == 8
else "I")
118 non_zero = [i
for i
in range(L)
if sec_data[i] != 0]
120 for padding
in range(L - sz_width):
121 N = L - sz_width - padding
125 for sz_off
in _find_all(sec_data, struct.pack(fmt, N)):
126 sz_end = sz_off + sz_width
127 outside_nz = [i
for i
in non_zero
if i < sz_off
or i >= sz_end]
132 span_start = outside_nz[0]
133 span_end = outside_nz[-1] + 1
135 if span_end - span_start > N:
138 for bc_start
in range(
139 max(0, span_end - N), min(span_start, L - N) + 1
141 bc_end = bc_start + N
142 if bc_start < sz_end
and bc_end > sz_off:
149 """Extract the raw ``.phsb`` bytecode payload from a native binary.
151 Parses the ELF, PE, or MachO binary at *path* using ``lief``, locates the
152 ``.phsb`` section, and returns the bytecode payload starting at the
153 :data:`_MAGIC_BYTES` marker.
156 path: Path to the compiled native binary.
159 The raw ``.phsb`` bytes, suitable for passing to
160 :meth:`~phasor.Bytecode.Bytecode.from_bytes`.
163 ImportError: If the ``lief`` package is not installed.
164 FileNotFoundError: If *path* does not exist.
165 RuntimeError: If the binary cannot be parsed, no ``.phsb`` section is
166 found, the PHSB magic bytes are absent, or the payload size cannot
171 except ImportError
as exc:
173 "The 'lief' package is required for native binary extraction.\n"
174 "Install it with: pip install lief"
178 if not path.is_file():
179 raise FileNotFoundError(f
"Binary not found: {path}")
181 binary = lief.parse(str(path))
183 raise RuntimeError(f
"Could not parse binary: {path}")
187 raise RuntimeError(f
"No '.phsb' section found in {path}")
190 sec_data = bytes(section.content)
192 magic_off = sec_data.find(_MAGIC_BYTES)
194 raise RuntimeError(
"PHSB magic bytes not found in '.phsb' section")
199 "Could not determine bytecode payload size from '.phsb' section"
202 return sec_data[magic_off : magic_off + N]