1 : /*
2 : * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 : * Use of this source code is governed by a BSD-style license that can be
4 : * found in the LICENSE file.
5 : */
6 :
7 : #ifndef NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H
8 : #define NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H
9 :
10 : /*
11 : * The SFI validator, and some utility classes it uses.
12 : */
13 :
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <vector>

#include "native_client/src/include/nacl_string.h"
#include "native_client/src/include/portability.h"
#include "native_client/src/trusted/validator_mips/address_set.h"
#include "native_client/src/trusted/validator_mips/decode.h"
#include "native_client/src/trusted/validator_mips/inst_classes.h"
#include "native_client/src/trusted/validator_mips/model.h"
24 :
25 : namespace nacl_mips_val {
26 :
27 : /*
28 : * Forward declarations of classes used by-reference in the validator, and
29 : * defined at the end of this file.
30 : */
31 : class CodeSegment;
32 : class DecodedInstruction;
33 : class ProblemSink;
34 :
35 :
/*
 * Minimal description of one instruction bundle: the virtual address at which
 * it starts and its length in bytes. A bundle holds one or more instructions
 * (two or more in any useful configuration); the actual length is decided by
 * the parameters handed to SfiValidator, below.
 */
class Bundle {
 public:
  // Builds a bundle that starts at virtual_base and spans size_bytes bytes.
  Bundle(uint32_t virtual_base, uint32_t size_bytes)
      : virtual_base_(virtual_base),
        size_(size_bytes) {}

  // Address of the first byte of the bundle.
  uint32_t BeginAddr() const {
    return virtual_base_;
  }

  // Address one past the last byte of the bundle (uint32_t arithmetic, so it
  // wraps around for a bundle ending at the top of the address space).
  uint32_t EndAddr() const {
    return BeginAddr() + size_;
  }

  // Two bundles differ when they start at different addresses. Sizes are not
  // compared: every Bundle is currently assumed to have the same size.
  bool operator!=(const Bundle &other) const {
    return !(virtual_base_ == other.virtual_base_);
  }

 private:
  uint32_t virtual_base_;  // Start address of the bundle.
  uint32_t size_;          // Length of the bundle, in bytes.
};
58 :
59 :
60 : /*
61 : * The SFI validator itself. The validator is controlled by the following
62 : * inputs:
63 : * bytes_per_bundle: the number of bytes in each bundle of instructions.
64 : * Currently this tends to be 16, but we've evaluated alternatives.
65 : * code_region_bytes: number of bytes in the code region, starting at address
66 : * 0 and including the trampolines, etc. Must be a power of two.
67 : * data_region_bits: number of bytes in the data region, starting at address
68 : * 0 and including the code region. Must be a power of two.
69 : * read_only_registers: registers that untrusted code must not alter (but may
70 : * read). This currently applies to t6 - jump mask, t7 - load/store mask
71 : * and t8 - tls index.
72 : * data_address_registers: registers that must contain a valid data-region
73 : * address at all times. This currently applies to the stack pointer and
74 : * TLS register but could be extended to include a frame pointer for
75 : * C-like languages. Adding register to data_address_registers only means
76 : * that load/store access can be done without checks. Check for register
77 : * value change still needs to be executed.
78 : *
79 : * The values of these inputs will typically be taken from the headers of
80 : * untrusted code -- either by the ABI version they indicate, or (perhaps in
81 : * the future) explicit indicators of what SFI model they follow.
82 : */
83 : class SfiValidator {
84 : public:
85 : SfiValidator(uint32_t bytes_per_bundle,
86 : uint32_t code_region_bytes,
87 : uint32_t data_region_bytes,
88 : nacl_mips_dec::RegisterList read_only_registers,
89 : nacl_mips_dec::RegisterList data_address_registers);
90 :
91 : /*
92 : * The main validator entry point. Validates the provided CodeSegments,
93 : * which must be in sorted order, reporting any problems through the
94 : * ProblemSink.
95 : *
96 : * Returns true iff no problems were found.
97 : */
98 : bool Validate(const std::vector<CodeSegment> &, ProblemSink *out);
99 :
100 : // Returns true if validation did not depend on the code's base address.
101 : bool is_position_independent() {
102 : return is_position_independent_;
103 : }
104 :
105 : /*
106 : * Checks whether the given Register always holds a valid data region address.
107 : * This implies that the register is safe to use in unguarded stores.
108 : */
109 : bool IsDataAddressRegister(nacl_mips_dec::Register) const;
110 :
111 : uint32_t data_address_mask() const { return data_address_mask_; }
112 0 : uint32_t code_address_mask() const { return code_address_mask_; }
113 : uint32_t code_region_bytes() const { return code_region_bytes_; }
114 3 : uint32_t bytes_per_bundle() const { return bytes_per_bundle_; }
115 0 : uint32_t code_region_start() const { return code_region_start_; }
116 0 : uint32_t trampoline_region_start() const { return trampoline_region_start_; }
117 :
118 118 : nacl_mips_dec::RegisterList read_only_registers() const {
119 118 : return read_only_registers_;
120 : }
121 16 : nacl_mips_dec::RegisterList data_address_registers() const {
122 16 : return data_address_registers_;
123 : }
124 :
125 : // Returns the Bundle containing a given address.
126 : const Bundle BundleForAddress(uint32_t) const;
127 :
128 : /*
129 : * Change masks: this is useful for debugging and cannot be completely
130 : * controlled with constructor arguments
131 : */
132 : void ChangeMasks(uint32_t code_address_mask, uint32_t data_address_mask) {
133 : code_address_mask_ = code_address_mask;
134 : data_address_mask_ = data_address_mask;
135 : }
136 :
137 : /*
138 : * Find all the branch instructions which jump on the dest_address.
139 : */
140 : bool FindBranch(const std::vector<CodeSegment> &segments,
141 : const AddressSet &branches,
142 : uint32_t dest_address,
143 : std::vector<DecodedInstruction> *instrs) const;
144 :
145 : private:
146 : bool IsBundleHead(uint32_t address) const;
147 :
148 : /*
149 : * Validates a straight-line execution of the code, applying patterns. This
150 : * is the first validation pass, which fills out the AddressSets for
151 : * consumption by later analyses.
152 : * branches: gets filled in with the address of every direct branch.
153 : * branch_targets: gets filled in with the target address of every direct
154 : * branch.
155 : * critical: gets filled in with every address that isn't safe to jump to,
156 : * because it would split an otherwise-safe pseudo-op.
157 : *
158 : * Returns true iff no problems were found.
159 : */
160 : bool ValidateFallthrough(const CodeSegment &, ProblemSink *,
161 : AddressSet *branches, AddressSet *branch_targets,
162 : AddressSet *critical);
163 :
164 : /*
165 : * Factor of validate_fallthrough, above. Checks a single instruction using
166 : * the instruction patterns defined in the .cc file, with two possible
167 : * results:
168 : * 1. No patterns matched, or all were safe: nothing happens.
169 : * 2. Patterns matched and were unsafe: problems get sent to 'out'.
170 : */
171 : bool ApplyPatterns(const DecodedInstruction &, ProblemSink *out);
172 :
173 : /*
174 : * Factor of validate_fallthrough, above. Checks a pair of instructions using
175 : * the instruction patterns defined in the .cc file, with three possible
176 : * results:
177 : * 1. No patterns matched: nothing happens.
178 : * 2. Patterns matched and were safe: the addresses are filled into
179 : * 'critical' for use by the second pass.
180 : * 3. Patterns matched and were unsafe: problems get sent to 'out'.
181 : */
182 : bool ApplyPatterns(const DecodedInstruction &first,
183 : const DecodedInstruction &second, AddressSet *critical, ProblemSink *out);
184 :
185 :
186 : /*
187 : * 2nd pass - checks if some branch instruction tries to jump onto the middle
188 : * of the pseudo-instruction, and if some pseudo-instruction crosses bundle
189 : * borders.
190 : */
191 : bool ValidatePseudos(const SfiValidator &sfi,
192 : const std::vector<CodeSegment> &segments,
193 : const AddressSet &branches,
194 : const AddressSet &branch_targets,
195 : const AddressSet &critical,
196 : ProblemSink *out);
197 :
198 : uint32_t const bytes_per_bundle_;
199 : uint32_t const code_region_bytes_;
200 : uint32_t data_address_mask_;
201 : uint32_t code_address_mask_;
202 :
203 : // TODO(petarj): Think about pulling these values from some config header.
204 : static uint32_t const code_region_start_ = 0x20000;
205 : static uint32_t const trampoline_region_start_ = 0x10000;
206 :
207 : // Registers which cannot be modified by untrusted code.
208 : nacl_mips_dec::RegisterList read_only_registers_;
209 : // Registers which must always contain a valid data region address.
210 : nacl_mips_dec::RegisterList data_address_registers_;
211 : const nacl_mips_dec::DecoderState *decode_state_;
212 : // True if validation did not depend on the code's base address.
213 : bool is_position_independent_;
214 : };
215 :
216 :
217 : /*
218 : * A facade that combines an Instruction with its address and a ClassDecoder.
219 : * This makes the patterns substantially easier to write and read than managing
220 : * all three variables separately.
221 : *
222 : * ClassDecoders do all decoding on-demand, with no caching. DecodedInstruction
223 : * has knowledge of the validator, and pairs a ClassDecoder with a constant
224 : * Instruction -- so it can cache commonly used values, and does. Caching
225 : * safety and defs doubles validator performance. Add other values only
226 : * under guidance of a profiler.
227 : */
228 0 : class DecodedInstruction {
229 : public:
230 : DecodedInstruction(uint32_t vaddr, nacl_mips_dec::Instruction inst,
231 : const nacl_mips_dec::ClassDecoder &decoder);
232 : // We permit the default copy ctor and assignment operator.
233 :
234 52 : uint32_t addr() const { return vaddr_; }
235 :
236 : // The methods below mirror those on ClassDecoder, but are cached and cheap.
237 132 : nacl_mips_dec::SafetyLevel safety() const { return safety_; }
238 :
239 : // The methods below pull values from ClassDecoder on demand.
240 16 : const nacl_mips_dec::Register BaseAddressRegister() const {
241 16 : return decoder_->BaseAddressRegister(inst_);
242 : }
243 :
244 482 : nacl_mips_dec::Register DestGprReg() const {
245 482 : return decoder_->DestGprReg(inst_);
246 : }
247 :
248 4 : nacl_mips_dec::Register TargetReg() const {
249 4 : return decoder_->TargetReg(inst_);
250 : }
251 :
252 0 : uint32_t DestAddr() const {
253 0 : return decoder_->DestAddr(inst_, addr());
254 : }
255 :
256 167 : bool HasDelaySlot() const {
257 167 : return decoder_-> HasDelaySlot();
258 : }
259 :
260 118 : bool IsJal() const {
261 118 : return decoder_-> IsJal();
262 : }
263 :
264 82 : bool IsMask(const nacl_mips_dec::Register& dest,
265 : const nacl_mips_dec::Register& mask) const {
266 82 : return decoder_->IsMask(inst_, dest, mask);
267 : }
268 :
269 148 : bool IsJmpReg() const {
270 148 : return decoder_->IsJmpReg();
271 : }
272 :
273 148 : bool IsLoadStore() const {
274 148 : return decoder_->IsLoadStore();
275 : }
276 :
277 236 : bool IsDirectJump() const {
278 236 : return decoder_->IsDirectJump();
279 : }
280 :
281 118 : bool IsDestGprReg(nacl_mips_dec::RegisterList rl) const {
282 118 : return rl.ContainsAny(nacl_mips_dec::RegisterList(DestGprReg()));
283 : }
284 :
285 1 : bool IsDataRegMask() const {
286 1 : return IsMask(DestGprReg(), nacl_mips_dec::Register::LoadStoreMask());
287 : }
288 :
289 : private:
290 : uint32_t vaddr_;
291 : nacl_mips_dec::Instruction inst_;
292 : const nacl_mips_dec::ClassDecoder *decoder_;
293 :
294 : nacl_mips_dec::SafetyLevel safety_;
295 : };
296 :
297 :
298 : /*
299 : * Describes a memory region that contains executable code. Note that the code
300 : * need not live in its final location -- we pretend the code lives at the
301 : * provided start_addr, regardless of where the base pointer actually points.
302 : */
303 0 : class CodeSegment {
304 : public:
305 30 : CodeSegment(const uint8_t *base, uint32_t start_addr, size_t size)
306 30 : : base_(base), start_addr_(start_addr), size_(size) {}
307 :
308 66 : uint32_t BeginAddr() const { return start_addr_; }
309 214 : uint32_t EndAddr() const { return start_addr_ + size_; }
310 : uint32_t size() const { return size_; }
311 6 : bool ContainsAddress(uint32_t a) const {
312 6 : return (a >= BeginAddr()) && (a < EndAddr());
313 : }
314 :
315 248 : const nacl_mips_dec::Instruction operator[](uint32_t address) const {
316 248 : const uint8_t *element = &base_[address - start_addr_];
317 : return nacl_mips_dec::Instruction(
318 248 : *reinterpret_cast<const uint32_t *>(element));
319 : }
320 :
321 : bool operator<(const CodeSegment &other) const {
322 : return start_addr_ < other.start_addr_;
323 : }
324 :
325 : private:
326 : const uint8_t *base_;
327 : uint32_t start_addr_;
328 : size_t size_;
329 : };
330 :
331 :
332 : /*
333 : * A class that consumes reports of validation problems, and may decide whether
334 : * to continue validating, or early-exit.
335 : *
336 : * In a sel_ldr context, we early-exit at the first problem we find. In an SDK
337 : * context, however, we collect more reports to give the developer feedback;
338 : * even then it may be desirable to exit after the first, say, 200 reports.
339 : */
340 30 : class ProblemSink {
341 : public:
342 30 : virtual ~ProblemSink() {}
343 :
344 : /*
345 : * Reports a problem in untrusted code.
346 : * vaddr: the virtual address where the problem occurred. Note that this is
347 : * probably not the address of memory that contains the offending
348 : * instruction, since we allow CodeSegments to lie about their base
349 : * addresses.
350 : * safety: the safety level of the instruction, as reported by the decoder.
351 : * This may be MAY_BE_SAFE while still indicating a problem.
352 : * problem_code: a constant string, defined below, that uniquely identifies
353 : * the problem. These are not intended to be human-readable, and should
354 : * be looked up for localization and presentation to the developer.
355 : * ref_vaddr: A second virtual address of more code that affected the
356 : * decision -- typically a branch target.
357 : */
358 0 : virtual void ReportProblem(uint32_t vaddr, nacl_mips_dec::SafetyLevel safety,
359 : const nacl::string &problem_code, uint32_t ref_vaddr = 0) {
360 : UNREFERENCED_PARAMETER(vaddr);
361 : UNREFERENCED_PARAMETER(safety);
362 : UNREFERENCED_PARAMETER(problem_code);
363 : UNREFERENCED_PARAMETER(ref_vaddr);
364 0 : }
365 :
366 : /*
367 : * Called after each invocation of report_problem. If this returns false,
368 : * the validator exits.
369 : */
370 0 : virtual bool ShouldContinue() { return false; }
371 : };
372 :
/*
 * Problem codes: the strings handed to ProblemSink::ReportProblem to identify
 * each kind of validation failure. Each string's value is identical to the
 * name of its constant. These may be worth splitting into a separate header
 * file, so that dev tools can process them into localized messages without
 * needing to pull in the whole validator...we'll see.
 */

// An instruction is unsafe -- more information in the SafetyLevel.
char const *const kProblemUnsafe =
    "kProblemUnsafe";

// A branch would break a pseudo-operation pattern.
char const *const kProblemBranchSplitsPattern =
    "kProblemBranchSplitsPattern";

// A branch targets an invalid code address (out of segment).
char const *const kProblemBranchInvalidDest =
    "kProblemBranchInvalidDest";

// A load/store uses an unsafe (non-masked) base address.
char const *const kProblemUnsafeLoadStore =
    "kProblemUnsafeLoadStore";

// An instruction updates a data-address register (e.g. SP) without masking.
char const *const kProblemUnsafeDataWrite =
    "kProblemUnsafeDataWrite";

// An instruction updates a read-only register (e.g. t6, t7, t8).
char const *const kProblemReadOnlyRegister =
    "kProblemReadOnlyRegister";

// A pseudo-op pattern crosses a bundle boundary.
char const *const kProblemPatternCrossesBundle =
    "kProblemPatternCrossesBundle";

// A linking branch instruction is not in the last bundle slot.
char const *const kProblemMisalignedCall =
    "kProblemMisalignedCall";

// A data register is found in a branch delay slot.
char const *const kProblemDataRegInDelaySlot =
    "kProblemDataRegInDelaySlot";

// A jump to trampoline instruction which is not a start of a bundle.
char const *const kProblemUnalignedJumpToTrampoline =
    "kProblemUnalignedJumpToTrampoline";

// A jump register instruction is not guarded.
char const *const kProblemUnsafeJumpRegister =
    "kProblemUnsafeJumpRegister";

// Two consecutive branches/jumps. Branch/jump in the delay slot.
char const *const kProblemBranchInDelaySlot =
    "kProblemBranchInDelaySlot";
406 : } // namespace nacl_mips_val
407 :
408 : #endif // NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H
|