1 : /*
2 : * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 : * Use of this source code is governed by a BSD-style license that can be
4 : * found in the LICENSE file.
5 : */
6 :
7 : #ifndef NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_ARM_V2_VALIDATOR_H
8 : #define NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_ARM_V2_VALIDATOR_H
9 :
10 : /*
11 : * The SFI validator, and some utility classes it uses.
12 : */
13 :
14 : #include <limits>
15 : #include <stdlib.h>
16 : #include <vector>
17 :
18 : #include "native_client/src/include/nacl_compiler_annotations.h"
19 : #include "native_client/src/include/nacl_string.h"
20 : #include "native_client/src/include/portability.h"
21 : #include "native_client/src/shared/platform/nacl_check.h"
22 : #include "native_client/src/trusted/validator/ncvalidate.h"
23 : #include "native_client/src/trusted/validator_arm/address_set.h"
24 : #include "native_client/src/trusted/cpu_features/arch/arm/cpu_arm.h"
25 : #include "native_client/src/trusted/validator_arm/gen/arm32_decode.h"
26 : #include "native_client/src/trusted/validator_arm/inst_classes.h"
27 : #include "native_client/src/trusted/validator_arm/model.h"
28 :
29 : namespace nacl_arm_val {
30 :
31 : // Forward declarations of classes used by-reference in the validator, and
32 : // defined at the end of this file.
33 : class CodeSegment;
34 : class DecodedInstruction;
35 : class ProblemSink;
36 :
37 : // A simple model of an instruction bundle. Bundles consist of one or more
38 : // instructions (two or more, in the useful case); the precise size is
39 : // controlled by the parameters passed into SfiValidator, below.
class Bundle {
 public:
  // Describes the bundle that starts at virtual_base and spans size_bytes
  // bytes.
  Bundle(uint32_t virtual_base, uint32_t size_bytes)
      : virtual_base_(virtual_base),
        size_(size_bytes) {
  }

  // Address of the first byte of the bundle.
  uint32_t begin_addr() const {
    return virtual_base_;
  }

  // Address immediately past the last byte of the bundle.
  uint32_t end_addr() const {
    return begin_addr() + size_;
  }

  // Bundles compare equal when they start at the same address. The size is
  // intentionally not compared: all Bundles are currently assumed to be the
  // same size.
  bool operator==(const Bundle& other) const {
    return other.virtual_base_ == virtual_base_;
  }

 private:
  uint32_t virtual_base_;  // First address covered by the bundle.
  uint32_t size_;          // Length of the bundle, in bytes.
};
57 :
58 : // The SFI validator itself. The validator is controlled by the following
59 : // inputs:
60 : // bytes_per_bundle - the number of bytes in each bundle of instructions.
61 : // Currently this tends to be 16, but we've evaluated alternatives.
62 : // Must be a power of two.
63 : // code_region_bytes - number of bytes in the code region, starting at address
64 : // 0 and including the trampolines, etc. Must be a power of two.
65 : // data_region_bytes - number of bytes in the data region, starting at address
66 : // 0 and including the code region. Must be a power of two.
67 : // read_only_registers - registers that untrusted code must not alter (but may
68 : // read). This currently applies to r9, where we store some thread state.
69 : // data_address_registers - registers that must contain a valid data-region
70 : // address at all times. This currently applies to the stack pointer, but
71 : // could be extended to include a frame pointer for C-like languages.
72 : // cpu_features - the ARM CPU whose features should be considered during
73 : // validation. This matters because some CPUs don't support some
74 : // instructions, leak information or have errata when others do not,
75 : // yet we still want to emit performant code for the given target.
76 : //
77 : // The values of these inputs will typically be taken from the headers of
78 : // untrusted code -- either by the ABI version they indicate, or (perhaps in
79 : // the future) explicit indicators of what SFI model they follow.
80 : class SfiValidator {
81 : public:
82 : SfiValidator(uint32_t bytes_per_bundle,
83 : uint32_t code_region_bytes,
84 : uint32_t data_region_bytes,
85 : nacl_arm_dec::RegisterList read_only_registers,
86 : nacl_arm_dec::RegisterList data_address_registers,
87 : const NaClCPUFeaturesArm *cpu_features);
88 :
89 : explicit SfiValidator(const SfiValidator& v);
90 :
91 : // The main validator entry point. Validates the provided CodeSegments,
92 : // which must be in sorted order, reporting any problems through the
93 : // ProblemSink.
94 : //
95 : // Returns true iff no problems were found.
96 0 : bool validate(const std::vector<CodeSegment>& segments, ProblemSink* out) {
97 0 : return find_violations(segments, out) == nacl_arm_dec::kNoViolations;
98 0 : }
99 :
100 : // Returns true if validation did not depend on the code's base address.
101 : bool is_position_independent() {
102 : return is_position_independent_;
103 : }
104 :
105 : // Alternate validator entry point. Validates the provided
106 : // CodeSegments, which must be in sorted order, reporting any
107 : // problems through the ProblemSink.
108 : //
109 : // Returns the violation set of found violations. Note: if problem
110 : // sink short ciruits the validation of all code (via method
111 : // should_continue), this set may not contain all types of
112 : // violations found. All that this method guarantees is if the code
113 : // has validation violations, the returned set will be non-empty.
114 : //
115 : // Note: This version of validating is useful for testing, when one
116 : // might want to know why the code did not validate.
117 : nacl_arm_dec::ViolationSet find_violations(
118 : const std::vector<CodeSegment>& segments, ProblemSink* out);
119 :
120 : // Entry point for validation of dynamic code replacement. Allows
121 : // micromodifications of dynamically generated code in form of
122 : // constant updates for inline caches and similar VM techniques.
123 : // Very minimal modifications allowed, essentially only immediate
124 : // value update for MOV or ORR instruction.
125 : // Returns true iff no problems were found.
126 : bool ValidateSegmentPair(const CodeSegment& old_code,
127 : const CodeSegment& new_code,
128 : ProblemSink* out);
129 :
130 : // Entry point for dynamic code creation. Copies code from
131 : // source segment to destination, performing validation
132 : // and accounting for need of safe handling of cases,
133 : // where code being replaced is executed.
134 : // Returns true iff no problems were found.
135 : bool CopyCode(const CodeSegment& source_code,
136 : CodeSegment& dest_code,
137 : NaClCopyInstructionFunc copy_func,
138 : ProblemSink* out);
139 :
140 : // A 2-dimensional array, defined on the Condition of two
141 : // instructions, defining when we can statically prove that the
142 : // conditions of the first instruction implies the conditions of the
143 : // second instruction.
144 : //
145 : // Note: The first index (i.e. row) corresponds to the condition of
146 : // the first instruction, while the second index (i.e. column)
147 : // corresponds to the condition of the second instruction.
148 : //
149 : // Note: The order the instructions execute is not important in
150 : // this array. The context defines which instruction, of the
151 : // instruction pair being compared, appears first.
152 : //
153 : // Note: The decoder should prevent UNCONDITIONAL (0b1111) from ever
154 : // occurring, but we include entries for it out of paranoia, which also
155 : // happens to make the table 16x16, which is easier to index into.
156 : static const bool
157 : condition_implies[nacl_arm_dec::Instruction::kConditionSize + 1]
158 : [nacl_arm_dec::Instruction::kConditionSize + 1];
159 :
160 : // Checks whether the given Register always holds a valid data region address.
161 : // This implies that the register is safe to use in unguarded stores.
162 0 : bool is_data_address_register(nacl_arm_dec::Register r) const {
163 0 : return data_address_registers_.Contains(r);
164 0 : }
165 :
166 : // Number of A32 instructions per bundle.
167 : uint32_t InstructionsPerBundle() const {
168 : return bytes_per_bundle_ / (nacl_arm_dec::kArm32InstSize / 8);
169 : }
170 :
171 0 : uint32_t code_address_mask() const {
172 0 : return ~(code_region_bytes_ - 1) | (bytes_per_bundle_ - 1);
173 0 : }
174 0 : uint32_t data_address_mask() const {
175 0 : return ~(data_region_bytes_ - 1);
176 0 : }
177 :
178 0 : nacl_arm_dec::RegisterList read_only_registers() const {
179 0 : return read_only_registers_;
180 0 : }
181 0 : nacl_arm_dec::RegisterList data_address_registers() const {
182 0 : return data_address_registers_;
183 0 : }
184 :
185 0 : const NaClCPUFeaturesArm *CpuFeatures() const {
186 0 : return &cpu_features_;
187 0 : }
188 :
189 0 : bool conditional_memory_access_allowed_for_sfi() const {
190 0 : return NaClGetCPUFeatureArm(CpuFeatures(), NaClCPUFeatureArm_CanUseTstMem);
191 0 : }
192 :
193 : // Utility function that applies the decoder of the validator.
194 : const nacl_arm_dec::ClassDecoder& decode(
195 : nacl_arm_dec::Instruction inst) const {
196 : return decode_state_.decode(inst);
197 : }
198 :
199 : // Returns the Bundle containing a given address.
200 : inline const Bundle bundle_for_address(uint32_t address) const;
201 :
202 : // Returns true if both addresses are in the same bundle.
203 : inline bool in_same_bundle(const DecodedInstruction& first,
204 : const DecodedInstruction& second) const;
205 :
206 : // Checks that both instructions can be in the same bundle,
207 : // add updates the critical set to include the second instruction,
208 : // since it can't be safely jumped to. If the instruction crosses
209 : // a bundle, a set with the given violation will be returned.
210 : inline nacl_arm_dec::ViolationSet validate_instruction_pair_allowed(
211 : const DecodedInstruction& first,
212 : const DecodedInstruction& second,
213 : AddressSet* critical,
214 : nacl_arm_dec::Violation violation) const;
215 :
216 : // Copy the given validator state.
217 : SfiValidator& operator=(const SfiValidator& v);
218 :
219 : // Returns true if address is the first address of a bundle.
220 0 : bool is_bundle_head(uint32_t address) const {
221 0 : return (address & (bytes_per_bundle_ - 1)) == 0;
222 0 : };
223 :
224 : private:
225 : // The SfiValidator constructor could have been given invalid values.
226 : // Returns true the values were bad, and send the details to the ProblemSink.
227 : // This method should be called from every public validation method.
228 : bool ConstructionFailed(ProblemSink* out);
229 :
230 : // Validates a straight-line execution of the code, applying patterns. This
231 : // is the first validation pass, which fills out the AddressSets for
232 : // consumption by later analyses.
233 : // branches - gets filled in with the address of every direct branch.
234 : // critical - gets filled in with every address that isn't safe to jump to,
235 : // because it would split an otherwise-safe pseudo-op.
236 : //
237 : // Returns the violation set of found violations. Note: if problem
238 : // sink short ciruits the validation of all code (via method
239 : // should_continue), this set may not contain all types of
240 : // violations found. All that this method guarantees is if the code
241 : // has validation violations, the returned set will be non-empty.
242 : nacl_arm_dec::ViolationSet validate_fallthrough(
243 : const CodeSegment& segment, ProblemSink* out,
244 : AddressSet* branches, AddressSet* critical);
245 :
246 : // Validates all branches found by a previous pass, checking
247 : // destinations. Returns the violation set of found branch
248 : // violations. Note: if problem sink short ciruits the validation of
249 : // all code (via method should_continue), this set may not contain
250 : // all types of violations found. All that this method guarantees is
251 : // if the code has validation violations, the returned set will be
252 : // non-empty.
253 : nacl_arm_dec::ViolationSet validate_branches(
254 : const std::vector<CodeSegment>& segments,
255 : const AddressSet& branches, const AddressSet& critical,
256 : ProblemSink* out);
257 :
258 : NaClCPUFeaturesArm cpu_features_;
259 : uint32_t bytes_per_bundle_;
260 : uint32_t code_region_bytes_;
261 : uint32_t data_region_bytes_;
262 : // Registers which cannot be modified by untrusted code.
263 : nacl_arm_dec::RegisterList read_only_registers_;
264 : // Registers which must always contain a valid data region address.
265 : nacl_arm_dec::RegisterList data_address_registers_;
266 : // Defines the decoder parser to use.
267 : const nacl_arm_dec::Arm32DecoderState decode_state_;
268 : // True if construction failed and further validation should be prevented.
269 : bool construction_failed_;
270 : // True if validation did not depend on the code's base address.
271 : bool is_position_independent_;
272 : };
273 :
274 :
275 : // A facade that combines an Instruction with its address and a ClassDecoder.
276 : // This makes the patterns substantially easier to write and read than managing
277 : // all three variables separately.
278 : //
279 : // ClassDecoders do all decoding on-demand, with no caching. DecodedInstruction
280 : // has knowledge of the validator, and pairs a ClassDecoder with a constant
281 : // Instruction -- so it can cache commonly used values, and does. Caching
282 : // safety and defs doubles validator performance. Add other values only
283 : // under guidance of a profiler.
284 : class DecodedInstruction {
285 : public:
286 : DecodedInstruction(uint32_t vaddr, nacl_arm_dec::Instruction inst,
287 : const nacl_arm_dec::ClassDecoder& decoder)
288 : // We eagerly compute both safety and defs here, because it turns out to
289 : // be faster by 10% than doing either lazily and memoizing the result.
290 : : vaddr_(vaddr),
291 : inst_(inst),
292 : decoder_(&decoder),
293 : safety_(decoder.safety(inst_)),
294 : defs_(decoder.defs(inst_))
295 0 : {}
296 :
297 0 : uint32_t addr() const { return vaddr_; }
298 :
299 : // 'this' dominates 'other', where 'this' is the instruction
300 : // immediately preceding 'other': if 'other' executes, we can guarantee
301 : // that 'this' was executed as well.
302 :
303 : // This is important if 'this' produces a sandboxed value that 'other'
304 : // must consume.
305 : //
306 : // Note: If the conditions of the two instructions do
307 : // not statically infer that the conditional execution is correct,
308 : // we assume that it is not.
309 : //
310 : // Note that this function can't see the bundle size, so this result
311 : // does not take it into account. The SfiValidator reasons on this
312 : // separately.
313 0 : bool always_dominates(const DecodedInstruction& other) const {
314 0 : nacl_arm_dec::Instruction::Condition cond1 = inst_.GetCondition();
315 0 : nacl_arm_dec::Instruction::Condition cond2 = other.inst_.GetCondition();
316 : return !defines(nacl_arm_dec::Register::Conditions()) &&
317 : // TODO(jfb) Put back mixed-condition handling. See issue #3221.
318 : // SfiValidator::condition_implies[cond2][cond1];
319 0 : ((cond1 == nacl_arm_dec::Instruction::AL) || (cond1 == cond2));
320 0 : }
321 :
322 : // 'this' post-dominates 'other', where 'other' is the instruction
323 : // immediately preceding 'this': if 'other' executes, we can guarantee
324 : // that 'this' is executed as well.
325 : //
326 : // This is important if 'other' produces an unsafe value that 'this'
327 : // fixes before it can leak out.
328 : //
329 : // Note: if the conditions of the two
330 : // instructions do not statically infer that the conditional
331 : // execution is correct, we assume that it is not.
332 : //
333 : // Note that this function can't see the bundle size, so this result
334 : // does not take it into account. The SfiValidator reasons on this
335 : // separately.
336 0 : bool always_postdominates(const DecodedInstruction& other) const {
337 0 : nacl_arm_dec::Instruction::Condition cond1 = other.inst_.GetCondition();
338 0 : nacl_arm_dec::Instruction::Condition cond2 = inst_.GetCondition();
339 : return !other.defines(nacl_arm_dec::Register::Conditions()) &&
340 : // TODO(jfb) Put back mixed-condition handling. See issue #3221.
341 : // SfiValidator::condition_implies[cond1][cond2];
342 0 : ((cond2 == nacl_arm_dec::Instruction::AL) || (cond1 == cond2));
343 0 : }
344 :
345 : // Checks that the execution of 'this' is conditional on the test result
346 : // (specifically, the Z flag being set) from 'other' -- which must be
347 : // adjacent for this simple check to be meaningful.
348 0 : bool is_eq_conditional_on(const DecodedInstruction& other) const {
349 : return inst_.GetCondition() == nacl_arm_dec::Instruction::EQ
350 : && other.inst_.GetCondition() == nacl_arm_dec::Instruction::AL
351 0 : && other.defines(nacl_arm_dec::Register::Conditions());
352 0 : }
353 :
354 : // The methods below mirror those on ClassDecoder, but are cached and cheap.
355 0 : nacl_arm_dec::SafetyLevel safety() const { return safety_; }
356 0 : nacl_arm_dec::RegisterList defs() const { return defs_; }
357 :
358 : // The methods below pull values from ClassDecoder on demand.
359 0 : bool is_relative_branch() const {
360 0 : return decoder_->is_relative_branch(inst_);
361 0 : }
362 :
363 0 : const nacl_arm_dec::Register branch_target_register() const {
364 0 : return decoder_->branch_target_register(inst_);
365 0 : }
366 :
367 0 : bool is_literal_pool_head() const {
368 0 : return decoder_->is_literal_pool_head(inst_);
369 0 : }
370 :
371 0 : uint32_t branch_target() const {
372 : // branch_target_offset takes care of adding 8 to the instruction's
373 : // immediate: the ARM manual states that "PC reads as the address of
374 : // the current instruction plus 8".
375 0 : return vaddr_ + decoder_->branch_target_offset(inst_);
376 0 : }
377 :
378 0 : const nacl_arm_dec::Register base_address_register() const {
379 0 : return decoder_->base_address_register(inst_);
380 0 : }
381 :
382 0 : bool is_literal_load() const {
383 0 : return decoder_->is_literal_load(inst_);
384 0 : }
385 :
386 0 : bool clears_bits(uint32_t mask) const {
387 0 : return decoder_->clears_bits(inst_, mask);
388 0 : }
389 :
390 0 : bool sets_Z_if_bits_clear(nacl_arm_dec::Register r, uint32_t mask) const {
391 0 : return decoder_->sets_Z_if_bits_clear(inst_, r, mask);
392 0 : }
393 :
394 0 : bool base_address_register_writeback_small_immediate() const {
395 0 : return decoder_->base_address_register_writeback_small_immediate(inst_);
396 0 : }
397 :
398 0 : bool is_load_thread_address_pointer() const {
399 0 : return decoder_->is_load_thread_address_pointer(inst_);
400 0 : }
401 :
402 : // Some convenience methods, defined in terms of ClassDecoder:
403 0 : bool defines(nacl_arm_dec::Register r) const {
404 0 : return defs().Contains(r);
405 0 : }
406 :
407 0 : bool defines_any(nacl_arm_dec::RegisterList rl) const {
408 0 : return defs().ContainsAny(rl);
409 0 : }
410 :
411 0 : bool defines_all(nacl_arm_dec::RegisterList rl) const {
412 0 : return defs().ContainsAll(rl);
413 0 : }
414 :
415 : // Returns true if the instruction uses the given register.
416 0 : bool uses(nacl_arm_dec::Register r) const {
417 0 : return decoder_->uses(inst_).Contains(r);
418 0 : }
419 :
420 0 : const nacl_arm_dec::Instruction& inst() const {
421 0 : return inst_;
422 0 : }
423 :
424 0 : DecodedInstruction& Copy(const DecodedInstruction& other) {
425 0 : vaddr_ = other.vaddr_;
426 0 : inst_.Copy(other.inst_);
427 0 : decoder_ = other.decoder_;
428 0 : safety_ = other.safety_;
429 0 : defs_.Copy(other.defs_);
430 0 : return *this;
431 0 : }
432 :
433 : private:
434 : uint32_t vaddr_;
435 : nacl_arm_dec::Instruction inst_;
436 : const nacl_arm_dec::ClassDecoder* decoder_;
437 :
438 : nacl_arm_dec::SafetyLevel safety_;
439 : nacl_arm_dec::RegisterList defs_;
440 :
441 : NACL_DISALLOW_COPY_AND_ASSIGN(DecodedInstruction);
442 : };
443 :
444 : // Describes a memory region that contains executable code. Note that the code
445 : // need not live in its final location -- we pretend the code lives at the
446 : // provided start_addr, regardless of where the base pointer actually points.
447 : class CodeSegment {
448 : public:
449 : CodeSegment(const uint8_t* base, uint32_t start_addr, size_t size)
450 : : base_(base),
451 : start_addr_(start_addr),
452 0 : size_(static_cast<uint32_t>(size)) {
453 0 : CHECK(size <= std::numeric_limits<uint32_t>::max());
454 0 : CHECK(start_addr <= std::numeric_limits<uint32_t>::max() - size_);
455 0 : }
456 :
457 0 : uint32_t begin_addr() const { return start_addr_; }
458 0 : uint32_t end_addr() const { return start_addr_ + size_; }
459 : uint32_t size() const { return size_; }
460 0 : bool contains_address(uint32_t a) const {
461 0 : return (a >= begin_addr()) && (a < end_addr());
462 0 : }
463 :
464 0 : const nacl_arm_dec::Instruction operator[](uint32_t address) const {
465 0 : const uint8_t* element = &base_[address - start_addr_];
466 : return nacl_arm_dec::Instruction(
467 0 : *reinterpret_cast<const uint32_t *>(element));
468 0 : }
469 :
470 : bool operator<(const CodeSegment& other) const {
471 : return start_addr_ < other.start_addr_;
472 : }
473 :
474 0 : const uint8_t* base() const {
475 0 : return base_;
476 0 : }
477 :
478 : private:
479 : const uint8_t* base_;
480 : uint32_t start_addr_;
481 : uint32_t size_;
482 : };
483 :
484 : // A class that consumes reports of validation problems.
485 : //
486 : // Default implementation to be used with sel_ldr. All methods are
487 : // just placeholders, so that code to generate diagnostics will link.
488 : // If you want to generate error messages, use derived class ProblemReporter
489 : // in problem_reporter.h
490 : class ProblemSink {
491 : public:
492 0 : ProblemSink() {}
493 0 : virtual ~ProblemSink() {}
494 :
495 : // Helper function for reporting generic error messages using a
496 : // printf style. How the corresponding data is used is left to
497 : // the derived class.
498 : //
499 : // Arguments are:
500 : // violation - The type of violation being reported.
501 : // vaddr - The address of the instruction associated with the violation.
502 : // format - The format string to print out the corresponding diagnostic
503 : // message.
504 : // ... - Arguments to use with the format.
505 : virtual void ReportProblemDiagnostic(nacl_arm_dec::Violation violation,
506 : uint32_t vaddr,
507 : const char* format, ...)
508 : // Note: format is the 4th argument because of implicit this.
509 : ATTRIBUTE_FORMAT_PRINTF(4, 5) = 0;
510 :
511 : private:
512 : NACL_DISALLOW_COPY_AND_ASSIGN(ProblemSink);
513 : };
514 :
515 0 : const Bundle SfiValidator::bundle_for_address(uint32_t address) const {
516 0 : uint32_t base = address & ~(bytes_per_bundle_ - 1);
517 0 : return Bundle(base, bytes_per_bundle_);
518 0 : }
519 :
520 : bool SfiValidator::in_same_bundle(const DecodedInstruction& first,
521 0 : const DecodedInstruction& second) const {
522 0 : return bundle_for_address(first.addr()) == bundle_for_address(second.addr());
523 0 : }
524 :
525 : nacl_arm_dec::ViolationSet SfiValidator::validate_instruction_pair_allowed(
526 : const DecodedInstruction& first,
527 : const DecodedInstruction& second,
528 : AddressSet* critical,
529 0 : nacl_arm_dec::Violation violation) const {
530 0 : if (!in_same_bundle(first, second))
531 0 : return nacl_arm_dec::ViolationBit(violation);
532 0 : critical->add(second.addr());
533 0 : return nacl_arm_dec::kNoViolations;
534 0 : }
535 :
536 : } // namespace nacl_arm_val
537 :
538 : #endif // NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_ARM_V2_VALIDATOR_H
|