1 : /*
2 : * Copyright (c) 2011 The Native Client Authors. All rights reserved.
3 : * Use of this source code is governed by a BSD-style license that can be
4 : * found in the LICENSE file.
5 : */
6 :
7 : /*
8 : * ncdis.c - disassemble using NaCl decoder.
9 : * Mostly for testing.
10 : */
11 :
12 :
13 : #ifndef NACL_TRUSTED_BUT_NOT_TCB
14 : #error("This file is not meant for use in the TCB")
15 : #endif
16 :
17 : #include <errno.h>
18 : #include <stdarg.h>
19 : #include <stdio.h>
20 : #include <stdlib.h>
21 : #include <string.h>
22 :
23 : #include "native_client/src/shared/gio/gio.h"
24 : #include "native_client/src/shared/utils/types.h"
25 : #include "native_client/src/shared/utils/flags.h"
26 : #include "native_client/src/shared/platform/nacl_log.h"
27 : #include "native_client/src/trusted/validator/ncfileutil.h"
28 : #include "native_client/src/trusted/validator/x86/decoder/nc_inst_state.h"
29 : #include "native_client/src/trusted/validator/x86/decoder/ncopcode_desc.h"
30 : #include "native_client/src/trusted/validator/x86/decoder/nc_decode_tables.h"
31 : #include "native_client/src/trusted/validator/x86/ncval_seg_sfi/ncdecode_verbose.h"
32 : #include "native_client/src/trusted/validator/x86/ncval_seg_sfi/ncvalidate_internaltypes.h"
33 : #include "native_client/src/trusted/validator_x86/nc_read_segment.h"
34 : #include "native_client/src/trusted/validator_x86/ncdis_segments.h"
35 :
36 : /* True if we should use the full decoder when decoding. */
37 : /* TODO(karl): When the full_decoder is working for both the x86-32 and
38 : * x86-64 platforms, change to use full decoder for both as default.
39 : */
40 : static Bool NACL_FLAGS_full_decoder =
41 : #if NACL_TARGET_SUBARCH == 64
42 : TRUE
43 : #else
44 : FALSE
45 : #endif
46 : ;
47 :
48 : /* True if we should use the validator decoder when decoding. */
49 : static Bool NACL_FLAGS_validator_decoder =
50 : #if NACL_TARGET_SUBARCH == 64
51 : FALSE
52 : #else
53 : TRUE
54 : #endif
55 : ;
56 :
57 : /* True if we should print internal representations while decoding. */
58 : static Bool NACL_FLAGS_internal = FALSE;
59 :
60 : /* The name of the executable that is being run. */
61 : static const char* exec_name = "???";
62 :
/* Reports an unrecoverable error and terminates the program.
 *
 * Writes "Fatal: " followed by the printf-style formatted message to
 * stdout, then calls exit(-1). This function does not return.
 *
 * fmt - printf-style format string.
 * ... - arguments consumed by fmt.
 */
static void Fatal(const char *fmt, ...) {
  va_list args;
  fputs("Fatal: ", stdout);
  va_start(args, fmt);
  vprintf(fmt, args);
  va_end(args);
  exit(-1);
}
72 :
/* Prints an informational message.
 *
 * Writes "Info: " followed by the printf-style formatted message to stdout.
 * No newline is appended; callers supply their own.
 *
 * fmt - printf-style format string.
 * ... - arguments consumed by fmt.
 */
void Info(const char *fmt, ...) {
  va_list args;
  fputs("Info: ", stdout);
  va_start(args, fmt);
  vprintf(fmt, args);
  va_end(args);
}
81 :
/* Prints the command line usage message to stderr and exits with status 1.
 * This function does not return.
 */
static void usage(void) {
  fprintf(stderr,
          "usage: ncdis [options] [file]\n"
          "\n"
          "Options are:\n"
          "--commands=<file>\n"
          "\tAdditional command line arguments are specified in the given\n"
          "\tfile ('#' acts as a comment character). Use '-' as its value to\n"
          "\tredirect command line arguments from standard input.\n"
          "--full_decoder\n"
          "\tDisassemble the elf executable using native client's\n"
          "\tfull decoder.\n"
          "--help\n"
          "\tPrint out this usage message\n"
          "--hex_text=<file>\n"
          "\tDefine code section as sequence of (textual) hexadecimal bytes\n"
          "\tdefined in the given file. Lines beginning with '#' will be\n"
          /* Note the double 't': one for the tab escape, one for "treated". */
          "\ttreated as comments. If the first non-comment line begins with\n"
          "\t'@' the following hexadecimal number will be used as the\n"
          "\tbeginning (RIP/EIP) instruction address of the code segment.\n"
          "\tUse '-' as its value to redirect standard input as the\n"
          "\ttext file to process.\n"
          "-i=XXXX\n"
          "\tXXXX specifies the sequence of hexadecimal digits that define\n"
          "\tan instruction to be decoded.\n"
          "--internal\n"
          "\tFor the iterator model (only), prints out each the decoded\n"
          "\tinstruction, followed by the internals for the matched\n"
          "\tinstruction.\n"
          "--pc=XXX\n"
          "\tSet program counter (i.e. RIP or EIP) to XXX.\n"
          "--self_document\n"
          "\tProcess input hex_text file in such a way, that it also\n"
          "\trepresents the output that will be generated by ncdis.\n"
          "\tThat is, copy comment lines (i.e. lines beginning with\n"
          "\t'#') to stdout. In addition, it assumes that each line\n"
          "\tconsists of an '-i' command line argument (and possibly\n"
          "\ta '--pc' command line argument, followed by a '#',\n"
          "\tfollowed by the corresponding disassembled text. On such\n"
          "\tlines, the input is copied up to (and including) the '#'.,\n"
          "\tand then the disassembled instruction is printed.\n"
          "--validator_decoder\n"
          "\tDisassemble the file using the partial instruction decoder used\n"
          "\tby the validator.\n"
          );
  exit(1);
}
129 :
130 : /* Converts command line flags to corresponding disassemble flags. */
131 1 : static NaClDisassembleFlags NaClGetDisassembleFlags(void) {
132 1 : NaClDisassembleFlags flags = 0;
133 1 : if (NACL_FLAGS_validator_decoder) {
134 1 : NaClAddBits(flags, NACL_DISASSEMBLE_FLAG(NaClDisassembleValidatorDecoder));
135 : }
136 1 : if (NACL_FLAGS_full_decoder) {
137 1 : NaClAddBits(flags, NACL_DISASSEMBLE_FLAG(NaClDisassembleFull));
138 : }
139 1 : if (NACL_FLAGS_internal) {
140 1 : NaClAddBits(flags, NACL_DISASSEMBLE_FLAG(NaClDisassembleAddInternals));
141 : }
142 1 : return flags;
143 1 : }
144 :
145 0 : static int AnalyzeSections(ncfile *ncf) {
146 0 : int badsections = 0;
147 : int ii;
148 0 : const Elf_Shdr* shdr = ncf->sheaders;
149 :
150 0 : for (ii = 0; ii < ncf->shnum; ii++) {
151 : Info("section %d sh_addr %x offset %x flags %x\n",
152 : ii, (uint32_t)shdr[ii].sh_addr,
153 0 : (uint32_t)shdr[ii].sh_offset, (uint32_t)shdr[ii].sh_flags);
154 0 : if ((shdr[ii].sh_flags & SHF_EXECINSTR) != SHF_EXECINSTR)
155 0 : continue;
156 0 : Info("parsing section %d\n", ii);
157 : NaClDisassembleSegment(ncf->data + (shdr[ii].sh_addr - ncf->vbase),
158 : shdr[ii].sh_addr, shdr[ii].sh_size,
159 0 : NaClGetDisassembleFlags());
160 0 : }
161 0 : return -badsections;
162 0 : }
163 :
164 0 : static void AnalyzeCodeSegments(ncfile *ncf, const char *fname) {
165 0 : if (AnalyzeSections(ncf) < 0) {
166 0 : fprintf(stderr, "%s: text validate failed\n", fname);
167 : }
168 0 : }
169 :
170 : /* Capture a sequence of bytes defining an instruction (up to a
171 : * MAX_BYTES_PER_X86_INSTRUCTION). This sequence is used to run
172 : * a (debug) test of the disassembler.
173 : */
174 : static uint8_t FLAGS_decode_instruction[NACL_MAX_BYTES_PER_X86_INSTRUCTION];
175 :
176 : /* Define the number of bytes supplied for a debug instruction. */
177 : static int FLAGS_decode_instruction_size = 0;
178 :
179 : /* Flag defining the value of the pc to use when decoding an instruction
180 : * through decode_instruction.
181 : */
182 : static uint32_t FLAGS_decode_pc = 0;
183 :
184 : /* Flag defining an input file to use as command line arguments
185 : * (one per input line). When specified, run the disassembler
186 : * on each command line. The empty string "" denotes that no command
187 : * line file was specified. A dash ("-") denotes that standard input
188 : * should be used to get command line arguments.
189 : */
190 : static char* FLAGS_commands = "";
191 :
192 : /* Flag defining the name of a hex text to be used as the code segment. Assumes
193 : * that the pc associated with the code segment is defined by
194 : * FLAGS_decode_pc.
195 : */
196 : static char* FLAGS_hex_text = "";
197 :
198 : /* Flag, when used in combination with the commands flag, will turn
199 : * on input copy rules, making the generated output contain comments
200 : * and the command line arguments as part of the corresponding
201 : * generated output. For more details on this, see ProcessInputFile
202 : * below.
203 : */
204 : static Bool FLAGS_self_document = FALSE;
205 :
206 : /*
207 : * Store default values of flags on the first call. On subsequent
208 : * calls, resets the flags to the default value.
209 : *
210 : * *WARNING* In order for this to work, this function must be
211 : * called before GrokFlags
212 : *
213 : * NOTE: we only allow the specification of -use_iter at the top-level
214 : * command line..
215 : */
216 1 : static void ResetFlags(void) {
217 : int i;
218 : static uint32_t DEFAULT_decode_pc;
219 : static char* DEFAULT_commands;
220 : static Bool DEFAULT_self_document;
221 : static Bool is_first_call = TRUE;
222 1 : if (is_first_call) {
223 1 : DEFAULT_decode_pc = FLAGS_decode_pc;
224 1 : DEFAULT_commands = FLAGS_commands;
225 1 : DEFAULT_self_document = FLAGS_self_document;
226 1 : is_first_call = FALSE;
227 : }
228 :
229 1 : FLAGS_decode_pc = DEFAULT_decode_pc;
230 1 : FLAGS_commands = DEFAULT_commands;
231 1 : FLAGS_self_document = DEFAULT_self_document;
232 : /* Always clear the decode instruction. */
233 1 : FLAGS_decode_instruction_size = 0;
234 1 : for (i = 0; i < NACL_MAX_BYTES_PER_X86_INSTRUCTION; ++i) {
235 1 : FLAGS_decode_instruction[i] = 0;
236 1 : }
237 1 : }
238 :
239 : /* Returns true if all characters in the string are zero. */
240 1 : static Bool IsZero(const char* arg) {
241 1 : while (*arg) {
242 1 : if ('0' != *arg) {
243 0 : return FALSE;
244 : }
245 1 : ++arg;
246 1 : }
247 1 : return TRUE;
248 1 : }
249 :
/* Converts a textual hexadecimal value into a byte.
 *
 * hex_value - Null terminated string of hexadecimal digits.
 *
 * Returns the value of the string as a byte. Calls Fatal (which exits the
 * program) if the string is not a legal hexadecimal byte value, that is if:
 *   - strtoul yields zero although the text is not all '0' characters,
 *   - strtoul stopped before the end of the string (e.g. "1g"), or
 *   - the value does not fit in a byte.
 */
uint8_t HexToByte(const char* hex_value) {
  char* end = NULL;
  unsigned long value = strtoul(hex_value, &end, 16);

  /* strtoul reports "nothing converted" as zero, so a zero result is only
   * trusted when the text really is all zeros. Trailing unparsed text and
   * out-of-range values would otherwise be silently dropped or truncated.
   */
  if ((0UL == value && !IsZero(hex_value)) ||
      '\0' != *end ||
      value > 0xFFUL) {
    Fatal("-i option specifies illegal hex value '%s'\n", hex_value);
  }
  return (uint8_t) value;
}
260 :
261 : /* Recognizes flags in argv, processes them, and then removes them.
262 : * Returns the updated value for argc.
263 : */
264 1 : int GrokFlags(int argc, const char *argv[]) {
265 : int i;
266 : int new_argc;
267 : char* hex_instruction;
268 1 : Bool help = FALSE;
269 1 : if (argc == 0) return 0;
270 1 : exec_name = argv[0];
271 1 : new_argc = 1;
272 1 : for (i = 1; i < argc; ++i) {
273 1 : const char* arg = argv[i];
274 : if (GrokUint32HexFlag("--pc", arg, &FLAGS_decode_pc) ||
275 : GrokCstringFlag("--commands", arg, &FLAGS_commands) ||
276 : GrokCstringFlag("--hex_text", arg, &FLAGS_hex_text) ||
277 : GrokBoolFlag("--self_document", arg, &FLAGS_self_document) ||
278 : GrokBoolFlag("--internal", arg, &NACL_FLAGS_internal) ||
279 1 : GrokBoolFlag("--help", arg, &help)) {
280 1 : if (help) usage();
281 1 : } else if (GrokBoolFlag("--validator_decoder", arg,
282 1 : &NACL_FLAGS_validator_decoder)) {
283 1 : NACL_FLAGS_full_decoder = !NACL_FLAGS_validator_decoder;
284 1 : } else if (GrokBoolFlag("--full_decoder", arg,
285 1 : &NACL_FLAGS_full_decoder)) {
286 1 : NACL_FLAGS_validator_decoder = !NACL_FLAGS_full_decoder;
287 1 : } else if (GrokCstringFlag("-i", arg, &hex_instruction)) {
288 1 : int i = 0;
289 : char buffer[3];
290 1 : char* buf = &(hex_instruction[0]);
291 1 : buffer[2] = '\0';
292 1 : while (*buf) {
293 1 : buffer[i++] = *(buf++);
294 1 : if (i == 2) {
295 1 : uint8_t byte = HexToByte(buffer);
296 1 : FLAGS_decode_instruction[FLAGS_decode_instruction_size++] = byte;
297 : if (FLAGS_decode_instruction_size >
298 1 : NACL_MAX_BYTES_PER_X86_INSTRUCTION) {
299 0 : Fatal("-i=%s specifies too long of a hex value\n", hex_instruction);
300 : }
301 1 : i = 0;
302 : }
303 1 : }
304 1 : if (i != 0) {
305 0 : Fatal("-i=%s doesn't specify a sequence of bytes\n", hex_instruction);
306 : }
307 1 : } else {
308 0 : argv[new_argc++] = argv[i];
309 : }
310 1 : }
311 1 : return new_argc;
312 1 : }
313 :
/* Extracts the name of the file to disassemble from the command line
 * arguments left over after flag processing.
 *
 * argc - Number of remaining arguments; must be exactly 2 (the executable
 *        name plus the filename), otherwise Fatal is called.
 * argv - The remaining arguments.
 *
 * Returns the last argument, i.e. the filename.
 */
static const char* GrokArgv(int argc, const char* argv[]) {
  const int expected_argc = 2;
  if (expected_argc != argc) {
    Fatal("no filename specified\n");
  }
  return argv[argc - 1];
}
321 :
322 : static void ProcessCommandLine(int argc, const char* argv[]);
323 :
324 : /* Defines the maximum number of characters allowed on an input line
325 : * of the input text defined by the commands command line option.
326 : */
327 : #define MAX_INPUT_LINE 4096
328 :
329 : /* Defines the characters used as (token) separators to recognize command
330 : * line arguments when processing lines of text in the text file specified
331 : * by the commands command line option.
332 : */
333 : #define CL_SEPARATORS " \t\n"
334 :
/* Copies the command line text of an input line, i.e. everything that
 * precedes a trailing comment (introduced by the pound sign).
 *
 * input_line - The line of text to process.
 * token_text - Receives the extracted text, always null terminated
 *              (provided max_length is non-zero).
 * max_length - The maximum length of input_line and token_text.
 *
 * Note: If no null terminator (or pound sign) is found before the last
 * position, input_line[max_length - 1] is overwritten with a null
 * terminator, truncating the line.
 */
static void CopyCommandLineTokens(char* input_line,
                                  char* token_text,
                                  size_t max_length) {
  size_t pos = 0;
  while (pos < max_length) {
    char current;
    if (pos + 1 == max_length) {
      /* Out of room: force termination at the final position. */
      input_line[pos] = '\0';
    }
    current = input_line[pos];
    if ('#' == current) {
      /* A comment starts here; the tokens end just before it. */
      token_text[pos] = '\0';
      return;
    }
    token_text[pos] = current;
    if ('\0' == current) {
      return;
    }
    ++pos;
  }
}
363 :
364 : /* Tokenize the given text to find command line arguments, and
365 : * add them to the given list of command line arguments.
366 : *
367 : * *WARNING* This function will (destructively) modify the
368 : * contents of token_text, by converting command line option
369 : * separator characters into newlines.
370 : */
371 : static void ExtractTokensAndAddToArgv(
372 : char* token_text,
373 : int* argc,
374 1 : const char* argv[]) {
375 : /* Note: Assume that each command line argument corresponds to
376 : * non-blank text, which is a HACK, but should be sufficient for
377 : * what we need.
378 : */
379 1 : char* token = strtok(token_text, CL_SEPARATORS);
380 1 : while (token != NULL) {
381 1 : argv[(*argc)++] = token;
382 1 : token = strtok(NULL, CL_SEPARATORS);
383 1 : }
384 1 : }
385 :
386 : /* Print out the contents of text, up to the first occurence of the
387 : * pound sign.
388 : */
389 1 : static void PrintUpToPound(const char text[]) {
390 : int i;
391 1 : struct Gio* g = NaClLogGetGio();
392 1 : for (i = 0; i < MAX_INPUT_LINE; ++i) {
393 1 : char ch = text[i];
394 1 : switch (ch) {
395 : case '#':
396 1 : gprintf(g, "%c", ch);
397 1 : return;
398 : case '\0':
399 0 : return;
400 : default:
401 1 : gprintf(g, "%c", ch);
402 : break;
403 : }
404 1 : }
405 1 : }
406 :
407 : /* Reads the given text file and processes the command line options specified
408 : * inside of it. Each line specifies a separate sequence of command line
409 : * arguments to process.
410 : *
411 : * Note:
412 : * (a) The '#' is used as a comment delimiter.
413 : * (b) whitespace lines are ignored.
414 : * (c) If flag --self_document is specified, comment lines and whitespace
415 : * lines will automatically be copied to stdout. In addition, command
416 : * line arguments will be copied to stdout before processing them.
417 : * Further, if the command line arguments are followed by a comment,
418 : * only text up to (and including) the '#' will be copied. This allows
419 : * the input file to contain the (hopefully single lined) output that
420 : * would be generated by the given command line arguments. Therefore,
421 : * if set up correctly, the output of the disassembler (in this case)
422 : * should be the same as the input file (making it easy to use the
423 : * input file as the the corresponding GOLD file to test against).
424 : */
425 1 : static void ProcessInputFile(FILE* file) {
426 : char input_line[MAX_INPUT_LINE];
427 1 : const Bool self_document = FLAGS_self_document;
428 1 : while (fgets(input_line, MAX_INPUT_LINE, file) != NULL) {
429 : char token_text[MAX_INPUT_LINE];
430 : const char* line_argv[MAX_INPUT_LINE];
431 1 : int line_argc = 0;
432 :
433 : /* Copy the input line (up to the first #) into token_text */
434 1 : CopyCommandLineTokens(input_line, token_text, MAX_INPUT_LINE);
435 :
436 : /* Tokenize the commands to build argv.
437 : * Note: Since each token is separated by a blank,
438 : * and the input is no more than MAX_INPUT_LINE,
439 : * we know (without checking) that line_argc
440 : * will not exceed MAX_INPUT_LINE.
441 : */
442 1 : line_argv[line_argc++] = exec_name;
443 1 : ExtractTokensAndAddToArgv(token_text, &line_argc, line_argv);
444 :
445 : /* Process the parsed input line. */
446 1 : if (1 == line_argc) {
447 : /* No command line arguments. */
448 1 : if (self_document) {
449 1 : printf("%s", input_line);
450 : }
451 1 : } else {
452 : /* Process the tokenized command line. */
453 1 : if (self_document) {
454 1 : PrintUpToPound(input_line);
455 : }
456 1 : ProcessCommandLine(line_argc, line_argv);
457 : }
458 1 : }
459 1 : ResetFlags();
460 1 : }
461 :
462 : /* Run the disassembler using the given command line arguments. */
463 1 : static void ProcessCommandLine(int argc, const char* argv[]) {
464 : int new_argc;
465 :
466 1 : ResetFlags();
467 1 : new_argc = GrokFlags(argc, argv);
468 1 : if (FLAGS_decode_instruction_size > 0) {
469 : /* Command line options specify an instruction to decode, run
470 : * the disassembler on the instruction to print out the decoded
471 : * results.
472 : */
473 1 : if (new_argc > 1) {
474 0 : Fatal("unrecognized option '%s'\n", argv[1]);
475 : }
476 : NaClDisassembleSegment(FLAGS_decode_instruction, FLAGS_decode_pc,
477 : FLAGS_decode_instruction_size,
478 1 : NaClGetDisassembleFlags());
479 1 : } else if (0 != strcmp(FLAGS_hex_text, "")) {
480 : uint8_t bytes[MAX_INPUT_LINE];
481 : size_t num_bytes;
482 : NaClPcAddress pc;
483 1 : if (0 == strcmp(FLAGS_hex_text, "-")) {
484 1 : num_bytes = NaClReadHexTextWithPc(stdin, &pc, bytes, MAX_INPUT_LINE);
485 : NaClDisassembleSegment(bytes, pc, (NaClMemorySize) num_bytes,
486 1 : NaClGetDisassembleFlags());
487 1 : } else {
488 0 : FILE* input = fopen(FLAGS_hex_text, "r");
489 0 : if (NULL == input) {
490 0 : Fatal("Can't open hex text file: %s\n", FLAGS_hex_text);
491 : }
492 0 : num_bytes = NaClReadHexTextWithPc(input, &pc, bytes, MAX_INPUT_LINE);
493 0 : fclose(input);
494 : NaClDisassembleSegment(bytes, pc, (NaClMemorySize) num_bytes,
495 0 : NaClGetDisassembleFlags());
496 1 : }
497 1 : } else if (0 != strcmp(FLAGS_commands, "")) {
498 : /* Use the given input file to find command line arguments,
499 : * and process.
500 : */
501 1 : if (0 == strcmp(FLAGS_commands, "-")) {
502 1 : ProcessInputFile(stdin);
503 1 : } else {
504 0 : FILE* input = fopen(FLAGS_commands, "r");
505 0 : if (NULL == input) {
506 0 : Fatal("Can't open commands file: %s\n", FLAGS_commands);
507 : }
508 0 : ProcessInputFile(input);
509 0 : fclose(input);
510 : }
511 1 : } else {
512 : /* Command line should specify an executable to disassemble.
513 : * Read the file and disassemble it.
514 : */
515 : ncfile *ncf;
516 0 : const char* filename = GrokArgv(new_argc, argv);
517 :
518 0 : Info("processing %s", filename);
519 0 : ncf = nc_loadfile_depending(filename, NULL);
520 0 : if (ncf == NULL) {
521 0 : Fatal("nc_loadfile(%s): %s\n", filename, strerror(errno));
522 : }
523 :
524 0 : AnalyzeCodeSegments(ncf, filename);
525 :
526 0 : nc_freefile(ncf);
527 : }
528 1 : }
529 :
530 1 : int main(int argc, const char *argv[]) {
531 : struct GioFile gout_file;
532 1 : struct Gio* gout = (struct Gio*) &gout_file;
533 1 : if (!GioFileRefCtor(&gout_file, stdout)) {
534 0 : fprintf(stderr, "Unable to create gio file for stdout!\n");
535 0 : return 1;
536 : }
537 1 : NaClLogModuleInitExtended(LOG_INFO, gout);
538 1 : ProcessCommandLine(argc, argv);
539 1 : NaClLogModuleFini();
540 1 : GioFileDtor(gout);
541 1 : return 0;
542 1 : }
|