// SPDX-License-Identifier: GPL-2.0 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "build-id.h" #include "event.h" #include "debug.h" #include "evlist.h" #include "namespaces.h" #include "symbol.h" #include #include "tsc.h" #include "session.h" #include "jit.h" #include "jitdump.h" #include "genelf.h" #include "thread.h" #include #include struct jit_buf_desc { struct perf_data *output; struct perf_session *session; struct machine *machine; struct nsinfo *nsi; union jr_entry *entry; void *buf; uint64_t sample_type; size_t bufsize; FILE *in; bool needs_bswap; /* handles cross-endianness */ bool use_arch_timestamp; void *debug_data; void *unwinding_data; uint64_t unwinding_size; uint64_t unwinding_mapped_size; uint64_t eh_frame_hdr_size; size_t nr_debug_entries; uint32_t code_load_count; u64 bytes_written; struct rb_root code_root; char dir[PATH_MAX]; }; struct jit_tool { struct perf_tool tool; struct perf_data output; struct perf_data input; u64 bytes_written; }; #define hmax(a, b) ((a) > (b) ? (a) : (b)) #define get_jit_tool(t) (container_of(tool, struct jit_tool, tool)) static int jit_emit_elf(struct jit_buf_desc *jd, char *filename, const char *sym, uint64_t code_addr, const void *code, int csize, void *debug, int nr_debug_entries, void *unwinding, uint32_t unwinding_header_size, uint32_t unwinding_size) { int ret, fd, saved_errno; struct nscookie nsc; if (verbose > 0) fprintf(stderr, "write ELF image %s\n", filename); nsinfo__mountns_enter(jd->nsi, &nsc); fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644); saved_errno = errno; nsinfo__mountns_exit(&nsc); if (fd == -1) { pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(saved_errno)); return -1; } ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries, unwinding, unwinding_header_size, unwinding_size); close(fd); if (ret) { nsinfo__mountns_enter(jd->nsi, &nsc); unlink(filename); nsinfo__mountns_exit(&nsc); } return ret; } static void jit_close(struct jit_buf_desc *jd) { if (!(jd && jd->in)) return; funlockfile(jd->in); fclose(jd->in); jd->in = NULL; } static int jit_validate_events(struct perf_session *session) { struct evsel *evsel; /* * check that all events use CLOCK_MONOTONIC */ evlist__for_each_entry(session->evlist, evsel) { if (evsel->core.attr.use_clockid == 0 || evsel->core.attr.clockid != CLOCK_MONOTONIC) return -1; } return 0; } static int jit_open(struct jit_buf_desc *jd, const char *name) { struct jitheader header; struct nscookie nsc; struct jr_prefix *prefix; ssize_t bs, bsz = 0; void *n, *buf = NULL; int ret, retval = -1; nsinfo__mountns_enter(jd->nsi, &nsc); jd->in = fopen(name, "r"); nsinfo__mountns_exit(&nsc); if (!jd->in) return -1; bsz = hmax(sizeof(header), sizeof(*prefix)); buf = malloc(bsz); if (!buf) goto error; /* * protect from writer modifying the file while we are reading it */ flockfile(jd->in); ret = fread(buf, sizeof(header), 1, jd->in); if (ret != 1) goto error; memcpy(&header, buf, sizeof(header)); if (header.magic != JITHEADER_MAGIC) { if (header.magic != JITHEADER_MAGIC_SW) goto error; jd->needs_bswap = true; } if (jd->needs_bswap) { header.version = bswap_32(header.version); header.total_size = bswap_32(header.total_size); header.pid = bswap_32(header.pid); header.elf_mach = bswap_32(header.elf_mach); header.timestamp = bswap_64(header.timestamp); header.flags = bswap_64(header.flags); } jd->use_arch_timestamp = header.flags & JITDUMP_FLAGS_ARCH_TIMESTAMP; if (verbose > 2) pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\nuse_arch_timestamp=%d\n", header.version, header.total_size, (unsigned long long)header.timestamp, header.pid, header.elf_mach, jd->use_arch_timestamp); if (header.version > JITHEADER_VERSION) { pr_err("wrong jitdump version %u, expected " __stringify(JITHEADER_VERSION), header.version); goto error; } if (header.flags & JITDUMP_FLAGS_RESERVED) { pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n", (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED); goto error; } if (jd->use_arch_timestamp && !jd->session->time_conv.time_mult) { pr_err("jitdump file uses arch timestamps but there is no timestamp conversion\n"); goto error; } /* * validate event is using the correct clockid */ if (!jd->use_arch_timestamp && jit_validate_events(jd->session)) { pr_err("error, jitted code must be sampled with perf record -k 1\n"); goto error; } bs = header.total_size - sizeof(header); if (bs > bsz) { n = realloc(buf, bs); if (!n) goto error; bsz = bs; buf = n; /* read extra we do not know about */ ret = fread(buf, bs - bsz, 1, jd->in); if (ret != 1) goto error; } /* * keep dirname for generating files and mmap records */ strcpy(jd->dir, name); dirname(jd->dir); free(buf); return 0; error: free(buf); funlockfile(jd->in); fclose(jd->in); return retval; } static union jr_entry * jit_get_next_entry(struct jit_buf_desc *jd) { struct jr_prefix *prefix; union jr_entry *jr; void *addr; size_t bs, size; int id, ret; if (!(jd && jd->in)) return NULL; if (jd->buf == NULL) { size_t sz = getpagesize(); if (sz < sizeof(*prefix)) sz = sizeof(*prefix); jd->buf = malloc(sz); if (jd->buf == NULL) return NULL; jd->bufsize = sz; } prefix = jd->buf; /* * file is still locked at this point */ ret = fread(prefix, sizeof(*prefix), 1, jd->in); if (ret != 1) return NULL; if (jd->needs_bswap) { prefix->id = bswap_32(prefix->id); prefix->total_size = bswap_32(prefix->total_size); prefix->timestamp = bswap_64(prefix->timestamp); } id = prefix->id; size = prefix->total_size; bs = (size_t)size; if (bs < sizeof(*prefix)) return NULL; if (id >= JIT_CODE_MAX) { pr_warning("next_entry: unknown record type %d, skipping\n", id); } if (bs > jd->bufsize) { void *n; n = realloc(jd->buf, bs); if (!n) return NULL; jd->buf = n; jd->bufsize = bs; } addr = ((void *)jd->buf) + sizeof(*prefix); ret = fread(addr, bs - sizeof(*prefix), 1, jd->in); if (ret != 1) return NULL; jr = (union jr_entry *)jd->buf; switch(id) { case JIT_CODE_DEBUG_INFO: if (jd->needs_bswap) { uint64_t n; jr->info.code_addr = bswap_64(jr->info.code_addr); jr->info.nr_entry = bswap_64(jr->info.nr_entry); for (n = 0 ; n < jr->info.nr_entry; n++) { jr->info.entries[n].addr = bswap_64(jr->info.entries[n].addr); jr->info.entries[n].lineno = bswap_32(jr->info.entries[n].lineno); jr->info.entries[n].discrim = bswap_32(jr->info.entries[n].discrim); } } break; case JIT_CODE_UNWINDING_INFO: if (jd->needs_bswap) { jr->unwinding.unwinding_size = bswap_64(jr->unwinding.unwinding_size); jr->unwinding.eh_frame_hdr_size = bswap_64(jr->unwinding.eh_frame_hdr_size); jr->unwinding.mapped_size = bswap_64(jr->unwinding.mapped_size); } break; case JIT_CODE_CLOSE: break; case JIT_CODE_LOAD: if (jd->needs_bswap) { jr->load.pid = bswap_32(jr->load.pid); jr->load.tid = bswap_32(jr->load.tid); jr->load.vma = bswap_64(jr->load.vma); jr->load.code_addr = bswap_64(jr->load.code_addr); jr->load.code_size = bswap_64(jr->load.code_size); jr->load.code_index= bswap_64(jr->load.code_index); } jd->code_load_count++; break; case JIT_CODE_MOVE: if (jd->needs_bswap) { jr->move.pid = bswap_32(jr->move.pid); jr->move.tid = bswap_32(jr->move.tid); jr->move.vma = bswap_64(jr->move.vma); jr->move.old_code_addr = bswap_64(jr->move.old_code_addr); jr->move.new_code_addr = bswap_64(jr->move.new_code_addr); jr->move.code_size = bswap_64(jr->move.code_size); jr->move.code_index = bswap_64(jr->move.code_index); } break; case JIT_CODE_MAX: default: /* skip unknown record (we have read them) */ break; } return jr; } static int jit_inject_event(struct jit_buf_desc *jd, union perf_event *event) { ssize_t size; size = perf_data__write(jd->output, event, event->header.size); if (size < 0) return -1; jd->bytes_written += size; return 0; } static pid_t jr_entry_pid(struct jit_buf_desc *jd, union jr_entry *jr) { if (jd->nsi && nsinfo__in_pidns(jd->nsi)) return nsinfo__tgid(jd->nsi); return jr->load.pid; } static pid_t jr_entry_tid(struct jit_buf_desc *jd, union jr_entry *jr) { if (jd->nsi && nsinfo__in_pidns(jd->nsi)) return nsinfo__pid(jd->nsi); return jr->load.tid; } static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp) { struct perf_tsc_conversion tc = { .time_shift = 0, }; struct perf_record_time_conv *time_conv = &jd->session->time_conv; if (!jd->use_arch_timestamp) return timestamp; tc.time_shift = time_conv->time_shift; tc.time_mult = time_conv->time_mult; tc.time_zero = time_conv->time_zero; /* * The event TIME_CONV was extended for the fields from "time_cycles" * when supported cap_user_time_short, for backward compatibility, * checks the event size and assigns these extended fields if these * fields are contained in the event. */ if (event_contains(*time_conv, time_cycles)) { tc.time_cycles = time_conv->time_cycles; tc.time_mask = time_conv->time_mask; tc.cap_user_time_zero = time_conv->cap_user_time_zero; tc.cap_user_time_short = time_conv->cap_user_time_short; if (!tc.cap_user_time_zero) return 0; } return tsc_to_perf_time(timestamp, &tc); } static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) { struct perf_sample sample; union perf_event *event; const struct perf_tool *tool = jd->session->tool; uint64_t code, addr; uintptr_t uaddr; char *filename; struct stat st; size_t size; u16 idr_size; const char *sym; uint64_t count; int ret, csize, usize; pid_t nspid, pid, tid; struct { u32 pid, tid; u64 time; } *id; nspid = jr->load.pid; pid = jr_entry_pid(jd, jr); tid = jr_entry_tid(jd, jr); csize = jr->load.code_size; usize = jd->unwinding_mapped_size; addr = jr->load.code_addr; sym = (void *)((unsigned long)jr + sizeof(jr->load)); code = (unsigned long)jr + jr->load.p.total_size - csize; count = jr->load.code_index; idr_size = jd->machine->id_hdr_size; event = calloc(1, sizeof(*event) + idr_size); if (!event) return -1; filename = event->mmap2.filename; size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so", jd->dir, nspid, count); size++; /* for \0 */ size = PERF_ALIGN(size, sizeof(u64)); uaddr = (uintptr_t)code; ret = jit_emit_elf(jd, filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries, jd->unwinding_data, jd->eh_frame_hdr_size, jd->unwinding_size); if (jd->debug_data && jd->nr_debug_entries) { zfree(&jd->debug_data); jd->nr_debug_entries = 0; } if (jd->unwinding_data && jd->eh_frame_hdr_size) { zfree(&jd->unwinding_data); jd->eh_frame_hdr_size = 0; jd->unwinding_mapped_size = 0; jd->unwinding_size = 0; } if (ret) { free(event); return -1; } if (nsinfo__stat(filename, &st, jd->nsi)) memset(&st, 0, sizeof(st)); event->mmap2.header.type = PERF_RECORD_MMAP2; event->mmap2.header.misc = PERF_RECORD_MISC_USER; event->mmap2.header.size = (sizeof(event->mmap2) - (sizeof(event->mmap2.filename) - size) + idr_size); event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; event->mmap2.start = addr; event->mmap2.len = usize ? ALIGN_8(csize) + usize : csize; event->mmap2.pid = pid; event->mmap2.tid = tid; event->mmap2.ino = st.st_ino; event->mmap2.maj = major(st.st_dev); event->mmap2.min = minor(st.st_dev); event->mmap2.prot = st.st_mode; event->mmap2.flags = MAP_SHARED; event->mmap2.ino_generation = 1; id = (void *)((unsigned long)event + event->mmap.header.size - idr_size); if (jd->sample_type & PERF_SAMPLE_TID) { id->pid = pid; id->tid = tid; } if (jd->sample_type & PERF_SAMPLE_TIME) id->time = convert_timestamp(jd, jr->load.p.timestamp); /* * create pseudo sample to induce dso hit increment * use first address as sample address */ memset(&sample, 0, sizeof(sample)); sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = pid; sample.tid = tid; sample.time = id->time; sample.ip = addr; ret = perf_event__process_mmap2(tool, event, &sample, jd->machine); if (ret) goto out; ret = jit_inject_event(jd, event); /* * mark dso as use to generate buildid in the header */ if (!ret) build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); out: free(event); return ret; } static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) { struct perf_sample sample; union perf_event *event; const struct perf_tool *tool = jd->session->tool; char *filename; size_t size; struct stat st; int usize; u16 idr_size; int ret; pid_t nspid, pid, tid; struct { u32 pid, tid; u64 time; } *id; nspid = jr->load.pid; pid = jr_entry_pid(jd, jr); tid = jr_entry_tid(jd, jr); usize = jd->unwinding_mapped_size; idr_size = jd->machine->id_hdr_size; /* * +16 to account for sample_id_all (hack) */ event = calloc(1, sizeof(*event) + 16); if (!event) return -1; filename = event->mmap2.filename; size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so", jd->dir, nspid, jr->move.code_index); size++; /* for \0 */ if (nsinfo__stat(filename, &st, jd->nsi)) memset(&st, 0, sizeof(st)); size = PERF_ALIGN(size, sizeof(u64)); event->mmap2.header.type = PERF_RECORD_MMAP2; event->mmap2.header.misc = PERF_RECORD_MISC_USER; event->mmap2.header.size = (sizeof(event->mmap2) - (sizeof(event->mmap2.filename) - size) + idr_size); event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; event->mmap2.start = jr->move.new_code_addr; event->mmap2.len = usize ? ALIGN_8(jr->move.code_size) + usize : jr->move.code_size; event->mmap2.pid = pid; event->mmap2.tid = tid; event->mmap2.ino = st.st_ino; event->mmap2.maj = major(st.st_dev); event->mmap2.min = minor(st.st_dev); event->mmap2.prot = st.st_mode; event->mmap2.flags = MAP_SHARED; event->mmap2.ino_generation = 1; id = (void *)((unsigned long)event + event->mmap.header.size - idr_size); if (jd->sample_type & PERF_SAMPLE_TID) { id->pid = pid; id->tid = tid; } if (jd->sample_type & PERF_SAMPLE_TIME) id->time = convert_timestamp(jd, jr->load.p.timestamp); /* * create pseudo sample to induce dso hit increment * use first address as sample address */ memset(&sample, 0, sizeof(sample)); sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = pid; sample.tid = tid; sample.time = id->time; sample.ip = jr->move.new_code_addr; ret = perf_event__process_mmap2(tool, event, &sample, jd->machine); if (ret) return ret; ret = jit_inject_event(jd, event); if (!ret) build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); return ret; } static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr) { void *data; size_t sz; if (!(jd && jr)) return -1; sz = jr->prefix.total_size - sizeof(jr->info); data = malloc(sz); if (!data) return -1; memcpy(data, &jr->info.entries, sz); jd->debug_data = data; /* * we must use nr_entry instead of size here because * we cannot distinguish actual entry from padding otherwise */ jd->nr_debug_entries = jr->info.nr_entry; return 0; } static int jit_repipe_unwinding_info(struct jit_buf_desc *jd, union jr_entry *jr) { void *unwinding_data; uint32_t unwinding_data_size; if (!(jd && jr)) return -1; unwinding_data_size = jr->prefix.total_size - sizeof(jr->unwinding); unwinding_data = malloc(unwinding_data_size); if (!unwinding_data) return -1; memcpy(unwinding_data, &jr->unwinding.unwinding_data, unwinding_data_size); jd->eh_frame_hdr_size = jr->unwinding.eh_frame_hdr_size; jd->unwinding_size = jr->unwinding.unwinding_size; jd->unwinding_mapped_size = jr->unwinding.mapped_size; free(jd->unwinding_data); jd->unwinding_data = unwinding_data; return 0; } static int jit_process_dump(struct jit_buf_desc *jd) { union jr_entry *jr; int ret = 0; while ((jr = jit_get_next_entry(jd))) { switch(jr->prefix.id) { case JIT_CODE_LOAD: ret = jit_repipe_code_load(jd, jr); break; case JIT_CODE_MOVE: ret = jit_repipe_code_move(jd, jr); break; case JIT_CODE_DEBUG_INFO: ret = jit_repipe_debug_info(jd, jr); break; case JIT_CODE_UNWINDING_INFO: ret = jit_repipe_unwinding_info(jd, jr); break; default: ret = 0; continue; } } return ret; } static int jit_inject(struct jit_buf_desc *jd, const char *path) { int ret; if (verbose > 0) fprintf(stderr, "injecting: %s\n", path); ret = jit_open(jd, path); if (ret) return -1; ret = jit_process_dump(jd); jit_close(jd); if (verbose > 0) fprintf(stderr, "injected: %s (%d)\n", path, ret); return 0; } /* * File must be with pattern .../jit-XXXX.dump * where XXXX is the PID of the process which did the mmap() * as captured in the RECORD_MMAP record */ static int jit_detect(const char *mmap_name, pid_t pid, struct nsinfo *nsi) { char *p; char *end = NULL; pid_t pid2; if (verbose > 2) fprintf(stderr, "jit marker trying : %s\n", mmap_name); /* * get file name */ p = strrchr(mmap_name, '/'); if (!p) return -1; /* * match prefix */ if (strncmp(p, "/jit-", 5)) return -1; /* * skip prefix */ p += 5; /* * must be followed by a pid */ if (!isdigit(*p)) return -1; pid2 = (int)strtol(p, &end, 10); if (!end) return -1; /* * pid does not match mmap pid * pid==0 in system-wide mode (synthesized) */ if (pid && pid2 != nsinfo__nstgid(nsi)) return -1; /* * validate suffix */ if (strcmp(end, ".dump")) return -1; if (verbose > 0) fprintf(stderr, "jit marker found: %s\n", mmap_name); return 0; } static void jit_add_pid(struct machine *machine, pid_t pid) { struct thread *thread = machine__findnew_thread(machine, pid, pid); if (!thread) { pr_err("%s: thread %d not found or created\n", __func__, pid); return; } thread__set_priv(thread, (void *)true); thread__put(thread); } static bool jit_has_pid(struct machine *machine, pid_t pid) { struct thread *thread = machine__find_thread(machine, pid, pid); void *priv; if (!thread) return false; priv = thread__priv(thread); thread__put(thread); return (bool)priv; } int jit_process(struct perf_session *session, struct perf_data *output, struct machine *machine, const char *filename, pid_t pid, pid_t tid, u64 *nbytes) { struct thread *thread; struct nsinfo *nsi; struct evsel *first; struct jit_buf_desc jd; int ret; thread = machine__findnew_thread(machine, pid, tid); if (thread == NULL) { pr_err("problem processing JIT mmap event, skipping it.\n"); return 0; } nsi = nsinfo__get(thread__nsinfo(thread)); thread__put(thread); /* * first, detect marker mmap (i.e., the jitdump mmap) */ if (jit_detect(filename, pid, nsi)) { nsinfo__put(nsi); /* * Strip //anon*, [anon:* and /memfd:* mmaps if we processed a jitdump for this pid */ if (jit_has_pid(machine, pid) && ((strncmp(filename, "//anon", 6) == 0) || (strncmp(filename, "[anon:", 6) == 0) || (strncmp(filename, "/memfd:", 7) == 0))) return 1; return 0; } memset(&jd, 0, sizeof(jd)); jd.session = session; jd.output = output; jd.machine = machine; jd.nsi = nsi; /* * track sample_type to compute id_all layout * perf sets the same sample type to all events as of now */ first = evlist__first(session->evlist); jd.sample_type = first->core.attr.sample_type; *nbytes = 0; ret = jit_inject(&jd, filename); if (!ret) { jit_add_pid(machine, pid); *nbytes = jd.bytes_written; ret = 1; } nsinfo__put(jd.nsi); free(jd.buf); return ret; }