/* * stp_tracepoint.c * * SystemTap layer for registering various kernel tracepoint APIs. * * Copyright (C) 2014-2018 Red Hat Inc. * * Initially derived from lttng-tracepoints.c: * LTTng adaptation layer for Linux kernel 3.15+ tracepoints. * * Copyright (C) 2014 Mathieu Desnoyers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; only * version 2.1 of the License. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include "stp_tracepoint.h" /* * Protect the tracepoint table. stp_tracepoint_mutex nests within * kernel/tracepoint.c tp_modlist_mutex. kernel/tracepoint.c * tracepoint_mutex nests within stp_tracepoint_mutex. */ static DEFINE_MUTEX(stp_tracepoint_mutex); #define TRACEPOINT_HASH_BITS 6 #define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; /* * The tracepoint entry is the node contained within the hash table. It * is a mapping from the "string" key to the struct tracepoint pointer. */ struct tracepoint_entry { struct hlist_node hlist; struct tracepoint *tp; int refcount; struct list_head probes; char name[0]; }; struct stp_tp_probe { struct tracepoint_func tp_func; struct list_head list; }; static int add_probe(struct tracepoint_entry *e, void *probe, void *data) { struct stp_tp_probe *p; int found = 0; list_for_each_entry(p, &e->probes, list) { if (p->tp_func.func == probe && p->tp_func.data == data) { found = 1; break; } } if (found) return -EEXIST; p = _stp_kmalloc_gfp(sizeof(struct stp_tp_probe), STP_ALLOC_SLEEP_FLAGS); if (!p) return -ENOMEM; p->tp_func.func = probe; p->tp_func.data = data; list_add(&p->list, &e->probes); return 0; } static int remove_probe(struct tracepoint_entry *e, void *probe, void *data) { struct stp_tp_probe *p; int found = 0; list_for_each_entry(p, &e->probes, list) { if (p->tp_func.func == probe && p->tp_func.data == data) { found = 1; break; } } if (found) { list_del(&p->list); _stp_kfree(p); return 0; } else { return -ENOENT; } } /* * Get tracepoint if the tracepoint is present in the tracepoint hash table. * Must be called with stp_tracepoint_mutex held. * Returns NULL if not present. */ static struct tracepoint_entry *get_tracepoint(const char *name) { struct hlist_head *head; struct tracepoint_entry *e; u32 hash = jhash(name, strlen(name), 0); head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; hlist_for_each_entry(e, head, hlist) { if (!strcmp(name, e->name)) return e; } return NULL; } /* * Add the tracepoint to the tracepoint hash table. Must be called with * stp_tracepoint_mutex held. */ static struct tracepoint_entry *add_tracepoint(const char *name) { struct hlist_head *head; struct tracepoint_entry *e; size_t name_len = strlen(name) + 1; u32 hash = jhash(name, name_len - 1, 0); head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; hlist_for_each_entry(e, head, hlist) { if (!strcmp(name, e->name)) { return ERR_PTR(-EEXIST); /* Already there */ } } /* * Using kmalloc here to allocate a variable length element. Could * cause some memory fragmentation if overused. */ e = _stp_kmalloc_gfp(sizeof(struct tracepoint_entry) + name_len, STP_ALLOC_SLEEP_FLAGS); if (!e) return ERR_PTR(-ENOMEM); memcpy(&e->name[0], name, name_len); e->tp = NULL; e->refcount = 0; INIT_LIST_HEAD(&e->probes); hlist_add_head(&e->hlist, head); return e; } /* * Remove the tracepoint from the tracepoint hash table. Must be called * with stp_tracepoint_mutex held. */ static void remove_tracepoint(struct tracepoint_entry *e) { hlist_del(&e->hlist); _stp_kfree(e); } static void do_tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data) { int ret = tracepoint_probe_register(tp, probe, data); if (ret) { dbug_tp(1, "error (%d) registering probe '%s'\n", ret, tp->name); } else { dbug_tp(2, "registered probe '%s'\n", tp->name); } } static void do_tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data) { int ret = tracepoint_probe_unregister(tp, probe, data); if (ret) { dbug_tp(1, "error (%d) unregistering probe '%s'\n", ret, tp->name); } else { dbug_tp(2, "unregistered probe '%s'\n", tp->name); } } int stp_tracepoint_probe_register(const char *name, void *probe, void *data) { struct tracepoint_entry *e; int ret = 0; might_sleep(); mutex_lock(&stp_tracepoint_mutex); e = get_tracepoint(name); if (!e) { e = add_tracepoint(name); if (IS_ERR(e)) { ret = PTR_ERR(e); goto end; } } /* add (probe, data) to entry */ ret = add_probe(e, probe, data); if (ret) goto end; e->refcount++; dbug_tp(2, "added probe on '%s'\n", name); if (e->tp) { do_tracepoint_probe_register(e->tp, probe, data); } end: mutex_unlock(&stp_tracepoint_mutex); if (ret) { dbug_tp(1, "error (%d) adding probe on '%s'\n", ret, name); } return ret; } int stp_tracepoint_probe_unregister(const char *name, void *probe, void *data) { struct tracepoint_entry *e; int ret = 0; might_sleep(); mutex_lock(&stp_tracepoint_mutex); e = get_tracepoint(name); if (!e) { ret = -ENOENT; goto end; } /* remove (probe, data) from entry */ ret = remove_probe(e, probe, data); if (ret) goto end; if (e->tp) { do_tracepoint_probe_unregister(e->tp, probe, data); } if (!--e->refcount) remove_tracepoint(e); dbug_tp(2, "removed probe on '%s'\n", name); end: mutex_unlock(&stp_tracepoint_mutex); if (ret) { dbug_tp(1, "error (%d) removing probe on '%s'\n", ret, name); } return ret; } #ifdef CONFIG_MODULES static int stp_tracepoint_coming(struct tp_module *tp_mod) { int i; might_sleep(); mutex_lock(&stp_tracepoint_mutex); for (i = 0; i < tp_mod->mod->num_tracepoints; i++) { struct tracepoint *tp; struct tracepoint_entry *e; struct stp_tp_probe *p; #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS tp = offset_to_ptr(&(((const int*)(tp_mod->mod->tracepoints_ptrs))[i])); #else tp = tp_mod->mod->tracepoints_ptrs[i]; #endif e = get_tracepoint(tp->name); if (!e) { e = add_tracepoint(tp->name); if (IS_ERR(e)) { dbug_tp(1, "error (%ld) adding %s:%s\n", PTR_ERR(e), tp_mod->mod->name, tp->name); continue; } } /* If already enabled, just check consistency */ if (e->tp) { if (e->tp != tp) { dbug_tp(1, "found duplicate '%s'\n", tp->name); } continue; } e->tp = tp; e->refcount++; dbug_tp(2, "added %s:%s\n", tp_mod->mod->name, tp->name); /* register each (probe, data) */ list_for_each_entry(p, &e->probes, list) { do_tracepoint_probe_register(e->tp, p->tp_func.func, p->tp_func.data); } } mutex_unlock(&stp_tracepoint_mutex); return 0; } static int stp_tracepoint_going(struct tp_module *tp_mod) { int i; might_sleep(); mutex_lock(&stp_tracepoint_mutex); for (i = 0; i < tp_mod->mod->num_tracepoints; i++) { struct tracepoint *tp; struct tracepoint_entry *e; struct stp_tp_probe *p; #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS tp = offset_to_ptr(&(((const int*)(tp_mod->mod->tracepoints_ptrs))[i])); #else tp = tp_mod->mod->tracepoints_ptrs[i]; #endif e = get_tracepoint(tp->name); if (!e || !e->tp) continue; /* unregister each (probe, data) */ list_for_each_entry(p, &e->probes, list) { do_tracepoint_probe_unregister(e->tp, p->tp_func.func, p->tp_func.data); } e->tp = NULL; if (!--e->refcount) remove_tracepoint(e); dbug_tp(2, "removed %s:%s\n", tp_mod->mod->name, tp->name); } mutex_unlock(&stp_tracepoint_mutex); return 0; } static int stp_tracepoint_notify(struct notifier_block *self, unsigned long val, void *data) { struct tp_module *tp_mod = data; int ret = 0; switch (val) { case MODULE_STATE_COMING: ret = stp_tracepoint_coming(tp_mod); break; case MODULE_STATE_GOING: ret = stp_tracepoint_going(tp_mod); break; default: break; } return ret; } static struct notifier_block stp_tracepoint_notifier = { .notifier_call = stp_tracepoint_notify, .priority = 0, }; static int stp_tracepoint_module_init(void) { return register_tracepoint_module_notifier(&stp_tracepoint_notifier); } static void stp_tracepoint_module_exit(void) { (void)unregister_tracepoint_module_notifier(&stp_tracepoint_notifier); } #else /* #ifdef CONFIG_MODULES */ static int stp_tracepoint_module_init(void) { return 0; } static void stp_tracepoint_module_exit(void) { } #endif /* #else #ifdef CONFIG_MODULES */ static void stp_kernel_tracepoint_add(struct tracepoint *tp, void *priv) { struct tracepoint_entry *e; struct stp_tp_probe *p; int *ret = priv; might_sleep(); mutex_lock(&stp_tracepoint_mutex); e = get_tracepoint(tp->name); if (!e) { e = add_tracepoint(tp->name); if (IS_ERR(e)) { dbug_tp(1, "error (%ld) adding kernel:%s\n", PTR_ERR(e), tp->name); *ret = (int) PTR_ERR(e); goto end; } } /* If already enabled, just check consistency */ if (e->tp) { if (e->tp != tp) { dbug_tp(1, "found duplicate '%s'\n", tp->name); } goto end; } e->tp = tp; e->refcount++; dbug_tp(2, "added kernel:%s\n", tp->name); /* register each (probe, data) */ list_for_each_entry(p, &e->probes, list) { do_tracepoint_probe_register(e->tp, p->tp_func.func, p->tp_func.data); } end: mutex_unlock(&stp_tracepoint_mutex); } static void stp_kernel_tracepoint_remove(struct tracepoint *tp, void *priv) { struct tracepoint_entry *e; int *ret = priv; might_sleep(); mutex_lock(&stp_tracepoint_mutex); e = get_tracepoint(tp->name); if (!e || e->refcount != 1 || !list_empty(&e->probes)) { *ret = -EINVAL; goto end; } remove_tracepoint(e); dbug_tp(2, "removed kernel:%s\n", tp->name); end: mutex_unlock(&stp_tracepoint_mutex); } static int stp_tracepoint_init(void) { int error_ret = 0; int ret = 0; for_each_kernel_tracepoint(stp_kernel_tracepoint_add, &ret); if (!ret) ret = stp_tracepoint_module_init(); if (!ret) return 0; for_each_kernel_tracepoint(stp_kernel_tracepoint_remove, &error_ret); return ret; } static void stp_tracepoint_exit(void) { int i, ret = 0; stp_tracepoint_module_exit(); for_each_kernel_tracepoint(stp_kernel_tracepoint_remove, &ret); might_sleep(); mutex_lock(&stp_tracepoint_mutex); for (i = 0; i < TRACEPOINT_TABLE_SIZE; i++) { struct hlist_head *head = &tracepoint_table[i]; /* All tracepoints should be removed */ if (!hlist_empty(head)) { dbug_tp(1, "tracepoint_table[%d] is not empty!\n", i); } } mutex_unlock(&stp_tracepoint_mutex); }