From svn-commits at oss.oracle.com Thu Jul 3 19:18:23 2008 From: svn-commits at oss.oracle.com (svn-commits at oss.oracle.com) Date: Thu, 03 Jul 2008 19:18:23 -0700 Subject: [Kernelpatches-commits] wjhuang commits r6 - in trunk: . wjhuang wjhuang/marker_patches Message-ID: Author: wjhuang Date: 2008-07-03 19:18:23 -0700 (Thu, 03 Jul 2008) New Revision: 6 Added: trunk/wjhuang/ trunk/wjhuang/marker_patches/ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch Log: created personal directory Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch (rev 0) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch 2008-07-04 02:18:23 UTC (rev 6) @@ -0,0 +1,1882 @@ +diff --git a/Makefile b/Makefile +index 7682056..d1b7d78 100644 +--- a/Makefile ++++ b/Makefile +@@ -601,7 +601,7 @@ quiet_cmd_vmlinux__ ?= LD $@ + cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ + -T $(vmlinux-lds) $(vmlinux-init) \ + --start-group $(vmlinux-main) --end-group \ +- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) ++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) + + # Generate new vmlinux version + quiet_cmd_vmlinux_version = GEN .version +@@ -725,11 +725,30 @@ debug_kallsyms: .tmp_map$(last_kallsyms) + + endif # ifdef CONFIG_KALLSYMS + ++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has ++# relevant sections renamed as per the linker script. 
++quiet_cmd_vmlinux-modpost = LD $@ ++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ ++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ ++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) ++define rule_vmlinux-modpost ++ : ++ +$(call cmd,vmlinux-modpost) ++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ ++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd ++endef ++ + # vmlinux image - including updated kernel symbols +-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE ++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE ++ $(call vmlinux-modpost) + $(call if_changed_rule,vmlinux__) + $(Q)rm -f .old_version + ++# build vmlinux.o first to catch section mismatch errors early ++$(kallsyms.o): vmlinux.o ++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ++ $(call if_changed_rule,vmlinux-modpost) ++ + # The actual objects are generated when descending, + # make sure no implicit rule kicks in + $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; +diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig +index 7dec8f0..73f833c 100644 +--- a/arch/i386/Kconfig ++++ b/arch/i386/Kconfig +@@ -1205,6 +1205,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + endmenu + + source "arch/i386/Kconfig.debug" +diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S +index a44d95d..23d73bb 100644 +--- a/arch/i386/kernel/vmlinux.lds.S ++++ b/arch/i386/kernel/vmlinux.lds.S +@@ -61,6 +61,7 @@ SECTIONS + /* writeable */ + .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } :data + +diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig +index c45b7b5..aee46db 100644 +--- a/arch/ia64/Kconfig ++++ b/arch/ia64/Kconfig +@@ -587,6 +587,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/ia64/Kconfig.debug" +diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S +index 5b0d5f6..9c63f83 100644 +--- a/arch/ia64/kernel/vmlinux.lds.S ++++ b/arch/ia64/kernel/vmlinux.lds.S +@@ -206,7 +206,7 @@ SECTIONS + + data : { } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) +- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } ++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index 41024aa..4ab8d59 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -1095,6 +1095,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + endmenu + + source "arch/powerpc/Kconfig.debug" +diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S +index c02298a..709bc47 100644 +--- a/arch/powerpc/kernel/vmlinux.lds.S ++++ b/arch/powerpc/kernel/vmlinux.lds.S +@@ -176,11 +176,13 @@ SECTIONS + *(.data) + *(.sdata) + *(.got.plt) *(.got) ++ MARKER + } + #else + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) ++ MARKER + } + + .opd : { +diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig +index 974474a..c81600c 100644 +--- a/arch/s390/Kconfig ++++ b/arch/s390/Kconfig +@@ -505,6 +505,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/s390/Kconfig.debug" +diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S +index df0c16a..69826b1 100644 +--- a/arch/s390/kernel/vmlinux.lds.S ++++ b/arch/s390/kernel/vmlinux.lds.S +@@ -46,6 +46,7 @@ SECTIONS + + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig +index b627f8d..e0467d1 100644 +--- a/arch/sparc64/Kconfig ++++ b/arch/sparc64/Kconfig +@@ -427,6 +427,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + endmenu + + source "arch/sparc64/Kconfig.debug" +diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S +index b097379..1f10e43 100644 +--- a/arch/sparc64/kernel/vmlinux.lds.S ++++ b/arch/sparc64/kernel/vmlinux.lds.S +@@ -27,6 +27,7 @@ SECTIONS + .data : + { + *(.data) ++ MARKER + CONSTRUCTORS + } + .data1 : { *(.data1) } +diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig +index d284a9a..e556e06 100644 +--- a/arch/x86_64/Kconfig ++++ b/arch/x86_64/Kconfig +@@ -704,6 +704,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/x86_64/Kconfig.debug" +diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S +index f17185f..a33251a 100644 +--- a/arch/x86_64/kernel/vmlinux.lds.S ++++ b/arch/x86_64/kernel/vmlinux.lds.S +@@ -67,6 +67,7 @@ SECTIONS + /* Data */ + .data : AT(ADDR(.data) - LOAD_OFFSET) { + *(.data) ++ MARKER + CONSTRUCTORS + } :data + +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index 6a40707..9b0fab5 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -125,7 +125,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); + + // ================= main 802.3ad protocol functions ================== + static int ad_lacpdu_send(struct port *port); +-static int ad_marker_send(struct port *port, struct marker *marker); ++static int ad_marker_send(struct port *port, struct bond_marker *marker); + static void ad_mux_machine(struct port *port); + static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); + static void ad_tx_machine(struct port *port); +@@ -138,8 +138,8 @@ static void 
ad_initialize_port(struct port *port, int lacp_fast); + static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); + static void ad_enable_collecting_distributing(struct port *port); + static void ad_disable_collecting_distributing(struct port *port); +-static void ad_marker_info_received(struct marker *marker_info, struct port *port); +-static void ad_marker_response_received(struct marker *marker, struct port *port); ++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); ++static void ad_marker_response_received(struct bond_marker *marker, struct port *port); + + + ///////////////////////////////////////////////////////////////////////////////// +@@ -903,12 +903,13 @@ static int ad_lacpdu_send(struct port *port) + * Returns: 0 on success + * < 0 on error + */ +-static int ad_marker_send(struct port *port, struct marker *marker) ++static int ad_marker_send(struct port *port, struct bond_marker *marker) + { + struct slave *slave = port->slave; + struct sk_buff *skb; +- struct marker_header *marker_header; +- int length = sizeof(struct marker_header); ++ struct bond_marker_header *marker_header; ++ int length = sizeof(struct bond_marker_header); ++ + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length + 16); +@@ -923,7 +924,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + +- marker_header = (struct marker_header *)skb_put(skb, length); ++ marker_header = (struct bond_marker_header *)skb_put(skb, length); + + marker_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it +@@ -1723,7 +1724,7 @@ static void ad_disable_collecting_distributing(struct port *port) + */ + static void ad_marker_info_send(struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + u16 index; + + // 
fill the marker PDU with the appropriate values +@@ -1756,13 +1757,14 @@ static void ad_marker_info_send(struct port *port) + * @port: the port we're looking at + * + */ +-static void ad_marker_info_received(struct marker *marker_info,struct port *port) ++static void ad_marker_info_received(struct bond_marker *marker_info, ++ struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + + // copy the received marker data to the response marker + //marker = *marker_info; +- memcpy(&marker, marker_info, sizeof(struct marker)); ++ memcpy(&marker, marker_info, sizeof(struct bond_marker)); + // change the marker subtype to marker response + marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; + // send the marker response +@@ -1781,7 +1783,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port + * response for marker PDU's, in this stage, but only to respond to marker + * information. + */ +-static void ad_marker_response_received(struct marker *marker, struct port *port) ++static void ad_marker_response_received(struct bond_marker *marker, ++ struct port *port) + { + marker=NULL; // just to satisfy the compiler + port=NULL; // just to satisfy the compiler +@@ -2179,15 +2182,16 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u + case AD_TYPE_MARKER: + // No need to convert fields to Little Endian since we don't use the marker's fields. 
+ +- switch (((struct marker *)lacpdu)->tlv_type) { ++ switch (((struct bond_marker *)lacpdu)->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + dprintk("Received Marker Information on port %d\n", port->actor_port_number); +- ad_marker_info_received((struct marker *)lacpdu, port); ++ ad_marker_info_received((struct bond_marker *)lacpdu, port); ++ + break; + + case AD_MARKER_RESPONSE_SUBTYPE: + dprintk("Received Marker Response on port %d\n", port->actor_port_number); +- ad_marker_response_received((struct marker *)lacpdu, port); ++ ad_marker_response_received((struct bond_marker *)lacpdu, port); + break; + + default: +diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h +index 6ad5ad6..bf93e7e 100644 +--- a/drivers/net/bonding/bond_3ad.h ++++ b/drivers/net/bonding/bond_3ad.h +@@ -92,7 +92,8 @@ typedef enum { + typedef enum { + AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype + AD_MARKER_RESPONSE_SUBTYPE // marker response subtype +-} marker_subtype_t; ++} bond_marker_subtype_t; ++ + + // timers types(43.4.9 in the 802.3ad standard) + typedef enum { +@@ -148,7 +149,7 @@ typedef struct lacpdu_header { + } lacpdu_header_t; + + // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) +-typedef struct marker { ++typedef struct bond_marker { + u8 subtype; // = 0x02 (marker PDU) + u8 version_number; // = 0x01 + u8 tlv_type; // = 0x01 (marker information) +@@ -161,12 +162,12 @@ typedef struct marker { + u8 tlv_type_terminator; // = 0x00 + u8 terminator_length; // = 0x00 + u8 reserved_90[90]; // = 0 +-} marker_t; ++} bond_marker_t; + +-typedef struct marker_header { ++typedef struct bond_marker_header { + struct ad_header ad_header; +- struct marker marker; +-} marker_header_t; ++ struct bond_marker marker; ++} bond_marker_header_t; + + #pragma pack() + +diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h +index 5102c6b..aed4049 100644 +--- a/drivers/scsi/qla4xxx/ql4_fw.h ++++ 
b/drivers/scsi/qla4xxx/ql4_fw.h +@@ -744,7 +744,7 @@ struct continuation_t1_entry { + #define ET_CONTINUE ET_CONT_T1 + + /* Marker entry structure*/ +-struct marker_entry { ++struct qla4_marker_entry { + struct qla4_header hdr; /* 00-03 */ + + uint32_t system_defined; /* 04-07 */ +diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c +index 4e532b4..43860eb 100644 +--- a/drivers/scsi/qla4xxx/ql4_iocb.c ++++ b/drivers/scsi/qla4xxx/ql4_iocb.c +@@ -65,7 +65,8 @@ int qla4xxx_get_req_pkt(struct scsi_qla_host *ha, + int qla4xxx_send_marker_iocb(struct scsi_qla_host *ha, + struct ddb_entry *ddb_entry, int lun) + { +- struct marker_entry *marker_entry; ++ struct qla4_marker_entry *marker_entry; ++ + unsigned long flags = 0; + uint8_t status = QLA_SUCCESS; + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index b9e964c..a175f1e 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -9,12 +9,21 @@ + /* Align . to a 8 byte boundary equals to maximum function alignment. */ + #define ALIGN_FUNCTION() . = ALIGN(8) + ++/* Kernel markers : pointers */ ++#define MARKER \ ++ . = ALIGN(8); \ ++ VMLINUX_SYMBOL(__start___markers) = .; \ ++ *(__markers) \ ++ VMLINUX_SYMBOL(__stop___markers) = .; ++ ++ + #define RODATA \ + . = ALIGN(4096); \ + .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_rodata) = .; \ + *(.rodata) *(.rodata.*) \ + *(__vermagic) /* Kernel version magic */ \ ++ *(__markers_strings) /* Markers: strings */ \ + } \ + \ + .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ +diff --git a/include/linux/marker.h b/include/linux/marker.h +new file mode 100644 +index 0000000..efbc82b +--- /dev/null ++++ b/include/linux/marker.h +@@ -0,0 +1,139 @@ ++#ifndef _LINUX_MARKER_H ++#define _LINUX_MARKER_H ++ ++/* ++ * Code markup for dynamic and static tracing. ++ * ++ * See Documentation/marker.txt. 
++ * ++ * (C) Copyright 2006 Mathieu Desnoyers ++ * ++ * This file is released under the GPLv2. ++ * See the file COPYING for more details. ++ */ ++ ++#include ++ ++struct module; ++struct marker; ++ ++/** ++ * marker_probe_func - Type of a marker probe function ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @args: variable argument list pointer. Use a pointer to overcome C's ++ * inability to pass this around as a pointer in a portable manner in ++ * the callee otherwise. ++ * ++ * Type of marker probe functions. They receive the mdata and need to parse the ++ * format string to recover the variable argument list. ++ */ ++typedef void marker_probe_func(void *probe_private, void *call_private, ++ const char *fmt, va_list *args); ++ ++struct marker_probe_closure { ++ marker_probe_func *func; /* Callback */ ++ void *probe_private; /* Private probe data */ ++}; ++ ++struct marker { ++ const char *name; /* Marker name */ ++ const char *format; /* Marker format string, describing the ++ * variable argument list. ++ */ ++ char state; /* Marker state. */ ++ char ptype; /* probe type : 0 : single, 1 : multi */ ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++} __attribute__((aligned(8))); ++ ++#ifdef CONFIG_MARKERS ++ ++/* ++ * Note : the empty asm volatile with read constraint is used here instead of a ++ * "used" attribute to fix a gcc 4.1.x bug. ++ * Make sure the alignment of the structure in the __markers section will ++ * not add unwanted padding between the beginning of the section and the ++ * structure. Force alignment to the same alignment as the section start. ++ */ ++#define __trace_mark(name, call_private, format, args...) 
\ ++ do { \ ++ static const char __mstrtab_##name[] \ ++ __attribute__((section("__markers_strings"))) \ ++ = #name "\0" format; \ ++ static struct marker __mark_##name \ ++ __attribute__((section("__markers"), aligned(8))) = \ ++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ ++ 0, 0, marker_probe_cb, \ ++ { __mark_empty_function, NULL}, NULL }; \ ++ __mark_check_format(format, ## args); \ ++ if (unlikely(__mark_##name.state)) { \ ++ (*__mark_##name.call) \ ++ (&__mark_##name, call_private, \ ++ format, ## args); \ ++ } \ ++ } while (0) ++ ++extern void marker_update_probe_range(struct marker *begin, ++ struct marker *end); ++#else /* !CONFIG_MARKERS */ ++#define __trace_mark(name, call_private, format, args...) \ ++ __mark_check_format(format, ## args) ++static inline void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ } ++#endif /* CONFIG_MARKERS */ ++ ++/** ++ * trace_mark - Marker ++ * @name: marker name, not quoted. ++ * @format: format string ++ * @args...: variable argument list ++ * ++ * Places a marker. ++ */ ++#define trace_mark(name, format, args...) \ ++ __trace_mark(name, NULL, format, ## args) ++ ++/** ++ * MARK_NOARGS - Format string for a marker with no argument. ++ */ ++#define MARK_NOARGS " " ++ ++/* To be used for string format validity checking with gcc */ ++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) ++{ ++} ++ ++extern marker_probe_func __mark_empty_function; ++ ++extern void marker_probe_cb(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++extern void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++ ++/* ++ * Connect a probe to a marker. ++ * private data pointer must be a valid allocated memory address, or NULL. 
++ */ ++extern int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private); ++ ++/* ++ * Returns the private data given to marker_probe_register. ++ */ ++extern int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private); ++/* ++ * Unregister a marker by providing the registered private data. ++ */ ++extern int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private); ++ ++extern void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num); ++ ++#endif +diff --git a/include/linux/module.h b/include/linux/module.h +index c458418..627afb9 100644 +--- a/include/linux/module.h ++++ b/include/linux/module.h +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -348,6 +349,10 @@ struct module + /* The command line arguments (may be mangled). People like + keeping pointers to this stuff */ + char *args; ++#ifdef CONFIG_MARKERS ++ struct marker *markers; ++ unsigned int num_markers; ++#endif + }; + + /* FIXME: It'd be nice to isolate modules during init, too, so they +@@ -467,6 +472,7 @@ int register_module_notifier(struct notifier_block * nb); + int unregister_module_notifier(struct notifier_block * nb); + + extern void print_modules(void); ++extern void module_update_markers(void); + + struct device_driver; + void module_add_driver(struct module *, struct device_driver *); +@@ -569,6 +575,10 @@ static inline void print_modules(void) + { + } + ++static inline void module_update_markers(void) ++{ ++} ++ + struct device_driver; + struct module; + +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index b4ca73d..5f12d1b 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -42,6 +42,19 @@ + #include + #include + ++/* ++ * Prevent the compiler from merging or refetching accesses. 
The compiler ++ * is also forbidden from reordering successive instances of ACCESS_ONCE(), ++ * but only when the compiler is aware of some particular ordering. One way ++ * to make the compiler aware of ordering is to put the two invocations of ++ * ACCESS_ONCE() in different C statements. ++ * ++ * This macro does absolutely -nothing- to prevent the CPU from reordering, ++ * merging, or refetching absolutely anything at any time. ++ * ++ */ ++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) ++ + /** + * struct rcu_head - callback structure for use with RCU + * @next: next update requests in a list +diff --git a/kernel/Makefile b/kernel/Makefile +index ed4af9c..b4ad7a7 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -50,6 +50,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o + obj-$(CONFIG_RELAY) += relay.o + obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o + obj-$(CONFIG_TASKSTATS) += taskstats.o ++obj-$(CONFIG_MARKERS) += marker.o + obj-$(CONFIG_UTRACE) += utrace.o + + ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) +diff --git a/kernel/marker.c b/kernel/marker.c +new file mode 100644 +index 0000000..c4c2cd8 +--- /dev/null ++++ b/kernel/marker.c +@@ -0,0 +1,851 @@ ++/* ++ * Copyright (C) 2007 Mathieu Desnoyers ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern struct marker __start___markers[]; ++extern struct marker __stop___markers[]; ++ ++/* Set to 1 to enable marker debug output */ ++const int marker_debug; ++ ++/* ++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin ++ * and module markers and the hash table. ++ */ ++static DEFINE_MUTEX(markers_mutex); ++ ++/* ++ * Marker hash table, containing the active markers. ++ * Protected by module_mutex. ++ */ ++#define MARKER_HASH_BITS 6 ++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) ++ ++/* ++ * Note about RCU : ++ * It is used to make sure every handler has finished using its private data ++ * between two consecutive operation (add or remove) on a given marker. It is ++ * also used to delay the free of multiple probes array until a quiescent state ++ * is reached. ++ * marker entries modifications are protected by the markers_mutex. ++ */ ++struct marker_entry { ++ struct hlist_node hlist; ++ char *format; ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++ int refcount; /* Number of times armed. 0 if disarmed. */ ++ struct rcu_head rcu; ++ void *oldptr; ++ char rcu_pending:1; ++ char ptype:1; ++ char name[0]; /* Contains name'\0'format'\0' */ ++}; ++ ++static struct hlist_head marker_table[MARKER_TABLE_SIZE]; ++ ++/** ++ * __mark_empty_function - Empty probe callback ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @...: variable argument list ++ * ++ * Empty callback provided as a probe to the markers. 
By providing this to a ++ * disabled marker, we make sure the execution flow is always valid even ++ * though the function pointer change and the marker enabling are two distinct ++ * operations that modifies the execution flow of preemptible code. ++ */ ++void __mark_empty_function(void *probe_private, void *call_private, ++ const char *fmt, va_list *args) ++{ ++} ++EXPORT_SYMBOL_GPL(__mark_empty_function); ++ ++/* ++ * marker_probe_cb Callback that prepares the variable argument list for probes. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we ++ * need to put a full smp_rmb() in this branch. This is why we do not use ++ * rcu_dereference() for the pointer read. ++ */ ++void marker_probe_cb(const struct marker *mdata, void *call_private, ++ const char *fmt, ...) ++{ ++ va_list args; ++ char ptype; ++ ++ /* ++ * disabling preemption to make sure the teardown of the callbacks can ++ * be done correctly when they are in modules and they insure RCU read ++ * coherency. ++ */ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ va_start(args, fmt); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ va_end(args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. 
Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. ++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) { ++ va_start(args, fmt); ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ va_end(args); ++ } ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb); ++ ++/* ++ * marker_probe_cb Callback that does not prepare the variable argument list. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Should be connected to markers "MARK_NOARGS". ++ */ ++void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...) ++{ ++ va_list args; /* not initialized */ ++ char ptype; ++ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. 
++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); ++ ++static void free_old_closure(struct rcu_head *head) ++{ ++ struct marker_entry *entry = container_of(head, ++ struct marker_entry, rcu); ++ kfree(entry->oldptr); ++ /* Make sure we free the data before setting the pending flag to 0 */ ++ smp_wmb(); ++ entry->rcu_pending = 0; ++} ++ ++static void debug_print_probes(struct marker_entry *entry) ++{ ++ int i; ++ ++ if (!marker_debug) ++ return; ++ ++ if (!entry->ptype) { ++ printk(KERN_DEBUG "Single probe : %p %p\n", ++ entry->single.func, ++ entry->single.probe_private); ++ } else { ++ for (i = 0; entry->multi[i].func; i++) ++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, ++ entry->multi[i].func, ++ entry->multi[i].probe_private); ++ } ++} ++ ++static struct marker_probe_closure * ++marker_entry_add_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0; ++ struct marker_probe_closure *old, *new; ++ ++ WARN_ON(!probe); ++ ++ debug_print_probes(entry); ++ old = entry->multi; ++ if (!entry->ptype) { ++ if (entry->single.func == probe && ++ entry->single.probe_private == probe_private) ++ return ERR_PTR(-EBUSY); ++ if (entry->single.func == __mark_empty_function) { ++ /* 0 -> 1 probes */ ++ entry->single.func = probe; ++ entry->single.probe_private = probe_private; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* 1 -> 2 probes */ ++ nr_probes = 1; ++ old = NULL; ++ } ++ } else { ++ /* (N -> N+1), (N != 0, 1) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) ++ if (old[nr_probes].func == probe ++ && old[nr_probes].probe_private ++ == probe_private) ++ return ERR_PTR(-EBUSY); ++ } ++ /* + 2 : one for new probe, one for NULL func */ ++ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), ++ GFP_KERNEL); ++ if (new == NULL) ++ return ERR_PTR(-ENOMEM); ++ if (!old) ++ new[0] = entry->single; ++ else ++ memcpy(new, old, ++ nr_probes * sizeof(struct marker_probe_closure)); ++ new[nr_probes].func = probe; ++ new[nr_probes].probe_private = probe_private; ++ entry->refcount = nr_probes + 1; ++ entry->multi = new; ++ entry->ptype = 1; ++ debug_print_probes(entry); ++ return old; ++} ++ ++static struct marker_probe_closure * ++marker_entry_remove_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0, nr_del = 0, i; ++ struct marker_probe_closure *old, *new; ++ ++ old = entry->multi; ++ ++ debug_print_probes(entry); ++ if (!entry->ptype) { ++ /* 0 -> N is an error */ ++ WARN_ON(entry->single.func == __mark_empty_function); ++ /* 1 -> 0 probes */ ++ WARN_ON(probe && entry->single.func != probe); ++ WARN_ON(entry->single.probe_private != probe_private); ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* (N -> M), (N > 1, M >= 0) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { ++ if ((!probe || old[nr_probes].func == probe) ++ && old[nr_probes].probe_private ++ == probe_private) ++ nr_del++; ++ } ++ } ++ ++ if (nr_probes - nr_del == 0) { ++ /* N -> 0, (N > 1) */ ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ } else if (nr_probes - nr_del == 1) { ++ /* N -> 1, (N > 1) */ ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ entry->single = old[i]; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ } else { ++ int j = 0; ++ /* N -> M, (N > 1, M > 1) */ ++ /* + 1 for NULL */ ++ new = kzalloc((nr_probes - nr_del + 1) ++ * sizeof(struct marker_probe_closure), GFP_KERNEL); ++ if (new == NULL) ++ return 
ERR_PTR(-ENOMEM); ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ new[j++] = old[i]; ++ entry->refcount = nr_probes - nr_del; ++ entry->ptype = 1; ++ entry->multi = new; ++ } ++ debug_print_probes(entry); ++ return old; ++} ++ ++/* ++ * Get marker if the marker is present in the marker hash table. ++ * Must be called with markers_mutex held. ++ * Returns NULL if not present. ++ */ ++static struct marker_entry *get_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ u32 hash = jhash(name, strlen(name), 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) ++ return e; ++ } ++ return NULL; ++} ++ ++/* ++ * Add the marker to the marker hash table. Must be called with markers_mutex ++ * held. ++ */ ++static struct marker_entry *add_marker(const char *name, const char *format) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ size_t format_len = 0; ++ u32 hash = jhash(name, name_len-1, 0); ++ ++ if (format) ++ format_len = strlen(format) + 1; ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ printk(KERN_NOTICE ++ "Marker %s busy\n", name); ++ return ERR_PTR(-EBUSY); /* Already there */ ++ } ++ } ++ /* ++ * Using kmalloc here to allocate a variable length element. Could ++ * cause some memory fragmentation if overused. 
++ */ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return ERR_PTR(-ENOMEM); ++ memcpy(&e->name[0], name, name_len); ++ if (format) { ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ } else { ++ e->format = NULL; ++ e->call = marker_probe_cb; ++ } ++ e->single.func = __mark_empty_function; ++ e->single.probe_private = NULL; ++ e->multi = NULL; ++ e->ptype = 0; ++ e->refcount = 0; ++ e->rcu_pending = 0; ++ hlist_add_head(&e->hlist, head); ++ return e; ++} ++ ++/* ++ * Remove the marker from the marker hash table. Must be called with mutex_lock ++ * held. ++ */ ++static int remove_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ int found = 0; ++ size_t len = strlen(name) + 1; ++ u32 hash = jhash(name, len-1, 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) ++ return -ENOENT; ++ if (e->single.func != __mark_empty_function) ++ return -EBUSY; ++ hlist_del(&e->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if (e->rcu_pending) ++ rcu_barrier(); ++ kfree(e); ++ return 0; ++} ++ ++/* ++ * Set the mark_entry format to the format found in the element. 
++ */
++static int marker_set_format(struct marker_entry **entry, const char *format)
++{
++	struct marker_entry *e;
++	size_t name_len = strlen((*entry)->name) + 1;
++	size_t format_len = strlen(format) + 1;
++
++
++	/*
++	 * The name and format strings are stored in the same allocation as
++	 * the entry itself, so recording a format means building a
++	 * replacement entry and swapping it in for the old one.
++	 */
++	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
++			GFP_KERNEL);
++	if (!e)
++		return -ENOMEM;
++	memcpy(&e->name[0], (*entry)->name, name_len);
++	e->format = &e->name[name_len];
++	memcpy(e->format, format, format_len);
++	if (strcmp(e->format, MARK_NOARGS) == 0)
++		e->call = marker_probe_cb_noarg;
++	else
++		e->call = marker_probe_cb;
++	/* Carry the probe state over from the entry being replaced. */
++	e->single = (*entry)->single;
++	e->multi = (*entry)->multi;
++	e->ptype = (*entry)->ptype;
++	e->refcount = (*entry)->refcount;
++	e->rcu_pending = 0;
++	/* Put the new entry at the old entry's position in the hash chain. */
++	hlist_add_before(&e->hlist, &(*entry)->hlist);
++	hlist_del(&(*entry)->hlist);
++	/* Make sure the call_rcu has been executed */
++	if ((*entry)->rcu_pending)
++		rcu_barrier();
++	kfree(*entry);
++	/* The caller's pointer now refers to the replacement entry. */
++	*entry = e;
++	trace_mark(core_marker_format, "name %s format %s",
++			e->name, e->format);
++	return 0;
++}
++
++/*
++ * Sets the probe callback corresponding to one marker.
++ *
++ * Copies the probe state held in *entry to the marker site elem and sets
++ * elem->state to active. If the entry has no format yet, the format of the
++ * site is recorded through marker_set_format(), which may replace *entry.
++ * Returns 0 on success, -EPERM when the entry's format differs from the
++ * site's format, or the error returned by marker_set_format().
++ */
++static int set_marker(struct marker_entry **entry, struct marker *elem,
++		int active)
++{
++	int ret;
++	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
++
++	if ((*entry)->format) {
++		if (strcmp((*entry)->format, elem->format) != 0) {
++			printk(KERN_NOTICE
++				"Format mismatch for probe %s "
++				"(%s), marker (%s)\n",
++				(*entry)->name,
++				(*entry)->format,
++				elem->format);
++			return -EPERM;
++		}
++	} else {
++		ret = marker_set_format(entry, elem->format);
++		if (ret)
++			return ret;
++	}
++
++	/*
++	 * probe_cb setup (statically known) is done here. It is
++	 * asynchronous with the rest of execution, therefore we only
++	 * pass from a "safe" callback (with argument) to an "unsafe"
++	 * callback (does not set arguments).
++	 */
++	elem->call = (*entry)->call;
++	/*
++	 * Sanity check :
++	 * We only update the single probe private data when the ptr is
++	 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
++	 */
++	WARN_ON(elem->single.func != __mark_empty_function
++		&& elem->single.probe_private
++		!= (*entry)->single.probe_private &&
++		!elem->ptype);
++	elem->single.probe_private = (*entry)->single.probe_private;
++	/*
++	 * Make sure the private data is valid when we update the
++	 * single probe ptr.
++	 */
++	smp_wmb();
++	elem->single.func = (*entry)->single.func;
++	/*
++	 * We also make sure that the new probe callbacks array is consistent
++	 * before setting a pointer to it.
++	 */
++	rcu_assign_pointer(elem->multi, (*entry)->multi);
++	/*
++	 * Update the function or multi probe array pointer before setting the
++	 * ptype.
++	 */
++	smp_wmb();
++	elem->ptype = (*entry)->ptype;
++	elem->state = active;
++
++	return 0;
++}
++
++/*
++ * Disable a marker and its probe callback.
++ * Note: only a synchronize_sched() issued after setting the site's probe
++ * function to the empty function ensures that the original callback is not
++ * used anymore. This is ensured by preemption disabling around the call site.
++ */
++static void disable_marker(struct marker *elem)
++{
++	/* leave "call" as is. It is known statically. */
++	elem->state = 0;
++	elem->single.func = __mark_empty_function;
++	/* Update the function before setting the ptype */
++	smp_wmb();
++	elem->ptype = 0;	/* single probe */
++	/*
++	 * Leave the private data and id there, because removal is racy and
++	 * should be done only after a synchronize_sched(). These are never used
++	 * until the next initialization anyway.
++	 */
++}
++
++/**
++ * marker_update_probe_range - Update a probe range
++ * @begin: beginning of the range
++ * @end: end of the range
++ *
++ * Updates the probe callback corresponding to a range of markers.
++ */
++void marker_update_probe_range(struct marker *begin,
++	struct marker *end)
++{
++	struct marker *iter;
++	struct marker_entry *mark_entry;
++
++	mutex_lock(&markers_mutex);
++	for (iter = begin; iter < end; iter++) {
++		mark_entry = get_marker(iter->name);
++		if (mark_entry) {
++			/*
++			 * The site is activated only when the entry has a
++			 * non-zero refcount.
++			 */
++			set_marker(&mark_entry, iter,
++					!!mark_entry->refcount);
++			/*
++			 * ignore error, continue
++			 */
++		} else {
++			/* No entry is registered under this site's name. */
++			disable_marker(iter);
++		}
++	}
++	mutex_unlock(&markers_mutex);
++}
++
++/*
++ * Update probes, removing the faulty probes.
++ * Updates the core kernel marker range (__start___markers up to
++ * __stop___markers) and then calls module_update_markers() for the markers
++ * in modules.
++ * NOTE(review): this comment used to state that a synchronize_sched() is
++ * issued "when no reference to the module passed as parameter is found in
++ * the probes so the probe module can be safely unloaded from now on". This
++ * function takes no parameter and does not call synchronize_sched() itself.
++ *
++ * Internal callback only changed before the first probe is connected to it.
++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
++ * transitions. All other transitions will leave the old private data valid.
++ * This makes the non-atomicity of the callback/private data updates valid.
++ *
++ * "special case" updates :
++ * 0 -> 1 callback
++ * 1 -> 0 callback
++ * 1 -> 2 callbacks
++ * 2 -> 1 callbacks
++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
++ * Side effect : marker_set_format may delete the marker entry (creating a
++ * replacement).
++ */
++static void marker_update_probes(void)
++{
++	/* Core kernel markers */
++	marker_update_probe_range(__start___markers, __stop___markers);
++	/* Markers in modules. */
++	module_update_markers();
++}
++
++/**
++ * marker_probe_register - Connect a probe to a marker
++ * @name: marker name
++ * @format: format string
++ * @probe: probe handler
++ * @probe_private: probe private data
++ *
++ * private data must be a valid allocated memory address, or NULL.
++ * Returns 0 if ok, error value on error.
++ * The probe address must at least be aligned on the architecture pointer size.
++ */
++int marker_probe_register(const char *name, const char *format,
++			marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	int ret = 0;
++	struct marker_probe_closure *old;
++
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		/* First probe for this name: create the hash table entry. */
++		entry = add_marker(name, format);
++		if (IS_ERR(entry)) {
++			ret = PTR_ERR(entry);
++			goto end;
++		}
++	}
++	/*
++	 * If we detect that a call_rcu is pending for this marker,
++	 * make sure it's executed now.
++	 */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_add_probe(entry, probe, probe_private);
++	if (IS_ERR(old)) {
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	/*
++	 * marker_update_probe_range() takes markers_mutex itself, so the
++	 * mutex has to be dropped around the update.
++	 */
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		/*
++		 * The entry went away while the mutex was dropped, so there
++		 * is no rcu head left to queue the old closure array on.
++		 * Wait for the marker call sites (which run with preemption
++		 * disabled) to finish, free the array directly and report the
++		 * failure instead of dereferencing a NULL entry.
++		 */
++		WARN_ON(1);
++		synchronize_sched();
++		kfree(old);
++		ret = -ENOENT;
++		goto end;
++	}
++	/* Hand the previous closure array to free_old_closure(). */
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++end:
++	mutex_unlock(&markers_mutex);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_register);
++
++/**
++ * marker_probe_unregister - Disconnect a probe from a marker
++ * @name: marker name
++ * @probe: probe function pointer
++ * @probe_private: probe private data
++ *
++ * Returns 0 on success, or a negative error value (-ENOENT when no marker
++ * named @name is registered).
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled section
++ * has finished.
++ */
++int marker_probe_unregister(const char *name,
++	marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	struct marker_probe_closure *old;
++	int ret = 0;
++
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		ret = -ENOENT;
++		goto end;
++	}
++	/* Let a pending free_old_closure() run before oldptr is reused. */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_remove_probe(entry, probe, probe_private);
++	if (IS_ERR(old)) {
++		/*
++		 * marker_entry_remove_probe() can fail with -ENOMEM. The
++		 * error pointer must never reach entry->oldptr, because
++		 * free_old_closure() passes oldptr to kfree().
++		 */
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	/*
++	 * marker_update_probe_range() takes markers_mutex itself, so the
++	 * mutex has to be dropped around the update.
++	 */
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		/*
++		 * The entry was removed while the mutex was dropped: there is
++		 * no rcu head to queue the old closure array on. Wait for the
++		 * marker call sites (which run with preemption disabled) and
++		 * free the array directly.
++		 */
++		synchronize_sched();
++		kfree(old);
++		goto end;
++	}
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++	remove_marker(name);	/* Ignore busy error message */
++end:
++	mutex_unlock(&markers_mutex);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister);
++
++/*
++ * Find the first marker entry that has (probe, probe_private) connected.
++ * Scans every bucket of the marker hash table. Returns NULL if no entry
++ * matches.
++ */
++static struct marker_entry *
++get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	unsigned int i, j;
++	struct hlist_head *head;
++	struct hlist_node *node;
++
++	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
++		head = &marker_table[i];
++		hlist_for_each_entry(entry, node, head, hlist) {
++			if (!entry->ptype) {
++				if (entry->single.func == probe
++						&& entry->single.probe_private
++						== probe_private)
++					return entry;
++			} else {
++				struct marker_probe_closure *closure;
++				closure = entry->multi;
++				/*
++				 * Use a separate index here: reusing "i"
++				 * would clobber the bucket index of the outer
++				 * loop and skip or rescan hash buckets.
++				 */
++				for (j = 0; closure[j].func; j++) {
++					if (closure[j].func == probe &&
++							closure[j].probe_private
++							== probe_private)
++						return entry;
++				}
++			}
++		}
++	}
++	return NULL;
++}
++
++/**
++ * marker_probe_unregister_private_data - Disconnect a probe from a marker
++ * @probe: probe function
++ * @probe_private: probe private data
++ *
++ * Unregister a probe by providing the registered private data.
++ * Only removes the first marker found in hash table.
++ * Return 0 on success or error value.
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled section
++ * has finished.
++ */
++int marker_probe_unregister_private_data(marker_probe_func *probe,
++		void *probe_private)
++{
++	struct marker_entry *entry;
++	int ret = 0;
++	struct marker_probe_closure *old;
++	char *name = NULL;
++	size_t name_len;
++
++	mutex_lock(&markers_mutex);
++	entry = get_marker_from_private_data(probe, probe_private);
++	if (!entry) {
++		ret = -ENOENT;
++		goto end;
++	}
++	/*
++	 * Remember the marker name before touching the entry. Once the probe
++	 * has been removed, a lookup by (probe, probe_private) can no longer
++	 * find this entry (it would return NULL or an unrelated marker), and
++	 * marker_update_probes() may replace the entry itself. The copy is
++	 * taken first so that an allocation failure leaves the probe
++	 * connected.
++	 */
++	name_len = strlen(entry->name) + 1;
++	name = kmalloc(name_len, GFP_KERNEL);
++	if (!name) {
++		ret = -ENOMEM;
++		goto end;
++	}
++	memcpy(name, entry->name, name_len);
++	/* Let a pending free_old_closure() run before oldptr is reused. */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_remove_probe(entry, NULL, probe_private);
++	if (IS_ERR(old)) {
++		/*
++		 * The error pointer must never reach entry->oldptr, because
++		 * free_old_closure() passes oldptr to kfree().
++		 */
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	/*
++	 * marker_update_probe_range() takes markers_mutex itself, so the
++	 * mutex has to be dropped around the update.
++	 */
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		/*
++		 * The entry was removed while the mutex was dropped: there is
++		 * no rcu head to queue the old closure array on. Wait for the
++		 * marker call sites (which run with preemption disabled) and
++		 * free the array directly.
++		 */
++		synchronize_sched();
++		kfree(old);
++		goto end;
++	}
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++	remove_marker(name);	/* Ignore busy error message */
++end:
++	mutex_unlock(&markers_mutex);
++	kfree(name);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
++
++/**
++ * marker_get_private_data - Get a marker's probe private data
++ * @name: marker name
++ * @probe: probe to match
++ * @num: get the nth matching probe's private data
++ *
++ * Returns the nth private data pointer (starting from 0) matching, or an
++ * ERR_PTR.
++ * The private data pointer should _only_ be dereferenced if the caller is the
++ * owner of the data, or its content could vanish. This is mostly used to
++ * confirm that a caller is the owner of a registered probe.
++ */
++void *marker_get_private_data(const char *name, marker_probe_func *probe,
++		int num)
++{
++	struct hlist_head *head;
++	struct hlist_node *node;
++	struct marker_entry *e;
++	size_t name_len = strlen(name) + 1;
++	u32 hash = jhash(name, name_len-1, 0);
++	int i;
++
++	/*
++	 * NOTE(review): markers_mutex is not taken in this function; confirm
++	 * that callers serialise against probe registration/unregistration.
++	 */
++	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++	hlist_for_each_entry(e, node, head, hlist) {
++		if (!strcmp(name, e->name)) {
++			if (!e->ptype) {
++				/*
++				 * Single probe: only index 0 can match.
++				 * Anything else leaves the loop and yields
++				 * ERR_PTR(-ENOENT).
++				 */
++				if (num == 0 && e->single.func == probe)
++					return e->single.probe_private;
++				else
++					break;
++			} else {
++				struct marker_probe_closure *closure;
++				int match = 0;
++				closure = e->multi;
++				/* The closure array ends at the first NULL func. */
++				for (i = 0; closure[i].func; i++) {
++					if (closure[i].func != probe)
++						continue;
++					/* Count matches until the num-th one. */
++					if (match++ == num)
++						return closure[i].probe_private;
++				}
++			}
++		}
++	}
++	return ERR_PTR(-ENOENT);
++}
++EXPORT_SYMBOL_GPL(marker_get_private_data);
+diff --git a/kernel/module.c b/kernel/module.c
+index f9a5987..ac99222 100644
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -1488,6 +1488,8 @@ static struct module *load_module(void __user *umod,
+ 	unsigned int unusedcrcindex;
+ 	unsigned int unusedgplindex;
+ 	unsigned int unusedgplcrcindex;
++	unsigned int markersindex;
++	unsigned int markersstringsindex;
+ 	struct module *mod;
+ 	long err = 0;
+ 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+@@ -1756,6 +1758,9 @@ static struct module *load_module(void __user *umod,
+ 		add_taint(TAINT_FORCED_MODULE);
+ 	}
+ #endif
++	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
++	markersstringsindex = find_sec(hdr, sechdrs, secstrings,
++			"__markers_strings");
+ 
+ 	/* Now do relocations. 
*/ + for (i = 1; i < hdr->e_shnum; i++) { +@@ -1778,6 +1783,11 @@ static struct module *load_module(void __user *umod, + if (err < 0) + goto cleanup; + } ++#ifdef CONFIG_MARKERS ++ mod->markers = (void *)sechdrs[markersindex].sh_addr; ++ mod->num_markers = ++ sechdrs[markersindex].sh_size / sizeof(*mod->markers); ++#endif + + /* Find duplicate symbols */ + err = verify_export_symbols(mod); +@@ -1796,6 +1806,11 @@ static struct module *load_module(void __user *umod, + + add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + ++#ifdef CONFIG_MARKERS ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++#endif + err = module_finalize(hdr, sechdrs, mod); + if (err < 0) + goto cleanup; +@@ -2259,3 +2274,18 @@ EXPORT_SYMBOL(module_remove_driver); + void struct_module(struct module *mod) { return; } + EXPORT_SYMBOL(struct_module); + #endif ++ ++#ifdef CONFIG_MARKERS ++void module_update_markers(void) ++{ ++ struct module *mod; ++ ++ mutex_lock(&module_mutex); ++ list_for_each_entry(mod, &modules, list) ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++ mutex_unlock(&module_mutex); ++} ++#endif ++ +diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost +index 0a64688..886b70c 100644 +--- a/scripts/Makefile.modpost ++++ b/scripts/Makefile.modpost +@@ -13,6 +13,7 @@ + # 2) modpost is then used to + # 3) create one .mod.c file pr. 
module + # 4) create one Module.symvers file with CRC for all exported symbols ++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers + # 5) compile all .mod.c files + # 6) final link of the module to a file + +@@ -41,6 +42,10 @@ include scripts/Makefile.lib + + kernelsymfile := $(objtree)/Module.symvers + modulesymfile := $(KBUILD_EXTMOD)/Module.symvers ++kernelmarkersfile := $(objtree)/Module.markers ++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers ++ ++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) + + # Step 1), find all modules listed in $(MODVERDIR)/ + __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) +@@ -58,16 +63,27 @@ quiet_cmd_modpost = MODPOST + $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ + $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ + $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ ++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ ++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ + $(filter-out FORCE,$^) + + PHONY += __modpost + __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE + $(call cmd,modpost) + ++quiet_cmd_kernel-mod = MODPOST $@ ++ cmd_kernel-mod = $(cmd_modpost) $@ ++ ++vmlinux.o: FORCE ++ $(call cmd,kernel-mod) ++ + # Declare generated files as targets for modpost + $(symverfile): __modpost ; + $(modules:.ko=.mod.c): __modpost ; + ++ifdef CONFIG_MARKERS ++$(markersfile): __modpost ; ++endif + + # Step 5), compile all *.mod.c files + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index dfde0e8..81e5910 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -10,7 +10,8 @@ + * + * Usage: modpost vmlinux module1.o module2.o ... 
+ */ +- ++#define _GNU_SOURCE ++#include + #include + #include "modpost.h" + #include "../../include/linux/license.h" +@@ -385,6 +386,8 @@ static void parse_elf(struct elf_info *info, const char *filename) + info->export_unused_gpl_sec = i; + else if (strcmp(secname, "__ksymtab_gpl_future") == 0) + info->export_gpl_future_sec = i; ++ else if (strcmp(secname, "__markers_strings") == 0) ++ info->markers_strings_sec = i; + + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; +@@ -859,6 +862,62 @@ static void check_sec_ref(struct module *mod, const char *modname, + } + } + ++static void get_markers(struct elf_info *info, struct module *mod) ++{ ++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; ++ const char *strings = (const char *) info->hdr + sh->sh_offset; ++ const Elf_Sym *sym, *first_sym, *last_sym; ++ size_t n; ++ ++ if (!info->markers_strings_sec) ++ return; ++ ++ /* ++ * First count the strings. We look for all the symbols defined ++ * in the __markers_strings section named __mstrtab_*. For ++ * these local names, the compiler puts a random .NNN suffix on, ++ * so the names don't correspond exactly. ++ */ ++ first_sym = last_sym = NULL; ++ n = 0; ++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ if (first_sym == NULL) ++ first_sym = sym; ++ last_sym = sym; ++ ++n; ++ } ++ ++ if (n == 0) ++ return; ++ ++ /* ++ * Now collect each name and format into a line for the output. ++ * Lines look like: ++ * marker_name vmlinux marker %s format %d ++ * The format string after the second \t can use whitespace. 
++ */ ++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); ++ mod->nmarkers = n; ++ ++ n = 0; ++ for (sym = first_sym; sym <= last_sym; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ const char *name = strings + sym->st_value; ++ const char *fmt = strchr(name, '\0') + 1; ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ mod->markers[n++] = line; ++ } ++} ++ + /** + * Functions used only during module init is marked __init and is stored in + * a .init.text section. Likewise data is marked __initdata and stored in +@@ -1061,6 +1120,8 @@ static void read_symbols(char *modname) + get_src_version(modname, mod->srcversion, + sizeof(mod->srcversion)-1); + ++ get_markers(&info, mod); ++ + parse_elf_finish(&info); + + /* Our trick to get versioning for struct_module - it's +@@ -1394,6 +1455,93 @@ static void write_dump(const char *fname) + write_if_changed(&buf, fname); + } + ++static void add_marker(struct module *mod, const char *name, const char *fmt) ++{ ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ ++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * ++ sizeof mod->markers[0]))); ++ mod->markers[mod->nmarkers++] = line; ++} ++ ++static void read_markers(const char *fname) ++{ ++ unsigned long size, pos = 0; ++ void *file = grab_file(fname, &size); ++ char *line; ++ ++ if (!file) /* No old markers, silently ignore */ ++ return; ++ ++ while ((line = get_next_line(&pos, file, size))) { ++ char *marker, *modname, *fmt; ++ struct module *mod; ++ ++ marker = line; ++ modname = strchr(marker, '\t'); ++ if (!modname) ++ goto fail; ++ *modname++ = '\0'; ++ fmt = strchr(modname, '\t'); ++ if (!fmt) ++ goto fail; ++ *fmt++ = '\0'; ++ if (*marker == '\0' || *modname == '\0') ++ goto fail; ++ ++ mod = 
find_module(modname); ++ if (!mod) { ++ if (is_vmlinux(modname)) ++ have_vmlinux = 1; ++ mod = new_module(NOFAIL(strdup(modname))); ++ mod->skip = 1; ++ } ++ ++ add_marker(mod, marker, fmt); ++ } ++ return; ++fail: ++ fatal("parse error in markers list file\n"); ++} ++ ++static int compare_strings(const void *a, const void *b) ++{ ++ return strcmp(*(const char **) a, *(const char **) b); ++} ++ ++static void write_markers(const char *fname) ++{ ++ struct buffer buf = { }; ++ struct module *mod; ++ size_t i; ++ ++ for (mod = modules; mod; mod = mod->next) ++ if ((!external_module || !mod->skip) && mod->markers != NULL) { ++ /* ++ * Sort the strings so we can skip duplicates when ++ * we write them out. ++ */ ++ qsort(mod->markers, mod->nmarkers, ++ sizeof mod->markers[0], &compare_strings); ++ for (i = 0; i < mod->nmarkers; ++i) { ++ char *line = mod->markers[i]; ++ buf_write(&buf, line, strlen(line)); ++ while (i + 1 < mod->nmarkers && ++ !strcmp(mod->markers[i], ++ mod->markers[i + 1])) ++ free(mod->markers[i++]); ++ free(mod->markers[i]); ++ } ++ free(mod->markers); ++ mod->markers = NULL; ++ } ++ ++ write_if_changed(&buf, fname); ++} ++ ++ + int main(int argc, char **argv) + { + struct module *mod; +@@ -1402,8 +1550,10 @@ int main(int argc, char **argv) + char *kernel_read = NULL, *module_read = NULL; + char *dump_write = NULL; + int opt; ++ char *markers_read = NULL; ++ char *markers_write = NULL; + +- while ((opt = getopt(argc, argv, "i:I:mo:a")) != -1) { ++ while ((opt = getopt(argc, argv, "i:I:mo:aM:K:")) != -1) { + switch(opt) { + case 'i': + kernel_read = optarg; +@@ -1421,6 +1571,12 @@ int main(int argc, char **argv) + case 'a': + all_versions = 1; + break; ++ case 'M': ++ markers_write = optarg; ++ break; ++ case 'K': ++ markers_read = optarg; ++ break; + default: + exit(1); + } +@@ -1460,5 +1616,11 @@ int main(int argc, char **argv) + if (dump_write) + write_dump(dump_write); + ++ if (markers_read) ++ read_markers(markers_read); ++ ++ if (markers_write) 
++ write_markers(markers_write); ++ + return 0; + } +diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h +index d398c61..27b05e6 100644 +--- a/scripts/mod/modpost.h ++++ b/scripts/mod/modpost.h +@@ -107,6 +107,8 @@ struct module { + int has_init; + int has_cleanup; + struct buffer dev_table_buf; ++ char **markers; ++ size_t nmarkers; + char srcversion[25]; + }; + +@@ -121,6 +123,7 @@ struct elf_info { + Elf_Section export_gpl_sec; + Elf_Section export_unused_gpl_sec; + Elf_Section export_gpl_future_sec; ++ Elf_Section markers_strings_sec; + const char *strtab; + char *modinfo; + unsigned int modinfo_len; Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch (rev 0) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch 2008-07-04 02:18:23 UTC (rev 6) @@ -0,0 +1,1882 @@ +diff --git a/Makefile b/Makefile +index 93bffdf..59693ac 100644 +--- a/Makefile ++++ b/Makefile +@@ -603,7 +603,7 @@ quiet_cmd_vmlinux__ ?= LD $@ + cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ + -T $(vmlinux-lds) $(vmlinux-init) \ + --start-group $(vmlinux-main) --end-group \ +- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) ++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) + + # Generate new vmlinux version + quiet_cmd_vmlinux_version = GEN .version +@@ -727,11 +727,30 @@ debug_kallsyms: .tmp_map$(last_kallsyms) + + endif # ifdef CONFIG_KALLSYMS + ++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has ++# relevant sections renamed as per the linker script. 
++quiet_cmd_vmlinux-modpost = LD $@ ++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ ++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ ++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) ++define rule_vmlinux-modpost ++ : ++ +$(call cmd,vmlinux-modpost) ++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ ++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd ++endef ++ + # vmlinux image - including updated kernel symbols +-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE ++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE ++ $(call vmlinux-modpost) + $(call if_changed_rule,vmlinux__) + $(Q)rm -f .old_version + ++# build vmlinux.o first to catch section mismatch errors early ++$(kallsyms.o): vmlinux.o ++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ++ $(call if_changed_rule,vmlinux-modpost) ++ + # The actual objects are generated when descending, + # make sure no implicit rule kicks in + $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; +diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig +index d04fcc6..ad73af5 100644 +--- a/arch/i386/Kconfig ++++ b/arch/i386/Kconfig +@@ -1212,6 +1212,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/i386/Kconfig.debug" +diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S +index 7f4ca6b..3978095 100644 +--- a/arch/i386/kernel/vmlinux.lds.S ++++ b/arch/i386/kernel/vmlinux.lds.S +@@ -67,6 +67,7 @@ SECTIONS + . 
= ALIGN(4096); + .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } :data + +diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig +index f916fba..b81a36b 100644 +--- a/arch/ia64/Kconfig ++++ b/arch/ia64/Kconfig +@@ -591,6 +591,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/ia64/Kconfig.debug" +diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S +index 69cba94..2c0a5c9 100644 +--- a/arch/ia64/kernel/vmlinux.lds.S ++++ b/arch/ia64/kernel/vmlinux.lds.S +@@ -207,7 +207,7 @@ SECTIONS + + data : { } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) +- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } ++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index 7295252..fb8a2b9 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -1091,6 +1091,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + endmenu + + source "arch/powerpc/Kconfig.debug" +diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S +index c02298a..709bc47 100644 +--- a/arch/powerpc/kernel/vmlinux.lds.S ++++ b/arch/powerpc/kernel/vmlinux.lds.S +@@ -176,11 +176,13 @@ SECTIONS + *(.data) + *(.sdata) + *(.got.plt) *(.got) ++ MARKER + } + #else + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) ++ MARKER + } + + .opd : { +diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig +index d121403..d8d276a 100644 +--- a/arch/s390/Kconfig ++++ b/arch/s390/Kconfig +@@ -503,6 +503,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/s390/Kconfig.debug" +diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S +index af9e69a..8443156 100644 +--- a/arch/s390/kernel/vmlinux.lds.S ++++ b/arch/s390/kernel/vmlinux.lds.S +@@ -46,6 +46,7 @@ SECTIONS + + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig +index b627f8d..e0467d1 100644 +--- a/arch/sparc64/Kconfig ++++ b/arch/sparc64/Kconfig +@@ -427,6 +427,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + endmenu + + source "arch/sparc64/Kconfig.debug" +diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S +index b097379..1f10e43 100644 +--- a/arch/sparc64/kernel/vmlinux.lds.S ++++ b/arch/sparc64/kernel/vmlinux.lds.S +@@ -27,6 +27,7 @@ SECTIONS + .data : + { + *(.data) ++ MARKER + CONSTRUCTORS + } + .data1 : { *(.data1) } +diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig +index 651d6cb..0e6570c 100644 +--- a/arch/x86_64/Kconfig ++++ b/arch/x86_64/Kconfig +@@ -711,6 +711,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/x86_64/Kconfig.debug" +diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S +index 57e3255..867fce1 100644 +--- a/arch/x86_64/kernel/vmlinux.lds.S ++++ b/arch/x86_64/kernel/vmlinux.lds.S +@@ -68,6 +68,7 @@ SECTIONS + /* Data */ + .data : AT(ADDR(.data) - LOAD_OFFSET) { + *(.data) ++ MARKER + CONSTRUCTORS + } :data + +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index 3fb354d..e884942 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -126,7 +126,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); + + // ================= main 802.3ad protocol functions ================== + static int ad_lacpdu_send(struct port *port); +-static int ad_marker_send(struct port *port, struct marker *marker); ++static int ad_marker_send(struct port *port, struct bond_marker *marker); + static void ad_mux_machine(struct port *port); + static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); + static void ad_tx_machine(struct port *port); +@@ -139,8 +139,8 @@ static void 
ad_initialize_port(struct port *port, int lacp_fast); + static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); + static void ad_enable_collecting_distributing(struct port *port); + static void ad_disable_collecting_distributing(struct port *port); +-static void ad_marker_info_received(struct marker *marker_info, struct port *port); +-static void ad_marker_response_received(struct marker *marker, struct port *port); ++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); ++static void ad_marker_response_received(struct bond_marker *marker, struct port *port); + + + ///////////////////////////////////////////////////////////////////////////////// +@@ -912,12 +912,13 @@ static int ad_lacpdu_send(struct port *port) + * Returns: 0 on success + * < 0 on error + */ +-static int ad_marker_send(struct port *port, struct marker *marker) ++static int ad_marker_send(struct port *port, struct bond_marker *marker) + { + struct slave *slave = port->slave; + struct sk_buff *skb; +- struct marker_header *marker_header; +- int length = sizeof(struct marker_header); ++ struct bond_marker_header *marker_header; ++ int length = sizeof(struct bond_marker_header); ++ + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length + 16); +@@ -932,7 +933,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + +- marker_header = (struct marker_header *)skb_put(skb, length); ++ marker_header = (struct bond_marker_header *)skb_put(skb, length); + + marker_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it +@@ -1732,7 +1733,7 @@ static void ad_disable_collecting_distributing(struct port *port) + */ + static void ad_marker_info_send(struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + u16 index; + + // 
fill the marker PDU with the appropriate values +@@ -1765,13 +1766,14 @@ static void ad_marker_info_send(struct port *port) + * @port: the port we're looking at + * + */ +-static void ad_marker_info_received(struct marker *marker_info,struct port *port) ++static void ad_marker_info_received(struct bond_marker *marker_info, ++ struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + + // copy the received marker data to the response marker + //marker = *marker_info; +- memcpy(&marker, marker_info, sizeof(struct marker)); ++ memcpy(&marker, marker_info, sizeof(struct bond_marker)); + // change the marker subtype to marker response + marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; + // send the marker response +@@ -1790,7 +1792,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port + * response for marker PDU's, in this stage, but only to respond to marker + * information. + */ +-static void ad_marker_response_received(struct marker *marker, struct port *port) ++static void ad_marker_response_received(struct bond_marker *marker, ++ struct port *port) + { + marker=NULL; // just to satisfy the compiler + port=NULL; // just to satisfy the compiler +@@ -2187,15 +2190,16 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u + case AD_TYPE_MARKER: + // No need to convert fields to Little Endian since we don't use the marker's fields. 
+ +- switch (((struct marker *)lacpdu)->tlv_type) { ++ switch (((struct bond_marker *)lacpdu)->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + dprintk("Received Marker Information on port %d\n", port->actor_port_number); +- ad_marker_info_received((struct marker *)lacpdu, port); ++ ad_marker_info_received((struct bond_marker *)lacpdu, port); ++ + break; + + case AD_MARKER_RESPONSE_SUBTYPE: + dprintk("Received Marker Response on port %d\n", port->actor_port_number); +- ad_marker_response_received((struct marker *)lacpdu, port); ++ ad_marker_response_received((struct bond_marker *)lacpdu, port); + break; + + default: +diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h +index 6ad5ad6..bf93e7e 100644 +--- a/drivers/net/bonding/bond_3ad.h ++++ b/drivers/net/bonding/bond_3ad.h +@@ -92,7 +92,8 @@ typedef enum { + typedef enum { + AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype + AD_MARKER_RESPONSE_SUBTYPE // marker response subtype +-} marker_subtype_t; ++} bond_marker_subtype_t; ++ + + // timers types(43.4.9 in the 802.3ad standard) + typedef enum { +@@ -148,7 +149,7 @@ typedef struct lacpdu_header { + } lacpdu_header_t; + + // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) +-typedef struct marker { ++typedef struct bond_marker { + u8 subtype; // = 0x02 (marker PDU) + u8 version_number; // = 0x01 + u8 tlv_type; // = 0x01 (marker information) +@@ -161,12 +162,12 @@ typedef struct marker { + u8 tlv_type_terminator; // = 0x00 + u8 terminator_length; // = 0x00 + u8 reserved_90[90]; // = 0 +-} marker_t; ++} bond_marker_t; + +-typedef struct marker_header { ++typedef struct bond_marker_header { + struct ad_header ad_header; +- struct marker marker; +-} marker_header_t; ++ struct bond_marker marker; ++} bond_marker_header_t; + + #pragma pack() + +diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h +index 8c6f9b6..efc3d79 100644 +--- a/drivers/scsi/qla4xxx/ql4_fw.h ++++ 
b/drivers/scsi/qla4xxx/ql4_fw.h +@@ -671,7 +671,7 @@ struct continuation_t1_entry { + #define ET_CONTINUE ET_CONT_T1 + + /* Marker entry structure*/ +-struct marker_entry { ++struct qla4_marker_entry { + struct qla4_header hdr; /* 00-03 */ + + uint32_t system_defined; /* 04-07 */ +diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c +index d45e4d0..b85a484 100644 +--- a/drivers/scsi/qla4xxx/ql4_iocb.c ++++ b/drivers/scsi/qla4xxx/ql4_iocb.c +@@ -70,7 +70,8 @@ int qla4xxx_get_req_pkt(struct scsi_qla_host *ha, + int qla4xxx_send_marker_iocb(struct scsi_qla_host *ha, + struct ddb_entry *ddb_entry, int lun) + { +- struct marker_entry *marker_entry; ++ struct qla4_marker_entry *marker_entry; ++ + unsigned long flags = 0; + uint8_t status = QLA_SUCCESS; + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index b9e964c..a175f1e 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -9,12 +9,21 @@ + /* Align . to a 8 byte boundary equals to maximum function alignment. */ + #define ALIGN_FUNCTION() . = ALIGN(8) + ++/* Kernel markers : pointers */ ++#define MARKER \ ++ . = ALIGN(8); \ ++ VMLINUX_SYMBOL(__start___markers) = .; \ ++ *(__markers) \ ++ VMLINUX_SYMBOL(__stop___markers) = .; ++ ++ + #define RODATA \ + . = ALIGN(4096); \ + .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_rodata) = .; \ + *(.rodata) *(.rodata.*) \ + *(__vermagic) /* Kernel version magic */ \ ++ *(__markers_strings) /* Markers: strings */ \ + } \ + \ + .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ +diff --git a/include/linux/marker.h b/include/linux/marker.h +new file mode 100644 +index 0000000..efbc82b +--- /dev/null ++++ b/include/linux/marker.h +@@ -0,0 +1,139 @@ ++#ifndef _LINUX_MARKER_H ++#define _LINUX_MARKER_H ++ ++/* ++ * Code markup for dynamic and static tracing. ++ * ++ * See Documentation/marker.txt. 
++ * ++ * (C) Copyright 2006 Mathieu Desnoyers ++ * ++ * This file is released under the GPLv2. ++ * See the file COPYING for more details. ++ */ ++ ++#include ++ ++struct module; ++struct marker; ++ ++/** ++ * marker_probe_func - Type of a marker probe function ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @args: variable argument list pointer. Use a pointer to overcome C's ++ * inability to pass this around as a pointer in a portable manner in ++ * the callee otherwise. ++ * ++ * Type of marker probe functions. They receive the mdata and need to parse the ++ * format string to recover the variable argument list. ++ */ ++typedef void marker_probe_func(void *probe_private, void *call_private, ++ const char *fmt, va_list *args); ++ ++struct marker_probe_closure { ++ marker_probe_func *func; /* Callback */ ++ void *probe_private; /* Private probe data */ ++}; ++ ++struct marker { ++ const char *name; /* Marker name */ ++ const char *format; /* Marker format string, describing the ++ * variable argument list. ++ */ ++ char state; /* Marker state. */ ++ char ptype; /* probe type : 0 : single, 1 : multi */ ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++} __attribute__((aligned(8))); ++ ++#ifdef CONFIG_MARKERS ++ ++/* ++ * Note : the empty asm volatile with read constraint is used here instead of a ++ * "used" attribute to fix a gcc 4.1.x bug. ++ * Make sure the alignment of the structure in the __markers section will ++ * not add unwanted padding between the beginning of the section and the ++ * structure. Force alignment to the same alignment as the section start. ++ */ ++#define __trace_mark(name, call_private, format, args...) 
\ ++ do { \ ++ static const char __mstrtab_##name[] \ ++ __attribute__((section("__markers_strings"))) \ ++ = #name "\0" format; \ ++ static struct marker __mark_##name \ ++ __attribute__((section("__markers"), aligned(8))) = \ ++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ ++ 0, 0, marker_probe_cb, \ ++ { __mark_empty_function, NULL}, NULL }; \ ++ __mark_check_format(format, ## args); \ ++ if (unlikely(__mark_##name.state)) { \ ++ (*__mark_##name.call) \ ++ (&__mark_##name, call_private, \ ++ format, ## args); \ ++ } \ ++ } while (0) ++ ++extern void marker_update_probe_range(struct marker *begin, ++ struct marker *end); ++#else /* !CONFIG_MARKERS */ ++#define __trace_mark(name, call_private, format, args...) \ ++ __mark_check_format(format, ## args) ++static inline void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ } ++#endif /* CONFIG_MARKERS */ ++ ++/** ++ * trace_mark - Marker ++ * @name: marker name, not quoted. ++ * @format: format string ++ * @args...: variable argument list ++ * ++ * Places a marker. ++ */ ++#define trace_mark(name, format, args...) \ ++ __trace_mark(name, NULL, format, ## args) ++ ++/** ++ * MARK_NOARGS - Format string for a marker with no argument. ++ */ ++#define MARK_NOARGS " " ++ ++/* To be used for string format validity checking with gcc */ ++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) ++{ ++} ++ ++extern marker_probe_func __mark_empty_function; ++ ++extern void marker_probe_cb(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++extern void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++ ++/* ++ * Connect a probe to a marker. ++ * private data pointer must be a valid allocated memory address, or NULL. 
++ */ ++extern int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private); ++ ++/* ++ * Returns the private data given to marker_probe_register. ++ */ ++extern int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private); ++/* ++ * Unregister a marker by providing the registered private data. ++ */ ++extern int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private); ++ ++extern void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num); ++ ++#endif +diff --git a/include/linux/module.h b/include/linux/module.h +index 0460634..de5b51e 100644 +--- a/include/linux/module.h ++++ b/include/linux/module.h +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -353,6 +354,10 @@ struct module + /* The command line arguments (may be mangled). People like + keeping pointers to this stuff */ + char *args; ++#ifdef CONFIG_MARKERS ++ struct marker *markers; ++ unsigned int num_markers; ++#endif + }; + + /* FIXME: It'd be nice to isolate modules during init, too, so they +@@ -472,6 +477,7 @@ int register_module_notifier(struct notifier_block * nb); + int unregister_module_notifier(struct notifier_block * nb); + + extern void print_modules(void); ++extern void module_update_markers(void); + + struct device_driver; + void module_add_driver(struct module *, struct device_driver *); +@@ -574,6 +580,10 @@ static inline void print_modules(void) + { + } + ++static inline void module_update_markers(void) ++{ ++} ++ + struct device_driver; + struct module; + +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index b4ca73d..5f12d1b 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -42,6 +42,19 @@ + #include + #include + ++/* ++ * Prevent the compiler from merging or refetching accesses. 
The compiler ++ * is also forbidden from reordering successive instances of ACCESS_ONCE(), ++ * but only when the compiler is aware of some particular ordering. One way ++ * to make the compiler aware of ordering is to put the two invocations of ++ * ACCESS_ONCE() in different C statements. ++ * ++ * This macro does absolutely -nothing- to prevent the CPU from reordering, ++ * merging, or refetching absolutely anything at any time. ++ * ++ */ ++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) ++ + /** + * struct rcu_head - callback structure for use with RCU + * @next: next update requests in a list +diff --git a/kernel/Makefile b/kernel/Makefile +index ef832fa..5879e8b 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -50,6 +50,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o + obj-$(CONFIG_RELAY) += relay.o + obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o + obj-$(CONFIG_TASKSTATS) += taskstats.o ++obj-$(CONFIG_MARKERS) += marker.o + obj-$(CONFIG_UTRACE) += utrace.o + obj-$(CONFIG_PTRACE) += ptrace.o + +diff --git a/kernel/marker.c b/kernel/marker.c +new file mode 100644 +index 0000000..c4c2cd8 +--- /dev/null ++++ b/kernel/marker.c +@@ -0,0 +1,851 @@ ++/* ++ * Copyright (C) 2007 Mathieu Desnoyers ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern struct marker __start___markers[]; ++extern struct marker __stop___markers[]; ++ ++/* Set to 1 to enable marker debug output */ ++const int marker_debug; ++ ++/* ++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin ++ * and module markers and the hash table. ++ */ ++static DEFINE_MUTEX(markers_mutex); ++ ++/* ++ * Marker hash table, containing the active markers. ++ * Protected by module_mutex. ++ */ ++#define MARKER_HASH_BITS 6 ++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) ++ ++/* ++ * Note about RCU : ++ * It is used to make sure every handler has finished using its private data ++ * between two consecutive operation (add or remove) on a given marker. It is ++ * also used to delay the free of multiple probes array until a quiescent state ++ * is reached. ++ * marker entries modifications are protected by the markers_mutex. ++ */ ++struct marker_entry { ++ struct hlist_node hlist; ++ char *format; ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++ int refcount; /* Number of times armed. 0 if disarmed. */ ++ struct rcu_head rcu; ++ void *oldptr; ++ char rcu_pending:1; ++ char ptype:1; ++ char name[0]; /* Contains name'\0'format'\0' */ ++}; ++ ++static struct hlist_head marker_table[MARKER_TABLE_SIZE]; ++ ++/** ++ * __mark_empty_function - Empty probe callback ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @...: variable argument list ++ * ++ * Empty callback provided as a probe to the markers. 
By providing this to a ++ * disabled marker, we make sure the execution flow is always valid even ++ * though the function pointer change and the marker enabling are two distinct ++ * operations that modifies the execution flow of preemptible code. ++ */ ++void __mark_empty_function(void *probe_private, void *call_private, ++ const char *fmt, va_list *args) ++{ ++} ++EXPORT_SYMBOL_GPL(__mark_empty_function); ++ ++/* ++ * marker_probe_cb Callback that prepares the variable argument list for probes. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we ++ * need to put a full smp_rmb() in this branch. This is why we do not use ++ * rcu_dereference() for the pointer read. ++ */ ++void marker_probe_cb(const struct marker *mdata, void *call_private, ++ const char *fmt, ...) ++{ ++ va_list args; ++ char ptype; ++ ++ /* ++ * disabling preemption to make sure the teardown of the callbacks can ++ * be done correctly when they are in modules and they insure RCU read ++ * coherency. ++ */ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ va_start(args, fmt); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ va_end(args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. 
Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. ++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) { ++ va_start(args, fmt); ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ va_end(args); ++ } ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb); ++ ++/* ++ * marker_probe_cb Callback that does not prepare the variable argument list. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Should be connected to markers "MARK_NOARGS". ++ */ ++void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...) ++{ ++ va_list args; /* not initialized */ ++ char ptype; ++ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. 
++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); ++ ++static void free_old_closure(struct rcu_head *head) ++{ ++ struct marker_entry *entry = container_of(head, ++ struct marker_entry, rcu); ++ kfree(entry->oldptr); ++ /* Make sure we free the data before setting the pending flag to 0 */ ++ smp_wmb(); ++ entry->rcu_pending = 0; ++} ++ ++static void debug_print_probes(struct marker_entry *entry) ++{ ++ int i; ++ ++ if (!marker_debug) ++ return; ++ ++ if (!entry->ptype) { ++ printk(KERN_DEBUG "Single probe : %p %p\n", ++ entry->single.func, ++ entry->single.probe_private); ++ } else { ++ for (i = 0; entry->multi[i].func; i++) ++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, ++ entry->multi[i].func, ++ entry->multi[i].probe_private); ++ } ++} ++ ++static struct marker_probe_closure * ++marker_entry_add_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0; ++ struct marker_probe_closure *old, *new; ++ ++ WARN_ON(!probe); ++ ++ debug_print_probes(entry); ++ old = entry->multi; ++ if (!entry->ptype) { ++ if (entry->single.func == probe && ++ entry->single.probe_private == probe_private) ++ return ERR_PTR(-EBUSY); ++ if (entry->single.func == __mark_empty_function) { ++ /* 0 -> 1 probes */ ++ entry->single.func = probe; ++ entry->single.probe_private = probe_private; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* 1 -> 2 probes */ ++ nr_probes = 1; ++ old = NULL; ++ } ++ } else { ++ /* (N -> N+1), (N != 0, 1) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) ++ if (old[nr_probes].func == probe ++ && old[nr_probes].probe_private ++ == probe_private) ++ return ERR_PTR(-EBUSY); ++ } ++ /* + 2 : one for new probe, one for NULL func */ ++ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), ++ GFP_KERNEL); ++ if (new == NULL) ++ return ERR_PTR(-ENOMEM); ++ if (!old) ++ new[0] = entry->single; ++ else ++ memcpy(new, old, ++ nr_probes * sizeof(struct marker_probe_closure)); ++ new[nr_probes].func = probe; ++ new[nr_probes].probe_private = probe_private; ++ entry->refcount = nr_probes + 1; ++ entry->multi = new; ++ entry->ptype = 1; ++ debug_print_probes(entry); ++ return old; ++} ++ ++static struct marker_probe_closure * ++marker_entry_remove_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0, nr_del = 0, i; ++ struct marker_probe_closure *old, *new; ++ ++ old = entry->multi; ++ ++ debug_print_probes(entry); ++ if (!entry->ptype) { ++ /* 0 -> N is an error */ ++ WARN_ON(entry->single.func == __mark_empty_function); ++ /* 1 -> 0 probes */ ++ WARN_ON(probe && entry->single.func != probe); ++ WARN_ON(entry->single.probe_private != probe_private); ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* (N -> M), (N > 1, M >= 0) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { ++ if ((!probe || old[nr_probes].func == probe) ++ && old[nr_probes].probe_private ++ == probe_private) ++ nr_del++; ++ } ++ } ++ ++ if (nr_probes - nr_del == 0) { ++ /* N -> 0, (N > 1) */ ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ } else if (nr_probes - nr_del == 1) { ++ /* N -> 1, (N > 1) */ ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ entry->single = old[i]; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ } else { ++ int j = 0; ++ /* N -> M, (N > 1, M > 1) */ ++ /* + 1 for NULL */ ++ new = kzalloc((nr_probes - nr_del + 1) ++ * sizeof(struct marker_probe_closure), GFP_KERNEL); ++ if (new == NULL) ++ return 
ERR_PTR(-ENOMEM); ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ new[j++] = old[i]; ++ entry->refcount = nr_probes - nr_del; ++ entry->ptype = 1; ++ entry->multi = new; ++ } ++ debug_print_probes(entry); ++ return old; ++} ++ ++/* ++ * Get marker if the marker is present in the marker hash table. ++ * Must be called with markers_mutex held. ++ * Returns NULL if not present. ++ */ ++static struct marker_entry *get_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ u32 hash = jhash(name, strlen(name), 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) ++ return e; ++ } ++ return NULL; ++} ++ ++/* ++ * Add the marker to the marker hash table. Must be called with markers_mutex ++ * held. ++ */ ++static struct marker_entry *add_marker(const char *name, const char *format) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ size_t format_len = 0; ++ u32 hash = jhash(name, name_len-1, 0); ++ ++ if (format) ++ format_len = strlen(format) + 1; ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ printk(KERN_NOTICE ++ "Marker %s busy\n", name); ++ return ERR_PTR(-EBUSY); /* Already there */ ++ } ++ } ++ /* ++ * Using kmalloc here to allocate a variable length element. Could ++ * cause some memory fragmentation if overused. 
++ */ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return ERR_PTR(-ENOMEM); ++ memcpy(&e->name[0], name, name_len); ++ if (format) { ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ } else { ++ e->format = NULL; ++ e->call = marker_probe_cb; ++ } ++ e->single.func = __mark_empty_function; ++ e->single.probe_private = NULL; ++ e->multi = NULL; ++ e->ptype = 0; ++ e->refcount = 0; ++ e->rcu_pending = 0; ++ hlist_add_head(&e->hlist, head); ++ return e; ++} ++ ++/* ++ * Remove the marker from the marker hash table. Must be called with mutex_lock ++ * held. ++ */ ++static int remove_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ int found = 0; ++ size_t len = strlen(name) + 1; ++ u32 hash = jhash(name, len-1, 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) ++ return -ENOENT; ++ if (e->single.func != __mark_empty_function) ++ return -EBUSY; ++ hlist_del(&e->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if (e->rcu_pending) ++ rcu_barrier(); ++ kfree(e); ++ return 0; ++} ++ ++/* ++ * Set the mark_entry format to the format found in the element. 
++ */ ++static int marker_set_format(struct marker_entry **entry, const char *format) ++{ ++ struct marker_entry *e; ++ size_t name_len = strlen((*entry)->name) + 1; ++ size_t format_len = strlen(format) + 1; ++ ++ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return -ENOMEM; ++ memcpy(&e->name[0], (*entry)->name, name_len); ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ e->single = (*entry)->single; ++ e->multi = (*entry)->multi; ++ e->ptype = (*entry)->ptype; ++ e->refcount = (*entry)->refcount; ++ e->rcu_pending = 0; ++ hlist_add_before(&e->hlist, &(*entry)->hlist); ++ hlist_del(&(*entry)->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if ((*entry)->rcu_pending) ++ rcu_barrier(); ++ kfree(*entry); ++ *entry = e; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ return 0; ++} ++ ++/* ++ * Sets the probe callback corresponding to one marker. ++ */ ++static int set_marker(struct marker_entry **entry, struct marker *elem, ++ int active) ++{ ++ int ret; ++ WARN_ON(strcmp((*entry)->name, elem->name) != 0); ++ ++ if ((*entry)->format) { ++ if (strcmp((*entry)->format, elem->format) != 0) { ++ printk(KERN_NOTICE ++ "Format mismatch for probe %s " ++ "(%s), marker (%s)\n", ++ (*entry)->name, ++ (*entry)->format, ++ elem->format); ++ return -EPERM; ++ } ++ } else { ++ ret = marker_set_format(entry, elem->format); ++ if (ret) ++ return ret; ++ } ++ ++ /* ++ * probe_cb setup (statically known) is done here. It is ++ * asynchronous with the rest of execution, therefore we only ++ * pass from a "safe" callback (with argument) to an "unsafe" ++ * callback (does not set arguments). 
++ */ ++ elem->call = (*entry)->call; ++ /* ++ * Sanity check : ++ * We only update the single probe private data when the ptr is ++ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) ++ */ ++ WARN_ON(elem->single.func != __mark_empty_function ++ && elem->single.probe_private ++ != (*entry)->single.probe_private && ++ !elem->ptype); ++ elem->single.probe_private = (*entry)->single.probe_private; ++ /* ++ * Make sure the private data is valid when we update the ++ * single probe ptr. ++ */ ++ smp_wmb(); ++ elem->single.func = (*entry)->single.func; ++ /* ++ * We also make sure that the new probe callbacks array is consistent ++ * before setting a pointer to it. ++ */ ++ rcu_assign_pointer(elem->multi, (*entry)->multi); ++ /* ++ * Update the function or multi probe array pointer before setting the ++ * ptype. ++ */ ++ smp_wmb(); ++ elem->ptype = (*entry)->ptype; ++ elem->state = active; ++ ++ return 0; ++} ++ ++/* ++ * Disable a marker and its probe callback. ++ * Note: only after a synchronize_sched() issued after setting elem->call to the ++ * empty function insures that the original callback is not used anymore. This ++ * insured by preemption disabling around the call site. ++ */ ++static void disable_marker(struct marker *elem) ++{ ++ /* leave "call" as is. It is known statically. */ ++ elem->state = 0; ++ elem->single.func = __mark_empty_function; ++ /* Update the function before setting the ptype */ ++ smp_wmb(); ++ elem->ptype = 0; /* single probe */ ++ /* ++ * Leave the private data and id there, because removal is racy and ++ * should be done only after a synchronize_sched(). These are never used ++ * until the next initialization anyway. ++ */ ++} ++ ++/** ++ * marker_update_probe_range - Update a probe range ++ * @begin: beginning of the range ++ * @end: end of the range ++ * ++ * Updates the probe callback corresponding to a range of markers. 
++ */ ++void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ ++ struct marker *iter; ++ struct marker_entry *mark_entry; ++ ++ mutex_lock(&markers_mutex); ++ for (iter = begin; iter < end; iter++) { ++ mark_entry = get_marker(iter->name); ++ if (mark_entry) { ++ set_marker(&mark_entry, iter, ++ !!mark_entry->refcount); ++ /* ++ * ignore error, continue ++ */ ++ } else { ++ disable_marker(iter); ++ } ++ } ++ mutex_unlock(&markers_mutex); ++} ++ ++/* ++ * Update probes, removing the faulty probes. ++ * Issues a synchronize_sched() when no reference to the module passed ++ * as parameter is found in the probes so the probe module can be ++ * safely unloaded from now on. ++ * ++ * Internal callback only changed before the first probe is connected to it. ++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 ++ * transitions. All other transitions will leave the old private data valid. ++ * This makes the non-atomicity of the callback/private data updates valid. ++ * ++ * "special case" updates : ++ * 0 -> 1 callback ++ * 1 -> 0 callback ++ * 1 -> 2 callbacks ++ * 2 -> 1 callbacks ++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. ++ * Site effect : marker_set_format may delete the marker entry (creating a ++ * replacement). ++ */ ++static void marker_update_probes(void) ++{ ++ /* Core kernel markers */ ++ marker_update_probe_range(__start___markers, __stop___markers); ++ /* Markers in modules. */ ++ module_update_markers(); ++} ++ ++/** ++ * marker_probe_register - Connect a probe to a marker ++ * @name: marker name ++ * @format: format string ++ * @probe: probe handler ++ * @probe_private: probe private data ++ * ++ * private data must be a valid allocated memory address, or NULL. ++ * Returns 0 if ok, error value on error. ++ * The probe address must at least be aligned on the architecture pointer size. 
++ */ ++int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ int ret = 0; ++ struct marker_probe_closure *old; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ if (!entry) { ++ entry = add_marker(name, format); ++ if (IS_ERR(entry)) { ++ ret = PTR_ERR(entry); ++ goto end; ++ } ++ } ++ /* ++ * If we detect that a call_rcu is pending for this marker, ++ * make sure it's executed now. ++ */ ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_add_probe(entry, probe, probe_private); ++ if (IS_ERR(old)) { ++ ret = PTR_ERR(old); ++ goto end; ++ } ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ WARN_ON(!entry); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_register); ++ ++/** ++ * marker_probe_unregister - Disconnect a probe from a marker ++ * @name: marker name ++ * @probe: probe function pointer ++ * @probe_private: probe private data ++ * ++ * Returns the private data given to marker_probe_register, or an ERR_PTR(). ++ * We do not need to call a synchronize_sched to make sure the probes have ++ * finished running before doing a module unload, because the module unload ++ * itself uses stop_machine(), which insures that every preempt disabled section ++ * have finished. 
++ */ ++int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ struct marker_probe_closure *old; ++ int ret = 0; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, probe, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister); ++ ++static struct marker_entry * ++get_marker_from_private_data(marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ unsigned int i; ++ struct hlist_head *head; ++ struct hlist_node *node; ++ ++ for (i = 0; i < MARKER_TABLE_SIZE; i++) { ++ head = &marker_table[i]; ++ hlist_for_each_entry(entry, node, head, hlist) { ++ if (!entry->ptype) { ++ if (entry->single.func == probe ++ && entry->single.probe_private ++ == probe_private) ++ return entry; ++ } else { ++ struct marker_probe_closure *closure; ++ closure = entry->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func == probe && ++ closure[i].probe_private ++ == probe_private) ++ return entry; ++ } ++ } ++ } ++ } ++ return NULL; ++} ++ ++/** ++ * marker_probe_unregister_private_data - Disconnect a probe from a marker ++ * @probe: probe function ++ * @probe_private: probe private data ++ * ++ * Unregister a probe by providing the registered private data. ++ * Only removes the first marker found in hash table. ++ * Return 0 on success or error value. 
++ * We do not need to call a synchronize_sched to make sure the probes have ++ * finished running before doing a module unload, because the module unload ++ * itself uses stop_machine(), which insures that every preempt disabled section ++ * have finished. ++ */ ++int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private) ++{ ++ struct marker_entry *entry; ++ int ret = 0; ++ struct marker_probe_closure *old; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, NULL, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ WARN_ON(!entry); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(entry->name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); ++ ++/** ++ * marker_get_private_data - Get a marker's probe private data ++ * @name: marker name ++ * @probe: probe to match ++ * @num: get the nth matching probe's private data ++ * ++ * Returns the nth private data pointer (starting from 0) matching, or an ++ * ERR_PTR. ++ * Returns the private data pointer, or an ERR_PTR. ++ * The private data pointer should _only_ be dereferenced if the caller is the ++ * owner of the data, or its content could vanish. This is mostly used to ++ * confirm that a caller is the owner of a registered probe. 
++ */ ++void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ u32 hash = jhash(name, name_len-1, 0); ++ int i; ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ if (!e->ptype) { ++ if (num == 0 && e->single.func == probe) ++ return e->single.probe_private; ++ else ++ break; ++ } else { ++ struct marker_probe_closure *closure; ++ int match = 0; ++ closure = e->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func != probe) ++ continue; ++ if (match++ == num) ++ return closure[i].probe_private; ++ } ++ } ++ } ++ } ++ return ERR_PTR(-ENOENT); ++} ++EXPORT_SYMBOL_GPL(marker_get_private_data); +diff --git a/kernel/module.c b/kernel/module.c +index f9a5987..ac99222 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -1488,6 +1488,8 @@ static struct module *load_module(void __user *umod, + unsigned int unusedcrcindex; + unsigned int unusedgplindex; + unsigned int unusedgplcrcindex; ++ unsigned int markersindex; ++ unsigned int markersstringsindex; + struct module *mod; + long err = 0; + void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ +@@ -1756,6 +1758,9 @@ static struct module *load_module(void __user *umod, + add_taint(TAINT_FORCED_MODULE); + } + #endif ++ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); ++ markersstringsindex = find_sec(hdr, sechdrs, secstrings, ++ "__markers_strings"); + + /* Now do relocations. 
*/ + for (i = 1; i < hdr->e_shnum; i++) { +@@ -1778,6 +1783,11 @@ static struct module *load_module(void __user *umod, + if (err < 0) + goto cleanup; + } ++#ifdef CONFIG_MARKERS ++ mod->markers = (void *)sechdrs[markersindex].sh_addr; ++ mod->num_markers = ++ sechdrs[markersindex].sh_size / sizeof(*mod->markers); ++#endif + + /* Find duplicate symbols */ + err = verify_export_symbols(mod); +@@ -1796,6 +1806,11 @@ static struct module *load_module(void __user *umod, + + add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + ++#ifdef CONFIG_MARKERS ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++#endif + err = module_finalize(hdr, sechdrs, mod); + if (err < 0) + goto cleanup; +@@ -2259,3 +2274,18 @@ EXPORT_SYMBOL(module_remove_driver); + void struct_module(struct module *mod) { return; } + EXPORT_SYMBOL(struct_module); + #endif ++ ++#ifdef CONFIG_MARKERS ++void module_update_markers(void) ++{ ++ struct module *mod; ++ ++ mutex_lock(&module_mutex); ++ list_for_each_entry(mod, &modules, list) ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++ mutex_unlock(&module_mutex); ++} ++#endif ++ +diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost +index 0a64688..886b70c 100644 +--- a/scripts/Makefile.modpost ++++ b/scripts/Makefile.modpost +@@ -13,6 +13,7 @@ + # 2) modpost is then used to + # 3) create one .mod.c file pr. 
module + # 4) create one Module.symvers file with CRC for all exported symbols ++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers + # 5) compile all .mod.c files + # 6) final link of the module to a file + +@@ -41,6 +42,10 @@ include scripts/Makefile.lib + + kernelsymfile := $(objtree)/Module.symvers + modulesymfile := $(KBUILD_EXTMOD)/Module.symvers ++kernelmarkersfile := $(objtree)/Module.markers ++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers ++ ++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) + + # Step 1), find all modules listed in $(MODVERDIR)/ + __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) +@@ -58,16 +63,27 @@ quiet_cmd_modpost = MODPOST + $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ + $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ + $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ ++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ ++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ + $(filter-out FORCE,$^) + + PHONY += __modpost + __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE + $(call cmd,modpost) + ++quiet_cmd_kernel-mod = MODPOST $@ ++ cmd_kernel-mod = $(cmd_modpost) $@ ++ ++vmlinux.o: FORCE ++ $(call cmd,kernel-mod) ++ + # Declare generated files as targets for modpost + $(symverfile): __modpost ; + $(modules:.ko=.mod.c): __modpost ; + ++ifdef CONFIG_MARKERS ++$(markersfile): __modpost ; ++endif + + # Step 5), compile all *.mod.c files + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 0ec3321..383f310 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -10,7 +10,8 @@ + * + * Usage: modpost vmlinux module1.o module2.o ... 
+ */ +- ++#define _GNU_SOURCE ++#include + #include + #include "modpost.h" + #include "../../include/linux/license.h" +@@ -385,6 +386,8 @@ static void parse_elf(struct elf_info *info, const char *filename) + info->export_unused_gpl_sec = i; + else if (strcmp(secname, "__ksymtab_gpl_future") == 0) + info->export_gpl_future_sec = i; ++ else if (strcmp(secname, "__markers_strings") == 0) ++ info->markers_strings_sec = i; + + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; +@@ -900,6 +903,62 @@ static void check_sec_ref(struct module *mod, const char *modname, + } + } + ++static void get_markers(struct elf_info *info, struct module *mod) ++{ ++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; ++ const char *strings = (const char *) info->hdr + sh->sh_offset; ++ const Elf_Sym *sym, *first_sym, *last_sym; ++ size_t n; ++ ++ if (!info->markers_strings_sec) ++ return; ++ ++ /* ++ * First count the strings. We look for all the symbols defined ++ * in the __markers_strings section named __mstrtab_*. For ++ * these local names, the compiler puts a random .NNN suffix on, ++ * so the names don't correspond exactly. ++ */ ++ first_sym = last_sym = NULL; ++ n = 0; ++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ if (first_sym == NULL) ++ first_sym = sym; ++ last_sym = sym; ++ ++n; ++ } ++ ++ if (n == 0) ++ return; ++ ++ /* ++ * Now collect each name and format into a line for the output. ++ * Lines look like: ++ * marker_name vmlinux marker %s format %d ++ * The format string after the second \t can use whitespace. 
++ */ ++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); ++ mod->nmarkers = n; ++ ++ n = 0; ++ for (sym = first_sym; sym <= last_sym; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ const char *name = strings + sym->st_value; ++ const char *fmt = strchr(name, '\0') + 1; ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ mod->markers[n++] = line; ++ } ++} ++ + /** + * Functions used only during module init is marked __init and is stored in + * a .init.text section. Likewise data is marked __initdata and stored in +@@ -1103,6 +1162,8 @@ static void read_symbols(char *modname) + get_src_version(modname, mod->srcversion, + sizeof(mod->srcversion)-1); + ++ get_markers(&info, mod); ++ + parse_elf_finish(&info); + + /* Our trick to get versioning for struct_module - it's +@@ -1436,6 +1497,93 @@ static void write_dump(const char *fname) + write_if_changed(&buf, fname); + } + ++static void add_marker(struct module *mod, const char *name, const char *fmt) ++{ ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ ++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * ++ sizeof mod->markers[0]))); ++ mod->markers[mod->nmarkers++] = line; ++} ++ ++static void read_markers(const char *fname) ++{ ++ unsigned long size, pos = 0; ++ void *file = grab_file(fname, &size); ++ char *line; ++ ++ if (!file) /* No old markers, silently ignore */ ++ return; ++ ++ while ((line = get_next_line(&pos, file, size))) { ++ char *marker, *modname, *fmt; ++ struct module *mod; ++ ++ marker = line; ++ modname = strchr(marker, '\t'); ++ if (!modname) ++ goto fail; ++ *modname++ = '\0'; ++ fmt = strchr(modname, '\t'); ++ if (!fmt) ++ goto fail; ++ *fmt++ = '\0'; ++ if (*marker == '\0' || *modname == '\0') ++ goto fail; ++ ++ mod = 
find_module(modname); ++ if (!mod) { ++ if (is_vmlinux(modname)) ++ have_vmlinux = 1; ++ mod = new_module(NOFAIL(strdup(modname))); ++ mod->skip = 1; ++ } ++ ++ add_marker(mod, marker, fmt); ++ } ++ return; ++fail: ++ fatal("parse error in markers list file\n"); ++} ++ ++static int compare_strings(const void *a, const void *b) ++{ ++ return strcmp(*(const char **) a, *(const char **) b); ++} ++ ++static void write_markers(const char *fname) ++{ ++ struct buffer buf = { }; ++ struct module *mod; ++ size_t i; ++ ++ for (mod = modules; mod; mod = mod->next) ++ if ((!external_module || !mod->skip) && mod->markers != NULL) { ++ /* ++ * Sort the strings so we can skip duplicates when ++ * we write them out. ++ */ ++ qsort(mod->markers, mod->nmarkers, ++ sizeof mod->markers[0], &compare_strings); ++ for (i = 0; i < mod->nmarkers; ++i) { ++ char *line = mod->markers[i]; ++ buf_write(&buf, line, strlen(line)); ++ while (i + 1 < mod->nmarkers && ++ !strcmp(mod->markers[i], ++ mod->markers[i + 1])) ++ free(mod->markers[i++]); ++ free(mod->markers[i]); ++ } ++ free(mod->markers); ++ mod->markers = NULL; ++ } ++ ++ write_if_changed(&buf, fname); ++} ++ ++ + int main(int argc, char **argv) + { + struct module *mod; +@@ -1444,8 +1592,10 @@ int main(int argc, char **argv) + char *kernel_read = NULL, *module_read = NULL; + char *dump_write = NULL; + int opt; ++ char *markers_read = NULL; ++ char *markers_write = NULL; + +- while ((opt = getopt(argc, argv, "i:I:mo:a")) != -1) { ++ while ((opt = getopt(argc, argv, "i:I:mo:aM:K:")) != -1) { + switch(opt) { + case 'i': + kernel_read = optarg; +@@ -1463,6 +1613,12 @@ int main(int argc, char **argv) + case 'a': + all_versions = 1; + break; ++ case 'M': ++ markers_write = optarg; ++ break; ++ case 'K': ++ markers_read = optarg; ++ break; + default: + exit(1); + } +@@ -1502,5 +1658,11 @@ int main(int argc, char **argv) + if (dump_write) + write_dump(dump_write); + ++ if (markers_read) ++ read_markers(markers_read); ++ ++ if (markers_write) 
++ write_markers(markers_write); ++ + return 0; + } +diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h +index d398c61..27b05e6 100644 +--- a/scripts/mod/modpost.h ++++ b/scripts/mod/modpost.h +@@ -107,6 +107,8 @@ struct module { + int has_init; + int has_cleanup; + struct buffer dev_table_buf; ++ char **markers; ++ size_t nmarkers; + char srcversion[25]; + }; + +@@ -121,6 +123,7 @@ struct elf_info { + Elf_Section export_gpl_sec; + Elf_Section export_unused_gpl_sec; + Elf_Section export_gpl_future_sec; ++ Elf_Section markers_strings_sec; + const char *strtab; + char *modinfo; + unsigned int modinfo_len; Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch (rev 0) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch 2008-07-04 02:18:23 UTC (rev 6) @@ -0,0 +1,1917 @@ +diff --git a/Makefile b/Makefile +index ce04b1f..f059302 100644 +--- a/Makefile ++++ b/Makefile +@@ -581,7 +581,7 @@ quiet_cmd_vmlinux__ ?= LD $@ + cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ + -T $(vmlinux-lds) $(vmlinux-init) \ + --start-group $(vmlinux-main) --end-group \ +- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) ++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) + + # Generate new vmlinux version + quiet_cmd_vmlinux_version = GEN .version +@@ -686,10 +686,30 @@ $(KALLSYMS): scripts ; + + endif # ifdef CONFIG_KALLSYMS + ++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has ++# relevant sections renamed as per the linker script. 
++quiet_cmd_vmlinux-modpost = LD $@ ++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ ++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ ++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) ++define rule_vmlinux-modpost ++ : ++ +$(call cmd,vmlinux-modpost) ++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ ++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd ++endef ++ ++ + # vmlinux image - including updated kernel symbols +-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE ++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE ++ $(call vmlinux-modpost) + $(call if_changed_rule,vmlinux__) + ++# build vmlinux.o first to catch section mismatch errors early ++$(kallsyms.o): vmlinux.o ++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ++ $(call if_changed_rule,vmlinux-modpost) ++ + # The actual objects are generated when descending, + # make sure no implicit rule kicks in + $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; +diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug +index cf069b7..d39d5c5 100644 +--- a/arch/i386/Kconfig.debug ++++ b/arch/i386/Kconfig.debug +@@ -29,6 +29,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL +diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S +index d6cc9c0..7bbda2e 100644 +--- a/arch/i386/kernel/vmlinux.lds.S ++++ b/arch/i386/kernel/vmlinux.lds.S +@@ -52,6 +52,7 @@ SECTIONS + /* writeable */ + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug +index 2782b48..17d4a44 100644 +--- a/arch/ia64/Kconfig.debug ++++ b/arch/ia64/Kconfig.debug +@@ -12,6 +12,12 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. + + choice + prompt "Physical memory granularity" +diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S +index a676e79..c6ea47e 100644 +--- a/arch/ia64/kernel/vmlinux.lds.S ++++ b/arch/ia64/kernel/vmlinux.lds.S +@@ -193,7 +193,7 @@ SECTIONS + + data : { } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) +- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } ++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) +diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug +index 860472a..5c3a9f5 100644 +--- a/arch/ppc64/Kconfig.debug ++++ b/arch/ppc64/Kconfig.debug +@@ -16,6 +16,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL +diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S +index 4103cc1..76d0bf0 100644 +--- a/arch/ppc64/kernel/vmlinux.lds.S ++++ b/arch/ppc64/kernel/vmlinux.lds.S +@@ -118,6 +118,7 @@ SECTIONS + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) ++ MARKER + } + + .opd : { +diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug +index cd8d39f..f39bf8d 100644 +--- a/arch/sparc64/Kconfig.debug ++++ b/arch/sparc64/Kconfig.debug +@@ -21,6 +21,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + config DEBUG_DCFLUSH + bool "D-cache flush debugging" + depends on DEBUG_KERNEL +diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S +index a710d38..d4b3b62 100644 +--- a/arch/sparc64/kernel/vmlinux.lds.S ++++ b/arch/sparc64/kernel/vmlinux.lds.S +@@ -27,6 +27,7 @@ SECTIONS + .data : + { + *(.data) ++ MARKER + CONSTRUCTORS + } + .data1 : { *(.data1) } +diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug +index cb33186..d0260cb 100644 +--- a/arch/x86_64/Kconfig.debug ++++ b/arch/x86_64/Kconfig.debug +@@ -55,6 +55,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config IOMMU_LEAK + bool "IOMMU leak tracing" + depends on DEBUG_KERNEL +diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S +index f656f19..12ce116 100644 +--- a/arch/x86_64/kernel/vmlinux.lds.S ++++ b/arch/x86_64/kernel/vmlinux.lds.S +@@ -33,6 +33,7 @@ SECTIONS + + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index 54fb0a0..cbdcd93 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -158,7 +158,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); + + // ================= main 802.3ad protocol functions ================== + static int ad_lacpdu_send(struct port *port); +-static int ad_marker_send(struct port *port, struct marker *marker); ++static int ad_marker_send(struct port *port, struct bond_marker *marker); + static void ad_mux_machine(struct port *port); + static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); + static void ad_tx_machine(struct port *port); +@@ -171,8 +171,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast); + static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); + static void ad_enable_collecting_distributing(struct port *port); + static void ad_disable_collecting_distributing(struct port *port); +-static void ad_marker_info_received(struct marker *marker_info, struct port *port); +-static void ad_marker_response_received(struct marker *marker, struct port *port); ++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); ++static void ad_marker_response_received(struct bond_marker *marker, struct port *port); + + + ///////////////////////////////////////////////////////////////////////////////// +@@ -945,12 +945,13 @@ static int ad_lacpdu_send(struct port *port) + * Returns: 0 on success + * < 0 on error + */ +-static int ad_marker_send(struct port *port, struct marker *marker) 
++static int ad_marker_send(struct port *port, struct bond_marker *marker) + { + struct slave *slave = port->slave; + struct sk_buff *skb; +- struct marker_header *marker_header; +- int length = sizeof(struct marker_header); ++ struct bond_marker_header *marker_header; ++ int length = sizeof(struct bond_marker_header); ++ + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length + 16); +@@ -965,7 +966,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + +- marker_header = (struct marker_header *)skb_put(skb, length); ++ marker_header = (struct bond_marker_header *)skb_put(skb, length); + + marker_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it +@@ -1762,7 +1763,7 @@ static void ad_disable_collecting_distributing(struct port *port) + */ + static void ad_marker_info_send(struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + u16 index; + + // fill the marker PDU with the appropriate values +@@ -1795,13 +1796,14 @@ static void ad_marker_info_send(struct port *port) + * @port: the port we're looking at + * + */ +-static void ad_marker_info_received(struct marker *marker_info,struct port *port) ++static void ad_marker_info_received(struct bond_marker *marker_info, ++ struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + + // copy the received marker data to the response marker + //marker = *marker_info; +- memcpy(&marker, marker_info, sizeof(struct marker)); ++ memcpy(&marker, marker_info, sizeof(struct bond_marker)); + // change the marker subtype to marker response + marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; + // send the marker response +@@ -1820,7 +1822,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port + * response for marker PDU's, in 
this stage, but only to respond to marker + * information. + */ +-static void ad_marker_response_received(struct marker *marker, struct port *port) ++static void ad_marker_response_received(struct bond_marker *marker, ++ struct port *port) + { + marker=NULL; // just to satisfy the compiler + port=NULL; // just to satisfy the compiler +@@ -2208,15 +2211,16 @@ void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 leng + case AD_TYPE_MARKER: + // No need to convert fields to Little Endian since we don't use the marker's fields. + +- switch (((struct marker *)lacpdu)->tlv_type) { ++ switch (((struct bond_marker *)lacpdu)->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + dprintk("Received Marker Information on port %d\n", port->actor_port_number); +- ad_marker_info_received((struct marker *)lacpdu, port); ++ ad_marker_info_received((struct bond_marker *)lacpdu, port); ++ + break; + + case AD_MARKER_RESPONSE_SUBTYPE: + dprintk("Received Marker Response on port %d\n", port->actor_port_number); +- ad_marker_response_received((struct marker *)lacpdu, port); ++ ad_marker_response_received((struct bond_marker *)lacpdu, port); + break; + + default: +diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h +index 4119f0f..7e770ff 100644 +--- a/drivers/net/bonding/bond_3ad.h ++++ b/drivers/net/bonding/bond_3ad.h +@@ -105,7 +105,8 @@ typedef enum { + typedef enum { + AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype + AD_MARKER_RESPONSE_SUBTYPE // marker response subtype +-} marker_subtype_t; ++} bond_marker_subtype_t; ++ + + // timers types(43.4.9 in the 802.3ad standard) + typedef enum { +@@ -161,7 +162,7 @@ typedef struct lacpdu_header { + } lacpdu_header_t; + + // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) +-typedef struct marker { ++typedef struct bond_marker { + u8 subtype; // = 0x02 (marker PDU) + u8 version_number; // = 0x01 + u8 tlv_type; // = 0x01 (marker information) +@@ -174,12 
+175,12 @@ typedef struct marker { + u8 tlv_type_terminator; // = 0x00 + u8 terminator_length; // = 0x00 + u8 reserved_90[90]; // = 0 +-} marker_t; ++} bond_marker_t; + +-typedef struct marker_header { ++typedef struct bond_marker_header { + struct ad_header ad_header; +- struct marker marker; +-} marker_header_t; ++ struct bond_marker marker; ++} bond_marker_header_t; + + #pragma pack() + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index bb340cb..fd6e57f 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -6,10 +6,18 @@ + #define VMLINUX_SYMBOL(_sym_) _sym_ + #endif + ++/* Kernel markers : pointers */ ++#define MARKER \ ++ . = ALIGN(8); \ ++ VMLINUX_SYMBOL(__start___markers) = .; \ ++ *(__markers) \ ++ VMLINUX_SYMBOL(__stop___markers) = .; ++ + #define RODATA \ + .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ + *(.rodata) *(.rodata.*) \ + *(__vermagic) /* Kernel version magic */ \ ++ *(__markers_strings) /* Markers: strings */ \ + } \ + \ + .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ +diff --git a/include/linux/marker.h b/include/linux/marker.h +new file mode 100644 +index 0000000..efbc82b +--- /dev/null ++++ b/include/linux/marker.h +@@ -0,0 +1,139 @@ ++#ifndef _LINUX_MARKER_H ++#define _LINUX_MARKER_H ++ ++/* ++ * Code markup for dynamic and static tracing. ++ * ++ * See Documentation/marker.txt. ++ * ++ * (C) Copyright 2006 Mathieu Desnoyers ++ * ++ * This file is released under the GPLv2. ++ * See the file COPYING for more details. ++ */ ++ ++#include ++ ++struct module; ++struct marker; ++ ++/** ++ * marker_probe_func - Type of a marker probe function ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @args: variable argument list pointer. Use a pointer to overcome C's ++ * inability to pass this around as a pointer in a portable manner in ++ * the callee otherwise. ++ * ++ * Type of marker probe functions. 
They receive the mdata and need to parse the ++ * format string to recover the variable argument list. ++ */ ++typedef void marker_probe_func(void *probe_private, void *call_private, ++ const char *fmt, va_list *args); ++ ++struct marker_probe_closure { ++ marker_probe_func *func; /* Callback */ ++ void *probe_private; /* Private probe data */ ++}; ++ ++struct marker { ++ const char *name; /* Marker name */ ++ const char *format; /* Marker format string, describing the ++ * variable argument list. ++ */ ++ char state; /* Marker state. */ ++ char ptype; /* probe type : 0 : single, 1 : multi */ ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++} __attribute__((aligned(8))); ++ ++#ifdef CONFIG_MARKERS ++ ++/* ++ * Note : the empty asm volatile with read constraint is used here instead of a ++ * "used" attribute to fix a gcc 4.1.x bug. ++ * Make sure the alignment of the structure in the __markers section will ++ * not add unwanted padding between the beginning of the section and the ++ * structure. Force alignment to the same alignment as the section start. ++ */ ++#define __trace_mark(name, call_private, format, args...) 
\ ++ do { \ ++ static const char __mstrtab_##name[] \ ++ __attribute__((section("__markers_strings"))) \ ++ = #name "\0" format; \ ++ static struct marker __mark_##name \ ++ __attribute__((section("__markers"), aligned(8))) = \ ++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ ++ 0, 0, marker_probe_cb, \ ++ { __mark_empty_function, NULL}, NULL }; \ ++ __mark_check_format(format, ## args); \ ++ if (unlikely(__mark_##name.state)) { \ ++ (*__mark_##name.call) \ ++ (&__mark_##name, call_private, \ ++ format, ## args); \ ++ } \ ++ } while (0) ++ ++extern void marker_update_probe_range(struct marker *begin, ++ struct marker *end); ++#else /* !CONFIG_MARKERS */ ++#define __trace_mark(name, call_private, format, args...) \ ++ __mark_check_format(format, ## args) ++static inline void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ } ++#endif /* CONFIG_MARKERS */ ++ ++/** ++ * trace_mark - Marker ++ * @name: marker name, not quoted. ++ * @format: format string ++ * @args...: variable argument list ++ * ++ * Places a marker. ++ */ ++#define trace_mark(name, format, args...) \ ++ __trace_mark(name, NULL, format, ## args) ++ ++/** ++ * MARK_NOARGS - Format string for a marker with no argument. ++ */ ++#define MARK_NOARGS " " ++ ++/* To be used for string format validity checking with gcc */ ++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) ++{ ++} ++ ++extern marker_probe_func __mark_empty_function; ++ ++extern void marker_probe_cb(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++extern void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++ ++/* ++ * Connect a probe to a marker. ++ * private data pointer must be a valid allocated memory address, or NULL. 
++ */ ++extern int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private); ++ ++/* ++ * Disconnect a probe from a marker. Returns 0 if ok, error value on error. ++ */ ++extern int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private); ++/* ++ * Unregister a probe by providing the registered private data. ++ */ ++extern int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private); ++ ++extern void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num); ++ ++#endif +diff --git a/include/linux/module.h b/include/linux/module.h +index 8da8948..2ad5efd 100644 +--- a/include/linux/module.h ++++ b/include/linux/module.h +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -328,6 +329,10 @@ struct module + /* The command line arguments (may be mangled). People like + keeping pointers to this stuff */ + char *args; ++#ifdef CONFIG_MARKERS ++ struct marker *markers; ++ unsigned int num_markers; ++#endif + }; + + /* FIXME: It'd be nice to isolate modules during init, too, so they +@@ -448,6 +453,7 @@ int register_module_notifier(struct notifier_block * nb); + int unregister_module_notifier(struct notifier_block * nb); + + extern void print_modules(void); ++extern void module_update_markers(void); + #else /* !CONFIG_MODULES...
*/ + #define EXPORT_SYMBOL(sym) + #define EXPORT_SYMBOL_GPL(sym) +@@ -537,6 +543,10 @@ static inline int unregister_module_notifier(struct notifier_block * nb) + static inline void print_modules(void) + { + } ++ ++static inline void module_update_markers(void) ++{ ++} + #endif /* CONFIG_MODULES */ + + #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index 91057d6..bcd0acb 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -42,6 +42,19 @@ + #include + #include + ++/* ++ * Prevent the compiler from merging or refetching accesses. The compiler ++ * is also forbidden from reordering successive instances of ACCESS_ONCE(), ++ * but only when the compiler is aware of some particular ordering. One way ++ * to make the compiler aware of ordering is to put the two invocations of ++ * ACCESS_ONCE() in different C statements. ++ * ++ * This macro does absolutely -nothing- to prevent the CPU from reordering, ++ * merging, or refetching absolutely anything at any time. 
++ * ++ */ ++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) ++ + /** + * struct rcu_head - callback structure for use with RCU + * @next: next update requests in a list +@@ -102,6 +115,7 @@ struct rcu_data { + struct rcu_head *donelist; + struct rcu_head **donetail; + int cpu; ++ struct rcu_head barrier; + }; + + DECLARE_PER_CPU(struct rcu_data, rcu_data); +@@ -266,6 +280,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head, + extern void FASTCALL(call_rcu_bh(struct rcu_head *head, + void (*func)(struct rcu_head *head))); + extern void synchronize_kernel(void); +- ++extern void rcu_barrier(void); + #endif /* __KERNEL__ */ + #endif /* __LINUX_RCUPDATE_H */ +diff --git a/kernel/Makefile b/kernel/Makefile +index 0b8c8ca..f8248bc 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o + obj-$(CONFIG_AUDITSYSCALL) += auditsc.o + obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o + obj-$(CONFIG_KPROBES) += kprobes.o ++obj-$(CONFIG_MARKERS) += marker.o + + ifneq ($(CONFIG_IA64),y) + # According to Alan Modra , the -fno-omit-frame-pointer is +diff --git a/kernel/marker.c b/kernel/marker.c +new file mode 100644 +index 0000000..c4c2cd8 +--- /dev/null ++++ b/kernel/marker.c +@@ -0,0 +1,851 @@ ++/* ++ * Copyright (C) 2007 Mathieu Desnoyers ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern struct marker __start___markers[]; ++extern struct marker __stop___markers[]; ++ ++/* Set to 1 to enable marker debug output */ ++const int marker_debug; ++ ++/* ++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin ++ * and module markers and the hash table. ++ */ ++static DEFINE_MUTEX(markers_mutex); ++ ++/* ++ * Marker hash table, containing the active markers. ++ * Protected by markers_mutex. ++ */ ++#define MARKER_HASH_BITS 6 ++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) ++ ++/* ++ * Note about RCU : ++ * It is used to make sure every handler has finished using its private data ++ * between two consecutive operations (add or remove) on a given marker. It is ++ * also used to delay the free of multiple probes array until a quiescent state ++ * is reached. ++ * marker entries modifications are protected by the markers_mutex. ++ */ ++struct marker_entry { ++ struct hlist_node hlist; ++ char *format; ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++ int refcount; /* Number of times armed. 0 if disarmed. */ ++ struct rcu_head rcu; ++ void *oldptr; ++ char rcu_pending:1; ++ char ptype:1; ++ char name[0]; /* Contains name'\0'format'\0' */ ++}; ++ ++static struct hlist_head marker_table[MARKER_TABLE_SIZE]; ++ ++/** ++ * __mark_empty_function - Empty probe callback ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @args: pointer to the variable argument list ++ * ++ * Empty callback provided as a probe to the markers.
By providing this to a ++ * disabled marker, we make sure the execution flow is always valid even ++ * though the function pointer change and the marker enabling are two distinct ++ * operations that modifies the execution flow of preemptible code. ++ */ ++void __mark_empty_function(void *probe_private, void *call_private, ++ const char *fmt, va_list *args) ++{ ++} ++EXPORT_SYMBOL_GPL(__mark_empty_function); ++ ++/* ++ * marker_probe_cb Callback that prepares the variable argument list for probes. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we ++ * need to put a full smp_rmb() in this branch. This is why we do not use ++ * rcu_dereference() for the pointer read. ++ */ ++void marker_probe_cb(const struct marker *mdata, void *call_private, ++ const char *fmt, ...) ++{ ++ va_list args; ++ char ptype; ++ ++ /* ++ * disabling preemption to make sure the teardown of the callbacks can ++ * be done correctly when they are in modules and they insure RCU read ++ * coherency. ++ */ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ va_start(args, fmt); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ va_end(args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. 
Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. ++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) { ++ va_start(args, fmt); ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ va_end(args); ++ } ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb); ++ ++/* ++ * marker_probe_cb_noarg Callback that does not prepare the variable argument list. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Should be connected to markers "MARK_NOARGS". ++ */ ++void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...) ++{ ++ va_list args; /* not initialized */ ++ char ptype; ++ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependent, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependent, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must ensure that the pointer is read _before_ the array ++ * data. Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here.
++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); ++ ++static void free_old_closure(struct rcu_head *head) ++{ ++ struct marker_entry *entry = container_of(head, ++ struct marker_entry, rcu); ++ kfree(entry->oldptr); ++ /* Make sure we free the data before setting the pending flag to 0 */ ++ smp_wmb(); ++ entry->rcu_pending = 0; ++} ++ ++static void debug_print_probes(struct marker_entry *entry) ++{ ++ int i; ++ ++ if (!marker_debug) ++ return; ++ ++ if (!entry->ptype) { ++ printk(KERN_DEBUG "Single probe : %p %p\n", ++ entry->single.func, ++ entry->single.probe_private); ++ } else { ++ for (i = 0; entry->multi[i].func; i++) ++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, ++ entry->multi[i].func, ++ entry->multi[i].probe_private); ++ } ++} ++ ++static struct marker_probe_closure * ++marker_entry_add_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0; ++ struct marker_probe_closure *old, *new; ++ ++ WARN_ON(!probe); ++ ++ debug_print_probes(entry); ++ old = entry->multi; ++ if (!entry->ptype) { ++ if (entry->single.func == probe && ++ entry->single.probe_private == probe_private) ++ return ERR_PTR(-EBUSY); ++ if (entry->single.func == __mark_empty_function) { ++ /* 0 -> 1 probes */ ++ entry->single.func = probe; ++ entry->single.probe_private = probe_private; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* 1 -> 2 probes */ ++ nr_probes = 1; ++ old = NULL; ++ } ++ } else { ++ /* (N -> N+1), (N != 0, 1) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) ++ if (old[nr_probes].func == probe ++ && old[nr_probes].probe_private ++ == probe_private) ++ return ERR_PTR(-EBUSY); ++ } ++ /* + 2 : one for new probe, one for NULL func */ ++ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), ++ GFP_KERNEL); ++ if (new == NULL) ++ return ERR_PTR(-ENOMEM); ++ if (!old) ++ new[0] = entry->single; ++ else ++ memcpy(new, old, ++ nr_probes * sizeof(struct marker_probe_closure)); ++ new[nr_probes].func = probe; ++ new[nr_probes].probe_private = probe_private; ++ entry->refcount = nr_probes + 1; ++ entry->multi = new; ++ entry->ptype = 1; ++ debug_print_probes(entry); ++ return old; ++} ++ ++static struct marker_probe_closure * ++marker_entry_remove_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0, nr_del = 0, i; ++ struct marker_probe_closure *old, *new; ++ ++ old = entry->multi; ++ ++ debug_print_probes(entry); ++ if (!entry->ptype) { ++ /* 0 -> N is an error */ ++ WARN_ON(entry->single.func == __mark_empty_function); ++ /* 1 -> 0 probes */ ++ WARN_ON(probe && entry->single.func != probe); ++ WARN_ON(entry->single.probe_private != probe_private); ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* (N -> M), (N > 1, M >= 0) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { ++ if ((!probe || old[nr_probes].func == probe) ++ && old[nr_probes].probe_private ++ == probe_private) ++ nr_del++; ++ } ++ } ++ ++ if (nr_probes - nr_del == 0) { ++ /* N -> 0, (N > 1) */ ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ } else if (nr_probes - nr_del == 1) { ++ /* N -> 1, (N > 1) */ ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ entry->single = old[i]; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ } else { ++ int j = 0; ++ /* N -> M, (N > 1, M > 1) */ ++ /* + 1 for NULL */ ++ new = kzalloc((nr_probes - nr_del + 1) ++ * sizeof(struct marker_probe_closure), GFP_KERNEL); ++ if (new == NULL) ++ return 
ERR_PTR(-ENOMEM); ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ new[j++] = old[i]; ++ entry->refcount = nr_probes - nr_del; ++ entry->ptype = 1; ++ entry->multi = new; ++ } ++ debug_print_probes(entry); ++ return old; ++} ++ ++/* ++ * Get marker if the marker is present in the marker hash table. ++ * Must be called with markers_mutex held. ++ * Returns NULL if not present. ++ */ ++static struct marker_entry *get_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ u32 hash = jhash(name, strlen(name), 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) ++ return e; ++ } ++ return NULL; ++} ++ ++/* ++ * Add the marker to the marker hash table. Must be called with markers_mutex ++ * held. ++ */ ++static struct marker_entry *add_marker(const char *name, const char *format) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ size_t format_len = 0; ++ u32 hash = jhash(name, name_len-1, 0); ++ ++ if (format) ++ format_len = strlen(format) + 1; ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ printk(KERN_NOTICE ++ "Marker %s busy\n", name); ++ return ERR_PTR(-EBUSY); /* Already there */ ++ } ++ } ++ /* ++ * Using kmalloc here to allocate a variable length element. Could ++ * cause some memory fragmentation if overused. 
++ */ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return ERR_PTR(-ENOMEM); ++ memcpy(&e->name[0], name, name_len); ++ if (format) { ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ } else { ++ e->format = NULL; ++ e->call = marker_probe_cb; ++ } ++ e->single.func = __mark_empty_function; ++ e->single.probe_private = NULL; ++ e->multi = NULL; ++ e->ptype = 0; ++ e->refcount = 0; ++ e->rcu_pending = 0; ++ hlist_add_head(&e->hlist, head); ++ return e; ++} ++ ++/* ++ * Remove the marker from the marker hash table. Must be called with markers_mutex ++ * held. ++ */ ++static int remove_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ int found = 0; ++ size_t len = strlen(name) + 1; ++ u32 hash = jhash(name, len-1, 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) ++ return -ENOENT; ++ if (e->single.func != __mark_empty_function) ++ return -EBUSY; ++ hlist_del(&e->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if (e->rcu_pending) ++ rcu_barrier(); ++ kfree(e); ++ return 0; ++} ++ ++/* ++ * Set the mark_entry format to the format found in the element.
++ */ ++static int marker_set_format(struct marker_entry **entry, const char *format) ++{ ++ struct marker_entry *e; ++ size_t name_len = strlen((*entry)->name) + 1; ++ size_t format_len = strlen(format) + 1; ++ ++ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return -ENOMEM; ++ memcpy(&e->name[0], (*entry)->name, name_len); ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ e->single = (*entry)->single; ++ e->multi = (*entry)->multi; ++ e->ptype = (*entry)->ptype; ++ e->refcount = (*entry)->refcount; ++ e->rcu_pending = 0; ++ hlist_add_before(&e->hlist, &(*entry)->hlist); ++ hlist_del(&(*entry)->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if ((*entry)->rcu_pending) ++ rcu_barrier(); ++ kfree(*entry); ++ *entry = e; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ return 0; ++} ++ ++/* ++ * Sets the probe callback corresponding to one marker. ++ */ ++static int set_marker(struct marker_entry **entry, struct marker *elem, ++ int active) ++{ ++ int ret; ++ WARN_ON(strcmp((*entry)->name, elem->name) != 0); ++ ++ if ((*entry)->format) { ++ if (strcmp((*entry)->format, elem->format) != 0) { ++ printk(KERN_NOTICE ++ "Format mismatch for probe %s " ++ "(%s), marker (%s)\n", ++ (*entry)->name, ++ (*entry)->format, ++ elem->format); ++ return -EPERM; ++ } ++ } else { ++ ret = marker_set_format(entry, elem->format); ++ if (ret) ++ return ret; ++ } ++ ++ /* ++ * probe_cb setup (statically known) is done here. It is ++ * asynchronous with the rest of execution, therefore we only ++ * pass from a "safe" callback (with argument) to an "unsafe" ++ * callback (does not set arguments). 
++ */ ++ elem->call = (*entry)->call; ++ /* ++ * Sanity check : ++ * We only update the single probe private data when the ptr is ++ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) ++ */ ++ WARN_ON(elem->single.func != __mark_empty_function ++ && elem->single.probe_private ++ != (*entry)->single.probe_private && ++ !elem->ptype); ++ elem->single.probe_private = (*entry)->single.probe_private; ++ /* ++ * Make sure the private data is valid when we update the ++ * single probe ptr. ++ */ ++ smp_wmb(); ++ elem->single.func = (*entry)->single.func; ++ /* ++ * We also make sure that the new probe callbacks array is consistent ++ * before setting a pointer to it. ++ */ ++ rcu_assign_pointer(elem->multi, (*entry)->multi); ++ /* ++ * Update the function or multi probe array pointer before setting the ++ * ptype. ++ */ ++ smp_wmb(); ++ elem->ptype = (*entry)->ptype; ++ elem->state = active; ++ ++ return 0; ++} ++ ++/* ++ * Disable a marker and its probe callback. ++ * Note: only after a synchronize_sched() issued after setting elem->call to the ++ * empty function insures that the original callback is not used anymore. This ++ * insured by preemption disabling around the call site. ++ */ ++static void disable_marker(struct marker *elem) ++{ ++ /* leave "call" as is. It is known statically. */ ++ elem->state = 0; ++ elem->single.func = __mark_empty_function; ++ /* Update the function before setting the ptype */ ++ smp_wmb(); ++ elem->ptype = 0; /* single probe */ ++ /* ++ * Leave the private data and id there, because removal is racy and ++ * should be done only after a synchronize_sched(). These are never used ++ * until the next initialization anyway. ++ */ ++} ++ ++/** ++ * marker_update_probe_range - Update a probe range ++ * @begin: beginning of the range ++ * @end: end of the range ++ * ++ * Updates the probe callback corresponding to a range of markers. 
++ */ ++void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ ++ struct marker *iter; ++ struct marker_entry *mark_entry; ++ ++ mutex_lock(&markers_mutex); ++ for (iter = begin; iter < end; iter++) { ++ mark_entry = get_marker(iter->name); ++ if (mark_entry) { ++ set_marker(&mark_entry, iter, ++ !!mark_entry->refcount); ++ /* ++ * ignore error, continue ++ */ ++ } else { ++ disable_marker(iter); ++ } ++ } ++ mutex_unlock(&markers_mutex); ++} ++ ++/* ++ * Update probes, removing the faulty probes. ++ * Issues a synchronize_sched() when no reference to the module passed ++ * as parameter is found in the probes so the probe module can be ++ * safely unloaded from now on. ++ * ++ * Internal callback only changed before the first probe is connected to it. ++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 ++ * transitions. All other transitions will leave the old private data valid. ++ * This makes the non-atomicity of the callback/private data updates valid. ++ * ++ * "special case" updates : ++ * 0 -> 1 callback ++ * 1 -> 0 callback ++ * 1 -> 2 callbacks ++ * 2 -> 1 callbacks ++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. ++ * Side effect : marker_set_format may delete the marker entry (creating a ++ * replacement). ++ */ ++static void marker_update_probes(void) ++{ ++ /* Core kernel markers */ ++ marker_update_probe_range(__start___markers, __stop___markers); ++ /* Markers in modules. */ ++ module_update_markers(); ++} ++ ++/** ++ * marker_probe_register - Connect a probe to a marker ++ * @name: marker name ++ * @format: format string ++ * @probe: probe handler ++ * @probe_private: probe private data ++ * ++ * private data must be a valid allocated memory address, or NULL. ++ * Returns 0 if ok, error value on error. ++ * The probe address must at least be aligned on the architecture pointer size.
++ */ ++int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ int ret = 0; ++ struct marker_probe_closure *old; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ if (!entry) { ++ entry = add_marker(name, format); ++ if (IS_ERR(entry)) { ++ ret = PTR_ERR(entry); ++ goto end; ++ } ++ } ++ /* ++ * If we detect that a call_rcu is pending for this marker, ++ * make sure it's executed now. ++ */ ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_add_probe(entry, probe, probe_private); ++ if (IS_ERR(old)) { ++ ret = PTR_ERR(old); ++ goto end; ++ } ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ WARN_ON(!entry); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_register); ++ ++/** ++ * marker_probe_unregister - Disconnect a probe from a marker ++ * @name: marker name ++ * @probe: probe function pointer ++ * @probe_private: probe private data ++ * ++ * Returns the private data given to marker_probe_register, or an ERR_PTR(). ++ * We do not need to call a synchronize_sched to make sure the probes have ++ * finished running before doing a module unload, because the module unload ++ * itself uses stop_machine(), which insures that every preempt disabled section ++ * have finished. 
++ */ ++int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ struct marker_probe_closure *old; ++ int ret = 0; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, probe, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister); ++ ++static struct marker_entry * ++get_marker_from_private_data(marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ unsigned int i; ++ struct hlist_head *head; ++ struct hlist_node *node; ++ ++ for (i = 0; i < MARKER_TABLE_SIZE; i++) { ++ head = &marker_table[i]; ++ hlist_for_each_entry(entry, node, head, hlist) { ++ if (!entry->ptype) { ++ if (entry->single.func == probe ++ && entry->single.probe_private ++ == probe_private) ++ return entry; ++ } else { ++ struct marker_probe_closure *closure; ++ closure = entry->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func == probe && ++ closure[i].probe_private ++ == probe_private) ++ return entry; ++ } ++ } ++ } ++ } ++ return NULL; ++} ++ ++/** ++ * marker_probe_unregister_private_data - Disconnect a probe from a marker ++ * @probe: probe function ++ * @probe_private: probe private data ++ * ++ * Unregister a probe by providing the registered private data. ++ * Only removes the first marker found in hash table. ++ * Return 0 on success or error value. 
++ * We do not need to call a synchronize_sched to make sure the probes have ++ * finished running before doing a module unload, because the module unload ++ * itself uses stop_machine(), which insures that every preempt disabled section ++ * have finished. ++ */ ++int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private) ++{ ++ struct marker_entry *entry; ++ int ret = 0; ++ struct marker_probe_closure *old; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, NULL, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ WARN_ON(!entry); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(entry->name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); ++ ++/** ++ * marker_get_private_data - Get a marker's probe private data ++ * @name: marker name ++ * @probe: probe to match ++ * @num: get the nth matching probe's private data ++ * ++ * Returns the nth private data pointer (starting from 0) matching, or an ++ * ERR_PTR. ++ * Returns the private data pointer, or an ERR_PTR. ++ * The private data pointer should _only_ be dereferenced if the caller is the ++ * owner of the data, or its content could vanish. This is mostly used to ++ * confirm that a caller is the owner of a registered probe. 
++ */ ++void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ u32 hash = jhash(name, name_len-1, 0); ++ int i; ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ if (!e->ptype) { ++ if (num == 0 && e->single.func == probe) ++ return e->single.probe_private; ++ else ++ break; ++ } else { ++ struct marker_probe_closure *closure; ++ int match = 0; ++ closure = e->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func != probe) ++ continue; ++ if (match++ == num) ++ return closure[i].probe_private; ++ } ++ } ++ } ++ } ++ return ERR_PTR(-ENOENT); ++} ++EXPORT_SYMBOL_GPL(marker_get_private_data); +diff --git a/kernel/module.c b/kernel/module.c +index 18b39bc..096c3dc 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -1504,6 +1504,8 @@ static struct module *load_module(void __user *umod, + void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ + struct exception_table_entry *extable; + int gpgsig_ok; ++ unsigned int markersindex; ++ unsigned int markersstringsindex; + + DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", + umod, len, uargs); +@@ -1734,6 +1736,9 @@ static struct module *load_module(void __user *umod, + tainted |= TAINT_FORCED_MODULE; + } + #endif ++ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); ++ markersstringsindex = find_sec(hdr, sechdrs, secstrings, ++ "__markers_strings"); + + /* Now do relocations. 
*/ + for (i = 1; i < hdr->e_shnum; i++) { +@@ -1757,6 +1762,12 @@ static struct module *load_module(void __user *umod, + goto cleanup; + } + ++#ifdef CONFIG_MARKERS ++ mod->markers = (void *)sechdrs[markersindex].sh_addr; ++ mod->num_markers = ++ sechdrs[markersindex].sh_size / sizeof(*mod->markers); ++#endif ++ + /* Set up and sort exception table */ + mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); + mod->extable = extable = (void *)sechdrs[exindex].sh_addr; +@@ -1768,6 +1779,12 @@ static struct module *load_module(void __user *umod, + + add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + ++#ifdef CONFIG_MARKERS ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++#endif ++ + err = module_finalize(hdr, sechdrs, mod); + if (err < 0) + goto cleanup; +@@ -2175,6 +2192,20 @@ void struct_module(struct module *mod) { return; } + EXPORT_SYMBOL(struct_module); + #endif + ++#ifdef CONFIG_MARKERS ++void module_update_markers(void) ++{ ++ struct module *mod; ++ ++ down(&module_mutex); ++ list_for_each_entry(mod, &modules, list) ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++ up(&module_mutex); ++} ++#endif ++ + static int __init modules_init(void) + { + return subsystem_register(&module_subsys); +diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c +index 1b16bfc..20ade31 100644 +--- a/kernel/rcupdate.c ++++ b/kernel/rcupdate.c +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + + /* Definition for rcupdate control block. */ + struct rcu_ctrlblk rcu_ctrlblk = +@@ -98,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head, + local_irq_restore(flags); + } + ++static atomic_t rcu_barrier_cpu_count; ++static DEFINE_MUTEX(rcu_barrier_mutex); ++static struct completion rcu_barrier_completion; ++ + /** + * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * @head: structure to be used for queueing the RCU updates. 
+@@ -410,6 +415,44 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, + return NOTIFY_OK; + } + ++static void rcu_barrier_callback(struct rcu_head *notused) ++{ ++ if (atomic_dec_and_test(&rcu_barrier_cpu_count)) ++ complete(&rcu_barrier_completion); ++} ++ ++/* ++ * Called with preemption disabled, and from cross-cpu IRQ context. ++ */ ++static void rcu_barrier_func(void *notused) ++{ ++ int cpu = smp_processor_id(); ++ struct rcu_data *rdp = &per_cpu(rcu_data, cpu); ++ struct rcu_head *head; ++ ++ head = &rdp->barrier; ++ atomic_inc(&rcu_barrier_cpu_count); ++ call_rcu(head, rcu_barrier_callback); ++} ++ ++/** ++ * rcu_barrier - Wait until all the in-flight RCUs are complete. ++ */ ++void rcu_barrier(void) ++{ ++ BUG_ON(in_interrupt()); ++ /* Serialize rcu_barrier() callers (shared counter/completion) */ ++ mutex_lock(&rcu_barrier_mutex); ++ init_completion(&rcu_barrier_completion); ++ atomic_set(&rcu_barrier_cpu_count, 0); ++ on_each_cpu(rcu_barrier_func, NULL, 0, 1); ++ wait_for_completion(&rcu_barrier_completion); ++ mutex_unlock(&rcu_barrier_mutex); ++} ++EXPORT_SYMBOL_GPL(rcu_barrier); ++ ++ ++ + static struct notifier_block __devinitdata rcu_nb = { + .notifier_call = rcu_cpu_notify, + }; +diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost +index b3d31b5..b100a32 100644 +--- a/scripts/Makefile.modpost ++++ b/scripts/Makefile.modpost +@@ -13,6 +13,7 @@ + # 2) modpost is then used to + # 3) create one .mod.c file pr.
module + # 4) create one Module.symvers file with CRC for all exported symbols ++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers + # 5) compile all .mod.c files + # 6) final link of the module to a file + +@@ -40,6 +41,11 @@ include scripts/Makefile.lib + + symverfile := $(objtree)/Module.symvers + ++kernelmarkersfile := $(objtree)/Module.markers ++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers ++ ++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) ++ + # Step 1), find all modules listed in $(MODVERDIR)/ + __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) + modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) +@@ -53,16 +59,27 @@ quiet_cmd_modpost = MODPOST + cmd_modpost = scripts/mod/modpost \ + $(if $(CONFIG_MODVERSIONS),-m) \ + $(if $(KBUILD_EXTMOD),-i,-o) $(symverfile) \ ++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ ++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ + $(filter-out FORCE,$^) + + .PHONY: __modpost + __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE + $(call cmd,modpost) + ++quiet_cmd_kernel-mod = MODPOST $@ ++ cmd_kernel-mod = $(cmd_modpost) $@ ++ ++vmlinux.o: FORCE ++ $(call cmd,kernel-mod) ++ + # Declare generated files as targets for modpost + $(symverfile): __modpost ; + $(modules:.ko=.mod.c): __modpost ; + ++ifdef CONFIG_MARKERS ++$(markersfile): __modpost ; ++endif + + # Step 5), compile all *.mod.c files + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 2a174e5..c25948c 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -10,7 +10,8 @@ + * + * Usage: modpost vmlinux module1.o module2.o ... 
+ */ +- ++#define _GNU_SOURCE ++#include + #include + #include "modpost.h" + +@@ -289,7 +290,9 @@ parse_elf(struct elf_info *info, const char *filename) + if (strcmp(secstrings+sechdrs[i].sh_name, ".modinfo") == 0) { + info->modinfo = (void *)hdr + sechdrs[i].sh_offset; + info->modinfo_len = sechdrs[i].sh_size; +- } ++ } else if (strcmp(secstrings+sechdrs[i].sh_name, "__markers_strings") == 0) ++ info->markers_strings_sec = i; ++ + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; + +@@ -397,6 +400,63 @@ is_vmlinux(const char *modname) + return strcmp(myname, "vmlinux") == 0; + } + ++static void get_markers(struct elf_info *info, struct module *mod) ++{ ++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; ++ const char *strings = (const char *) info->hdr + sh->sh_offset; ++ const Elf_Sym *sym, *first_sym, *last_sym; ++ size_t n; ++ ++ ++ if (!info->markers_strings_sec) ++ return; ++ ++ /* ++ * First count the strings. We look for all the symbols defined ++ * in the __markers_strings section named __mstrtab_*. For ++ * these local names, the compiler puts a random .NNN suffix on, ++ * so the names don't correspond exactly. ++ */ ++ first_sym = last_sym = NULL; ++ n = 0; ++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ if (first_sym == NULL) ++ first_sym = sym; ++ last_sym = sym; ++ ++n; ++ } ++ ++ if (n == 0) ++ return; ++ /* ++ * Now collect each name and format into a line for the output. ++ * Lines look like: ++ * marker_name vmlinux marker %s format %d ++ * The format string after the second \t can use whitespace. 
++ */ ++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); ++ mod->nmarkers = n; ++ ++ n = 0; ++ for (sym = first_sym; sym <= last_sym; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ const char *name = strings + sym->st_value; ++ const char *fmt = strchr(name, '\0') + 1; ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ mod->markers[n++] = line; ++ } ++} ++ ++ + void + read_symbols(char *modname) + { +@@ -426,6 +486,7 @@ read_symbols(char *modname) + } + maybe_frob_version(modname, info.modinfo, info.modinfo_len, + (void *)info.modinfo - (void *)info.hdr); ++ get_markers(&info, mod); + parse_elf_finish(&info); + + /* Our trick to get versioning for struct_module - it's +@@ -682,6 +743,92 @@ write_dump(const char *fname) + write_if_changed(&buf, fname); + } + ++static void add_marker(struct module *mod, const char *name, const char *fmt) ++{ ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ ++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * ++ sizeof mod->markers[0]))); ++ mod->markers[mod->nmarkers++] = line; ++} ++ ++static void read_markers(const char *fname) ++{ ++ unsigned long size, pos = 0; ++ void *file = grab_file(fname, &size); ++ char *line; ++ ++ if (!file) /* No old markers, silently ignore */ ++ return; ++ ++ while ((line = get_next_line(&pos, file, size))) { ++ char *marker, *modname, *fmt; ++ struct module *mod; ++ ++ marker = line; ++ modname = strchr(marker, '\t'); ++ if (!modname) ++ goto fail; ++ *modname++ = '\0'; ++ fmt = strchr(modname, '\t'); ++ if (!fmt) ++ goto fail; ++ *fmt++ = '\0'; ++ if (*marker == '\0' || *modname == '\0') ++ goto fail; ++ ++ mod = find_module(modname); ++ if (!mod) { ++ if (is_vmlinux(modname)) ++ have_vmlinux = 1; ++ mod = 
new_module(NOFAIL(strdup(modname))); ++ mod->skip = 1; ++ } ++ ++ add_marker(mod, marker, fmt); ++ } ++ return; ++fail: ++ fatal("parse error in markers list file\n"); ++} ++ ++static int compare_strings(const void *a, const void *b) ++{ ++ return strcmp(*(const char **) a, *(const char **) b); ++} ++ ++static void write_markers(const char *fname) ++{ ++ struct buffer buf = { }; ++ struct module *mod; ++ size_t i; ++ ++ for (mod = modules; mod; mod = mod->next) ++ if (mod->markers != NULL) { ++ /* ++ * Sort the strings so we can skip duplicates when ++ * we write them out. ++ */ ++ qsort(mod->markers, mod->nmarkers, ++ sizeof mod->markers[0], &compare_strings); ++ for (i = 0; i < mod->nmarkers; ++i) { ++ char *line = mod->markers[i]; ++ buf_write(&buf, line, strlen(line)); ++ while (i + 1 < mod->nmarkers && ++ !strcmp(mod->markers[i], ++ mod->markers[i + 1])) ++ free(mod->markers[i++]); ++ free(mod->markers[i]); ++ } ++ free(mod->markers); ++ mod->markers = NULL; ++ } ++ ++ write_if_changed(&buf, fname); ++} ++ + int + main(int argc, char **argv) + { +@@ -690,8 +837,10 @@ main(int argc, char **argv) + char fname[SZ]; + char *dump_read = NULL, *dump_write = NULL; + int opt; ++ char *markers_read = NULL; ++ char *markers_write = NULL; + +- while ((opt = getopt(argc, argv, "i:mo:")) != -1) { ++ while ((opt = getopt(argc, argv, "i:mo:M:K:")) != -1) { + switch(opt) { + case 'i': + dump_read = optarg; +@@ -702,6 +851,12 @@ main(int argc, char **argv) + case 'o': + dump_write = optarg; + break; ++ case 'M': ++ markers_write = optarg; ++ break; ++ case 'K': ++ markers_read = optarg; ++ break; + default: + exit(1); + } +@@ -732,6 +887,12 @@ main(int argc, char **argv) + if (dump_write) + write_dump(dump_write); + ++ if (markers_read) ++ read_markers(markers_read); ++ ++ if (markers_write) ++ write_markers(markers_write); ++ + return 0; + } + +diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h +index 4871343..d79d7ea 100644 +--- a/scripts/mod/modpost.h ++++ 
b/scripts/mod/modpost.h +@@ -18,6 +18,7 @@ + #define Elf_Sym Elf32_Sym + #define ELF_ST_BIND ELF32_ST_BIND + #define ELF_ST_TYPE ELF32_ST_TYPE ++#define Elf_Section Elf32_Half + + #else + +@@ -26,7 +27,7 @@ + #define Elf_Sym Elf64_Sym + #define ELF_ST_BIND ELF64_ST_BIND + #define ELF_ST_TYPE ELF64_ST_TYPE +- ++#define Elf_Section Elf64_Half + #endif + + #if KERNEL_ELFDATA != HOST_ELFDATA +@@ -77,6 +78,8 @@ struct module { + int has_init; + int has_cleanup; + struct buffer dev_table_buf; ++ char **markers; ++ size_t nmarkers; + }; + + struct elf_info { +@@ -85,6 +88,7 @@ struct elf_info { + Elf_Shdr *sechdrs; + Elf_Sym *symtab_start; + Elf_Sym *symtab_stop; ++ Elf_Section markers_strings_sec; + const char *strtab; + char *modinfo; + unsigned int modinfo_len; Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch (rev 0) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch 2008-07-04 02:18:23 UTC (rev 6) @@ -0,0 +1,1917 @@ +diff --git a/Makefile b/Makefile +index 5bfc101..d495f16 100644 +--- a/Makefile ++++ b/Makefile +@@ -583,7 +583,7 @@ quiet_cmd_vmlinux__ ?= LD $@ + cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ + -T $(vmlinux-lds) $(vmlinux-init) \ + --start-group $(vmlinux-main) --end-group \ +- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) ++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) + + # Generate new vmlinux version + quiet_cmd_vmlinux_version = GEN .version +@@ -688,10 +688,30 @@ $(KALLSYMS): scripts ; + + endif # ifdef CONFIG_KALLSYMS + ++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has ++# relevant sections renamed as per the linker script. 
++quiet_cmd_vmlinux-modpost = LD $@ ++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ ++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ ++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) ++define rule_vmlinux-modpost ++ : ++ +$(call cmd,vmlinux-modpost) ++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ ++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd ++endef ++ ++ + # vmlinux image - including updated kernel symbols +-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE ++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE ++ $(call vmlinux-modpost) + $(call if_changed_rule,vmlinux__) + ++# build vmlinux.o first to catch section mismatch errors early ++$(kallsyms.o): vmlinux.o ++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ++ $(call if_changed_rule,vmlinux-modpost) ++ + # The actual objects are generated when descending, + # make sure no implicit rule kicks in + $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; +diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug +index cf069b7..d39d5c5 100644 +--- a/arch/i386/Kconfig.debug ++++ b/arch/i386/Kconfig.debug +@@ -29,6 +29,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL +diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S +index e8db99c..b846b21 100644 +--- a/arch/i386/kernel/vmlinux.lds.S ++++ b/arch/i386/kernel/vmlinux.lds.S +@@ -53,6 +53,7 @@ SECTIONS + /* writeable */ + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug +index 2782b48..17d4a44 100644 +--- a/arch/ia64/Kconfig.debug ++++ b/arch/ia64/Kconfig.debug +@@ -12,6 +12,12 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. + + choice + prompt "Physical memory granularity" +diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S +index a676e79..c6ea47e 100644 +--- a/arch/ia64/kernel/vmlinux.lds.S ++++ b/arch/ia64/kernel/vmlinux.lds.S +@@ -193,7 +193,7 @@ SECTIONS + + data : { } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) +- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } ++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) +diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug +index 860472a..5c3a9f5 100644 +--- a/arch/ppc64/Kconfig.debug ++++ b/arch/ppc64/Kconfig.debug +@@ -16,6 +16,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL +diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S +index 4103cc1..76d0bf0 100644 +--- a/arch/ppc64/kernel/vmlinux.lds.S ++++ b/arch/ppc64/kernel/vmlinux.lds.S +@@ -118,6 +118,7 @@ SECTIONS + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) ++ MARKER + } + + .opd : { +diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug +index cd8d39f..f39bf8d 100644 +--- a/arch/sparc64/Kconfig.debug ++++ b/arch/sparc64/Kconfig.debug +@@ -21,6 +21,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + config DEBUG_DCFLUSH + bool "D-cache flush debugging" + depends on DEBUG_KERNEL +diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S +index a710d38..d4b3b62 100644 +--- a/arch/sparc64/kernel/vmlinux.lds.S ++++ b/arch/sparc64/kernel/vmlinux.lds.S +@@ -27,6 +27,7 @@ SECTIONS + .data : + { + *(.data) ++ MARKER + CONSTRUCTORS + } + .data1 : { *(.data1) } +diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug +index cb33186..d0260cb 100644 +--- a/arch/x86_64/Kconfig.debug ++++ b/arch/x86_64/Kconfig.debug +@@ -55,6 +55,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config IOMMU_LEAK + bool "IOMMU leak tracing" + depends on DEBUG_KERNEL +diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S +index 053c826..3b3928f 100644 +--- a/arch/x86_64/kernel/vmlinux.lds.S ++++ b/arch/x86_64/kernel/vmlinux.lds.S +@@ -33,6 +33,7 @@ SECTIONS + + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index 181f3d2..c7c5dc6 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -158,7 +158,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); + + // ================= main 802.3ad protocol functions ================== + static int ad_lacpdu_send(struct port *port); +-static int ad_marker_send(struct port *port, struct marker *marker); ++static int ad_marker_send(struct port *port, struct bond_marker *marker); + static void ad_mux_machine(struct port *port); + static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); + static void ad_tx_machine(struct port *port); +@@ -171,8 +171,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast); + static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); + static void ad_enable_collecting_distributing(struct port *port); + static void ad_disable_collecting_distributing(struct port *port); +-static void ad_marker_info_received(struct marker *marker_info, struct port *port); +-static void ad_marker_response_received(struct marker *marker, struct port *port); ++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); ++static void ad_marker_response_received(struct bond_marker *marker, struct port *port); + + + ///////////////////////////////////////////////////////////////////////////////// +@@ -945,12 +945,13 @@ static int ad_lacpdu_send(struct port *port) + * Returns: 0 on success + * < 0 on error + */ +-static int ad_marker_send(struct port *port, struct marker *marker) 
++static int ad_marker_send(struct port *port, struct bond_marker *marker) + { + struct slave *slave = port->slave; + struct sk_buff *skb; +- struct marker_header *marker_header; +- int length = sizeof(struct marker_header); ++ struct bond_marker_header *marker_header; ++ int length = sizeof(struct bond_marker_header); ++ + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length + 16); +@@ -965,7 +966,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + +- marker_header = (struct marker_header *)skb_put(skb, length); ++ marker_header = (struct bond_marker_header *)skb_put(skb, length); + + marker_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it +@@ -1762,7 +1763,7 @@ static void ad_disable_collecting_distributing(struct port *port) + */ + static void ad_marker_info_send(struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + u16 index; + + // fill the marker PDU with the appropriate values +@@ -1795,13 +1796,14 @@ static void ad_marker_info_send(struct port *port) + * @port: the port we're looking at + * + */ +-static void ad_marker_info_received(struct marker *marker_info,struct port *port) ++static void ad_marker_info_received(struct bond_marker *marker_info, ++ struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + + // copy the received marker data to the response marker + //marker = *marker_info; +- memcpy(&marker, marker_info, sizeof(struct marker)); ++ memcpy(&marker, marker_info, sizeof(struct bond_marker)); + // change the marker subtype to marker response + marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; + // send the marker response +@@ -1820,7 +1822,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port + * response for marker PDU's, in 
this stage, but only to respond to marker + * information. + */ +-static void ad_marker_response_received(struct marker *marker, struct port *port) ++static void ad_marker_response_received(struct bond_marker *marker, ++ struct port *port) + { + marker=NULL; // just to satisfy the compiler + port=NULL; // just to satisfy the compiler +@@ -2208,15 +2211,16 @@ void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 leng + case AD_TYPE_MARKER: + // No need to convert fields to Little Endian since we don't use the marker's fields. + +- switch (((struct marker *)lacpdu)->tlv_type) { ++ switch (((struct bond_marker *)lacpdu)->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + dprintk("Received Marker Information on port %d\n", port->actor_port_number); +- ad_marker_info_received((struct marker *)lacpdu, port); ++ ad_marker_info_received((struct bond_marker *)lacpdu, port); ++ + break; + + case AD_MARKER_RESPONSE_SUBTYPE: + dprintk("Received Marker Response on port %d\n", port->actor_port_number); +- ad_marker_response_received((struct marker *)lacpdu, port); ++ ad_marker_response_received((struct bond_marker *)lacpdu, port); + break; + + default: +diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h +index 4c60b17..2bb477e 100644 +--- a/drivers/net/bonding/bond_3ad.h ++++ b/drivers/net/bonding/bond_3ad.h +@@ -105,7 +105,8 @@ typedef enum { + typedef enum { + AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype + AD_MARKER_RESPONSE_SUBTYPE // marker response subtype +-} marker_subtype_t; ++} bond_marker_subtype_t; ++ + + // timers types(43.4.9 in the 802.3ad standard) + typedef enum { +@@ -161,7 +162,7 @@ typedef struct lacpdu_header { + } lacpdu_header_t; + + // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) +-typedef struct marker { ++typedef struct bond_marker { + u8 subtype; // = 0x02 (marker PDU) + u8 version_number; // = 0x01 + u8 tlv_type; // = 0x01 (marker information) +@@ -174,12 
+175,12 @@ typedef struct marker { + u8 tlv_type_terminator; // = 0x00 + u8 terminator_length; // = 0x00 + u8 reserved_90[90]; // = 0 +-} marker_t; ++} bond_marker_t; + +-typedef struct marker_header { ++typedef struct bond_marker_header { + struct ad_header ad_header; +- struct marker marker; +-} marker_header_t; ++ struct bond_marker marker; ++} bond_marker_header_t; + + #pragma pack() + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index bb340cb..fd6e57f 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -6,10 +6,18 @@ + #define VMLINUX_SYMBOL(_sym_) _sym_ + #endif + ++/* Kernel markers : pointers */ ++#define MARKER \ ++ . = ALIGN(8); \ ++ VMLINUX_SYMBOL(__start___markers) = .; \ ++ *(__markers) \ ++ VMLINUX_SYMBOL(__stop___markers) = .; ++ + #define RODATA \ + .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ + *(.rodata) *(.rodata.*) \ + *(__vermagic) /* Kernel version magic */ \ ++ *(__markers_strings) /* Markers: strings */ \ + } \ + \ + .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ +diff --git a/include/linux/marker.h b/include/linux/marker.h +new file mode 100644 +index 0000000..efbc82b +--- /dev/null ++++ b/include/linux/marker.h +@@ -0,0 +1,139 @@ ++#ifndef _LINUX_MARKER_H ++#define _LINUX_MARKER_H ++ ++/* ++ * Code markup for dynamic and static tracing. ++ * ++ * See Documentation/marker.txt. ++ * ++ * (C) Copyright 2006 Mathieu Desnoyers ++ * ++ * This file is released under the GPLv2. ++ * See the file COPYING for more details. ++ */ ++ ++#include ++ ++struct module; ++struct marker; ++ ++/** ++ * marker_probe_func - Type of a marker probe function ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @args: variable argument list pointer. Use a pointer to overcome C's ++ * inability to pass this around as a pointer in a portable manner in ++ * the callee otherwise. ++ * ++ * Type of marker probe functions. 
They receive the mdata and need to parse the ++ * format string to recover the variable argument list. ++ */ ++typedef void marker_probe_func(void *probe_private, void *call_private, ++ const char *fmt, va_list *args); ++ ++struct marker_probe_closure { ++ marker_probe_func *func; /* Callback */ ++ void *probe_private; /* Private probe data */ ++}; ++ ++struct marker { ++ const char *name; /* Marker name */ ++ const char *format; /* Marker format string, describing the ++ * variable argument list. ++ */ ++ char state; /* Marker state. */ ++ char ptype; /* probe type : 0 : single, 1 : multi */ ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++} __attribute__((aligned(8))); ++ ++#ifdef CONFIG_MARKERS ++ ++/* ++ * Note : the empty asm volatile with read constraint is used here instead of a ++ * "used" attribute to fix a gcc 4.1.x bug. ++ * Make sure the alignment of the structure in the __markers section will ++ * not add unwanted padding between the beginning of the section and the ++ * structure. Force alignment to the same alignment as the section start. ++ */ ++#define __trace_mark(name, call_private, format, args...) 
\ ++ do { \ ++ static const char __mstrtab_##name[] \ ++ __attribute__((section("__markers_strings"))) \ ++ = #name "\0" format; \ ++ static struct marker __mark_##name \ ++ __attribute__((section("__markers"), aligned(8))) = \ ++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ ++ 0, 0, marker_probe_cb, \ ++ { __mark_empty_function, NULL}, NULL }; \ ++ __mark_check_format(format, ## args); \ ++ if (unlikely(__mark_##name.state)) { \ ++ (*__mark_##name.call) \ ++ (&__mark_##name, call_private, \ ++ format, ## args); \ ++ } \ ++ } while (0) ++ ++extern void marker_update_probe_range(struct marker *begin, ++ struct marker *end); ++#else /* !CONFIG_MARKERS */ ++#define __trace_mark(name, call_private, format, args...) \ ++ __mark_check_format(format, ## args) ++static inline void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ } ++#endif /* CONFIG_MARKERS */ ++ ++/** ++ * trace_mark - Marker ++ * @name: marker name, not quoted. ++ * @format: format string ++ * @args...: variable argument list ++ * ++ * Places a marker. ++ */ ++#define trace_mark(name, format, args...) \ ++ __trace_mark(name, NULL, format, ## args) ++ ++/** ++ * MARK_NOARGS - Format string for a marker with no argument. ++ */ ++#define MARK_NOARGS " " ++ ++/* To be used for string format validity checking with gcc */ ++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) ++{ ++} ++ ++extern marker_probe_func __mark_empty_function; ++ ++extern void marker_probe_cb(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++extern void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++ ++/* ++ * Connect a probe to a marker. ++ * private data pointer must be a valid allocated memory address, or NULL. 
++ */ ++extern int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private); ++ ++/* ++ * Returns the private data given to marker_probe_register. ++ */ ++extern int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private); ++/* ++ * Unregister a marker by providing the registered private data. ++ */ ++extern int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private); ++ ++extern void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num); ++ ++#endif +diff --git a/include/linux/module.h b/include/linux/module.h +index 8da8948..2ad5efd 100644 +--- a/include/linux/module.h ++++ b/include/linux/module.h +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -328,6 +329,10 @@ struct module + /* The command line arguments (may be mangled). People like + keeping pointers to this stuff */ + char *args; ++#ifdef CONFIG_MARKERS ++ struct marker *markers; ++ unsigned int num_markers; ++#endif + }; + + /* FIXME: It'd be nice to isolate modules during init, too, so they +@@ -448,6 +453,7 @@ int register_module_notifier(struct notifier_block * nb); + int unregister_module_notifier(struct notifier_block * nb); + + extern void print_modules(void); ++extern void module_update_markers(void); + #else /* !CONFIG_MODULES... 
*/ + #define EXPORT_SYMBOL(sym) + #define EXPORT_SYMBOL_GPL(sym) +@@ -537,6 +543,10 @@ static inline int unregister_module_notifier(struct notifier_block * nb) + static inline void print_modules(void) + { + } ++ ++static inline void module_update_markers(void) ++{ ++} + #endif /* CONFIG_MODULES */ + + #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index 91057d6..bcd0acb 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -42,6 +42,19 @@ + #include + #include + ++/* ++ * Prevent the compiler from merging or refetching accesses. The compiler ++ * is also forbidden from reordering successive instances of ACCESS_ONCE(), ++ * but only when the compiler is aware of some particular ordering. One way ++ * to make the compiler aware of ordering is to put the two invocations of ++ * ACCESS_ONCE() in different C statements. ++ * ++ * This macro does absolutely -nothing- to prevent the CPU from reordering, ++ * merging, or refetching absolutely anything at any time. 
++ * ++ */ ++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) ++ + /** + * struct rcu_head - callback structure for use with RCU + * @next: next update requests in a list +@@ -102,6 +115,7 @@ struct rcu_data { + struct rcu_head *donelist; + struct rcu_head **donetail; + int cpu; ++ struct rcu_head barrier; + }; + + DECLARE_PER_CPU(struct rcu_data, rcu_data); +@@ -266,6 +280,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head, + extern void FASTCALL(call_rcu_bh(struct rcu_head *head, + void (*func)(struct rcu_head *head))); + extern void synchronize_kernel(void); +- ++extern void rcu_barrier(void); + #endif /* __KERNEL__ */ + #endif /* __LINUX_RCUPDATE_H */ +diff --git a/kernel/Makefile b/kernel/Makefile +index 0b8c8ca..f8248bc 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o + obj-$(CONFIG_AUDITSYSCALL) += auditsc.o + obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o + obj-$(CONFIG_KPROBES) += kprobes.o ++obj-$(CONFIG_MARKERS) += marker.o + + ifneq ($(CONFIG_IA64),y) + # According to Alan Modra , the -fno-omit-frame-pointer is +diff --git a/kernel/marker.c b/kernel/marker.c +new file mode 100644 +index 0000000..c4c2cd8 +--- /dev/null ++++ b/kernel/marker.c +@@ -0,0 +1,851 @@ ++/* ++ * Copyright (C) 2007 Mathieu Desnoyers ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern struct marker __start___markers[]; ++extern struct marker __stop___markers[]; ++ ++/* Set to 1 to enable marker debug output */ ++const int marker_debug; ++ ++/* ++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin ++ * and module markers and the hash table. ++ */ ++static DEFINE_MUTEX(markers_mutex); ++ ++/* ++ * Marker hash table, containing the active markers. ++ * Protected by module_mutex. ++ */ ++#define MARKER_HASH_BITS 6 ++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) ++ ++/* ++ * Note about RCU : ++ * It is used to make sure every handler has finished using its private data ++ * between two consecutive operation (add or remove) on a given marker. It is ++ * also used to delay the free of multiple probes array until a quiescent state ++ * is reached. ++ * marker entries modifications are protected by the markers_mutex. ++ */ ++struct marker_entry { ++ struct hlist_node hlist; ++ char *format; ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++ int refcount; /* Number of times armed. 0 if disarmed. */ ++ struct rcu_head rcu; ++ void *oldptr; ++ char rcu_pending:1; ++ char ptype:1; ++ char name[0]; /* Contains name'\0'format'\0' */ ++}; ++ ++static struct hlist_head marker_table[MARKER_TABLE_SIZE]; ++ ++/** ++ * __mark_empty_function - Empty probe callback ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @...: variable argument list ++ * ++ * Empty callback provided as a probe to the markers. 
By providing this to a ++ * disabled marker, we make sure the execution flow is always valid even ++ * though the function pointer change and the marker enabling are two distinct ++ * operations that modifies the execution flow of preemptible code. ++ */ ++void __mark_empty_function(void *probe_private, void *call_private, ++ const char *fmt, va_list *args) ++{ ++} ++EXPORT_SYMBOL_GPL(__mark_empty_function); ++ ++/* ++ * marker_probe_cb Callback that prepares the variable argument list for probes. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we ++ * need to put a full smp_rmb() in this branch. This is why we do not use ++ * rcu_dereference() for the pointer read. ++ */ ++void marker_probe_cb(const struct marker *mdata, void *call_private, ++ const char *fmt, ...) ++{ ++ va_list args; ++ char ptype; ++ ++ /* ++ * disabling preemption to make sure the teardown of the callbacks can ++ * be done correctly when they are in modules and they insure RCU read ++ * coherency. ++ */ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ va_start(args, fmt); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ va_end(args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. 
Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. ++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) { ++ va_start(args, fmt); ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ va_end(args); ++ } ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb); ++ ++/* ++ * marker_probe_cb Callback that does not prepare the variable argument list. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Should be connected to markers "MARK_NOARGS". ++ */ ++void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...) ++{ ++ va_list args; /* not initialized */ ++ char ptype; ++ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. 
++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); ++ ++static void free_old_closure(struct rcu_head *head) ++{ ++ struct marker_entry *entry = container_of(head, ++ struct marker_entry, rcu); ++ kfree(entry->oldptr); ++ /* Make sure we free the data before setting the pending flag to 0 */ ++ smp_wmb(); ++ entry->rcu_pending = 0; ++} ++ ++static void debug_print_probes(struct marker_entry *entry) ++{ ++ int i; ++ ++ if (!marker_debug) ++ return; ++ ++ if (!entry->ptype) { ++ printk(KERN_DEBUG "Single probe : %p %p\n", ++ entry->single.func, ++ entry->single.probe_private); ++ } else { ++ for (i = 0; entry->multi[i].func; i++) ++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, ++ entry->multi[i].func, ++ entry->multi[i].probe_private); ++ } ++} ++ ++static struct marker_probe_closure * ++marker_entry_add_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0; ++ struct marker_probe_closure *old, *new; ++ ++ WARN_ON(!probe); ++ ++ debug_print_probes(entry); ++ old = entry->multi; ++ if (!entry->ptype) { ++ if (entry->single.func == probe && ++ entry->single.probe_private == probe_private) ++ return ERR_PTR(-EBUSY); ++ if (entry->single.func == __mark_empty_function) { ++ /* 0 -> 1 probes */ ++ entry->single.func = probe; ++ entry->single.probe_private = probe_private; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* 1 -> 2 probes */ ++ nr_probes = 1; ++ old = NULL; ++ } ++ } else { ++ /* (N -> N+1), (N != 0, 1) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) ++ if (old[nr_probes].func == probe ++ && old[nr_probes].probe_private ++ == probe_private) ++ return ERR_PTR(-EBUSY); ++ } ++ /* + 2 : one for new probe, one for NULL func */ ++ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), ++ GFP_KERNEL); ++ if (new == NULL) ++ return ERR_PTR(-ENOMEM); ++ if (!old) ++ new[0] = entry->single; ++ else ++ memcpy(new, old, ++ nr_probes * sizeof(struct marker_probe_closure)); ++ new[nr_probes].func = probe; ++ new[nr_probes].probe_private = probe_private; ++ entry->refcount = nr_probes + 1; ++ entry->multi = new; ++ entry->ptype = 1; ++ debug_print_probes(entry); ++ return old; ++} ++ ++static struct marker_probe_closure * ++marker_entry_remove_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0, nr_del = 0, i; ++ struct marker_probe_closure *old, *new; ++ ++ old = entry->multi; ++ ++ debug_print_probes(entry); ++ if (!entry->ptype) { ++ /* 0 -> N is an error */ ++ WARN_ON(entry->single.func == __mark_empty_function); ++ /* 1 -> 0 probes */ ++ WARN_ON(probe && entry->single.func != probe); ++ WARN_ON(entry->single.probe_private != probe_private); ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* (N -> M), (N > 1, M >= 0) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { ++ if ((!probe || old[nr_probes].func == probe) ++ && old[nr_probes].probe_private ++ == probe_private) ++ nr_del++; ++ } ++ } ++ ++ if (nr_probes - nr_del == 0) { ++ /* N -> 0, (N > 1) */ ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ } else if (nr_probes - nr_del == 1) { ++ /* N -> 1, (N > 1) */ ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ entry->single = old[i]; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ } else { ++ int j = 0; ++ /* N -> M, (N > 1, M > 1) */ ++ /* + 1 for NULL */ ++ new = kzalloc((nr_probes - nr_del + 1) ++ * sizeof(struct marker_probe_closure), GFP_KERNEL); ++ if (new == NULL) ++ return 
ERR_PTR(-ENOMEM); ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ new[j++] = old[i]; ++ entry->refcount = nr_probes - nr_del; ++ entry->ptype = 1; ++ entry->multi = new; ++ } ++ debug_print_probes(entry); ++ return old; ++} ++ ++/* ++ * Get marker if the marker is present in the marker hash table. ++ * Must be called with markers_mutex held. ++ * Returns NULL if not present. ++ */ ++static struct marker_entry *get_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ u32 hash = jhash(name, strlen(name), 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) ++ return e; ++ } ++ return NULL; ++} ++ ++/* ++ * Add the marker to the marker hash table. Must be called with markers_mutex ++ * held. ++ */ ++static struct marker_entry *add_marker(const char *name, const char *format) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ size_t format_len = 0; ++ u32 hash = jhash(name, name_len-1, 0); ++ ++ if (format) ++ format_len = strlen(format) + 1; ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ printk(KERN_NOTICE ++ "Marker %s busy\n", name); ++ return ERR_PTR(-EBUSY); /* Already there */ ++ } ++ } ++ /* ++ * Using kmalloc here to allocate a variable length element. Could ++ * cause some memory fragmentation if overused. 
++ */ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return ERR_PTR(-ENOMEM); ++ memcpy(&e->name[0], name, name_len); ++ if (format) { ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ } else { ++ e->format = NULL; ++ e->call = marker_probe_cb; ++ } ++ e->single.func = __mark_empty_function; ++ e->single.probe_private = NULL; ++ e->multi = NULL; ++ e->ptype = 0; ++ e->refcount = 0; ++ e->rcu_pending = 0; ++ hlist_add_head(&e->hlist, head); ++ return e; ++} ++ ++/* ++ * Remove the marker from the marker hash table. Must be called with mutex_lock ++ * held. ++ */ ++static int remove_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ int found = 0; ++ size_t len = strlen(name) + 1; ++ u32 hash = jhash(name, len-1, 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) ++ return -ENOENT; ++ if (e->single.func != __mark_empty_function) ++ return -EBUSY; ++ hlist_del(&e->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if (e->rcu_pending) ++ rcu_barrier(); ++ kfree(e); ++ return 0; ++} ++ ++/* ++ * Set the mark_entry format to the format found in the element. 
++ */ ++static int marker_set_format(struct marker_entry **entry, const char *format) ++{ ++ struct marker_entry *e; ++ size_t name_len = strlen((*entry)->name) + 1; ++ size_t format_len = strlen(format) + 1; ++ ++ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return -ENOMEM; ++ memcpy(&e->name[0], (*entry)->name, name_len); ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ e->single = (*entry)->single; ++ e->multi = (*entry)->multi; ++ e->ptype = (*entry)->ptype; ++ e->refcount = (*entry)->refcount; ++ e->rcu_pending = 0; ++ hlist_add_before(&e->hlist, &(*entry)->hlist); ++ hlist_del(&(*entry)->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if ((*entry)->rcu_pending) ++ rcu_barrier(); ++ kfree(*entry); ++ *entry = e; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ return 0; ++} ++ ++/* ++ * Sets the probe callback corresponding to one marker. ++ */ ++static int set_marker(struct marker_entry **entry, struct marker *elem, ++ int active) ++{ ++ int ret; ++ WARN_ON(strcmp((*entry)->name, elem->name) != 0); ++ ++ if ((*entry)->format) { ++ if (strcmp((*entry)->format, elem->format) != 0) { ++ printk(KERN_NOTICE ++ "Format mismatch for probe %s " ++ "(%s), marker (%s)\n", ++ (*entry)->name, ++ (*entry)->format, ++ elem->format); ++ return -EPERM; ++ } ++ } else { ++ ret = marker_set_format(entry, elem->format); ++ if (ret) ++ return ret; ++ } ++ ++ /* ++ * probe_cb setup (statically known) is done here. It is ++ * asynchronous with the rest of execution, therefore we only ++ * pass from a "safe" callback (with argument) to an "unsafe" ++ * callback (does not set arguments). 
++ */ ++ elem->call = (*entry)->call; ++ /* ++ * Sanity check : ++ * We only update the single probe private data when the ptr is ++ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) ++ */ ++ WARN_ON(elem->single.func != __mark_empty_function ++ && elem->single.probe_private ++ != (*entry)->single.probe_private && ++ !elem->ptype); ++ elem->single.probe_private = (*entry)->single.probe_private; ++ /* ++ * Make sure the private data is valid when we update the ++ * single probe ptr. ++ */ ++ smp_wmb(); ++ elem->single.func = (*entry)->single.func; ++ /* ++ * We also make sure that the new probe callbacks array is consistent ++ * before setting a pointer to it. ++ */ ++ rcu_assign_pointer(elem->multi, (*entry)->multi); ++ /* ++ * Update the function or multi probe array pointer before setting the ++ * ptype. ++ */ ++ smp_wmb(); ++ elem->ptype = (*entry)->ptype; ++ elem->state = active; ++ ++ return 0; ++} ++ ++/* ++ * Disable a marker and its probe callback. ++ * Note: only after a synchronize_sched() issued after setting elem->call to the ++ * empty function insures that the original callback is not used anymore. This ++ * insured by preemption disabling around the call site. ++ */ ++static void disable_marker(struct marker *elem) ++{ ++ /* leave "call" as is. It is known statically. */ ++ elem->state = 0; ++ elem->single.func = __mark_empty_function; ++ /* Update the function before setting the ptype */ ++ smp_wmb(); ++ elem->ptype = 0; /* single probe */ ++ /* ++ * Leave the private data and id there, because removal is racy and ++ * should be done only after a synchronize_sched(). These are never used ++ * until the next initialization anyway. ++ */ ++} ++ ++/** ++ * marker_update_probe_range - Update a probe range ++ * @begin: beginning of the range ++ * @end: end of the range ++ * ++ * Updates the probe callback corresponding to a range of markers. 
++ */ ++void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ ++ struct marker *iter; ++ struct marker_entry *mark_entry; ++ ++ mutex_lock(&markers_mutex); ++ for (iter = begin; iter < end; iter++) { ++ mark_entry = get_marker(iter->name); ++ if (mark_entry) { ++ set_marker(&mark_entry, iter, ++ !!mark_entry->refcount); ++ /* ++ * ignore error, continue ++ */ ++ } else { ++ disable_marker(iter); ++ } ++ } ++ mutex_unlock(&markers_mutex); ++} ++ ++/* ++ * Update probes, removing the faulty probes. ++ * Issues a synchronize_sched() when no reference to the module passed ++ * as parameter is found in the probes so the probe module can be ++ * safely unloaded from now on. ++ * ++ * Internal callback only changed before the first probe is connected to it. ++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 ++ * transitions. All other transitions will leave the old private data valid. ++ * This makes the non-atomicity of the callback/private data updates valid. ++ * ++ * "special case" updates : ++ * 0 -> 1 callback ++ * 1 -> 0 callback ++ * 1 -> 2 callbacks ++ * 2 -> 1 callbacks ++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. ++ * Site effect : marker_set_format may delete the marker entry (creating a ++ * replacement). ++ */ ++static void marker_update_probes(void) ++{ ++ /* Core kernel markers */ ++ marker_update_probe_range(__start___markers, __stop___markers); ++ /* Markers in modules. */ ++ module_update_markers(); ++} ++ ++/** ++ * marker_probe_register - Connect a probe to a marker ++ * @name: marker name ++ * @format: format string ++ * @probe: probe handler ++ * @probe_private: probe private data ++ * ++ * private data must be a valid allocated memory address, or NULL. ++ * Returns 0 if ok, error value on error. ++ * The probe address must at least be aligned on the architecture pointer size. 
++ */ ++int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ int ret = 0; ++ struct marker_probe_closure *old; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ if (!entry) { ++ entry = add_marker(name, format); ++ if (IS_ERR(entry)) { ++ ret = PTR_ERR(entry); ++ goto end; ++ } ++ } ++ /* ++ * If we detect that a call_rcu is pending for this marker, ++ * make sure it's executed now. ++ */ ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_add_probe(entry, probe, probe_private); ++ if (IS_ERR(old)) { ++ ret = PTR_ERR(old); ++ goto end; ++ } ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ WARN_ON(!entry); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_register); ++ ++/** ++ * marker_probe_unregister - Disconnect a probe from a marker ++ * @name: marker name ++ * @probe: probe function pointer ++ * @probe_private: probe private data ++ * ++ * Returns the private data given to marker_probe_register, or an ERR_PTR(). ++ * We do not need to call a synchronize_sched to make sure the probes have ++ * finished running before doing a module unload, because the module unload ++ * itself uses stop_machine(), which insures that every preempt disabled section ++ * have finished. 
++ */ ++int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ struct marker_probe_closure *old; ++ int ret = 0; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, probe, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister); ++ ++static struct marker_entry * ++get_marker_from_private_data(marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ unsigned int i; ++ struct hlist_head *head; ++ struct hlist_node *node; ++ ++ for (i = 0; i < MARKER_TABLE_SIZE; i++) { ++ head = &marker_table[i]; ++ hlist_for_each_entry(entry, node, head, hlist) { ++ if (!entry->ptype) { ++ if (entry->single.func == probe ++ && entry->single.probe_private ++ == probe_private) ++ return entry; ++ } else { ++ struct marker_probe_closure *closure; ++ closure = entry->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func == probe && ++ closure[i].probe_private ++ == probe_private) ++ return entry; ++ } ++ } ++ } ++ } ++ return NULL; ++} ++ ++/** ++ * marker_probe_unregister_private_data - Disconnect a probe from a marker ++ * @probe: probe function ++ * @probe_private: probe private data ++ * ++ * Unregister a probe by providing the registered private data. ++ * Only removes the first marker found in hash table. ++ * Return 0 on success or error value. 
++ * We do not need to call a synchronize_sched to make sure the probes have ++ * finished running before doing a module unload, because the module unload ++ * itself uses stop_machine(), which insures that every preempt disabled section ++ * have finished. ++ */ ++int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private) ++{ ++ struct marker_entry *entry; ++ int ret = 0; ++ struct marker_probe_closure *old; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, NULL, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ WARN_ON(!entry); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(entry->name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); ++ ++/** ++ * marker_get_private_data - Get a marker's probe private data ++ * @name: marker name ++ * @probe: probe to match ++ * @num: get the nth matching probe's private data ++ * ++ * Returns the nth private data pointer (starting from 0) matching, or an ++ * ERR_PTR. ++ * Returns the private data pointer, or an ERR_PTR. ++ * The private data pointer should _only_ be dereferenced if the caller is the ++ * owner of the data, or its content could vanish. This is mostly used to ++ * confirm that a caller is the owner of a registered probe. 
++ */ ++void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ u32 hash = jhash(name, name_len-1, 0); ++ int i; ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ if (!e->ptype) { ++ if (num == 0 && e->single.func == probe) ++ return e->single.probe_private; ++ else ++ break; ++ } else { ++ struct marker_probe_closure *closure; ++ int match = 0; ++ closure = e->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func != probe) ++ continue; ++ if (match++ == num) ++ return closure[i].probe_private; ++ } ++ } ++ } ++ } ++ return ERR_PTR(-ENOENT); ++} ++EXPORT_SYMBOL_GPL(marker_get_private_data); +diff --git a/kernel/module.c b/kernel/module.c +index 7f0ccd8..1cd4c54 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -1505,6 +1505,8 @@ static struct module *load_module(void __user *umod, + struct exception_table_entry *extable; + mm_segment_t old_fs; + int gpgsig_ok; ++ unsigned int markersindex; ++ unsigned int markersstringsindex; + + DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", + umod, len, uargs); +@@ -1735,6 +1737,9 @@ static struct module *load_module(void __user *umod, + tainted |= TAINT_FORCED_MODULE; + } + #endif ++ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); ++ markersstringsindex = find_sec(hdr, sechdrs, secstrings, ++ "__markers_strings"); + + /* Now do relocations. 
*/ + for (i = 1; i < hdr->e_shnum; i++) { +@@ -1758,6 +1763,12 @@ static struct module *load_module(void __user *umod, + goto cleanup; + } + ++#ifdef CONFIG_MARKERS ++ mod->markers = (void *)sechdrs[markersindex].sh_addr; ++ mod->num_markers = ++ sechdrs[markersindex].sh_size / sizeof(*mod->markers); ++#endif ++ + /* Set up and sort exception table */ + mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); + mod->extable = extable = (void *)sechdrs[exindex].sh_addr; +@@ -1769,6 +1780,12 @@ static struct module *load_module(void __user *umod, + + add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + ++#ifdef CONFIG_MARKERS ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++#endif ++ + err = module_finalize(hdr, sechdrs, mod); + if (err < 0) + goto cleanup; +@@ -2186,6 +2203,20 @@ void struct_module(struct module *mod) { return; } + EXPORT_SYMBOL(struct_module); + #endif + ++#ifdef CONFIG_MARKERS ++void module_update_markers(void) ++{ ++ struct module *mod; ++ ++ down(&module_mutex); ++ list_for_each_entry(mod, &modules, list) ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++ up(&module_mutex); ++} ++#endif ++ + static int __init modules_init(void) + { + return subsystem_register(&module_subsys); +diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c +index 1b16bfc..20ade31 100644 +--- a/kernel/rcupdate.c ++++ b/kernel/rcupdate.c +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + + /* Definition for rcupdate control block. */ + struct rcu_ctrlblk rcu_ctrlblk = +@@ -98,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head, + local_irq_restore(flags); + } + ++static atomic_t rcu_barrier_cpu_count; ++static DEFINE_MUTEX(rcu_barrier_mutex); ++static struct completion rcu_barrier_completion; ++ + /** + * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * @head: structure to be used for queueing the RCU updates. 
+@@ -410,6 +415,44 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, + return NOTIFY_OK; + } + ++static void rcu_barrier_callback(struct rcu_head *notused) ++{ ++ if (atomic_dec_and_test(&rcu_barrier_cpu_count)) ++ complete(&rcu_barrier_completion); ++} ++ ++/* ++ * Called with preemption disabled, and from cross-cpu IRQ context. ++ */ ++static void rcu_barrier_func(void *notused) ++{ ++ int cpu = smp_processor_id(); ++ struct rcu_data *rdp = &per_cpu(rcu_data, cpu); ++ struct rcu_head *head; ++ ++ head = &rdp->barrier; ++ atomic_inc(&rcu_barrier_cpu_count); ++ call_rcu(head, rcu_barrier_callback); ++} ++ ++/** ++ * rcu_barrier - Wait until all the in-flight RCUs are complete. ++ */ ++void rcu_barrier(void) ++{ ++ BUG_ON(in_interrupt()); ++ /* Take cpucontrol mutex to protect against CPU hotplug */ ++ mutex_lock(&rcu_barrier_mutex); ++ init_completion(&rcu_barrier_completion); ++ atomic_set(&rcu_barrier_cpu_count, 0); ++ on_each_cpu(rcu_barrier_func, NULL, 0, 1); ++ wait_for_completion(&rcu_barrier_completion); ++ mutex_unlock(&rcu_barrier_mutex); ++} ++EXPORT_SYMBOL_GPL(rcu_barrier); ++ ++ ++ + static struct notifier_block __devinitdata rcu_nb = { + .notifier_call = rcu_cpu_notify, + }; +diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost +index b3d31b5..b100a32 100644 +--- a/scripts/Makefile.modpost ++++ b/scripts/Makefile.modpost +@@ -13,6 +13,7 @@ + # 2) modpost is then used to + # 3) create one .mod.c file pr. 
module + # 4) create one Module.symvers file with CRC for all exported symbols ++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers + # 5) compile all .mod.c files + # 6) final link of the module to a file + +@@ -40,6 +41,11 @@ include scripts/Makefile.lib + + symverfile := $(objtree)/Module.symvers + ++kernelmarkersfile := $(objtree)/Module.markers ++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers ++ ++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) ++ + # Step 1), find all modules listed in $(MODVERDIR)/ + __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) + modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) +@@ -53,16 +59,27 @@ quiet_cmd_modpost = MODPOST + cmd_modpost = scripts/mod/modpost \ + $(if $(CONFIG_MODVERSIONS),-m) \ + $(if $(KBUILD_EXTMOD),-i,-o) $(symverfile) \ ++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ ++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ + $(filter-out FORCE,$^) + + .PHONY: __modpost + __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE + $(call cmd,modpost) + ++quiet_cmd_kernel-mod = MODPOST $@ ++ cmd_kernel-mod = $(cmd_modpost) $@ ++ ++vmlinux.o: FORCE ++ $(call cmd,kernel-mod) ++ + # Declare generated files as targets for modpost + $(symverfile): __modpost ; + $(modules:.ko=.mod.c): __modpost ; + ++ifdef CONFIG_MARKERS ++$(markersfile): __modpost ; ++endif + + # Step 5), compile all *.mod.c files + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 2a174e5..c25948c 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -10,7 +10,8 @@ + * + * Usage: modpost vmlinux module1.o module2.o ... 
+ */ +- ++#define _GNU_SOURCE ++#include + #include + #include "modpost.h" + +@@ -289,7 +290,9 @@ parse_elf(struct elf_info *info, const char *filename) + if (strcmp(secstrings+sechdrs[i].sh_name, ".modinfo") == 0) { + info->modinfo = (void *)hdr + sechdrs[i].sh_offset; + info->modinfo_len = sechdrs[i].sh_size; +- } ++ } else if (strcmp(secstrings+sechdrs[i].sh_name, "__markers_strings") == 0) ++ info->markers_strings_sec = i; ++ + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; + +@@ -397,6 +400,63 @@ is_vmlinux(const char *modname) + return strcmp(myname, "vmlinux") == 0; + } + ++static void get_markers(struct elf_info *info, struct module *mod) ++{ ++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; ++ const char *strings = (const char *) info->hdr + sh->sh_offset; ++ const Elf_Sym *sym, *first_sym, *last_sym; ++ size_t n; ++ ++ ++ if (!info->markers_strings_sec) ++ return; ++ ++ /* ++ * First count the strings. We look for all the symbols defined ++ * in the __markers_strings section named __mstrtab_*. For ++ * these local names, the compiler puts a random .NNN suffix on, ++ * so the names don't correspond exactly. ++ */ ++ first_sym = last_sym = NULL; ++ n = 0; ++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ if (first_sym == NULL) ++ first_sym = sym; ++ last_sym = sym; ++ ++n; ++ } ++ ++ if (n == 0) ++ return; ++ /* ++ * Now collect each name and format into a line for the output. ++ * Lines look like: ++ * marker_name vmlinux marker %s format %d ++ * The format string after the second \t can use whitespace. 
++ */ ++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); ++ mod->nmarkers = n; ++ ++ n = 0; ++ for (sym = first_sym; sym <= last_sym; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ const char *name = strings + sym->st_value; ++ const char *fmt = strchr(name, '\0') + 1; ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ mod->markers[n++] = line; ++ } ++} ++ ++ + void + read_symbols(char *modname) + { +@@ -426,6 +486,7 @@ read_symbols(char *modname) + } + maybe_frob_version(modname, info.modinfo, info.modinfo_len, + (void *)info.modinfo - (void *)info.hdr); ++ get_markers(&info, mod); + parse_elf_finish(&info); + + /* Our trick to get versioning for struct_module - it's +@@ -682,6 +743,92 @@ write_dump(const char *fname) + write_if_changed(&buf, fname); + } + ++static void add_marker(struct module *mod, const char *name, const char *fmt) ++{ ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ ++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * ++ sizeof mod->markers[0]))); ++ mod->markers[mod->nmarkers++] = line; ++} ++ ++static void read_markers(const char *fname) ++{ ++ unsigned long size, pos = 0; ++ void *file = grab_file(fname, &size); ++ char *line; ++ ++ if (!file) /* No old markers, silently ignore */ ++ return; ++ ++ while ((line = get_next_line(&pos, file, size))) { ++ char *marker, *modname, *fmt; ++ struct module *mod; ++ ++ marker = line; ++ modname = strchr(marker, '\t'); ++ if (!modname) ++ goto fail; ++ *modname++ = '\0'; ++ fmt = strchr(modname, '\t'); ++ if (!fmt) ++ goto fail; ++ *fmt++ = '\0'; ++ if (*marker == '\0' || *modname == '\0') ++ goto fail; ++ ++ mod = find_module(modname); ++ if (!mod) { ++ if (is_vmlinux(modname)) ++ have_vmlinux = 1; ++ mod = 
new_module(NOFAIL(strdup(modname))); ++ mod->skip = 1; ++ } ++ ++ add_marker(mod, marker, fmt); ++ } ++ return; ++fail: ++ fatal("parse error in markers list file\n"); ++} ++ ++static int compare_strings(const void *a, const void *b) ++{ ++ return strcmp(*(const char **) a, *(const char **) b); ++} ++ ++static void write_markers(const char *fname) ++{ ++ struct buffer buf = { }; ++ struct module *mod; ++ size_t i; ++ ++ for (mod = modules; mod; mod = mod->next) ++ if (mod->markers != NULL) { ++ /* ++ * Sort the strings so we can skip duplicates when ++ * we write them out. ++ */ ++ qsort(mod->markers, mod->nmarkers, ++ sizeof mod->markers[0], &compare_strings); ++ for (i = 0; i < mod->nmarkers; ++i) { ++ char *line = mod->markers[i]; ++ buf_write(&buf, line, strlen(line)); ++ while (i + 1 < mod->nmarkers && ++ !strcmp(mod->markers[i], ++ mod->markers[i + 1])) ++ free(mod->markers[i++]); ++ free(mod->markers[i]); ++ } ++ free(mod->markers); ++ mod->markers = NULL; ++ } ++ ++ write_if_changed(&buf, fname); ++} ++ + int + main(int argc, char **argv) + { +@@ -690,8 +837,10 @@ main(int argc, char **argv) + char fname[SZ]; + char *dump_read = NULL, *dump_write = NULL; + int opt; ++ char *markers_read = NULL; ++ char *markers_write = NULL; + +- while ((opt = getopt(argc, argv, "i:mo:")) != -1) { ++ while ((opt = getopt(argc, argv, "i:mo:M:K:")) != -1) { + switch(opt) { + case 'i': + dump_read = optarg; +@@ -702,6 +851,12 @@ main(int argc, char **argv) + case 'o': + dump_write = optarg; + break; ++ case 'M': ++ markers_write = optarg; ++ break; ++ case 'K': ++ markers_read = optarg; ++ break; + default: + exit(1); + } +@@ -732,6 +887,12 @@ main(int argc, char **argv) + if (dump_write) + write_dump(dump_write); + ++ if (markers_read) ++ read_markers(markers_read); ++ ++ if (markers_write) ++ write_markers(markers_write); ++ + return 0; + } + +diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h +index 4871343..d79d7ea 100644 +--- a/scripts/mod/modpost.h ++++ 
b/scripts/mod/modpost.h +@@ -18,6 +18,7 @@ + #define Elf_Sym Elf32_Sym + #define ELF_ST_BIND ELF32_ST_BIND + #define ELF_ST_TYPE ELF32_ST_TYPE ++#define Elf_Section Elf32_Half + + #else + +@@ -26,7 +27,7 @@ + #define Elf_Sym Elf64_Sym + #define ELF_ST_BIND ELF64_ST_BIND + #define ELF_ST_TYPE ELF64_ST_TYPE +- ++#define Elf_Section Elf64_Half + #endif + + #if KERNEL_ELFDATA != HOST_ELFDATA +@@ -77,6 +78,8 @@ struct module { + int has_init; + int has_cleanup; + struct buffer dev_table_buf; ++ char **markers; ++ size_t nmarkers; + }; + + struct elf_info { +@@ -85,6 +88,7 @@ struct elf_info { + Elf_Shdr *sechdrs; + Elf_Sym *symtab_start; + Elf_Sym *symtab_stop; ++ Elf_Section markers_strings_sec; + const char *strtab; + char *modinfo; + unsigned int modinfo_len; Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch (rev 0) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch 2008-07-04 02:18:23 UTC (rev 6) @@ -0,0 +1,1917 @@ +diff --git a/Makefile b/Makefile +index 06b517d..afa3d2c 100644 +--- a/Makefile ++++ b/Makefile +@@ -583,7 +583,7 @@ quiet_cmd_vmlinux__ ?= LD $@ + cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ + -T $(vmlinux-lds) $(vmlinux-init) \ + --start-group $(vmlinux-main) --end-group \ +- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) ++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) + + # Generate new vmlinux version + quiet_cmd_vmlinux_version = GEN .version +@@ -688,10 +688,30 @@ $(KALLSYMS): scripts ; + + endif # ifdef CONFIG_KALLSYMS + ++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has ++# relevant sections renamed as per the linker script. 
++quiet_cmd_vmlinux-modpost = LD $@ ++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ ++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ ++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) ++define rule_vmlinux-modpost ++ : ++ +$(call cmd,vmlinux-modpost) ++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ ++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd ++endef ++ ++ + # vmlinux image - including updated kernel symbols +-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE ++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE ++ $(call vmlinux-modpost) + $(call if_changed_rule,vmlinux__) + ++# build vmlinux.o first to catch section mismatch errors early ++$(kallsyms.o): vmlinux.o ++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ++ $(call if_changed_rule,vmlinux-modpost) ++ + # The actual objects are generated when descending, + # make sure no implicit rule kicks in + $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; +diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug +index cf069b7..d39d5c5 100644 +--- a/arch/i386/Kconfig.debug ++++ b/arch/i386/Kconfig.debug +@@ -29,6 +29,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL +diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S +index e8db99c..b846b21 100644 +--- a/arch/i386/kernel/vmlinux.lds.S ++++ b/arch/i386/kernel/vmlinux.lds.S +@@ -53,6 +53,7 @@ SECTIONS + /* writeable */ + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug +index 2782b48..17d4a44 100644 +--- a/arch/ia64/Kconfig.debug ++++ b/arch/ia64/Kconfig.debug +@@ -12,6 +12,12 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. + + choice + prompt "Physical memory granularity" +diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S +index a676e79..c6ea47e 100644 +--- a/arch/ia64/kernel/vmlinux.lds.S ++++ b/arch/ia64/kernel/vmlinux.lds.S +@@ -193,7 +193,7 @@ SECTIONS + + data : { } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) +- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } ++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) +diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug +index 860472a..5c3a9f5 100644 +--- a/arch/ppc64/Kconfig.debug ++++ b/arch/ppc64/Kconfig.debug +@@ -16,6 +16,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL +diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S +index 4103cc1..76d0bf0 100644 +--- a/arch/ppc64/kernel/vmlinux.lds.S ++++ b/arch/ppc64/kernel/vmlinux.lds.S +@@ -118,6 +118,7 @@ SECTIONS + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) ++ MARKER + } + + .opd : { +diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug +index cd8d39f..f39bf8d 100644 +--- a/arch/sparc64/Kconfig.debug ++++ b/arch/sparc64/Kconfig.debug +@@ -21,6 +21,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + config DEBUG_DCFLUSH + bool "D-cache flush debugging" + depends on DEBUG_KERNEL +diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S +index a710d38..d4b3b62 100644 +--- a/arch/sparc64/kernel/vmlinux.lds.S ++++ b/arch/sparc64/kernel/vmlinux.lds.S +@@ -27,6 +27,7 @@ SECTIONS + .data : + { + *(.data) ++ MARKER + CONSTRUCTORS + } + .data1 : { *(.data1) } +diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug +index cb33186..d0260cb 100644 +--- a/arch/x86_64/Kconfig.debug ++++ b/arch/x86_64/Kconfig.debug +@@ -55,6 +55,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config IOMMU_LEAK + bool "IOMMU leak tracing" + depends on DEBUG_KERNEL +diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S +index 053c826..3b3928f 100644 +--- a/arch/x86_64/kernel/vmlinux.lds.S ++++ b/arch/x86_64/kernel/vmlinux.lds.S +@@ -33,6 +33,7 @@ SECTIONS + + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index 7a4d28d..991a1ef 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -159,7 +159,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); + + // ================= main 802.3ad protocol functions ================== + static int ad_lacpdu_send(struct port *port); +-static int ad_marker_send(struct port *port, struct marker *marker); ++static int ad_marker_send(struct port *port, struct bond_marker *marker); + static void ad_mux_machine(struct port *port); + static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); + static void ad_tx_machine(struct port *port); +@@ -172,8 +172,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast); + static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); + static void ad_enable_collecting_distributing(struct port *port); + static void ad_disable_collecting_distributing(struct port *port); +-static void ad_marker_info_received(struct marker *marker_info, struct port *port); +-static void ad_marker_response_received(struct marker *marker, struct port *port); ++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); ++static void ad_marker_response_received(struct bond_marker *marker, struct port *port); + + + ///////////////////////////////////////////////////////////////////////////////// +@@ -954,12 +954,13 @@ static int ad_lacpdu_send(struct port *port) + * Returns: 0 on success + * < 0 on error + */ +-static int ad_marker_send(struct port *port, struct marker *marker) 
++static int ad_marker_send(struct port *port, struct bond_marker *marker) + { + struct slave *slave = port->slave; + struct sk_buff *skb; +- struct marker_header *marker_header; +- int length = sizeof(struct marker_header); ++ struct bond_marker_header *marker_header; ++ int length = sizeof(struct bond_marker_header); ++ + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length + 16); +@@ -974,7 +975,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + +- marker_header = (struct marker_header *)skb_put(skb, length); ++ marker_header = (struct bond_marker_header *)skb_put(skb, length); + + marker_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it +@@ -1771,7 +1772,7 @@ static void ad_disable_collecting_distributing(struct port *port) + */ + static void ad_marker_info_send(struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + u16 index; + + // fill the marker PDU with the appropriate values +@@ -1804,13 +1805,14 @@ static void ad_marker_info_send(struct port *port) + * @port: the port we're looking at + * + */ +-static void ad_marker_info_received(struct marker *marker_info,struct port *port) ++static void ad_marker_info_received(struct bond_marker *marker_info, ++ struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + + // copy the received marker data to the response marker + //marker = *marker_info; +- memcpy(&marker, marker_info, sizeof(struct marker)); ++ memcpy(&marker, marker_info, sizeof(struct bond_marker)); + // change the marker subtype to marker response + marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; + // send the marker response +@@ -1829,7 +1831,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port + * response for marker PDU's, in 
this stage, but only to respond to marker + * information. + */ +-static void ad_marker_response_received(struct marker *marker, struct port *port) ++static void ad_marker_response_received(struct bond_marker *marker, ++ struct port *port) + { + marker=NULL; // just to satisfy the compiler + port=NULL; // just to satisfy the compiler +@@ -2217,15 +2220,16 @@ void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 leng + case AD_TYPE_MARKER: + // No need to convert fields to Little Endian since we don't use the marker's fields. + +- switch (((struct marker *)lacpdu)->tlv_type) { ++ switch (((struct bond_marker *)lacpdu)->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + dprintk("Received Marker Information on port %d\n", port->actor_port_number); +- ad_marker_info_received((struct marker *)lacpdu, port); ++ ad_marker_info_received((struct bond_marker *)lacpdu, port); ++ + break; + + case AD_MARKER_RESPONSE_SUBTYPE: + dprintk("Received Marker Response on port %d\n", port->actor_port_number); +- ad_marker_response_received((struct marker *)lacpdu, port); ++ ad_marker_response_received((struct bond_marker *)lacpdu, port); + break; + + default: +diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h +index 4c60b17..2bb477e 100644 +--- a/drivers/net/bonding/bond_3ad.h ++++ b/drivers/net/bonding/bond_3ad.h +@@ -105,7 +105,8 @@ typedef enum { + typedef enum { + AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype + AD_MARKER_RESPONSE_SUBTYPE // marker response subtype +-} marker_subtype_t; ++} bond_marker_subtype_t; ++ + + // timers types(43.4.9 in the 802.3ad standard) + typedef enum { +@@ -161,7 +162,7 @@ typedef struct lacpdu_header { + } lacpdu_header_t; + + // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) +-typedef struct marker { ++typedef struct bond_marker { + u8 subtype; // = 0x02 (marker PDU) + u8 version_number; // = 0x01 + u8 tlv_type; // = 0x01 (marker information) +@@ -174,12 
+175,12 @@ typedef struct marker { + u8 tlv_type_terminator; // = 0x00 + u8 terminator_length; // = 0x00 + u8 reserved_90[90]; // = 0 +-} marker_t; ++} bond_marker_t; + +-typedef struct marker_header { ++typedef struct bond_marker_header { + struct ad_header ad_header; +- struct marker marker; +-} marker_header_t; ++ struct bond_marker marker; ++} bond_marker_header_t; + + #pragma pack() + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index 165a02f..eaf230e 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -6,10 +6,18 @@ + #define VMLINUX_SYMBOL(_sym_) _sym_ + #endif + ++/* Kernel markers : pointers */ ++#define MARKER \ ++ . = ALIGN(8); \ ++ VMLINUX_SYMBOL(__start___markers) = .; \ ++ *(__markers) \ ++ VMLINUX_SYMBOL(__stop___markers) = .; ++ + #define RODATA \ + .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ + *(.rodata) *(.rodata.*) \ + *(__vermagic) /* Kernel version magic */ \ ++ *(__markers_strings) /* Markers: strings */ \ + } \ + \ + .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ +diff --git a/include/linux/marker.h b/include/linux/marker.h +new file mode 100644 +index 0000000..efbc82b +--- /dev/null ++++ b/include/linux/marker.h +@@ -0,0 +1,139 @@ ++#ifndef _LINUX_MARKER_H ++#define _LINUX_MARKER_H ++ ++/* ++ * Code markup for dynamic and static tracing. ++ * ++ * See Documentation/marker.txt. ++ * ++ * (C) Copyright 2006 Mathieu Desnoyers ++ * ++ * This file is released under the GPLv2. ++ * See the file COPYING for more details. ++ */ ++ ++#include ++ ++struct module; ++struct marker; ++ ++/** ++ * marker_probe_func - Type of a marker probe function ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @args: variable argument list pointer. Use a pointer to overcome C's ++ * inability to pass this around as a pointer in a portable manner in ++ * the callee otherwise. ++ * ++ * Type of marker probe functions. 
They receive the mdata and need to parse the ++ * format string to recover the variable argument list. ++ */ ++typedef void marker_probe_func(void *probe_private, void *call_private, ++ const char *fmt, va_list *args); ++ ++struct marker_probe_closure { ++ marker_probe_func *func; /* Callback */ ++ void *probe_private; /* Private probe data */ ++}; ++ ++struct marker { ++ const char *name; /* Marker name */ ++ const char *format; /* Marker format string, describing the ++ * variable argument list. ++ */ ++ char state; /* Marker state. */ ++ char ptype; /* probe type : 0 : single, 1 : multi */ ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++} __attribute__((aligned(8))); ++ ++#ifdef CONFIG_MARKERS ++ ++/* ++ * Note : the empty asm volatile with read constraint is used here instead of a ++ * "used" attribute to fix a gcc 4.1.x bug. ++ * Make sure the alignment of the structure in the __markers section will ++ * not add unwanted padding between the beginning of the section and the ++ * structure. Force alignment to the same alignment as the section start. ++ */ ++#define __trace_mark(name, call_private, format, args...) 
\ ++ do { \ ++ static const char __mstrtab_##name[] \ ++ __attribute__((section("__markers_strings"))) \ ++ = #name "\0" format; \ ++ static struct marker __mark_##name \ ++ __attribute__((section("__markers"), aligned(8))) = \ ++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ ++ 0, 0, marker_probe_cb, \ ++ { __mark_empty_function, NULL}, NULL }; \ ++ __mark_check_format(format, ## args); \ ++ if (unlikely(__mark_##name.state)) { \ ++ (*__mark_##name.call) \ ++ (&__mark_##name, call_private, \ ++ format, ## args); \ ++ } \ ++ } while (0) ++ ++extern void marker_update_probe_range(struct marker *begin, ++ struct marker *end); ++#else /* !CONFIG_MARKERS */ ++#define __trace_mark(name, call_private, format, args...) \ ++ __mark_check_format(format, ## args) ++static inline void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ } ++#endif /* CONFIG_MARKERS */ ++ ++/** ++ * trace_mark - Marker ++ * @name: marker name, not quoted. ++ * @format: format string ++ * @args...: variable argument list ++ * ++ * Places a marker. ++ */ ++#define trace_mark(name, format, args...) \ ++ __trace_mark(name, NULL, format, ## args) ++ ++/** ++ * MARK_NOARGS - Format string for a marker with no argument. ++ */ ++#define MARK_NOARGS " " ++ ++/* To be used for string format validity checking with gcc */ ++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) ++{ ++} ++ ++extern marker_probe_func __mark_empty_function; ++ ++extern void marker_probe_cb(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++extern void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++ ++/* ++ * Connect a probe to a marker. ++ * private data pointer must be a valid allocated memory address, or NULL. 
++ */ ++extern int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private); ++ ++/* ++ * Returns the private data given to marker_probe_register. ++ */ ++extern int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private); ++/* ++ * Unregister a marker by providing the registered private data. ++ */ ++extern int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private); ++ ++extern void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num); ++ ++#endif +diff --git a/include/linux/module.h b/include/linux/module.h +index 8da8948..2ad5efd 100644 +--- a/include/linux/module.h ++++ b/include/linux/module.h +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -328,6 +329,10 @@ struct module + /* The command line arguments (may be mangled). People like + keeping pointers to this stuff */ + char *args; ++#ifdef CONFIG_MARKERS ++ struct marker *markers; ++ unsigned int num_markers; ++#endif + }; + + /* FIXME: It'd be nice to isolate modules during init, too, so they +@@ -448,6 +453,7 @@ int register_module_notifier(struct notifier_block * nb); + int unregister_module_notifier(struct notifier_block * nb); + + extern void print_modules(void); ++extern void module_update_markers(void); + #else /* !CONFIG_MODULES... 
*/ + #define EXPORT_SYMBOL(sym) + #define EXPORT_SYMBOL_GPL(sym) +@@ -537,6 +543,10 @@ static inline int unregister_module_notifier(struct notifier_block * nb) + static inline void print_modules(void) + { + } ++ ++static inline void module_update_markers(void) ++{ ++} + #endif /* CONFIG_MODULES */ + + #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index 91057d6..bcd0acb 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -42,6 +42,19 @@ + #include + #include + ++/* ++ * Prevent the compiler from merging or refetching accesses. The compiler ++ * is also forbidden from reordering successive instances of ACCESS_ONCE(), ++ * but only when the compiler is aware of some particular ordering. One way ++ * to make the compiler aware of ordering is to put the two invocations of ++ * ACCESS_ONCE() in different C statements. ++ * ++ * This macro does absolutely -nothing- to prevent the CPU from reordering, ++ * merging, or refetching absolutely anything at any time. 
++ * ++ */ ++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) ++ + /** + * struct rcu_head - callback structure for use with RCU + * @next: next update requests in a list +@@ -102,6 +115,7 @@ struct rcu_data { + struct rcu_head *donelist; + struct rcu_head **donetail; + int cpu; ++ struct rcu_head barrier; + }; + + DECLARE_PER_CPU(struct rcu_data, rcu_data); +@@ -266,6 +280,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head, + extern void FASTCALL(call_rcu_bh(struct rcu_head *head, + void (*func)(struct rcu_head *head))); + extern void synchronize_kernel(void); +- ++extern void rcu_barrier(void); + #endif /* __KERNEL__ */ + #endif /* __LINUX_RCUPDATE_H */ +diff --git a/kernel/Makefile b/kernel/Makefile +index 0b8c8ca..f8248bc 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o + obj-$(CONFIG_AUDITSYSCALL) += auditsc.o + obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o + obj-$(CONFIG_KPROBES) += kprobes.o ++obj-$(CONFIG_MARKERS) += marker.o + + ifneq ($(CONFIG_IA64),y) + # According to Alan Modra , the -fno-omit-frame-pointer is +diff --git a/kernel/marker.c b/kernel/marker.c +new file mode 100644 +index 0000000..c4c2cd8 +--- /dev/null ++++ b/kernel/marker.c +@@ -0,0 +1,851 @@ ++/* ++ * Copyright (C) 2007 Mathieu Desnoyers ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern struct marker __start___markers[]; ++extern struct marker __stop___markers[]; ++ ++/* Set to 1 to enable marker debug output */ ++const int marker_debug; ++ ++/* ++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin ++ * and module markers and the hash table. ++ */ ++static DEFINE_MUTEX(markers_mutex); ++ ++/* ++ * Marker hash table, containing the active markers. ++ * Protected by markers_mutex. ++ */ ++#define MARKER_HASH_BITS 6 ++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) ++ ++/* ++ * Note about RCU : ++ * It is used to make sure every handler has finished using its private data ++ * between two consecutive operations (add or remove) on a given marker. It is ++ * also used to delay the free of multiple probes array until a quiescent state ++ * is reached. ++ * marker entries modifications are protected by the markers_mutex. ++ */ ++struct marker_entry { ++ struct hlist_node hlist; ++ char *format; ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++ int refcount; /* Number of times armed. 0 if disarmed. */ ++ struct rcu_head rcu; ++ void *oldptr; ++ char rcu_pending:1; ++ char ptype:1; ++ char name[0]; /* Contains name'\0'format'\0' */ ++}; ++ ++static struct hlist_head marker_table[MARKER_TABLE_SIZE]; ++ ++/** ++ * __mark_empty_function - Empty probe callback ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @...: variable argument list ++ * ++ * Empty callback provided as a probe to the markers.
By providing this to a ++ * disabled marker, we make sure the execution flow is always valid even ++ * though the function pointer change and the marker enabling are two distinct ++ * operations that modify the execution flow of preemptible code. ++ */ ++void __mark_empty_function(void *probe_private, void *call_private, ++ const char *fmt, va_list *args) ++{ ++} ++EXPORT_SYMBOL_GPL(__mark_empty_function); ++ ++/* ++ * marker_probe_cb Callback that prepares the variable argument list for probes. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we ++ * need to put a full smp_rmb() in this branch. This is why we do not use ++ * rcu_dereference() for the pointer read. ++ */ ++void marker_probe_cb(const struct marker *mdata, void *call_private, ++ const char *fmt, ...) ++{ ++ va_list args; ++ char ptype; ++ ++ /* ++ * disabling preemption to make sure the teardown of the callbacks can ++ * be done correctly when they are in modules and they ensure RCU read ++ * coherency. ++ */ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependent, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependent, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ va_start(args, fmt); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ va_end(args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must ensure that the pointer is read _before_ the array ++ * data.
Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. ++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) { ++ va_start(args, fmt); ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ va_end(args); ++ } ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb); ++ ++/* ++ * marker_probe_cb_noarg Callback that does not prepare the variable argument list. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Should be connected to markers "MARK_NOARGS". ++ */ ++void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...) ++{ ++ va_list args; /* not initialized */ ++ char ptype; ++ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependent, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependent, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must ensure that the pointer is read _before_ the array ++ * data. Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here.
++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); ++ ++static void free_old_closure(struct rcu_head *head) ++{ ++ struct marker_entry *entry = container_of(head, ++ struct marker_entry, rcu); ++ kfree(entry->oldptr); ++ /* Make sure we free the data before setting the pending flag to 0 */ ++ smp_wmb(); ++ entry->rcu_pending = 0; ++} ++ ++static void debug_print_probes(struct marker_entry *entry) ++{ ++ int i; ++ ++ if (!marker_debug) ++ return; ++ ++ if (!entry->ptype) { ++ printk(KERN_DEBUG "Single probe : %p %p\n", ++ entry->single.func, ++ entry->single.probe_private); ++ } else { ++ for (i = 0; entry->multi[i].func; i++) ++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, ++ entry->multi[i].func, ++ entry->multi[i].probe_private); ++ } ++} ++ ++static struct marker_probe_closure * ++marker_entry_add_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0; ++ struct marker_probe_closure *old, *new; ++ ++ WARN_ON(!probe); ++ ++ debug_print_probes(entry); ++ old = entry->multi; ++ if (!entry->ptype) { ++ if (entry->single.func == probe && ++ entry->single.probe_private == probe_private) ++ return ERR_PTR(-EBUSY); ++ if (entry->single.func == __mark_empty_function) { ++ /* 0 -> 1 probes */ ++ entry->single.func = probe; ++ entry->single.probe_private = probe_private; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* 1 -> 2 probes */ ++ nr_probes = 1; ++ old = NULL; ++ } ++ } else { ++ /* (N -> N+1), (N != 0, 1) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) ++ if (old[nr_probes].func == probe ++ && old[nr_probes].probe_private ++ == probe_private) ++ return ERR_PTR(-EBUSY); ++ } ++ /* + 2 : one for new probe, one for NULL func */ ++ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), ++ GFP_KERNEL); ++ if (new == NULL) ++ return ERR_PTR(-ENOMEM); ++ if (!old) ++ new[0] = entry->single; ++ else ++ memcpy(new, old, ++ nr_probes * sizeof(struct marker_probe_closure)); ++ new[nr_probes].func = probe; ++ new[nr_probes].probe_private = probe_private; ++ entry->refcount = nr_probes + 1; ++ entry->multi = new; ++ entry->ptype = 1; ++ debug_print_probes(entry); ++ return old; ++} ++ ++static struct marker_probe_closure * ++marker_entry_remove_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0, nr_del = 0, i; ++ struct marker_probe_closure *old, *new; ++ ++ old = entry->multi; ++ ++ debug_print_probes(entry); ++ if (!entry->ptype) { ++ /* 0 -> N is an error */ ++ WARN_ON(entry->single.func == __mark_empty_function); ++ /* 1 -> 0 probes */ ++ WARN_ON(probe && entry->single.func != probe); ++ WARN_ON(entry->single.probe_private != probe_private); ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* (N -> M), (N > 1, M >= 0) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { ++ if ((!probe || old[nr_probes].func == probe) ++ && old[nr_probes].probe_private ++ == probe_private) ++ nr_del++; ++ } ++ } ++ ++ if (nr_probes - nr_del == 0) { ++ /* N -> 0, (N > 1) */ ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ } else if (nr_probes - nr_del == 1) { ++ /* N -> 1, (N > 1) */ ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ entry->single = old[i]; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ } else { ++ int j = 0; ++ /* N -> M, (N > 1, M > 1) */ ++ /* + 1 for NULL */ ++ new = kzalloc((nr_probes - nr_del + 1) ++ * sizeof(struct marker_probe_closure), GFP_KERNEL); ++ if (new == NULL) ++ return 
ERR_PTR(-ENOMEM); ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ new[j++] = old[i]; ++ entry->refcount = nr_probes - nr_del; ++ entry->ptype = 1; ++ entry->multi = new; ++ } ++ debug_print_probes(entry); ++ return old; ++} ++ ++/* ++ * Get marker if the marker is present in the marker hash table. ++ * Must be called with markers_mutex held. ++ * Returns NULL if not present. ++ */ ++static struct marker_entry *get_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ u32 hash = jhash(name, strlen(name), 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) ++ return e; ++ } ++ return NULL; ++} ++ ++/* ++ * Add the marker to the marker hash table. Must be called with markers_mutex ++ * held. ++ */ ++static struct marker_entry *add_marker(const char *name, const char *format) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ size_t format_len = 0; ++ u32 hash = jhash(name, name_len-1, 0); ++ ++ if (format) ++ format_len = strlen(format) + 1; ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ printk(KERN_NOTICE ++ "Marker %s busy\n", name); ++ return ERR_PTR(-EBUSY); /* Already there */ ++ } ++ } ++ /* ++ * Using kmalloc here to allocate a variable length element. Could ++ * cause some memory fragmentation if overused. 
++ */ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return ERR_PTR(-ENOMEM); ++ memcpy(&e->name[0], name, name_len); ++ if (format) { ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ } else { ++ e->format = NULL; ++ e->call = marker_probe_cb; ++ } ++ e->single.func = __mark_empty_function; ++ e->single.probe_private = NULL; ++ e->multi = NULL; ++ e->ptype = 0; ++ e->refcount = 0; ++ e->rcu_pending = 0; ++ hlist_add_head(&e->hlist, head); ++ return e; ++} ++ ++/* ++ * Remove the marker from the marker hash table. Must be called with mutex_lock ++ * held. ++ */ ++static int remove_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ int found = 0; ++ size_t len = strlen(name) + 1; ++ u32 hash = jhash(name, len-1, 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) ++ return -ENOENT; ++ if (e->single.func != __mark_empty_function) ++ return -EBUSY; ++ hlist_del(&e->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if (e->rcu_pending) ++ rcu_barrier(); ++ kfree(e); ++ return 0; ++} ++ ++/* ++ * Set the mark_entry format to the format found in the element. 
++ */ ++static int marker_set_format(struct marker_entry **entry, const char *format) ++{ ++ struct marker_entry *e; ++ size_t name_len = strlen((*entry)->name) + 1; ++ size_t format_len = strlen(format) + 1; ++ ++ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return -ENOMEM; ++ memcpy(&e->name[0], (*entry)->name, name_len); ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ e->single = (*entry)->single; ++ e->multi = (*entry)->multi; ++ e->ptype = (*entry)->ptype; ++ e->refcount = (*entry)->refcount; ++ e->rcu_pending = 0; ++ hlist_add_before(&e->hlist, &(*entry)->hlist); ++ hlist_del(&(*entry)->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if ((*entry)->rcu_pending) ++ rcu_barrier(); ++ kfree(*entry); ++ *entry = e; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ return 0; ++} ++ ++/* ++ * Sets the probe callback corresponding to one marker. ++ */ ++static int set_marker(struct marker_entry **entry, struct marker *elem, ++ int active) ++{ ++ int ret; ++ WARN_ON(strcmp((*entry)->name, elem->name) != 0); ++ ++ if ((*entry)->format) { ++ if (strcmp((*entry)->format, elem->format) != 0) { ++ printk(KERN_NOTICE ++ "Format mismatch for probe %s " ++ "(%s), marker (%s)\n", ++ (*entry)->name, ++ (*entry)->format, ++ elem->format); ++ return -EPERM; ++ } ++ } else { ++ ret = marker_set_format(entry, elem->format); ++ if (ret) ++ return ret; ++ } ++ ++ /* ++ * probe_cb setup (statically known) is done here. It is ++ * asynchronous with the rest of execution, therefore we only ++ * pass from a "safe" callback (with argument) to an "unsafe" ++ * callback (does not set arguments). 
++ */ ++ elem->call = (*entry)->call; ++ /* ++ * Sanity check : ++ * We only update the single probe private data when the ptr is ++ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) ++ */ ++ WARN_ON(elem->single.func != __mark_empty_function ++ && elem->single.probe_private ++ != (*entry)->single.probe_private && ++ !elem->ptype); ++ elem->single.probe_private = (*entry)->single.probe_private; ++ /* ++ * Make sure the private data is valid when we update the ++ * single probe ptr. ++ */ ++ smp_wmb(); ++ elem->single.func = (*entry)->single.func; ++ /* ++ * We also make sure that the new probe callbacks array is consistent ++ * before setting a pointer to it. ++ */ ++ rcu_assign_pointer(elem->multi, (*entry)->multi); ++ /* ++ * Update the function or multi probe array pointer before setting the ++ * ptype. ++ */ ++ smp_wmb(); ++ elem->ptype = (*entry)->ptype; ++ elem->state = active; ++ ++ return 0; ++} ++ ++/* ++ * Disable a marker and its probe callback. ++ * Note: only after a synchronize_sched() issued after setting elem->call to the ++ * empty function insures that the original callback is not used anymore. This ++ * insured by preemption disabling around the call site. ++ */ ++static void disable_marker(struct marker *elem) ++{ ++ /* leave "call" as is. It is known statically. */ ++ elem->state = 0; ++ elem->single.func = __mark_empty_function; ++ /* Update the function before setting the ptype */ ++ smp_wmb(); ++ elem->ptype = 0; /* single probe */ ++ /* ++ * Leave the private data and id there, because removal is racy and ++ * should be done only after a synchronize_sched(). These are never used ++ * until the next initialization anyway. ++ */ ++} ++ ++/** ++ * marker_update_probe_range - Update a probe range ++ * @begin: beginning of the range ++ * @end: end of the range ++ * ++ * Updates the probe callback corresponding to a range of markers. 
++ */ ++void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ ++ struct marker *iter; ++ struct marker_entry *mark_entry; ++ ++ mutex_lock(&markers_mutex); ++ for (iter = begin; iter < end; iter++) { ++ mark_entry = get_marker(iter->name); ++ if (mark_entry) { ++ set_marker(&mark_entry, iter, ++ !!mark_entry->refcount); ++ /* ++ * ignore error, continue ++ */ ++ } else { ++ disable_marker(iter); ++ } ++ } ++ mutex_unlock(&markers_mutex); ++} ++ ++/* ++ * Update probes, removing the faulty probes. ++ * Issues a synchronize_sched() when no reference to the module passed ++ * as parameter is found in the probes so the probe module can be ++ * safely unloaded from now on. ++ * ++ * Internal callback only changed before the first probe is connected to it. ++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 ++ * transitions. All other transitions will leave the old private data valid. ++ * This makes the non-atomicity of the callback/private data updates valid. ++ * ++ * "special case" updates : ++ * 0 -> 1 callback ++ * 1 -> 0 callback ++ * 1 -> 2 callbacks ++ * 2 -> 1 callbacks ++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. ++ * Site effect : marker_set_format may delete the marker entry (creating a ++ * replacement). ++ */ ++static void marker_update_probes(void) ++{ ++ /* Core kernel markers */ ++ marker_update_probe_range(__start___markers, __stop___markers); ++ /* Markers in modules. */ ++ module_update_markers(); ++} ++ ++/** ++ * marker_probe_register - Connect a probe to a marker ++ * @name: marker name ++ * @format: format string ++ * @probe: probe handler ++ * @probe_private: probe private data ++ * ++ * private data must be a valid allocated memory address, or NULL. ++ * Returns 0 if ok, error value on error. ++ * The probe address must at least be aligned on the architecture pointer size. 
++ */ ++int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ int ret = 0; ++ struct marker_probe_closure *old; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ if (!entry) { ++ entry = add_marker(name, format); ++ if (IS_ERR(entry)) { ++ ret = PTR_ERR(entry); ++ goto end; ++ } ++ } ++ /* ++ * If we detect that a call_rcu is pending for this marker, ++ * make sure it's executed now. ++ */ ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_add_probe(entry, probe, probe_private); ++ if (IS_ERR(old)) { ++ ret = PTR_ERR(old); ++ goto end; ++ } ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ WARN_ON(!entry); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_register); ++ ++/** ++ * marker_probe_unregister - Disconnect a probe from a marker ++ * @name: marker name ++ * @probe: probe function pointer ++ * @probe_private: probe private data ++ * ++ * Returns the private data given to marker_probe_register, or an ERR_PTR(). ++ * We do not need to call a synchronize_sched to make sure the probes have ++ * finished running before doing a module unload, because the module unload ++ * itself uses stop_machine(), which insures that every preempt disabled section ++ * have finished. 
++ */ ++int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ struct marker_probe_closure *old; ++ int ret = 0; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, probe, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister); ++ ++static struct marker_entry * ++get_marker_from_private_data(marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ unsigned int i; ++ struct hlist_head *head; ++ struct hlist_node *node; ++ ++ for (i = 0; i < MARKER_TABLE_SIZE; i++) { ++ head = &marker_table[i]; ++ hlist_for_each_entry(entry, node, head, hlist) { ++ if (!entry->ptype) { ++ if (entry->single.func == probe ++ && entry->single.probe_private ++ == probe_private) ++ return entry; ++ } else { ++ struct marker_probe_closure *closure; ++ closure = entry->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func == probe && ++ closure[i].probe_private ++ == probe_private) ++ return entry; ++ } ++ } ++ } ++ } ++ return NULL; ++} ++ ++/** ++ * marker_probe_unregister_private_data - Disconnect a probe from a marker ++ * @probe: probe function ++ * @probe_private: probe private data ++ * ++ * Unregister a probe by providing the registered private data. ++ * Only removes the first marker found in hash table. ++ * Return 0 on success or error value. 
++ * We do not need to call a synchronize_sched to make sure the probes have ++ * finished running before doing a module unload, because the module unload ++ * itself uses stop_machine(), which insures that every preempt disabled section ++ * have finished. ++ */ ++int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private) ++{ ++ struct marker_entry *entry; ++ int ret = 0; ++ struct marker_probe_closure *old; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, NULL, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ WARN_ON(!entry); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(entry->name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); ++ ++/** ++ * marker_get_private_data - Get a marker's probe private data ++ * @name: marker name ++ * @probe: probe to match ++ * @num: get the nth matching probe's private data ++ * ++ * Returns the nth private data pointer (starting from 0) matching, or an ++ * ERR_PTR. ++ * Returns the private data pointer, or an ERR_PTR. ++ * The private data pointer should _only_ be dereferenced if the caller is the ++ * owner of the data, or its content could vanish. This is mostly used to ++ * confirm that a caller is the owner of a registered probe. 
++ */ ++void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ u32 hash = jhash(name, name_len-1, 0); ++ int i; ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ if (!e->ptype) { ++ if (num == 0 && e->single.func == probe) ++ return e->single.probe_private; ++ else ++ break; ++ } else { ++ struct marker_probe_closure *closure; ++ int match = 0; ++ closure = e->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func != probe) ++ continue; ++ if (match++ == num) ++ return closure[i].probe_private; ++ } ++ } ++ } ++ } ++ return ERR_PTR(-ENOENT); ++} ++EXPORT_SYMBOL_GPL(marker_get_private_data); +diff --git a/kernel/module.c b/kernel/module.c +index 624e7ee..ae16b04 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -1507,6 +1507,8 @@ static struct module *load_module(void __user *umod, + struct exception_table_entry *extable; + mm_segment_t old_fs; + int gpgsig_ok; ++ unsigned int markersindex; ++ unsigned int markersstringsindex; + + DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", + umod, len, uargs); +@@ -1739,6 +1741,9 @@ static struct module *load_module(void __user *umod, + tainted |= TAINT_FORCED_MODULE; + } + #endif ++ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); ++ markersstringsindex = find_sec(hdr, sechdrs, secstrings, ++ "__markers_strings"); + + /* Now do relocations. 
*/ + for (i = 1; i < hdr->e_shnum; i++) { +@@ -1762,6 +1767,12 @@ static struct module *load_module(void __user *umod, + goto cleanup; + } + ++#ifdef CONFIG_MARKERS ++ mod->markers = (void *)sechdrs[markersindex].sh_addr; ++ mod->num_markers = ++ sechdrs[markersindex].sh_size / sizeof(*mod->markers); ++#endif ++ + /* Set up and sort exception table */ + mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); + mod->extable = extable = (void *)sechdrs[exindex].sh_addr; +@@ -1773,6 +1784,12 @@ static struct module *load_module(void __user *umod, + + add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + ++#ifdef CONFIG_MARKERS ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++#endif ++ + err = module_finalize(hdr, sechdrs, mod); + if (err < 0) + goto cleanup; +@@ -2190,6 +2207,20 @@ void struct_module(struct module *mod) { return; } + EXPORT_SYMBOL(struct_module); + #endif + ++#ifdef CONFIG_MARKERS ++void module_update_markers(void) ++{ ++ struct module *mod; ++ ++ down(&module_mutex); ++ list_for_each_entry(mod, &modules, list) ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++ up(&module_mutex); ++} ++#endif ++ + static int __init modules_init(void) + { + return subsystem_register(&module_subsys); +diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c +index 1b16bfc..20ade31 100644 +--- a/kernel/rcupdate.c ++++ b/kernel/rcupdate.c +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + + /* Definition for rcupdate control block. */ + struct rcu_ctrlblk rcu_ctrlblk = +@@ -98,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head, + local_irq_restore(flags); + } + ++static atomic_t rcu_barrier_cpu_count; ++static DEFINE_MUTEX(rcu_barrier_mutex); ++static struct completion rcu_barrier_completion; ++ + /** + * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * @head: structure to be used for queueing the RCU updates. 
+@@ -410,6 +415,44 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, + return NOTIFY_OK; + } + ++static void rcu_barrier_callback(struct rcu_head *notused) ++{ ++ if (atomic_dec_and_test(&rcu_barrier_cpu_count)) ++ complete(&rcu_barrier_completion); ++} ++ ++/* ++ * Called with preemption disabled, and from cross-cpu IRQ context. ++ */ ++static void rcu_barrier_func(void *notused) ++{ ++ int cpu = smp_processor_id(); ++ struct rcu_data *rdp = &per_cpu(rcu_data, cpu); ++ struct rcu_head *head; ++ ++ head = &rdp->barrier; ++ atomic_inc(&rcu_barrier_cpu_count); ++ call_rcu(head, rcu_barrier_callback); ++} ++ ++/** ++ * rcu_barrier - Wait until all the in-flight RCUs are complete. ++ */ ++void rcu_barrier(void) ++{ ++ BUG_ON(in_interrupt()); ++ /* Take cpucontrol mutex to protect against CPU hotplug */ ++ mutex_lock(&rcu_barrier_mutex); ++ init_completion(&rcu_barrier_completion); ++ atomic_set(&rcu_barrier_cpu_count, 0); ++ on_each_cpu(rcu_barrier_func, NULL, 0, 1); ++ wait_for_completion(&rcu_barrier_completion); ++ mutex_unlock(&rcu_barrier_mutex); ++} ++EXPORT_SYMBOL_GPL(rcu_barrier); ++ ++ ++ + static struct notifier_block __devinitdata rcu_nb = { + .notifier_call = rcu_cpu_notify, + }; +diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost +index b3d31b5..b100a32 100644 +--- a/scripts/Makefile.modpost ++++ b/scripts/Makefile.modpost +@@ -13,6 +13,7 @@ + # 2) modpost is then used to + # 3) create one .mod.c file pr. 
module + # 4) create one Module.symvers file with CRC for all exported symbols ++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers + # 5) compile all .mod.c files + # 6) final link of the module to a file + +@@ -40,6 +41,11 @@ include scripts/Makefile.lib + + symverfile := $(objtree)/Module.symvers + ++kernelmarkersfile := $(objtree)/Module.markers ++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers ++ ++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) ++ + # Step 1), find all modules listed in $(MODVERDIR)/ + __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) + modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) +@@ -53,16 +59,27 @@ quiet_cmd_modpost = MODPOST + cmd_modpost = scripts/mod/modpost \ + $(if $(CONFIG_MODVERSIONS),-m) \ + $(if $(KBUILD_EXTMOD),-i,-o) $(symverfile) \ ++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ ++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ + $(filter-out FORCE,$^) + + .PHONY: __modpost + __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE + $(call cmd,modpost) + ++quiet_cmd_kernel-mod = MODPOST $@ ++ cmd_kernel-mod = $(cmd_modpost) $@ ++ ++vmlinux.o: FORCE ++ $(call cmd,kernel-mod) ++ + # Declare generated files as targets for modpost + $(symverfile): __modpost ; + $(modules:.ko=.mod.c): __modpost ; + ++ifdef CONFIG_MARKERS ++$(markersfile): __modpost ; ++endif + + # Step 5), compile all *.mod.c files + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 2a174e5..c25948c 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -10,7 +10,8 @@ + * + * Usage: modpost vmlinux module1.o module2.o ... 
+ */ +- ++#define _GNU_SOURCE ++#include + #include + #include "modpost.h" + +@@ -289,7 +290,9 @@ parse_elf(struct elf_info *info, const char *filename) + if (strcmp(secstrings+sechdrs[i].sh_name, ".modinfo") == 0) { + info->modinfo = (void *)hdr + sechdrs[i].sh_offset; + info->modinfo_len = sechdrs[i].sh_size; +- } ++ } else if (strcmp(secstrings+sechdrs[i].sh_name, "__markers_strings") == 0) ++ info->markers_strings_sec = i; ++ + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; + +@@ -397,6 +400,63 @@ is_vmlinux(const char *modname) + return strcmp(myname, "vmlinux") == 0; + } + ++static void get_markers(struct elf_info *info, struct module *mod) ++{ ++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; ++ const char *strings = (const char *) info->hdr + sh->sh_offset; ++ const Elf_Sym *sym, *first_sym, *last_sym; ++ size_t n; ++ ++ ++ if (!info->markers_strings_sec) ++ return; ++ ++ /* ++ * First count the strings. We look for all the symbols defined ++ * in the __markers_strings section named __mstrtab_*. For ++ * these local names, the compiler puts a random .NNN suffix on, ++ * so the names don't correspond exactly. ++ */ ++ first_sym = last_sym = NULL; ++ n = 0; ++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ if (first_sym == NULL) ++ first_sym = sym; ++ last_sym = sym; ++ ++n; ++ } ++ ++ if (n == 0) ++ return; ++ /* ++ * Now collect each name and format into a line for the output. ++ * Lines look like: ++ * marker_name vmlinux marker %s format %d ++ * The format string after the second \t can use whitespace. 
++ */ ++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); ++ mod->nmarkers = n; ++ ++ n = 0; ++ for (sym = first_sym; sym <= last_sym; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ const char *name = strings + sym->st_value; ++ const char *fmt = strchr(name, '\0') + 1; ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ mod->markers[n++] = line; ++ } ++} ++ ++ + void + read_symbols(char *modname) + { +@@ -426,6 +486,7 @@ read_symbols(char *modname) + } + maybe_frob_version(modname, info.modinfo, info.modinfo_len, + (void *)info.modinfo - (void *)info.hdr); ++ get_markers(&info, mod); + parse_elf_finish(&info); + + /* Our trick to get versioning for struct_module - it's +@@ -682,6 +743,92 @@ write_dump(const char *fname) + write_if_changed(&buf, fname); + } + ++static void add_marker(struct module *mod, const char *name, const char *fmt) ++{ ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ ++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * ++ sizeof mod->markers[0]))); ++ mod->markers[mod->nmarkers++] = line; ++} ++ ++static void read_markers(const char *fname) ++{ ++ unsigned long size, pos = 0; ++ void *file = grab_file(fname, &size); ++ char *line; ++ ++ if (!file) /* No old markers, silently ignore */ ++ return; ++ ++ while ((line = get_next_line(&pos, file, size))) { ++ char *marker, *modname, *fmt; ++ struct module *mod; ++ ++ marker = line; ++ modname = strchr(marker, '\t'); ++ if (!modname) ++ goto fail; ++ *modname++ = '\0'; ++ fmt = strchr(modname, '\t'); ++ if (!fmt) ++ goto fail; ++ *fmt++ = '\0'; ++ if (*marker == '\0' || *modname == '\0') ++ goto fail; ++ ++ mod = find_module(modname); ++ if (!mod) { ++ if (is_vmlinux(modname)) ++ have_vmlinux = 1; ++ mod = 
new_module(NOFAIL(strdup(modname))); ++ mod->skip = 1; ++ } ++ ++ add_marker(mod, marker, fmt); ++ } ++ return; ++fail: ++ fatal("parse error in markers list file\n"); ++} ++ ++static int compare_strings(const void *a, const void *b) ++{ ++ return strcmp(*(const char **) a, *(const char **) b); ++} ++ ++static void write_markers(const char *fname) ++{ ++ struct buffer buf = { }; ++ struct module *mod; ++ size_t i; ++ ++ for (mod = modules; mod; mod = mod->next) ++ if (mod->markers != NULL) { ++ /* ++ * Sort the strings so we can skip duplicates when ++ * we write them out. ++ */ ++ qsort(mod->markers, mod->nmarkers, ++ sizeof mod->markers[0], &compare_strings); ++ for (i = 0; i < mod->nmarkers; ++i) { ++ char *line = mod->markers[i]; ++ buf_write(&buf, line, strlen(line)); ++ while (i + 1 < mod->nmarkers && ++ !strcmp(mod->markers[i], ++ mod->markers[i + 1])) ++ free(mod->markers[i++]); ++ free(mod->markers[i]); ++ } ++ free(mod->markers); ++ mod->markers = NULL; ++ } ++ ++ write_if_changed(&buf, fname); ++} ++ + int + main(int argc, char **argv) + { +@@ -690,8 +837,10 @@ main(int argc, char **argv) + char fname[SZ]; + char *dump_read = NULL, *dump_write = NULL; + int opt; ++ char *markers_read = NULL; ++ char *markers_write = NULL; + +- while ((opt = getopt(argc, argv, "i:mo:")) != -1) { ++ while ((opt = getopt(argc, argv, "i:mo:M:K:")) != -1) { + switch(opt) { + case 'i': + dump_read = optarg; +@@ -702,6 +851,12 @@ main(int argc, char **argv) + case 'o': + dump_write = optarg; + break; ++ case 'M': ++ markers_write = optarg; ++ break; ++ case 'K': ++ markers_read = optarg; ++ break; + default: + exit(1); + } +@@ -732,6 +887,12 @@ main(int argc, char **argv) + if (dump_write) + write_dump(dump_write); + ++ if (markers_read) ++ read_markers(markers_read); ++ ++ if (markers_write) ++ write_markers(markers_write); ++ + return 0; + } + +diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h +index 4871343..d79d7ea 100644 +--- a/scripts/mod/modpost.h ++++ 
b/scripts/mod/modpost.h +@@ -18,6 +18,7 @@ + #define Elf_Sym Elf32_Sym + #define ELF_ST_BIND ELF32_ST_BIND + #define ELF_ST_TYPE ELF32_ST_TYPE ++#define Elf_Section Elf32_Half + + #else + +@@ -26,7 +27,7 @@ + #define Elf_Sym Elf64_Sym + #define ELF_ST_BIND ELF64_ST_BIND + #define ELF_ST_TYPE ELF64_ST_TYPE +- ++#define Elf_Section Elf64_Half + #endif + + #if KERNEL_ELFDATA != HOST_ELFDATA +@@ -77,6 +78,8 @@ struct module { + int has_init; + int has_cleanup; + struct buffer dev_table_buf; ++ char **markers; ++ size_t nmarkers; + }; + + struct elf_info { +@@ -85,6 +88,7 @@ struct elf_info { + Elf_Shdr *sechdrs; + Elf_Sym *symtab_start; + Elf_Sym *symtab_stop; ++ Elf_Section markers_strings_sec; + const char *strtab; + char *modinfo; + unsigned int modinfo_len; From svn-commits at oss.oracle.com Thu Jul 3 19:22:00 2008 From: svn-commits at oss.oracle.com (svn-commits at oss.oracle.com) Date: Thu, 03 Jul 2008 19:22:00 -0700 Subject: [Kernelpatches-commits] wjhuang commits r7 - trunk/wjhuang/marker_patches Message-ID: Author: wjhuang Date: 2008-07-03 19:22:00 -0700 (Thu, 03 Jul 2008) New Revision: 7 Removed: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch Log: remove the test files Deleted: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch 2008-07-04 02:18:23 UTC (rev 6) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch 2008-07-04 02:22:00 UTC (rev 7) @@ -1,1882 +0,0 @@ -diff --git a/Makefile b/Makefile -index 7682056..d1b7d78 100644 ---- a/Makefile -+++ b/Makefile -@@ 
-601,7 +601,7 @@ quiet_cmd_vmlinux__ ?= LD $@ - cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ - -T $(vmlinux-lds) $(vmlinux-init) \ - --start-group $(vmlinux-main) --end-group \ -- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) -+ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) - - # Generate new vmlinux version - quiet_cmd_vmlinux_version = GEN .version -@@ -725,11 +725,30 @@ debug_kallsyms: .tmp_map$(last_kallsyms) - - endif # ifdef CONFIG_KALLSYMS - -+# Do modpost on a prelinked vmlinux. The finally linked vmlinux has -+# relevant sections renamed as per the linker script. -+quiet_cmd_vmlinux-modpost = LD $@ -+ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ -+ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ -+ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) -+define rule_vmlinux-modpost -+ : -+ +$(call cmd,vmlinux-modpost) -+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ -+ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd -+endef -+ - # vmlinux image - including updated kernel symbols --vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE -+vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE -+ $(call vmlinux-modpost) - $(call if_changed_rule,vmlinux__) - $(Q)rm -f .old_version - -+# build vmlinux.o first to catch section mismatch errors early -+$(kallsyms.o): vmlinux.o -+vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE -+ $(call if_changed_rule,vmlinux-modpost) -+ - # The actual objects are generated when descending, - # make sure no implicit rule kicks in - $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; -diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig -index 7dec8f0..73f833c 100644 ---- a/arch/i386/Kconfig -+++ b/arch/i386/Kconfig -@@ -1205,6 +1205,14 @@ config KPROBES - a probepoint and specifies the callback. 
Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". -+ -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. -+ - endmenu - - source "arch/i386/Kconfig.debug" -diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S -index a44d95d..23d73bb 100644 ---- a/arch/i386/kernel/vmlinux.lds.S -+++ b/arch/i386/kernel/vmlinux.lds.S -@@ -61,6 +61,7 @@ SECTIONS - /* writeable */ - .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ - *(.data) -+ MARKER - CONSTRUCTORS - } :data - -diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig -index c45b7b5..aee46db 100644 ---- a/arch/ia64/Kconfig -+++ b/arch/ia64/Kconfig -@@ -587,6 +587,14 @@ config KPROBES - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". -+ -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. -+ - endmenu - - source "arch/ia64/Kconfig.debug" -diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S -index 5b0d5f6..9c63f83 100644 ---- a/arch/ia64/kernel/vmlinux.lds.S -+++ b/arch/ia64/kernel/vmlinux.lds.S -@@ -206,7 +206,7 @@ SECTIONS - - data : { } :data - .data : AT(ADDR(.data) - LOAD_OFFSET) -- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } -+ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } - - . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ - .got : AT(ADDR(.got) - LOAD_OFFSET) -diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig -index 41024aa..4ab8d59 100644 ---- a/arch/powerpc/Kconfig -+++ b/arch/powerpc/Kconfig -@@ -1095,6 +1095,14 @@ config KPROBES - a probepoint and specifies the callback. 
Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". -+ -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. -+ - endmenu - - source "arch/powerpc/Kconfig.debug" -diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S -index c02298a..709bc47 100644 ---- a/arch/powerpc/kernel/vmlinux.lds.S -+++ b/arch/powerpc/kernel/vmlinux.lds.S -@@ -176,11 +176,13 @@ SECTIONS - *(.data) - *(.sdata) - *(.got.plt) *(.got) -+ MARKER - } - #else - .data : { - *(.data .data.rel* .toc1) - *(.branch_lt) -+ MARKER - } - - .opd : { -diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig -index 974474a..c81600c 100644 ---- a/arch/s390/Kconfig -+++ b/arch/s390/Kconfig -@@ -505,6 +505,14 @@ config KPROBES - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". -+ -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. -+ - endmenu - - source "arch/s390/Kconfig.debug" -diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S -index df0c16a..69826b1 100644 ---- a/arch/s390/kernel/vmlinux.lds.S -+++ b/arch/s390/kernel/vmlinux.lds.S -@@ -46,6 +46,7 @@ SECTIONS - - .data : { /* Data */ - *(.data) -+ MARKER - CONSTRUCTORS - } - -diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig -index b627f8d..e0467d1 100644 ---- a/arch/sparc64/Kconfig -+++ b/arch/sparc64/Kconfig -@@ -427,6 +427,14 @@ config KPROBES - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". -+ -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. 
Can be -+ dynamically changed for a probe function. -+ - endmenu - - source "arch/sparc64/Kconfig.debug" -diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S -index b097379..1f10e43 100644 ---- a/arch/sparc64/kernel/vmlinux.lds.S -+++ b/arch/sparc64/kernel/vmlinux.lds.S -@@ -27,6 +27,7 @@ SECTIONS - .data : - { - *(.data) -+ MARKER - CONSTRUCTORS - } - .data1 : { *(.data1) } -diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig -index d284a9a..e556e06 100644 ---- a/arch/x86_64/Kconfig -+++ b/arch/x86_64/Kconfig -@@ -704,6 +704,14 @@ config KPROBES - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". -+ -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. -+ - endmenu - - source "arch/x86_64/Kconfig.debug" -diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S -index f17185f..a33251a 100644 ---- a/arch/x86_64/kernel/vmlinux.lds.S -+++ b/arch/x86_64/kernel/vmlinux.lds.S -@@ -67,6 +67,7 @@ SECTIONS - /* Data */ - .data : AT(ADDR(.data) - LOAD_OFFSET) { - *(.data) -+ MARKER - CONSTRUCTORS - } :data - -diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c -index 6a40707..9b0fab5 100644 ---- a/drivers/net/bonding/bond_3ad.c -+++ b/drivers/net/bonding/bond_3ad.c -@@ -125,7 +125,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); - - // ================= main 802.3ad protocol functions ================== - static int ad_lacpdu_send(struct port *port); --static int ad_marker_send(struct port *port, struct marker *marker); -+static int ad_marker_send(struct port *port, struct bond_marker *marker); - static void ad_mux_machine(struct port *port); - static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); - static void ad_tx_machine(struct port 
*port); -@@ -138,8 +138,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast); - static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); - static void ad_enable_collecting_distributing(struct port *port); - static void ad_disable_collecting_distributing(struct port *port); --static void ad_marker_info_received(struct marker *marker_info, struct port *port); --static void ad_marker_response_received(struct marker *marker, struct port *port); -+static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); -+static void ad_marker_response_received(struct bond_marker *marker, struct port *port); - - - ///////////////////////////////////////////////////////////////////////////////// -@@ -903,12 +903,13 @@ static int ad_lacpdu_send(struct port *port) - * Returns: 0 on success - * < 0 on error - */ --static int ad_marker_send(struct port *port, struct marker *marker) -+static int ad_marker_send(struct port *port, struct bond_marker *marker) - { - struct slave *slave = port->slave; - struct sk_buff *skb; -- struct marker_header *marker_header; -- int length = sizeof(struct marker_header); -+ struct bond_marker_header *marker_header; -+ int length = sizeof(struct bond_marker_header); -+ - struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; - - skb = dev_alloc_skb(length + 16); -@@ -923,7 +924,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) - skb->nh.raw = skb->data + ETH_HLEN; - skb->protocol = PKT_TYPE_LACPDU; - -- marker_header = (struct marker_header *)skb_put(skb, length); -+ marker_header = (struct bond_marker_header *)skb_put(skb, length); - - marker_header->ad_header.destination_address = lacpdu_multicast_address; - /* Note: source addres is set to be the member's PERMANENT address, because we use it -@@ -1723,7 +1724,7 @@ static void ad_disable_collecting_distributing(struct port *port) - */ - static void ad_marker_info_send(struct port *port) - { -- struct marker marker; -+ struct 
bond_marker marker; - u16 index; - - // fill the marker PDU with the appropriate values -@@ -1756,13 +1757,14 @@ static void ad_marker_info_send(struct port *port) - * @port: the port we're looking at - * - */ --static void ad_marker_info_received(struct marker *marker_info,struct port *port) -+static void ad_marker_info_received(struct bond_marker *marker_info, -+ struct port *port) - { -- struct marker marker; -+ struct bond_marker marker; - - // copy the received marker data to the response marker - //marker = *marker_info; -- memcpy(&marker, marker_info, sizeof(struct marker)); -+ memcpy(&marker, marker_info, sizeof(struct bond_marker)); - // change the marker subtype to marker response - marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; - // send the marker response -@@ -1781,7 +1783,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port - * response for marker PDU's, in this stage, but only to respond to marker - * information. - */ --static void ad_marker_response_received(struct marker *marker, struct port *port) -+static void ad_marker_response_received(struct bond_marker *marker, -+ struct port *port) - { - marker=NULL; // just to satisfy the compiler - port=NULL; // just to satisfy the compiler -@@ -2179,15 +2182,16 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u - case AD_TYPE_MARKER: - // No need to convert fields to Little Endian since we don't use the marker's fields. 
- -- switch (((struct marker *)lacpdu)->tlv_type) { -+ switch (((struct bond_marker *)lacpdu)->tlv_type) { - case AD_MARKER_INFORMATION_SUBTYPE: - dprintk("Received Marker Information on port %d\n", port->actor_port_number); -- ad_marker_info_received((struct marker *)lacpdu, port); -+ ad_marker_info_received((struct bond_marker *)lacpdu, port); -+ - break; - - case AD_MARKER_RESPONSE_SUBTYPE: - dprintk("Received Marker Response on port %d\n", port->actor_port_number); -- ad_marker_response_received((struct marker *)lacpdu, port); -+ ad_marker_response_received((struct bond_marker *)lacpdu, port); - break; - - default: -diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h -index 6ad5ad6..bf93e7e 100644 ---- a/drivers/net/bonding/bond_3ad.h -+++ b/drivers/net/bonding/bond_3ad.h -@@ -92,7 +92,8 @@ typedef enum { - typedef enum { - AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype - AD_MARKER_RESPONSE_SUBTYPE // marker response subtype --} marker_subtype_t; -+} bond_marker_subtype_t; -+ - - // timers types(43.4.9 in the 802.3ad standard) - typedef enum { -@@ -148,7 +149,7 @@ typedef struct lacpdu_header { - } lacpdu_header_t; - - // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) --typedef struct marker { -+typedef struct bond_marker { - u8 subtype; // = 0x02 (marker PDU) - u8 version_number; // = 0x01 - u8 tlv_type; // = 0x01 (marker information) -@@ -161,12 +162,12 @@ typedef struct marker { - u8 tlv_type_terminator; // = 0x00 - u8 terminator_length; // = 0x00 - u8 reserved_90[90]; // = 0 --} marker_t; -+} bond_marker_t; - --typedef struct marker_header { -+typedef struct bond_marker_header { - struct ad_header ad_header; -- struct marker marker; --} marker_header_t; -+ struct bond_marker marker; -+} bond_marker_header_t; - - #pragma pack() - -diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h -index 5102c6b..aed4049 100644 ---- a/drivers/scsi/qla4xxx/ql4_fw.h -+++ 
b/drivers/scsi/qla4xxx/ql4_fw.h -@@ -744,7 +744,7 @@ struct continuation_t1_entry { - #define ET_CONTINUE ET_CONT_T1 - - /* Marker entry structure*/ --struct marker_entry { -+struct qla4_marker_entry { - struct qla4_header hdr; /* 00-03 */ - - uint32_t system_defined; /* 04-07 */ -diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c -index 4e532b4..43860eb 100644 ---- a/drivers/scsi/qla4xxx/ql4_iocb.c -+++ b/drivers/scsi/qla4xxx/ql4_iocb.c -@@ -65,7 +65,8 @@ int qla4xxx_get_req_pkt(struct scsi_qla_host *ha, - int qla4xxx_send_marker_iocb(struct scsi_qla_host *ha, - struct ddb_entry *ddb_entry, int lun) - { -- struct marker_entry *marker_entry; -+ struct qla4_marker_entry *marker_entry; -+ - unsigned long flags = 0; - uint8_t status = QLA_SUCCESS; - -diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h -index b9e964c..a175f1e 100644 ---- a/include/asm-generic/vmlinux.lds.h -+++ b/include/asm-generic/vmlinux.lds.h -@@ -9,12 +9,21 @@ - /* Align . to a 8 byte boundary equals to maximum function alignment. */ - #define ALIGN_FUNCTION() . = ALIGN(8) - -+/* Kernel markers : pointers */ -+#define MARKER \ -+ . = ALIGN(8); \ -+ VMLINUX_SYMBOL(__start___markers) = .; \ -+ *(__markers) \ -+ VMLINUX_SYMBOL(__stop___markers) = .; -+ -+ - #define RODATA \ - . = ALIGN(4096); \ - .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__start_rodata) = .; \ - *(.rodata) *(.rodata.*) \ - *(__vermagic) /* Kernel version magic */ \ -+ *(__markers_strings) /* Markers: strings */ \ - } \ - \ - .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ -diff --git a/include/linux/marker.h b/include/linux/marker.h -new file mode 100644 -index 0000000..efbc82b ---- /dev/null -+++ b/include/linux/marker.h -@@ -0,0 +1,139 @@ -+#ifndef _LINUX_MARKER_H -+#define _LINUX_MARKER_H -+ -+/* -+ * Code markup for dynamic and static tracing. -+ * -+ * See Documentation/marker.txt. 
-+ * -+ * (C) Copyright 2006 Mathieu Desnoyers -+ * -+ * This file is released under the GPLv2. -+ * See the file COPYING for more details. -+ */ -+ -+#include -+ -+struct module; -+struct marker; -+ -+/** -+ * marker_probe_func - Type of a marker probe function -+ * @probe_private: probe private data -+ * @call_private: call site private data -+ * @fmt: format string -+ * @args: variable argument list pointer. Use a pointer to overcome C's -+ * inability to pass this around as a pointer in a portable manner in -+ * the callee otherwise. -+ * -+ * Type of marker probe functions. They receive the mdata and need to parse the -+ * format string to recover the variable argument list. -+ */ -+typedef void marker_probe_func(void *probe_private, void *call_private, -+ const char *fmt, va_list *args); -+ -+struct marker_probe_closure { -+ marker_probe_func *func; /* Callback */ -+ void *probe_private; /* Private probe data */ -+}; -+ -+struct marker { -+ const char *name; /* Marker name */ -+ const char *format; /* Marker format string, describing the -+ * variable argument list. -+ */ -+ char state; /* Marker state. */ -+ char ptype; /* probe type : 0 : single, 1 : multi */ -+ void (*call)(const struct marker *mdata, /* Probe wrapper */ -+ void *call_private, const char *fmt, ...); -+ struct marker_probe_closure single; -+ struct marker_probe_closure *multi; -+} __attribute__((aligned(8))); -+ -+#ifdef CONFIG_MARKERS -+ -+/* -+ * Note : the empty asm volatile with read constraint is used here instead of a -+ * "used" attribute to fix a gcc 4.1.x bug. -+ * Make sure the alignment of the structure in the __markers section will -+ * not add unwanted padding between the beginning of the section and the -+ * structure. Force alignment to the same alignment as the section start. -+ */ -+#define __trace_mark(name, call_private, format, args...) 
\ -+ do { \ -+ static const char __mstrtab_##name[] \ -+ __attribute__((section("__markers_strings"))) \ -+ = #name "\0" format; \ -+ static struct marker __mark_##name \ -+ __attribute__((section("__markers"), aligned(8))) = \ -+ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ -+ 0, 0, marker_probe_cb, \ -+ { __mark_empty_function, NULL}, NULL }; \ -+ __mark_check_format(format, ## args); \ -+ if (unlikely(__mark_##name.state)) { \ -+ (*__mark_##name.call) \ -+ (&__mark_##name, call_private, \ -+ format, ## args); \ -+ } \ -+ } while (0) -+ -+extern void marker_update_probe_range(struct marker *begin, -+ struct marker *end); -+#else /* !CONFIG_MARKERS */ -+#define __trace_mark(name, call_private, format, args...) \ -+ __mark_check_format(format, ## args) -+static inline void marker_update_probe_range(struct marker *begin, -+ struct marker *end) -+{ } -+#endif /* CONFIG_MARKERS */ -+ -+/** -+ * trace_mark - Marker -+ * @name: marker name, not quoted. -+ * @format: format string -+ * @args...: variable argument list -+ * -+ * Places a marker. -+ */ -+#define trace_mark(name, format, args...) \ -+ __trace_mark(name, NULL, format, ## args) -+ -+/** -+ * MARK_NOARGS - Format string for a marker with no argument. -+ */ -+#define MARK_NOARGS " " -+ -+/* To be used for string format validity checking with gcc */ -+static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) -+{ -+} -+ -+extern marker_probe_func __mark_empty_function; -+ -+extern void marker_probe_cb(const struct marker *mdata, -+ void *call_private, const char *fmt, ...); -+extern void marker_probe_cb_noarg(const struct marker *mdata, -+ void *call_private, const char *fmt, ...); -+ -+/* -+ * Connect a probe to a marker. -+ * private data pointer must be a valid allocated memory address, or NULL. 
-+ */ -+extern int marker_probe_register(const char *name, const char *format, -+ marker_probe_func *probe, void *probe_private); -+ -+/* -+ * Returns the private data given to marker_probe_register. -+ */ -+extern int marker_probe_unregister(const char *name, -+ marker_probe_func *probe, void *probe_private); -+/* -+ * Unregister a marker by providing the registered private data. -+ */ -+extern int marker_probe_unregister_private_data(marker_probe_func *probe, -+ void *probe_private); -+ -+extern void *marker_get_private_data(const char *name, marker_probe_func *probe, -+ int num); -+ -+#endif -diff --git a/include/linux/module.h b/include/linux/module.h -index c458418..627afb9 100644 ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -17,6 +17,7 @@ - #include - #include - #include -+#include - #include - - #include -@@ -348,6 +349,10 @@ struct module - /* The command line arguments (may be mangled). People like - keeping pointers to this stuff */ - char *args; -+#ifdef CONFIG_MARKERS -+ struct marker *markers; -+ unsigned int num_markers; -+#endif - }; - - /* FIXME: It'd be nice to isolate modules during init, too, so they -@@ -467,6 +472,7 @@ int register_module_notifier(struct notifier_block * nb); - int unregister_module_notifier(struct notifier_block * nb); - - extern void print_modules(void); -+extern void module_update_markers(void); - - struct device_driver; - void module_add_driver(struct module *, struct device_driver *); -@@ -569,6 +575,10 @@ static inline void print_modules(void) - { - } - -+static inline void module_update_markers(void) -+{ -+} -+ - struct device_driver; - struct module; - -diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h -index b4ca73d..5f12d1b 100644 ---- a/include/linux/rcupdate.h -+++ b/include/linux/rcupdate.h -@@ -42,6 +42,19 @@ - #include - #include - -+/* -+ * Prevent the compiler from merging or refetching accesses. 
The compiler -+ * is also forbidden from reordering successive instances of ACCESS_ONCE(), -+ * but only when the compiler is aware of some particular ordering. One way -+ * to make the compiler aware of ordering is to put the two invocations of -+ * ACCESS_ONCE() in different C statements. -+ * -+ * This macro does absolutely -nothing- to prevent the CPU from reordering, -+ * merging, or refetching absolutely anything at any time. -+ * -+ */ -+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) -+ - /** - * struct rcu_head - callback structure for use with RCU - * @next: next update requests in a list -diff --git a/kernel/Makefile b/kernel/Makefile -index ed4af9c..b4ad7a7 100644 ---- a/kernel/Makefile -+++ b/kernel/Makefile -@@ -50,6 +50,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o - obj-$(CONFIG_RELAY) += relay.o - obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o - obj-$(CONFIG_TASKSTATS) += taskstats.o -+obj-$(CONFIG_MARKERS) += marker.o - obj-$(CONFIG_UTRACE) += utrace.o - - ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) -diff --git a/kernel/marker.c b/kernel/marker.c -new file mode 100644 -index 0000000..c4c2cd8 ---- /dev/null -+++ b/kernel/marker.c -@@ -0,0 +1,851 @@ -+/* -+ * Copyright (C) 2007 Mathieu Desnoyers -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-+ */ -+#include <linux/module.h> -+#include <linux/mutex.h> -+#include <linux/types.h> -+#include <linux/jhash.h> -+#include <linux/list.h> -+#include <linux/rcupdate.h> -+#include <linux/marker.h> -+#include <linux/err.h> -+ -+extern struct marker __start___markers[]; -+extern struct marker __stop___markers[]; -+ -+/* Set to 1 to enable marker debug output */ -+const int marker_debug; -+ -+/* -+ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin -+ * and module markers and the hash table. -+ */ -+static DEFINE_MUTEX(markers_mutex); -+ -+/* -+ * Marker hash table, containing the active markers. -+ * Protected by markers_mutex. -+ */ -+#define MARKER_HASH_BITS 6 -+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) -+ -+/* -+ * Note about RCU : -+ * It is used to make sure every handler has finished using its private data -+ * between two consecutive operations (add or remove) on a given marker. It is -+ * also used to delay the free of multiple probes array until a quiescent state -+ * is reached. -+ * marker entries modifications are protected by the markers_mutex. -+ */ -+struct marker_entry { -+ struct hlist_node hlist; -+ char *format; -+ void (*call)(const struct marker *mdata, /* Probe wrapper */ -+ void *call_private, const char *fmt, ...); -+ struct marker_probe_closure single; -+ struct marker_probe_closure *multi; -+ int refcount; /* Number of times armed. 0 if disarmed. */ -+ struct rcu_head rcu; -+ void *oldptr; -+ char rcu_pending:1; -+ char ptype:1; -+ char name[0]; /* Contains name'\0'format'\0' */ -+}; -+ -+static struct hlist_head marker_table[MARKER_TABLE_SIZE]; -+ -+/** -+ * __mark_empty_function - Empty probe callback -+ * @probe_private: probe private data -+ * @call_private: call site private data -+ * @fmt: format string -+ * @args: variable argument list pointer -+ * -+ * Empty callback provided as a probe to the markers.
By providing this to a -+ * disabled marker, we make sure the execution flow is always valid even -+ * though the function pointer change and the marker enabling are two distinct -+ * operations that modifies the execution flow of preemptible code. -+ */ -+void __mark_empty_function(void *probe_private, void *call_private, -+ const char *fmt, va_list *args) -+{ -+} -+EXPORT_SYMBOL_GPL(__mark_empty_function); -+ -+/* -+ * marker_probe_cb Callback that prepares the variable argument list for probes. -+ * @mdata: pointer of type struct marker -+ * @call_private: caller site private data -+ * @fmt: format string -+ * @...: Variable argument list. -+ * -+ * Since we do not use "typical" pointer based RCU in the 1 argument case, we -+ * need to put a full smp_rmb() in this branch. This is why we do not use -+ * rcu_dereference() for the pointer read. -+ */ -+void marker_probe_cb(const struct marker *mdata, void *call_private, -+ const char *fmt, ...) -+{ -+ va_list args; -+ char ptype; -+ -+ /* -+ * disabling preemption to make sure the teardown of the callbacks can -+ * be done correctly when they are in modules and they insure RCU read -+ * coherency. -+ */ -+ preempt_disable(); -+ ptype = ACCESS_ONCE(mdata->ptype); -+ if (likely(!ptype)) { -+ marker_probe_func *func; -+ /* Must read the ptype before ptr. They are not data dependant, -+ * so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func = ACCESS_ONCE(mdata->single.func); -+ /* Must read the ptr before private data. They are not data -+ * dependant, so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ va_start(args, fmt); -+ func(mdata->single.probe_private, call_private, fmt, &args); -+ va_end(args); -+ } else { -+ struct marker_probe_closure *multi; -+ int i; -+ /* -+ * multi points to an array, therefore accessing the array -+ * depends on reading multi. However, even in this case, -+ * we must insure that the pointer is read _before_ the array -+ * data. 
Same as rcu_dereference, but we need a full smp_rmb() -+ * in the fast path, so put the explicit barrier here. -+ */ -+ smp_read_barrier_depends(); -+ multi = ACCESS_ONCE(mdata->multi); -+ for (i = 0; multi[i].func; i++) { -+ va_start(args, fmt); -+ multi[i].func(multi[i].probe_private, call_private, fmt, -+ &args); -+ va_end(args); -+ } -+ } -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(marker_probe_cb); -+ -+/* -+ * marker_probe_cb_noarg - Callback that does not prepare the variable argument list. -+ * @mdata: pointer of type struct marker -+ * @call_private: caller site private data -+ * @fmt: format string -+ * @...: Variable argument list. -+ * -+ * Should be connected to markers "MARK_NOARGS". -+ */ -+void marker_probe_cb_noarg(const struct marker *mdata, -+ void *call_private, const char *fmt, ...) -+{ -+ va_list args; /* not initialized */ -+ char ptype; -+ -+ preempt_disable(); -+ ptype = ACCESS_ONCE(mdata->ptype); -+ if (likely(!ptype)) { -+ marker_probe_func *func; -+ /* Must read the ptype before ptr. They are not data dependent, -+ * so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func = ACCESS_ONCE(mdata->single.func); -+ /* Must read the ptr before private data. They are not data -+ * dependent, so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func(mdata->single.probe_private, call_private, fmt, &args); -+ } else { -+ struct marker_probe_closure *multi; -+ int i; -+ /* -+ * multi points to an array, therefore accessing the array -+ * depends on reading multi. However, even in this case, -+ * we must ensure that the pointer is read _before_ the array -+ * data. Same as rcu_dereference, but we need a full smp_rmb() -+ * in the fast path, so put the explicit barrier here.
-+ */ -+ smp_read_barrier_depends(); -+ multi = ACCESS_ONCE(mdata->multi); -+ for (i = 0; multi[i].func; i++) -+ multi[i].func(multi[i].probe_private, call_private, fmt, -+ &args); -+ } -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); -+ -+static void free_old_closure(struct rcu_head *head) -+{ -+ struct marker_entry *entry = container_of(head, -+ struct marker_entry, rcu); -+ kfree(entry->oldptr); -+ /* Make sure we free the data before setting the pending flag to 0 */ -+ smp_wmb(); -+ entry->rcu_pending = 0; -+} -+ -+static void debug_print_probes(struct marker_entry *entry) -+{ -+ int i; -+ -+ if (!marker_debug) -+ return; -+ -+ if (!entry->ptype) { -+ printk(KERN_DEBUG "Single probe : %p %p\n", -+ entry->single.func, -+ entry->single.probe_private); -+ } else { -+ for (i = 0; entry->multi[i].func; i++) -+ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, -+ entry->multi[i].func, -+ entry->multi[i].probe_private); -+ } -+} -+ -+static struct marker_probe_closure * -+marker_entry_add_probe(struct marker_entry *entry, -+ marker_probe_func *probe, void *probe_private) -+{ -+ int nr_probes = 0; -+ struct marker_probe_closure *old, *new; -+ -+ WARN_ON(!probe); -+ -+ debug_print_probes(entry); -+ old = entry->multi; -+ if (!entry->ptype) { -+ if (entry->single.func == probe && -+ entry->single.probe_private == probe_private) -+ return ERR_PTR(-EBUSY); -+ if (entry->single.func == __mark_empty_function) { -+ /* 0 -> 1 probes */ -+ entry->single.func = probe; -+ entry->single.probe_private = probe_private; -+ entry->refcount = 1; -+ entry->ptype = 0; -+ debug_print_probes(entry); -+ return NULL; -+ } else { -+ /* 1 -> 2 probes */ -+ nr_probes = 1; -+ old = NULL; -+ } -+ } else { -+ /* (N -> N+1), (N != 0, 1) probes */ -+ for (nr_probes = 0; old[nr_probes].func; nr_probes++) -+ if (old[nr_probes].func == probe -+ && old[nr_probes].probe_private -+ == probe_private) -+ return ERR_PTR(-EBUSY); -+ } -+ /* + 2 : one for new probe, one for NULL func */ -+ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), -+ GFP_KERNEL); -+ if (new == NULL) -+ return ERR_PTR(-ENOMEM); -+ if (!old) -+ new[0] = entry->single; -+ else -+ memcpy(new, old, -+ nr_probes * sizeof(struct marker_probe_closure)); -+ new[nr_probes].func = probe; -+ new[nr_probes].probe_private = probe_private; -+ entry->refcount = nr_probes + 1; -+ entry->multi = new; -+ entry->ptype = 1; -+ debug_print_probes(entry); -+ return old; -+} -+ -+static struct marker_probe_closure * -+marker_entry_remove_probe(struct marker_entry *entry, -+ marker_probe_func *probe, void *probe_private) -+{ -+ int nr_probes = 0, nr_del = 0, i; -+ struct marker_probe_closure *old, *new; -+ -+ old = entry->multi; -+ -+ debug_print_probes(entry); -+ if (!entry->ptype) { -+ /* 0 -> N is an error */ -+ WARN_ON(entry->single.func == __mark_empty_function); -+ /* 1 -> 0 probes */ -+ WARN_ON(probe && entry->single.func != probe); -+ WARN_ON(entry->single.probe_private != probe_private); -+ entry->single.func = __mark_empty_function; -+ entry->refcount = 0; -+ entry->ptype = 0; -+ debug_print_probes(entry); -+ return NULL; -+ } else { -+ /* (N -> M), (N > 1, M >= 0) probes */ -+ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { -+ if ((!probe || old[nr_probes].func == probe) -+ && old[nr_probes].probe_private -+ == probe_private) -+ nr_del++; -+ } -+ } -+ -+ if (nr_probes - nr_del == 0) { -+ /* N -> 0, (N > 1) */ -+ entry->single.func = __mark_empty_function; -+ entry->refcount = 0; -+ entry->ptype = 0; -+ } else if (nr_probes - nr_del == 1) { -+ /* N -> 1, (N > 1) */ -+ for (i = 0; old[i].func; i++) -+ if ((probe && old[i].func != probe) || -+ old[i].probe_private != probe_private) -+ entry->single = old[i]; -+ entry->refcount = 1; -+ entry->ptype = 0; -+ } else { -+ int j = 0; -+ /* N -> M, (N > 1, M > 1) */ -+ /* + 1 for NULL */ -+ new = kzalloc((nr_probes - nr_del + 1) -+ * sizeof(struct marker_probe_closure), GFP_KERNEL); -+ if (new == NULL) -+ return 
ERR_PTR(-ENOMEM); -+ for (i = 0; old[i].func; i++) -+ if ((probe && old[i].func != probe) || -+ old[i].probe_private != probe_private) -+ new[j++] = old[i]; -+ entry->refcount = nr_probes - nr_del; -+ entry->ptype = 1; -+ entry->multi = new; -+ } -+ debug_print_probes(entry); -+ return old; -+} -+ -+/* -+ * Get marker if the marker is present in the marker hash table. -+ * Must be called with markers_mutex held. -+ * Returns NULL if not present. -+ */ -+static struct marker_entry *get_marker(const char *name) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ u32 hash = jhash(name, strlen(name), 0); -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) -+ return e; -+ } -+ return NULL; -+} -+ -+/* -+ * Add the marker to the marker hash table. Must be called with markers_mutex -+ * held. -+ */ -+static struct marker_entry *add_marker(const char *name, const char *format) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ size_t name_len = strlen(name) + 1; -+ size_t format_len = 0; -+ u32 hash = jhash(name, name_len-1, 0); -+ -+ if (format) -+ format_len = strlen(format) + 1; -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ printk(KERN_NOTICE -+ "Marker %s busy\n", name); -+ return ERR_PTR(-EBUSY); /* Already there */ -+ } -+ } -+ /* -+ * Using kmalloc here to allocate a variable length element. Could -+ * cause some memory fragmentation if overused. 
-+ */ -+ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, -+ GFP_KERNEL); -+ if (!e) -+ return ERR_PTR(-ENOMEM); -+ memcpy(&e->name[0], name, name_len); -+ if (format) { -+ e->format = &e->name[name_len]; -+ memcpy(e->format, format, format_len); -+ if (strcmp(e->format, MARK_NOARGS) == 0) -+ e->call = marker_probe_cb_noarg; -+ else -+ e->call = marker_probe_cb; -+ trace_mark(core_marker_format, "name %s format %s", -+ e->name, e->format); -+ } else { -+ e->format = NULL; -+ e->call = marker_probe_cb; -+ } -+ e->single.func = __mark_empty_function; -+ e->single.probe_private = NULL; -+ e->multi = NULL; -+ e->ptype = 0; -+ e->refcount = 0; -+ e->rcu_pending = 0; -+ hlist_add_head(&e->hlist, head); -+ return e; -+} -+ -+/* -+ * Remove the marker from the marker hash table. Must be called with markers_mutex -+ * held. -+ */ -+static int remove_marker(const char *name) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ int found = 0; -+ size_t len = strlen(name) + 1; -+ u32 hash = jhash(name, len-1, 0); -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ found = 1; -+ break; -+ } -+ } -+ if (!found) -+ return -ENOENT; -+ if (e->single.func != __mark_empty_function) -+ return -EBUSY; -+ hlist_del(&e->hlist); -+ /* Make sure the call_rcu has been executed */ -+ if (e->rcu_pending) -+ rcu_barrier(); -+ kfree(e); -+ return 0; -+} -+ -+/* -+ * Set the marker_entry format to the format found in the element.
-+ */ -+static int marker_set_format(struct marker_entry **entry, const char *format) -+{ -+ struct marker_entry *e; -+ size_t name_len = strlen((*entry)->name) + 1; -+ size_t format_len = strlen(format) + 1; -+ -+ -+ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, -+ GFP_KERNEL); -+ if (!e) -+ return -ENOMEM; -+ memcpy(&e->name[0], (*entry)->name, name_len); -+ e->format = &e->name[name_len]; -+ memcpy(e->format, format, format_len); -+ if (strcmp(e->format, MARK_NOARGS) == 0) -+ e->call = marker_probe_cb_noarg; -+ else -+ e->call = marker_probe_cb; -+ e->single = (*entry)->single; -+ e->multi = (*entry)->multi; -+ e->ptype = (*entry)->ptype; -+ e->refcount = (*entry)->refcount; -+ e->rcu_pending = 0; -+ hlist_add_before(&e->hlist, &(*entry)->hlist); -+ hlist_del(&(*entry)->hlist); -+ /* Make sure the call_rcu has been executed */ -+ if ((*entry)->rcu_pending) -+ rcu_barrier(); -+ kfree(*entry); -+ *entry = e; -+ trace_mark(core_marker_format, "name %s format %s", -+ e->name, e->format); -+ return 0; -+} -+ -+/* -+ * Sets the probe callback corresponding to one marker. -+ */ -+static int set_marker(struct marker_entry **entry, struct marker *elem, -+ int active) -+{ -+ int ret; -+ WARN_ON(strcmp((*entry)->name, elem->name) != 0); -+ -+ if ((*entry)->format) { -+ if (strcmp((*entry)->format, elem->format) != 0) { -+ printk(KERN_NOTICE -+ "Format mismatch for probe %s " -+ "(%s), marker (%s)\n", -+ (*entry)->name, -+ (*entry)->format, -+ elem->format); -+ return -EPERM; -+ } -+ } else { -+ ret = marker_set_format(entry, elem->format); -+ if (ret) -+ return ret; -+ } -+ -+ /* -+ * probe_cb setup (statically known) is done here. It is -+ * asynchronous with the rest of execution, therefore we only -+ * pass from a "safe" callback (with argument) to an "unsafe" -+ * callback (does not set arguments). 
-+ */ -+ elem->call = (*entry)->call; -+ /* -+ * Sanity check : -+ * We only update the single probe private data when the ptr is -+ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) -+ */ -+ WARN_ON(elem->single.func != __mark_empty_function -+ && elem->single.probe_private -+ != (*entry)->single.probe_private && -+ !elem->ptype); -+ elem->single.probe_private = (*entry)->single.probe_private; -+ /* -+ * Make sure the private data is valid when we update the -+ * single probe ptr. -+ */ -+ smp_wmb(); -+ elem->single.func = (*entry)->single.func; -+ /* -+ * We also make sure that the new probe callbacks array is consistent -+ * before setting a pointer to it. -+ */ -+ rcu_assign_pointer(elem->multi, (*entry)->multi); -+ /* -+ * Update the function or multi probe array pointer before setting the -+ * ptype. -+ */ -+ smp_wmb(); -+ elem->ptype = (*entry)->ptype; -+ elem->state = active; -+ -+ return 0; -+} -+ -+/* -+ * Disable a marker and its probe callback. -+ * Note: only after a synchronize_sched() issued after setting elem->call to the -+ * empty function insures that the original callback is not used anymore. This -+ * insured by preemption disabling around the call site. -+ */ -+static void disable_marker(struct marker *elem) -+{ -+ /* leave "call" as is. It is known statically. */ -+ elem->state = 0; -+ elem->single.func = __mark_empty_function; -+ /* Update the function before setting the ptype */ -+ smp_wmb(); -+ elem->ptype = 0; /* single probe */ -+ /* -+ * Leave the private data and id there, because removal is racy and -+ * should be done only after a synchronize_sched(). These are never used -+ * until the next initialization anyway. -+ */ -+} -+ -+/** -+ * marker_update_probe_range - Update a probe range -+ * @begin: beginning of the range -+ * @end: end of the range -+ * -+ * Updates the probe callback corresponding to a range of markers. 
-+ */ -+void marker_update_probe_range(struct marker *begin, -+ struct marker *end) -+{ -+ struct marker *iter; -+ struct marker_entry *mark_entry; -+ -+ mutex_lock(&markers_mutex); -+ for (iter = begin; iter < end; iter++) { -+ mark_entry = get_marker(iter->name); -+ if (mark_entry) { -+ set_marker(&mark_entry, iter, -+ !!mark_entry->refcount); -+ /* -+ * ignore error, continue -+ */ -+ } else { -+ disable_marker(iter); -+ } -+ } -+ mutex_unlock(&markers_mutex); -+} -+ -+/* -+ * Update probes, removing the faulty probes. -+ * Issues a synchronize_sched() when no reference to the module passed -+ * as parameter is found in the probes so the probe module can be -+ * safely unloaded from now on. -+ * -+ * Internal callback only changed before the first probe is connected to it. -+ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 -+ * transitions. All other transitions will leave the old private data valid. -+ * This makes the non-atomicity of the callback/private data updates valid. -+ * -+ * "special case" updates : -+ * 0 -> 1 callback -+ * 1 -> 0 callback -+ * 1 -> 2 callbacks -+ * 2 -> 1 callbacks -+ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. -+ * Site effect : marker_set_format may delete the marker entry (creating a -+ * replacement). -+ */ -+static void marker_update_probes(void) -+{ -+ /* Core kernel markers */ -+ marker_update_probe_range(__start___markers, __stop___markers); -+ /* Markers in modules. */ -+ module_update_markers(); -+} -+ -+/** -+ * marker_probe_register - Connect a probe to a marker -+ * @name: marker name -+ * @format: format string -+ * @probe: probe handler -+ * @probe_private: probe private data -+ * -+ * private data must be a valid allocated memory address, or NULL. -+ * Returns 0 if ok, error value on error. -+ * The probe address must at least be aligned on the architecture pointer size. 
-+ */ -+int marker_probe_register(const char *name, const char *format, -+ marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ int ret = 0; -+ struct marker_probe_closure *old; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ if (!entry) { -+ entry = add_marker(name, format); -+ if (IS_ERR(entry)) { -+ ret = PTR_ERR(entry); -+ goto end; -+ } -+ } -+ /* -+ * If we detect that a call_rcu is pending for this marker, -+ * make sure it's executed now. -+ */ -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_add_probe(entry, probe, probe_private); -+ if (IS_ERR(old)) { -+ ret = PTR_ERR(old); -+ goto end; -+ } -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ WARN_ON(!entry); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_register); -+ -+/** -+ * marker_probe_unregister - Disconnect a probe from a marker -+ * @name: marker name -+ * @probe: probe function pointer -+ * @probe_private: probe private data -+ * -+ * Returns the private data given to marker_probe_register, or an ERR_PTR(). -+ * We do not need to call a synchronize_sched to make sure the probes have -+ * finished running before doing a module unload, because the module unload -+ * itself uses stop_machine(), which insures that every preempt disabled section -+ * have finished. 
-+ */ -+int marker_probe_unregister(const char *name, -+ marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ struct marker_probe_closure *old; -+ int ret = 0; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ if (!entry) { -+ ret = -ENOENT; -+ goto end; -+ } -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_remove_probe(entry, probe, probe_private); -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+ remove_marker(name); /* Ignore busy error message */ -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_unregister); -+ -+static struct marker_entry * -+get_marker_from_private_data(marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ unsigned int i; -+ struct hlist_head *head; -+ struct hlist_node *node; -+ -+ for (i = 0; i < MARKER_TABLE_SIZE; i++) { -+ head = &marker_table[i]; -+ hlist_for_each_entry(entry, node, head, hlist) { -+ if (!entry->ptype) { -+ if (entry->single.func == probe -+ && entry->single.probe_private -+ == probe_private) -+ return entry; -+ } else { -+ struct marker_probe_closure *closure; -+ closure = entry->multi; -+ for (i = 0; closure[i].func; i++) { -+ if (closure[i].func == probe && -+ closure[i].probe_private -+ == probe_private) -+ return entry; -+ } -+ } -+ } -+ } -+ return NULL; -+} -+ -+/** -+ * marker_probe_unregister_private_data - Disconnect a probe from a marker -+ * @probe: probe function -+ * @probe_private: probe private data -+ * -+ * Unregister a probe by providing the registered private data. -+ * Only removes the first marker found in hash table. -+ * Return 0 on success or error value. 
-+ * We do not need to call a synchronize_sched to make sure the probes have -+ * finished running before doing a module unload, because the module unload -+ * itself uses stop_machine(), which insures that every preempt disabled section -+ * have finished. -+ */ -+int marker_probe_unregister_private_data(marker_probe_func *probe, -+ void *probe_private) -+{ -+ struct marker_entry *entry; -+ int ret = 0; -+ struct marker_probe_closure *old; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker_from_private_data(probe, probe_private); -+ if (!entry) { -+ ret = -ENOENT; -+ goto end; -+ } -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_remove_probe(entry, NULL, probe_private); -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker_from_private_data(probe, probe_private); -+ WARN_ON(!entry); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+ remove_marker(entry->name); /* Ignore busy error message */ -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); -+ -+/** -+ * marker_get_private_data - Get a marker's probe private data -+ * @name: marker name -+ * @probe: probe to match -+ * @num: get the nth matching probe's private data -+ * -+ * Returns the nth private data pointer (starting from 0) matching, or an -+ * ERR_PTR. -+ * Returns the private data pointer, or an ERR_PTR. -+ * The private data pointer should _only_ be dereferenced if the caller is the -+ * owner of the data, or its content could vanish. This is mostly used to -+ * confirm that a caller is the owner of a registered probe. 
-+ */ -+void *marker_get_private_data(const char *name, marker_probe_func *probe, -+ int num) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ size_t name_len = strlen(name) + 1; -+ u32 hash = jhash(name, name_len-1, 0); -+ int i; -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ if (!e->ptype) { -+ if (num == 0 && e->single.func == probe) -+ return e->single.probe_private; -+ else -+ break; -+ } else { -+ struct marker_probe_closure *closure; -+ int match = 0; -+ closure = e->multi; -+ for (i = 0; closure[i].func; i++) { -+ if (closure[i].func != probe) -+ continue; -+ if (match++ == num) -+ return closure[i].probe_private; -+ } -+ } -+ } -+ } -+ return ERR_PTR(-ENOENT); -+} -+EXPORT_SYMBOL_GPL(marker_get_private_data); -diff --git a/kernel/module.c b/kernel/module.c -index f9a5987..ac99222 100644 ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -1488,6 +1488,8 @@ static struct module *load_module(void __user *umod, - unsigned int unusedcrcindex; - unsigned int unusedgplindex; - unsigned int unusedgplcrcindex; -+ unsigned int markersindex; -+ unsigned int markersstringsindex; - struct module *mod; - long err = 0; - void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ -@@ -1756,6 +1758,9 @@ static struct module *load_module(void __user *umod, - add_taint(TAINT_FORCED_MODULE); - } - #endif -+ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); -+ markersstringsindex = find_sec(hdr, sechdrs, secstrings, -+ "__markers_strings"); - - /* Now do relocations. 
*/ - for (i = 1; i < hdr->e_shnum; i++) { -@@ -1778,6 +1783,11 @@ static struct module *load_module(void __user *umod, - if (err < 0) - goto cleanup; - } -+#ifdef CONFIG_MARKERS -+ mod->markers = (void *)sechdrs[markersindex].sh_addr; -+ mod->num_markers = -+ sechdrs[markersindex].sh_size / sizeof(*mod->markers); -+#endif - - /* Find duplicate symbols */ - err = verify_export_symbols(mod); -@@ -1796,6 +1806,11 @@ static struct module *load_module(void __user *umod, - - add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); - -+#ifdef CONFIG_MARKERS -+ if (!tainted) -+ marker_update_probe_range(mod->markers, -+ mod->markers + mod->num_markers); -+#endif - err = module_finalize(hdr, sechdrs, mod); - if (err < 0) - goto cleanup; -@@ -2259,3 +2274,18 @@ EXPORT_SYMBOL(module_remove_driver); - void struct_module(struct module *mod) { return; } - EXPORT_SYMBOL(struct_module); - #endif -+ -+#ifdef CONFIG_MARKERS -+void module_update_markers(void) -+{ -+ struct module *mod; -+ -+ mutex_lock(&module_mutex); -+ list_for_each_entry(mod, &modules, list) -+ if (!tainted) -+ marker_update_probe_range(mod->markers, -+ mod->markers + mod->num_markers); -+ mutex_unlock(&module_mutex); -+} -+#endif -+ -diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost -index 0a64688..886b70c 100644 ---- a/scripts/Makefile.modpost -+++ b/scripts/Makefile.modpost -@@ -13,6 +13,7 @@ - # 2) modpost is then used to - # 3) create one .mod.c file pr. 
module - # 4) create one Module.symvers file with CRC for all exported symbols -+# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers - # 5) compile all .mod.c files - # 6) final link of the module to a file - -@@ -41,6 +42,10 @@ include scripts/Makefile.lib - - kernelsymfile := $(objtree)/Module.symvers - modulesymfile := $(KBUILD_EXTMOD)/Module.symvers -+kernelmarkersfile := $(objtree)/Module.markers -+modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers -+ -+markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) - - # Step 1), find all modules listed in $(MODVERDIR)/ - __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) -@@ -58,16 +63,27 @@ quiet_cmd_modpost = MODPOST - $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ - $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ - $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ -+ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ -+ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ - $(filter-out FORCE,$^) - - PHONY += __modpost - __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE - $(call cmd,modpost) - -+quiet_cmd_kernel-mod = MODPOST $@ -+ cmd_kernel-mod = $(cmd_modpost) $@ -+ -+vmlinux.o: FORCE -+ $(call cmd,kernel-mod) -+ - # Declare generated files as targets for modpost - $(symverfile): __modpost ; - $(modules:.ko=.mod.c): __modpost ; - -+ifdef CONFIG_MARKERS -+$(markersfile): __modpost ; -+endif - - # Step 5), compile all *.mod.c files - -diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c -index dfde0e8..81e5910 100644 ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -10,7 +10,8 @@ - * - * Usage: modpost vmlinux module1.o module2.o ... 
- */ -- -+#define _GNU_SOURCE -+#include - #include - #include "modpost.h" - #include "../../include/linux/license.h" -@@ -385,6 +386,8 @@ static void parse_elf(struct elf_info *info, const char *filename) - info->export_unused_gpl_sec = i; - else if (strcmp(secname, "__ksymtab_gpl_future") == 0) - info->export_gpl_future_sec = i; -+ else if (strcmp(secname, "__markers_strings") == 0) -+ info->markers_strings_sec = i; - - if (sechdrs[i].sh_type != SHT_SYMTAB) - continue; -@@ -859,6 +862,62 @@ static void check_sec_ref(struct module *mod, const char *modname, - } - } - -+static void get_markers(struct elf_info *info, struct module *mod) -+{ -+ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; -+ const char *strings = (const char *) info->hdr + sh->sh_offset; -+ const Elf_Sym *sym, *first_sym, *last_sym; -+ size_t n; -+ -+ if (!info->markers_strings_sec) -+ return; -+ -+ /* -+ * First count the strings. We look for all the symbols defined -+ * in the __markers_strings section named __mstrtab_*. For -+ * these local names, the compiler puts a random .NNN suffix on, -+ * so the names don't correspond exactly. -+ */ -+ first_sym = last_sym = NULL; -+ n = 0; -+ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) -+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && -+ sym->st_shndx == info->markers_strings_sec && -+ !strncmp(info->strtab + sym->st_name, -+ "__mstrtab_", sizeof "__mstrtab_" - 1)) { -+ if (first_sym == NULL) -+ first_sym = sym; -+ last_sym = sym; -+ ++n; -+ } -+ -+ if (n == 0) -+ return; -+ -+ /* -+ * Now collect each name and format into a line for the output. -+ * Lines look like: -+ * marker_name vmlinux marker %s format %d -+ * The format string after the second \t can use whitespace. 
-+ */ -+ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); -+ mod->nmarkers = n; -+ -+ n = 0; -+ for (sym = first_sym; sym <= last_sym; sym++) -+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && -+ sym->st_shndx == info->markers_strings_sec && -+ !strncmp(info->strtab + sym->st_name, -+ "__mstrtab_", sizeof "__mstrtab_" - 1)) { -+ const char *name = strings + sym->st_value; -+ const char *fmt = strchr(name, '\0') + 1; -+ char *line = NULL; -+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); -+ NOFAIL(line); -+ mod->markers[n++] = line; -+ } -+} -+ - /** - * Functions used only during module init is marked __init and is stored in - * a .init.text section. Likewise data is marked __initdata and stored in -@@ -1061,6 +1120,8 @@ static void read_symbols(char *modname) - get_src_version(modname, mod->srcversion, - sizeof(mod->srcversion)-1); - -+ get_markers(&info, mod); -+ - parse_elf_finish(&info); - - /* Our trick to get versioning for struct_module - it's -@@ -1394,6 +1455,93 @@ static void write_dump(const char *fname) - write_if_changed(&buf, fname); - } - -+static void add_marker(struct module *mod, const char *name, const char *fmt) -+{ -+ char *line = NULL; -+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); -+ NOFAIL(line); -+ -+ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * -+ sizeof mod->markers[0]))); -+ mod->markers[mod->nmarkers++] = line; -+} -+ -+static void read_markers(const char *fname) -+{ -+ unsigned long size, pos = 0; -+ void *file = grab_file(fname, &size); -+ char *line; -+ -+ if (!file) /* No old markers, silently ignore */ -+ return; -+ -+ while ((line = get_next_line(&pos, file, size))) { -+ char *marker, *modname, *fmt; -+ struct module *mod; -+ -+ marker = line; -+ modname = strchr(marker, '\t'); -+ if (!modname) -+ goto fail; -+ *modname++ = '\0'; -+ fmt = strchr(modname, '\t'); -+ if (!fmt) -+ goto fail; -+ *fmt++ = '\0'; -+ if (*marker == '\0' || *modname == '\0') -+ goto fail; -+ -+ mod = 
find_module(modname); -+ if (!mod) { -+ if (is_vmlinux(modname)) -+ have_vmlinux = 1; -+ mod = new_module(NOFAIL(strdup(modname))); -+ mod->skip = 1; -+ } -+ -+ add_marker(mod, marker, fmt); -+ } -+ return; -+fail: -+ fatal("parse error in markers list file\n"); -+} -+ -+static int compare_strings(const void *a, const void *b) -+{ -+ return strcmp(*(const char **) a, *(const char **) b); -+} -+ -+static void write_markers(const char *fname) -+{ -+ struct buffer buf = { }; -+ struct module *mod; -+ size_t i; -+ -+ for (mod = modules; mod; mod = mod->next) -+ if ((!external_module || !mod->skip) && mod->markers != NULL) { -+ /* -+ * Sort the strings so we can skip duplicates when -+ * we write them out. -+ */ -+ qsort(mod->markers, mod->nmarkers, -+ sizeof mod->markers[0], &compare_strings); -+ for (i = 0; i < mod->nmarkers; ++i) { -+ char *line = mod->markers[i]; -+ buf_write(&buf, line, strlen(line)); -+ while (i + 1 < mod->nmarkers && -+ !strcmp(mod->markers[i], -+ mod->markers[i + 1])) -+ free(mod->markers[i++]); -+ free(mod->markers[i]); -+ } -+ free(mod->markers); -+ mod->markers = NULL; -+ } -+ -+ write_if_changed(&buf, fname); -+} -+ -+ - int main(int argc, char **argv) - { - struct module *mod; -@@ -1402,8 +1550,10 @@ int main(int argc, char **argv) - char *kernel_read = NULL, *module_read = NULL; - char *dump_write = NULL; - int opt; -+ char *markers_read = NULL; -+ char *markers_write = NULL; - -- while ((opt = getopt(argc, argv, "i:I:mo:a")) != -1) { -+ while ((opt = getopt(argc, argv, "i:I:mo:aM:K:")) != -1) { - switch(opt) { - case 'i': - kernel_read = optarg; -@@ -1421,6 +1571,12 @@ int main(int argc, char **argv) - case 'a': - all_versions = 1; - break; -+ case 'M': -+ markers_write = optarg; -+ break; -+ case 'K': -+ markers_read = optarg; -+ break; - default: - exit(1); - } -@@ -1460,5 +1616,11 @@ int main(int argc, char **argv) - if (dump_write) - write_dump(dump_write); - -+ if (markers_read) -+ read_markers(markers_read); -+ -+ if (markers_write) 
-+ write_markers(markers_write); -+ - return 0; - } -diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h -index d398c61..27b05e6 100644 ---- a/scripts/mod/modpost.h -+++ b/scripts/mod/modpost.h -@@ -107,6 +107,8 @@ struct module { - int has_init; - int has_cleanup; - struct buffer dev_table_buf; -+ char **markers; -+ size_t nmarkers; - char srcversion[25]; - }; - -@@ -121,6 +123,7 @@ struct elf_info { - Elf_Section export_gpl_sec; - Elf_Section export_unused_gpl_sec; - Elf_Section export_gpl_future_sec; -+ Elf_Section markers_strings_sec; - const char *strtab; - char *modinfo; - unsigned int modinfo_len; Deleted: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch 2008-07-04 02:18:23 UTC (rev 6) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch 2008-07-04 02:22:00 UTC (rev 7) @@ -1,1882 +0,0 @@ -diff --git a/Makefile b/Makefile -index 93bffdf..59693ac 100644 ---- a/Makefile -+++ b/Makefile -@@ -603,7 +603,7 @@ quiet_cmd_vmlinux__ ?= LD $@ - cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ - -T $(vmlinux-lds) $(vmlinux-init) \ - --start-group $(vmlinux-main) --end-group \ -- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) -+ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) - - # Generate new vmlinux version - quiet_cmd_vmlinux_version = GEN .version -@@ -727,11 +727,30 @@ debug_kallsyms: .tmp_map$(last_kallsyms) - - endif # ifdef CONFIG_KALLSYMS - -+# Do modpost on a prelinked vmlinux. The finally linked vmlinux has -+# relevant sections renamed as per the linker script. 
-+quiet_cmd_vmlinux-modpost = LD $@ -+ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ -+ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ -+ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) -+define rule_vmlinux-modpost -+ : -+ +$(call cmd,vmlinux-modpost) -+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ -+ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd -+endef -+ - # vmlinux image - including updated kernel symbols --vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE -+vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE -+ $(call vmlinux-modpost) - $(call if_changed_rule,vmlinux__) - $(Q)rm -f .old_version - -+# build vmlinux.o first to catch section mismatch errors early -+$(kallsyms.o): vmlinux.o -+vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE -+ $(call if_changed_rule,vmlinux-modpost) -+ - # The actual objects are generated when descending, - # make sure no implicit rule kicks in - $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; -diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig -index d04fcc6..ad73af5 100644 ---- a/arch/i386/Kconfig -+++ b/arch/i386/Kconfig -@@ -1212,6 +1212,14 @@ config KPROBES - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". -+ -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. -+ - endmenu - - source "arch/i386/Kconfig.debug" -diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S -index 7f4ca6b..3978095 100644 ---- a/arch/i386/kernel/vmlinux.lds.S -+++ b/arch/i386/kernel/vmlinux.lds.S -@@ -67,6 +67,7 @@ SECTIONS - . 
= ALIGN(4096); - .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ - *(.data) -+ MARKER - CONSTRUCTORS - } :data - -diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig -index f916fba..b81a36b 100644 ---- a/arch/ia64/Kconfig -+++ b/arch/ia64/Kconfig -@@ -591,6 +591,14 @@ config KPROBES - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". -+ -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. -+ - endmenu - - source "arch/ia64/Kconfig.debug" -diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S -index 69cba94..2c0a5c9 100644 ---- a/arch/ia64/kernel/vmlinux.lds.S -+++ b/arch/ia64/kernel/vmlinux.lds.S -@@ -207,7 +207,7 @@ SECTIONS - - data : { } :data - .data : AT(ADDR(.data) - LOAD_OFFSET) -- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } -+ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } - - . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ - .got : AT(ADDR(.got) - LOAD_OFFSET) -diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig -index 7295252..fb8a2b9 100644 ---- a/arch/powerpc/Kconfig -+++ b/arch/powerpc/Kconfig -@@ -1091,6 +1091,14 @@ config KPROBES - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". -+ -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. 
-+ - endmenu - - source "arch/powerpc/Kconfig.debug" -diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S -index c02298a..709bc47 100644 ---- a/arch/powerpc/kernel/vmlinux.lds.S -+++ b/arch/powerpc/kernel/vmlinux.lds.S -@@ -176,11 +176,13 @@ SECTIONS - *(.data) - *(.sdata) - *(.got.plt) *(.got) -+ MARKER - } - #else - .data : { - *(.data .data.rel* .toc1) - *(.branch_lt) -+ MARKER - } - - .opd : { -diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig -index d121403..d8d276a 100644 ---- a/arch/s390/Kconfig -+++ b/arch/s390/Kconfig -@@ -503,6 +503,14 @@ config KPROBES - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". -+ -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. -+ - endmenu - - source "arch/s390/Kconfig.debug" -diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S -index af9e69a..8443156 100644 ---- a/arch/s390/kernel/vmlinux.lds.S -+++ b/arch/s390/kernel/vmlinux.lds.S -@@ -46,6 +46,7 @@ SECTIONS - - .data : { /* Data */ - *(.data) -+ MARKER - CONSTRUCTORS - } - -diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig -index b627f8d..e0467d1 100644 ---- a/arch/sparc64/Kconfig -+++ b/arch/sparc64/Kconfig -@@ -427,6 +427,14 @@ config KPROBES - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". -+ -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. 
-+ - endmenu - - source "arch/sparc64/Kconfig.debug" -diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S -index b097379..1f10e43 100644 ---- a/arch/sparc64/kernel/vmlinux.lds.S -+++ b/arch/sparc64/kernel/vmlinux.lds.S -@@ -27,6 +27,7 @@ SECTIONS - .data : - { - *(.data) -+ MARKER - CONSTRUCTORS - } - .data1 : { *(.data1) } -diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig -index 651d6cb..0e6570c 100644 ---- a/arch/x86_64/Kconfig -+++ b/arch/x86_64/Kconfig -@@ -711,6 +711,14 @@ config KPROBES - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". -+ -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. -+ - endmenu - - source "arch/x86_64/Kconfig.debug" -diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S -index 57e3255..867fce1 100644 ---- a/arch/x86_64/kernel/vmlinux.lds.S -+++ b/arch/x86_64/kernel/vmlinux.lds.S -@@ -68,6 +68,7 @@ SECTIONS - /* Data */ - .data : AT(ADDR(.data) - LOAD_OFFSET) { - *(.data) -+ MARKER - CONSTRUCTORS - } :data - -diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c -index 3fb354d..e884942 100644 ---- a/drivers/net/bonding/bond_3ad.c -+++ b/drivers/net/bonding/bond_3ad.c -@@ -126,7 +126,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); - - // ================= main 802.3ad protocol functions ================== - static int ad_lacpdu_send(struct port *port); --static int ad_marker_send(struct port *port, struct marker *marker); -+static int ad_marker_send(struct port *port, struct bond_marker *marker); - static void ad_mux_machine(struct port *port); - static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); - static void ad_tx_machine(struct port *port); -@@ -139,8 +139,8 @@ static void 
ad_initialize_port(struct port *port, int lacp_fast); - static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); - static void ad_enable_collecting_distributing(struct port *port); - static void ad_disable_collecting_distributing(struct port *port); --static void ad_marker_info_received(struct marker *marker_info, struct port *port); --static void ad_marker_response_received(struct marker *marker, struct port *port); -+static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); -+static void ad_marker_response_received(struct bond_marker *marker, struct port *port); - - - ///////////////////////////////////////////////////////////////////////////////// -@@ -912,12 +912,13 @@ static int ad_lacpdu_send(struct port *port) - * Returns: 0 on success - * < 0 on error - */ --static int ad_marker_send(struct port *port, struct marker *marker) -+static int ad_marker_send(struct port *port, struct bond_marker *marker) - { - struct slave *slave = port->slave; - struct sk_buff *skb; -- struct marker_header *marker_header; -- int length = sizeof(struct marker_header); -+ struct bond_marker_header *marker_header; -+ int length = sizeof(struct bond_marker_header); -+ - struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; - - skb = dev_alloc_skb(length + 16); -@@ -932,7 +933,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) - skb->nh.raw = skb->data + ETH_HLEN; - skb->protocol = PKT_TYPE_LACPDU; - -- marker_header = (struct marker_header *)skb_put(skb, length); -+ marker_header = (struct bond_marker_header *)skb_put(skb, length); - - marker_header->ad_header.destination_address = lacpdu_multicast_address; - /* Note: source addres is set to be the member's PERMANENT address, because we use it -@@ -1732,7 +1733,7 @@ static void ad_disable_collecting_distributing(struct port *port) - */ - static void ad_marker_info_send(struct port *port) - { -- struct marker marker; -+ struct bond_marker marker; - u16 index; - - // 
fill the marker PDU with the appropriate values -@@ -1765,13 +1766,14 @@ static void ad_marker_info_send(struct port *port) - * @port: the port we're looking at - * - */ --static void ad_marker_info_received(struct marker *marker_info,struct port *port) -+static void ad_marker_info_received(struct bond_marker *marker_info, -+ struct port *port) - { -- struct marker marker; -+ struct bond_marker marker; - - // copy the received marker data to the response marker - //marker = *marker_info; -- memcpy(&marker, marker_info, sizeof(struct marker)); -+ memcpy(&marker, marker_info, sizeof(struct bond_marker)); - // change the marker subtype to marker response - marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; - // send the marker response -@@ -1790,7 +1792,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port - * response for marker PDU's, in this stage, but only to respond to marker - * information. - */ --static void ad_marker_response_received(struct marker *marker, struct port *port) -+static void ad_marker_response_received(struct bond_marker *marker, -+ struct port *port) - { - marker=NULL; // just to satisfy the compiler - port=NULL; // just to satisfy the compiler -@@ -2187,15 +2190,16 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u - case AD_TYPE_MARKER: - // No need to convert fields to Little Endian since we don't use the marker's fields. 
- -- switch (((struct marker *)lacpdu)->tlv_type) { -+ switch (((struct bond_marker *)lacpdu)->tlv_type) { - case AD_MARKER_INFORMATION_SUBTYPE: - dprintk("Received Marker Information on port %d\n", port->actor_port_number); -- ad_marker_info_received((struct marker *)lacpdu, port); -+ ad_marker_info_received((struct bond_marker *)lacpdu, port); -+ - break; - - case AD_MARKER_RESPONSE_SUBTYPE: - dprintk("Received Marker Response on port %d\n", port->actor_port_number); -- ad_marker_response_received((struct marker *)lacpdu, port); -+ ad_marker_response_received((struct bond_marker *)lacpdu, port); - break; - - default: -diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h -index 6ad5ad6..bf93e7e 100644 ---- a/drivers/net/bonding/bond_3ad.h -+++ b/drivers/net/bonding/bond_3ad.h -@@ -92,7 +92,8 @@ typedef enum { - typedef enum { - AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype - AD_MARKER_RESPONSE_SUBTYPE // marker response subtype --} marker_subtype_t; -+} bond_marker_subtype_t; -+ - - // timers types(43.4.9 in the 802.3ad standard) - typedef enum { -@@ -148,7 +149,7 @@ typedef struct lacpdu_header { - } lacpdu_header_t; - - // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) --typedef struct marker { -+typedef struct bond_marker { - u8 subtype; // = 0x02 (marker PDU) - u8 version_number; // = 0x01 - u8 tlv_type; // = 0x01 (marker information) -@@ -161,12 +162,12 @@ typedef struct marker { - u8 tlv_type_terminator; // = 0x00 - u8 terminator_length; // = 0x00 - u8 reserved_90[90]; // = 0 --} marker_t; -+} bond_marker_t; - --typedef struct marker_header { -+typedef struct bond_marker_header { - struct ad_header ad_header; -- struct marker marker; --} marker_header_t; -+ struct bond_marker marker; -+} bond_marker_header_t; - - #pragma pack() - -diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h -index 8c6f9b6..efc3d79 100644 ---- a/drivers/scsi/qla4xxx/ql4_fw.h -+++ 
b/drivers/scsi/qla4xxx/ql4_fw.h -@@ -671,7 +671,7 @@ struct continuation_t1_entry { - #define ET_CONTINUE ET_CONT_T1 - - /* Marker entry structure*/ --struct marker_entry { -+struct qla4_marker_entry { - struct qla4_header hdr; /* 00-03 */ - - uint32_t system_defined; /* 04-07 */ -diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c -index d45e4d0..b85a484 100644 ---- a/drivers/scsi/qla4xxx/ql4_iocb.c -+++ b/drivers/scsi/qla4xxx/ql4_iocb.c -@@ -70,7 +70,8 @@ int qla4xxx_get_req_pkt(struct scsi_qla_host *ha, - int qla4xxx_send_marker_iocb(struct scsi_qla_host *ha, - struct ddb_entry *ddb_entry, int lun) - { -- struct marker_entry *marker_entry; -+ struct qla4_marker_entry *marker_entry; -+ - unsigned long flags = 0; - uint8_t status = QLA_SUCCESS; - -diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h -index b9e964c..a175f1e 100644 ---- a/include/asm-generic/vmlinux.lds.h -+++ b/include/asm-generic/vmlinux.lds.h -@@ -9,12 +9,21 @@ - /* Align . to a 8 byte boundary equals to maximum function alignment. */ - #define ALIGN_FUNCTION() . = ALIGN(8) - -+/* Kernel markers : pointers */ -+#define MARKER \ -+ . = ALIGN(8); \ -+ VMLINUX_SYMBOL(__start___markers) = .; \ -+ *(__markers) \ -+ VMLINUX_SYMBOL(__stop___markers) = .; -+ -+ - #define RODATA \ - . = ALIGN(4096); \ - .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__start_rodata) = .; \ - *(.rodata) *(.rodata.*) \ - *(__vermagic) /* Kernel version magic */ \ -+ *(__markers_strings) /* Markers: strings */ \ - } \ - \ - .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ -diff --git a/include/linux/marker.h b/include/linux/marker.h -new file mode 100644 -index 0000000..efbc82b ---- /dev/null -+++ b/include/linux/marker.h -@@ -0,0 +1,139 @@ -+#ifndef _LINUX_MARKER_H -+#define _LINUX_MARKER_H -+ -+/* -+ * Code markup for dynamic and static tracing. -+ * -+ * See Documentation/marker.txt. 
-+ * -+ * (C) Copyright 2006 Mathieu Desnoyers -+ * -+ * This file is released under the GPLv2. -+ * See the file COPYING for more details. -+ */ -+ -+#include -+ -+struct module; -+struct marker; -+ -+/** -+ * marker_probe_func - Type of a marker probe function -+ * @probe_private: probe private data -+ * @call_private: call site private data -+ * @fmt: format string -+ * @args: variable argument list pointer. Use a pointer to overcome C's -+ * inability to pass this around as a pointer in a portable manner in -+ * the callee otherwise. -+ * -+ * Type of marker probe functions. They receive the mdata and need to parse the -+ * format string to recover the variable argument list. -+ */ -+typedef void marker_probe_func(void *probe_private, void *call_private, -+ const char *fmt, va_list *args); -+ -+struct marker_probe_closure { -+ marker_probe_func *func; /* Callback */ -+ void *probe_private; /* Private probe data */ -+}; -+ -+struct marker { -+ const char *name; /* Marker name */ -+ const char *format; /* Marker format string, describing the -+ * variable argument list. -+ */ -+ char state; /* Marker state. */ -+ char ptype; /* probe type : 0 : single, 1 : multi */ -+ void (*call)(const struct marker *mdata, /* Probe wrapper */ -+ void *call_private, const char *fmt, ...); -+ struct marker_probe_closure single; -+ struct marker_probe_closure *multi; -+} __attribute__((aligned(8))); -+ -+#ifdef CONFIG_MARKERS -+ -+/* -+ * Note : the empty asm volatile with read constraint is used here instead of a -+ * "used" attribute to fix a gcc 4.1.x bug. -+ * Make sure the alignment of the structure in the __markers section will -+ * not add unwanted padding between the beginning of the section and the -+ * structure. Force alignment to the same alignment as the section start. -+ */ -+#define __trace_mark(name, call_private, format, args...) 
\ -+ do { \ -+ static const char __mstrtab_##name[] \ -+ __attribute__((section("__markers_strings"))) \ -+ = #name "\0" format; \ -+ static struct marker __mark_##name \ -+ __attribute__((section("__markers"), aligned(8))) = \ -+ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ -+ 0, 0, marker_probe_cb, \ -+ { __mark_empty_function, NULL}, NULL }; \ -+ __mark_check_format(format, ## args); \ -+ if (unlikely(__mark_##name.state)) { \ -+ (*__mark_##name.call) \ -+ (&__mark_##name, call_private, \ -+ format, ## args); \ -+ } \ -+ } while (0) -+ -+extern void marker_update_probe_range(struct marker *begin, -+ struct marker *end); -+#else /* !CONFIG_MARKERS */ -+#define __trace_mark(name, call_private, format, args...) \ -+ __mark_check_format(format, ## args) -+static inline void marker_update_probe_range(struct marker *begin, -+ struct marker *end) -+{ } -+#endif /* CONFIG_MARKERS */ -+ -+/** -+ * trace_mark - Marker -+ * @name: marker name, not quoted. -+ * @format: format string -+ * @args...: variable argument list -+ * -+ * Places a marker. -+ */ -+#define trace_mark(name, format, args...) \ -+ __trace_mark(name, NULL, format, ## args) -+ -+/** -+ * MARK_NOARGS - Format string for a marker with no argument. -+ */ -+#define MARK_NOARGS " " -+ -+/* To be used for string format validity checking with gcc */ -+static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) -+{ -+} -+ -+extern marker_probe_func __mark_empty_function; -+ -+extern void marker_probe_cb(const struct marker *mdata, -+ void *call_private, const char *fmt, ...); -+extern void marker_probe_cb_noarg(const struct marker *mdata, -+ void *call_private, const char *fmt, ...); -+ -+/* -+ * Connect a probe to a marker. -+ * private data pointer must be a valid allocated memory address, or NULL. 
-+ */ -+extern int marker_probe_register(const char *name, const char *format, -+ marker_probe_func *probe, void *probe_private); -+ -+/* -+ * Returns the private data given to marker_probe_register. -+ */ -+extern int marker_probe_unregister(const char *name, -+ marker_probe_func *probe, void *probe_private); -+/* -+ * Unregister a marker by providing the registered private data. -+ */ -+extern int marker_probe_unregister_private_data(marker_probe_func *probe, -+ void *probe_private); -+ -+extern void *marker_get_private_data(const char *name, marker_probe_func *probe, -+ int num); -+ -+#endif -diff --git a/include/linux/module.h b/include/linux/module.h -index 0460634..de5b51e 100644 ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -17,6 +17,7 @@ - #include - #include - #include -+#include - #include - - #include -@@ -353,6 +354,10 @@ struct module - /* The command line arguments (may be mangled). People like - keeping pointers to this stuff */ - char *args; -+#ifdef CONFIG_MARKERS -+ struct marker *markers; -+ unsigned int num_markers; -+#endif - }; - - /* FIXME: It'd be nice to isolate modules during init, too, so they -@@ -472,6 +477,7 @@ int register_module_notifier(struct notifier_block * nb); - int unregister_module_notifier(struct notifier_block * nb); - - extern void print_modules(void); -+extern void module_update_markers(void); - - struct device_driver; - void module_add_driver(struct module *, struct device_driver *); -@@ -574,6 +580,10 @@ static inline void print_modules(void) - { - } - -+static inline void module_update_markers(void) -+{ -+} -+ - struct device_driver; - struct module; - -diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h -index b4ca73d..5f12d1b 100644 ---- a/include/linux/rcupdate.h -+++ b/include/linux/rcupdate.h -@@ -42,6 +42,19 @@ - #include - #include - -+/* -+ * Prevent the compiler from merging or refetching accesses. 
The compiler -+ * is also forbidden from reordering successive instances of ACCESS_ONCE(), -+ * but only when the compiler is aware of some particular ordering. One way -+ * to make the compiler aware of ordering is to put the two invocations of -+ * ACCESS_ONCE() in different C statements. -+ * -+ * This macro does absolutely -nothing- to prevent the CPU from reordering, -+ * merging, or refetching absolutely anything at any time. -+ * -+ */ -+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) -+ - /** - * struct rcu_head - callback structure for use with RCU - * @next: next update requests in a list -diff --git a/kernel/Makefile b/kernel/Makefile -index ef832fa..5879e8b 100644 ---- a/kernel/Makefile -+++ b/kernel/Makefile -@@ -50,6 +50,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o - obj-$(CONFIG_RELAY) += relay.o - obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o - obj-$(CONFIG_TASKSTATS) += taskstats.o -+obj-$(CONFIG_MARKERS) += marker.o - obj-$(CONFIG_UTRACE) += utrace.o - obj-$(CONFIG_PTRACE) += ptrace.o - -diff --git a/kernel/marker.c b/kernel/marker.c -new file mode 100644 -index 0000000..c4c2cd8 ---- /dev/null -+++ b/kernel/marker.c -@@ -0,0 +1,851 @@ -+/* -+ * Copyright (C) 2007 Mathieu Desnoyers -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+extern struct marker __start___markers[]; -+extern struct marker __stop___markers[]; -+ -+/* Set to 1 to enable marker debug output */ -+const int marker_debug; -+ -+/* -+ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin -+ * and module markers and the hash table. -+ */ -+static DEFINE_MUTEX(markers_mutex); -+ -+/* -+ * Marker hash table, containing the active markers. -+ * Protected by module_mutex. -+ */ -+#define MARKER_HASH_BITS 6 -+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) -+ -+/* -+ * Note about RCU : -+ * It is used to make sure every handler has finished using its private data -+ * between two consecutive operation (add or remove) on a given marker. It is -+ * also used to delay the free of multiple probes array until a quiescent state -+ * is reached. -+ * marker entries modifications are protected by the markers_mutex. -+ */ -+struct marker_entry { -+ struct hlist_node hlist; -+ char *format; -+ void (*call)(const struct marker *mdata, /* Probe wrapper */ -+ void *call_private, const char *fmt, ...); -+ struct marker_probe_closure single; -+ struct marker_probe_closure *multi; -+ int refcount; /* Number of times armed. 0 if disarmed. */ -+ struct rcu_head rcu; -+ void *oldptr; -+ char rcu_pending:1; -+ char ptype:1; -+ char name[0]; /* Contains name'\0'format'\0' */ -+}; -+ -+static struct hlist_head marker_table[MARKER_TABLE_SIZE]; -+ -+/** -+ * __mark_empty_function - Empty probe callback -+ * @probe_private: probe private data -+ * @call_private: call site private data -+ * @fmt: format string -+ * @...: variable argument list -+ * -+ * Empty callback provided as a probe to the markers. 
By providing this to a -+ * disabled marker, we make sure the execution flow is always valid even -+ * though the function pointer change and the marker enabling are two distinct -+ * operations that modifies the execution flow of preemptible code. -+ */ -+void __mark_empty_function(void *probe_private, void *call_private, -+ const char *fmt, va_list *args) -+{ -+} -+EXPORT_SYMBOL_GPL(__mark_empty_function); -+ -+/* -+ * marker_probe_cb Callback that prepares the variable argument list for probes. -+ * @mdata: pointer of type struct marker -+ * @call_private: caller site private data -+ * @fmt: format string -+ * @...: Variable argument list. -+ * -+ * Since we do not use "typical" pointer based RCU in the 1 argument case, we -+ * need to put a full smp_rmb() in this branch. This is why we do not use -+ * rcu_dereference() for the pointer read. -+ */ -+void marker_probe_cb(const struct marker *mdata, void *call_private, -+ const char *fmt, ...) -+{ -+ va_list args; -+ char ptype; -+ -+ /* -+ * disabling preemption to make sure the teardown of the callbacks can -+ * be done correctly when they are in modules and they insure RCU read -+ * coherency. -+ */ -+ preempt_disable(); -+ ptype = ACCESS_ONCE(mdata->ptype); -+ if (likely(!ptype)) { -+ marker_probe_func *func; -+ /* Must read the ptype before ptr. They are not data dependant, -+ * so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func = ACCESS_ONCE(mdata->single.func); -+ /* Must read the ptr before private data. They are not data -+ * dependant, so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ va_start(args, fmt); -+ func(mdata->single.probe_private, call_private, fmt, &args); -+ va_end(args); -+ } else { -+ struct marker_probe_closure *multi; -+ int i; -+ /* -+ * multi points to an array, therefore accessing the array -+ * depends on reading multi. However, even in this case, -+ * we must insure that the pointer is read _before_ the array -+ * data. 
Same as rcu_dereference, but we need a full smp_rmb() -+ * in the fast path, so put the explicit barrier here. -+ */ -+ smp_read_barrier_depends(); -+ multi = ACCESS_ONCE(mdata->multi); -+ for (i = 0; multi[i].func; i++) { -+ va_start(args, fmt); -+ multi[i].func(multi[i].probe_private, call_private, fmt, -+ &args); -+ va_end(args); -+ } -+ } -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(marker_probe_cb); -+ -+/* -+ * marker_probe_cb Callback that does not prepare the variable argument list. -+ * @mdata: pointer of type struct marker -+ * @call_private: caller site private data -+ * @fmt: format string -+ * @...: Variable argument list. -+ * -+ * Should be connected to markers "MARK_NOARGS". -+ */ -+void marker_probe_cb_noarg(const struct marker *mdata, -+ void *call_private, const char *fmt, ...) -+{ -+ va_list args; /* not initialized */ -+ char ptype; -+ -+ preempt_disable(); -+ ptype = ACCESS_ONCE(mdata->ptype); -+ if (likely(!ptype)) { -+ marker_probe_func *func; -+ /* Must read the ptype before ptr. They are not data dependant, -+ * so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func = ACCESS_ONCE(mdata->single.func); -+ /* Must read the ptr before private data. They are not data -+ * dependant, so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func(mdata->single.probe_private, call_private, fmt, &args); -+ } else { -+ struct marker_probe_closure *multi; -+ int i; -+ /* -+ * multi points to an array, therefore accessing the array -+ * depends on reading multi. However, even in this case, -+ * we must insure that the pointer is read _before_ the array -+ * data. Same as rcu_dereference, but we need a full smp_rmb() -+ * in the fast path, so put the explicit barrier here. 
-+ */ -+ smp_read_barrier_depends(); -+ multi = ACCESS_ONCE(mdata->multi); -+ for (i = 0; multi[i].func; i++) -+ multi[i].func(multi[i].probe_private, call_private, fmt, -+ &args); -+ } -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); -+ -+static void free_old_closure(struct rcu_head *head) -+{ -+ struct marker_entry *entry = container_of(head, -+ struct marker_entry, rcu); -+ kfree(entry->oldptr); -+ /* Make sure we free the data before setting the pending flag to 0 */ -+ smp_wmb(); -+ entry->rcu_pending = 0; -+} -+ -+static void debug_print_probes(struct marker_entry *entry) -+{ -+ int i; -+ -+ if (!marker_debug) -+ return; -+ -+ if (!entry->ptype) { -+ printk(KERN_DEBUG "Single probe : %p %p\n", -+ entry->single.func, -+ entry->single.probe_private); -+ } else { -+ for (i = 0; entry->multi[i].func; i++) -+ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, -+ entry->multi[i].func, -+ entry->multi[i].probe_private); -+ } -+} -+ -+static struct marker_probe_closure * -+marker_entry_add_probe(struct marker_entry *entry, -+ marker_probe_func *probe, void *probe_private) -+{ -+ int nr_probes = 0; -+ struct marker_probe_closure *old, *new; -+ -+ WARN_ON(!probe); -+ -+ debug_print_probes(entry); -+ old = entry->multi; -+ if (!entry->ptype) { -+ if (entry->single.func == probe && -+ entry->single.probe_private == probe_private) -+ return ERR_PTR(-EBUSY); -+ if (entry->single.func == __mark_empty_function) { -+ /* 0 -> 1 probes */ -+ entry->single.func = probe; -+ entry->single.probe_private = probe_private; -+ entry->refcount = 1; -+ entry->ptype = 0; -+ debug_print_probes(entry); -+ return NULL; -+ } else { -+ /* 1 -> 2 probes */ -+ nr_probes = 1; -+ old = NULL; -+ } -+ } else { -+ /* (N -> N+1), (N != 0, 1) probes */ -+ for (nr_probes = 0; old[nr_probes].func; nr_probes++) -+ if (old[nr_probes].func == probe -+ && old[nr_probes].probe_private -+ == probe_private) -+ return ERR_PTR(-EBUSY); -+ } -+ /* + 2 : one for new probe, one for NULL func */ -+ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), -+ GFP_KERNEL); -+ if (new == NULL) -+ return ERR_PTR(-ENOMEM); -+ if (!old) -+ new[0] = entry->single; -+ else -+ memcpy(new, old, -+ nr_probes * sizeof(struct marker_probe_closure)); -+ new[nr_probes].func = probe; -+ new[nr_probes].probe_private = probe_private; -+ entry->refcount = nr_probes + 1; -+ entry->multi = new; -+ entry->ptype = 1; -+ debug_print_probes(entry); -+ return old; -+} -+ -+static struct marker_probe_closure * -+marker_entry_remove_probe(struct marker_entry *entry, -+ marker_probe_func *probe, void *probe_private) -+{ -+ int nr_probes = 0, nr_del = 0, i; -+ struct marker_probe_closure *old, *new; -+ -+ old = entry->multi; -+ -+ debug_print_probes(entry); -+ if (!entry->ptype) { -+ /* 0 -> N is an error */ -+ WARN_ON(entry->single.func == __mark_empty_function); -+ /* 1 -> 0 probes */ -+ WARN_ON(probe && entry->single.func != probe); -+ WARN_ON(entry->single.probe_private != probe_private); -+ entry->single.func = __mark_empty_function; -+ entry->refcount = 0; -+ entry->ptype = 0; -+ debug_print_probes(entry); -+ return NULL; -+ } else { -+ /* (N -> M), (N > 1, M >= 0) probes */ -+ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { -+ if ((!probe || old[nr_probes].func == probe) -+ && old[nr_probes].probe_private -+ == probe_private) -+ nr_del++; -+ } -+ } -+ -+ if (nr_probes - nr_del == 0) { -+ /* N -> 0, (N > 1) */ -+ entry->single.func = __mark_empty_function; -+ entry->refcount = 0; -+ entry->ptype = 0; -+ } else if (nr_probes - nr_del == 1) { -+ /* N -> 1, (N > 1) */ -+ for (i = 0; old[i].func; i++) -+ if ((probe && old[i].func != probe) || -+ old[i].probe_private != probe_private) -+ entry->single = old[i]; -+ entry->refcount = 1; -+ entry->ptype = 0; -+ } else { -+ int j = 0; -+ /* N -> M, (N > 1, M > 1) */ -+ /* + 1 for NULL */ -+ new = kzalloc((nr_probes - nr_del + 1) -+ * sizeof(struct marker_probe_closure), GFP_KERNEL); -+ if (new == NULL) -+ return 
ERR_PTR(-ENOMEM); -+ for (i = 0; old[i].func; i++) -+ if ((probe && old[i].func != probe) || -+ old[i].probe_private != probe_private) -+ new[j++] = old[i]; -+ entry->refcount = nr_probes - nr_del; -+ entry->ptype = 1; -+ entry->multi = new; -+ } -+ debug_print_probes(entry); -+ return old; -+} -+ -+/* -+ * Get marker if the marker is present in the marker hash table. -+ * Must be called with markers_mutex held. -+ * Returns NULL if not present. -+ */ -+static struct marker_entry *get_marker(const char *name) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ u32 hash = jhash(name, strlen(name), 0); -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) -+ return e; -+ } -+ return NULL; -+} -+ -+/* -+ * Add the marker to the marker hash table. Must be called with markers_mutex -+ * held. -+ */ -+static struct marker_entry *add_marker(const char *name, const char *format) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ size_t name_len = strlen(name) + 1; -+ size_t format_len = 0; -+ u32 hash = jhash(name, name_len-1, 0); -+ -+ if (format) -+ format_len = strlen(format) + 1; -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ printk(KERN_NOTICE -+ "Marker %s busy\n", name); -+ return ERR_PTR(-EBUSY); /* Already there */ -+ } -+ } -+ /* -+ * Using kmalloc here to allocate a variable length element. Could -+ * cause some memory fragmentation if overused. 
-+ */ -+ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, -+ GFP_KERNEL); -+ if (!e) -+ return ERR_PTR(-ENOMEM); -+ memcpy(&e->name[0], name, name_len); -+ if (format) { -+ e->format = &e->name[name_len]; -+ memcpy(e->format, format, format_len); -+ if (strcmp(e->format, MARK_NOARGS) == 0) -+ e->call = marker_probe_cb_noarg; -+ else -+ e->call = marker_probe_cb; -+ trace_mark(core_marker_format, "name %s format %s", -+ e->name, e->format); -+ } else { -+ e->format = NULL; -+ e->call = marker_probe_cb; -+ } -+ e->single.func = __mark_empty_function; -+ e->single.probe_private = NULL; -+ e->multi = NULL; -+ e->ptype = 0; -+ e->refcount = 0; -+ e->rcu_pending = 0; -+ hlist_add_head(&e->hlist, head); -+ return e; -+} -+ -+/* -+ * Remove the marker from the marker hash table. Must be called with mutex_lock -+ * held. -+ */ -+static int remove_marker(const char *name) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ int found = 0; -+ size_t len = strlen(name) + 1; -+ u32 hash = jhash(name, len-1, 0); -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ found = 1; -+ break; -+ } -+ } -+ if (!found) -+ return -ENOENT; -+ if (e->single.func != __mark_empty_function) -+ return -EBUSY; -+ hlist_del(&e->hlist); -+ /* Make sure the call_rcu has been executed */ -+ if (e->rcu_pending) -+ rcu_barrier(); -+ kfree(e); -+ return 0; -+} -+ -+/* -+ * Set the mark_entry format to the format found in the element. 
-+ */ -+static int marker_set_format(struct marker_entry **entry, const char *format) -+{ -+ struct marker_entry *e; -+ size_t name_len = strlen((*entry)->name) + 1; -+ size_t format_len = strlen(format) + 1; -+ -+ -+ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, -+ GFP_KERNEL); -+ if (!e) -+ return -ENOMEM; -+ memcpy(&e->name[0], (*entry)->name, name_len); -+ e->format = &e->name[name_len]; -+ memcpy(e->format, format, format_len); -+ if (strcmp(e->format, MARK_NOARGS) == 0) -+ e->call = marker_probe_cb_noarg; -+ else -+ e->call = marker_probe_cb; -+ e->single = (*entry)->single; -+ e->multi = (*entry)->multi; -+ e->ptype = (*entry)->ptype; -+ e->refcount = (*entry)->refcount; -+ e->rcu_pending = 0; -+ hlist_add_before(&e->hlist, &(*entry)->hlist); -+ hlist_del(&(*entry)->hlist); -+ /* Make sure the call_rcu has been executed */ -+ if ((*entry)->rcu_pending) -+ rcu_barrier(); -+ kfree(*entry); -+ *entry = e; -+ trace_mark(core_marker_format, "name %s format %s", -+ e->name, e->format); -+ return 0; -+} -+ -+/* -+ * Sets the probe callback corresponding to one marker. -+ */ -+static int set_marker(struct marker_entry **entry, struct marker *elem, -+ int active) -+{ -+ int ret; -+ WARN_ON(strcmp((*entry)->name, elem->name) != 0); -+ -+ if ((*entry)->format) { -+ if (strcmp((*entry)->format, elem->format) != 0) { -+ printk(KERN_NOTICE -+ "Format mismatch for probe %s " -+ "(%s), marker (%s)\n", -+ (*entry)->name, -+ (*entry)->format, -+ elem->format); -+ return -EPERM; -+ } -+ } else { -+ ret = marker_set_format(entry, elem->format); -+ if (ret) -+ return ret; -+ } -+ -+ /* -+ * probe_cb setup (statically known) is done here. It is -+ * asynchronous with the rest of execution, therefore we only -+ * pass from a "safe" callback (with argument) to an "unsafe" -+ * callback (does not set arguments). 
-+ */ -+ elem->call = (*entry)->call; -+ /* -+ * Sanity check : -+ * We only update the single probe private data when the ptr is -+ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) -+ */ -+ WARN_ON(elem->single.func != __mark_empty_function -+ && elem->single.probe_private -+ != (*entry)->single.probe_private && -+ !elem->ptype); -+ elem->single.probe_private = (*entry)->single.probe_private; -+ /* -+ * Make sure the private data is valid when we update the -+ * single probe ptr. -+ */ -+ smp_wmb(); -+ elem->single.func = (*entry)->single.func; -+ /* -+ * We also make sure that the new probe callbacks array is consistent -+ * before setting a pointer to it. -+ */ -+ rcu_assign_pointer(elem->multi, (*entry)->multi); -+ /* -+ * Update the function or multi probe array pointer before setting the -+ * ptype. -+ */ -+ smp_wmb(); -+ elem->ptype = (*entry)->ptype; -+ elem->state = active; -+ -+ return 0; -+} -+ -+/* -+ * Disable a marker and its probe callback. -+ * Note: only after a synchronize_sched() issued after setting elem->call to the -+ * empty function insures that the original callback is not used anymore. This -+ * insured by preemption disabling around the call site. -+ */ -+static void disable_marker(struct marker *elem) -+{ -+ /* leave "call" as is. It is known statically. */ -+ elem->state = 0; -+ elem->single.func = __mark_empty_function; -+ /* Update the function before setting the ptype */ -+ smp_wmb(); -+ elem->ptype = 0; /* single probe */ -+ /* -+ * Leave the private data and id there, because removal is racy and -+ * should be done only after a synchronize_sched(). These are never used -+ * until the next initialization anyway. -+ */ -+} -+ -+/** -+ * marker_update_probe_range - Update a probe range -+ * @begin: beginning of the range -+ * @end: end of the range -+ * -+ * Updates the probe callback corresponding to a range of markers. 
-+ */ -+void marker_update_probe_range(struct marker *begin, -+ struct marker *end) -+{ -+ struct marker *iter; -+ struct marker_entry *mark_entry; -+ -+ mutex_lock(&markers_mutex); -+ for (iter = begin; iter < end; iter++) { -+ mark_entry = get_marker(iter->name); -+ if (mark_entry) { -+ set_marker(&mark_entry, iter, -+ !!mark_entry->refcount); -+ /* -+ * ignore error, continue -+ */ -+ } else { -+ disable_marker(iter); -+ } -+ } -+ mutex_unlock(&markers_mutex); -+} -+ -+/* -+ * Update probes, removing the faulty probes. -+ * Issues a synchronize_sched() when no reference to the module passed -+ * as parameter is found in the probes so the probe module can be -+ * safely unloaded from now on. -+ * -+ * Internal callback only changed before the first probe is connected to it. -+ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 -+ * transitions. All other transitions will leave the old private data valid. -+ * This makes the non-atomicity of the callback/private data updates valid. -+ * -+ * "special case" updates : -+ * 0 -> 1 callback -+ * 1 -> 0 callback -+ * 1 -> 2 callbacks -+ * 2 -> 1 callbacks -+ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. -+ * Site effect : marker_set_format may delete the marker entry (creating a -+ * replacement). -+ */ -+static void marker_update_probes(void) -+{ -+ /* Core kernel markers */ -+ marker_update_probe_range(__start___markers, __stop___markers); -+ /* Markers in modules. */ -+ module_update_markers(); -+} -+ -+/** -+ * marker_probe_register - Connect a probe to a marker -+ * @name: marker name -+ * @format: format string -+ * @probe: probe handler -+ * @probe_private: probe private data -+ * -+ * private data must be a valid allocated memory address, or NULL. -+ * Returns 0 if ok, error value on error. -+ * The probe address must at least be aligned on the architecture pointer size. 
-+ */ -+int marker_probe_register(const char *name, const char *format, -+ marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ int ret = 0; -+ struct marker_probe_closure *old; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ if (!entry) { -+ entry = add_marker(name, format); -+ if (IS_ERR(entry)) { -+ ret = PTR_ERR(entry); -+ goto end; -+ } -+ } -+ /* -+ * If we detect that a call_rcu is pending for this marker, -+ * make sure it's executed now. -+ */ -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_add_probe(entry, probe, probe_private); -+ if (IS_ERR(old)) { -+ ret = PTR_ERR(old); -+ goto end; -+ } -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ WARN_ON(!entry); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_register); -+ -+/** -+ * marker_probe_unregister - Disconnect a probe from a marker -+ * @name: marker name -+ * @probe: probe function pointer -+ * @probe_private: probe private data -+ * -+ * Returns the private data given to marker_probe_register, or an ERR_PTR(). -+ * We do not need to call a synchronize_sched to make sure the probes have -+ * finished running before doing a module unload, because the module unload -+ * itself uses stop_machine(), which insures that every preempt disabled section -+ * have finished. 
-+ */ -+int marker_probe_unregister(const char *name, -+ marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ struct marker_probe_closure *old; -+ int ret = 0; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ if (!entry) { -+ ret = -ENOENT; -+ goto end; -+ } -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_remove_probe(entry, probe, probe_private); -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+ remove_marker(name); /* Ignore busy error message */ -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_unregister); -+ -+static struct marker_entry * -+get_marker_from_private_data(marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ unsigned int i; -+ struct hlist_head *head; -+ struct hlist_node *node; -+ -+ for (i = 0; i < MARKER_TABLE_SIZE; i++) { -+ head = &marker_table[i]; -+ hlist_for_each_entry(entry, node, head, hlist) { -+ if (!entry->ptype) { -+ if (entry->single.func == probe -+ && entry->single.probe_private -+ == probe_private) -+ return entry; -+ } else { -+ struct marker_probe_closure *closure; -+ closure = entry->multi; -+ for (i = 0; closure[i].func; i++) { -+ if (closure[i].func == probe && -+ closure[i].probe_private -+ == probe_private) -+ return entry; -+ } -+ } -+ } -+ } -+ return NULL; -+} -+ -+/** -+ * marker_probe_unregister_private_data - Disconnect a probe from a marker -+ * @probe: probe function -+ * @probe_private: probe private data -+ * -+ * Unregister a probe by providing the registered private data. -+ * Only removes the first marker found in hash table. -+ * Return 0 on success or error value. 
-+ * We do not need to call a synchronize_sched to make sure the probes have -+ * finished running before doing a module unload, because the module unload -+ * itself uses stop_machine(), which insures that every preempt disabled section -+ * have finished. -+ */ -+int marker_probe_unregister_private_data(marker_probe_func *probe, -+ void *probe_private) -+{ -+ struct marker_entry *entry; -+ int ret = 0; -+ struct marker_probe_closure *old; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker_from_private_data(probe, probe_private); -+ if (!entry) { -+ ret = -ENOENT; -+ goto end; -+ } -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_remove_probe(entry, NULL, probe_private); -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker_from_private_data(probe, probe_private); -+ WARN_ON(!entry); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+ remove_marker(entry->name); /* Ignore busy error message */ -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); -+ -+/** -+ * marker_get_private_data - Get a marker's probe private data -+ * @name: marker name -+ * @probe: probe to match -+ * @num: get the nth matching probe's private data -+ * -+ * Returns the nth private data pointer (starting from 0) matching, or an -+ * ERR_PTR. -+ * Returns the private data pointer, or an ERR_PTR. -+ * The private data pointer should _only_ be dereferenced if the caller is the -+ * owner of the data, or its content could vanish. This is mostly used to -+ * confirm that a caller is the owner of a registered probe. 
-+ */ -+void *marker_get_private_data(const char *name, marker_probe_func *probe, -+ int num) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ size_t name_len = strlen(name) + 1; -+ u32 hash = jhash(name, name_len-1, 0); -+ int i; -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ if (!e->ptype) { -+ if (num == 0 && e->single.func == probe) -+ return e->single.probe_private; -+ else -+ break; -+ } else { -+ struct marker_probe_closure *closure; -+ int match = 0; -+ closure = e->multi; -+ for (i = 0; closure[i].func; i++) { -+ if (closure[i].func != probe) -+ continue; -+ if (match++ == num) -+ return closure[i].probe_private; -+ } -+ } -+ } -+ } -+ return ERR_PTR(-ENOENT); -+} -+EXPORT_SYMBOL_GPL(marker_get_private_data); -diff --git a/kernel/module.c b/kernel/module.c -index f9a5987..ac99222 100644 ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -1488,6 +1488,8 @@ static struct module *load_module(void __user *umod, - unsigned int unusedcrcindex; - unsigned int unusedgplindex; - unsigned int unusedgplcrcindex; -+ unsigned int markersindex; -+ unsigned int markersstringsindex; - struct module *mod; - long err = 0; - void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ -@@ -1756,6 +1758,9 @@ static struct module *load_module(void __user *umod, - add_taint(TAINT_FORCED_MODULE); - } - #endif -+ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); -+ markersstringsindex = find_sec(hdr, sechdrs, secstrings, -+ "__markers_strings"); - - /* Now do relocations. 
*/ - for (i = 1; i < hdr->e_shnum; i++) { -@@ -1778,6 +1783,11 @@ static struct module *load_module(void __user *umod, - if (err < 0) - goto cleanup; - } -+#ifdef CONFIG_MARKERS -+ mod->markers = (void *)sechdrs[markersindex].sh_addr; -+ mod->num_markers = -+ sechdrs[markersindex].sh_size / sizeof(*mod->markers); -+#endif - - /* Find duplicate symbols */ - err = verify_export_symbols(mod); -@@ -1796,6 +1806,11 @@ static struct module *load_module(void __user *umod, - - add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); - -+#ifdef CONFIG_MARKERS -+ if (!tainted) -+ marker_update_probe_range(mod->markers, -+ mod->markers + mod->num_markers); -+#endif - err = module_finalize(hdr, sechdrs, mod); - if (err < 0) - goto cleanup; -@@ -2259,3 +2274,18 @@ EXPORT_SYMBOL(module_remove_driver); - void struct_module(struct module *mod) { return; } - EXPORT_SYMBOL(struct_module); - #endif -+ -+#ifdef CONFIG_MARKERS -+void module_update_markers(void) -+{ -+ struct module *mod; -+ -+ mutex_lock(&module_mutex); -+ list_for_each_entry(mod, &modules, list) -+ if (!tainted) -+ marker_update_probe_range(mod->markers, -+ mod->markers + mod->num_markers); -+ mutex_unlock(&module_mutex); -+} -+#endif -+ -diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost -index 0a64688..886b70c 100644 ---- a/scripts/Makefile.modpost -+++ b/scripts/Makefile.modpost -@@ -13,6 +13,7 @@ - # 2) modpost is then used to - # 3) create one .mod.c file pr. 
module - # 4) create one Module.symvers file with CRC for all exported symbols -+# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers - # 5) compile all .mod.c files - # 6) final link of the module to a file - -@@ -41,6 +42,10 @@ include scripts/Makefile.lib - - kernelsymfile := $(objtree)/Module.symvers - modulesymfile := $(KBUILD_EXTMOD)/Module.symvers -+kernelmarkersfile := $(objtree)/Module.markers -+modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers -+ -+markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) - - # Step 1), find all modules listed in $(MODVERDIR)/ - __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) -@@ -58,16 +63,27 @@ quiet_cmd_modpost = MODPOST - $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ - $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ - $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ -+ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ -+ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ - $(filter-out FORCE,$^) - - PHONY += __modpost - __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE - $(call cmd,modpost) - -+quiet_cmd_kernel-mod = MODPOST $@ -+ cmd_kernel-mod = $(cmd_modpost) $@ -+ -+vmlinux.o: FORCE -+ $(call cmd,kernel-mod) -+ - # Declare generated files as targets for modpost - $(symverfile): __modpost ; - $(modules:.ko=.mod.c): __modpost ; - -+ifdef CONFIG_MARKERS -+$(markersfile): __modpost ; -+endif - - # Step 5), compile all *.mod.c files - -diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c -index 0ec3321..383f310 100644 ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -10,7 +10,8 @@ - * - * Usage: modpost vmlinux module1.o module2.o ... 
- */ -- -+#define _GNU_SOURCE -+#include - #include - #include "modpost.h" - #include "../../include/linux/license.h" -@@ -385,6 +386,8 @@ static void parse_elf(struct elf_info *info, const char *filename) - info->export_unused_gpl_sec = i; - else if (strcmp(secname, "__ksymtab_gpl_future") == 0) - info->export_gpl_future_sec = i; -+ else if (strcmp(secname, "__markers_strings") == 0) -+ info->markers_strings_sec = i; - - if (sechdrs[i].sh_type != SHT_SYMTAB) - continue; -@@ -900,6 +903,62 @@ static void check_sec_ref(struct module *mod, const char *modname, - } - } - -+static void get_markers(struct elf_info *info, struct module *mod) -+{ -+ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; -+ const char *strings = (const char *) info->hdr + sh->sh_offset; -+ const Elf_Sym *sym, *first_sym, *last_sym; -+ size_t n; -+ -+ if (!info->markers_strings_sec) -+ return; -+ -+ /* -+ * First count the strings. We look for all the symbols defined -+ * in the __markers_strings section named __mstrtab_*. For -+ * these local names, the compiler puts a random .NNN suffix on, -+ * so the names don't correspond exactly. -+ */ -+ first_sym = last_sym = NULL; -+ n = 0; -+ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) -+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && -+ sym->st_shndx == info->markers_strings_sec && -+ !strncmp(info->strtab + sym->st_name, -+ "__mstrtab_", sizeof "__mstrtab_" - 1)) { -+ if (first_sym == NULL) -+ first_sym = sym; -+ last_sym = sym; -+ ++n; -+ } -+ -+ if (n == 0) -+ return; -+ -+ /* -+ * Now collect each name and format into a line for the output. -+ * Lines look like: -+ * marker_name vmlinux marker %s format %d -+ * The format string after the second \t can use whitespace. 
-+ */ -+ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); -+ mod->nmarkers = n; -+ -+ n = 0; -+ for (sym = first_sym; sym <= last_sym; sym++) -+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && -+ sym->st_shndx == info->markers_strings_sec && -+ !strncmp(info->strtab + sym->st_name, -+ "__mstrtab_", sizeof "__mstrtab_" - 1)) { -+ const char *name = strings + sym->st_value; -+ const char *fmt = strchr(name, '\0') + 1; -+ char *line = NULL; -+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); -+ NOFAIL(line); -+ mod->markers[n++] = line; -+ } -+} -+ - /** - * Functions used only during module init is marked __init and is stored in - * a .init.text section. Likewise data is marked __initdata and stored in -@@ -1103,6 +1162,8 @@ static void read_symbols(char *modname) - get_src_version(modname, mod->srcversion, - sizeof(mod->srcversion)-1); - -+ get_markers(&info, mod); -+ - parse_elf_finish(&info); - - /* Our trick to get versioning for struct_module - it's -@@ -1436,6 +1497,93 @@ static void write_dump(const char *fname) - write_if_changed(&buf, fname); - } - -+static void add_marker(struct module *mod, const char *name, const char *fmt) -+{ -+ char *line = NULL; -+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); -+ NOFAIL(line); -+ -+ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * -+ sizeof mod->markers[0]))); -+ mod->markers[mod->nmarkers++] = line; -+} -+ -+static void read_markers(const char *fname) -+{ -+ unsigned long size, pos = 0; -+ void *file = grab_file(fname, &size); -+ char *line; -+ -+ if (!file) /* No old markers, silently ignore */ -+ return; -+ -+ while ((line = get_next_line(&pos, file, size))) { -+ char *marker, *modname, *fmt; -+ struct module *mod; -+ -+ marker = line; -+ modname = strchr(marker, '\t'); -+ if (!modname) -+ goto fail; -+ *modname++ = '\0'; -+ fmt = strchr(modname, '\t'); -+ if (!fmt) -+ goto fail; -+ *fmt++ = '\0'; -+ if (*marker == '\0' || *modname == '\0') -+ goto fail; -+ -+ mod = 
find_module(modname); -+ if (!mod) { -+ if (is_vmlinux(modname)) -+ have_vmlinux = 1; -+ mod = new_module(NOFAIL(strdup(modname))); -+ mod->skip = 1; -+ } -+ -+ add_marker(mod, marker, fmt); -+ } -+ return; -+fail: -+ fatal("parse error in markers list file\n"); -+} -+ -+static int compare_strings(const void *a, const void *b) -+{ -+ return strcmp(*(const char **) a, *(const char **) b); -+} -+ -+static void write_markers(const char *fname) -+{ -+ struct buffer buf = { }; -+ struct module *mod; -+ size_t i; -+ -+ for (mod = modules; mod; mod = mod->next) -+ if ((!external_module || !mod->skip) && mod->markers != NULL) { -+ /* -+ * Sort the strings so we can skip duplicates when -+ * we write them out. -+ */ -+ qsort(mod->markers, mod->nmarkers, -+ sizeof mod->markers[0], &compare_strings); -+ for (i = 0; i < mod->nmarkers; ++i) { -+ char *line = mod->markers[i]; -+ buf_write(&buf, line, strlen(line)); -+ while (i + 1 < mod->nmarkers && -+ !strcmp(mod->markers[i], -+ mod->markers[i + 1])) -+ free(mod->markers[i++]); -+ free(mod->markers[i]); -+ } -+ free(mod->markers); -+ mod->markers = NULL; -+ } -+ -+ write_if_changed(&buf, fname); -+} -+ -+ - int main(int argc, char **argv) - { - struct module *mod; -@@ -1444,8 +1592,10 @@ int main(int argc, char **argv) - char *kernel_read = NULL, *module_read = NULL; - char *dump_write = NULL; - int opt; -+ char *markers_read = NULL; -+ char *markers_write = NULL; - -- while ((opt = getopt(argc, argv, "i:I:mo:a")) != -1) { -+ while ((opt = getopt(argc, argv, "i:I:mo:aM:K:")) != -1) { - switch(opt) { - case 'i': - kernel_read = optarg; -@@ -1463,6 +1613,12 @@ int main(int argc, char **argv) - case 'a': - all_versions = 1; - break; -+ case 'M': -+ markers_write = optarg; -+ break; -+ case 'K': -+ markers_read = optarg; -+ break; - default: - exit(1); - } -@@ -1502,5 +1658,11 @@ int main(int argc, char **argv) - if (dump_write) - write_dump(dump_write); - -+ if (markers_read) -+ read_markers(markers_read); -+ -+ if (markers_write) 
-+ write_markers(markers_write); -+ - return 0; - } -diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h -index d398c61..27b05e6 100644 ---- a/scripts/mod/modpost.h -+++ b/scripts/mod/modpost.h -@@ -107,6 +107,8 @@ struct module { - int has_init; - int has_cleanup; - struct buffer dev_table_buf; -+ char **markers; -+ size_t nmarkers; - char srcversion[25]; - }; - -@@ -121,6 +123,7 @@ struct elf_info { - Elf_Section export_gpl_sec; - Elf_Section export_unused_gpl_sec; - Elf_Section export_gpl_future_sec; -+ Elf_Section markers_strings_sec; - const char *strtab; - char *modinfo; - unsigned int modinfo_len; Deleted: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch 2008-07-04 02:18:23 UTC (rev 6) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch 2008-07-04 02:22:00 UTC (rev 7) @@ -1,1917 +0,0 @@ -diff --git a/Makefile b/Makefile -index ce04b1f..f059302 100644 ---- a/Makefile -+++ b/Makefile -@@ -581,7 +581,7 @@ quiet_cmd_vmlinux__ ?= LD $@ - cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ - -T $(vmlinux-lds) $(vmlinux-init) \ - --start-group $(vmlinux-main) --end-group \ -- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) -+ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) - - # Generate new vmlinux version - quiet_cmd_vmlinux_version = GEN .version -@@ -686,10 +686,30 @@ $(KALLSYMS): scripts ; - - endif # ifdef CONFIG_KALLSYMS - -+# Do modpost on a prelinked vmlinux. The finally linked vmlinux has -+# relevant sections renamed as per the linker script. 
-+quiet_cmd_vmlinux-modpost = LD $@ -+ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ -+ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ -+ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) -+define rule_vmlinux-modpost -+ : -+ +$(call cmd,vmlinux-modpost) -+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ -+ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd -+endef -+ -+ - # vmlinux image - including updated kernel symbols --vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE -+vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE -+ $(call vmlinux-modpost) - $(call if_changed_rule,vmlinux__) - -+# build vmlinux.o first to catch section mismatch errors early -+$(kallsyms.o): vmlinux.o -+vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE -+ $(call if_changed_rule,vmlinux-modpost) -+ - # The actual objects are generated when descending, - # make sure no implicit rule kicks in - $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; -diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug -index cf069b7..d39d5c5 100644 ---- a/arch/i386/Kconfig.debug -+++ b/arch/i386/Kconfig.debug -@@ -29,6 +29,13 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. 
-+ - config DEBUG_STACK_USAGE - bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL -diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S -index d6cc9c0..7bbda2e 100644 ---- a/arch/i386/kernel/vmlinux.lds.S -+++ b/arch/i386/kernel/vmlinux.lds.S -@@ -52,6 +52,7 @@ SECTIONS - /* writeable */ - .data : { /* Data */ - *(.data) -+ MARKER - CONSTRUCTORS - } - -diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug -index 2782b48..17d4a44 100644 ---- a/arch/ia64/Kconfig.debug -+++ b/arch/ia64/Kconfig.debug -@@ -12,6 +12,12 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. - - choice - prompt "Physical memory granularity" -diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S -index a676e79..c6ea47e 100644 ---- a/arch/ia64/kernel/vmlinux.lds.S -+++ b/arch/ia64/kernel/vmlinux.lds.S -@@ -193,7 +193,7 @@ SECTIONS - - data : { } :data - .data : AT(ADDR(.data) - LOAD_OFFSET) -- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } -+ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } - - . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ - .got : AT(ADDR(.got) - LOAD_OFFSET) -diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug -index 860472a..5c3a9f5 100644 ---- a/arch/ppc64/Kconfig.debug -+++ b/arch/ppc64/Kconfig.debug -@@ -16,6 +16,13 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. 
-+ - config DEBUG_STACK_USAGE - bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL -diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S -index 4103cc1..76d0bf0 100644 ---- a/arch/ppc64/kernel/vmlinux.lds.S -+++ b/arch/ppc64/kernel/vmlinux.lds.S -@@ -118,6 +118,7 @@ SECTIONS - .data : { - *(.data .data.rel* .toc1) - *(.branch_lt) -+ MARKER - } - - .opd : { -diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug -index cd8d39f..f39bf8d 100644 ---- a/arch/sparc64/Kconfig.debug -+++ b/arch/sparc64/Kconfig.debug -@@ -21,6 +21,13 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. -+ - config DEBUG_DCFLUSH - bool "D-cache flush debugging" - depends on DEBUG_KERNEL -diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S -index a710d38..d4b3b62 100644 ---- a/arch/sparc64/kernel/vmlinux.lds.S -+++ b/arch/sparc64/kernel/vmlinux.lds.S -@@ -27,6 +27,7 @@ SECTIONS - .data : - { - *(.data) -+ MARKER - CONSTRUCTORS - } - .data1 : { *(.data1) } -diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug -index cb33186..d0260cb 100644 ---- a/arch/x86_64/Kconfig.debug -+++ b/arch/x86_64/Kconfig.debug -@@ -55,6 +55,13 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. 
-+ - config IOMMU_LEAK - bool "IOMMU leak tracing" - depends on DEBUG_KERNEL -diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S -index f656f19..12ce116 100644 ---- a/arch/x86_64/kernel/vmlinux.lds.S -+++ b/arch/x86_64/kernel/vmlinux.lds.S -@@ -33,6 +33,7 @@ SECTIONS - - .data : { /* Data */ - *(.data) -+ MARKER - CONSTRUCTORS - } - -diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c -index 54fb0a0..cbdcd93 100644 ---- a/drivers/net/bonding/bond_3ad.c -+++ b/drivers/net/bonding/bond_3ad.c -@@ -158,7 +158,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); - - // ================= main 802.3ad protocol functions ================== - static int ad_lacpdu_send(struct port *port); --static int ad_marker_send(struct port *port, struct marker *marker); -+static int ad_marker_send(struct port *port, struct bond_marker *marker); - static void ad_mux_machine(struct port *port); - static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); - static void ad_tx_machine(struct port *port); -@@ -171,8 +171,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast); - static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); - static void ad_enable_collecting_distributing(struct port *port); - static void ad_disable_collecting_distributing(struct port *port); --static void ad_marker_info_received(struct marker *marker_info, struct port *port); --static void ad_marker_response_received(struct marker *marker, struct port *port); -+static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); -+static void ad_marker_response_received(struct bond_marker *marker, struct port *port); - - - ///////////////////////////////////////////////////////////////////////////////// -@@ -945,12 +945,13 @@ static int ad_lacpdu_send(struct port *port) - * Returns: 0 on success - * < 0 on error - */ --static int ad_marker_send(struct port *port, struct marker *marker) 
-+static int ad_marker_send(struct port *port, struct bond_marker *marker) - { - struct slave *slave = port->slave; - struct sk_buff *skb; -- struct marker_header *marker_header; -- int length = sizeof(struct marker_header); -+ struct bond_marker_header *marker_header; -+ int length = sizeof(struct bond_marker_header); -+ - struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; - - skb = dev_alloc_skb(length + 16); -@@ -965,7 +966,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) - skb->nh.raw = skb->data + ETH_HLEN; - skb->protocol = PKT_TYPE_LACPDU; - -- marker_header = (struct marker_header *)skb_put(skb, length); -+ marker_header = (struct bond_marker_header *)skb_put(skb, length); - - marker_header->ad_header.destination_address = lacpdu_multicast_address; - /* Note: source addres is set to be the member's PERMANENT address, because we use it -@@ -1762,7 +1763,7 @@ static void ad_disable_collecting_distributing(struct port *port) - */ - static void ad_marker_info_send(struct port *port) - { -- struct marker marker; -+ struct bond_marker marker; - u16 index; - - // fill the marker PDU with the appropriate values -@@ -1795,13 +1796,14 @@ static void ad_marker_info_send(struct port *port) - * @port: the port we're looking at - * - */ --static void ad_marker_info_received(struct marker *marker_info,struct port *port) -+static void ad_marker_info_received(struct bond_marker *marker_info, -+ struct port *port) - { -- struct marker marker; -+ struct bond_marker marker; - - // copy the received marker data to the response marker - //marker = *marker_info; -- memcpy(&marker, marker_info, sizeof(struct marker)); -+ memcpy(&marker, marker_info, sizeof(struct bond_marker)); - // change the marker subtype to marker response - marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; - // send the marker response -@@ -1820,7 +1822,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port - * response for marker PDU's, in 
this stage, but only to respond to marker - * information. - */ --static void ad_marker_response_received(struct marker *marker, struct port *port) -+static void ad_marker_response_received(struct bond_marker *marker, -+ struct port *port) - { - marker=NULL; // just to satisfy the compiler - port=NULL; // just to satisfy the compiler -@@ -2208,15 +2211,16 @@ void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 leng - case AD_TYPE_MARKER: - // No need to convert fields to Little Endian since we don't use the marker's fields. - -- switch (((struct marker *)lacpdu)->tlv_type) { -+ switch (((struct bond_marker *)lacpdu)->tlv_type) { - case AD_MARKER_INFORMATION_SUBTYPE: - dprintk("Received Marker Information on port %d\n", port->actor_port_number); -- ad_marker_info_received((struct marker *)lacpdu, port); -+ ad_marker_info_received((struct bond_marker *)lacpdu, port); -+ - break; - - case AD_MARKER_RESPONSE_SUBTYPE: - dprintk("Received Marker Response on port %d\n", port->actor_port_number); -- ad_marker_response_received((struct marker *)lacpdu, port); -+ ad_marker_response_received((struct bond_marker *)lacpdu, port); - break; - - default: -diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h -index 4119f0f..7e770ff 100644 ---- a/drivers/net/bonding/bond_3ad.h -+++ b/drivers/net/bonding/bond_3ad.h -@@ -105,7 +105,8 @@ typedef enum { - typedef enum { - AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype - AD_MARKER_RESPONSE_SUBTYPE // marker response subtype --} marker_subtype_t; -+} bond_marker_subtype_t; -+ - - // timers types(43.4.9 in the 802.3ad standard) - typedef enum { -@@ -161,7 +162,7 @@ typedef struct lacpdu_header { - } lacpdu_header_t; - - // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) --typedef struct marker { -+typedef struct bond_marker { - u8 subtype; // = 0x02 (marker PDU) - u8 version_number; // = 0x01 - u8 tlv_type; // = 0x01 (marker information) -@@ -174,12 
+175,12 @@ typedef struct marker { - u8 tlv_type_terminator; // = 0x00 - u8 terminator_length; // = 0x00 - u8 reserved_90[90]; // = 0 --} marker_t; -+} bond_marker_t; - --typedef struct marker_header { -+typedef struct bond_marker_header { - struct ad_header ad_header; -- struct marker marker; --} marker_header_t; -+ struct bond_marker marker; -+} bond_marker_header_t; - - #pragma pack() - -diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h -index bb340cb..fd6e57f 100644 ---- a/include/asm-generic/vmlinux.lds.h -+++ b/include/asm-generic/vmlinux.lds.h -@@ -6,10 +6,18 @@ - #define VMLINUX_SYMBOL(_sym_) _sym_ - #endif - -+/* Kernel markers : pointers */ -+#define MARKER \ -+ . = ALIGN(8); \ -+ VMLINUX_SYMBOL(__start___markers) = .; \ -+ *(__markers) \ -+ VMLINUX_SYMBOL(__stop___markers) = .; -+ - #define RODATA \ - .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ - *(.rodata) *(.rodata.*) \ - *(__vermagic) /* Kernel version magic */ \ -+ *(__markers_strings) /* Markers: strings */ \ - } \ - \ - .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ -diff --git a/include/linux/marker.h b/include/linux/marker.h -new file mode 100644 -index 0000000..efbc82b ---- /dev/null -+++ b/include/linux/marker.h -@@ -0,0 +1,139 @@ -+#ifndef _LINUX_MARKER_H -+#define _LINUX_MARKER_H -+ -+/* -+ * Code markup for dynamic and static tracing. -+ * -+ * See Documentation/marker.txt. -+ * -+ * (C) Copyright 2006 Mathieu Desnoyers -+ * -+ * This file is released under the GPLv2. -+ * See the file COPYING for more details. -+ */ -+ -+#include -+ -+struct module; -+struct marker; -+ -+/** -+ * marker_probe_func - Type of a marker probe function -+ * @probe_private: probe private data -+ * @call_private: call site private data -+ * @fmt: format string -+ * @args: variable argument list pointer. Use a pointer to overcome C's -+ * inability to pass this around as a pointer in a portable manner in -+ * the callee otherwise. -+ * -+ * Type of marker probe functions. 
They receive the mdata and need to parse the -+ * format string to recover the variable argument list. -+ */ -+typedef void marker_probe_func(void *probe_private, void *call_private, -+ const char *fmt, va_list *args); -+ -+struct marker_probe_closure { -+ marker_probe_func *func; /* Callback */ -+ void *probe_private; /* Private probe data */ -+}; -+ -+struct marker { -+ const char *name; /* Marker name */ -+ const char *format; /* Marker format string, describing the -+ * variable argument list. -+ */ -+ char state; /* Marker state. */ -+ char ptype; /* probe type : 0 : single, 1 : multi */ -+ void (*call)(const struct marker *mdata, /* Probe wrapper */ -+ void *call_private, const char *fmt, ...); -+ struct marker_probe_closure single; -+ struct marker_probe_closure *multi; -+} __attribute__((aligned(8))); -+ -+#ifdef CONFIG_MARKERS -+ -+/* -+ * Note : the empty asm volatile with read constraint is used here instead of a -+ * "used" attribute to fix a gcc 4.1.x bug. -+ * Make sure the alignment of the structure in the __markers section will -+ * not add unwanted padding between the beginning of the section and the -+ * structure. Force alignment to the same alignment as the section start. -+ */ -+#define __trace_mark(name, call_private, format, args...) 
\ -+ do { \ -+ static const char __mstrtab_##name[] \ -+ __attribute__((section("__markers_strings"))) \ -+ = #name "\0" format; \ -+ static struct marker __mark_##name \ -+ __attribute__((section("__markers"), aligned(8))) = \ -+ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ -+ 0, 0, marker_probe_cb, \ -+ { __mark_empty_function, NULL}, NULL }; \ -+ __mark_check_format(format, ## args); \ -+ if (unlikely(__mark_##name.state)) { \ -+ (*__mark_##name.call) \ -+ (&__mark_##name, call_private, \ -+ format, ## args); \ -+ } \ -+ } while (0) -+ -+extern void marker_update_probe_range(struct marker *begin, -+ struct marker *end); -+#else /* !CONFIG_MARKERS */ -+#define __trace_mark(name, call_private, format, args...) \ -+ __mark_check_format(format, ## args) -+static inline void marker_update_probe_range(struct marker *begin, -+ struct marker *end) -+{ } -+#endif /* CONFIG_MARKERS */ -+ -+/** -+ * trace_mark - Marker -+ * @name: marker name, not quoted. -+ * @format: format string -+ * @args...: variable argument list -+ * -+ * Places a marker. -+ */ -+#define trace_mark(name, format, args...) \ -+ __trace_mark(name, NULL, format, ## args) -+ -+/** -+ * MARK_NOARGS - Format string for a marker with no argument. -+ */ -+#define MARK_NOARGS " " -+ -+/* To be used for string format validity checking with gcc */ -+static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) -+{ -+} -+ -+extern marker_probe_func __mark_empty_function; -+ -+extern void marker_probe_cb(const struct marker *mdata, -+ void *call_private, const char *fmt, ...); -+extern void marker_probe_cb_noarg(const struct marker *mdata, -+ void *call_private, const char *fmt, ...); -+ -+/* -+ * Connect a probe to a marker. -+ * private data pointer must be a valid allocated memory address, or NULL. 
-+ */ -+extern int marker_probe_register(const char *name, const char *format, -+ marker_probe_func *probe, void *probe_private); -+ -+/* -+ * Returns the private data given to marker_probe_register. -+ */ -+extern int marker_probe_unregister(const char *name, -+ marker_probe_func *probe, void *probe_private); -+/* -+ * Unregister a marker by providing the registered private data. -+ */ -+extern int marker_probe_unregister_private_data(marker_probe_func *probe, -+ void *probe_private); -+ -+extern void *marker_get_private_data(const char *name, marker_probe_func *probe, -+ int num); -+ -+#endif -diff --git a/include/linux/module.h b/include/linux/module.h -index 8da8948..2ad5efd 100644 ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -18,6 +18,7 @@ - #include - #include - #include -+#include - #include - - #include -@@ -328,6 +329,10 @@ struct module - /* The command line arguments (may be mangled). People like - keeping pointers to this stuff */ - char *args; -+#ifdef CONFIG_MARKERS -+ struct marker *markers; -+ unsigned int num_markers; -+#endif - }; - - /* FIXME: It'd be nice to isolate modules during init, too, so they -@@ -448,6 +453,7 @@ int register_module_notifier(struct notifier_block * nb); - int unregister_module_notifier(struct notifier_block * nb); - - extern void print_modules(void); -+extern void module_update_markers(void); - #else /* !CONFIG_MODULES... 
*/ - #define EXPORT_SYMBOL(sym) - #define EXPORT_SYMBOL_GPL(sym) -@@ -537,6 +543,10 @@ static inline int unregister_module_notifier(struct notifier_block * nb) - static inline void print_modules(void) - { - } -+ -+static inline void module_update_markers(void) -+{ -+} - #endif /* CONFIG_MODULES */ - - #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) -diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h -index 91057d6..bcd0acb 100644 ---- a/include/linux/rcupdate.h -+++ b/include/linux/rcupdate.h -@@ -42,6 +42,19 @@ - #include - #include - -+/* -+ * Prevent the compiler from merging or refetching accesses. The compiler -+ * is also forbidden from reordering successive instances of ACCESS_ONCE(), -+ * but only when the compiler is aware of some particular ordering. One way -+ * to make the compiler aware of ordering is to put the two invocations of -+ * ACCESS_ONCE() in different C statements. -+ * -+ * This macro does absolutely -nothing- to prevent the CPU from reordering, -+ * merging, or refetching absolutely anything at any time. 
-+ * -+ */ -+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) -+ - /** - * struct rcu_head - callback structure for use with RCU - * @next: next update requests in a list -@@ -102,6 +115,7 @@ struct rcu_data { - struct rcu_head *donelist; - struct rcu_head **donetail; - int cpu; -+ struct rcu_head barrier; - }; - - DECLARE_PER_CPU(struct rcu_data, rcu_data); -@@ -266,6 +280,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head, - extern void FASTCALL(call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *head))); - extern void synchronize_kernel(void); -- -+extern void rcu_barrier(void); - #endif /* __KERNEL__ */ - #endif /* __LINUX_RCUPDATE_H */ -diff --git a/kernel/Makefile b/kernel/Makefile -index 0b8c8ca..f8248bc 100644 ---- a/kernel/Makefile -+++ b/kernel/Makefile -@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o - obj-$(CONFIG_AUDITSYSCALL) += auditsc.o - obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o - obj-$(CONFIG_KPROBES) += kprobes.o -+obj-$(CONFIG_MARKERS) += marker.o - - ifneq ($(CONFIG_IA64),y) - # According to Alan Modra , the -fno-omit-frame-pointer is -diff --git a/kernel/marker.c b/kernel/marker.c -new file mode 100644 -index 0000000..c4c2cd8 ---- /dev/null -+++ b/kernel/marker.c -@@ -0,0 +1,851 @@ -+/* -+ * Copyright (C) 2007 Mathieu Desnoyers -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+extern struct marker __start___markers[]; -+extern struct marker __stop___markers[]; -+ -+/* Set to 1 to enable marker debug output */ -+const int marker_debug; -+ -+/* -+ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin -+ * and module markers and the hash table. -+ */ -+static DEFINE_MUTEX(markers_mutex); -+ -+/* -+ * Marker hash table, containing the active markers. -+ * Protected by module_mutex. -+ */ -+#define MARKER_HASH_BITS 6 -+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) -+ -+/* -+ * Note about RCU : -+ * It is used to make sure every handler has finished using its private data -+ * between two consecutive operation (add or remove) on a given marker. It is -+ * also used to delay the free of multiple probes array until a quiescent state -+ * is reached. -+ * marker entries modifications are protected by the markers_mutex. -+ */ -+struct marker_entry { -+ struct hlist_node hlist; -+ char *format; -+ void (*call)(const struct marker *mdata, /* Probe wrapper */ -+ void *call_private, const char *fmt, ...); -+ struct marker_probe_closure single; -+ struct marker_probe_closure *multi; -+ int refcount; /* Number of times armed. 0 if disarmed. */ -+ struct rcu_head rcu; -+ void *oldptr; -+ char rcu_pending:1; -+ char ptype:1; -+ char name[0]; /* Contains name'\0'format'\0' */ -+}; -+ -+static struct hlist_head marker_table[MARKER_TABLE_SIZE]; -+ -+/** -+ * __mark_empty_function - Empty probe callback -+ * @probe_private: probe private data -+ * @call_private: call site private data -+ * @fmt: format string -+ * @...: variable argument list -+ * -+ * Empty callback provided as a probe to the markers. 
By providing this to a -+ * disabled marker, we make sure the execution flow is always valid even -+ * though the function pointer change and the marker enabling are two distinct -+ * operations that modifies the execution flow of preemptible code. -+ */ -+void __mark_empty_function(void *probe_private, void *call_private, -+ const char *fmt, va_list *args) -+{ -+} -+EXPORT_SYMBOL_GPL(__mark_empty_function); -+ -+/* -+ * marker_probe_cb Callback that prepares the variable argument list for probes. -+ * @mdata: pointer of type struct marker -+ * @call_private: caller site private data -+ * @fmt: format string -+ * @...: Variable argument list. -+ * -+ * Since we do not use "typical" pointer based RCU in the 1 argument case, we -+ * need to put a full smp_rmb() in this branch. This is why we do not use -+ * rcu_dereference() for the pointer read. -+ */ -+void marker_probe_cb(const struct marker *mdata, void *call_private, -+ const char *fmt, ...) -+{ -+ va_list args; -+ char ptype; -+ -+ /* -+ * disabling preemption to make sure the teardown of the callbacks can -+ * be done correctly when they are in modules and they insure RCU read -+ * coherency. -+ */ -+ preempt_disable(); -+ ptype = ACCESS_ONCE(mdata->ptype); -+ if (likely(!ptype)) { -+ marker_probe_func *func; -+ /* Must read the ptype before ptr. They are not data dependant, -+ * so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func = ACCESS_ONCE(mdata->single.func); -+ /* Must read the ptr before private data. They are not data -+ * dependant, so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ va_start(args, fmt); -+ func(mdata->single.probe_private, call_private, fmt, &args); -+ va_end(args); -+ } else { -+ struct marker_probe_closure *multi; -+ int i; -+ /* -+ * multi points to an array, therefore accessing the array -+ * depends on reading multi. However, even in this case, -+ * we must insure that the pointer is read _before_ the array -+ * data. 
Same as rcu_dereference, but we need a full smp_rmb() -+ * in the fast path, so put the explicit barrier here. -+ */ -+ smp_read_barrier_depends(); -+ multi = ACCESS_ONCE(mdata->multi); -+ for (i = 0; multi[i].func; i++) { -+ va_start(args, fmt); -+ multi[i].func(multi[i].probe_private, call_private, fmt, -+ &args); -+ va_end(args); -+ } -+ } -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(marker_probe_cb); -+ -+/* -+ * marker_probe_cb_noarg Callback that does not prepare the variable argument list. -+ * @mdata: pointer of type struct marker -+ * @call_private: caller site private data -+ * @fmt: format string -+ * @...: Variable argument list. -+ * -+ * Should be connected to markers "MARK_NOARGS". -+ */ -+void marker_probe_cb_noarg(const struct marker *mdata, -+ void *call_private, const char *fmt, ...) -+{ -+ va_list args; /* not initialized */ -+ char ptype; -+ -+ preempt_disable(); -+ ptype = ACCESS_ONCE(mdata->ptype); -+ if (likely(!ptype)) { -+ marker_probe_func *func; -+ /* Must read the ptype before ptr. They are not data dependant, -+ * so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func = ACCESS_ONCE(mdata->single.func); -+ /* Must read the ptr before private data. They are not data -+ * dependant, so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func(mdata->single.probe_private, call_private, fmt, &args); -+ } else { -+ struct marker_probe_closure *multi; -+ int i; -+ /* -+ * multi points to an array, therefore accessing the array -+ * depends on reading multi. However, even in this case, -+ * we must insure that the pointer is read _before_ the array -+ * data. Same as rcu_dereference, but we need a full smp_rmb() -+ * in the fast path, so put the explicit barrier here. 
-+ */ -+ smp_read_barrier_depends(); -+ multi = ACCESS_ONCE(mdata->multi); -+ for (i = 0; multi[i].func; i++) -+ multi[i].func(multi[i].probe_private, call_private, fmt, -+ &args); -+ } -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); -+ -+static void free_old_closure(struct rcu_head *head) -+{ -+ struct marker_entry *entry = container_of(head, -+ struct marker_entry, rcu); -+ kfree(entry->oldptr); -+ /* Make sure we free the data before setting the pending flag to 0 */ -+ smp_wmb(); -+ entry->rcu_pending = 0; -+} -+ -+static void debug_print_probes(struct marker_entry *entry) -+{ -+ int i; -+ -+ if (!marker_debug) -+ return; -+ -+ if (!entry->ptype) { -+ printk(KERN_DEBUG "Single probe : %p %p\n", -+ entry->single.func, -+ entry->single.probe_private); -+ } else { -+ for (i = 0; entry->multi[i].func; i++) -+ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, -+ entry->multi[i].func, -+ entry->multi[i].probe_private); -+ } -+} -+ -+static struct marker_probe_closure * -+marker_entry_add_probe(struct marker_entry *entry, -+ marker_probe_func *probe, void *probe_private) -+{ -+ int nr_probes = 0; -+ struct marker_probe_closure *old, *new; -+ -+ WARN_ON(!probe); -+ -+ debug_print_probes(entry); -+ old = entry->multi; -+ if (!entry->ptype) { -+ if (entry->single.func == probe && -+ entry->single.probe_private == probe_private) -+ return ERR_PTR(-EBUSY); -+ if (entry->single.func == __mark_empty_function) { -+ /* 0 -> 1 probes */ -+ entry->single.func = probe; -+ entry->single.probe_private = probe_private; -+ entry->refcount = 1; -+ entry->ptype = 0; -+ debug_print_probes(entry); -+ return NULL; -+ } else { -+ /* 1 -> 2 probes */ -+ nr_probes = 1; -+ old = NULL; -+ } -+ } else { -+ /* (N -> N+1), (N != 0, 1) probes */ -+ for (nr_probes = 0; old[nr_probes].func; nr_probes++) -+ if (old[nr_probes].func == probe -+ && old[nr_probes].probe_private -+ == probe_private) -+ return ERR_PTR(-EBUSY); -+ } -+ /* + 2 : one for new probe, one for NULL func */ -+ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), -+ GFP_KERNEL); -+ if (new == NULL) -+ return ERR_PTR(-ENOMEM); -+ if (!old) -+ new[0] = entry->single; -+ else -+ memcpy(new, old, -+ nr_probes * sizeof(struct marker_probe_closure)); -+ new[nr_probes].func = probe; -+ new[nr_probes].probe_private = probe_private; -+ entry->refcount = nr_probes + 1; -+ entry->multi = new; -+ entry->ptype = 1; -+ debug_print_probes(entry); -+ return old; -+} -+ -+static struct marker_probe_closure * -+marker_entry_remove_probe(struct marker_entry *entry, -+ marker_probe_func *probe, void *probe_private) -+{ -+ int nr_probes = 0, nr_del = 0, i; -+ struct marker_probe_closure *old, *new; -+ -+ old = entry->multi; -+ -+ debug_print_probes(entry); -+ if (!entry->ptype) { -+ /* 0 -> N is an error */ -+ WARN_ON(entry->single.func == __mark_empty_function); -+ /* 1 -> 0 probes */ -+ WARN_ON(probe && entry->single.func != probe); -+ WARN_ON(entry->single.probe_private != probe_private); -+ entry->single.func = __mark_empty_function; -+ entry->refcount = 0; -+ entry->ptype = 0; -+ debug_print_probes(entry); -+ return NULL; -+ } else { -+ /* (N -> M), (N > 1, M >= 0) probes */ -+ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { -+ if ((!probe || old[nr_probes].func == probe) -+ && old[nr_probes].probe_private -+ == probe_private) -+ nr_del++; -+ } -+ } -+ -+ if (nr_probes - nr_del == 0) { -+ /* N -> 0, (N > 1) */ -+ entry->single.func = __mark_empty_function; -+ entry->refcount = 0; -+ entry->ptype = 0; -+ } else if (nr_probes - nr_del == 1) { -+ /* N -> 1, (N > 1) */ -+ for (i = 0; old[i].func; i++) -+ if ((probe && old[i].func != probe) || -+ old[i].probe_private != probe_private) -+ entry->single = old[i]; -+ entry->refcount = 1; -+ entry->ptype = 0; -+ } else { -+ int j = 0; -+ /* N -> M, (N > 1, M > 1) */ -+ /* + 1 for NULL */ -+ new = kzalloc((nr_probes - nr_del + 1) -+ * sizeof(struct marker_probe_closure), GFP_KERNEL); -+ if (new == NULL) -+ return 
ERR_PTR(-ENOMEM); -+ for (i = 0; old[i].func; i++) -+ if ((probe && old[i].func != probe) || -+ old[i].probe_private != probe_private) -+ new[j++] = old[i]; -+ entry->refcount = nr_probes - nr_del; -+ entry->ptype = 1; -+ entry->multi = new; -+ } -+ debug_print_probes(entry); -+ return old; -+} -+ -+/* -+ * Get marker if the marker is present in the marker hash table. -+ * Must be called with markers_mutex held. -+ * Returns NULL if not present. -+ */ -+static struct marker_entry *get_marker(const char *name) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ u32 hash = jhash(name, strlen(name), 0); -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) -+ return e; -+ } -+ return NULL; -+} -+ -+/* -+ * Add the marker to the marker hash table. Must be called with markers_mutex -+ * held. -+ */ -+static struct marker_entry *add_marker(const char *name, const char *format) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ size_t name_len = strlen(name) + 1; -+ size_t format_len = 0; -+ u32 hash = jhash(name, name_len-1, 0); -+ -+ if (format) -+ format_len = strlen(format) + 1; -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ printk(KERN_NOTICE -+ "Marker %s busy\n", name); -+ return ERR_PTR(-EBUSY); /* Already there */ -+ } -+ } -+ /* -+ * Using kmalloc here to allocate a variable length element. Could -+ * cause some memory fragmentation if overused. 
-+ */ -+ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, -+ GFP_KERNEL); -+ if (!e) -+ return ERR_PTR(-ENOMEM); -+ memcpy(&e->name[0], name, name_len); -+ if (format) { -+ e->format = &e->name[name_len]; -+ memcpy(e->format, format, format_len); -+ if (strcmp(e->format, MARK_NOARGS) == 0) -+ e->call = marker_probe_cb_noarg; -+ else -+ e->call = marker_probe_cb; -+ trace_mark(core_marker_format, "name %s format %s", -+ e->name, e->format); -+ } else { -+ e->format = NULL; -+ e->call = marker_probe_cb; -+ } -+ e->single.func = __mark_empty_function; -+ e->single.probe_private = NULL; -+ e->multi = NULL; -+ e->ptype = 0; -+ e->refcount = 0; -+ e->rcu_pending = 0; -+ hlist_add_head(&e->hlist, head); -+ return e; -+} -+ -+/* -+ * Remove the marker from the marker hash table. Must be called with mutex_lock -+ * held. -+ */ -+static int remove_marker(const char *name) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ int found = 0; -+ size_t len = strlen(name) + 1; -+ u32 hash = jhash(name, len-1, 0); -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ found = 1; -+ break; -+ } -+ } -+ if (!found) -+ return -ENOENT; -+ if (e->single.func != __mark_empty_function) -+ return -EBUSY; -+ hlist_del(&e->hlist); -+ /* Make sure the call_rcu has been executed */ -+ if (e->rcu_pending) -+ rcu_barrier(); -+ kfree(e); -+ return 0; -+} -+ -+/* -+ * Set the mark_entry format to the format found in the element. 
-+ */ -+static int marker_set_format(struct marker_entry **entry, const char *format) -+{ -+ struct marker_entry *e; -+ size_t name_len = strlen((*entry)->name) + 1; -+ size_t format_len = strlen(format) + 1; -+ -+ -+ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, -+ GFP_KERNEL); -+ if (!e) -+ return -ENOMEM; -+ memcpy(&e->name[0], (*entry)->name, name_len); -+ e->format = &e->name[name_len]; -+ memcpy(e->format, format, format_len); -+ if (strcmp(e->format, MARK_NOARGS) == 0) -+ e->call = marker_probe_cb_noarg; -+ else -+ e->call = marker_probe_cb; -+ e->single = (*entry)->single; -+ e->multi = (*entry)->multi; -+ e->ptype = (*entry)->ptype; -+ e->refcount = (*entry)->refcount; -+ e->rcu_pending = 0; -+ hlist_add_before(&e->hlist, &(*entry)->hlist); -+ hlist_del(&(*entry)->hlist); -+ /* Make sure the call_rcu has been executed */ -+ if ((*entry)->rcu_pending) -+ rcu_barrier(); -+ kfree(*entry); -+ *entry = e; -+ trace_mark(core_marker_format, "name %s format %s", -+ e->name, e->format); -+ return 0; -+} -+ -+/* -+ * Sets the probe callback corresponding to one marker. -+ */ -+static int set_marker(struct marker_entry **entry, struct marker *elem, -+ int active) -+{ -+ int ret; -+ WARN_ON(strcmp((*entry)->name, elem->name) != 0); -+ -+ if ((*entry)->format) { -+ if (strcmp((*entry)->format, elem->format) != 0) { -+ printk(KERN_NOTICE -+ "Format mismatch for probe %s " -+ "(%s), marker (%s)\n", -+ (*entry)->name, -+ (*entry)->format, -+ elem->format); -+ return -EPERM; -+ } -+ } else { -+ ret = marker_set_format(entry, elem->format); -+ if (ret) -+ return ret; -+ } -+ -+ /* -+ * probe_cb setup (statically known) is done here. It is -+ * asynchronous with the rest of execution, therefore we only -+ * pass from a "safe" callback (with argument) to an "unsafe" -+ * callback (does not set arguments). 
-+ */ -+ elem->call = (*entry)->call; -+ /* -+ * Sanity check : -+ * We only update the single probe private data when the ptr is -+ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) -+ */ -+ WARN_ON(elem->single.func != __mark_empty_function -+ && elem->single.probe_private -+ != (*entry)->single.probe_private && -+ !elem->ptype); -+ elem->single.probe_private = (*entry)->single.probe_private; -+ /* -+ * Make sure the private data is valid when we update the -+ * single probe ptr. -+ */ -+ smp_wmb(); -+ elem->single.func = (*entry)->single.func; -+ /* -+ * We also make sure that the new probe callbacks array is consistent -+ * before setting a pointer to it. -+ */ -+ rcu_assign_pointer(elem->multi, (*entry)->multi); -+ /* -+ * Update the function or multi probe array pointer before setting the -+ * ptype. -+ */ -+ smp_wmb(); -+ elem->ptype = (*entry)->ptype; -+ elem->state = active; -+ -+ return 0; -+} -+ -+/* -+ * Disable a marker and its probe callback. -+ * Note: only after a synchronize_sched() issued after setting elem->call to the -+ * empty function insures that the original callback is not used anymore. This -+ * insured by preemption disabling around the call site. -+ */ -+static void disable_marker(struct marker *elem) -+{ -+ /* leave "call" as is. It is known statically. */ -+ elem->state = 0; -+ elem->single.func = __mark_empty_function; -+ /* Update the function before setting the ptype */ -+ smp_wmb(); -+ elem->ptype = 0; /* single probe */ -+ /* -+ * Leave the private data and id there, because removal is racy and -+ * should be done only after a synchronize_sched(). These are never used -+ * until the next initialization anyway. -+ */ -+} -+ -+/** -+ * marker_update_probe_range - Update a probe range -+ * @begin: beginning of the range -+ * @end: end of the range -+ * -+ * Updates the probe callback corresponding to a range of markers. 
-+ */ -+void marker_update_probe_range(struct marker *begin, -+ struct marker *end) -+{ -+ struct marker *iter; -+ struct marker_entry *mark_entry; -+ -+ mutex_lock(&markers_mutex); -+ for (iter = begin; iter < end; iter++) { -+ mark_entry = get_marker(iter->name); -+ if (mark_entry) { -+ set_marker(&mark_entry, iter, -+ !!mark_entry->refcount); -+ /* -+ * ignore error, continue -+ */ -+ } else { -+ disable_marker(iter); -+ } -+ } -+ mutex_unlock(&markers_mutex); -+} -+ -+/* -+ * Update probes, removing the faulty probes. -+ * Issues a synchronize_sched() when no reference to the module passed -+ * as parameter is found in the probes so the probe module can be -+ * safely unloaded from now on. -+ * -+ * Internal callback only changed before the first probe is connected to it. -+ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 -+ * transitions. All other transitions will leave the old private data valid. -+ * This makes the non-atomicity of the callback/private data updates valid. -+ * -+ * "special case" updates : -+ * 0 -> 1 callback -+ * 1 -> 0 callback -+ * 1 -> 2 callbacks -+ * 2 -> 1 callbacks -+ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. -+ * Side effect : marker_set_format may delete the marker entry (creating a -+ * replacement). -+ */ -+static void marker_update_probes(void) -+{ -+ /* Core kernel markers */ -+ marker_update_probe_range(__start___markers, __stop___markers); -+ /* Markers in modules. */ -+ module_update_markers(); -+} -+ -+/** -+ * marker_probe_register - Connect a probe to a marker -+ * @name: marker name -+ * @format: format string -+ * @probe: probe handler -+ * @probe_private: probe private data -+ * -+ * private data must be a valid allocated memory address, or NULL. -+ * Returns 0 if ok, error value on error. -+ * The probe address must at least be aligned on the architecture pointer size. 
-+ */ -+int marker_probe_register(const char *name, const char *format, -+ marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ int ret = 0; -+ struct marker_probe_closure *old; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ if (!entry) { -+ entry = add_marker(name, format); -+ if (IS_ERR(entry)) { -+ ret = PTR_ERR(entry); -+ goto end; -+ } -+ } -+ /* -+ * If we detect that a call_rcu is pending for this marker, -+ * make sure it's executed now. -+ */ -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_add_probe(entry, probe, probe_private); -+ if (IS_ERR(old)) { -+ ret = PTR_ERR(old); -+ goto end; -+ } -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ WARN_ON(!entry); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_register); -+ -+/** -+ * marker_probe_unregister - Disconnect a probe from a marker -+ * @name: marker name -+ * @probe: probe function pointer -+ * @probe_private: probe private data -+ * -+ * Returns 0 on success, or -ENOENT if no marker named @name is registered. -+ * We do not need to call a synchronize_sched to make sure the probes have -+ * finished running before doing a module unload, because the module unload -+ * itself uses stop_machine(), which ensures that every preempt disabled section -+ * has finished. 
-+ */ -+int marker_probe_unregister(const char *name, -+ marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ struct marker_probe_closure *old; -+ int ret = 0; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ if (!entry) { -+ ret = -ENOENT; -+ goto end; -+ } -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_remove_probe(entry, probe, probe_private); -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+ remove_marker(name); /* Ignore busy error message */ -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_unregister); -+ -+static struct marker_entry * -+get_marker_from_private_data(marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ unsigned int i; -+ struct hlist_head *head; -+ struct hlist_node *node; -+ -+ for (i = 0; i < MARKER_TABLE_SIZE; i++) { -+ head = &marker_table[i]; -+ hlist_for_each_entry(entry, node, head, hlist) { -+ if (!entry->ptype) { -+ if (entry->single.func == probe -+ && entry->single.probe_private -+ == probe_private) -+ return entry; -+ } else { -+ struct marker_probe_closure *closure; -+ closure = entry->multi; -+ for (i = 0; closure[i].func; i++) { -+ if (closure[i].func == probe && -+ closure[i].probe_private -+ == probe_private) -+ return entry; -+ } -+ } -+ } -+ } -+ return NULL; -+} -+ -+/** -+ * marker_probe_unregister_private_data - Disconnect a probe from a marker -+ * @probe: probe function -+ * @probe_private: probe private data -+ * -+ * Unregister a probe by providing the registered private data. -+ * Only removes the first marker found in hash table. -+ * Return 0 on success or error value. 
-+ * We do not need to call a synchronize_sched to make sure the probes have -+ * finished running before doing a module unload, because the module unload -+ * itself uses stop_machine(), which insures that every preempt disabled section -+ * have finished. -+ */ -+int marker_probe_unregister_private_data(marker_probe_func *probe, -+ void *probe_private) -+{ -+ struct marker_entry *entry; -+ int ret = 0; -+ struct marker_probe_closure *old; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker_from_private_data(probe, probe_private); -+ if (!entry) { -+ ret = -ENOENT; -+ goto end; -+ } -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_remove_probe(entry, NULL, probe_private); -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker_from_private_data(probe, probe_private); -+ WARN_ON(!entry); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+ remove_marker(entry->name); /* Ignore busy error message */ -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); -+ -+/** -+ * marker_get_private_data - Get a marker's probe private data -+ * @name: marker name -+ * @probe: probe to match -+ * @num: get the nth matching probe's private data -+ * -+ * Returns the nth private data pointer (starting from 0) matching, or an -+ * ERR_PTR. -+ * Returns the private data pointer, or an ERR_PTR. -+ * The private data pointer should _only_ be dereferenced if the caller is the -+ * owner of the data, or its content could vanish. This is mostly used to -+ * confirm that a caller is the owner of a registered probe. 
-+ */ -+void *marker_get_private_data(const char *name, marker_probe_func *probe, -+ int num) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ size_t name_len = strlen(name) + 1; -+ u32 hash = jhash(name, name_len-1, 0); -+ int i; -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ if (!e->ptype) { -+ if (num == 0 && e->single.func == probe) -+ return e->single.probe_private; -+ else -+ break; -+ } else { -+ struct marker_probe_closure *closure; -+ int match = 0; -+ closure = e->multi; -+ for (i = 0; closure[i].func; i++) { -+ if (closure[i].func != probe) -+ continue; -+ if (match++ == num) -+ return closure[i].probe_private; -+ } -+ } -+ } -+ } -+ return ERR_PTR(-ENOENT); -+} -+EXPORT_SYMBOL_GPL(marker_get_private_data); -diff --git a/kernel/module.c b/kernel/module.c -index 18b39bc..096c3dc 100644 ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -1504,6 +1504,8 @@ static struct module *load_module(void __user *umod, - void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ - struct exception_table_entry *extable; - int gpgsig_ok; -+ unsigned int markersindex; -+ unsigned int markersstringsindex; - - DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", - umod, len, uargs); -@@ -1734,6 +1736,9 @@ static struct module *load_module(void __user *umod, - tainted |= TAINT_FORCED_MODULE; - } - #endif -+ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); -+ markersstringsindex = find_sec(hdr, sechdrs, secstrings, -+ "__markers_strings"); - - /* Now do relocations. 
*/ - for (i = 1; i < hdr->e_shnum; i++) { -@@ -1757,6 +1762,12 @@ static struct module *load_module(void __user *umod, - goto cleanup; - } - -+#ifdef CONFIG_MARKERS -+ mod->markers = (void *)sechdrs[markersindex].sh_addr; -+ mod->num_markers = -+ sechdrs[markersindex].sh_size / sizeof(*mod->markers); -+#endif -+ - /* Set up and sort exception table */ - mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); - mod->extable = extable = (void *)sechdrs[exindex].sh_addr; -@@ -1768,6 +1779,12 @@ static struct module *load_module(void __user *umod, - - add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); - -+#ifdef CONFIG_MARKERS -+ if (!tainted) -+ marker_update_probe_range(mod->markers, -+ mod->markers + mod->num_markers); -+#endif -+ - err = module_finalize(hdr, sechdrs, mod); - if (err < 0) - goto cleanup; -@@ -2175,6 +2192,20 @@ void struct_module(struct module *mod) { return; } - EXPORT_SYMBOL(struct_module); - #endif - -+#ifdef CONFIG_MARKERS -+void module_update_markers(void) -+{ -+ struct module *mod; -+ -+ down(&module_mutex); -+ list_for_each_entry(mod, &modules, list) -+ if (!tainted) -+ marker_update_probe_range(mod->markers, -+ mod->markers + mod->num_markers); -+ up(&module_mutex); -+} -+#endif -+ - static int __init modules_init(void) - { - return subsystem_register(&module_subsys); -diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c -index 1b16bfc..20ade31 100644 ---- a/kernel/rcupdate.c -+++ b/kernel/rcupdate.c -@@ -46,6 +46,7 @@ - #include - #include - #include -+#include - - /* Definition for rcupdate control block. */ - struct rcu_ctrlblk rcu_ctrlblk = -@@ -98,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head, - local_irq_restore(flags); - } - -+static atomic_t rcu_barrier_cpu_count; -+static DEFINE_MUTEX(rcu_barrier_mutex); -+static struct completion rcu_barrier_completion; -+ - /** - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. - * @head: structure to be used for queueing the RCU updates. 
-@@ -410,6 +415,44 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, - return NOTIFY_OK; - } - -+static void rcu_barrier_callback(struct rcu_head *notused) -+{ -+ if (atomic_dec_and_test(&rcu_barrier_cpu_count)) -+ complete(&rcu_barrier_completion); -+} -+ -+/* -+ * Called with preemption disabled, and from cross-cpu IRQ context. -+ */ -+static void rcu_barrier_func(void *notused) -+{ -+ int cpu = smp_processor_id(); -+ struct rcu_data *rdp = &per_cpu(rcu_data, cpu); -+ struct rcu_head *head; -+ -+ head = &rdp->barrier; -+ atomic_inc(&rcu_barrier_cpu_count); -+ call_rcu(head, rcu_barrier_callback); -+} -+ -+/** -+ * rcu_barrier - Wait until all the in-flight RCUs are complete. -+ */ -+void rcu_barrier(void) -+{ -+ BUG_ON(in_interrupt()); -+ /* Take cpucontrol mutex to protect against CPU hotplug */ -+ mutex_lock(&rcu_barrier_mutex); -+ init_completion(&rcu_barrier_completion); -+ atomic_set(&rcu_barrier_cpu_count, 0); -+ on_each_cpu(rcu_barrier_func, NULL, 0, 1); -+ wait_for_completion(&rcu_barrier_completion); -+ mutex_unlock(&rcu_barrier_mutex); -+} -+EXPORT_SYMBOL_GPL(rcu_barrier); -+ -+ -+ - static struct notifier_block __devinitdata rcu_nb = { - .notifier_call = rcu_cpu_notify, - }; -diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost -index b3d31b5..b100a32 100644 ---- a/scripts/Makefile.modpost -+++ b/scripts/Makefile.modpost -@@ -13,6 +13,7 @@ - # 2) modpost is then used to - # 3) create one .mod.c file pr. 
module - # 4) create one Module.symvers file with CRC for all exported symbols -+# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers - # 5) compile all .mod.c files - # 6) final link of the module to a file - -@@ -40,6 +41,11 @@ include scripts/Makefile.lib - - symverfile := $(objtree)/Module.symvers - -+kernelmarkersfile := $(objtree)/Module.markers -+modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers -+ -+markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) -+ - # Step 1), find all modules listed in $(MODVERDIR)/ - __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) - modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) -@@ -53,16 +59,27 @@ quiet_cmd_modpost = MODPOST - cmd_modpost = scripts/mod/modpost \ - $(if $(CONFIG_MODVERSIONS),-m) \ - $(if $(KBUILD_EXTMOD),-i,-o) $(symverfile) \ -+ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ -+ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ - $(filter-out FORCE,$^) - - .PHONY: __modpost - __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE - $(call cmd,modpost) - -+quiet_cmd_kernel-mod = MODPOST $@ -+ cmd_kernel-mod = $(cmd_modpost) $@ -+ -+vmlinux.o: FORCE -+ $(call cmd,kernel-mod) -+ - # Declare generated files as targets for modpost - $(symverfile): __modpost ; - $(modules:.ko=.mod.c): __modpost ; - -+ifdef CONFIG_MARKERS -+$(markersfile): __modpost ; -+endif - - # Step 5), compile all *.mod.c files - -diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c -index 2a174e5..c25948c 100644 ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -10,7 +10,8 @@ - * - * Usage: modpost vmlinux module1.o module2.o ... 
- */ -- -+#define _GNU_SOURCE -+#include - #include - #include "modpost.h" - -@@ -289,7 +290,9 @@ parse_elf(struct elf_info *info, const char *filename) - if (strcmp(secstrings+sechdrs[i].sh_name, ".modinfo") == 0) { - info->modinfo = (void *)hdr + sechdrs[i].sh_offset; - info->modinfo_len = sechdrs[i].sh_size; -- } -+ } else if (strcmp(secstrings+sechdrs[i].sh_name, "__markers_strings") == 0) -+ info->markers_strings_sec = i; -+ - if (sechdrs[i].sh_type != SHT_SYMTAB) - continue; - -@@ -397,6 +400,63 @@ is_vmlinux(const char *modname) - return strcmp(myname, "vmlinux") == 0; - } - -+static void get_markers(struct elf_info *info, struct module *mod) -+{ -+ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; -+ const char *strings = (const char *) info->hdr + sh->sh_offset; -+ const Elf_Sym *sym, *first_sym, *last_sym; -+ size_t n; -+ -+ -+ if (!info->markers_strings_sec) -+ return; -+ -+ /* -+ * First count the strings. We look for all the symbols defined -+ * in the __markers_strings section named __mstrtab_*. For -+ * these local names, the compiler puts a random .NNN suffix on, -+ * so the names don't correspond exactly. -+ */ -+ first_sym = last_sym = NULL; -+ n = 0; -+ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) -+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && -+ sym->st_shndx == info->markers_strings_sec && -+ !strncmp(info->strtab + sym->st_name, -+ "__mstrtab_", sizeof "__mstrtab_" - 1)) { -+ if (first_sym == NULL) -+ first_sym = sym; -+ last_sym = sym; -+ ++n; -+ } -+ -+ if (n == 0) -+ return; -+ /* -+ * Now collect each name and format into a line for the output. -+ * Lines look like: -+ * marker_name vmlinux marker %s format %d -+ * The format string after the second \t can use whitespace. 
-+ */ -+ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); -+ mod->nmarkers = n; -+ -+ n = 0; -+ for (sym = first_sym; sym <= last_sym; sym++) -+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && -+ sym->st_shndx == info->markers_strings_sec && -+ !strncmp(info->strtab + sym->st_name, -+ "__mstrtab_", sizeof "__mstrtab_" - 1)) { -+ const char *name = strings + sym->st_value; -+ const char *fmt = strchr(name, '\0') + 1; -+ char *line = NULL; -+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); -+ NOFAIL(line); -+ mod->markers[n++] = line; -+ } -+} -+ -+ - void - read_symbols(char *modname) - { -@@ -426,6 +486,7 @@ read_symbols(char *modname) - } - maybe_frob_version(modname, info.modinfo, info.modinfo_len, - (void *)info.modinfo - (void *)info.hdr); -+ get_markers(&info, mod); - parse_elf_finish(&info); - - /* Our trick to get versioning for struct_module - it's -@@ -682,6 +743,92 @@ write_dump(const char *fname) - write_if_changed(&buf, fname); - } - -+static void add_marker(struct module *mod, const char *name, const char *fmt) -+{ -+ char *line = NULL; -+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); -+ NOFAIL(line); -+ -+ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * -+ sizeof mod->markers[0]))); -+ mod->markers[mod->nmarkers++] = line; -+} -+ -+static void read_markers(const char *fname) -+{ -+ unsigned long size, pos = 0; -+ void *file = grab_file(fname, &size); -+ char *line; -+ -+ if (!file) /* No old markers, silently ignore */ -+ return; -+ -+ while ((line = get_next_line(&pos, file, size))) { -+ char *marker, *modname, *fmt; -+ struct module *mod; -+ -+ marker = line; -+ modname = strchr(marker, '\t'); -+ if (!modname) -+ goto fail; -+ *modname++ = '\0'; -+ fmt = strchr(modname, '\t'); -+ if (!fmt) -+ goto fail; -+ *fmt++ = '\0'; -+ if (*marker == '\0' || *modname == '\0') -+ goto fail; -+ -+ mod = find_module(modname); -+ if (!mod) { -+ if (is_vmlinux(modname)) -+ have_vmlinux = 1; -+ mod = 
new_module(NOFAIL(strdup(modname))); -+ mod->skip = 1; -+ } -+ -+ add_marker(mod, marker, fmt); -+ } -+ return; -+fail: -+ fatal("parse error in markers list file\n"); -+} -+ -+static int compare_strings(const void *a, const void *b) -+{ -+ return strcmp(*(const char **) a, *(const char **) b); -+} -+ -+static void write_markers(const char *fname) -+{ -+ struct buffer buf = { }; -+ struct module *mod; -+ size_t i; -+ -+ for (mod = modules; mod; mod = mod->next) -+ if (mod->markers != NULL) { -+ /* -+ * Sort the strings so we can skip duplicates when -+ * we write them out. -+ */ -+ qsort(mod->markers, mod->nmarkers, -+ sizeof mod->markers[0], &compare_strings); -+ for (i = 0; i < mod->nmarkers; ++i) { -+ char *line = mod->markers[i]; -+ buf_write(&buf, line, strlen(line)); -+ while (i + 1 < mod->nmarkers && -+ !strcmp(mod->markers[i], -+ mod->markers[i + 1])) -+ free(mod->markers[i++]); -+ free(mod->markers[i]); -+ } -+ free(mod->markers); -+ mod->markers = NULL; -+ } -+ -+ write_if_changed(&buf, fname); -+} -+ - int - main(int argc, char **argv) - { -@@ -690,8 +837,10 @@ main(int argc, char **argv) - char fname[SZ]; - char *dump_read = NULL, *dump_write = NULL; - int opt; -+ char *markers_read = NULL; -+ char *markers_write = NULL; - -- while ((opt = getopt(argc, argv, "i:mo:")) != -1) { -+ while ((opt = getopt(argc, argv, "i:mo:M:K:")) != -1) { - switch(opt) { - case 'i': - dump_read = optarg; -@@ -702,6 +851,12 @@ main(int argc, char **argv) - case 'o': - dump_write = optarg; - break; -+ case 'M': -+ markers_write = optarg; -+ break; -+ case 'K': -+ markers_read = optarg; -+ break; - default: - exit(1); - } -@@ -732,6 +887,12 @@ main(int argc, char **argv) - if (dump_write) - write_dump(dump_write); - -+ if (markers_read) -+ read_markers(markers_read); -+ -+ if (markers_write) -+ write_markers(markers_write); -+ - return 0; - } - -diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h -index 4871343..d79d7ea 100644 ---- a/scripts/mod/modpost.h -+++ 
b/scripts/mod/modpost.h -@@ -18,6 +18,7 @@ - #define Elf_Sym Elf32_Sym - #define ELF_ST_BIND ELF32_ST_BIND - #define ELF_ST_TYPE ELF32_ST_TYPE -+#define Elf_Section Elf32_Half - - #else - -@@ -26,7 +27,7 @@ - #define Elf_Sym Elf64_Sym - #define ELF_ST_BIND ELF64_ST_BIND - #define ELF_ST_TYPE ELF64_ST_TYPE -- -+#define Elf_Section Elf64_Half - #endif - - #if KERNEL_ELFDATA != HOST_ELFDATA -@@ -77,6 +78,8 @@ struct module { - int has_init; - int has_cleanup; - struct buffer dev_table_buf; -+ char **markers; -+ size_t nmarkers; - }; - - struct elf_info { -@@ -85,6 +88,7 @@ struct elf_info { - Elf_Shdr *sechdrs; - Elf_Sym *symtab_start; - Elf_Sym *symtab_stop; -+ Elf_Section markers_strings_sec; - const char *strtab; - char *modinfo; - unsigned int modinfo_len; Deleted: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch 2008-07-04 02:18:23 UTC (rev 6) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch 2008-07-04 02:22:00 UTC (rev 7) @@ -1,1917 +0,0 @@ -diff --git a/Makefile b/Makefile -index 5bfc101..d495f16 100644 ---- a/Makefile -+++ b/Makefile -@@ -583,7 +583,7 @@ quiet_cmd_vmlinux__ ?= LD $@ - cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ - -T $(vmlinux-lds) $(vmlinux-init) \ - --start-group $(vmlinux-main) --end-group \ -- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) -+ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) - - # Generate new vmlinux version - quiet_cmd_vmlinux_version = GEN .version -@@ -688,10 +688,30 @@ $(KALLSYMS): scripts ; - - endif # ifdef CONFIG_KALLSYMS - -+# Do modpost on a prelinked vmlinux. The finally linked vmlinux has -+# relevant sections renamed as per the linker script. 
-+quiet_cmd_vmlinux-modpost = LD $@ -+ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ -+ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ -+ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) -+define rule_vmlinux-modpost -+ : -+ +$(call cmd,vmlinux-modpost) -+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ -+ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd -+endef -+ -+ - # vmlinux image - including updated kernel symbols --vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE -+vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE -+ $(call vmlinux-modpost) - $(call if_changed_rule,vmlinux__) - -+# build vmlinux.o first to catch section mismatch errors early -+$(kallsyms.o): vmlinux.o -+vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE -+ $(call if_changed_rule,vmlinux-modpost) -+ - # The actual objects are generated when descending, - # make sure no implicit rule kicks in - $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; -diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug -index cf069b7..d39d5c5 100644 ---- a/arch/i386/Kconfig.debug -+++ b/arch/i386/Kconfig.debug -@@ -29,6 +29,13 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. 
-+ - config DEBUG_STACK_USAGE - bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL -diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S -index e8db99c..b846b21 100644 ---- a/arch/i386/kernel/vmlinux.lds.S -+++ b/arch/i386/kernel/vmlinux.lds.S -@@ -53,6 +53,7 @@ SECTIONS - /* writeable */ - .data : { /* Data */ - *(.data) -+ MARKER - CONSTRUCTORS - } - -diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug -index 2782b48..17d4a44 100644 ---- a/arch/ia64/Kconfig.debug -+++ b/arch/ia64/Kconfig.debug -@@ -12,6 +12,12 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. - - choice - prompt "Physical memory granularity" -diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S -index a676e79..c6ea47e 100644 ---- a/arch/ia64/kernel/vmlinux.lds.S -+++ b/arch/ia64/kernel/vmlinux.lds.S -@@ -193,7 +193,7 @@ SECTIONS - - data : { } :data - .data : AT(ADDR(.data) - LOAD_OFFSET) -- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } -+ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } - - . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ - .got : AT(ADDR(.got) - LOAD_OFFSET) -diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug -index 860472a..5c3a9f5 100644 ---- a/arch/ppc64/Kconfig.debug -+++ b/arch/ppc64/Kconfig.debug -@@ -16,6 +16,13 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. 
-+ - config DEBUG_STACK_USAGE - bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL -diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S -index 4103cc1..76d0bf0 100644 ---- a/arch/ppc64/kernel/vmlinux.lds.S -+++ b/arch/ppc64/kernel/vmlinux.lds.S -@@ -118,6 +118,7 @@ SECTIONS - .data : { - *(.data .data.rel* .toc1) - *(.branch_lt) -+ MARKER - } - - .opd : { -diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug -index cd8d39f..f39bf8d 100644 ---- a/arch/sparc64/Kconfig.debug -+++ b/arch/sparc64/Kconfig.debug -@@ -21,6 +21,13 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. -+ - config DEBUG_DCFLUSH - bool "D-cache flush debugging" - depends on DEBUG_KERNEL -diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S -index a710d38..d4b3b62 100644 ---- a/arch/sparc64/kernel/vmlinux.lds.S -+++ b/arch/sparc64/kernel/vmlinux.lds.S -@@ -27,6 +27,7 @@ SECTIONS - .data : - { - *(.data) -+ MARKER - CONSTRUCTORS - } - .data1 : { *(.data1) } -diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug -index cb33186..d0260cb 100644 ---- a/arch/x86_64/Kconfig.debug -+++ b/arch/x86_64/Kconfig.debug -@@ -55,6 +55,13 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. 
-+ - config IOMMU_LEAK - bool "IOMMU leak tracing" - depends on DEBUG_KERNEL -diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S -index 053c826..3b3928f 100644 ---- a/arch/x86_64/kernel/vmlinux.lds.S -+++ b/arch/x86_64/kernel/vmlinux.lds.S -@@ -33,6 +33,7 @@ SECTIONS - - .data : { /* Data */ - *(.data) -+ MARKER - CONSTRUCTORS - } - -diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c -index 181f3d2..c7c5dc6 100644 ---- a/drivers/net/bonding/bond_3ad.c -+++ b/drivers/net/bonding/bond_3ad.c -@@ -158,7 +158,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); - - // ================= main 802.3ad protocol functions ================== - static int ad_lacpdu_send(struct port *port); --static int ad_marker_send(struct port *port, struct marker *marker); -+static int ad_marker_send(struct port *port, struct bond_marker *marker); - static void ad_mux_machine(struct port *port); - static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); - static void ad_tx_machine(struct port *port); -@@ -171,8 +171,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast); - static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); - static void ad_enable_collecting_distributing(struct port *port); - static void ad_disable_collecting_distributing(struct port *port); --static void ad_marker_info_received(struct marker *marker_info, struct port *port); --static void ad_marker_response_received(struct marker *marker, struct port *port); -+static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); -+static void ad_marker_response_received(struct bond_marker *marker, struct port *port); - - - ///////////////////////////////////////////////////////////////////////////////// -@@ -945,12 +945,13 @@ static int ad_lacpdu_send(struct port *port) - * Returns: 0 on success - * < 0 on error - */ --static int ad_marker_send(struct port *port, struct marker *marker) 
-+static int ad_marker_send(struct port *port, struct bond_marker *marker) - { - struct slave *slave = port->slave; - struct sk_buff *skb; -- struct marker_header *marker_header; -- int length = sizeof(struct marker_header); -+ struct bond_marker_header *marker_header; -+ int length = sizeof(struct bond_marker_header); -+ - struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; - - skb = dev_alloc_skb(length + 16); -@@ -965,7 +966,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) - skb->nh.raw = skb->data + ETH_HLEN; - skb->protocol = PKT_TYPE_LACPDU; - -- marker_header = (struct marker_header *)skb_put(skb, length); -+ marker_header = (struct bond_marker_header *)skb_put(skb, length); - - marker_header->ad_header.destination_address = lacpdu_multicast_address; - /* Note: source addres is set to be the member's PERMANENT address, because we use it -@@ -1762,7 +1763,7 @@ static void ad_disable_collecting_distributing(struct port *port) - */ - static void ad_marker_info_send(struct port *port) - { -- struct marker marker; -+ struct bond_marker marker; - u16 index; - - // fill the marker PDU with the appropriate values -@@ -1795,13 +1796,14 @@ static void ad_marker_info_send(struct port *port) - * @port: the port we're looking at - * - */ --static void ad_marker_info_received(struct marker *marker_info,struct port *port) -+static void ad_marker_info_received(struct bond_marker *marker_info, -+ struct port *port) - { -- struct marker marker; -+ struct bond_marker marker; - - // copy the received marker data to the response marker - //marker = *marker_info; -- memcpy(&marker, marker_info, sizeof(struct marker)); -+ memcpy(&marker, marker_info, sizeof(struct bond_marker)); - // change the marker subtype to marker response - marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; - // send the marker response -@@ -1820,7 +1822,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port - * response for marker PDU's, in 
this stage, but only to respond to marker - * information. - */ --static void ad_marker_response_received(struct marker *marker, struct port *port) -+static void ad_marker_response_received(struct bond_marker *marker, -+ struct port *port) - { - marker=NULL; // just to satisfy the compiler - port=NULL; // just to satisfy the compiler -@@ -2208,15 +2211,16 @@ void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 leng - case AD_TYPE_MARKER: - // No need to convert fields to Little Endian since we don't use the marker's fields. - -- switch (((struct marker *)lacpdu)->tlv_type) { -+ switch (((struct bond_marker *)lacpdu)->tlv_type) { - case AD_MARKER_INFORMATION_SUBTYPE: - dprintk("Received Marker Information on port %d\n", port->actor_port_number); -- ad_marker_info_received((struct marker *)lacpdu, port); -+ ad_marker_info_received((struct bond_marker *)lacpdu, port); -+ - break; - - case AD_MARKER_RESPONSE_SUBTYPE: - dprintk("Received Marker Response on port %d\n", port->actor_port_number); -- ad_marker_response_received((struct marker *)lacpdu, port); -+ ad_marker_response_received((struct bond_marker *)lacpdu, port); - break; - - default: -diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h -index 4c60b17..2bb477e 100644 ---- a/drivers/net/bonding/bond_3ad.h -+++ b/drivers/net/bonding/bond_3ad.h -@@ -105,7 +105,8 @@ typedef enum { - typedef enum { - AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype - AD_MARKER_RESPONSE_SUBTYPE // marker response subtype --} marker_subtype_t; -+} bond_marker_subtype_t; -+ - - // timers types(43.4.9 in the 802.3ad standard) - typedef enum { -@@ -161,7 +162,7 @@ typedef struct lacpdu_header { - } lacpdu_header_t; - - // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) --typedef struct marker { -+typedef struct bond_marker { - u8 subtype; // = 0x02 (marker PDU) - u8 version_number; // = 0x01 - u8 tlv_type; // = 0x01 (marker information) -@@ -174,12 
+175,12 @@ typedef struct marker { - u8 tlv_type_terminator; // = 0x00 - u8 terminator_length; // = 0x00 - u8 reserved_90[90]; // = 0 --} marker_t; -+} bond_marker_t; - --typedef struct marker_header { -+typedef struct bond_marker_header { - struct ad_header ad_header; -- struct marker marker; --} marker_header_t; -+ struct bond_marker marker; -+} bond_marker_header_t; - - #pragma pack() - -diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h -index bb340cb..fd6e57f 100644 ---- a/include/asm-generic/vmlinux.lds.h -+++ b/include/asm-generic/vmlinux.lds.h -@@ -6,10 +6,18 @@ - #define VMLINUX_SYMBOL(_sym_) _sym_ - #endif - -+/* Kernel markers : pointers */ -+#define MARKER \ -+ . = ALIGN(8); \ -+ VMLINUX_SYMBOL(__start___markers) = .; \ -+ *(__markers) \ -+ VMLINUX_SYMBOL(__stop___markers) = .; -+ - #define RODATA \ - .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ - *(.rodata) *(.rodata.*) \ - *(__vermagic) /* Kernel version magic */ \ -+ *(__markers_strings) /* Markers: strings */ \ - } \ - \ - .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ -diff --git a/include/linux/marker.h b/include/linux/marker.h -new file mode 100644 -index 0000000..efbc82b ---- /dev/null -+++ b/include/linux/marker.h -@@ -0,0 +1,139 @@ -+#ifndef _LINUX_MARKER_H -+#define _LINUX_MARKER_H -+ -+/* -+ * Code markup for dynamic and static tracing. -+ * -+ * See Documentation/marker.txt. -+ * -+ * (C) Copyright 2006 Mathieu Desnoyers -+ * -+ * This file is released under the GPLv2. -+ * See the file COPYING for more details. -+ */ -+ -+#include -+ -+struct module; -+struct marker; -+ -+/** -+ * marker_probe_func - Type of a marker probe function -+ * @probe_private: probe private data -+ * @call_private: call site private data -+ * @fmt: format string -+ * @args: variable argument list pointer. Use a pointer to overcome C's -+ * inability to pass this around as a pointer in a portable manner in -+ * the callee otherwise. -+ * -+ * Type of marker probe functions. 
They receive the mdata and need to parse the -+ * format string to recover the variable argument list. -+ */ -+typedef void marker_probe_func(void *probe_private, void *call_private, -+ const char *fmt, va_list *args); -+ -+struct marker_probe_closure { -+ marker_probe_func *func; /* Callback */ -+ void *probe_private; /* Private probe data */ -+}; -+ -+struct marker { -+ const char *name; /* Marker name */ -+ const char *format; /* Marker format string, describing the -+ * variable argument list. -+ */ -+ char state; /* Marker state. */ -+ char ptype; /* probe type : 0 : single, 1 : multi */ -+ void (*call)(const struct marker *mdata, /* Probe wrapper */ -+ void *call_private, const char *fmt, ...); -+ struct marker_probe_closure single; -+ struct marker_probe_closure *multi; -+} __attribute__((aligned(8))); -+ -+#ifdef CONFIG_MARKERS -+ -+/* -+ * Note : the empty asm volatile with read constraint is used here instead of a -+ * "used" attribute to fix a gcc 4.1.x bug. -+ * Make sure the alignment of the structure in the __markers section will -+ * not add unwanted padding between the beginning of the section and the -+ * structure. Force alignment to the same alignment as the section start. -+ */ -+#define __trace_mark(name, call_private, format, args...) 
\ -+ do { \ -+ static const char __mstrtab_##name[] \ -+ __attribute__((section("__markers_strings"))) \ -+ = #name "\0" format; \ -+ static struct marker __mark_##name \ -+ __attribute__((section("__markers"), aligned(8))) = \ -+ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ -+ 0, 0, marker_probe_cb, \ -+ { __mark_empty_function, NULL}, NULL }; \ -+ __mark_check_format(format, ## args); \ -+ if (unlikely(__mark_##name.state)) { \ -+ (*__mark_##name.call) \ -+ (&__mark_##name, call_private, \ -+ format, ## args); \ -+ } \ -+ } while (0) -+ -+extern void marker_update_probe_range(struct marker *begin, -+ struct marker *end); -+#else /* !CONFIG_MARKERS */ -+#define __trace_mark(name, call_private, format, args...) \ -+ __mark_check_format(format, ## args) -+static inline void marker_update_probe_range(struct marker *begin, -+ struct marker *end) -+{ } -+#endif /* CONFIG_MARKERS */ -+ -+/** -+ * trace_mark - Marker -+ * @name: marker name, not quoted. -+ * @format: format string -+ * @args...: variable argument list -+ * -+ * Places a marker. -+ */ -+#define trace_mark(name, format, args...) \ -+ __trace_mark(name, NULL, format, ## args) -+ -+/** -+ * MARK_NOARGS - Format string for a marker with no argument. -+ */ -+#define MARK_NOARGS " " -+ -+/* To be used for string format validity checking with gcc */ -+static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) -+{ -+} -+ -+extern marker_probe_func __mark_empty_function; -+ -+extern void marker_probe_cb(const struct marker *mdata, -+ void *call_private, const char *fmt, ...); -+extern void marker_probe_cb_noarg(const struct marker *mdata, -+ void *call_private, const char *fmt, ...); -+ -+/* -+ * Connect a probe to a marker. -+ * private data pointer must be a valid allocated memory address, or NULL. 
-+ */ -+extern int marker_probe_register(const char *name, const char *format, -+ marker_probe_func *probe, void *probe_private); -+ -+/* -+ * Returns the private data given to marker_probe_register. -+ */ -+extern int marker_probe_unregister(const char *name, -+ marker_probe_func *probe, void *probe_private); -+/* -+ * Unregister a marker by providing the registered private data. -+ */ -+extern int marker_probe_unregister_private_data(marker_probe_func *probe, -+ void *probe_private); -+ -+extern void *marker_get_private_data(const char *name, marker_probe_func *probe, -+ int num); -+ -+#endif -diff --git a/include/linux/module.h b/include/linux/module.h -index 8da8948..2ad5efd 100644 ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -18,6 +18,7 @@ - #include - #include - #include -+#include - #include - - #include -@@ -328,6 +329,10 @@ struct module - /* The command line arguments (may be mangled). People like - keeping pointers to this stuff */ - char *args; -+#ifdef CONFIG_MARKERS -+ struct marker *markers; -+ unsigned int num_markers; -+#endif - }; - - /* FIXME: It'd be nice to isolate modules during init, too, so they -@@ -448,6 +453,7 @@ int register_module_notifier(struct notifier_block * nb); - int unregister_module_notifier(struct notifier_block * nb); - - extern void print_modules(void); -+extern void module_update_markers(void); - #else /* !CONFIG_MODULES... 
*/ - #define EXPORT_SYMBOL(sym) - #define EXPORT_SYMBOL_GPL(sym) -@@ -537,6 +543,10 @@ static inline int unregister_module_notifier(struct notifier_block * nb) - static inline void print_modules(void) - { - } -+ -+static inline void module_update_markers(void) -+{ -+} - #endif /* CONFIG_MODULES */ - - #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) -diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h -index 91057d6..bcd0acb 100644 ---- a/include/linux/rcupdate.h -+++ b/include/linux/rcupdate.h -@@ -42,6 +42,19 @@ - #include - #include - -+/* -+ * Prevent the compiler from merging or refetching accesses. The compiler -+ * is also forbidden from reordering successive instances of ACCESS_ONCE(), -+ * but only when the compiler is aware of some particular ordering. One way -+ * to make the compiler aware of ordering is to put the two invocations of -+ * ACCESS_ONCE() in different C statements. -+ * -+ * This macro does absolutely -nothing- to prevent the CPU from reordering, -+ * merging, or refetching absolutely anything at any time. 
-+ * -+ */ -+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) -+ - /** - * struct rcu_head - callback structure for use with RCU - * @next: next update requests in a list -@@ -102,6 +115,7 @@ struct rcu_data { - struct rcu_head *donelist; - struct rcu_head **donetail; - int cpu; -+ struct rcu_head barrier; - }; - - DECLARE_PER_CPU(struct rcu_data, rcu_data); -@@ -266,6 +280,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head, - extern void FASTCALL(call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *head))); - extern void synchronize_kernel(void); -- -+extern void rcu_barrier(void); - #endif /* __KERNEL__ */ - #endif /* __LINUX_RCUPDATE_H */ -diff --git a/kernel/Makefile b/kernel/Makefile -index 0b8c8ca..f8248bc 100644 ---- a/kernel/Makefile -+++ b/kernel/Makefile -@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o - obj-$(CONFIG_AUDITSYSCALL) += auditsc.o - obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o - obj-$(CONFIG_KPROBES) += kprobes.o -+obj-$(CONFIG_MARKERS) += marker.o - - ifneq ($(CONFIG_IA64),y) - # According to Alan Modra , the -fno-omit-frame-pointer is -diff --git a/kernel/marker.c b/kernel/marker.c -new file mode 100644 -index 0000000..c4c2cd8 ---- /dev/null -+++ b/kernel/marker.c -@@ -0,0 +1,851 @@ -+/* -+ * Copyright (C) 2007 Mathieu Desnoyers -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+extern struct marker __start___markers[]; -+extern struct marker __stop___markers[]; -+ -+/* Set to 1 to enable marker debug output */ -+const int marker_debug; -+ -+/* -+ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin -+ * and module markers and the hash table. -+ */ -+static DEFINE_MUTEX(markers_mutex); -+ -+/* -+ * Marker hash table, containing the active markers. -+ * Protected by module_mutex. -+ */ -+#define MARKER_HASH_BITS 6 -+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) -+ -+/* -+ * Note about RCU : -+ * It is used to make sure every handler has finished using its private data -+ * between two consecutive operation (add or remove) on a given marker. It is -+ * also used to delay the free of multiple probes array until a quiescent state -+ * is reached. -+ * marker entries modifications are protected by the markers_mutex. -+ */ -+struct marker_entry { -+ struct hlist_node hlist; -+ char *format; -+ void (*call)(const struct marker *mdata, /* Probe wrapper */ -+ void *call_private, const char *fmt, ...); -+ struct marker_probe_closure single; -+ struct marker_probe_closure *multi; -+ int refcount; /* Number of times armed. 0 if disarmed. */ -+ struct rcu_head rcu; -+ void *oldptr; -+ char rcu_pending:1; -+ char ptype:1; -+ char name[0]; /* Contains name'\0'format'\0' */ -+}; -+ -+static struct hlist_head marker_table[MARKER_TABLE_SIZE]; -+ -+/** -+ * __mark_empty_function - Empty probe callback -+ * @probe_private: probe private data -+ * @call_private: call site private data -+ * @fmt: format string -+ * @...: variable argument list -+ * -+ * Empty callback provided as a probe to the markers. 
By providing this to a -+ * disabled marker, we make sure the execution flow is always valid even -+ * though the function pointer change and the marker enabling are two distinct -+ * operations that modifies the execution flow of preemptible code. -+ */ -+void __mark_empty_function(void *probe_private, void *call_private, -+ const char *fmt, va_list *args) -+{ -+} -+EXPORT_SYMBOL_GPL(__mark_empty_function); -+ -+/* -+ * marker_probe_cb Callback that prepares the variable argument list for probes. -+ * @mdata: pointer of type struct marker -+ * @call_private: caller site private data -+ * @fmt: format string -+ * @...: Variable argument list. -+ * -+ * Since we do not use "typical" pointer based RCU in the 1 argument case, we -+ * need to put a full smp_rmb() in this branch. This is why we do not use -+ * rcu_dereference() for the pointer read. -+ */ -+void marker_probe_cb(const struct marker *mdata, void *call_private, -+ const char *fmt, ...) -+{ -+ va_list args; -+ char ptype; -+ -+ /* -+ * disabling preemption to make sure the teardown of the callbacks can -+ * be done correctly when they are in modules and they insure RCU read -+ * coherency. -+ */ -+ preempt_disable(); -+ ptype = ACCESS_ONCE(mdata->ptype); -+ if (likely(!ptype)) { -+ marker_probe_func *func; -+ /* Must read the ptype before ptr. They are not data dependant, -+ * so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func = ACCESS_ONCE(mdata->single.func); -+ /* Must read the ptr before private data. They are not data -+ * dependant, so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ va_start(args, fmt); -+ func(mdata->single.probe_private, call_private, fmt, &args); -+ va_end(args); -+ } else { -+ struct marker_probe_closure *multi; -+ int i; -+ /* -+ * multi points to an array, therefore accessing the array -+ * depends on reading multi. However, even in this case, -+ * we must insure that the pointer is read _before_ the array -+ * data. 
Same as rcu_dereference, but we need a full smp_rmb() -+ * in the fast path, so put the explicit barrier here. -+ */ -+ smp_read_barrier_depends(); -+ multi = ACCESS_ONCE(mdata->multi); -+ for (i = 0; multi[i].func; i++) { -+ va_start(args, fmt); -+ multi[i].func(multi[i].probe_private, call_private, fmt, -+ &args); -+ va_end(args); -+ } -+ } -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(marker_probe_cb); -+ -+/* -+ * marker_probe_cb Callback that does not prepare the variable argument list. -+ * @mdata: pointer of type struct marker -+ * @call_private: caller site private data -+ * @fmt: format string -+ * @...: Variable argument list. -+ * -+ * Should be connected to markers "MARK_NOARGS". -+ */ -+void marker_probe_cb_noarg(const struct marker *mdata, -+ void *call_private, const char *fmt, ...) -+{ -+ va_list args; /* not initialized */ -+ char ptype; -+ -+ preempt_disable(); -+ ptype = ACCESS_ONCE(mdata->ptype); -+ if (likely(!ptype)) { -+ marker_probe_func *func; -+ /* Must read the ptype before ptr. They are not data dependant, -+ * so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func = ACCESS_ONCE(mdata->single.func); -+ /* Must read the ptr before private data. They are not data -+ * dependant, so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func(mdata->single.probe_private, call_private, fmt, &args); -+ } else { -+ struct marker_probe_closure *multi; -+ int i; -+ /* -+ * multi points to an array, therefore accessing the array -+ * depends on reading multi. However, even in this case, -+ * we must insure that the pointer is read _before_ the array -+ * data. Same as rcu_dereference, but we need a full smp_rmb() -+ * in the fast path, so put the explicit barrier here. 
-+ */ -+ smp_read_barrier_depends(); -+ multi = ACCESS_ONCE(mdata->multi); -+ for (i = 0; multi[i].func; i++) -+ multi[i].func(multi[i].probe_private, call_private, fmt, -+ &args); -+ } -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); -+ -+static void free_old_closure(struct rcu_head *head) -+{ -+ struct marker_entry *entry = container_of(head, -+ struct marker_entry, rcu); -+ kfree(entry->oldptr); -+ /* Make sure we free the data before setting the pending flag to 0 */ -+ smp_wmb(); -+ entry->rcu_pending = 0; -+} -+ -+static void debug_print_probes(struct marker_entry *entry) -+{ -+ int i; -+ -+ if (!marker_debug) -+ return; -+ -+ if (!entry->ptype) { -+ printk(KERN_DEBUG "Single probe : %p %p\n", -+ entry->single.func, -+ entry->single.probe_private); -+ } else { -+ for (i = 0; entry->multi[i].func; i++) -+ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, -+ entry->multi[i].func, -+ entry->multi[i].probe_private); -+ } -+} -+ -+static struct marker_probe_closure * -+marker_entry_add_probe(struct marker_entry *entry, -+ marker_probe_func *probe, void *probe_private) -+{ -+ int nr_probes = 0; -+ struct marker_probe_closure *old, *new; -+ -+ WARN_ON(!probe); -+ -+ debug_print_probes(entry); -+ old = entry->multi; -+ if (!entry->ptype) { -+ if (entry->single.func == probe && -+ entry->single.probe_private == probe_private) -+ return ERR_PTR(-EBUSY); -+ if (entry->single.func == __mark_empty_function) { -+ /* 0 -> 1 probes */ -+ entry->single.func = probe; -+ entry->single.probe_private = probe_private; -+ entry->refcount = 1; -+ entry->ptype = 0; -+ debug_print_probes(entry); -+ return NULL; -+ } else { -+ /* 1 -> 2 probes */ -+ nr_probes = 1; -+ old = NULL; -+ } -+ } else { -+ /* (N -> N+1), (N != 0, 1) probes */ -+ for (nr_probes = 0; old[nr_probes].func; nr_probes++) -+ if (old[nr_probes].func == probe -+ && old[nr_probes].probe_private -+ == probe_private) -+ return ERR_PTR(-EBUSY); -+ } -+ /* + 2 : one for new probe, one for NULL func */ -+ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), -+ GFP_KERNEL); -+ if (new == NULL) -+ return ERR_PTR(-ENOMEM); -+ if (!old) -+ new[0] = entry->single; -+ else -+ memcpy(new, old, -+ nr_probes * sizeof(struct marker_probe_closure)); -+ new[nr_probes].func = probe; -+ new[nr_probes].probe_private = probe_private; -+ entry->refcount = nr_probes + 1; -+ entry->multi = new; -+ entry->ptype = 1; -+ debug_print_probes(entry); -+ return old; -+} -+ -+static struct marker_probe_closure * -+marker_entry_remove_probe(struct marker_entry *entry, -+ marker_probe_func *probe, void *probe_private) -+{ -+ int nr_probes = 0, nr_del = 0, i; -+ struct marker_probe_closure *old, *new; -+ -+ old = entry->multi; -+ -+ debug_print_probes(entry); -+ if (!entry->ptype) { -+ /* 0 -> N is an error */ -+ WARN_ON(entry->single.func == __mark_empty_function); -+ /* 1 -> 0 probes */ -+ WARN_ON(probe && entry->single.func != probe); -+ WARN_ON(entry->single.probe_private != probe_private); -+ entry->single.func = __mark_empty_function; -+ entry->refcount = 0; -+ entry->ptype = 0; -+ debug_print_probes(entry); -+ return NULL; -+ } else { -+ /* (N -> M), (N > 1, M >= 0) probes */ -+ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { -+ if ((!probe || old[nr_probes].func == probe) -+ && old[nr_probes].probe_private -+ == probe_private) -+ nr_del++; -+ } -+ } -+ -+ if (nr_probes - nr_del == 0) { -+ /* N -> 0, (N > 1) */ -+ entry->single.func = __mark_empty_function; -+ entry->refcount = 0; -+ entry->ptype = 0; -+ } else if (nr_probes - nr_del == 1) { -+ /* N -> 1, (N > 1) */ -+ for (i = 0; old[i].func; i++) -+ if ((probe && old[i].func != probe) || -+ old[i].probe_private != probe_private) -+ entry->single = old[i]; -+ entry->refcount = 1; -+ entry->ptype = 0; -+ } else { -+ int j = 0; -+ /* N -> M, (N > 1, M > 1) */ -+ /* + 1 for NULL */ -+ new = kzalloc((nr_probes - nr_del + 1) -+ * sizeof(struct marker_probe_closure), GFP_KERNEL); -+ if (new == NULL) -+ return 
ERR_PTR(-ENOMEM); -+ for (i = 0; old[i].func; i++) -+ if ((probe && old[i].func != probe) || -+ old[i].probe_private != probe_private) -+ new[j++] = old[i]; -+ entry->refcount = nr_probes - nr_del; -+ entry->ptype = 1; -+ entry->multi = new; -+ } -+ debug_print_probes(entry); -+ return old; -+} -+ -+/* -+ * Get marker if the marker is present in the marker hash table. -+ * Must be called with markers_mutex held. -+ * Returns NULL if not present. -+ */ -+static struct marker_entry *get_marker(const char *name) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ u32 hash = jhash(name, strlen(name), 0); -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) -+ return e; -+ } -+ return NULL; -+} -+ -+/* -+ * Add the marker to the marker hash table. Must be called with markers_mutex -+ * held. -+ */ -+static struct marker_entry *add_marker(const char *name, const char *format) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ size_t name_len = strlen(name) + 1; -+ size_t format_len = 0; -+ u32 hash = jhash(name, name_len-1, 0); -+ -+ if (format) -+ format_len = strlen(format) + 1; -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ printk(KERN_NOTICE -+ "Marker %s busy\n", name); -+ return ERR_PTR(-EBUSY); /* Already there */ -+ } -+ } -+ /* -+ * Using kmalloc here to allocate a variable length element. Could -+ * cause some memory fragmentation if overused. 
-+ */ -+ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, -+ GFP_KERNEL); -+ if (!e) -+ return ERR_PTR(-ENOMEM); -+ memcpy(&e->name[0], name, name_len); -+ if (format) { -+ e->format = &e->name[name_len]; -+ memcpy(e->format, format, format_len); -+ if (strcmp(e->format, MARK_NOARGS) == 0) -+ e->call = marker_probe_cb_noarg; -+ else -+ e->call = marker_probe_cb; -+ trace_mark(core_marker_format, "name %s format %s", -+ e->name, e->format); -+ } else { -+ e->format = NULL; -+ e->call = marker_probe_cb; -+ } -+ e->single.func = __mark_empty_function; -+ e->single.probe_private = NULL; -+ e->multi = NULL; -+ e->ptype = 0; -+ e->refcount = 0; -+ e->rcu_pending = 0; -+ hlist_add_head(&e->hlist, head); -+ return e; -+} -+ -+/* -+ * Remove the marker from the marker hash table. Must be called with mutex_lock -+ * held. -+ */ -+static int remove_marker(const char *name) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ int found = 0; -+ size_t len = strlen(name) + 1; -+ u32 hash = jhash(name, len-1, 0); -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ found = 1; -+ break; -+ } -+ } -+ if (!found) -+ return -ENOENT; -+ if (e->single.func != __mark_empty_function) -+ return -EBUSY; -+ hlist_del(&e->hlist); -+ /* Make sure the call_rcu has been executed */ -+ if (e->rcu_pending) -+ rcu_barrier(); -+ kfree(e); -+ return 0; -+} -+ -+/* -+ * Set the mark_entry format to the format found in the element. 
-+ */ -+static int marker_set_format(struct marker_entry **entry, const char *format) -+{ -+ struct marker_entry *e; -+ size_t name_len = strlen((*entry)->name) + 1; -+ size_t format_len = strlen(format) + 1; -+ -+ -+ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, -+ GFP_KERNEL); -+ if (!e) -+ return -ENOMEM; -+ memcpy(&e->name[0], (*entry)->name, name_len); -+ e->format = &e->name[name_len]; -+ memcpy(e->format, format, format_len); -+ if (strcmp(e->format, MARK_NOARGS) == 0) -+ e->call = marker_probe_cb_noarg; -+ else -+ e->call = marker_probe_cb; -+ e->single = (*entry)->single; -+ e->multi = (*entry)->multi; -+ e->ptype = (*entry)->ptype; -+ e->refcount = (*entry)->refcount; -+ e->rcu_pending = 0; -+ hlist_add_before(&e->hlist, &(*entry)->hlist); -+ hlist_del(&(*entry)->hlist); -+ /* Make sure the call_rcu has been executed */ -+ if ((*entry)->rcu_pending) -+ rcu_barrier(); -+ kfree(*entry); -+ *entry = e; -+ trace_mark(core_marker_format, "name %s format %s", -+ e->name, e->format); -+ return 0; -+} -+ -+/* -+ * Sets the probe callback corresponding to one marker. -+ */ -+static int set_marker(struct marker_entry **entry, struct marker *elem, -+ int active) -+{ -+ int ret; -+ WARN_ON(strcmp((*entry)->name, elem->name) != 0); -+ -+ if ((*entry)->format) { -+ if (strcmp((*entry)->format, elem->format) != 0) { -+ printk(KERN_NOTICE -+ "Format mismatch for probe %s " -+ "(%s), marker (%s)\n", -+ (*entry)->name, -+ (*entry)->format, -+ elem->format); -+ return -EPERM; -+ } -+ } else { -+ ret = marker_set_format(entry, elem->format); -+ if (ret) -+ return ret; -+ } -+ -+ /* -+ * probe_cb setup (statically known) is done here. It is -+ * asynchronous with the rest of execution, therefore we only -+ * pass from a "safe" callback (with argument) to an "unsafe" -+ * callback (does not set arguments). 
-+ */ -+ elem->call = (*entry)->call; -+ /* -+ * Sanity check : -+ * We only update the single probe private data when the ptr is -+ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) -+ */ -+ WARN_ON(elem->single.func != __mark_empty_function -+ && elem->single.probe_private -+ != (*entry)->single.probe_private && -+ !elem->ptype); -+ elem->single.probe_private = (*entry)->single.probe_private; -+ /* -+ * Make sure the private data is valid when we update the -+ * single probe ptr. -+ */ -+ smp_wmb(); -+ elem->single.func = (*entry)->single.func; -+ /* -+ * We also make sure that the new probe callbacks array is consistent -+ * before setting a pointer to it. -+ */ -+ rcu_assign_pointer(elem->multi, (*entry)->multi); -+ /* -+ * Update the function or multi probe array pointer before setting the -+ * ptype. -+ */ -+ smp_wmb(); -+ elem->ptype = (*entry)->ptype; -+ elem->state = active; -+ -+ return 0; -+} -+ -+/* -+ * Disable a marker and its probe callback. -+ * Note: only after a synchronize_sched() issued after setting elem->call to the -+ * empty function insures that the original callback is not used anymore. This -+ * insured by preemption disabling around the call site. -+ */ -+static void disable_marker(struct marker *elem) -+{ -+ /* leave "call" as is. It is known statically. */ -+ elem->state = 0; -+ elem->single.func = __mark_empty_function; -+ /* Update the function before setting the ptype */ -+ smp_wmb(); -+ elem->ptype = 0; /* single probe */ -+ /* -+ * Leave the private data and id there, because removal is racy and -+ * should be done only after a synchronize_sched(). These are never used -+ * until the next initialization anyway. -+ */ -+} -+ -+/** -+ * marker_update_probe_range - Update a probe range -+ * @begin: beginning of the range -+ * @end: end of the range -+ * -+ * Updates the probe callback corresponding to a range of markers. 
-+ */ -+void marker_update_probe_range(struct marker *begin, -+ struct marker *end) -+{ -+ struct marker *iter; -+ struct marker_entry *mark_entry; -+ -+ mutex_lock(&markers_mutex); -+ for (iter = begin; iter < end; iter++) { -+ mark_entry = get_marker(iter->name); -+ if (mark_entry) { -+ set_marker(&mark_entry, iter, -+ !!mark_entry->refcount); -+ /* -+ * ignore error, continue -+ */ -+ } else { -+ disable_marker(iter); -+ } -+ } -+ mutex_unlock(&markers_mutex); -+} -+ -+/* -+ * Update probes, removing the faulty probes. -+ * Issues a synchronize_sched() when no reference to the module passed -+ * as parameter is found in the probes so the probe module can be -+ * safely unloaded from now on. -+ * -+ * Internal callback only changed before the first probe is connected to it. -+ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 -+ * transitions. All other transitions will leave the old private data valid. -+ * This makes the non-atomicity of the callback/private data updates valid. -+ * -+ * "special case" updates : -+ * 0 -> 1 callback -+ * 1 -> 0 callback -+ * 1 -> 2 callbacks -+ * 2 -> 1 callbacks -+ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. -+ * Site effect : marker_set_format may delete the marker entry (creating a -+ * replacement). -+ */ -+static void marker_update_probes(void) -+{ -+ /* Core kernel markers */ -+ marker_update_probe_range(__start___markers, __stop___markers); -+ /* Markers in modules. */ -+ module_update_markers(); -+} -+ -+/** -+ * marker_probe_register - Connect a probe to a marker -+ * @name: marker name -+ * @format: format string -+ * @probe: probe handler -+ * @probe_private: probe private data -+ * -+ * private data must be a valid allocated memory address, or NULL. -+ * Returns 0 if ok, error value on error. -+ * The probe address must at least be aligned on the architecture pointer size. 
-+ */ -+int marker_probe_register(const char *name, const char *format, -+ marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ int ret = 0; -+ struct marker_probe_closure *old; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ if (!entry) { -+ entry = add_marker(name, format); -+ if (IS_ERR(entry)) { -+ ret = PTR_ERR(entry); -+ goto end; -+ } -+ } -+ /* -+ * If we detect that a call_rcu is pending for this marker, -+ * make sure it's executed now. -+ */ -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_add_probe(entry, probe, probe_private); -+ if (IS_ERR(old)) { -+ ret = PTR_ERR(old); -+ goto end; -+ } -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ WARN_ON(!entry); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_register); -+ -+/** -+ * marker_probe_unregister - Disconnect a probe from a marker -+ * @name: marker name -+ * @probe: probe function pointer -+ * @probe_private: probe private data -+ * -+ * Returns the private data given to marker_probe_register, or an ERR_PTR(). -+ * We do not need to call a synchronize_sched to make sure the probes have -+ * finished running before doing a module unload, because the module unload -+ * itself uses stop_machine(), which insures that every preempt disabled section -+ * have finished. 
-+ */ -+int marker_probe_unregister(const char *name, -+ marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ struct marker_probe_closure *old; -+ int ret = 0; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ if (!entry) { -+ ret = -ENOENT; -+ goto end; -+ } -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_remove_probe(entry, probe, probe_private); -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+ remove_marker(name); /* Ignore busy error message */ -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_unregister); -+ -+static struct marker_entry * -+get_marker_from_private_data(marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ unsigned int i; -+ struct hlist_head *head; -+ struct hlist_node *node; -+ -+ for (i = 0; i < MARKER_TABLE_SIZE; i++) { -+ head = &marker_table[i]; -+ hlist_for_each_entry(entry, node, head, hlist) { -+ if (!entry->ptype) { -+ if (entry->single.func == probe -+ && entry->single.probe_private -+ == probe_private) -+ return entry; -+ } else { -+ struct marker_probe_closure *closure; -+ closure = entry->multi; -+ for (i = 0; closure[i].func; i++) { -+ if (closure[i].func == probe && -+ closure[i].probe_private -+ == probe_private) -+ return entry; -+ } -+ } -+ } -+ } -+ return NULL; -+} -+ -+/** -+ * marker_probe_unregister_private_data - Disconnect a probe from a marker -+ * @probe: probe function -+ * @probe_private: probe private data -+ * -+ * Unregister a probe by providing the registered private data. -+ * Only removes the first marker found in hash table. -+ * Return 0 on success or error value. 
-+ * We do not need to call a synchronize_sched to make sure the probes have -+ * finished running before doing a module unload, because the module unload -+ * itself uses stop_machine(), which insures that every preempt disabled section -+ * have finished. -+ */ -+int marker_probe_unregister_private_data(marker_probe_func *probe, -+ void *probe_private) -+{ -+ struct marker_entry *entry; -+ int ret = 0; -+ struct marker_probe_closure *old; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker_from_private_data(probe, probe_private); -+ if (!entry) { -+ ret = -ENOENT; -+ goto end; -+ } -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_remove_probe(entry, NULL, probe_private); -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker_from_private_data(probe, probe_private); -+ WARN_ON(!entry); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+ remove_marker(entry->name); /* Ignore busy error message */ -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); -+ -+/** -+ * marker_get_private_data - Get a marker's probe private data -+ * @name: marker name -+ * @probe: probe to match -+ * @num: get the nth matching probe's private data -+ * -+ * Returns the nth private data pointer (starting from 0) matching, or an -+ * ERR_PTR. -+ * Returns the private data pointer, or an ERR_PTR. -+ * The private data pointer should _only_ be dereferenced if the caller is the -+ * owner of the data, or its content could vanish. This is mostly used to -+ * confirm that a caller is the owner of a registered probe. 
-+ */ -+void *marker_get_private_data(const char *name, marker_probe_func *probe, -+ int num) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ size_t name_len = strlen(name) + 1; -+ u32 hash = jhash(name, name_len-1, 0); -+ int i; -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ if (!e->ptype) { -+ if (num == 0 && e->single.func == probe) -+ return e->single.probe_private; -+ else -+ break; -+ } else { -+ struct marker_probe_closure *closure; -+ int match = 0; -+ closure = e->multi; -+ for (i = 0; closure[i].func; i++) { -+ if (closure[i].func != probe) -+ continue; -+ if (match++ == num) -+ return closure[i].probe_private; -+ } -+ } -+ } -+ } -+ return ERR_PTR(-ENOENT); -+} -+EXPORT_SYMBOL_GPL(marker_get_private_data); -diff --git a/kernel/module.c b/kernel/module.c -index 7f0ccd8..1cd4c54 100644 ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -1505,6 +1505,8 @@ static struct module *load_module(void __user *umod, - struct exception_table_entry *extable; - mm_segment_t old_fs; - int gpgsig_ok; -+ unsigned int markersindex; -+ unsigned int markersstringsindex; - - DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", - umod, len, uargs); -@@ -1735,6 +1737,9 @@ static struct module *load_module(void __user *umod, - tainted |= TAINT_FORCED_MODULE; - } - #endif -+ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); -+ markersstringsindex = find_sec(hdr, sechdrs, secstrings, -+ "__markers_strings"); - - /* Now do relocations. 
*/ - for (i = 1; i < hdr->e_shnum; i++) { -@@ -1758,6 +1763,12 @@ static struct module *load_module(void __user *umod, - goto cleanup; - } - -+#ifdef CONFIG_MARKERS -+ mod->markers = (void *)sechdrs[markersindex].sh_addr; -+ mod->num_markers = -+ sechdrs[markersindex].sh_size / sizeof(*mod->markers); -+#endif -+ - /* Set up and sort exception table */ - mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); - mod->extable = extable = (void *)sechdrs[exindex].sh_addr; -@@ -1769,6 +1780,12 @@ static struct module *load_module(void __user *umod, - - add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); - -+#ifdef CONFIG_MARKERS -+ if (!tainted) -+ marker_update_probe_range(mod->markers, -+ mod->markers + mod->num_markers); -+#endif -+ - err = module_finalize(hdr, sechdrs, mod); - if (err < 0) - goto cleanup; -@@ -2186,6 +2203,20 @@ void struct_module(struct module *mod) { return; } - EXPORT_SYMBOL(struct_module); - #endif - -+#ifdef CONFIG_MARKERS -+void module_update_markers(void) -+{ -+ struct module *mod; -+ -+ down(&module_mutex); -+ list_for_each_entry(mod, &modules, list) -+ if (!tainted) -+ marker_update_probe_range(mod->markers, -+ mod->markers + mod->num_markers); -+ up(&module_mutex); -+} -+#endif -+ - static int __init modules_init(void) - { - return subsystem_register(&module_subsys); -diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c -index 1b16bfc..20ade31 100644 ---- a/kernel/rcupdate.c -+++ b/kernel/rcupdate.c -@@ -46,6 +46,7 @@ - #include - #include - #include -+#include - - /* Definition for rcupdate control block. */ - struct rcu_ctrlblk rcu_ctrlblk = -@@ -98,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head, - local_irq_restore(flags); - } - -+static atomic_t rcu_barrier_cpu_count; -+static DEFINE_MUTEX(rcu_barrier_mutex); -+static struct completion rcu_barrier_completion; -+ - /** - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. - * @head: structure to be used for queueing the RCU updates. 
-@@ -410,6 +415,44 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, - return NOTIFY_OK; - } - -+static void rcu_barrier_callback(struct rcu_head *notused) -+{ -+ if (atomic_dec_and_test(&rcu_barrier_cpu_count)) -+ complete(&rcu_barrier_completion); -+} -+ -+/* -+ * Called with preemption disabled, and from cross-cpu IRQ context. -+ */ -+static void rcu_barrier_func(void *notused) -+{ -+ int cpu = smp_processor_id(); -+ struct rcu_data *rdp = &per_cpu(rcu_data, cpu); -+ struct rcu_head *head; -+ -+ head = &rdp->barrier; -+ atomic_inc(&rcu_barrier_cpu_count); -+ call_rcu(head, rcu_barrier_callback); -+} -+ -+/** -+ * rcu_barrier - Wait until all the in-flight RCUs are complete. -+ */ -+void rcu_barrier(void) -+{ -+ BUG_ON(in_interrupt()); -+ /* Take cpucontrol mutex to protect against CPU hotplug */ -+ mutex_lock(&rcu_barrier_mutex); -+ init_completion(&rcu_barrier_completion); -+ atomic_set(&rcu_barrier_cpu_count, 0); -+ on_each_cpu(rcu_barrier_func, NULL, 0, 1); -+ wait_for_completion(&rcu_barrier_completion); -+ mutex_unlock(&rcu_barrier_mutex); -+} -+EXPORT_SYMBOL_GPL(rcu_barrier); -+ -+ -+ - static struct notifier_block __devinitdata rcu_nb = { - .notifier_call = rcu_cpu_notify, - }; -diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost -index b3d31b5..b100a32 100644 ---- a/scripts/Makefile.modpost -+++ b/scripts/Makefile.modpost -@@ -13,6 +13,7 @@ - # 2) modpost is then used to - # 3) create one .mod.c file pr. 
module - # 4) create one Module.symvers file with CRC for all exported symbols -+# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers - # 5) compile all .mod.c files - # 6) final link of the module to a file - -@@ -40,6 +41,11 @@ include scripts/Makefile.lib - - symverfile := $(objtree)/Module.symvers - -+kernelmarkersfile := $(objtree)/Module.markers -+modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers -+ -+markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) -+ - # Step 1), find all modules listed in $(MODVERDIR)/ - __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) - modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) -@@ -53,16 +59,27 @@ quiet_cmd_modpost = MODPOST - cmd_modpost = scripts/mod/modpost \ - $(if $(CONFIG_MODVERSIONS),-m) \ - $(if $(KBUILD_EXTMOD),-i,-o) $(symverfile) \ -+ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ -+ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ - $(filter-out FORCE,$^) - - .PHONY: __modpost - __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE - $(call cmd,modpost) - -+quiet_cmd_kernel-mod = MODPOST $@ -+ cmd_kernel-mod = $(cmd_modpost) $@ -+ -+vmlinux.o: FORCE -+ $(call cmd,kernel-mod) -+ - # Declare generated files as targets for modpost - $(symverfile): __modpost ; - $(modules:.ko=.mod.c): __modpost ; - -+ifdef CONFIG_MARKERS -+$(markersfile): __modpost ; -+endif - - # Step 5), compile all *.mod.c files - -diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c -index 2a174e5..c25948c 100644 ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -10,7 +10,8 @@ - * - * Usage: modpost vmlinux module1.o module2.o ... 
- */ -- -+#define _GNU_SOURCE -+#include - #include - #include "modpost.h" - -@@ -289,7 +290,9 @@ parse_elf(struct elf_info *info, const char *filename) - if (strcmp(secstrings+sechdrs[i].sh_name, ".modinfo") == 0) { - info->modinfo = (void *)hdr + sechdrs[i].sh_offset; - info->modinfo_len = sechdrs[i].sh_size; -- } -+ } else if (strcmp(secstrings+sechdrs[i].sh_name, "__markers_strings") == 0) -+ info->markers_strings_sec = i; -+ - if (sechdrs[i].sh_type != SHT_SYMTAB) - continue; - -@@ -397,6 +400,63 @@ is_vmlinux(const char *modname) - return strcmp(myname, "vmlinux") == 0; - } - -+static void get_markers(struct elf_info *info, struct module *mod) -+{ -+ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; -+ const char *strings = (const char *) info->hdr + sh->sh_offset; -+ const Elf_Sym *sym, *first_sym, *last_sym; -+ size_t n; -+ -+ -+ if (!info->markers_strings_sec) -+ return; -+ -+ /* -+ * First count the strings. We look for all the symbols defined -+ * in the __markers_strings section named __mstrtab_*. For -+ * these local names, the compiler puts a random .NNN suffix on, -+ * so the names don't correspond exactly. -+ */ -+ first_sym = last_sym = NULL; -+ n = 0; -+ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) -+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && -+ sym->st_shndx == info->markers_strings_sec && -+ !strncmp(info->strtab + sym->st_name, -+ "__mstrtab_", sizeof "__mstrtab_" - 1)) { -+ if (first_sym == NULL) -+ first_sym = sym; -+ last_sym = sym; -+ ++n; -+ } -+ -+ if (n == 0) -+ return; -+ /* -+ * Now collect each name and format into a line for the output. -+ * Lines look like: -+ * marker_name vmlinux marker %s format %d -+ * The format string after the second \t can use whitespace. 
-+ */ -+ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); -+ mod->nmarkers = n; -+ -+ n = 0; -+ for (sym = first_sym; sym <= last_sym; sym++) -+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && -+ sym->st_shndx == info->markers_strings_sec && -+ !strncmp(info->strtab + sym->st_name, -+ "__mstrtab_", sizeof "__mstrtab_" - 1)) { -+ const char *name = strings + sym->st_value; -+ const char *fmt = strchr(name, '\0') + 1; -+ char *line = NULL; -+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); -+ NOFAIL(line); -+ mod->markers[n++] = line; -+ } -+} -+ -+ - void - read_symbols(char *modname) - { -@@ -426,6 +486,7 @@ read_symbols(char *modname) - } - maybe_frob_version(modname, info.modinfo, info.modinfo_len, - (void *)info.modinfo - (void *)info.hdr); -+ get_markers(&info, mod); - parse_elf_finish(&info); - - /* Our trick to get versioning for struct_module - it's -@@ -682,6 +743,92 @@ write_dump(const char *fname) - write_if_changed(&buf, fname); - } - -+static void add_marker(struct module *mod, const char *name, const char *fmt) -+{ -+ char *line = NULL; -+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); -+ NOFAIL(line); -+ -+ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * -+ sizeof mod->markers[0]))); -+ mod->markers[mod->nmarkers++] = line; -+} -+ -+static void read_markers(const char *fname) -+{ -+ unsigned long size, pos = 0; -+ void *file = grab_file(fname, &size); -+ char *line; -+ -+ if (!file) /* No old markers, silently ignore */ -+ return; -+ -+ while ((line = get_next_line(&pos, file, size))) { -+ char *marker, *modname, *fmt; -+ struct module *mod; -+ -+ marker = line; -+ modname = strchr(marker, '\t'); -+ if (!modname) -+ goto fail; -+ *modname++ = '\0'; -+ fmt = strchr(modname, '\t'); -+ if (!fmt) -+ goto fail; -+ *fmt++ = '\0'; -+ if (*marker == '\0' || *modname == '\0') -+ goto fail; -+ -+ mod = find_module(modname); -+ if (!mod) { -+ if (is_vmlinux(modname)) -+ have_vmlinux = 1; -+ mod = 
new_module(NOFAIL(strdup(modname))); -+ mod->skip = 1; -+ } -+ -+ add_marker(mod, marker, fmt); -+ } -+ return; -+fail: -+ fatal("parse error in markers list file\n"); -+} -+ -+static int compare_strings(const void *a, const void *b) -+{ -+ return strcmp(*(const char **) a, *(const char **) b); -+} -+ -+static void write_markers(const char *fname) -+{ -+ struct buffer buf = { }; -+ struct module *mod; -+ size_t i; -+ -+ for (mod = modules; mod; mod = mod->next) -+ if (mod->markers != NULL) { -+ /* -+ * Sort the strings so we can skip duplicates when -+ * we write them out. -+ */ -+ qsort(mod->markers, mod->nmarkers, -+ sizeof mod->markers[0], &compare_strings); -+ for (i = 0; i < mod->nmarkers; ++i) { -+ char *line = mod->markers[i]; -+ buf_write(&buf, line, strlen(line)); -+ while (i + 1 < mod->nmarkers && -+ !strcmp(mod->markers[i], -+ mod->markers[i + 1])) -+ free(mod->markers[i++]); -+ free(mod->markers[i]); -+ } -+ free(mod->markers); -+ mod->markers = NULL; -+ } -+ -+ write_if_changed(&buf, fname); -+} -+ - int - main(int argc, char **argv) - { -@@ -690,8 +837,10 @@ main(int argc, char **argv) - char fname[SZ]; - char *dump_read = NULL, *dump_write = NULL; - int opt; -+ char *markers_read = NULL; -+ char *markers_write = NULL; - -- while ((opt = getopt(argc, argv, "i:mo:")) != -1) { -+ while ((opt = getopt(argc, argv, "i:mo:M:K:")) != -1) { - switch(opt) { - case 'i': - dump_read = optarg; -@@ -702,6 +851,12 @@ main(int argc, char **argv) - case 'o': - dump_write = optarg; - break; -+ case 'M': -+ markers_write = optarg; -+ break; -+ case 'K': -+ markers_read = optarg; -+ break; - default: - exit(1); - } -@@ -732,6 +887,12 @@ main(int argc, char **argv) - if (dump_write) - write_dump(dump_write); - -+ if (markers_read) -+ read_markers(markers_read); -+ -+ if (markers_write) -+ write_markers(markers_write); -+ - return 0; - } - -diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h -index 4871343..d79d7ea 100644 ---- a/scripts/mod/modpost.h -+++ 
b/scripts/mod/modpost.h -@@ -18,6 +18,7 @@ - #define Elf_Sym Elf32_Sym - #define ELF_ST_BIND ELF32_ST_BIND - #define ELF_ST_TYPE ELF32_ST_TYPE -+#define Elf_Section Elf32_Half - - #else - -@@ -26,7 +27,7 @@ - #define Elf_Sym Elf64_Sym - #define ELF_ST_BIND ELF64_ST_BIND - #define ELF_ST_TYPE ELF64_ST_TYPE -- -+#define Elf_Section Elf64_Half - #endif - - #if KERNEL_ELFDATA != HOST_ELFDATA -@@ -77,6 +78,8 @@ struct module { - int has_init; - int has_cleanup; - struct buffer dev_table_buf; -+ char **markers; -+ size_t nmarkers; - }; - - struct elf_info { -@@ -85,6 +88,7 @@ struct elf_info { - Elf_Shdr *sechdrs; - Elf_Sym *symtab_start; - Elf_Sym *symtab_stop; -+ Elf_Section markers_strings_sec; - const char *strtab; - char *modinfo; - unsigned int modinfo_len; Deleted: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch 2008-07-04 02:18:23 UTC (rev 6) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch 2008-07-04 02:22:00 UTC (rev 7) @@ -1,1917 +0,0 @@ -diff --git a/Makefile b/Makefile -index 06b517d..afa3d2c 100644 ---- a/Makefile -+++ b/Makefile -@@ -583,7 +583,7 @@ quiet_cmd_vmlinux__ ?= LD $@ - cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ - -T $(vmlinux-lds) $(vmlinux-init) \ - --start-group $(vmlinux-main) --end-group \ -- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) -+ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) - - # Generate new vmlinux version - quiet_cmd_vmlinux_version = GEN .version -@@ -688,10 +688,30 @@ $(KALLSYMS): scripts ; - - endif # ifdef CONFIG_KALLSYMS - -+# Do modpost on a prelinked vmlinux. The finally linked vmlinux has -+# relevant sections renamed as per the linker script. 
-+quiet_cmd_vmlinux-modpost = LD $@ -+ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ -+ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ -+ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) -+define rule_vmlinux-modpost -+ : -+ +$(call cmd,vmlinux-modpost) -+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ -+ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd -+endef -+ -+ - # vmlinux image - including updated kernel symbols --vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE -+vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE -+ $(call vmlinux-modpost) - $(call if_changed_rule,vmlinux__) - -+# build vmlinux.o first to catch section mismatch errors early -+$(kallsyms.o): vmlinux.o -+vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE -+ $(call if_changed_rule,vmlinux-modpost) -+ - # The actual objects are generated when descending, - # make sure no implicit rule kicks in - $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; -diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug -index cf069b7..d39d5c5 100644 ---- a/arch/i386/Kconfig.debug -+++ b/arch/i386/Kconfig.debug -@@ -29,6 +29,13 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. 
-+ - config DEBUG_STACK_USAGE - bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL -diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S -index e8db99c..b846b21 100644 ---- a/arch/i386/kernel/vmlinux.lds.S -+++ b/arch/i386/kernel/vmlinux.lds.S -@@ -53,6 +53,7 @@ SECTIONS - /* writeable */ - .data : { /* Data */ - *(.data) -+ MARKER - CONSTRUCTORS - } - -diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug -index 2782b48..17d4a44 100644 ---- a/arch/ia64/Kconfig.debug -+++ b/arch/ia64/Kconfig.debug -@@ -12,6 +12,12 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. - - choice - prompt "Physical memory granularity" -diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S -index a676e79..c6ea47e 100644 ---- a/arch/ia64/kernel/vmlinux.lds.S -+++ b/arch/ia64/kernel/vmlinux.lds.S -@@ -193,7 +193,7 @@ SECTIONS - - data : { } :data - .data : AT(ADDR(.data) - LOAD_OFFSET) -- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } -+ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } - - . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ - .got : AT(ADDR(.got) - LOAD_OFFSET) -diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug -index 860472a..5c3a9f5 100644 ---- a/arch/ppc64/Kconfig.debug -+++ b/arch/ppc64/Kconfig.debug -@@ -16,6 +16,13 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. 
-+ - config DEBUG_STACK_USAGE - bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL -diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S -index 4103cc1..76d0bf0 100644 ---- a/arch/ppc64/kernel/vmlinux.lds.S -+++ b/arch/ppc64/kernel/vmlinux.lds.S -@@ -118,6 +118,7 @@ SECTIONS - .data : { - *(.data .data.rel* .toc1) - *(.branch_lt) -+ MARKER - } - - .opd : { -diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug -index cd8d39f..f39bf8d 100644 ---- a/arch/sparc64/Kconfig.debug -+++ b/arch/sparc64/Kconfig.debug -@@ -21,6 +21,13 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. -+ - config DEBUG_DCFLUSH - bool "D-cache flush debugging" - depends on DEBUG_KERNEL -diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S -index a710d38..d4b3b62 100644 ---- a/arch/sparc64/kernel/vmlinux.lds.S -+++ b/arch/sparc64/kernel/vmlinux.lds.S -@@ -27,6 +27,7 @@ SECTIONS - .data : - { - *(.data) -+ MARKER - CONSTRUCTORS - } - .data1 : { *(.data1) } -diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug -index cb33186..d0260cb 100644 ---- a/arch/x86_64/Kconfig.debug -+++ b/arch/x86_64/Kconfig.debug -@@ -55,6 +55,13 @@ config KPROBES - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - -+config MARKERS -+ bool "Activate markers" -+ default y -+ help -+ Place an empty function call at each marker site. Can be -+ dynamically changed for a probe function. 
-+ - config IOMMU_LEAK - bool "IOMMU leak tracing" - depends on DEBUG_KERNEL -diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S -index 053c826..3b3928f 100644 ---- a/arch/x86_64/kernel/vmlinux.lds.S -+++ b/arch/x86_64/kernel/vmlinux.lds.S -@@ -33,6 +33,7 @@ SECTIONS - - .data : { /* Data */ - *(.data) -+ MARKER - CONSTRUCTORS - } - -diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c -index 7a4d28d..991a1ef 100644 ---- a/drivers/net/bonding/bond_3ad.c -+++ b/drivers/net/bonding/bond_3ad.c -@@ -159,7 +159,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); - - // ================= main 802.3ad protocol functions ================== - static int ad_lacpdu_send(struct port *port); --static int ad_marker_send(struct port *port, struct marker *marker); -+static int ad_marker_send(struct port *port, struct bond_marker *marker); - static void ad_mux_machine(struct port *port); - static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); - static void ad_tx_machine(struct port *port); -@@ -172,8 +172,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast); - static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); - static void ad_enable_collecting_distributing(struct port *port); - static void ad_disable_collecting_distributing(struct port *port); --static void ad_marker_info_received(struct marker *marker_info, struct port *port); --static void ad_marker_response_received(struct marker *marker, struct port *port); -+static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); -+static void ad_marker_response_received(struct bond_marker *marker, struct port *port); - - - ///////////////////////////////////////////////////////////////////////////////// -@@ -954,12 +954,13 @@ static int ad_lacpdu_send(struct port *port) - * Returns: 0 on success - * < 0 on error - */ --static int ad_marker_send(struct port *port, struct marker *marker) 
-+static int ad_marker_send(struct port *port, struct bond_marker *marker) - { - struct slave *slave = port->slave; - struct sk_buff *skb; -- struct marker_header *marker_header; -- int length = sizeof(struct marker_header); -+ struct bond_marker_header *marker_header; -+ int length = sizeof(struct bond_marker_header); -+ - struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; - - skb = dev_alloc_skb(length + 16); -@@ -974,7 +975,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) - skb->nh.raw = skb->data + ETH_HLEN; - skb->protocol = PKT_TYPE_LACPDU; - -- marker_header = (struct marker_header *)skb_put(skb, length); -+ marker_header = (struct bond_marker_header *)skb_put(skb, length); - - marker_header->ad_header.destination_address = lacpdu_multicast_address; - /* Note: source addres is set to be the member's PERMANENT address, because we use it -@@ -1771,7 +1772,7 @@ static void ad_disable_collecting_distributing(struct port *port) - */ - static void ad_marker_info_send(struct port *port) - { -- struct marker marker; -+ struct bond_marker marker; - u16 index; - - // fill the marker PDU with the appropriate values -@@ -1804,13 +1805,14 @@ static void ad_marker_info_send(struct port *port) - * @port: the port we're looking at - * - */ --static void ad_marker_info_received(struct marker *marker_info,struct port *port) -+static void ad_marker_info_received(struct bond_marker *marker_info, -+ struct port *port) - { -- struct marker marker; -+ struct bond_marker marker; - - // copy the received marker data to the response marker - //marker = *marker_info; -- memcpy(&marker, marker_info, sizeof(struct marker)); -+ memcpy(&marker, marker_info, sizeof(struct bond_marker)); - // change the marker subtype to marker response - marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; - // send the marker response -@@ -1829,7 +1831,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port - * response for marker PDU's, in 
this stage, but only to respond to marker - * information. - */ --static void ad_marker_response_received(struct marker *marker, struct port *port) -+static void ad_marker_response_received(struct bond_marker *marker, -+ struct port *port) - { - marker=NULL; // just to satisfy the compiler - port=NULL; // just to satisfy the compiler -@@ -2217,15 +2220,16 @@ void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 leng - case AD_TYPE_MARKER: - // No need to convert fields to Little Endian since we don't use the marker's fields. - -- switch (((struct marker *)lacpdu)->tlv_type) { -+ switch (((struct bond_marker *)lacpdu)->tlv_type) { - case AD_MARKER_INFORMATION_SUBTYPE: - dprintk("Received Marker Information on port %d\n", port->actor_port_number); -- ad_marker_info_received((struct marker *)lacpdu, port); -+ ad_marker_info_received((struct bond_marker *)lacpdu, port); -+ - break; - - case AD_MARKER_RESPONSE_SUBTYPE: - dprintk("Received Marker Response on port %d\n", port->actor_port_number); -- ad_marker_response_received((struct marker *)lacpdu, port); -+ ad_marker_response_received((struct bond_marker *)lacpdu, port); - break; - - default: -diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h -index 4c60b17..2bb477e 100644 ---- a/drivers/net/bonding/bond_3ad.h -+++ b/drivers/net/bonding/bond_3ad.h -@@ -105,7 +105,8 @@ typedef enum { - typedef enum { - AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype - AD_MARKER_RESPONSE_SUBTYPE // marker response subtype --} marker_subtype_t; -+} bond_marker_subtype_t; -+ - - // timers types(43.4.9 in the 802.3ad standard) - typedef enum { -@@ -161,7 +162,7 @@ typedef struct lacpdu_header { - } lacpdu_header_t; - - // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) --typedef struct marker { -+typedef struct bond_marker { - u8 subtype; // = 0x02 (marker PDU) - u8 version_number; // = 0x01 - u8 tlv_type; // = 0x01 (marker information) -@@ -174,12 
+175,12 @@ typedef struct marker { - u8 tlv_type_terminator; // = 0x00 - u8 terminator_length; // = 0x00 - u8 reserved_90[90]; // = 0 --} marker_t; -+} bond_marker_t; - --typedef struct marker_header { -+typedef struct bond_marker_header { - struct ad_header ad_header; -- struct marker marker; --} marker_header_t; -+ struct bond_marker marker; -+} bond_marker_header_t; - - #pragma pack() - -diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h -index 165a02f..eaf230e 100644 ---- a/include/asm-generic/vmlinux.lds.h -+++ b/include/asm-generic/vmlinux.lds.h -@@ -6,10 +6,18 @@ - #define VMLINUX_SYMBOL(_sym_) _sym_ - #endif - -+/* Kernel markers : pointers */ -+#define MARKER \ -+ . = ALIGN(8); \ -+ VMLINUX_SYMBOL(__start___markers) = .; \ -+ *(__markers) \ -+ VMLINUX_SYMBOL(__stop___markers) = .; -+ - #define RODATA \ - .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ - *(.rodata) *(.rodata.*) \ - *(__vermagic) /* Kernel version magic */ \ -+ *(__markers_strings) /* Markers: strings */ \ - } \ - \ - .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ -diff --git a/include/linux/marker.h b/include/linux/marker.h -new file mode 100644 -index 0000000..efbc82b ---- /dev/null -+++ b/include/linux/marker.h -@@ -0,0 +1,139 @@ -+#ifndef _LINUX_MARKER_H -+#define _LINUX_MARKER_H -+ -+/* -+ * Code markup for dynamic and static tracing. -+ * -+ * See Documentation/marker.txt. -+ * -+ * (C) Copyright 2006 Mathieu Desnoyers -+ * -+ * This file is released under the GPLv2. -+ * See the file COPYING for more details. -+ */ -+ -+#include -+ -+struct module; -+struct marker; -+ -+/** -+ * marker_probe_func - Type of a marker probe function -+ * @probe_private: probe private data -+ * @call_private: call site private data -+ * @fmt: format string -+ * @args: variable argument list pointer. Use a pointer to overcome C's -+ * inability to pass this around as a pointer in a portable manner in -+ * the callee otherwise. -+ * -+ * Type of marker probe functions. 
They receive the mdata and need to parse the -+ * format string to recover the variable argument list. -+ */ -+typedef void marker_probe_func(void *probe_private, void *call_private, -+ const char *fmt, va_list *args); -+ -+struct marker_probe_closure { -+ marker_probe_func *func; /* Callback */ -+ void *probe_private; /* Private probe data */ -+}; -+ -+struct marker { -+ const char *name; /* Marker name */ -+ const char *format; /* Marker format string, describing the -+ * variable argument list. -+ */ -+ char state; /* Marker state. */ -+ char ptype; /* probe type : 0 : single, 1 : multi */ -+ void (*call)(const struct marker *mdata, /* Probe wrapper */ -+ void *call_private, const char *fmt, ...); -+ struct marker_probe_closure single; -+ struct marker_probe_closure *multi; -+} __attribute__((aligned(8))); -+ -+#ifdef CONFIG_MARKERS -+ -+/* -+ * Note : the empty asm volatile with read constraint is used here instead of a -+ * "used" attribute to fix a gcc 4.1.x bug. -+ * Make sure the alignment of the structure in the __markers section will -+ * not add unwanted padding between the beginning of the section and the -+ * structure. Force alignment to the same alignment as the section start. -+ */ -+#define __trace_mark(name, call_private, format, args...) 
\ -+ do { \ -+ static const char __mstrtab_##name[] \ -+ __attribute__((section("__markers_strings"))) \ -+ = #name "\0" format; \ -+ static struct marker __mark_##name \ -+ __attribute__((section("__markers"), aligned(8))) = \ -+ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ -+ 0, 0, marker_probe_cb, \ -+ { __mark_empty_function, NULL}, NULL }; \ -+ __mark_check_format(format, ## args); \ -+ if (unlikely(__mark_##name.state)) { \ -+ (*__mark_##name.call) \ -+ (&__mark_##name, call_private, \ -+ format, ## args); \ -+ } \ -+ } while (0) -+ -+extern void marker_update_probe_range(struct marker *begin, -+ struct marker *end); -+#else /* !CONFIG_MARKERS */ -+#define __trace_mark(name, call_private, format, args...) \ -+ __mark_check_format(format, ## args) -+static inline void marker_update_probe_range(struct marker *begin, -+ struct marker *end) -+{ } -+#endif /* CONFIG_MARKERS */ -+ -+/** -+ * trace_mark - Marker -+ * @name: marker name, not quoted. -+ * @format: format string -+ * @args...: variable argument list -+ * -+ * Places a marker. -+ */ -+#define trace_mark(name, format, args...) \ -+ __trace_mark(name, NULL, format, ## args) -+ -+/** -+ * MARK_NOARGS - Format string for a marker with no argument. -+ */ -+#define MARK_NOARGS " " -+ -+/* To be used for string format validity checking with gcc */ -+static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) -+{ -+} -+ -+extern marker_probe_func __mark_empty_function; -+ -+extern void marker_probe_cb(const struct marker *mdata, -+ void *call_private, const char *fmt, ...); -+extern void marker_probe_cb_noarg(const struct marker *mdata, -+ void *call_private, const char *fmt, ...); -+ -+/* -+ * Connect a probe to a marker. -+ * private data pointer must be a valid allocated memory address, or NULL. 
-+ */ -+extern int marker_probe_register(const char *name, const char *format, -+ marker_probe_func *probe, void *probe_private); -+ -+/* -+ * Returns the private data given to marker_probe_register. -+ */ -+extern int marker_probe_unregister(const char *name, -+ marker_probe_func *probe, void *probe_private); -+/* -+ * Unregister a marker by providing the registered private data. -+ */ -+extern int marker_probe_unregister_private_data(marker_probe_func *probe, -+ void *probe_private); -+ -+extern void *marker_get_private_data(const char *name, marker_probe_func *probe, -+ int num); -+ -+#endif -diff --git a/include/linux/module.h b/include/linux/module.h -index 8da8948..2ad5efd 100644 ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -18,6 +18,7 @@ - #include - #include - #include -+#include - #include - - #include -@@ -328,6 +329,10 @@ struct module - /* The command line arguments (may be mangled). People like - keeping pointers to this stuff */ - char *args; -+#ifdef CONFIG_MARKERS -+ struct marker *markers; -+ unsigned int num_markers; -+#endif - }; - - /* FIXME: It'd be nice to isolate modules during init, too, so they -@@ -448,6 +453,7 @@ int register_module_notifier(struct notifier_block * nb); - int unregister_module_notifier(struct notifier_block * nb); - - extern void print_modules(void); -+extern void module_update_markers(void); - #else /* !CONFIG_MODULES... 
*/ - #define EXPORT_SYMBOL(sym) - #define EXPORT_SYMBOL_GPL(sym) -@@ -537,6 +543,10 @@ static inline int unregister_module_notifier(struct notifier_block * nb) - static inline void print_modules(void) - { - } -+ -+static inline void module_update_markers(void) -+{ -+} - #endif /* CONFIG_MODULES */ - - #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) -diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h -index 91057d6..bcd0acb 100644 ---- a/include/linux/rcupdate.h -+++ b/include/linux/rcupdate.h -@@ -42,6 +42,19 @@ - #include - #include - -+/* -+ * Prevent the compiler from merging or refetching accesses. The compiler -+ * is also forbidden from reordering successive instances of ACCESS_ONCE(), -+ * but only when the compiler is aware of some particular ordering. One way -+ * to make the compiler aware of ordering is to put the two invocations of -+ * ACCESS_ONCE() in different C statements. -+ * -+ * This macro does absolutely -nothing- to prevent the CPU from reordering, -+ * merging, or refetching absolutely anything at any time. 
-+ * -+ */ -+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) -+ - /** - * struct rcu_head - callback structure for use with RCU - * @next: next update requests in a list -@@ -102,6 +115,7 @@ struct rcu_data { - struct rcu_head *donelist; - struct rcu_head **donetail; - int cpu; -+ struct rcu_head barrier; - }; - - DECLARE_PER_CPU(struct rcu_data, rcu_data); -@@ -266,6 +280,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head, - extern void FASTCALL(call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *head))); - extern void synchronize_kernel(void); -- -+extern void rcu_barrier(void); - #endif /* __KERNEL__ */ - #endif /* __LINUX_RCUPDATE_H */ -diff --git a/kernel/Makefile b/kernel/Makefile -index 0b8c8ca..f8248bc 100644 ---- a/kernel/Makefile -+++ b/kernel/Makefile -@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o - obj-$(CONFIG_AUDITSYSCALL) += auditsc.o - obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o - obj-$(CONFIG_KPROBES) += kprobes.o -+obj-$(CONFIG_MARKERS) += marker.o - - ifneq ($(CONFIG_IA64),y) - # According to Alan Modra , the -fno-omit-frame-pointer is -diff --git a/kernel/marker.c b/kernel/marker.c -new file mode 100644 -index 0000000..c4c2cd8 ---- /dev/null -+++ b/kernel/marker.c -@@ -0,0 +1,851 @@ -+/* -+ * Copyright (C) 2007 Mathieu Desnoyers -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+extern struct marker __start___markers[]; -+extern struct marker __stop___markers[]; -+ -+/* Set to 1 to enable marker debug output */ -+const int marker_debug; -+ -+/* -+ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin -+ * and module markers and the hash table. -+ */ -+static DEFINE_MUTEX(markers_mutex); -+ -+/* -+ * Marker hash table, containing the active markers. -+ * Protected by module_mutex. -+ */ -+#define MARKER_HASH_BITS 6 -+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) -+ -+/* -+ * Note about RCU : -+ * It is used to make sure every handler has finished using its private data -+ * between two consecutive operation (add or remove) on a given marker. It is -+ * also used to delay the free of multiple probes array until a quiescent state -+ * is reached. -+ * marker entries modifications are protected by the markers_mutex. -+ */ -+struct marker_entry { -+ struct hlist_node hlist; -+ char *format; -+ void (*call)(const struct marker *mdata, /* Probe wrapper */ -+ void *call_private, const char *fmt, ...); -+ struct marker_probe_closure single; -+ struct marker_probe_closure *multi; -+ int refcount; /* Number of times armed. 0 if disarmed. */ -+ struct rcu_head rcu; -+ void *oldptr; -+ char rcu_pending:1; -+ char ptype:1; -+ char name[0]; /* Contains name'\0'format'\0' */ -+}; -+ -+static struct hlist_head marker_table[MARKER_TABLE_SIZE]; -+ -+/** -+ * __mark_empty_function - Empty probe callback -+ * @probe_private: probe private data -+ * @call_private: call site private data -+ * @fmt: format string -+ * @...: variable argument list -+ * -+ * Empty callback provided as a probe to the markers. 
By providing this to a -+ * disabled marker, we make sure the execution flow is always valid even -+ * though the function pointer change and the marker enabling are two distinct -+ * operations that modifies the execution flow of preemptible code. -+ */ -+void __mark_empty_function(void *probe_private, void *call_private, -+ const char *fmt, va_list *args) -+{ -+} -+EXPORT_SYMBOL_GPL(__mark_empty_function); -+ -+/* -+ * marker_probe_cb Callback that prepares the variable argument list for probes. -+ * @mdata: pointer of type struct marker -+ * @call_private: caller site private data -+ * @fmt: format string -+ * @...: Variable argument list. -+ * -+ * Since we do not use "typical" pointer based RCU in the 1 argument case, we -+ * need to put a full smp_rmb() in this branch. This is why we do not use -+ * rcu_dereference() for the pointer read. -+ */ -+void marker_probe_cb(const struct marker *mdata, void *call_private, -+ const char *fmt, ...) -+{ -+ va_list args; -+ char ptype; -+ -+ /* -+ * disabling preemption to make sure the teardown of the callbacks can -+ * be done correctly when they are in modules and they insure RCU read -+ * coherency. -+ */ -+ preempt_disable(); -+ ptype = ACCESS_ONCE(mdata->ptype); -+ if (likely(!ptype)) { -+ marker_probe_func *func; -+ /* Must read the ptype before ptr. They are not data dependant, -+ * so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func = ACCESS_ONCE(mdata->single.func); -+ /* Must read the ptr before private data. They are not data -+ * dependant, so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ va_start(args, fmt); -+ func(mdata->single.probe_private, call_private, fmt, &args); -+ va_end(args); -+ } else { -+ struct marker_probe_closure *multi; -+ int i; -+ /* -+ * multi points to an array, therefore accessing the array -+ * depends on reading multi. However, even in this case, -+ * we must insure that the pointer is read _before_ the array -+ * data. 
Same as rcu_dereference, but we need a full smp_rmb() -+ * in the fast path, so put the explicit barrier here. -+ */ -+ smp_read_barrier_depends(); -+ multi = ACCESS_ONCE(mdata->multi); -+ for (i = 0; multi[i].func; i++) { -+ va_start(args, fmt); -+ multi[i].func(multi[i].probe_private, call_private, fmt, -+ &args); -+ va_end(args); -+ } -+ } -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(marker_probe_cb); -+ -+/* -+ * marker_probe_cb Callback that does not prepare the variable argument list. -+ * @mdata: pointer of type struct marker -+ * @call_private: caller site private data -+ * @fmt: format string -+ * @...: Variable argument list. -+ * -+ * Should be connected to markers "MARK_NOARGS". -+ */ -+void marker_probe_cb_noarg(const struct marker *mdata, -+ void *call_private, const char *fmt, ...) -+{ -+ va_list args; /* not initialized */ -+ char ptype; -+ -+ preempt_disable(); -+ ptype = ACCESS_ONCE(mdata->ptype); -+ if (likely(!ptype)) { -+ marker_probe_func *func; -+ /* Must read the ptype before ptr. They are not data dependant, -+ * so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func = ACCESS_ONCE(mdata->single.func); -+ /* Must read the ptr before private data. They are not data -+ * dependant, so we put an explicit smp_rmb() here. */ -+ smp_rmb(); -+ func(mdata->single.probe_private, call_private, fmt, &args); -+ } else { -+ struct marker_probe_closure *multi; -+ int i; -+ /* -+ * multi points to an array, therefore accessing the array -+ * depends on reading multi. However, even in this case, -+ * we must insure that the pointer is read _before_ the array -+ * data. Same as rcu_dereference, but we need a full smp_rmb() -+ * in the fast path, so put the explicit barrier here. 
-+ */ -+ smp_read_barrier_depends(); -+ multi = ACCESS_ONCE(mdata->multi); -+ for (i = 0; multi[i].func; i++) -+ multi[i].func(multi[i].probe_private, call_private, fmt, -+ &args); -+ } -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); -+ -+static void free_old_closure(struct rcu_head *head) -+{ -+ struct marker_entry *entry = container_of(head, -+ struct marker_entry, rcu); -+ kfree(entry->oldptr); -+ /* Make sure we free the data before setting the pending flag to 0 */ -+ smp_wmb(); -+ entry->rcu_pending = 0; -+} -+ -+static void debug_print_probes(struct marker_entry *entry) -+{ -+ int i; -+ -+ if (!marker_debug) -+ return; -+ -+ if (!entry->ptype) { -+ printk(KERN_DEBUG "Single probe : %p %p\n", -+ entry->single.func, -+ entry->single.probe_private); -+ } else { -+ for (i = 0; entry->multi[i].func; i++) -+ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, -+ entry->multi[i].func, -+ entry->multi[i].probe_private); -+ } -+} -+ -+static struct marker_probe_closure * -+marker_entry_add_probe(struct marker_entry *entry, -+ marker_probe_func *probe, void *probe_private) -+{ -+ int nr_probes = 0; -+ struct marker_probe_closure *old, *new; -+ -+ WARN_ON(!probe); -+ -+ debug_print_probes(entry); -+ old = entry->multi; -+ if (!entry->ptype) { -+ if (entry->single.func == probe && -+ entry->single.probe_private == probe_private) -+ return ERR_PTR(-EBUSY); -+ if (entry->single.func == __mark_empty_function) { -+ /* 0 -> 1 probes */ -+ entry->single.func = probe; -+ entry->single.probe_private = probe_private; -+ entry->refcount = 1; -+ entry->ptype = 0; -+ debug_print_probes(entry); -+ return NULL; -+ } else { -+ /* 1 -> 2 probes */ -+ nr_probes = 1; -+ old = NULL; -+ } -+ } else { -+ /* (N -> N+1), (N != 0, 1) probes */ -+ for (nr_probes = 0; old[nr_probes].func; nr_probes++) -+ if (old[nr_probes].func == probe -+ && old[nr_probes].probe_private -+ == probe_private) -+ return ERR_PTR(-EBUSY); -+ } -+ /* + 2 : one for new probe, one for NULL func */ -+ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), -+ GFP_KERNEL); -+ if (new == NULL) -+ return ERR_PTR(-ENOMEM); -+ if (!old) -+ new[0] = entry->single; -+ else -+ memcpy(new, old, -+ nr_probes * sizeof(struct marker_probe_closure)); -+ new[nr_probes].func = probe; -+ new[nr_probes].probe_private = probe_private; -+ entry->refcount = nr_probes + 1; -+ entry->multi = new; -+ entry->ptype = 1; -+ debug_print_probes(entry); -+ return old; -+} -+ -+static struct marker_probe_closure * -+marker_entry_remove_probe(struct marker_entry *entry, -+ marker_probe_func *probe, void *probe_private) -+{ -+ int nr_probes = 0, nr_del = 0, i; -+ struct marker_probe_closure *old, *new; -+ -+ old = entry->multi; -+ -+ debug_print_probes(entry); -+ if (!entry->ptype) { -+ /* 0 -> N is an error */ -+ WARN_ON(entry->single.func == __mark_empty_function); -+ /* 1 -> 0 probes */ -+ WARN_ON(probe && entry->single.func != probe); -+ WARN_ON(entry->single.probe_private != probe_private); -+ entry->single.func = __mark_empty_function; -+ entry->refcount = 0; -+ entry->ptype = 0; -+ debug_print_probes(entry); -+ return NULL; -+ } else { -+ /* (N -> M), (N > 1, M >= 0) probes */ -+ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { -+ if ((!probe || old[nr_probes].func == probe) -+ && old[nr_probes].probe_private -+ == probe_private) -+ nr_del++; -+ } -+ } -+ -+ if (nr_probes - nr_del == 0) { -+ /* N -> 0, (N > 1) */ -+ entry->single.func = __mark_empty_function; -+ entry->refcount = 0; -+ entry->ptype = 0; -+ } else if (nr_probes - nr_del == 1) { -+ /* N -> 1, (N > 1) */ -+ for (i = 0; old[i].func; i++) -+ if ((probe && old[i].func != probe) || -+ old[i].probe_private != probe_private) -+ entry->single = old[i]; -+ entry->refcount = 1; -+ entry->ptype = 0; -+ } else { -+ int j = 0; -+ /* N -> M, (N > 1, M > 1) */ -+ /* + 1 for NULL */ -+ new = kzalloc((nr_probes - nr_del + 1) -+ * sizeof(struct marker_probe_closure), GFP_KERNEL); -+ if (new == NULL) -+ return 
ERR_PTR(-ENOMEM); -+ for (i = 0; old[i].func; i++) -+ if ((probe && old[i].func != probe) || -+ old[i].probe_private != probe_private) -+ new[j++] = old[i]; -+ entry->refcount = nr_probes - nr_del; -+ entry->ptype = 1; -+ entry->multi = new; -+ } -+ debug_print_probes(entry); -+ return old; -+} -+ -+/* -+ * Get marker if the marker is present in the marker hash table. -+ * Must be called with markers_mutex held. -+ * Returns NULL if not present. -+ */ -+static struct marker_entry *get_marker(const char *name) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ u32 hash = jhash(name, strlen(name), 0); -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) -+ return e; -+ } -+ return NULL; -+} -+ -+/* -+ * Add the marker to the marker hash table. Must be called with markers_mutex -+ * held. -+ */ -+static struct marker_entry *add_marker(const char *name, const char *format) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ size_t name_len = strlen(name) + 1; -+ size_t format_len = 0; -+ u32 hash = jhash(name, name_len-1, 0); -+ -+ if (format) -+ format_len = strlen(format) + 1; -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ printk(KERN_NOTICE -+ "Marker %s busy\n", name); -+ return ERR_PTR(-EBUSY); /* Already there */ -+ } -+ } -+ /* -+ * Using kmalloc here to allocate a variable length element. Could -+ * cause some memory fragmentation if overused. 
-+ */ -+ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, -+ GFP_KERNEL); -+ if (!e) -+ return ERR_PTR(-ENOMEM); -+ memcpy(&e->name[0], name, name_len); -+ if (format) { -+ e->format = &e->name[name_len]; -+ memcpy(e->format, format, format_len); -+ if (strcmp(e->format, MARK_NOARGS) == 0) -+ e->call = marker_probe_cb_noarg; -+ else -+ e->call = marker_probe_cb; -+ trace_mark(core_marker_format, "name %s format %s", -+ e->name, e->format); -+ } else { -+ e->format = NULL; -+ e->call = marker_probe_cb; -+ } -+ e->single.func = __mark_empty_function; -+ e->single.probe_private = NULL; -+ e->multi = NULL; -+ e->ptype = 0; -+ e->refcount = 0; -+ e->rcu_pending = 0; -+ hlist_add_head(&e->hlist, head); -+ return e; -+} -+ -+/* -+ * Remove the marker from the marker hash table. Must be called with mutex_lock -+ * held. -+ */ -+static int remove_marker(const char *name) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ int found = 0; -+ size_t len = strlen(name) + 1; -+ u32 hash = jhash(name, len-1, 0); -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ found = 1; -+ break; -+ } -+ } -+ if (!found) -+ return -ENOENT; -+ if (e->single.func != __mark_empty_function) -+ return -EBUSY; -+ hlist_del(&e->hlist); -+ /* Make sure the call_rcu has been executed */ -+ if (e->rcu_pending) -+ rcu_barrier(); -+ kfree(e); -+ return 0; -+} -+ -+/* -+ * Set the mark_entry format to the format found in the element. 
-+ */ -+static int marker_set_format(struct marker_entry **entry, const char *format) -+{ -+ struct marker_entry *e; -+ size_t name_len = strlen((*entry)->name) + 1; -+ size_t format_len = strlen(format) + 1; -+ -+ -+ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, -+ GFP_KERNEL); -+ if (!e) -+ return -ENOMEM; -+ memcpy(&e->name[0], (*entry)->name, name_len); -+ e->format = &e->name[name_len]; -+ memcpy(e->format, format, format_len); -+ if (strcmp(e->format, MARK_NOARGS) == 0) -+ e->call = marker_probe_cb_noarg; -+ else -+ e->call = marker_probe_cb; -+ e->single = (*entry)->single; -+ e->multi = (*entry)->multi; -+ e->ptype = (*entry)->ptype; -+ e->refcount = (*entry)->refcount; -+ e->rcu_pending = 0; -+ hlist_add_before(&e->hlist, &(*entry)->hlist); -+ hlist_del(&(*entry)->hlist); -+ /* Make sure the call_rcu has been executed */ -+ if ((*entry)->rcu_pending) -+ rcu_barrier(); -+ kfree(*entry); -+ *entry = e; -+ trace_mark(core_marker_format, "name %s format %s", -+ e->name, e->format); -+ return 0; -+} -+ -+/* -+ * Sets the probe callback corresponding to one marker. -+ */ -+static int set_marker(struct marker_entry **entry, struct marker *elem, -+ int active) -+{ -+ int ret; -+ WARN_ON(strcmp((*entry)->name, elem->name) != 0); -+ -+ if ((*entry)->format) { -+ if (strcmp((*entry)->format, elem->format) != 0) { -+ printk(KERN_NOTICE -+ "Format mismatch for probe %s " -+ "(%s), marker (%s)\n", -+ (*entry)->name, -+ (*entry)->format, -+ elem->format); -+ return -EPERM; -+ } -+ } else { -+ ret = marker_set_format(entry, elem->format); -+ if (ret) -+ return ret; -+ } -+ -+ /* -+ * probe_cb setup (statically known) is done here. It is -+ * asynchronous with the rest of execution, therefore we only -+ * pass from a "safe" callback (with argument) to an "unsafe" -+ * callback (does not set arguments). 
-+ */ -+ elem->call = (*entry)->call; -+ /* -+ * Sanity check : -+ * We only update the single probe private data when the ptr is -+ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) -+ */ -+ WARN_ON(elem->single.func != __mark_empty_function -+ && elem->single.probe_private -+ != (*entry)->single.probe_private && -+ !elem->ptype); -+ elem->single.probe_private = (*entry)->single.probe_private; -+ /* -+ * Make sure the private data is valid when we update the -+ * single probe ptr. -+ */ -+ smp_wmb(); -+ elem->single.func = (*entry)->single.func; -+ /* -+ * We also make sure that the new probe callbacks array is consistent -+ * before setting a pointer to it. -+ */ -+ rcu_assign_pointer(elem->multi, (*entry)->multi); -+ /* -+ * Update the function or multi probe array pointer before setting the -+ * ptype. -+ */ -+ smp_wmb(); -+ elem->ptype = (*entry)->ptype; -+ elem->state = active; -+ -+ return 0; -+} -+ -+/* -+ * Disable a marker and its probe callback. -+ * Note: only after a synchronize_sched() issued after setting elem->call to the -+ * empty function insures that the original callback is not used anymore. This -+ * insured by preemption disabling around the call site. -+ */ -+static void disable_marker(struct marker *elem) -+{ -+ /* leave "call" as is. It is known statically. */ -+ elem->state = 0; -+ elem->single.func = __mark_empty_function; -+ /* Update the function before setting the ptype */ -+ smp_wmb(); -+ elem->ptype = 0; /* single probe */ -+ /* -+ * Leave the private data and id there, because removal is racy and -+ * should be done only after a synchronize_sched(). These are never used -+ * until the next initialization anyway. -+ */ -+} -+ -+/** -+ * marker_update_probe_range - Update a probe range -+ * @begin: beginning of the range -+ * @end: end of the range -+ * -+ * Updates the probe callback corresponding to a range of markers. 
-+ */ -+void marker_update_probe_range(struct marker *begin, -+ struct marker *end) -+{ -+ struct marker *iter; -+ struct marker_entry *mark_entry; -+ -+ mutex_lock(&markers_mutex); -+ for (iter = begin; iter < end; iter++) { -+ mark_entry = get_marker(iter->name); -+ if (mark_entry) { -+ set_marker(&mark_entry, iter, -+ !!mark_entry->refcount); -+ /* -+ * ignore error, continue -+ */ -+ } else { -+ disable_marker(iter); -+ } -+ } -+ mutex_unlock(&markers_mutex); -+} -+ -+/* -+ * Update probes, removing the faulty probes. -+ * Issues a synchronize_sched() when no reference to the module passed -+ * as parameter is found in the probes so the probe module can be -+ * safely unloaded from now on. -+ * -+ * Internal callback only changed before the first probe is connected to it. -+ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 -+ * transitions. All other transitions will leave the old private data valid. -+ * This makes the non-atomicity of the callback/private data updates valid. -+ * -+ * "special case" updates : -+ * 0 -> 1 callback -+ * 1 -> 0 callback -+ * 1 -> 2 callbacks -+ * 2 -> 1 callbacks -+ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. -+ * Site effect : marker_set_format may delete the marker entry (creating a -+ * replacement). -+ */ -+static void marker_update_probes(void) -+{ -+ /* Core kernel markers */ -+ marker_update_probe_range(__start___markers, __stop___markers); -+ /* Markers in modules. */ -+ module_update_markers(); -+} -+ -+/** -+ * marker_probe_register - Connect a probe to a marker -+ * @name: marker name -+ * @format: format string -+ * @probe: probe handler -+ * @probe_private: probe private data -+ * -+ * private data must be a valid allocated memory address, or NULL. -+ * Returns 0 if ok, error value on error. -+ * The probe address must at least be aligned on the architecture pointer size. 
-+ */ -+int marker_probe_register(const char *name, const char *format, -+ marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ int ret = 0; -+ struct marker_probe_closure *old; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ if (!entry) { -+ entry = add_marker(name, format); -+ if (IS_ERR(entry)) { -+ ret = PTR_ERR(entry); -+ goto end; -+ } -+ } -+ /* -+ * If we detect that a call_rcu is pending for this marker, -+ * make sure it's executed now. -+ */ -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_add_probe(entry, probe, probe_private); -+ if (IS_ERR(old)) { -+ ret = PTR_ERR(old); -+ goto end; -+ } -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ WARN_ON(!entry); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_register); -+ -+/** -+ * marker_probe_unregister - Disconnect a probe from a marker -+ * @name: marker name -+ * @probe: probe function pointer -+ * @probe_private: probe private data -+ * -+ * Returns the private data given to marker_probe_register, or an ERR_PTR(). -+ * We do not need to call a synchronize_sched to make sure the probes have -+ * finished running before doing a module unload, because the module unload -+ * itself uses stop_machine(), which insures that every preempt disabled section -+ * have finished. 
-+ */ -+int marker_probe_unregister(const char *name, -+ marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ struct marker_probe_closure *old; -+ int ret = 0; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ if (!entry) { -+ ret = -ENOENT; -+ goto end; -+ } -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_remove_probe(entry, probe, probe_private); -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker(name); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+ remove_marker(name); /* Ignore busy error message */ -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_unregister); -+ -+static struct marker_entry * -+get_marker_from_private_data(marker_probe_func *probe, void *probe_private) -+{ -+ struct marker_entry *entry; -+ unsigned int i; -+ struct hlist_head *head; -+ struct hlist_node *node; -+ -+ for (i = 0; i < MARKER_TABLE_SIZE; i++) { -+ head = &marker_table[i]; -+ hlist_for_each_entry(entry, node, head, hlist) { -+ if (!entry->ptype) { -+ if (entry->single.func == probe -+ && entry->single.probe_private -+ == probe_private) -+ return entry; -+ } else { -+ struct marker_probe_closure *closure; -+ closure = entry->multi; -+ for (i = 0; closure[i].func; i++) { -+ if (closure[i].func == probe && -+ closure[i].probe_private -+ == probe_private) -+ return entry; -+ } -+ } -+ } -+ } -+ return NULL; -+} -+ -+/** -+ * marker_probe_unregister_private_data - Disconnect a probe from a marker -+ * @probe: probe function -+ * @probe_private: probe private data -+ * -+ * Unregister a probe by providing the registered private data. -+ * Only removes the first marker found in hash table. -+ * Return 0 on success or error value. 
-+ * We do not need to call a synchronize_sched to make sure the probes have -+ * finished running before doing a module unload, because the module unload -+ * itself uses stop_machine(), which insures that every preempt disabled section -+ * have finished. -+ */ -+int marker_probe_unregister_private_data(marker_probe_func *probe, -+ void *probe_private) -+{ -+ struct marker_entry *entry; -+ int ret = 0; -+ struct marker_probe_closure *old; -+ -+ mutex_lock(&markers_mutex); -+ entry = get_marker_from_private_data(probe, probe_private); -+ if (!entry) { -+ ret = -ENOENT; -+ goto end; -+ } -+ if (entry->rcu_pending) -+ rcu_barrier(); -+ old = marker_entry_remove_probe(entry, NULL, probe_private); -+ mutex_unlock(&markers_mutex); -+ marker_update_probes(); /* may update entry */ -+ mutex_lock(&markers_mutex); -+ entry = get_marker_from_private_data(probe, probe_private); -+ WARN_ON(!entry); -+ entry->oldptr = old; -+ entry->rcu_pending = 1; -+ /* write rcu_pending before calling the RCU callback */ -+ smp_wmb(); -+ call_rcu(&entry->rcu, free_old_closure); -+ remove_marker(entry->name); /* Ignore busy error message */ -+end: -+ mutex_unlock(&markers_mutex); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); -+ -+/** -+ * marker_get_private_data - Get a marker's probe private data -+ * @name: marker name -+ * @probe: probe to match -+ * @num: get the nth matching probe's private data -+ * -+ * Returns the nth private data pointer (starting from 0) matching, or an -+ * ERR_PTR. -+ * Returns the private data pointer, or an ERR_PTR. -+ * The private data pointer should _only_ be dereferenced if the caller is the -+ * owner of the data, or its content could vanish. This is mostly used to -+ * confirm that a caller is the owner of a registered probe. 
-+ */ -+void *marker_get_private_data(const char *name, marker_probe_func *probe, -+ int num) -+{ -+ struct hlist_head *head; -+ struct hlist_node *node; -+ struct marker_entry *e; -+ size_t name_len = strlen(name) + 1; -+ u32 hash = jhash(name, name_len-1, 0); -+ int i; -+ -+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; -+ hlist_for_each_entry(e, node, head, hlist) { -+ if (!strcmp(name, e->name)) { -+ if (!e->ptype) { -+ if (num == 0 && e->single.func == probe) -+ return e->single.probe_private; -+ else -+ break; -+ } else { -+ struct marker_probe_closure *closure; -+ int match = 0; -+ closure = e->multi; -+ for (i = 0; closure[i].func; i++) { -+ if (closure[i].func != probe) -+ continue; -+ if (match++ == num) -+ return closure[i].probe_private; -+ } -+ } -+ } -+ } -+ return ERR_PTR(-ENOENT); -+} -+EXPORT_SYMBOL_GPL(marker_get_private_data); -diff --git a/kernel/module.c b/kernel/module.c -index 624e7ee..ae16b04 100644 ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -1507,6 +1507,8 @@ static struct module *load_module(void __user *umod, - struct exception_table_entry *extable; - mm_segment_t old_fs; - int gpgsig_ok; -+ unsigned int markersindex; -+ unsigned int markersstringsindex; - - DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", - umod, len, uargs); -@@ -1739,6 +1741,9 @@ static struct module *load_module(void __user *umod, - tainted |= TAINT_FORCED_MODULE; - } - #endif -+ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); -+ markersstringsindex = find_sec(hdr, sechdrs, secstrings, -+ "__markers_strings"); - - /* Now do relocations. 
*/ - for (i = 1; i < hdr->e_shnum; i++) { -@@ -1762,6 +1767,12 @@ static struct module *load_module(void __user *umod, - goto cleanup; - } - -+#ifdef CONFIG_MARKERS -+ mod->markers = (void *)sechdrs[markersindex].sh_addr; -+ mod->num_markers = -+ sechdrs[markersindex].sh_size / sizeof(*mod->markers); -+#endif -+ - /* Set up and sort exception table */ - mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); - mod->extable = extable = (void *)sechdrs[exindex].sh_addr; -@@ -1773,6 +1784,12 @@ static struct module *load_module(void __user *umod, - - add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); - -+#ifdef CONFIG_MARKERS -+ if (!tainted) -+ marker_update_probe_range(mod->markers, -+ mod->markers + mod->num_markers); -+#endif -+ - err = module_finalize(hdr, sechdrs, mod); - if (err < 0) - goto cleanup; -@@ -2190,6 +2207,20 @@ void struct_module(struct module *mod) { return; } - EXPORT_SYMBOL(struct_module); - #endif - -+#ifdef CONFIG_MARKERS -+void module_update_markers(void) -+{ -+ struct module *mod; -+ -+ down(&module_mutex); -+ list_for_each_entry(mod, &modules, list) -+ if (!tainted) -+ marker_update_probe_range(mod->markers, -+ mod->markers + mod->num_markers); -+ up(&module_mutex); -+} -+#endif -+ - static int __init modules_init(void) - { - return subsystem_register(&module_subsys); -diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c -index 1b16bfc..20ade31 100644 ---- a/kernel/rcupdate.c -+++ b/kernel/rcupdate.c -@@ -46,6 +46,7 @@ - #include - #include - #include -+#include - - /* Definition for rcupdate control block. */ - struct rcu_ctrlblk rcu_ctrlblk = -@@ -98,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head, - local_irq_restore(flags); - } - -+static atomic_t rcu_barrier_cpu_count; -+static DEFINE_MUTEX(rcu_barrier_mutex); -+static struct completion rcu_barrier_completion; -+ - /** - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. - * @head: structure to be used for queueing the RCU updates. 
-@@ -410,6 +415,44 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, - return NOTIFY_OK; - } - -+static void rcu_barrier_callback(struct rcu_head *notused) -+{ -+ if (atomic_dec_and_test(&rcu_barrier_cpu_count)) -+ complete(&rcu_barrier_completion); -+} -+ -+/* -+ * Called with preemption disabled, and from cross-cpu IRQ context. -+ */ -+static void rcu_barrier_func(void *notused) -+{ -+ int cpu = smp_processor_id(); -+ struct rcu_data *rdp = &per_cpu(rcu_data, cpu); -+ struct rcu_head *head; -+ -+ head = &rdp->barrier; -+ atomic_inc(&rcu_barrier_cpu_count); -+ call_rcu(head, rcu_barrier_callback); -+} -+ -+/** -+ * rcu_barrier - Wait until all the in-flight RCUs are complete. -+ */ -+void rcu_barrier(void) -+{ -+ BUG_ON(in_interrupt()); -+ /* Take cpucontrol mutex to protect against CPU hotplug */ -+ mutex_lock(&rcu_barrier_mutex); -+ init_completion(&rcu_barrier_completion); -+ atomic_set(&rcu_barrier_cpu_count, 0); -+ on_each_cpu(rcu_barrier_func, NULL, 0, 1); -+ wait_for_completion(&rcu_barrier_completion); -+ mutex_unlock(&rcu_barrier_mutex); -+} -+EXPORT_SYMBOL_GPL(rcu_barrier); -+ -+ -+ - static struct notifier_block __devinitdata rcu_nb = { - .notifier_call = rcu_cpu_notify, - }; -diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost -index b3d31b5..b100a32 100644 ---- a/scripts/Makefile.modpost -+++ b/scripts/Makefile.modpost -@@ -13,6 +13,7 @@ - # 2) modpost is then used to - # 3) create one .mod.c file pr. 
module - # 4) create one Module.symvers file with CRC for all exported symbols -+# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers - # 5) compile all .mod.c files - # 6) final link of the module to a file - -@@ -40,6 +41,11 @@ include scripts/Makefile.lib - - symverfile := $(objtree)/Module.symvers - -+kernelmarkersfile := $(objtree)/Module.markers -+modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers -+ -+markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) -+ - # Step 1), find all modules listed in $(MODVERDIR)/ - __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) - modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) -@@ -53,16 +59,27 @@ quiet_cmd_modpost = MODPOST - cmd_modpost = scripts/mod/modpost \ - $(if $(CONFIG_MODVERSIONS),-m) \ - $(if $(KBUILD_EXTMOD),-i,-o) $(symverfile) \ -+ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ -+ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ - $(filter-out FORCE,$^) - - .PHONY: __modpost - __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE - $(call cmd,modpost) - -+quiet_cmd_kernel-mod = MODPOST $@ -+ cmd_kernel-mod = $(cmd_modpost) $@ -+ -+vmlinux.o: FORCE -+ $(call cmd,kernel-mod) -+ - # Declare generated files as targets for modpost - $(symverfile): __modpost ; - $(modules:.ko=.mod.c): __modpost ; - -+ifdef CONFIG_MARKERS -+$(markersfile): __modpost ; -+endif - - # Step 5), compile all *.mod.c files - -diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c -index 2a174e5..c25948c 100644 ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -10,7 +10,8 @@ - * - * Usage: modpost vmlinux module1.o module2.o ... 
- */ -- -+#define _GNU_SOURCE -+#include - #include - #include "modpost.h" - -@@ -289,7 +290,9 @@ parse_elf(struct elf_info *info, const char *filename) - if (strcmp(secstrings+sechdrs[i].sh_name, ".modinfo") == 0) { - info->modinfo = (void *)hdr + sechdrs[i].sh_offset; - info->modinfo_len = sechdrs[i].sh_size; -- } -+ } else if (strcmp(secstrings+sechdrs[i].sh_name, "__markers_strings") == 0) -+ info->markers_strings_sec = i; -+ - if (sechdrs[i].sh_type != SHT_SYMTAB) - continue; - -@@ -397,6 +400,63 @@ is_vmlinux(const char *modname) - return strcmp(myname, "vmlinux") == 0; - } - -+static void get_markers(struct elf_info *info, struct module *mod) -+{ -+ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; -+ const char *strings = (const char *) info->hdr + sh->sh_offset; -+ const Elf_Sym *sym, *first_sym, *last_sym; -+ size_t n; -+ -+ -+ if (!info->markers_strings_sec) -+ return; -+ -+ /* -+ * First count the strings. We look for all the symbols defined -+ * in the __markers_strings section named __mstrtab_*. For -+ * these local names, the compiler puts a random .NNN suffix on, -+ * so the names don't correspond exactly. -+ */ -+ first_sym = last_sym = NULL; -+ n = 0; -+ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) -+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && -+ sym->st_shndx == info->markers_strings_sec && -+ !strncmp(info->strtab + sym->st_name, -+ "__mstrtab_", sizeof "__mstrtab_" - 1)) { -+ if (first_sym == NULL) -+ first_sym = sym; -+ last_sym = sym; -+ ++n; -+ } -+ -+ if (n == 0) -+ return; -+ /* -+ * Now collect each name and format into a line for the output. -+ * Lines look like: -+ * marker_name vmlinux marker %s format %d -+ * The format string after the second \t can use whitespace. 
-+ */ -+ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); -+ mod->nmarkers = n; -+ -+ n = 0; -+ for (sym = first_sym; sym <= last_sym; sym++) -+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && -+ sym->st_shndx == info->markers_strings_sec && -+ !strncmp(info->strtab + sym->st_name, -+ "__mstrtab_", sizeof "__mstrtab_" - 1)) { -+ const char *name = strings + sym->st_value; -+ const char *fmt = strchr(name, '\0') + 1; -+ char *line = NULL; -+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); -+ NOFAIL(line); -+ mod->markers[n++] = line; -+ } -+} -+ -+ - void - read_symbols(char *modname) - { -@@ -426,6 +486,7 @@ read_symbols(char *modname) - } - maybe_frob_version(modname, info.modinfo, info.modinfo_len, - (void *)info.modinfo - (void *)info.hdr); -+ get_markers(&info, mod); - parse_elf_finish(&info); - - /* Our trick to get versioning for struct_module - it's -@@ -682,6 +743,92 @@ write_dump(const char *fname) - write_if_changed(&buf, fname); - } - -+static void add_marker(struct module *mod, const char *name, const char *fmt) -+{ -+ char *line = NULL; -+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); -+ NOFAIL(line); -+ -+ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * -+ sizeof mod->markers[0]))); -+ mod->markers[mod->nmarkers++] = line; -+} -+ -+static void read_markers(const char *fname) -+{ -+ unsigned long size, pos = 0; -+ void *file = grab_file(fname, &size); -+ char *line; -+ -+ if (!file) /* No old markers, silently ignore */ -+ return; -+ -+ while ((line = get_next_line(&pos, file, size))) { -+ char *marker, *modname, *fmt; -+ struct module *mod; -+ -+ marker = line; -+ modname = strchr(marker, '\t'); -+ if (!modname) -+ goto fail; -+ *modname++ = '\0'; -+ fmt = strchr(modname, '\t'); -+ if (!fmt) -+ goto fail; -+ *fmt++ = '\0'; -+ if (*marker == '\0' || *modname == '\0') -+ goto fail; -+ -+ mod = find_module(modname); -+ if (!mod) { -+ if (is_vmlinux(modname)) -+ have_vmlinux = 1; -+ mod = 
new_module(NOFAIL(strdup(modname))); -+ mod->skip = 1; -+ } -+ -+ add_marker(mod, marker, fmt); -+ } -+ return; -+fail: -+ fatal("parse error in markers list file\n"); -+} -+ -+static int compare_strings(const void *a, const void *b) -+{ -+ return strcmp(*(const char **) a, *(const char **) b); -+} -+ -+static void write_markers(const char *fname) -+{ -+ struct buffer buf = { }; -+ struct module *mod; -+ size_t i; -+ -+ for (mod = modules; mod; mod = mod->next) -+ if (mod->markers != NULL) { -+ /* -+ * Sort the strings so we can skip duplicates when -+ * we write them out. -+ */ -+ qsort(mod->markers, mod->nmarkers, -+ sizeof mod->markers[0], &compare_strings); -+ for (i = 0; i < mod->nmarkers; ++i) { -+ char *line = mod->markers[i]; -+ buf_write(&buf, line, strlen(line)); -+ while (i + 1 < mod->nmarkers && -+ !strcmp(mod->markers[i], -+ mod->markers[i + 1])) -+ free(mod->markers[i++]); -+ free(mod->markers[i]); -+ } -+ free(mod->markers); -+ mod->markers = NULL; -+ } -+ -+ write_if_changed(&buf, fname); -+} -+ - int - main(int argc, char **argv) - { -@@ -690,8 +837,10 @@ main(int argc, char **argv) - char fname[SZ]; - char *dump_read = NULL, *dump_write = NULL; - int opt; -+ char *markers_read = NULL; -+ char *markers_write = NULL; - -- while ((opt = getopt(argc, argv, "i:mo:")) != -1) { -+ while ((opt = getopt(argc, argv, "i:mo:M:K:")) != -1) { - switch(opt) { - case 'i': - dump_read = optarg; -@@ -702,6 +851,12 @@ main(int argc, char **argv) - case 'o': - dump_write = optarg; - break; -+ case 'M': -+ markers_write = optarg; -+ break; -+ case 'K': -+ markers_read = optarg; -+ break; - default: - exit(1); - } -@@ -732,6 +887,12 @@ main(int argc, char **argv) - if (dump_write) - write_dump(dump_write); - -+ if (markers_read) -+ read_markers(markers_read); -+ -+ if (markers_write) -+ write_markers(markers_write); -+ - return 0; - } - -diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h -index 4871343..d79d7ea 100644 ---- a/scripts/mod/modpost.h -+++ 
b/scripts/mod/modpost.h -@@ -18,6 +18,7 @@ - #define Elf_Sym Elf32_Sym - #define ELF_ST_BIND ELF32_ST_BIND - #define ELF_ST_TYPE ELF32_ST_TYPE -+#define Elf_Section Elf32_Half - - #else - -@@ -26,7 +27,7 @@ - #define Elf_Sym Elf64_Sym - #define ELF_ST_BIND ELF64_ST_BIND - #define ELF_ST_TYPE ELF64_ST_TYPE -- -+#define Elf_Section Elf64_Half - #endif - - #if KERNEL_ELFDATA != HOST_ELFDATA -@@ -77,6 +78,8 @@ struct module { - int has_init; - int has_cleanup; - struct buffer dev_table_buf; -+ char **markers; -+ size_t nmarkers; - }; - - struct elf_info { -@@ -85,6 +88,7 @@ struct elf_info { - Elf_Shdr *sechdrs; - Elf_Sym *symtab_start; - Elf_Sym *symtab_stop; -+ Elf_Section markers_strings_sec; - const char *strtab; - char *modinfo; - unsigned int modinfo_len; From svn-commits at oss.oracle.com Thu Jul 3 20:01:41 2008 From: svn-commits at oss.oracle.com (svn-commits at oss.oracle.com) Date: Thu, 03 Jul 2008 20:01:41 -0700 Subject: [Kernelpatches-commits] wjhuang commits r8 - trunk/wjhuang/marker_patches Message-ID: Author: wjhuang Date: 2008-07-03 20:01:41 -0700 (Thu, 03 Jul 2008) New Revision: 8 Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch Log: upload marker pathes Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch (rev 0) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch 2008-07-04 03:01:41 UTC (rev 8) @@ -0,0 +1,1882 @@ +diff --git a/Makefile b/Makefile +index 7682056..d1b7d78 100644 +--- a/Makefile ++++ b/Makefile +@@ -601,7 +601,7 @@ 
quiet_cmd_vmlinux__ ?= LD $@ + cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ + -T $(vmlinux-lds) $(vmlinux-init) \ + --start-group $(vmlinux-main) --end-group \ +- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) ++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) + + # Generate new vmlinux version + quiet_cmd_vmlinux_version = GEN .version +@@ -725,11 +725,30 @@ debug_kallsyms: .tmp_map$(last_kallsyms) + + endif # ifdef CONFIG_KALLSYMS + ++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has ++# relevant sections renamed as per the linker script. ++quiet_cmd_vmlinux-modpost = LD $@ ++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ ++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ ++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) ++define rule_vmlinux-modpost ++ : ++ +$(call cmd,vmlinux-modpost) ++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ ++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd ++endef ++ + # vmlinux image - including updated kernel symbols +-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE ++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE ++ $(call vmlinux-modpost) + $(call if_changed_rule,vmlinux__) + $(Q)rm -f .old_version + ++# build vmlinux.o first to catch section mismatch errors early ++$(kallsyms.o): vmlinux.o ++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ++ $(call if_changed_rule,vmlinux-modpost) ++ + # The actual objects are generated when descending, + # make sure no implicit rule kicks in + $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; +diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig +index 7dec8f0..73f833c 100644 +--- a/arch/i386/Kconfig ++++ b/arch/i386/Kconfig +@@ -1205,6 +1205,14 @@ config KPROBES + a probepoint and specifies the callback. 
Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/i386/Kconfig.debug" +diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S +index a44d95d..23d73bb 100644 +--- a/arch/i386/kernel/vmlinux.lds.S ++++ b/arch/i386/kernel/vmlinux.lds.S +@@ -61,6 +61,7 @@ SECTIONS + /* writeable */ + .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } :data + +diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig +index c45b7b5..aee46db 100644 +--- a/arch/ia64/Kconfig ++++ b/arch/ia64/Kconfig +@@ -587,6 +587,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/ia64/Kconfig.debug" +diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S +index 5b0d5f6..9c63f83 100644 +--- a/arch/ia64/kernel/vmlinux.lds.S ++++ b/arch/ia64/kernel/vmlinux.lds.S +@@ -206,7 +206,7 @@ SECTIONS + + data : { } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) +- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } ++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index 41024aa..4ab8d59 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -1095,6 +1095,14 @@ config KPROBES + a probepoint and specifies the callback. 
Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/powerpc/Kconfig.debug" +diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S +index c02298a..709bc47 100644 +--- a/arch/powerpc/kernel/vmlinux.lds.S ++++ b/arch/powerpc/kernel/vmlinux.lds.S +@@ -176,11 +176,13 @@ SECTIONS + *(.data) + *(.sdata) + *(.got.plt) *(.got) ++ MARKER + } + #else + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) ++ MARKER + } + + .opd : { +diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig +index 974474a..c81600c 100644 +--- a/arch/s390/Kconfig ++++ b/arch/s390/Kconfig +@@ -505,6 +505,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/s390/Kconfig.debug" +diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S +index df0c16a..69826b1 100644 +--- a/arch/s390/kernel/vmlinux.lds.S ++++ b/arch/s390/kernel/vmlinux.lds.S +@@ -46,6 +46,7 @@ SECTIONS + + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig +index b627f8d..e0467d1 100644 +--- a/arch/sparc64/Kconfig ++++ b/arch/sparc64/Kconfig +@@ -427,6 +427,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. 
Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/sparc64/Kconfig.debug" +diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S +index b097379..1f10e43 100644 +--- a/arch/sparc64/kernel/vmlinux.lds.S ++++ b/arch/sparc64/kernel/vmlinux.lds.S +@@ -27,6 +27,7 @@ SECTIONS + .data : + { + *(.data) ++ MARKER + CONSTRUCTORS + } + .data1 : { *(.data1) } +diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig +index d284a9a..e556e06 100644 +--- a/arch/x86_64/Kconfig ++++ b/arch/x86_64/Kconfig +@@ -704,6 +704,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/x86_64/Kconfig.debug" +diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S +index f17185f..a33251a 100644 +--- a/arch/x86_64/kernel/vmlinux.lds.S ++++ b/arch/x86_64/kernel/vmlinux.lds.S +@@ -67,6 +67,7 @@ SECTIONS + /* Data */ + .data : AT(ADDR(.data) - LOAD_OFFSET) { + *(.data) ++ MARKER + CONSTRUCTORS + } :data + +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index 6a40707..9b0fab5 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -125,7 +125,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); + + // ================= main 802.3ad protocol functions ================== + static int ad_lacpdu_send(struct port *port); +-static int ad_marker_send(struct port *port, struct marker *marker); ++static int ad_marker_send(struct port *port, struct bond_marker *marker); + static void ad_mux_machine(struct port *port); + static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); + static void ad_tx_machine(struct port 
*port); +@@ -138,8 +138,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast); + static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); + static void ad_enable_collecting_distributing(struct port *port); + static void ad_disable_collecting_distributing(struct port *port); +-static void ad_marker_info_received(struct marker *marker_info, struct port *port); +-static void ad_marker_response_received(struct marker *marker, struct port *port); ++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); ++static void ad_marker_response_received(struct bond_marker *marker, struct port *port); + + + ///////////////////////////////////////////////////////////////////////////////// +@@ -903,12 +903,13 @@ static int ad_lacpdu_send(struct port *port) + * Returns: 0 on success + * < 0 on error + */ +-static int ad_marker_send(struct port *port, struct marker *marker) ++static int ad_marker_send(struct port *port, struct bond_marker *marker) + { + struct slave *slave = port->slave; + struct sk_buff *skb; +- struct marker_header *marker_header; +- int length = sizeof(struct marker_header); ++ struct bond_marker_header *marker_header; ++ int length = sizeof(struct bond_marker_header); ++ + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length + 16); +@@ -923,7 +924,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + +- marker_header = (struct marker_header *)skb_put(skb, length); ++ marker_header = (struct bond_marker_header *)skb_put(skb, length); + + marker_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it +@@ -1723,7 +1724,7 @@ static void ad_disable_collecting_distributing(struct port *port) + */ + static void ad_marker_info_send(struct port *port) + { +- struct marker marker; ++ struct 
bond_marker marker; + u16 index; + + // fill the marker PDU with the appropriate values +@@ -1756,13 +1757,14 @@ static void ad_marker_info_send(struct port *port) + * @port: the port we're looking at + * + */ +-static void ad_marker_info_received(struct marker *marker_info,struct port *port) ++static void ad_marker_info_received(struct bond_marker *marker_info, ++ struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + + // copy the received marker data to the response marker + //marker = *marker_info; +- memcpy(&marker, marker_info, sizeof(struct marker)); ++ memcpy(&marker, marker_info, sizeof(struct bond_marker)); + // change the marker subtype to marker response + marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; + // send the marker response +@@ -1781,7 +1783,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port + * response for marker PDU's, in this stage, but only to respond to marker + * information. + */ +-static void ad_marker_response_received(struct marker *marker, struct port *port) ++static void ad_marker_response_received(struct bond_marker *marker, ++ struct port *port) + { + marker=NULL; // just to satisfy the compiler + port=NULL; // just to satisfy the compiler +@@ -2179,15 +2182,16 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u + case AD_TYPE_MARKER: + // No need to convert fields to Little Endian since we don't use the marker's fields. 
+ +- switch (((struct marker *)lacpdu)->tlv_type) { ++ switch (((struct bond_marker *)lacpdu)->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + dprintk("Received Marker Information on port %d\n", port->actor_port_number); +- ad_marker_info_received((struct marker *)lacpdu, port); ++ ad_marker_info_received((struct bond_marker *)lacpdu, port); ++ + break; + + case AD_MARKER_RESPONSE_SUBTYPE: + dprintk("Received Marker Response on port %d\n", port->actor_port_number); +- ad_marker_response_received((struct marker *)lacpdu, port); ++ ad_marker_response_received((struct bond_marker *)lacpdu, port); + break; + + default: +diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h +index 6ad5ad6..bf93e7e 100644 +--- a/drivers/net/bonding/bond_3ad.h ++++ b/drivers/net/bonding/bond_3ad.h +@@ -92,7 +92,8 @@ typedef enum { + typedef enum { + AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype + AD_MARKER_RESPONSE_SUBTYPE // marker response subtype +-} marker_subtype_t; ++} bond_marker_subtype_t; ++ + + // timers types(43.4.9 in the 802.3ad standard) + typedef enum { +@@ -148,7 +149,7 @@ typedef struct lacpdu_header { + } lacpdu_header_t; + + // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) +-typedef struct marker { ++typedef struct bond_marker { + u8 subtype; // = 0x02 (marker PDU) + u8 version_number; // = 0x01 + u8 tlv_type; // = 0x01 (marker information) +@@ -161,12 +162,12 @@ typedef struct marker { + u8 tlv_type_terminator; // = 0x00 + u8 terminator_length; // = 0x00 + u8 reserved_90[90]; // = 0 +-} marker_t; ++} bond_marker_t; + +-typedef struct marker_header { ++typedef struct bond_marker_header { + struct ad_header ad_header; +- struct marker marker; +-} marker_header_t; ++ struct bond_marker marker; ++} bond_marker_header_t; + + #pragma pack() + +diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h +index 5102c6b..aed4049 100644 +--- a/drivers/scsi/qla4xxx/ql4_fw.h ++++ 
b/drivers/scsi/qla4xxx/ql4_fw.h +@@ -744,7 +744,7 @@ struct continuation_t1_entry { + #define ET_CONTINUE ET_CONT_T1 + + /* Marker entry structure*/ +-struct marker_entry { ++struct qla4_marker_entry { + struct qla4_header hdr; /* 00-03 */ + + uint32_t system_defined; /* 04-07 */ +diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c +index 4e532b4..43860eb 100644 +--- a/drivers/scsi/qla4xxx/ql4_iocb.c ++++ b/drivers/scsi/qla4xxx/ql4_iocb.c +@@ -65,7 +65,8 @@ int qla4xxx_get_req_pkt(struct scsi_qla_host *ha, + int qla4xxx_send_marker_iocb(struct scsi_qla_host *ha, + struct ddb_entry *ddb_entry, int lun) + { +- struct marker_entry *marker_entry; ++ struct qla4_marker_entry *marker_entry; ++ + unsigned long flags = 0; + uint8_t status = QLA_SUCCESS; + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index b9e964c..a175f1e 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -9,12 +9,21 @@ + /* Align . to a 8 byte boundary equals to maximum function alignment. */ + #define ALIGN_FUNCTION() . = ALIGN(8) + ++/* Kernel markers : pointers */ ++#define MARKER \ ++ . = ALIGN(8); \ ++ VMLINUX_SYMBOL(__start___markers) = .; \ ++ *(__markers) \ ++ VMLINUX_SYMBOL(__stop___markers) = .; ++ ++ + #define RODATA \ + . = ALIGN(4096); \ + .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_rodata) = .; \ + *(.rodata) *(.rodata.*) \ + *(__vermagic) /* Kernel version magic */ \ ++ *(__markers_strings) /* Markers: strings */ \ + } \ + \ + .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ +diff --git a/include/linux/marker.h b/include/linux/marker.h +new file mode 100644 +index 0000000..efbc82b +--- /dev/null ++++ b/include/linux/marker.h +@@ -0,0 +1,139 @@ ++#ifndef _LINUX_MARKER_H ++#define _LINUX_MARKER_H ++ ++/* ++ * Code markup for dynamic and static tracing. ++ * ++ * See Documentation/marker.txt. 
++ * ++ * (C) Copyright 2006 Mathieu Desnoyers ++ * ++ * This file is released under the GPLv2. ++ * See the file COPYING for more details. ++ */ ++ ++#include ++ ++struct module; ++struct marker; ++ ++/** ++ * marker_probe_func - Type of a marker probe function ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @args: variable argument list pointer. Use a pointer to overcome C's ++ * inability to pass this around as a pointer in a portable manner in ++ * the callee otherwise. ++ * ++ * Type of marker probe functions. They receive the mdata and need to parse the ++ * format string to recover the variable argument list. ++ */ ++typedef void marker_probe_func(void *probe_private, void *call_private, ++ const char *fmt, va_list *args); ++ ++struct marker_probe_closure { ++ marker_probe_func *func; /* Callback */ ++ void *probe_private; /* Private probe data */ ++}; ++ ++struct marker { ++ const char *name; /* Marker name */ ++ const char *format; /* Marker format string, describing the ++ * variable argument list. ++ */ ++ char state; /* Marker state. */ ++ char ptype; /* probe type : 0 : single, 1 : multi */ ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++} __attribute__((aligned(8))); ++ ++#ifdef CONFIG_MARKERS ++ ++/* ++ * Note : the empty asm volatile with read constraint is used here instead of a ++ * "used" attribute to fix a gcc 4.1.x bug. ++ * Make sure the alignment of the structure in the __markers section will ++ * not add unwanted padding between the beginning of the section and the ++ * structure. Force alignment to the same alignment as the section start. ++ */ ++#define __trace_mark(name, call_private, format, args...) 
\ ++ do { \ ++ static const char __mstrtab_##name[] \ ++ __attribute__((section("__markers_strings"))) \ ++ = #name "\0" format; \ ++ static struct marker __mark_##name \ ++ __attribute__((section("__markers"), aligned(8))) = \ ++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ ++ 0, 0, marker_probe_cb, \ ++ { __mark_empty_function, NULL}, NULL }; \ ++ __mark_check_format(format, ## args); \ ++ if (unlikely(__mark_##name.state)) { \ ++ (*__mark_##name.call) \ ++ (&__mark_##name, call_private, \ ++ format, ## args); \ ++ } \ ++ } while (0) ++ ++extern void marker_update_probe_range(struct marker *begin, ++ struct marker *end); ++#else /* !CONFIG_MARKERS */ ++#define __trace_mark(name, call_private, format, args...) \ ++ __mark_check_format(format, ## args) ++static inline void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ } ++#endif /* CONFIG_MARKERS */ ++ ++/** ++ * trace_mark - Marker ++ * @name: marker name, not quoted. ++ * @format: format string ++ * @args...: variable argument list ++ * ++ * Places a marker. ++ */ ++#define trace_mark(name, format, args...) \ ++ __trace_mark(name, NULL, format, ## args) ++ ++/** ++ * MARK_NOARGS - Format string for a marker with no argument. ++ */ ++#define MARK_NOARGS " " ++ ++/* To be used for string format validity checking with gcc */ ++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) ++{ ++} ++ ++extern marker_probe_func __mark_empty_function; ++ ++extern void marker_probe_cb(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++extern void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++ ++/* ++ * Connect a probe to a marker. ++ * private data pointer must be a valid allocated memory address, or NULL. 
++ */ ++extern int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private); ++ ++/* ++ * Returns the private data given to marker_probe_register. ++ */ ++extern int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private); ++/* ++ * Unregister a marker by providing the registered private data. ++ */ ++extern int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private); ++ ++extern void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num); ++ ++#endif +diff --git a/include/linux/module.h b/include/linux/module.h +index c458418..627afb9 100644 +--- a/include/linux/module.h ++++ b/include/linux/module.h +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -348,6 +349,10 @@ struct module + /* The command line arguments (may be mangled). People like + keeping pointers to this stuff */ + char *args; ++#ifdef CONFIG_MARKERS ++ struct marker *markers; ++ unsigned int num_markers; ++#endif + }; + + /* FIXME: It'd be nice to isolate modules during init, too, so they +@@ -467,6 +472,7 @@ int register_module_notifier(struct notifier_block * nb); + int unregister_module_notifier(struct notifier_block * nb); + + extern void print_modules(void); ++extern void module_update_markers(void); + + struct device_driver; + void module_add_driver(struct module *, struct device_driver *); +@@ -569,6 +575,10 @@ static inline void print_modules(void) + { + } + ++static inline void module_update_markers(void) ++{ ++} ++ + struct device_driver; + struct module; + +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index b4ca73d..5f12d1b 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -42,6 +42,19 @@ + #include + #include + ++/* ++ * Prevent the compiler from merging or refetching accesses. 
The compiler ++ * is also forbidden from reordering successive instances of ACCESS_ONCE(), ++ * but only when the compiler is aware of some particular ordering. One way ++ * to make the compiler aware of ordering is to put the two invocations of ++ * ACCESS_ONCE() in different C statements. ++ * ++ * This macro does absolutely -nothing- to prevent the CPU from reordering, ++ * merging, or refetching absolutely anything at any time. ++ * ++ */ ++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) ++ + /** + * struct rcu_head - callback structure for use with RCU + * @next: next update requests in a list +diff --git a/kernel/Makefile b/kernel/Makefile +index ed4af9c..b4ad7a7 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -50,6 +50,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o + obj-$(CONFIG_RELAY) += relay.o + obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o + obj-$(CONFIG_TASKSTATS) += taskstats.o ++obj-$(CONFIG_MARKERS) += marker.o + obj-$(CONFIG_UTRACE) += utrace.o + + ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) +diff --git a/kernel/marker.c b/kernel/marker.c +new file mode 100644 +index 0000000..c4c2cd8 +--- /dev/null ++++ b/kernel/marker.c +@@ -0,0 +1,851 @@ ++/* ++ * Copyright (C) 2007 Mathieu Desnoyers ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern struct marker __start___markers[]; ++extern struct marker __stop___markers[]; ++ ++/* Set to 1 to enable marker debug output */ ++const int marker_debug; ++ ++/* ++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin ++ * and module markers and the hash table. ++ */ ++static DEFINE_MUTEX(markers_mutex); ++ ++/* ++ * Marker hash table, containing the active markers. ++ * Protected by module_mutex. ++ */ ++#define MARKER_HASH_BITS 6 ++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) ++ ++/* ++ * Note about RCU : ++ * It is used to make sure every handler has finished using its private data ++ * between two consecutive operation (add or remove) on a given marker. It is ++ * also used to delay the free of multiple probes array until a quiescent state ++ * is reached. ++ * marker entries modifications are protected by the markers_mutex. ++ */ ++struct marker_entry { ++ struct hlist_node hlist; ++ char *format; ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++ int refcount; /* Number of times armed. 0 if disarmed. */ ++ struct rcu_head rcu; ++ void *oldptr; ++ char rcu_pending:1; ++ char ptype:1; ++ char name[0]; /* Contains name'\0'format'\0' */ ++}; ++ ++static struct hlist_head marker_table[MARKER_TABLE_SIZE]; ++ ++/** ++ * __mark_empty_function - Empty probe callback ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @...: variable argument list ++ * ++ * Empty callback provided as a probe to the markers. 
By providing this to a ++ * disabled marker, we make sure the execution flow is always valid even ++ * though the function pointer change and the marker enabling are two distinct ++ * operations that modifies the execution flow of preemptible code. ++ */ ++void __mark_empty_function(void *probe_private, void *call_private, ++ const char *fmt, va_list *args) ++{ ++} ++EXPORT_SYMBOL_GPL(__mark_empty_function); ++ ++/* ++ * marker_probe_cb Callback that prepares the variable argument list for probes. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we ++ * need to put a full smp_rmb() in this branch. This is why we do not use ++ * rcu_dereference() for the pointer read. ++ */ ++void marker_probe_cb(const struct marker *mdata, void *call_private, ++ const char *fmt, ...) ++{ ++ va_list args; ++ char ptype; ++ ++ /* ++ * disabling preemption to make sure the teardown of the callbacks can ++ * be done correctly when they are in modules and they insure RCU read ++ * coherency. ++ */ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ va_start(args, fmt); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ va_end(args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. 
Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. ++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) { ++ va_start(args, fmt); ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ va_end(args); ++ } ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb); ++ ++/* ++ * marker_probe_cb Callback that does not prepare the variable argument list. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Should be connected to markers "MARK_NOARGS". ++ */ ++void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...) ++{ ++ va_list args; /* not initialized */ ++ char ptype; ++ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. 
++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); ++ ++static void free_old_closure(struct rcu_head *head) ++{ ++ struct marker_entry *entry = container_of(head, ++ struct marker_entry, rcu); ++ kfree(entry->oldptr); ++ /* Make sure we free the data before setting the pending flag to 0 */ ++ smp_wmb(); ++ entry->rcu_pending = 0; ++} ++ ++static void debug_print_probes(struct marker_entry *entry) ++{ ++ int i; ++ ++ if (!marker_debug) ++ return; ++ ++ if (!entry->ptype) { ++ printk(KERN_DEBUG "Single probe : %p %p\n", ++ entry->single.func, ++ entry->single.probe_private); ++ } else { ++ for (i = 0; entry->multi[i].func; i++) ++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, ++ entry->multi[i].func, ++ entry->multi[i].probe_private); ++ } ++} ++ ++static struct marker_probe_closure * ++marker_entry_add_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0; ++ struct marker_probe_closure *old, *new; ++ ++ WARN_ON(!probe); ++ ++ debug_print_probes(entry); ++ old = entry->multi; ++ if (!entry->ptype) { ++ if (entry->single.func == probe && ++ entry->single.probe_private == probe_private) ++ return ERR_PTR(-EBUSY); ++ if (entry->single.func == __mark_empty_function) { ++ /* 0 -> 1 probes */ ++ entry->single.func = probe; ++ entry->single.probe_private = probe_private; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* 1 -> 2 probes */ ++ nr_probes = 1; ++ old = NULL; ++ } ++ } else { ++ /* (N -> N+1), (N != 0, 1) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) ++ if (old[nr_probes].func == probe ++ && old[nr_probes].probe_private ++ == probe_private) ++ return ERR_PTR(-EBUSY); ++ } ++ /* + 2 : one for new probe, one for NULL func */ ++ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), ++ GFP_KERNEL); ++ if (new == NULL) ++ return ERR_PTR(-ENOMEM); ++ if (!old) ++ new[0] = entry->single; ++ else ++ memcpy(new, old, ++ nr_probes * sizeof(struct marker_probe_closure)); ++ new[nr_probes].func = probe; ++ new[nr_probes].probe_private = probe_private; ++ entry->refcount = nr_probes + 1; ++ entry->multi = new; ++ entry->ptype = 1; ++ debug_print_probes(entry); ++ return old; ++} ++ ++static struct marker_probe_closure * ++marker_entry_remove_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0, nr_del = 0, i; ++ struct marker_probe_closure *old, *new; ++ ++ old = entry->multi; ++ ++ debug_print_probes(entry); ++ if (!entry->ptype) { ++ /* 0 -> N is an error */ ++ WARN_ON(entry->single.func == __mark_empty_function); ++ /* 1 -> 0 probes */ ++ WARN_ON(probe && entry->single.func != probe); ++ WARN_ON(entry->single.probe_private != probe_private); ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* (N -> M), (N > 1, M >= 0) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { ++ if ((!probe || old[nr_probes].func == probe) ++ && old[nr_probes].probe_private ++ == probe_private) ++ nr_del++; ++ } ++ } ++ ++ if (nr_probes - nr_del == 0) { ++ /* N -> 0, (N > 1) */ ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ } else if (nr_probes - nr_del == 1) { ++ /* N -> 1, (N > 1) */ ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ entry->single = old[i]; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ } else { ++ int j = 0; ++ /* N -> M, (N > 1, M > 1) */ ++ /* + 1 for NULL */ ++ new = kzalloc((nr_probes - nr_del + 1) ++ * sizeof(struct marker_probe_closure), GFP_KERNEL); ++ if (new == NULL) ++ return 
ERR_PTR(-ENOMEM); ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ new[j++] = old[i]; ++ entry->refcount = nr_probes - nr_del; ++ entry->ptype = 1; ++ entry->multi = new; ++ } ++ debug_print_probes(entry); ++ return old; ++} ++ ++/* ++ * Get marker if the marker is present in the marker hash table. ++ * Must be called with markers_mutex held. ++ * Returns NULL if not present. ++ */ ++static struct marker_entry *get_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ u32 hash = jhash(name, strlen(name), 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) ++ return e; ++ } ++ return NULL; ++} ++ ++/* ++ * Add the marker to the marker hash table. Must be called with markers_mutex ++ * held. ++ */ ++static struct marker_entry *add_marker(const char *name, const char *format) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ size_t format_len = 0; ++ u32 hash = jhash(name, name_len-1, 0); ++ ++ if (format) ++ format_len = strlen(format) + 1; ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ printk(KERN_NOTICE ++ "Marker %s busy\n", name); ++ return ERR_PTR(-EBUSY); /* Already there */ ++ } ++ } ++ /* ++ * Using kmalloc here to allocate a variable length element. Could ++ * cause some memory fragmentation if overused. 
++ */ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return ERR_PTR(-ENOMEM); ++ memcpy(&e->name[0], name, name_len); ++ if (format) { ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ } else { ++ e->format = NULL; ++ e->call = marker_probe_cb; ++ } ++ e->single.func = __mark_empty_function; ++ e->single.probe_private = NULL; ++ e->multi = NULL; ++ e->ptype = 0; ++ e->refcount = 0; ++ e->rcu_pending = 0; ++ hlist_add_head(&e->hlist, head); ++ return e; ++} ++ ++/* ++ * Remove the marker from the marker hash table. Must be called with mutex_lock ++ * held. ++ */ ++static int remove_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ int found = 0; ++ size_t len = strlen(name) + 1; ++ u32 hash = jhash(name, len-1, 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) ++ return -ENOENT; ++ if (e->single.func != __mark_empty_function) ++ return -EBUSY; ++ hlist_del(&e->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if (e->rcu_pending) ++ rcu_barrier(); ++ kfree(e); ++ return 0; ++} ++ ++/* ++ * Set the mark_entry format to the format found in the element. 
++ */ ++static int marker_set_format(struct marker_entry **entry, const char *format) ++{ ++ struct marker_entry *e; ++ size_t name_len = strlen((*entry)->name) + 1; ++ size_t format_len = strlen(format) + 1; ++ ++ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return -ENOMEM; ++ memcpy(&e->name[0], (*entry)->name, name_len); ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ e->single = (*entry)->single; ++ e->multi = (*entry)->multi; ++ e->ptype = (*entry)->ptype; ++ e->refcount = (*entry)->refcount; ++ e->rcu_pending = 0; ++ hlist_add_before(&e->hlist, &(*entry)->hlist); ++ hlist_del(&(*entry)->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if ((*entry)->rcu_pending) ++ rcu_barrier(); ++ kfree(*entry); ++ *entry = e; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ return 0; ++} ++ ++/* ++ * Sets the probe callback corresponding to one marker. ++ */ ++static int set_marker(struct marker_entry **entry, struct marker *elem, ++ int active) ++{ ++ int ret; ++ WARN_ON(strcmp((*entry)->name, elem->name) != 0); ++ ++ if ((*entry)->format) { ++ if (strcmp((*entry)->format, elem->format) != 0) { ++ printk(KERN_NOTICE ++ "Format mismatch for probe %s " ++ "(%s), marker (%s)\n", ++ (*entry)->name, ++ (*entry)->format, ++ elem->format); ++ return -EPERM; ++ } ++ } else { ++ ret = marker_set_format(entry, elem->format); ++ if (ret) ++ return ret; ++ } ++ ++ /* ++ * probe_cb setup (statically known) is done here. It is ++ * asynchronous with the rest of execution, therefore we only ++ * pass from a "safe" callback (with argument) to an "unsafe" ++ * callback (does not set arguments). 
++	 */
++	elem->call = (*entry)->call;
++	/*
++	 * Sanity check :
++	 * We only update the single probe private data when the ptr is
++	 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
++	 */
++	WARN_ON(elem->single.func != __mark_empty_function
++		&& elem->single.probe_private
++		!= (*entry)->single.probe_private &&
++		!elem->ptype);
++	elem->single.probe_private = (*entry)->single.probe_private;
++	/*
++	 * Make sure the private data is valid when we update the
++	 * single probe ptr.
++	 */
++	smp_wmb();
++	elem->single.func = (*entry)->single.func;
++	/*
++	 * We also make sure that the new probe callbacks array is consistent
++	 * before setting a pointer to it.
++	 */
++	rcu_assign_pointer(elem->multi, (*entry)->multi);
++	/*
++	 * Update the function or multi probe array pointer before setting the
++	 * ptype.
++	 */
++	smp_wmb();
++	elem->ptype = (*entry)->ptype;
++	/* Written last, after the callback fields above have been set. */
++	elem->state = active;
++
++	return 0;
++}
++
++/*
++ * Disable a marker and its probe callback.
++ *
++ * Clears elem->state, points elem->single.func back at
++ * __mark_empty_function and resets elem->ptype to 0 (single probe).
++ * elem->call and the single probe private data are left untouched.
++ *
++ * Note: the original callback is only guaranteed to be unused once a
++ * synchronize_sched() has been issued after the probe function was reset to
++ * the empty function. This is ensured by preemption being disabled around the
++ * call site.
++ */
++static void disable_marker(struct marker *elem)
++{
++	/* leave "call" as is. It is known statically. */
++	elem->state = 0;
++	elem->single.func = __mark_empty_function;
++	/* Update the function before setting the ptype */
++	smp_wmb();
++	elem->ptype = 0;	/* single probe */
++	/*
++	 * Leave the private data and id there, because removal is racy and
++	 * should be done only after a synchronize_sched(). These are never used
++	 * until the next initialization anyway.
++	 */
++}
++
++/**
++ * marker_update_probe_range - Update a probe range
++ * @begin: beginning of the range
++ * @end: end of the range
++ *
++ * Updates the probe callback corresponding to a range of markers.
++ */ ++void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ ++ struct marker *iter; ++ struct marker_entry *mark_entry; ++ ++ mutex_lock(&markers_mutex); ++ for (iter = begin; iter < end; iter++) { ++ mark_entry = get_marker(iter->name); ++ if (mark_entry) { ++ set_marker(&mark_entry, iter, ++ !!mark_entry->refcount); ++ /* ++ * ignore error, continue ++ */ ++ } else { ++ disable_marker(iter); ++ } ++ } ++ mutex_unlock(&markers_mutex); ++} ++ ++/* ++ * Update probes, removing the faulty probes. ++ * Issues a synchronize_sched() when no reference to the module passed ++ * as parameter is found in the probes so the probe module can be ++ * safely unloaded from now on. ++ * ++ * Internal callback only changed before the first probe is connected to it. ++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 ++ * transitions. All other transitions will leave the old private data valid. ++ * This makes the non-atomicity of the callback/private data updates valid. ++ * ++ * "special case" updates : ++ * 0 -> 1 callback ++ * 1 -> 0 callback ++ * 1 -> 2 callbacks ++ * 2 -> 1 callbacks ++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. ++ * Site effect : marker_set_format may delete the marker entry (creating a ++ * replacement). ++ */ ++static void marker_update_probes(void) ++{ ++ /* Core kernel markers */ ++ marker_update_probe_range(__start___markers, __stop___markers); ++ /* Markers in modules. */ ++ module_update_markers(); ++} ++ ++/** ++ * marker_probe_register - Connect a probe to a marker ++ * @name: marker name ++ * @format: format string ++ * @probe: probe handler ++ * @probe_private: probe private data ++ * ++ * private data must be a valid allocated memory address, or NULL. ++ * Returns 0 if ok, error value on error. ++ * The probe address must at least be aligned on the architecture pointer size. 
++ */
++int marker_probe_register(const char *name, const char *format,
++			marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	int ret = 0;
++	struct marker_probe_closure *old;
++
++	mutex_lock(&markers_mutex);
++	/* Reuse the entry already hashed under this name, or create one. */
++	entry = get_marker(name);
++	if (!entry) {
++		entry = add_marker(name, format);
++		if (IS_ERR(entry)) {
++			ret = PTR_ERR(entry);
++			goto end;
++		}
++	}
++	/*
++	 * If we detect that a call_rcu is pending for this marker,
++	 * make sure it's executed now.
++	 */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_add_probe(entry, probe, probe_private);
++	if (IS_ERR(old)) {
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	/*
++	 * markers_mutex is dropped around marker_update_probes() because
++	 * marker_update_probe_range() takes that mutex itself.
++	 */
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	/* The entry may have been replaced in between: look it up again. */
++	entry = get_marker(name);
++	/*
++	 * NOTE(review): entry is dereferenced right after WARN_ON(!entry);
++	 * confirm that this lookup cannot fail here.
++	 */
++	WARN_ON(!entry);
++	/* Hand the previous closure over to free_old_closure() through RCU. */
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++end:
++	mutex_unlock(&markers_mutex);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_register);
++
++/**
++ * marker_probe_unregister - Disconnect a probe from a marker
++ * @name: marker name
++ * @probe: probe function pointer
++ * @probe_private: probe private data
++ *
++ * Returns 0 on success, or -ENOENT if no marker entry is registered under
++ * @name.
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled section
++ * has finished.
++ */ ++int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ struct marker_probe_closure *old; ++ int ret = 0; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, probe, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister); ++ ++static struct marker_entry * ++get_marker_from_private_data(marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ unsigned int i; ++ struct hlist_head *head; ++ struct hlist_node *node; ++ ++ for (i = 0; i < MARKER_TABLE_SIZE; i++) { ++ head = &marker_table[i]; ++ hlist_for_each_entry(entry, node, head, hlist) { ++ if (!entry->ptype) { ++ if (entry->single.func == probe ++ && entry->single.probe_private ++ == probe_private) ++ return entry; ++ } else { ++ struct marker_probe_closure *closure; ++ closure = entry->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func == probe && ++ closure[i].probe_private ++ == probe_private) ++ return entry; ++ } ++ } ++ } ++ } ++ return NULL; ++} ++ ++/** ++ * marker_probe_unregister_private_data - Disconnect a probe from a marker ++ * @probe: probe function ++ * @probe_private: probe private data ++ * ++ * Unregister a probe by providing the registered private data. ++ * Only removes the first marker found in hash table. ++ * Return 0 on success or error value. 
++ * We do not need to call a synchronize_sched to make sure the probes have ++ * finished running before doing a module unload, because the module unload ++ * itself uses stop_machine(), which insures that every preempt disabled section ++ * have finished. ++ */ ++int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private) ++{ ++ struct marker_entry *entry; ++ int ret = 0; ++ struct marker_probe_closure *old; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, NULL, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ WARN_ON(!entry); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(entry->name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); ++ ++/** ++ * marker_get_private_data - Get a marker's probe private data ++ * @name: marker name ++ * @probe: probe to match ++ * @num: get the nth matching probe's private data ++ * ++ * Returns the nth private data pointer (starting from 0) matching, or an ++ * ERR_PTR. ++ * Returns the private data pointer, or an ERR_PTR. ++ * The private data pointer should _only_ be dereferenced if the caller is the ++ * owner of the data, or its content could vanish. This is mostly used to ++ * confirm that a caller is the owner of a registered probe. 
++ */ ++void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ u32 hash = jhash(name, name_len-1, 0); ++ int i; ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ if (!e->ptype) { ++ if (num == 0 && e->single.func == probe) ++ return e->single.probe_private; ++ else ++ break; ++ } else { ++ struct marker_probe_closure *closure; ++ int match = 0; ++ closure = e->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func != probe) ++ continue; ++ if (match++ == num) ++ return closure[i].probe_private; ++ } ++ } ++ } ++ } ++ return ERR_PTR(-ENOENT); ++} ++EXPORT_SYMBOL_GPL(marker_get_private_data); +diff --git a/kernel/module.c b/kernel/module.c +index f9a5987..ac99222 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -1488,6 +1488,8 @@ static struct module *load_module(void __user *umod, + unsigned int unusedcrcindex; + unsigned int unusedgplindex; + unsigned int unusedgplcrcindex; ++ unsigned int markersindex; ++ unsigned int markersstringsindex; + struct module *mod; + long err = 0; + void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ +@@ -1756,6 +1758,9 @@ static struct module *load_module(void __user *umod, + add_taint(TAINT_FORCED_MODULE); + } + #endif ++ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); ++ markersstringsindex = find_sec(hdr, sechdrs, secstrings, ++ "__markers_strings"); + + /* Now do relocations. 
*/ + for (i = 1; i < hdr->e_shnum; i++) { +@@ -1778,6 +1783,11 @@ static struct module *load_module(void __user *umod, + if (err < 0) + goto cleanup; + } ++#ifdef CONFIG_MARKERS ++ mod->markers = (void *)sechdrs[markersindex].sh_addr; ++ mod->num_markers = ++ sechdrs[markersindex].sh_size / sizeof(*mod->markers); ++#endif + + /* Find duplicate symbols */ + err = verify_export_symbols(mod); +@@ -1796,6 +1806,11 @@ static struct module *load_module(void __user *umod, + + add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + ++#ifdef CONFIG_MARKERS ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++#endif + err = module_finalize(hdr, sechdrs, mod); + if (err < 0) + goto cleanup; +@@ -2259,3 +2274,18 @@ EXPORT_SYMBOL(module_remove_driver); + void struct_module(struct module *mod) { return; } + EXPORT_SYMBOL(struct_module); + #endif ++ ++#ifdef CONFIG_MARKERS ++void module_update_markers(void) ++{ ++ struct module *mod; ++ ++ mutex_lock(&module_mutex); ++ list_for_each_entry(mod, &modules, list) ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++ mutex_unlock(&module_mutex); ++} ++#endif ++ +diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost +index 0a64688..886b70c 100644 +--- a/scripts/Makefile.modpost ++++ b/scripts/Makefile.modpost +@@ -13,6 +13,7 @@ + # 2) modpost is then used to + # 3) create one .mod.c file pr. 
module + # 4) create one Module.symvers file with CRC for all exported symbols ++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers + # 5) compile all .mod.c files + # 6) final link of the module to a file + +@@ -41,6 +42,10 @@ include scripts/Makefile.lib + + kernelsymfile := $(objtree)/Module.symvers + modulesymfile := $(KBUILD_EXTMOD)/Module.symvers ++kernelmarkersfile := $(objtree)/Module.markers ++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers ++ ++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) + + # Step 1), find all modules listed in $(MODVERDIR)/ + __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) +@@ -58,16 +63,27 @@ quiet_cmd_modpost = MODPOST + $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ + $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ + $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ ++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ ++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ + $(filter-out FORCE,$^) + + PHONY += __modpost + __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE + $(call cmd,modpost) + ++quiet_cmd_kernel-mod = MODPOST $@ ++ cmd_kernel-mod = $(cmd_modpost) $@ ++ ++vmlinux.o: FORCE ++ $(call cmd,kernel-mod) ++ + # Declare generated files as targets for modpost + $(symverfile): __modpost ; + $(modules:.ko=.mod.c): __modpost ; + ++ifdef CONFIG_MARKERS ++$(markersfile): __modpost ; ++endif + + # Step 5), compile all *.mod.c files + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index dfde0e8..81e5910 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -10,7 +10,8 @@ + * + * Usage: modpost vmlinux module1.o module2.o ... 
 */ +- ++#define _GNU_SOURCE ++#include <stdio.h> + #include <ctype.h> + #include "modpost.h" + #include "../../include/linux/license.h" +@@ -385,6 +386,8 @@ static void parse_elf(struct elf_info *info, const char *filename) + info->export_unused_gpl_sec = i; + else if (strcmp(secname, "__ksymtab_gpl_future") == 0) + info->export_gpl_future_sec = i; ++ else if (strcmp(secname, "__markers_strings") == 0) ++ info->markers_strings_sec = i; + + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; +@@ -859,6 +862,62 @@ static void check_sec_ref(struct module *mod, const char *modname, + } + } + ++static void get_markers(struct elf_info *info, struct module *mod) ++{ ++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; ++ const char *strings = (const char *) info->hdr + sh->sh_offset; ++ const Elf_Sym *sym, *first_sym, *last_sym; ++ size_t n; ++ ++ if (!info->markers_strings_sec) ++ return; ++ ++ /* ++ * First count the strings. We look for all the symbols defined ++ * in the __markers_strings section named __mstrtab_*. For ++ * these local names, the compiler puts a random .NNN suffix on, ++ * so the names don't correspond exactly. ++ */ ++ first_sym = last_sym = NULL; ++ n = 0; ++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ if (first_sym == NULL) ++ first_sym = sym; ++ last_sym = sym; ++ ++n; ++ } ++ ++ if (n == 0) ++ return; ++ ++ /* ++ * Now collect each name and format into a line for the output. ++ * Lines look like: ++ * marker_name vmlinux marker %s format %d ++ * The format string after the second \t can use whitespace.
++ */ ++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); ++ mod->nmarkers = n; ++ ++ n = 0; ++ for (sym = first_sym; sym <= last_sym; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ const char *name = strings + sym->st_value; ++ const char *fmt = strchr(name, '\0') + 1; ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ mod->markers[n++] = line; ++ } ++} ++ + /** + * Functions used only during module init is marked __init and is stored in + * a .init.text section. Likewise data is marked __initdata and stored in +@@ -1061,6 +1120,8 @@ static void read_symbols(char *modname) + get_src_version(modname, mod->srcversion, + sizeof(mod->srcversion)-1); + ++ get_markers(&info, mod); ++ + parse_elf_finish(&info); + + /* Our trick to get versioning for struct_module - it's +@@ -1394,6 +1455,93 @@ static void write_dump(const char *fname) + write_if_changed(&buf, fname); + } + ++static void add_marker(struct module *mod, const char *name, const char *fmt) ++{ ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ ++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * ++ sizeof mod->markers[0]))); ++ mod->markers[mod->nmarkers++] = line; ++} ++ ++static void read_markers(const char *fname) ++{ ++ unsigned long size, pos = 0; ++ void *file = grab_file(fname, &size); ++ char *line; ++ ++ if (!file) /* No old markers, silently ignore */ ++ return; ++ ++ while ((line = get_next_line(&pos, file, size))) { ++ char *marker, *modname, *fmt; ++ struct module *mod; ++ ++ marker = line; ++ modname = strchr(marker, '\t'); ++ if (!modname) ++ goto fail; ++ *modname++ = '\0'; ++ fmt = strchr(modname, '\t'); ++ if (!fmt) ++ goto fail; ++ *fmt++ = '\0'; ++ if (*marker == '\0' || *modname == '\0') ++ goto fail; ++ ++ mod = 
find_module(modname); ++ if (!mod) { ++ if (is_vmlinux(modname)) ++ have_vmlinux = 1; ++ mod = new_module(NOFAIL(strdup(modname))); ++ mod->skip = 1; ++ } ++ ++ add_marker(mod, marker, fmt); ++ } ++ return; ++fail: ++ fatal("parse error in markers list file\n"); ++} ++ ++static int compare_strings(const void *a, const void *b) ++{ ++ return strcmp(*(const char **) a, *(const char **) b); ++} ++ ++static void write_markers(const char *fname) ++{ ++ struct buffer buf = { }; ++ struct module *mod; ++ size_t i; ++ ++ for (mod = modules; mod; mod = mod->next) ++ if ((!external_module || !mod->skip) && mod->markers != NULL) { ++ /* ++ * Sort the strings so we can skip duplicates when ++ * we write them out. ++ */ ++ qsort(mod->markers, mod->nmarkers, ++ sizeof mod->markers[0], &compare_strings); ++ for (i = 0; i < mod->nmarkers; ++i) { ++ char *line = mod->markers[i]; ++ buf_write(&buf, line, strlen(line)); ++ while (i + 1 < mod->nmarkers && ++ !strcmp(mod->markers[i], ++ mod->markers[i + 1])) ++ free(mod->markers[i++]); ++ free(mod->markers[i]); ++ } ++ free(mod->markers); ++ mod->markers = NULL; ++ } ++ ++ write_if_changed(&buf, fname); ++} ++ ++ + int main(int argc, char **argv) + { + struct module *mod; +@@ -1402,8 +1550,10 @@ int main(int argc, char **argv) + char *kernel_read = NULL, *module_read = NULL; + char *dump_write = NULL; + int opt; ++ char *markers_read = NULL; ++ char *markers_write = NULL; + +- while ((opt = getopt(argc, argv, "i:I:mo:a")) != -1) { ++ while ((opt = getopt(argc, argv, "i:I:mo:aM:K:")) != -1) { + switch(opt) { + case 'i': + kernel_read = optarg; +@@ -1421,6 +1571,12 @@ int main(int argc, char **argv) + case 'a': + all_versions = 1; + break; ++ case 'M': ++ markers_write = optarg; ++ break; ++ case 'K': ++ markers_read = optarg; ++ break; + default: + exit(1); + } +@@ -1460,5 +1616,11 @@ int main(int argc, char **argv) + if (dump_write) + write_dump(dump_write); + ++ if (markers_read) ++ read_markers(markers_read); ++ ++ if (markers_write) 
++ write_markers(markers_write); ++ + return 0; + } +diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h +index d398c61..27b05e6 100644 +--- a/scripts/mod/modpost.h ++++ b/scripts/mod/modpost.h +@@ -107,6 +107,8 @@ struct module { + int has_init; + int has_cleanup; + struct buffer dev_table_buf; ++ char **markers; ++ size_t nmarkers; + char srcversion[25]; + }; + +@@ -121,6 +123,7 @@ struct elf_info { + Elf_Section export_gpl_sec; + Elf_Section export_unused_gpl_sec; + Elf_Section export_gpl_future_sec; ++ Elf_Section markers_strings_sec; + const char *strtab; + char *modinfo; + unsigned int modinfo_len; Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch (rev 0) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch 2008-07-04 03:01:41 UTC (rev 8) @@ -0,0 +1,1882 @@ +diff --git a/Makefile b/Makefile +index 93bffdf..59693ac 100644 +--- a/Makefile ++++ b/Makefile +@@ -603,7 +603,7 @@ quiet_cmd_vmlinux__ ?= LD $@ + cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ + -T $(vmlinux-lds) $(vmlinux-init) \ + --start-group $(vmlinux-main) --end-group \ +- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) ++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) + + # Generate new vmlinux version + quiet_cmd_vmlinux_version = GEN .version +@@ -727,11 +727,30 @@ debug_kallsyms: .tmp_map$(last_kallsyms) + + endif # ifdef CONFIG_KALLSYMS + ++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has ++# relevant sections renamed as per the linker script. 
++quiet_cmd_vmlinux-modpost = LD $@ ++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ ++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ ++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) ++define rule_vmlinux-modpost ++ : ++ +$(call cmd,vmlinux-modpost) ++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ ++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd ++endef ++ + # vmlinux image - including updated kernel symbols +-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE ++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE ++ $(call vmlinux-modpost) + $(call if_changed_rule,vmlinux__) + $(Q)rm -f .old_version + ++# build vmlinux.o first to catch section mismatch errors early ++$(kallsyms.o): vmlinux.o ++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ++ $(call if_changed_rule,vmlinux-modpost) ++ + # The actual objects are generated when descending, + # make sure no implicit rule kicks in + $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; +diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig +index d04fcc6..ad73af5 100644 +--- a/arch/i386/Kconfig ++++ b/arch/i386/Kconfig +@@ -1212,6 +1212,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/i386/Kconfig.debug" +diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S +index 7f4ca6b..3978095 100644 +--- a/arch/i386/kernel/vmlinux.lds.S ++++ b/arch/i386/kernel/vmlinux.lds.S +@@ -67,6 +67,7 @@ SECTIONS + . 
= ALIGN(4096); + .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } :data + +diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig +index f916fba..b81a36b 100644 +--- a/arch/ia64/Kconfig ++++ b/arch/ia64/Kconfig +@@ -591,6 +591,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/ia64/Kconfig.debug" +diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S +index 69cba94..2c0a5c9 100644 +--- a/arch/ia64/kernel/vmlinux.lds.S ++++ b/arch/ia64/kernel/vmlinux.lds.S +@@ -207,7 +207,7 @@ SECTIONS + + data : { } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) +- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } ++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index 7295252..fb8a2b9 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -1091,6 +1091,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + endmenu + + source "arch/powerpc/Kconfig.debug" +diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S +index c02298a..709bc47 100644 +--- a/arch/powerpc/kernel/vmlinux.lds.S ++++ b/arch/powerpc/kernel/vmlinux.lds.S +@@ -176,11 +176,13 @@ SECTIONS + *(.data) + *(.sdata) + *(.got.plt) *(.got) ++ MARKER + } + #else + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) ++ MARKER + } + + .opd : { +diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig +index d121403..d8d276a 100644 +--- a/arch/s390/Kconfig ++++ b/arch/s390/Kconfig +@@ -503,6 +503,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/s390/Kconfig.debug" +diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S +index af9e69a..8443156 100644 +--- a/arch/s390/kernel/vmlinux.lds.S ++++ b/arch/s390/kernel/vmlinux.lds.S +@@ -46,6 +46,7 @@ SECTIONS + + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig +index b627f8d..e0467d1 100644 +--- a/arch/sparc64/Kconfig ++++ b/arch/sparc64/Kconfig +@@ -427,6 +427,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + endmenu + + source "arch/sparc64/Kconfig.debug" +diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S +index b097379..1f10e43 100644 +--- a/arch/sparc64/kernel/vmlinux.lds.S ++++ b/arch/sparc64/kernel/vmlinux.lds.S +@@ -27,6 +27,7 @@ SECTIONS + .data : + { + *(.data) ++ MARKER + CONSTRUCTORS + } + .data1 : { *(.data1) } +diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig +index 651d6cb..0e6570c 100644 +--- a/arch/x86_64/Kconfig ++++ b/arch/x86_64/Kconfig +@@ -711,6 +711,14 @@ config KPROBES + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". ++ ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + endmenu + + source "arch/x86_64/Kconfig.debug" +diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S +index 57e3255..867fce1 100644 +--- a/arch/x86_64/kernel/vmlinux.lds.S ++++ b/arch/x86_64/kernel/vmlinux.lds.S +@@ -68,6 +68,7 @@ SECTIONS + /* Data */ + .data : AT(ADDR(.data) - LOAD_OFFSET) { + *(.data) ++ MARKER + CONSTRUCTORS + } :data + +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index 3fb354d..e884942 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -126,7 +126,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); + + // ================= main 802.3ad protocol functions ================== + static int ad_lacpdu_send(struct port *port); +-static int ad_marker_send(struct port *port, struct marker *marker); ++static int ad_marker_send(struct port *port, struct bond_marker *marker); + static void ad_mux_machine(struct port *port); + static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); + static void ad_tx_machine(struct port *port); +@@ -139,8 +139,8 @@ static void 
ad_initialize_port(struct port *port, int lacp_fast); + static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); + static void ad_enable_collecting_distributing(struct port *port); + static void ad_disable_collecting_distributing(struct port *port); +-static void ad_marker_info_received(struct marker *marker_info, struct port *port); +-static void ad_marker_response_received(struct marker *marker, struct port *port); ++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); ++static void ad_marker_response_received(struct bond_marker *marker, struct port *port); + + + ///////////////////////////////////////////////////////////////////////////////// +@@ -912,12 +912,13 @@ static int ad_lacpdu_send(struct port *port) + * Returns: 0 on success + * < 0 on error + */ +-static int ad_marker_send(struct port *port, struct marker *marker) ++static int ad_marker_send(struct port *port, struct bond_marker *marker) + { + struct slave *slave = port->slave; + struct sk_buff *skb; +- struct marker_header *marker_header; +- int length = sizeof(struct marker_header); ++ struct bond_marker_header *marker_header; ++ int length = sizeof(struct bond_marker_header); ++ + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length + 16); +@@ -932,7 +933,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + +- marker_header = (struct marker_header *)skb_put(skb, length); ++ marker_header = (struct bond_marker_header *)skb_put(skb, length); + + marker_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it +@@ -1732,7 +1733,7 @@ static void ad_disable_collecting_distributing(struct port *port) + */ + static void ad_marker_info_send(struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + u16 index; + + // 
fill the marker PDU with the appropriate values +@@ -1765,13 +1766,14 @@ static void ad_marker_info_send(struct port *port) + * @port: the port we're looking at + * + */ +-static void ad_marker_info_received(struct marker *marker_info,struct port *port) ++static void ad_marker_info_received(struct bond_marker *marker_info, ++ struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + + // copy the received marker data to the response marker + //marker = *marker_info; +- memcpy(&marker, marker_info, sizeof(struct marker)); ++ memcpy(&marker, marker_info, sizeof(struct bond_marker)); + // change the marker subtype to marker response + marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; + // send the marker response +@@ -1790,7 +1792,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port + * response for marker PDU's, in this stage, but only to respond to marker + * information. + */ +-static void ad_marker_response_received(struct marker *marker, struct port *port) ++static void ad_marker_response_received(struct bond_marker *marker, ++ struct port *port) + { + marker=NULL; // just to satisfy the compiler + port=NULL; // just to satisfy the compiler +@@ -2187,15 +2190,16 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u + case AD_TYPE_MARKER: + // No need to convert fields to Little Endian since we don't use the marker's fields. 
+ +- switch (((struct marker *)lacpdu)->tlv_type) { ++ switch (((struct bond_marker *)lacpdu)->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + dprintk("Received Marker Information on port %d\n", port->actor_port_number); +- ad_marker_info_received((struct marker *)lacpdu, port); ++ ad_marker_info_received((struct bond_marker *)lacpdu, port); ++ + break; + + case AD_MARKER_RESPONSE_SUBTYPE: + dprintk("Received Marker Response on port %d\n", port->actor_port_number); +- ad_marker_response_received((struct marker *)lacpdu, port); ++ ad_marker_response_received((struct bond_marker *)lacpdu, port); + break; + + default: +diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h +index 6ad5ad6..bf93e7e 100644 +--- a/drivers/net/bonding/bond_3ad.h ++++ b/drivers/net/bonding/bond_3ad.h +@@ -92,7 +92,8 @@ typedef enum { + typedef enum { + AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype + AD_MARKER_RESPONSE_SUBTYPE // marker response subtype +-} marker_subtype_t; ++} bond_marker_subtype_t; ++ + + // timers types(43.4.9 in the 802.3ad standard) + typedef enum { +@@ -148,7 +149,7 @@ typedef struct lacpdu_header { + } lacpdu_header_t; + + // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) +-typedef struct marker { ++typedef struct bond_marker { + u8 subtype; // = 0x02 (marker PDU) + u8 version_number; // = 0x01 + u8 tlv_type; // = 0x01 (marker information) +@@ -161,12 +162,12 @@ typedef struct marker { + u8 tlv_type_terminator; // = 0x00 + u8 terminator_length; // = 0x00 + u8 reserved_90[90]; // = 0 +-} marker_t; ++} bond_marker_t; + +-typedef struct marker_header { ++typedef struct bond_marker_header { + struct ad_header ad_header; +- struct marker marker; +-} marker_header_t; ++ struct bond_marker marker; ++} bond_marker_header_t; + + #pragma pack() + +diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h +index 8c6f9b6..efc3d79 100644 +--- a/drivers/scsi/qla4xxx/ql4_fw.h ++++ 
b/drivers/scsi/qla4xxx/ql4_fw.h +@@ -671,7 +671,7 @@ struct continuation_t1_entry { + #define ET_CONTINUE ET_CONT_T1 + + /* Marker entry structure*/ +-struct marker_entry { ++struct qla4_marker_entry { + struct qla4_header hdr; /* 00-03 */ + + uint32_t system_defined; /* 04-07 */ +diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c +index d45e4d0..b85a484 100644 +--- a/drivers/scsi/qla4xxx/ql4_iocb.c ++++ b/drivers/scsi/qla4xxx/ql4_iocb.c +@@ -70,7 +70,8 @@ int qla4xxx_get_req_pkt(struct scsi_qla_host *ha, + int qla4xxx_send_marker_iocb(struct scsi_qla_host *ha, + struct ddb_entry *ddb_entry, int lun) + { +- struct marker_entry *marker_entry; ++ struct qla4_marker_entry *marker_entry; ++ + unsigned long flags = 0; + uint8_t status = QLA_SUCCESS; + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index b9e964c..a175f1e 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -9,12 +9,21 @@ + /* Align . to a 8 byte boundary equals to maximum function alignment. */ + #define ALIGN_FUNCTION() . = ALIGN(8) + ++/* Kernel markers : pointers */ ++#define MARKER \ ++ . = ALIGN(8); \ ++ VMLINUX_SYMBOL(__start___markers) = .; \ ++ *(__markers) \ ++ VMLINUX_SYMBOL(__stop___markers) = .; ++ ++ + #define RODATA \ + . = ALIGN(4096); \ + .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_rodata) = .; \ + *(.rodata) *(.rodata.*) \ + *(__vermagic) /* Kernel version magic */ \ ++ *(__markers_strings) /* Markers: strings */ \ + } \ + \ + .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ +diff --git a/include/linux/marker.h b/include/linux/marker.h +new file mode 100644 +index 0000000..efbc82b +--- /dev/null ++++ b/include/linux/marker.h +@@ -0,0 +1,139 @@ ++#ifndef _LINUX_MARKER_H ++#define _LINUX_MARKER_H ++ ++/* ++ * Code markup for dynamic and static tracing. ++ * ++ * See Documentation/marker.txt. 
++ * ++ * (C) Copyright 2006 Mathieu Desnoyers ++ * ++ * This file is released under the GPLv2. ++ * See the file COPYING for more details. ++ */ ++ ++#include ++ ++struct module; ++struct marker; ++ ++/** ++ * marker_probe_func - Type of a marker probe function ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @args: variable argument list pointer. Use a pointer to overcome C's ++ * inability to pass this around as a pointer in a portable manner in ++ * the callee otherwise. ++ * ++ * Type of marker probe functions. They receive the mdata and need to parse the ++ * format string to recover the variable argument list. ++ */ ++typedef void marker_probe_func(void *probe_private, void *call_private, ++ const char *fmt, va_list *args); ++ ++struct marker_probe_closure { ++ marker_probe_func *func; /* Callback */ ++ void *probe_private; /* Private probe data */ ++}; ++ ++struct marker { ++ const char *name; /* Marker name */ ++ const char *format; /* Marker format string, describing the ++ * variable argument list. ++ */ ++ char state; /* Marker state. */ ++ char ptype; /* probe type : 0 : single, 1 : multi */ ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++} __attribute__((aligned(8))); ++ ++#ifdef CONFIG_MARKERS ++ ++/* ++ * Note : the empty asm volatile with read constraint is used here instead of a ++ * "used" attribute to fix a gcc 4.1.x bug. ++ * Make sure the alignment of the structure in the __markers section will ++ * not add unwanted padding between the beginning of the section and the ++ * structure. Force alignment to the same alignment as the section start. ++ */ ++#define __trace_mark(name, call_private, format, args...) 
\ ++ do { \ ++ static const char __mstrtab_##name[] \ ++ __attribute__((section("__markers_strings"))) \ ++ = #name "\0" format; \ ++ static struct marker __mark_##name \ ++ __attribute__((section("__markers"), aligned(8))) = \ ++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ ++ 0, 0, marker_probe_cb, \ ++ { __mark_empty_function, NULL}, NULL }; \ ++ __mark_check_format(format, ## args); \ ++ if (unlikely(__mark_##name.state)) { \ ++ (*__mark_##name.call) \ ++ (&__mark_##name, call_private, \ ++ format, ## args); \ ++ } \ ++ } while (0) ++ ++extern void marker_update_probe_range(struct marker *begin, ++ struct marker *end); ++#else /* !CONFIG_MARKERS */ ++#define __trace_mark(name, call_private, format, args...) \ ++ __mark_check_format(format, ## args) ++static inline void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ } ++#endif /* CONFIG_MARKERS */ ++ ++/** ++ * trace_mark - Marker ++ * @name: marker name, not quoted. ++ * @format: format string ++ * @args...: variable argument list ++ * ++ * Places a marker. ++ */ ++#define trace_mark(name, format, args...) \ ++ __trace_mark(name, NULL, format, ## args) ++ ++/** ++ * MARK_NOARGS - Format string for a marker with no argument. ++ */ ++#define MARK_NOARGS " " ++ ++/* To be used for string format validity checking with gcc */ ++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) ++{ ++} ++ ++extern marker_probe_func __mark_empty_function; ++ ++extern void marker_probe_cb(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++extern void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++ ++/* ++ * Connect a probe to a marker. ++ * private data pointer must be a valid allocated memory address, or NULL. 
++ */ ++extern int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private); ++ ++/* ++ * Returns the private data given to marker_probe_register. ++ */ ++extern int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private); ++/* ++ * Unregister a marker by providing the registered private data. ++ */ ++extern int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private); ++ ++extern void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num); ++ ++#endif +diff --git a/include/linux/module.h b/include/linux/module.h +index 0460634..de5b51e 100644 +--- a/include/linux/module.h ++++ b/include/linux/module.h +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -353,6 +354,10 @@ struct module + /* The command line arguments (may be mangled). People like + keeping pointers to this stuff */ + char *args; ++#ifdef CONFIG_MARKERS ++ struct marker *markers; ++ unsigned int num_markers; ++#endif + }; + + /* FIXME: It'd be nice to isolate modules during init, too, so they +@@ -472,6 +477,7 @@ int register_module_notifier(struct notifier_block * nb); + int unregister_module_notifier(struct notifier_block * nb); + + extern void print_modules(void); ++extern void module_update_markers(void); + + struct device_driver; + void module_add_driver(struct module *, struct device_driver *); +@@ -574,6 +580,10 @@ static inline void print_modules(void) + { + } + ++static inline void module_update_markers(void) ++{ ++} ++ + struct device_driver; + struct module; + +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index b4ca73d..5f12d1b 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -42,6 +42,19 @@ + #include + #include + ++/* ++ * Prevent the compiler from merging or refetching accesses. 
The compiler ++ * is also forbidden from reordering successive instances of ACCESS_ONCE(), ++ * but only when the compiler is aware of some particular ordering. One way ++ * to make the compiler aware of ordering is to put the two invocations of ++ * ACCESS_ONCE() in different C statements. ++ * ++ * This macro does absolutely -nothing- to prevent the CPU from reordering, ++ * merging, or refetching absolutely anything at any time. ++ * ++ */ ++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) ++ + /** + * struct rcu_head - callback structure for use with RCU + * @next: next update requests in a list +diff --git a/kernel/Makefile b/kernel/Makefile +index ef832fa..5879e8b 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -50,6 +50,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o + obj-$(CONFIG_RELAY) += relay.o + obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o + obj-$(CONFIG_TASKSTATS) += taskstats.o ++obj-$(CONFIG_MARKERS) += marker.o + obj-$(CONFIG_UTRACE) += utrace.o + obj-$(CONFIG_PTRACE) += ptrace.o + +diff --git a/kernel/marker.c b/kernel/marker.c +new file mode 100644 +index 0000000..c4c2cd8 +--- /dev/null ++++ b/kernel/marker.c +@@ -0,0 +1,851 @@ ++/* ++ * Copyright (C) 2007 Mathieu Desnoyers ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern struct marker __start___markers[]; ++extern struct marker __stop___markers[]; ++ ++/* Set to 1 to enable marker debug output */ ++const int marker_debug; ++ ++/* ++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin ++ * and module markers and the hash table. ++ */ ++static DEFINE_MUTEX(markers_mutex); ++ ++/* ++ * Marker hash table, containing the active markers. ++ * Protected by module_mutex. ++ */ ++#define MARKER_HASH_BITS 6 ++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) ++ ++/* ++ * Note about RCU : ++ * It is used to make sure every handler has finished using its private data ++ * between two consecutive operation (add or remove) on a given marker. It is ++ * also used to delay the free of multiple probes array until a quiescent state ++ * is reached. ++ * marker entries modifications are protected by the markers_mutex. ++ */ ++struct marker_entry { ++ struct hlist_node hlist; ++ char *format; ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++ int refcount; /* Number of times armed. 0 if disarmed. */ ++ struct rcu_head rcu; ++ void *oldptr; ++ char rcu_pending:1; ++ char ptype:1; ++ char name[0]; /* Contains name'\0'format'\0' */ ++}; ++ ++static struct hlist_head marker_table[MARKER_TABLE_SIZE]; ++ ++/** ++ * __mark_empty_function - Empty probe callback ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @...: variable argument list ++ * ++ * Empty callback provided as a probe to the markers. 
By providing this to a ++ * disabled marker, we make sure the execution flow is always valid even ++ * though the function pointer change and the marker enabling are two distinct ++ * operations that modifies the execution flow of preemptible code. ++ */ ++void __mark_empty_function(void *probe_private, void *call_private, ++ const char *fmt, va_list *args) ++{ ++} ++EXPORT_SYMBOL_GPL(__mark_empty_function); ++ ++/* ++ * marker_probe_cb Callback that prepares the variable argument list for probes. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we ++ * need to put a full smp_rmb() in this branch. This is why we do not use ++ * rcu_dereference() for the pointer read. ++ */ ++void marker_probe_cb(const struct marker *mdata, void *call_private, ++ const char *fmt, ...) ++{ ++ va_list args; ++ char ptype; ++ ++ /* ++ * disabling preemption to make sure the teardown of the callbacks can ++ * be done correctly when they are in modules and they insure RCU read ++ * coherency. ++ */ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ va_start(args, fmt); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ va_end(args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. 
Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. ++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) { ++ va_start(args, fmt); ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ va_end(args); ++ } ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb); ++ ++/* ++ * marker_probe_cb Callback that does not prepare the variable argument list. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Should be connected to markers "MARK_NOARGS". ++ */ ++void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...) ++{ ++ va_list args; /* not initialized */ ++ char ptype; ++ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. 
++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); ++ ++static void free_old_closure(struct rcu_head *head) ++{ ++ struct marker_entry *entry = container_of(head, ++ struct marker_entry, rcu); ++ kfree(entry->oldptr); ++ /* Make sure we free the data before setting the pending flag to 0 */ ++ smp_wmb(); ++ entry->rcu_pending = 0; ++} ++ ++static void debug_print_probes(struct marker_entry *entry) ++{ ++ int i; ++ ++ if (!marker_debug) ++ return; ++ ++ if (!entry->ptype) { ++ printk(KERN_DEBUG "Single probe : %p %p\n", ++ entry->single.func, ++ entry->single.probe_private); ++ } else { ++ for (i = 0; entry->multi[i].func; i++) ++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, ++ entry->multi[i].func, ++ entry->multi[i].probe_private); ++ } ++} ++ ++static struct marker_probe_closure * ++marker_entry_add_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0; ++ struct marker_probe_closure *old, *new; ++ ++ WARN_ON(!probe); ++ ++ debug_print_probes(entry); ++ old = entry->multi; ++ if (!entry->ptype) { ++ if (entry->single.func == probe && ++ entry->single.probe_private == probe_private) ++ return ERR_PTR(-EBUSY); ++ if (entry->single.func == __mark_empty_function) { ++ /* 0 -> 1 probes */ ++ entry->single.func = probe; ++ entry->single.probe_private = probe_private; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* 1 -> 2 probes */ ++ nr_probes = 1; ++ old = NULL; ++ } ++ } else { ++ /* (N -> N+1), (N != 0, 1) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) ++ if (old[nr_probes].func == probe ++ && old[nr_probes].probe_private ++ == probe_private) ++ return ERR_PTR(-EBUSY); ++ } ++ /* + 2 : one for new probe, one for NULL func */ ++ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), ++ GFP_KERNEL); ++ if (new == NULL) ++ return ERR_PTR(-ENOMEM); ++ if (!old) ++ new[0] = entry->single; ++ else ++ memcpy(new, old, ++ nr_probes * sizeof(struct marker_probe_closure)); ++ new[nr_probes].func = probe; ++ new[nr_probes].probe_private = probe_private; ++ entry->refcount = nr_probes + 1; ++ entry->multi = new; ++ entry->ptype = 1; ++ debug_print_probes(entry); ++ return old; ++} ++ ++static struct marker_probe_closure * ++marker_entry_remove_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0, nr_del = 0, i; ++ struct marker_probe_closure *old, *new; ++ ++ old = entry->multi; ++ ++ debug_print_probes(entry); ++ if (!entry->ptype) { ++ /* 0 -> N is an error */ ++ WARN_ON(entry->single.func == __mark_empty_function); ++ /* 1 -> 0 probes */ ++ WARN_ON(probe && entry->single.func != probe); ++ WARN_ON(entry->single.probe_private != probe_private); ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* (N -> M), (N > 1, M >= 0) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { ++ if ((!probe || old[nr_probes].func == probe) ++ && old[nr_probes].probe_private ++ == probe_private) ++ nr_del++; ++ } ++ } ++ ++ if (nr_probes - nr_del == 0) { ++ /* N -> 0, (N > 1) */ ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ } else if (nr_probes - nr_del == 1) { ++ /* N -> 1, (N > 1) */ ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ entry->single = old[i]; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ } else { ++ int j = 0; ++ /* N -> M, (N > 1, M > 1) */ ++ /* + 1 for NULL */ ++ new = kzalloc((nr_probes - nr_del + 1) ++ * sizeof(struct marker_probe_closure), GFP_KERNEL); ++ if (new == NULL) ++ return 
ERR_PTR(-ENOMEM); ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ new[j++] = old[i]; ++ entry->refcount = nr_probes - nr_del; ++ entry->ptype = 1; ++ entry->multi = new; ++ } ++ debug_print_probes(entry); ++ return old; ++} ++ ++/* ++ * Get marker if the marker is present in the marker hash table. ++ * Must be called with markers_mutex held. ++ * Returns NULL if not present. ++ */ ++static struct marker_entry *get_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ u32 hash = jhash(name, strlen(name), 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) ++ return e; ++ } ++ return NULL; ++} ++ ++/* ++ * Add the marker to the marker hash table. Must be called with markers_mutex ++ * held. ++ */ ++static struct marker_entry *add_marker(const char *name, const char *format) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ size_t format_len = 0; ++ u32 hash = jhash(name, name_len-1, 0); ++ ++ if (format) ++ format_len = strlen(format) + 1; ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ printk(KERN_NOTICE ++ "Marker %s busy\n", name); ++ return ERR_PTR(-EBUSY); /* Already there */ ++ } ++ } ++ /* ++ * Using kmalloc here to allocate a variable length element. Could ++ * cause some memory fragmentation if overused. 
++ */ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return ERR_PTR(-ENOMEM); ++ memcpy(&e->name[0], name, name_len); ++ if (format) { ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ } else { ++ e->format = NULL; ++ e->call = marker_probe_cb; ++ } ++ e->single.func = __mark_empty_function; ++ e->single.probe_private = NULL; ++ e->multi = NULL; ++ e->ptype = 0; ++ e->refcount = 0; ++ e->rcu_pending = 0; ++ hlist_add_head(&e->hlist, head); ++ return e; ++} ++ ++/* ++ * Remove the marker from the marker hash table. Must be called with mutex_lock ++ * held. ++ */ ++static int remove_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ int found = 0; ++ size_t len = strlen(name) + 1; ++ u32 hash = jhash(name, len-1, 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) ++ return -ENOENT; ++ if (e->single.func != __mark_empty_function) ++ return -EBUSY; ++ hlist_del(&e->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if (e->rcu_pending) ++ rcu_barrier(); ++ kfree(e); ++ return 0; ++} ++ ++/* ++ * Set the mark_entry format to the format found in the element. 
++ */ ++static int marker_set_format(struct marker_entry **entry, const char *format) ++{ ++ struct marker_entry *e; ++ size_t name_len = strlen((*entry)->name) + 1; ++ size_t format_len = strlen(format) + 1; ++ ++ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return -ENOMEM; ++ memcpy(&e->name[0], (*entry)->name, name_len); ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ e->single = (*entry)->single; ++ e->multi = (*entry)->multi; ++ e->ptype = (*entry)->ptype; ++ e->refcount = (*entry)->refcount; ++ e->rcu_pending = 0; ++ hlist_add_before(&e->hlist, &(*entry)->hlist); ++ hlist_del(&(*entry)->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if ((*entry)->rcu_pending) ++ rcu_barrier(); ++ kfree(*entry); ++ *entry = e; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ return 0; ++} ++ ++/* ++ * Sets the probe callback corresponding to one marker. ++ */ ++static int set_marker(struct marker_entry **entry, struct marker *elem, ++ int active) ++{ ++ int ret; ++ WARN_ON(strcmp((*entry)->name, elem->name) != 0); ++ ++ if ((*entry)->format) { ++ if (strcmp((*entry)->format, elem->format) != 0) { ++ printk(KERN_NOTICE ++ "Format mismatch for probe %s " ++ "(%s), marker (%s)\n", ++ (*entry)->name, ++ (*entry)->format, ++ elem->format); ++ return -EPERM; ++ } ++ } else { ++ ret = marker_set_format(entry, elem->format); ++ if (ret) ++ return ret; ++ } ++ ++ /* ++ * probe_cb setup (statically known) is done here. It is ++ * asynchronous with the rest of execution, therefore we only ++ * pass from a "safe" callback (with argument) to an "unsafe" ++ * callback (does not set arguments). 
++ */ ++ elem->call = (*entry)->call; ++ /* ++ * Sanity check : ++ * We only update the single probe private data when the ptr is ++ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) ++ */ ++ WARN_ON(elem->single.func != __mark_empty_function ++ && elem->single.probe_private ++ != (*entry)->single.probe_private && ++ !elem->ptype); ++ elem->single.probe_private = (*entry)->single.probe_private; ++ /* ++ * Make sure the private data is valid when we update the ++ * single probe ptr. ++ */ ++ smp_wmb(); ++ elem->single.func = (*entry)->single.func; ++ /* ++ * We also make sure that the new probe callbacks array is consistent ++ * before setting a pointer to it. ++ */ ++ rcu_assign_pointer(elem->multi, (*entry)->multi); ++ /* ++ * Update the function or multi probe array pointer before setting the ++ * ptype. ++ */ ++ smp_wmb(); ++ elem->ptype = (*entry)->ptype; ++ elem->state = active; ++ ++ return 0; ++} ++ ++/* ++ * Disable a marker and its probe callback. ++ * Note: only after a synchronize_sched() issued after setting elem->call to the ++ * empty function insures that the original callback is not used anymore. This ++ * insured by preemption disabling around the call site. ++ */ ++static void disable_marker(struct marker *elem) ++{ ++ /* leave "call" as is. It is known statically. */ ++ elem->state = 0; ++ elem->single.func = __mark_empty_function; ++ /* Update the function before setting the ptype */ ++ smp_wmb(); ++ elem->ptype = 0; /* single probe */ ++ /* ++ * Leave the private data and id there, because removal is racy and ++ * should be done only after a synchronize_sched(). These are never used ++ * until the next initialization anyway. ++ */ ++} ++ ++/** ++ * marker_update_probe_range - Update a probe range ++ * @begin: beginning of the range ++ * @end: end of the range ++ * ++ * Updates the probe callback corresponding to a range of markers. 
++ */ ++void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ ++ struct marker *iter; ++ struct marker_entry *mark_entry; ++ ++ mutex_lock(&markers_mutex); ++ for (iter = begin; iter < end; iter++) { ++ mark_entry = get_marker(iter->name); ++ if (mark_entry) { ++ set_marker(&mark_entry, iter, ++ !!mark_entry->refcount); ++ /* ++ * ignore error, continue ++ */ ++ } else { ++ disable_marker(iter); ++ } ++ } ++ mutex_unlock(&markers_mutex); ++} ++ ++/* ++ * Update probes, removing the faulty probes. ++ * Issues a synchronize_sched() when no reference to the module passed ++ * as parameter is found in the probes so the probe module can be ++ * safely unloaded from now on. ++ * ++ * Internal callback only changed before the first probe is connected to it. ++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 ++ * transitions. All other transitions will leave the old private data valid. ++ * This makes the non-atomicity of the callback/private data updates valid. ++ * ++ * "special case" updates : ++ * 0 -> 1 callback ++ * 1 -> 0 callback ++ * 1 -> 2 callbacks ++ * 2 -> 1 callbacks ++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. ++ * Site effect : marker_set_format may delete the marker entry (creating a ++ * replacement). ++ */ ++static void marker_update_probes(void) ++{ ++ /* Core kernel markers */ ++ marker_update_probe_range(__start___markers, __stop___markers); ++ /* Markers in modules. */ ++ module_update_markers(); ++} ++ ++/** ++ * marker_probe_register - Connect a probe to a marker ++ * @name: marker name ++ * @format: format string ++ * @probe: probe handler ++ * @probe_private: probe private data ++ * ++ * private data must be a valid allocated memory address, or NULL. ++ * Returns 0 if ok, error value on error. ++ * The probe address must at least be aligned on the architecture pointer size. 
++ */
++int marker_probe_register(const char *name, const char *format,
++			marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	int ret = 0;
++	struct marker_probe_closure *old;
++
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		/* First probe for this name: create the hash table entry. */
++		entry = add_marker(name, format);
++		if (IS_ERR(entry)) {
++			ret = PTR_ERR(entry);
++			goto end;
++		}
++	}
++	/*
++	 * If we detect that a call_rcu is pending for this marker,
++	 * make sure it's executed now.
++	 */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_add_probe(entry, probe, probe_private);
++	if (IS_ERR(old)) {
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	/*
++	 * marker_update_probes() takes markers_mutex itself, so it has to be
++	 * called unlocked. The entry may be replaced or removed meanwhile,
++	 * which is why it is looked up again by name afterwards.
++	 */
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		/*
++		 * The entry was removed while markers_mutex was dropped, so
++		 * there is no rcu_head left to queue the free on. The markers
++		 * were already updated above; wait for probe callers that may
++		 * still be walking the old array, then free it directly.
++		 */
++		if (old) {
++			synchronize_sched();
++			kfree(old);
++		}
++		goto end;
++	}
++	/* Another update may have queued entry->rcu while we were unlocked. */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++end:
++	mutex_unlock(&markers_mutex);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_register);
++
++/**
++ * marker_probe_unregister - Disconnect a probe from a marker
++ * @name: marker name
++ * @probe: probe function pointer
++ * @probe_private: probe private data
++ *
++ * Returns 0 on success, or a negative error value (e.g. -ENOENT when no
++ * marker named @name is present in the hash table).
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled section
++ * has finished.
++ */ ++int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ struct marker_probe_closure *old; ++ int ret = 0; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, probe, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister); ++ ++static struct marker_entry * ++get_marker_from_private_data(marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ unsigned int i; ++ struct hlist_head *head; ++ struct hlist_node *node; ++ ++ for (i = 0; i < MARKER_TABLE_SIZE; i++) { ++ head = &marker_table[i]; ++ hlist_for_each_entry(entry, node, head, hlist) { ++ if (!entry->ptype) { ++ if (entry->single.func == probe ++ && entry->single.probe_private ++ == probe_private) ++ return entry; ++ } else { ++ struct marker_probe_closure *closure; ++ closure = entry->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func == probe && ++ closure[i].probe_private ++ == probe_private) ++ return entry; ++ } ++ } ++ } ++ } ++ return NULL; ++} ++ ++/** ++ * marker_probe_unregister_private_data - Disconnect a probe from a marker ++ * @probe: probe function ++ * @probe_private: probe private data ++ * ++ * Unregister a probe by providing the registered private data. ++ * Only removes the first marker found in hash table. ++ * Return 0 on success or error value. 
++ * We do not need to call a synchronize_sched to make sure the probes have ++ * finished running before doing a module unload, because the module unload ++ * itself uses stop_machine(), which insures that every preempt disabled section ++ * have finished. ++ */ ++int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private) ++{ ++ struct marker_entry *entry; ++ int ret = 0; ++ struct marker_probe_closure *old; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, NULL, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ WARN_ON(!entry); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(entry->name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); ++ ++/** ++ * marker_get_private_data - Get a marker's probe private data ++ * @name: marker name ++ * @probe: probe to match ++ * @num: get the nth matching probe's private data ++ * ++ * Returns the nth private data pointer (starting from 0) matching, or an ++ * ERR_PTR. ++ * Returns the private data pointer, or an ERR_PTR. ++ * The private data pointer should _only_ be dereferenced if the caller is the ++ * owner of the data, or its content could vanish. This is mostly used to ++ * confirm that a caller is the owner of a registered probe. 
++ */ ++void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ u32 hash = jhash(name, name_len-1, 0); ++ int i; ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ if (!e->ptype) { ++ if (num == 0 && e->single.func == probe) ++ return e->single.probe_private; ++ else ++ break; ++ } else { ++ struct marker_probe_closure *closure; ++ int match = 0; ++ closure = e->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func != probe) ++ continue; ++ if (match++ == num) ++ return closure[i].probe_private; ++ } ++ } ++ } ++ } ++ return ERR_PTR(-ENOENT); ++} ++EXPORT_SYMBOL_GPL(marker_get_private_data); +diff --git a/kernel/module.c b/kernel/module.c +index f9a5987..ac99222 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -1488,6 +1488,8 @@ static struct module *load_module(void __user *umod, + unsigned int unusedcrcindex; + unsigned int unusedgplindex; + unsigned int unusedgplcrcindex; ++ unsigned int markersindex; ++ unsigned int markersstringsindex; + struct module *mod; + long err = 0; + void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ +@@ -1756,6 +1758,9 @@ static struct module *load_module(void __user *umod, + add_taint(TAINT_FORCED_MODULE); + } + #endif ++ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); ++ markersstringsindex = find_sec(hdr, sechdrs, secstrings, ++ "__markers_strings"); + + /* Now do relocations. 
*/ + for (i = 1; i < hdr->e_shnum; i++) { +@@ -1778,6 +1783,11 @@ static struct module *load_module(void __user *umod, + if (err < 0) + goto cleanup; + } ++#ifdef CONFIG_MARKERS ++ mod->markers = (void *)sechdrs[markersindex].sh_addr; ++ mod->num_markers = ++ sechdrs[markersindex].sh_size / sizeof(*mod->markers); ++#endif + + /* Find duplicate symbols */ + err = verify_export_symbols(mod); +@@ -1796,6 +1806,11 @@ static struct module *load_module(void __user *umod, + + add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + ++#ifdef CONFIG_MARKERS ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++#endif + err = module_finalize(hdr, sechdrs, mod); + if (err < 0) + goto cleanup; +@@ -2259,3 +2274,18 @@ EXPORT_SYMBOL(module_remove_driver); + void struct_module(struct module *mod) { return; } + EXPORT_SYMBOL(struct_module); + #endif ++ ++#ifdef CONFIG_MARKERS ++void module_update_markers(void) ++{ ++ struct module *mod; ++ ++ mutex_lock(&module_mutex); ++ list_for_each_entry(mod, &modules, list) ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++ mutex_unlock(&module_mutex); ++} ++#endif ++ +diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost +index 0a64688..886b70c 100644 +--- a/scripts/Makefile.modpost ++++ b/scripts/Makefile.modpost +@@ -13,6 +13,7 @@ + # 2) modpost is then used to + # 3) create one .mod.c file pr. 
module + # 4) create one Module.symvers file with CRC for all exported symbols ++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers + # 5) compile all .mod.c files + # 6) final link of the module to a file + +@@ -41,6 +42,10 @@ include scripts/Makefile.lib + + kernelsymfile := $(objtree)/Module.symvers + modulesymfile := $(KBUILD_EXTMOD)/Module.symvers ++kernelmarkersfile := $(objtree)/Module.markers ++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers ++ ++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) + + # Step 1), find all modules listed in $(MODVERDIR)/ + __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) +@@ -58,16 +63,27 @@ quiet_cmd_modpost = MODPOST + $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ + $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ + $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ ++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ ++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ + $(filter-out FORCE,$^) + + PHONY += __modpost + __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE + $(call cmd,modpost) + ++quiet_cmd_kernel-mod = MODPOST $@ ++ cmd_kernel-mod = $(cmd_modpost) $@ ++ ++vmlinux.o: FORCE ++ $(call cmd,kernel-mod) ++ + # Declare generated files as targets for modpost + $(symverfile): __modpost ; + $(modules:.ko=.mod.c): __modpost ; + ++ifdef CONFIG_MARKERS ++$(markersfile): __modpost ; ++endif + + # Step 5), compile all *.mod.c files + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 0ec3321..383f310 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -10,7 +10,8 @@ + * + * Usage: modpost vmlinux module1.o module2.o ... 
+ */ +- ++#define _GNU_SOURCE ++#include <stdio.h> + #include <ctype.h> + #include "modpost.h" + #include "../../include/linux/license.h" +@@ -385,6 +386,8 @@ static void parse_elf(struct elf_info *info, const char *filename) + info->export_unused_gpl_sec = i; + else if (strcmp(secname, "__ksymtab_gpl_future") == 0) + info->export_gpl_future_sec = i; ++ else if (strcmp(secname, "__markers_strings") == 0) ++ info->markers_strings_sec = i; + + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; +@@ -900,6 +903,62 @@ static void check_sec_ref(struct module *mod, const char *modname, + } + } + ++static void get_markers(struct elf_info *info, struct module *mod) ++{ ++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; ++ const char *strings = (const char *) info->hdr + sh->sh_offset; ++ const Elf_Sym *sym, *first_sym, *last_sym; ++ size_t n; /* number of __mstrtab_* symbols in the section */ ++ ++ if (!info->markers_strings_sec) ++ return; ++ ++ /* ++ * First count the strings. We look for all the symbols defined ++ * in the __markers_strings section named __mstrtab_*. For ++ * these local names, the compiler puts a random .NNN suffix on, ++ * so the names don't correspond exactly. ++ */ ++ first_sym = last_sym = NULL; ++ n = 0; ++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ if (first_sym == NULL) ++ first_sym = sym; ++ last_sym = sym; ++ ++n; ++ } ++ ++ if (n == 0) ++ return; ++ ++ /* ++ * Now collect each name and format into a line for the output. ++ * Lines look like: ++ * marker_name vmlinux marker %s format %d ++ * The format string after the second \t can use whitespace. 
++ */ ++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); ++ mod->nmarkers = n; ++ ++ n = 0; ++ for (sym = first_sym; sym <= last_sym; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ const char *name = strings + sym->st_value; ++ const char *fmt = strchr(name, '\0') + 1; /* format is stored right after the name's NUL */ ++ char *line; /* asprintf leaves this undefined on failure, so test its return value */ ++ if (asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt) < 0) ++ fatal("modpost: out of memory building marker line\n"); ++ mod->markers[n++] = line; ++ } ++} ++ + /** + * Functions used only during module init is marked __init and is stored in + * a .init.text section. Likewise data is marked __initdata and stored in +@@ -1103,6 +1162,8 @@ static void read_symbols(char *modname) + get_src_version(modname, mod->srcversion, + sizeof(mod->srcversion)-1); + ++ get_markers(&info, mod); ++ + parse_elf_finish(&info); + + /* Our trick to get versioning for struct_module - it's +@@ -1436,6 +1497,93 @@ static void write_dump(const char *fname) + write_if_changed(&buf, fname); + } + ++static void add_marker(struct module *mod, const char *name, const char *fmt) ++{ ++ char *line; /* "name\tmodule\tformat\n", appended to mod->markers below */ ++ if (asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt) < 0) ++ fatal("modpost: out of memory building marker line\n"); ++ ++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * ++ sizeof mod->markers[0]))); ++ mod->markers[mod->nmarkers++] = line; ++} ++ ++static void read_markers(const char *fname) ++{ ++ unsigned long size, pos = 0; ++ void *file = grab_file(fname, &size); ++ char *line; ++ ++ if (!file) /* No old markers, silently ignore */ ++ return; ++ ++ while ((line = get_next_line(&pos, file, size))) { ++ char *marker, *modname, *fmt; ++ struct module *mod; ++ ++ marker = line; ++ modname = strchr(marker, '\t'); ++ if (!modname) ++ goto fail; ++ *modname++ = '\0'; ++ fmt = strchr(modname, '\t'); ++ if (!fmt) ++ goto fail; ++ *fmt++ = '\0'; ++ if (*marker == '\0' || *modname == '\0') ++ goto fail; ++ ++ mod = 
find_module(modname); ++ if (!mod) { ++ if (is_vmlinux(modname)) ++ have_vmlinux = 1; ++ mod = new_module(NOFAIL(strdup(modname))); ++ mod->skip = 1; ++ } ++ ++ add_marker(mod, marker, fmt); ++ } ++ return; ++fail: ++ fatal("parse error in markers list file\n"); ++} ++ ++static int compare_strings(const void *a, const void *b) ++{ ++ return strcmp(*(const char **) a, *(const char **) b); ++} ++ ++static void write_markers(const char *fname) ++{ ++ struct buffer buf = { }; ++ struct module *mod; ++ size_t i; ++ ++ for (mod = modules; mod; mod = mod->next) ++ if ((!external_module || !mod->skip) && mod->markers != NULL) { ++ /* ++ * Sort the strings so we can skip duplicates when ++ * we write them out. ++ */ ++ qsort(mod->markers, mod->nmarkers, ++ sizeof mod->markers[0], &compare_strings); ++ for (i = 0; i < mod->nmarkers; ++i) { ++ char *line = mod->markers[i]; ++ buf_write(&buf, line, strlen(line)); ++ while (i + 1 < mod->nmarkers && ++ !strcmp(mod->markers[i], ++ mod->markers[i + 1])) ++ free(mod->markers[i++]); ++ free(mod->markers[i]); ++ } ++ free(mod->markers); ++ mod->markers = NULL; ++ } ++ ++ write_if_changed(&buf, fname); ++} ++ ++ + int main(int argc, char **argv) + { + struct module *mod; +@@ -1444,8 +1592,10 @@ int main(int argc, char **argv) + char *kernel_read = NULL, *module_read = NULL; + char *dump_write = NULL; + int opt; ++ char *markers_read = NULL; ++ char *markers_write = NULL; + +- while ((opt = getopt(argc, argv, "i:I:mo:a")) != -1) { ++ while ((opt = getopt(argc, argv, "i:I:mo:aM:K:")) != -1) { + switch(opt) { + case 'i': + kernel_read = optarg; +@@ -1463,6 +1613,12 @@ int main(int argc, char **argv) + case 'a': + all_versions = 1; + break; ++ case 'M': ++ markers_write = optarg; ++ break; ++ case 'K': ++ markers_read = optarg; ++ break; + default: + exit(1); + } +@@ -1502,5 +1658,11 @@ int main(int argc, char **argv) + if (dump_write) + write_dump(dump_write); + ++ if (markers_read) ++ read_markers(markers_read); ++ ++ if (markers_write) 
++ write_markers(markers_write); ++ + return 0; + } +diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h +index d398c61..27b05e6 100644 +--- a/scripts/mod/modpost.h ++++ b/scripts/mod/modpost.h +@@ -107,6 +107,8 @@ struct module { + int has_init; + int has_cleanup; + struct buffer dev_table_buf; ++ char **markers; ++ size_t nmarkers; + char srcversion[25]; + }; + +@@ -121,6 +123,7 @@ struct elf_info { + Elf_Section export_gpl_sec; + Elf_Section export_unused_gpl_sec; + Elf_Section export_gpl_future_sec; ++ Elf_Section markers_strings_sec; + const char *strtab; + char *modinfo; + unsigned int modinfo_len; Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch (rev 0) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch 2008-07-04 03:01:41 UTC (rev 8) @@ -0,0 +1,1917 @@ +diff --git a/Makefile b/Makefile +index ce04b1f..f059302 100644 +--- a/Makefile ++++ b/Makefile +@@ -581,7 +581,7 @@ quiet_cmd_vmlinux__ ?= LD $@ + cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ + -T $(vmlinux-lds) $(vmlinux-init) \ + --start-group $(vmlinux-main) --end-group \ +- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) ++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) + + # Generate new vmlinux version + quiet_cmd_vmlinux_version = GEN .version +@@ -686,10 +686,30 @@ $(KALLSYMS): scripts ; + + endif # ifdef CONFIG_KALLSYMS + ++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has ++# relevant sections renamed as per the linker script. 
++quiet_cmd_vmlinux-modpost = LD $@ ++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ ++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ ++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) ++define rule_vmlinux-modpost ++ : ++ +$(call cmd,vmlinux-modpost) ++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ ++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd ++endef ++ ++ + # vmlinux image - including updated kernel symbols +-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE ++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE ++ $(call vmlinux-modpost) + $(call if_changed_rule,vmlinux__) + ++# build vmlinux.o first to catch section mismatch errors early ++$(kallsyms.o): vmlinux.o ++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ++ $(call if_changed_rule,vmlinux-modpost) ++ + # The actual objects are generated when descending, + # make sure no implicit rule kicks in + $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; +diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug +index cf069b7..d39d5c5 100644 +--- a/arch/i386/Kconfig.debug ++++ b/arch/i386/Kconfig.debug +@@ -29,6 +29,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL +diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S +index d6cc9c0..7bbda2e 100644 +--- a/arch/i386/kernel/vmlinux.lds.S ++++ b/arch/i386/kernel/vmlinux.lds.S +@@ -52,6 +52,7 @@ SECTIONS + /* writeable */ + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug +index 2782b48..17d4a44 100644 +--- a/arch/ia64/Kconfig.debug ++++ b/arch/ia64/Kconfig.debug +@@ -12,6 +12,12 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. + + choice + prompt "Physical memory granularity" +diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S +index a676e79..c6ea47e 100644 +--- a/arch/ia64/kernel/vmlinux.lds.S ++++ b/arch/ia64/kernel/vmlinux.lds.S +@@ -193,7 +193,7 @@ SECTIONS + + data : { } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) +- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } ++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) +diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug +index 860472a..5c3a9f5 100644 +--- a/arch/ppc64/Kconfig.debug ++++ b/arch/ppc64/Kconfig.debug +@@ -16,6 +16,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL +diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S +index 4103cc1..76d0bf0 100644 +--- a/arch/ppc64/kernel/vmlinux.lds.S ++++ b/arch/ppc64/kernel/vmlinux.lds.S +@@ -118,6 +118,7 @@ SECTIONS + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) ++ MARKER + } + + .opd : { +diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug +index cd8d39f..f39bf8d 100644 +--- a/arch/sparc64/Kconfig.debug ++++ b/arch/sparc64/Kconfig.debug +@@ -21,6 +21,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + config DEBUG_DCFLUSH + bool "D-cache flush debugging" + depends on DEBUG_KERNEL +diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S +index a710d38..d4b3b62 100644 +--- a/arch/sparc64/kernel/vmlinux.lds.S ++++ b/arch/sparc64/kernel/vmlinux.lds.S +@@ -27,6 +27,7 @@ SECTIONS + .data : + { + *(.data) ++ MARKER + CONSTRUCTORS + } + .data1 : { *(.data1) } +diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug +index cb33186..d0260cb 100644 +--- a/arch/x86_64/Kconfig.debug ++++ b/arch/x86_64/Kconfig.debug +@@ -55,6 +55,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config IOMMU_LEAK + bool "IOMMU leak tracing" + depends on DEBUG_KERNEL +diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S +index f656f19..12ce116 100644 +--- a/arch/x86_64/kernel/vmlinux.lds.S ++++ b/arch/x86_64/kernel/vmlinux.lds.S +@@ -33,6 +33,7 @@ SECTIONS + + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index 54fb0a0..cbdcd93 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -158,7 +158,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); + + // ================= main 802.3ad protocol functions ================== + static int ad_lacpdu_send(struct port *port); +-static int ad_marker_send(struct port *port, struct marker *marker); ++static int ad_marker_send(struct port *port, struct bond_marker *marker); + static void ad_mux_machine(struct port *port); + static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); + static void ad_tx_machine(struct port *port); +@@ -171,8 +171,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast); + static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); + static void ad_enable_collecting_distributing(struct port *port); + static void ad_disable_collecting_distributing(struct port *port); +-static void ad_marker_info_received(struct marker *marker_info, struct port *port); +-static void ad_marker_response_received(struct marker *marker, struct port *port); ++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); ++static void ad_marker_response_received(struct bond_marker *marker, struct port *port); + + + ///////////////////////////////////////////////////////////////////////////////// +@@ -945,12 +945,13 @@ static int ad_lacpdu_send(struct port *port) + * Returns: 0 on success + * < 0 on error + */ +-static int ad_marker_send(struct port *port, struct marker *marker) 
++static int ad_marker_send(struct port *port, struct bond_marker *marker) + { + struct slave *slave = port->slave; + struct sk_buff *skb; +- struct marker_header *marker_header; +- int length = sizeof(struct marker_header); ++ struct bond_marker_header *marker_header; ++ int length = sizeof(struct bond_marker_header); ++ + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length + 16); +@@ -965,7 +966,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + +- marker_header = (struct marker_header *)skb_put(skb, length); ++ marker_header = (struct bond_marker_header *)skb_put(skb, length); + + marker_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it +@@ -1762,7 +1763,7 @@ static void ad_disable_collecting_distributing(struct port *port) + */ + static void ad_marker_info_send(struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + u16 index; + + // fill the marker PDU with the appropriate values +@@ -1795,13 +1796,14 @@ static void ad_marker_info_send(struct port *port) + * @port: the port we're looking at + * + */ +-static void ad_marker_info_received(struct marker *marker_info,struct port *port) ++static void ad_marker_info_received(struct bond_marker *marker_info, ++ struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + + // copy the received marker data to the response marker + //marker = *marker_info; +- memcpy(&marker, marker_info, sizeof(struct marker)); ++ memcpy(&marker, marker_info, sizeof(struct bond_marker)); + // change the marker subtype to marker response + marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; + // send the marker response +@@ -1820,7 +1822,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port + * response for marker PDU's, in 
this stage, but only to respond to marker + * information. + */ +-static void ad_marker_response_received(struct marker *marker, struct port *port) ++static void ad_marker_response_received(struct bond_marker *marker, ++ struct port *port) + { + marker=NULL; // just to satisfy the compiler + port=NULL; // just to satisfy the compiler +@@ -2208,15 +2211,16 @@ void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 leng + case AD_TYPE_MARKER: + // No need to convert fields to Little Endian since we don't use the marker's fields. + +- switch (((struct marker *)lacpdu)->tlv_type) { ++ switch (((struct bond_marker *)lacpdu)->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + dprintk("Received Marker Information on port %d\n", port->actor_port_number); +- ad_marker_info_received((struct marker *)lacpdu, port); ++ ad_marker_info_received((struct bond_marker *)lacpdu, port); ++ + break; + + case AD_MARKER_RESPONSE_SUBTYPE: + dprintk("Received Marker Response on port %d\n", port->actor_port_number); +- ad_marker_response_received((struct marker *)lacpdu, port); ++ ad_marker_response_received((struct bond_marker *)lacpdu, port); + break; + + default: +diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h +index 4119f0f..7e770ff 100644 +--- a/drivers/net/bonding/bond_3ad.h ++++ b/drivers/net/bonding/bond_3ad.h +@@ -105,7 +105,8 @@ typedef enum { + typedef enum { + AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype + AD_MARKER_RESPONSE_SUBTYPE // marker response subtype +-} marker_subtype_t; ++} bond_marker_subtype_t; ++ + + // timers types(43.4.9 in the 802.3ad standard) + typedef enum { +@@ -161,7 +162,7 @@ typedef struct lacpdu_header { + } lacpdu_header_t; + + // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) +-typedef struct marker { ++typedef struct bond_marker { + u8 subtype; // = 0x02 (marker PDU) + u8 version_number; // = 0x01 + u8 tlv_type; // = 0x01 (marker information) +@@ -174,12 
+175,12 @@ typedef struct marker { + u8 tlv_type_terminator; // = 0x00 + u8 terminator_length; // = 0x00 + u8 reserved_90[90]; // = 0 +-} marker_t; ++} bond_marker_t; + +-typedef struct marker_header { ++typedef struct bond_marker_header { + struct ad_header ad_header; +- struct marker marker; +-} marker_header_t; ++ struct bond_marker marker; ++} bond_marker_header_t; + + #pragma pack() + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index bb340cb..fd6e57f 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -6,10 +6,18 @@ + #define VMLINUX_SYMBOL(_sym_) _sym_ + #endif + ++/* Kernel markers : pointers */ ++#define MARKER \ ++ . = ALIGN(8); \ ++ VMLINUX_SYMBOL(__start___markers) = .; \ ++ *(__markers) \ ++ VMLINUX_SYMBOL(__stop___markers) = .; ++ + #define RODATA \ + .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ + *(.rodata) *(.rodata.*) \ + *(__vermagic) /* Kernel version magic */ \ ++ *(__markers_strings) /* Markers: strings */ \ + } \ + \ + .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ +diff --git a/include/linux/marker.h b/include/linux/marker.h +new file mode 100644 +index 0000000..efbc82b +--- /dev/null ++++ b/include/linux/marker.h +@@ -0,0 +1,139 @@ ++#ifndef _LINUX_MARKER_H ++#define _LINUX_MARKER_H ++ ++/* ++ * Code markup for dynamic and static tracing. ++ * ++ * See Documentation/marker.txt. ++ * ++ * (C) Copyright 2006 Mathieu Desnoyers ++ * ++ * This file is released under the GPLv2. ++ * See the file COPYING for more details. ++ */ ++ ++#include ++ ++struct module; ++struct marker; ++ ++/** ++ * marker_probe_func - Type of a marker probe function ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @args: variable argument list pointer. Use a pointer to overcome C's ++ * inability to pass this around as a pointer in a portable manner in ++ * the callee otherwise. ++ * ++ * Type of marker probe functions. 
They receive the mdata and need to parse the ++ * format string to recover the variable argument list. ++ */ ++typedef void marker_probe_func(void *probe_private, void *call_private, ++ const char *fmt, va_list *args); ++ ++struct marker_probe_closure { ++ marker_probe_func *func; /* Callback */ ++ void *probe_private; /* Private probe data */ ++}; ++ ++struct marker { ++ const char *name; /* Marker name */ ++ const char *format; /* Marker format string, describing the ++ * variable argument list. ++ */ ++ char state; /* Marker state. */ ++ char ptype; /* probe type : 0 : single, 1 : multi */ ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++} __attribute__((aligned(8))); ++ ++#ifdef CONFIG_MARKERS ++ ++/* ++ * Note : the empty asm volatile with read constraint is used here instead of a ++ * "used" attribute to fix a gcc 4.1.x bug. ++ * Make sure the alignment of the structure in the __markers section will ++ * not add unwanted padding between the beginning of the section and the ++ * structure. Force alignment to the same alignment as the section start. ++ */ ++#define __trace_mark(name, call_private, format, args...) 
\ ++ do { \ ++ static const char __mstrtab_##name[] \ ++ __attribute__((section("__markers_strings"))) \ ++ = #name "\0" format; \ ++ static struct marker __mark_##name \ ++ __attribute__((section("__markers"), aligned(8))) = \ ++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ ++ 0, 0, marker_probe_cb, \ ++ { __mark_empty_function, NULL}, NULL }; \ ++ __mark_check_format(format, ## args); \ ++ if (unlikely(__mark_##name.state)) { \ ++ (*__mark_##name.call) \ ++ (&__mark_##name, call_private, \ ++ format, ## args); \ ++ } \ ++ } while (0) ++ ++extern void marker_update_probe_range(struct marker *begin, ++ struct marker *end); ++#else /* !CONFIG_MARKERS */ ++#define __trace_mark(name, call_private, format, args...) \ ++ __mark_check_format(format, ## args) ++static inline void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ } ++#endif /* CONFIG_MARKERS */ ++ ++/** ++ * trace_mark - Marker ++ * @name: marker name, not quoted. ++ * @format: format string ++ * @args...: variable argument list ++ * ++ * Places a marker. ++ */ ++#define trace_mark(name, format, args...) \ ++ __trace_mark(name, NULL, format, ## args) ++ ++/** ++ * MARK_NOARGS - Format string for a marker with no argument. ++ */ ++#define MARK_NOARGS " " ++ ++/* To be used for string format validity checking with gcc */ ++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) ++{ ++} ++ ++extern marker_probe_func __mark_empty_function; ++ ++extern void marker_probe_cb(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++extern void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++ ++/* ++ * Connect a probe to a marker. ++ * private data pointer must be a valid allocated memory address, or NULL. 
++ */ ++extern int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private); ++ ++/* ++ * Returns the private data given to marker_probe_register. ++ */ ++extern int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private); ++/* ++ * Unregister a marker by providing the registered private data. ++ */ ++extern int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private); ++ ++extern void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num); ++ ++#endif +diff --git a/include/linux/module.h b/include/linux/module.h +index 8da8948..2ad5efd 100644 +--- a/include/linux/module.h ++++ b/include/linux/module.h +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -328,6 +329,10 @@ struct module + /* The command line arguments (may be mangled). People like + keeping pointers to this stuff */ + char *args; ++#ifdef CONFIG_MARKERS ++ struct marker *markers; ++ unsigned int num_markers; ++#endif + }; + + /* FIXME: It'd be nice to isolate modules during init, too, so they +@@ -448,6 +453,7 @@ int register_module_notifier(struct notifier_block * nb); + int unregister_module_notifier(struct notifier_block * nb); + + extern void print_modules(void); ++extern void module_update_markers(void); + #else /* !CONFIG_MODULES... 
*/ + #define EXPORT_SYMBOL(sym) + #define EXPORT_SYMBOL_GPL(sym) +@@ -537,6 +543,10 @@ static inline int unregister_module_notifier(struct notifier_block * nb) + static inline void print_modules(void) + { + } ++ ++static inline void module_update_markers(void) ++{ ++} + #endif /* CONFIG_MODULES */ + + #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index 91057d6..bcd0acb 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -42,6 +42,19 @@ + #include + #include + ++/* ++ * Prevent the compiler from merging or refetching accesses. The compiler ++ * is also forbidden from reordering successive instances of ACCESS_ONCE(), ++ * but only when the compiler is aware of some particular ordering. One way ++ * to make the compiler aware of ordering is to put the two invocations of ++ * ACCESS_ONCE() in different C statements. ++ * ++ * This macro does absolutely -nothing- to prevent the CPU from reordering, ++ * merging, or refetching absolutely anything at any time. 
++ * ++ */ ++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) ++ + /** + * struct rcu_head - callback structure for use with RCU + * @next: next update requests in a list +@@ -102,6 +115,7 @@ struct rcu_data { + struct rcu_head *donelist; + struct rcu_head **donetail; + int cpu; ++ struct rcu_head barrier; + }; + + DECLARE_PER_CPU(struct rcu_data, rcu_data); +@@ -266,6 +280,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head, + extern void FASTCALL(call_rcu_bh(struct rcu_head *head, + void (*func)(struct rcu_head *head))); + extern void synchronize_kernel(void); +- ++extern void rcu_barrier(void); + #endif /* __KERNEL__ */ + #endif /* __LINUX_RCUPDATE_H */ +diff --git a/kernel/Makefile b/kernel/Makefile +index 0b8c8ca..f8248bc 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o + obj-$(CONFIG_AUDITSYSCALL) += auditsc.o + obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o + obj-$(CONFIG_KPROBES) += kprobes.o ++obj-$(CONFIG_MARKERS) += marker.o + + ifneq ($(CONFIG_IA64),y) + # According to Alan Modra , the -fno-omit-frame-pointer is +diff --git a/kernel/marker.c b/kernel/marker.c +new file mode 100644 +index 0000000..c4c2cd8 +--- /dev/null ++++ b/kernel/marker.c +@@ -0,0 +1,851 @@ ++/* ++ * Copyright (C) 2007 Mathieu Desnoyers ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern struct marker __start___markers[]; ++extern struct marker __stop___markers[]; ++ ++/* Set to 1 to enable marker debug output */ ++const int marker_debug; ++ ++/* ++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin ++ * and module markers and the hash table. ++ */ ++static DEFINE_MUTEX(markers_mutex); ++ ++/* ++ * Marker hash table, containing the active markers. ++ * Protected by markers_mutex. ++ */ ++#define MARKER_HASH_BITS 6 ++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) ++ ++/* ++ * Note about RCU : ++ * It is used to make sure every handler has finished using its private data ++ * between two consecutive operations (add or remove) on a given marker. It is ++ * also used to delay the free of multiple probes array until a quiescent state ++ * is reached. ++ * marker entries modifications are protected by the markers_mutex. ++ */ ++struct marker_entry { ++ struct hlist_node hlist; ++ char *format; /* points into name[] right after the name, or NULL */ ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; /* probe used while ptype == 0 */ ++ struct marker_probe_closure *multi; /* NULL-func terminated probe array used while ptype == 1 */ ++ int refcount; /* Number of times armed. 0 if disarmed. */ ++ struct rcu_head rcu; /* queued with free_old_closure() as callback */ ++ void *oldptr; /* previous probe array, kfree()d by free_old_closure() */ ++ char rcu_pending:1; /* set before call_rcu(), cleared by free_old_closure() */ ++ char ptype:1; /* 0: single probe, 1: multi array */ ++ char name[0]; /* Contains name'\0'format'\0' */ ++}; ++ ++static struct hlist_head marker_table[MARKER_TABLE_SIZE]; ++ ++/** ++ * __mark_empty_function - Empty probe callback ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @args: pointer to the variable argument list ++ * ++ * Empty callback provided as a probe to the markers. 
By providing this to a ++ * disabled marker, we make sure the execution flow is always valid even ++ * though the function pointer change and the marker enabling are two distinct ++ * operations that modifies the execution flow of preemptible code. ++ */ ++void __mark_empty_function(void *probe_private, void *call_private, ++ const char *fmt, va_list *args) ++{ ++} ++EXPORT_SYMBOL_GPL(__mark_empty_function); ++ ++/* ++ * marker_probe_cb Callback that prepares the variable argument list for probes. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we ++ * need to put a full smp_rmb() in this branch. This is why we do not use ++ * rcu_dereference() for the pointer read. ++ */ ++void marker_probe_cb(const struct marker *mdata, void *call_private, ++ const char *fmt, ...) ++{ ++ va_list args; ++ char ptype; ++ ++ /* ++ * disabling preemption to make sure the teardown of the callbacks can ++ * be done correctly when they are in modules and they insure RCU read ++ * coherency. ++ */ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ va_start(args, fmt); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ va_end(args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. 
Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. ++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) { ++ va_start(args, fmt); ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ va_end(args); ++ } ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb); ++ ++/* ++ * marker_probe_cb Callback that does not prepare the variable argument list. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Should be connected to markers "MARK_NOARGS". ++ */ ++void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...) ++{ ++ va_list args; /* not initialized */ ++ char ptype; ++ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. 
++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); ++ ++static void free_old_closure(struct rcu_head *head) ++{ ++ struct marker_entry *entry = container_of(head, ++ struct marker_entry, rcu); ++ kfree(entry->oldptr); ++ /* Make sure we free the data before setting the pending flag to 0 */ ++ smp_wmb(); ++ entry->rcu_pending = 0; ++} ++ ++static void debug_print_probes(struct marker_entry *entry) ++{ ++ int i; ++ ++ if (!marker_debug) ++ return; ++ ++ if (!entry->ptype) { ++ printk(KERN_DEBUG "Single probe : %p %p\n", ++ entry->single.func, ++ entry->single.probe_private); ++ } else { ++ for (i = 0; entry->multi[i].func; i++) ++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, ++ entry->multi[i].func, ++ entry->multi[i].probe_private); ++ } ++} ++ ++static struct marker_probe_closure * ++marker_entry_add_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0; ++ struct marker_probe_closure *old, *new; ++ ++ WARN_ON(!probe); ++ ++ debug_print_probes(entry); ++ old = entry->multi; ++ if (!entry->ptype) { ++ if (entry->single.func == probe && ++ entry->single.probe_private == probe_private) ++ return ERR_PTR(-EBUSY); ++ if (entry->single.func == __mark_empty_function) { ++ /* 0 -> 1 probes */ ++ entry->single.func = probe; ++ entry->single.probe_private = probe_private; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* 1 -> 2 probes */ ++ nr_probes = 1; ++ old = NULL; ++ } ++ } else { ++ /* (N -> N+1), (N != 0, 1) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) ++ if (old[nr_probes].func == probe ++ && old[nr_probes].probe_private ++ == probe_private) ++ return ERR_PTR(-EBUSY); ++ } ++ /* + 2 : one for new probe, one for NULL func */ ++ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), ++ GFP_KERNEL); ++ if (new == NULL) ++ return ERR_PTR(-ENOMEM); ++ if (!old) ++ new[0] = entry->single; ++ else ++ memcpy(new, old, ++ nr_probes * sizeof(struct marker_probe_closure)); ++ new[nr_probes].func = probe; ++ new[nr_probes].probe_private = probe_private; ++ entry->refcount = nr_probes + 1; ++ entry->multi = new; ++ entry->ptype = 1; ++ debug_print_probes(entry); ++ return old; ++} ++ ++static struct marker_probe_closure * ++marker_entry_remove_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0, nr_del = 0, i; ++ struct marker_probe_closure *old, *new; ++ ++ old = entry->multi; ++ ++ debug_print_probes(entry); ++ if (!entry->ptype) { ++ /* 0 -> N is an error */ ++ WARN_ON(entry->single.func == __mark_empty_function); ++ /* 1 -> 0 probes */ ++ WARN_ON(probe && entry->single.func != probe); ++ WARN_ON(entry->single.probe_private != probe_private); ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* (N -> M), (N > 1, M >= 0) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { ++ if ((!probe || old[nr_probes].func == probe) ++ && old[nr_probes].probe_private ++ == probe_private) ++ nr_del++; ++ } ++ } ++ ++ if (nr_probes - nr_del == 0) { ++ /* N -> 0, (N > 1) */ ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ } else if (nr_probes - nr_del == 1) { ++ /* N -> 1, (N > 1) */ ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ entry->single = old[i]; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ } else { ++ int j = 0; ++ /* N -> M, (N > 1, M > 1) */ ++ /* + 1 for NULL */ ++ new = kzalloc((nr_probes - nr_del + 1) ++ * sizeof(struct marker_probe_closure), GFP_KERNEL); ++ if (new == NULL) ++ return 
ERR_PTR(-ENOMEM); ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ new[j++] = old[i]; ++ entry->refcount = nr_probes - nr_del; ++ entry->ptype = 1; ++ entry->multi = new; ++ } ++ debug_print_probes(entry); ++ return old; ++} ++ ++/* ++ * Get marker if the marker is present in the marker hash table. ++ * Must be called with markers_mutex held. ++ * Returns NULL if not present. ++ */ ++static struct marker_entry *get_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ u32 hash = jhash(name, strlen(name), 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) ++ return e; ++ } ++ return NULL; ++} ++ ++/* ++ * Add the marker to the marker hash table. Must be called with markers_mutex ++ * held. ++ */ ++static struct marker_entry *add_marker(const char *name, const char *format) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ size_t format_len = 0; ++ u32 hash = jhash(name, name_len-1, 0); ++ ++ if (format) ++ format_len = strlen(format) + 1; ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ printk(KERN_NOTICE ++ "Marker %s busy\n", name); ++ return ERR_PTR(-EBUSY); /* Already there */ ++ } ++ } ++ /* ++ * Using kmalloc here to allocate a variable length element. Could ++ * cause some memory fragmentation if overused. 
++ */ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return ERR_PTR(-ENOMEM); ++ memcpy(&e->name[0], name, name_len); ++ if (format) { ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ } else { ++ e->format = NULL; ++ e->call = marker_probe_cb; ++ } ++ e->single.func = __mark_empty_function; ++ e->single.probe_private = NULL; ++ e->multi = NULL; ++ e->ptype = 0; ++ e->refcount = 0; ++ e->rcu_pending = 0; ++ hlist_add_head(&e->hlist, head); ++ return e; ++} ++ ++/* ++ * Remove the marker from the marker hash table. Must be called with mutex_lock ++ * held. ++ */ ++static int remove_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ int found = 0; ++ size_t len = strlen(name) + 1; ++ u32 hash = jhash(name, len-1, 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) ++ return -ENOENT; ++ if (e->single.func != __mark_empty_function) ++ return -EBUSY; ++ hlist_del(&e->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if (e->rcu_pending) ++ rcu_barrier(); ++ kfree(e); ++ return 0; ++} ++ ++/* ++ * Set the mark_entry format to the format found in the element. 
++ */ ++static int marker_set_format(struct marker_entry **entry, const char *format) ++{ ++ struct marker_entry *e; ++ size_t name_len = strlen((*entry)->name) + 1; ++ size_t format_len = strlen(format) + 1; ++ ++ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return -ENOMEM; ++ memcpy(&e->name[0], (*entry)->name, name_len); ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ e->single = (*entry)->single; ++ e->multi = (*entry)->multi; ++ e->ptype = (*entry)->ptype; ++ e->refcount = (*entry)->refcount; ++ e->rcu_pending = 0; ++ hlist_add_before(&e->hlist, &(*entry)->hlist); ++ hlist_del(&(*entry)->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if ((*entry)->rcu_pending) ++ rcu_barrier(); ++ kfree(*entry); ++ *entry = e; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ return 0; ++} ++ ++/* ++ * Sets the probe callback corresponding to one marker. ++ */ ++static int set_marker(struct marker_entry **entry, struct marker *elem, ++ int active) ++{ ++ int ret; ++ WARN_ON(strcmp((*entry)->name, elem->name) != 0); ++ ++ if ((*entry)->format) { ++ if (strcmp((*entry)->format, elem->format) != 0) { ++ printk(KERN_NOTICE ++ "Format mismatch for probe %s " ++ "(%s), marker (%s)\n", ++ (*entry)->name, ++ (*entry)->format, ++ elem->format); ++ return -EPERM; ++ } ++ } else { ++ ret = marker_set_format(entry, elem->format); ++ if (ret) ++ return ret; ++ } ++ ++ /* ++ * probe_cb setup (statically known) is done here. It is ++ * asynchronous with the rest of execution, therefore we only ++ * pass from a "safe" callback (with argument) to an "unsafe" ++ * callback (does not set arguments). 
++ */ ++ elem->call = (*entry)->call; ++ /* ++ * Sanity check : ++ * We only update the single probe private data when the ptr is ++ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) ++ */ ++ WARN_ON(elem->single.func != __mark_empty_function ++ && elem->single.probe_private ++ != (*entry)->single.probe_private && ++ !elem->ptype); ++ elem->single.probe_private = (*entry)->single.probe_private; ++ /* ++ * Make sure the private data is valid when we update the ++ * single probe ptr. ++ */ ++ smp_wmb(); ++ elem->single.func = (*entry)->single.func; ++ /* ++ * We also make sure that the new probe callbacks array is consistent ++ * before setting a pointer to it. ++ */ ++ rcu_assign_pointer(elem->multi, (*entry)->multi); ++ /* ++ * Update the function or multi probe array pointer before setting the ++ * ptype. ++ */ ++ smp_wmb(); ++ elem->ptype = (*entry)->ptype; ++ elem->state = active; ++ ++ return 0; ++} ++ ++/* ++ * Disable a marker and its probe callback. ++ * Note: only after a synchronize_sched() issued after setting elem->call to the ++ * empty function insures that the original callback is not used anymore. This ++ * insured by preemption disabling around the call site. ++ */ ++static void disable_marker(struct marker *elem) ++{ ++ /* leave "call" as is. It is known statically. */ ++ elem->state = 0; ++ elem->single.func = __mark_empty_function; ++ /* Update the function before setting the ptype */ ++ smp_wmb(); ++ elem->ptype = 0; /* single probe */ ++ /* ++ * Leave the private data and id there, because removal is racy and ++ * should be done only after a synchronize_sched(). These are never used ++ * until the next initialization anyway. ++ */ ++} ++ ++/** ++ * marker_update_probe_range - Update a probe range ++ * @begin: beginning of the range ++ * @end: end of the range ++ * ++ * Updates the probe callback corresponding to a range of markers. 
++ */ ++void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ ++ struct marker *iter; ++ struct marker_entry *mark_entry; ++ ++ mutex_lock(&markers_mutex); ++ for (iter = begin; iter < end; iter++) { ++ mark_entry = get_marker(iter->name); ++ if (mark_entry) { ++ set_marker(&mark_entry, iter, ++ !!mark_entry->refcount); ++ /* ++ * The return value of set_marker() is ignored on purpose: ++ * a failure on one marker site must not stop the update of ++ * the remaining sites in the range. ++ */ ++ } else { ++ disable_marker(iter); ++ } ++ } ++ mutex_unlock(&markers_mutex); ++} ++ ++/* ++ * Re-synchronize every marker site with the marker hash table: first the ++ * core kernel markers, then the markers of all modules. Sites whose name ++ * has no hash table entry are disabled by marker_update_probe_range(). ++ * Takes markers_mutex internally (through marker_update_probe_range()), so ++ * it must be called without that mutex held. ++ * NOTE(review): an earlier wording of this comment mentioned a ++ * synchronize_sched() and a module passed as parameter; this function takes ++ * no parameter and issues no synchronize_sched() itself. ++ * ++ * Internal callback only changed before the first probe is connected to it. ++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 ++ * transitions. All other transitions will leave the old private data valid. ++ * This makes the non-atomicity of the callback/private data updates valid. ++ * ++ * "special case" updates : ++ * 0 -> 1 callback ++ * 1 -> 0 callback ++ * 1 -> 2 callbacks ++ * 2 -> 1 callbacks ++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. ++ * Side effect : marker_set_format may delete the marker entry (creating a ++ * replacement). ++ */ ++static void marker_update_probes(void) ++{ ++ /* Core kernel markers */ ++ marker_update_probe_range(__start___markers, __stop___markers); ++ /* Markers in modules. */ ++ module_update_markers(); ++} ++ ++/** ++ * marker_probe_register - Connect a probe to a marker ++ * @name: marker name ++ * @format: format string ++ * @probe: probe handler ++ * @probe_private: probe private data ++ * ++ * private data must be a valid allocated memory address, or NULL. ++ * Returns 0 if ok, error value on error. ++ * The probe address must at least be aligned on the architecture pointer size. 
++ */
++int marker_probe_register(const char *name, const char *format,
++			marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	int ret = 0;
++	struct marker_probe_closure *old;
++
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		/* First probe for this name: create the hash table entry. */
++		entry = add_marker(name, format);
++		if (IS_ERR(entry)) {
++			ret = PTR_ERR(entry);
++			goto end;
++		}
++	}
++	/*
++	 * If we detect that a call_rcu is pending for this marker,
++	 * make sure it's executed now.
++	 */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_add_probe(entry, probe, probe_private);
++	if (IS_ERR(old)) {
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	/*
++	 * marker_update_probes() takes markers_mutex itself (through
++	 * marker_update_probe_range()), so it has to be called unlocked.
++	 */
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	/*
++	 * The mutex was dropped: the entry may have been replaced or removed
++	 * in the meantime, so look it up again and do not dereference a NULL
++	 * result.
++	 */
++	entry = get_marker(name);
++	WARN_ON(!entry);
++	if (!entry)
++		goto end;
++	/*
++	 * Another operation may have queued entry->rcu while the mutex was
++	 * released; the same rcu_head must not be queued twice.
++	 */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++end:
++	mutex_unlock(&markers_mutex);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_register);
++
++/**
++ * marker_probe_unregister - Disconnect a probe from a marker
++ * @name: marker name
++ * @probe: probe function pointer
++ * @probe_private: probe private data
++ *
++ * Returns 0 on success or a negative errno value (-ENOENT when no marker
++ * entry exists for @name).
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which insures that every preempt disabled section
++ * have finished.
++ */
++int marker_probe_unregister(const char *name,
++	marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	struct marker_probe_closure *old;
++	int ret = 0;
++
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		ret = -ENOENT;
++		goto end;
++	}
++	/* Flush a pending free_old_closure() before reusing entry->rcu. */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_remove_probe(entry, probe, probe_private);
++	if (IS_ERR(old)) {
++		/*
++		 * The replacement array could not be allocated and the entry
++		 * was left untouched. An ERR_PTR must never reach oldptr,
++		 * which is later passed to kfree().
++		 */
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	/* marker_update_probes() takes markers_mutex itself. */
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	/*
++	 * The mutex was dropped: the entry may have been replaced or removed,
++	 * so look it up again and bail out if it is gone.
++	 */
++	entry = get_marker(name);
++	if (!entry)
++		goto end;
++	/* The same rcu_head must not be queued twice. */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++	remove_marker(name);	/* Ignore busy error message */
++end:
++	mutex_unlock(&markers_mutex);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister);
++
++/*
++ * Find the first marker entry that has (probe, probe_private) connected,
++ * scanning every bucket of the hash table. Returns NULL when no entry
++ * matches. Walks marker_table without locking, so the caller is expected to
++ * hold markers_mutex (both callers in this file do).
++ */
++static struct marker_entry *
++get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	unsigned int i;
++	struct hlist_head *head;
++	struct hlist_node *node;
++
++	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
++		head = &marker_table[i];
++		hlist_for_each_entry(entry, node, head, hlist) {
++			if (!entry->ptype) {
++				if (entry->single.func == probe
++						&& entry->single.probe_private
++						== probe_private)
++					return entry;
++			} else {
++				struct marker_probe_closure *closure;
++				/*
++				 * Separate index: reusing the bucket index
++				 * here would corrupt the outer loop.
++				 */
++				unsigned int j;
++
++				closure = entry->multi;
++				for (j = 0; closure[j].func; j++) {
++					if (closure[j].func == probe &&
++							closure[j].probe_private
++							== probe_private)
++						return entry;
++				}
++			}
++		}
++	}
++	return NULL;
++}
++
++/**
++ * marker_probe_unregister_private_data - Disconnect a probe from a marker
++ * @probe: probe function
++ * @probe_private: probe private data
++ *
++ * Unregister a probe by providing the registered private data.
++ * Only removes the first marker found in hash table.
++ * Return 0 on success or error value.
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which insures that every preempt disabled section
++ * have finished.
++ * Error values: -ENOENT when no marker has this probe connected, -ENOMEM
++ * when a temporary allocation fails (nothing is unregistered in that case).
++ */
++int marker_probe_unregister_private_data(marker_probe_func *probe,
++		void *probe_private)
++{
++	struct marker_entry *entry;
++	int ret = 0;
++	struct marker_probe_closure *old;
++	char *name = NULL;
++	size_t name_len;
++
++	mutex_lock(&markers_mutex);
++	entry = get_marker_from_private_data(probe, probe_private);
++	if (!entry) {
++		ret = -ENOENT;
++		goto end;
++	}
++	/*
++	 * Keep a private copy of the marker name. Once the probe is removed
++	 * the entry can no longer be found through its private data, and the
++	 * entry itself (with its embedded name) may be replaced while the
++	 * mutex is released, so the second lookup has to go by name.
++	 */
++	name_len = strlen(entry->name) + 1;
++	name = kmalloc(name_len, GFP_KERNEL);
++	if (!name) {
++		ret = -ENOMEM;
++		goto end;
++	}
++	memcpy(name, entry->name, name_len);
++	/* Flush a pending free_old_closure() before reusing entry->rcu. */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_remove_probe(entry, NULL, probe_private);
++	if (IS_ERR(old)) {
++		/*
++		 * The replacement array could not be allocated and the entry
++		 * was left untouched. An ERR_PTR must never reach oldptr,
++		 * which is later passed to kfree().
++		 */
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	/* marker_update_probes() takes markers_mutex itself. */
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	WARN_ON(!entry);
++	if (!entry)
++		goto end;
++	/* The same rcu_head must not be queued twice. */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++	remove_marker(name);	/* Ignore busy error message */
++end:
++	mutex_unlock(&markers_mutex);
++	kfree(name);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
++
++/**
++ * marker_get_private_data - Get a marker's probe private data
++ * @name: marker name
++ * @probe: probe to match
++ * @num: get the nth matching probe's private data
++ *
++ * Returns the nth private data pointer (starting from 0) registered with
++ * @probe on the marker, or ERR_PTR(-ENOENT) when there is no such match.
++ * The private data pointer should _only_ be dereferenced if the caller is the
++ * owner of the data, or its content could vanish. This is mostly used to
++ * confirm that a caller is the owner of a registered probe.
++ */ ++void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ u32 hash = jhash(name, name_len-1, 0); /* hash covers the name without its trailing NUL */ ++ int i; ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; /* NOTE(review): the table is walked without taking markers_mutex here - confirm callers serialize */ ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ if (!e->ptype) { ++ if (num == 0 && e->single.func == probe) ++ return e->single.probe_private; ++ else ++ break; /* single probe entry without the requested match: -ENOENT below */ ++ } else { ++ struct marker_probe_closure *closure; ++ int match = 0; /* number of closures using probe seen so far */ ++ closure = e->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func != probe) ++ continue; ++ if (match++ == num) ++ return closure[i].probe_private; ++ } ++ } ++ } ++ } ++ return ERR_PTR(-ENOENT); ++} ++EXPORT_SYMBOL_GPL(marker_get_private_data); +diff --git a/kernel/module.c b/kernel/module.c +index 18b39bc..096c3dc 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -1504,6 +1504,8 @@ static struct module *load_module(void __user *umod, + void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ + struct exception_table_entry *extable; + int gpgsig_ok; ++ unsigned int markersindex; /* section index of "__markers" */ ++ unsigned int markersstringsindex; /* NOTE(review): assigned below but not read in the hunks visible here */ + + DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", + umod, len, uargs); +@@ -1734,6 +1736,9 @@ static struct module *load_module(void __user *umod, + tainted |= TAINT_FORCED_MODULE; + } + #endif ++ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); ++ markersstringsindex = find_sec(hdr, sechdrs, secstrings, ++ "__markers_strings"); + + /* Now do relocations. 
*/ + for (i = 1; i < hdr->e_shnum; i++) { +@@ -1757,6 +1762,12 @@ static struct module *load_module(void __user *umod, + goto cleanup; + } + ++#ifdef CONFIG_MARKERS ++ mod->markers = (void *)sechdrs[markersindex].sh_addr; ++ mod->num_markers = ++ sechdrs[markersindex].sh_size / sizeof(*mod->markers); ++#endif ++ + /* Set up and sort exception table */ + mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); + mod->extable = extable = (void *)sechdrs[exindex].sh_addr; +@@ -1768,6 +1779,12 @@ static struct module *load_module(void __user *umod, + + add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + ++#ifdef CONFIG_MARKERS ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++#endif ++ + err = module_finalize(hdr, sechdrs, mod); + if (err < 0) + goto cleanup; +@@ -2175,6 +2192,20 @@ void struct_module(struct module *mod) { return; } + EXPORT_SYMBOL(struct_module); + #endif + ++#ifdef CONFIG_MARKERS ++void module_update_markers(void) ++{ ++ struct module *mod; ++ ++ down(&module_mutex); ++ list_for_each_entry(mod, &modules, list) ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++ up(&module_mutex); ++} ++#endif ++ + static int __init modules_init(void) + { + return subsystem_register(&module_subsys); +diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c +index 1b16bfc..20ade31 100644 +--- a/kernel/rcupdate.c ++++ b/kernel/rcupdate.c +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + + /* Definition for rcupdate control block. */ + struct rcu_ctrlblk rcu_ctrlblk = +@@ -98,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head, + local_irq_restore(flags); + } + ++static atomic_t rcu_barrier_cpu_count; ++static DEFINE_MUTEX(rcu_barrier_mutex); ++static struct completion rcu_barrier_completion; ++ + /** + * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * @head: structure to be used for queueing the RCU updates. 
+@@ -410,6 +415,44 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, + return NOTIFY_OK; + } + ++static void rcu_barrier_callback(struct rcu_head *notused) ++{ ++ if (atomic_dec_and_test(&rcu_barrier_cpu_count)) ++ complete(&rcu_barrier_completion); /* last outstanding callback wakes rcu_barrier() */ ++} ++ ++/* ++ * Called with preemption disabled, and from cross-cpu IRQ context. ++ * Queues this CPU's rcu_data barrier head and counts it as outstanding. ++ */ ++static void rcu_barrier_func(void *notused) ++{ ++ int cpu = smp_processor_id(); ++ struct rcu_data *rdp = &per_cpu(rcu_data, cpu); ++ struct rcu_head *head; ++ ++ head = &rdp->barrier; ++ atomic_inc(&rcu_barrier_cpu_count); ++ call_rcu(head, rcu_barrier_callback); ++} ++ ++/** ++ * rcu_barrier - Wait until all the in-flight RCUs are complete. ++ * ++ * Runs rcu_barrier_func() on each CPU and sleeps until the last of the ++ * callbacks queued that way has run. Must not be called from interrupt ++ * context (enforced with BUG_ON). ++ */ ++void rcu_barrier(void) ++{ ++ BUG_ON(in_interrupt()); ++ /* ++ * Serialize callers: the counter and the completion are shared by all ++ * rcu_barrier() invocations. ++ * NOTE(review): this comment used to mention taking a cpucontrol mutex ++ * against CPU hotplug; only rcu_barrier_mutex is taken here. ++ */ ++ mutex_lock(&rcu_barrier_mutex); ++ init_completion(&rcu_barrier_completion); ++ atomic_set(&rcu_barrier_cpu_count, 0); ++ on_each_cpu(rcu_barrier_func, NULL, 0, 1); ++ wait_for_completion(&rcu_barrier_completion); ++ mutex_unlock(&rcu_barrier_mutex); ++} ++EXPORT_SYMBOL_GPL(rcu_barrier); ++ ++ ++ + static struct notifier_block __devinitdata rcu_nb = { + .notifier_call = rcu_cpu_notify, + }; +diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost +index b3d31b5..b100a32 100644 +--- a/scripts/Makefile.modpost ++++ b/scripts/Makefile.modpost +@@ -13,6 +13,7 @@ + # 2) modpost is then used to + # 3) create one .mod.c file pr. 
module + # 4) create one Module.symvers file with CRC for all exported symbols ++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers + # 5) compile all .mod.c files + # 6) final link of the module to a file + +@@ -40,6 +41,11 @@ include scripts/Makefile.lib + + symverfile := $(objtree)/Module.symvers + ++kernelmarkersfile := $(objtree)/Module.markers ++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers ++ ++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) ++ + # Step 1), find all modules listed in $(MODVERDIR)/ + __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) + modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) +@@ -53,16 +59,27 @@ quiet_cmd_modpost = MODPOST + cmd_modpost = scripts/mod/modpost \ + $(if $(CONFIG_MODVERSIONS),-m) \ + $(if $(KBUILD_EXTMOD),-i,-o) $(symverfile) \ ++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ ++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ + $(filter-out FORCE,$^) + + .PHONY: __modpost + __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE + $(call cmd,modpost) + ++quiet_cmd_kernel-mod = MODPOST $@ ++ cmd_kernel-mod = $(cmd_modpost) $@ ++ ++vmlinux.o: FORCE ++ $(call cmd,kernel-mod) ++ + # Declare generated files as targets for modpost + $(symverfile): __modpost ; + $(modules:.ko=.mod.c): __modpost ; + ++ifdef CONFIG_MARKERS ++$(markersfile): __modpost ; ++endif + + # Step 5), compile all *.mod.c files + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 2a174e5..c25948c 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -10,7 +10,8 @@ + * + * Usage: modpost vmlinux module1.o module2.o ... 
+ */ +- ++#define _GNU_SOURCE ++#include + #include + #include "modpost.h" + +@@ -289,7 +290,9 @@ parse_elf(struct elf_info *info, const char *filename) + if (strcmp(secstrings+sechdrs[i].sh_name, ".modinfo") == 0) { + info->modinfo = (void *)hdr + sechdrs[i].sh_offset; + info->modinfo_len = sechdrs[i].sh_size; +- } ++ } else if (strcmp(secstrings+sechdrs[i].sh_name, "__markers_strings") == 0) ++ info->markers_strings_sec = i; ++ + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; + +@@ -397,6 +400,63 @@ is_vmlinux(const char *modname) + return strcmp(myname, "vmlinux") == 0; + } + ++static void get_markers(struct elf_info *info, struct module *mod) ++{ ++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; ++ const char *strings = (const char *) info->hdr + sh->sh_offset; ++ const Elf_Sym *sym, *first_sym, *last_sym; ++ size_t n; ++ ++ ++ if (!info->markers_strings_sec) ++ return; ++ ++ /* ++ * First count the strings. We look for all the symbols defined ++ * in the __markers_strings section named __mstrtab_*. For ++ * these local names, the compiler puts a random .NNN suffix on, ++ * so the names don't correspond exactly. ++ */ ++ first_sym = last_sym = NULL; ++ n = 0; ++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ if (first_sym == NULL) ++ first_sym = sym; ++ last_sym = sym; ++ ++n; ++ } ++ ++ if (n == 0) ++ return; ++ /* ++ * Now collect each name and format into a line for the output. ++ * Lines look like: ++ * marker_name vmlinux marker %s format %d ++ * The format string after the second \t can use whitespace. 
++ */ ++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); ++ mod->nmarkers = n; ++ ++ n = 0; ++ for (sym = first_sym; sym <= last_sym; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ const char *name = strings + sym->st_value; ++ const char *fmt = strchr(name, '\0') + 1; ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ mod->markers[n++] = line; ++ } ++} ++ ++ + void + read_symbols(char *modname) + { +@@ -426,6 +486,7 @@ read_symbols(char *modname) + } + maybe_frob_version(modname, info.modinfo, info.modinfo_len, + (void *)info.modinfo - (void *)info.hdr); ++ get_markers(&info, mod); + parse_elf_finish(&info); + + /* Our trick to get versioning for struct_module - it's +@@ -682,6 +743,92 @@ write_dump(const char *fname) + write_if_changed(&buf, fname); + } + ++static void add_marker(struct module *mod, const char *name, const char *fmt) ++{ ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ ++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * ++ sizeof mod->markers[0]))); ++ mod->markers[mod->nmarkers++] = line; ++} ++ ++static void read_markers(const char *fname) ++{ ++ unsigned long size, pos = 0; ++ void *file = grab_file(fname, &size); ++ char *line; ++ ++ if (!file) /* No old markers, silently ignore */ ++ return; ++ ++ while ((line = get_next_line(&pos, file, size))) { ++ char *marker, *modname, *fmt; ++ struct module *mod; ++ ++ marker = line; ++ modname = strchr(marker, '\t'); ++ if (!modname) ++ goto fail; ++ *modname++ = '\0'; ++ fmt = strchr(modname, '\t'); ++ if (!fmt) ++ goto fail; ++ *fmt++ = '\0'; ++ if (*marker == '\0' || *modname == '\0') ++ goto fail; ++ ++ mod = find_module(modname); ++ if (!mod) { ++ if (is_vmlinux(modname)) ++ have_vmlinux = 1; ++ mod = 
new_module(NOFAIL(strdup(modname))); ++ mod->skip = 1; ++ } ++ ++ add_marker(mod, marker, fmt); ++ } ++ return; ++fail: ++ fatal("parse error in markers list file\n"); ++} ++ ++static int compare_strings(const void *a, const void *b) ++{ ++ return strcmp(*(const char **) a, *(const char **) b); ++} ++ ++static void write_markers(const char *fname) ++{ ++ struct buffer buf = { }; ++ struct module *mod; ++ size_t i; ++ ++ for (mod = modules; mod; mod = mod->next) ++ if (mod->markers != NULL) { ++ /* ++ * Sort the strings so we can skip duplicates when ++ * we write them out. ++ */ ++ qsort(mod->markers, mod->nmarkers, ++ sizeof mod->markers[0], &compare_strings); ++ for (i = 0; i < mod->nmarkers; ++i) { ++ char *line = mod->markers[i]; ++ buf_write(&buf, line, strlen(line)); ++ while (i + 1 < mod->nmarkers && ++ !strcmp(mod->markers[i], ++ mod->markers[i + 1])) ++ free(mod->markers[i++]); ++ free(mod->markers[i]); ++ } ++ free(mod->markers); ++ mod->markers = NULL; ++ } ++ ++ write_if_changed(&buf, fname); ++} ++ + int + main(int argc, char **argv) + { +@@ -690,8 +837,10 @@ main(int argc, char **argv) + char fname[SZ]; + char *dump_read = NULL, *dump_write = NULL; + int opt; ++ char *markers_read = NULL; ++ char *markers_write = NULL; + +- while ((opt = getopt(argc, argv, "i:mo:")) != -1) { ++ while ((opt = getopt(argc, argv, "i:mo:M:K:")) != -1) { + switch(opt) { + case 'i': + dump_read = optarg; +@@ -702,6 +851,12 @@ main(int argc, char **argv) + case 'o': + dump_write = optarg; + break; ++ case 'M': ++ markers_write = optarg; ++ break; ++ case 'K': ++ markers_read = optarg; ++ break; + default: + exit(1); + } +@@ -732,6 +887,12 @@ main(int argc, char **argv) + if (dump_write) + write_dump(dump_write); + ++ if (markers_read) ++ read_markers(markers_read); ++ ++ if (markers_write) ++ write_markers(markers_write); ++ + return 0; + } + +diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h +index 4871343..d79d7ea 100644 +--- a/scripts/mod/modpost.h ++++ 
b/scripts/mod/modpost.h +@@ -18,6 +18,7 @@ + #define Elf_Sym Elf32_Sym + #define ELF_ST_BIND ELF32_ST_BIND + #define ELF_ST_TYPE ELF32_ST_TYPE ++#define Elf_Section Elf32_Half + + #else + +@@ -26,7 +27,7 @@ + #define Elf_Sym Elf64_Sym + #define ELF_ST_BIND ELF64_ST_BIND + #define ELF_ST_TYPE ELF64_ST_TYPE +- ++#define Elf_Section Elf64_Half + #endif + + #if KERNEL_ELFDATA != HOST_ELFDATA +@@ -77,6 +78,8 @@ struct module { + int has_init; + int has_cleanup; + struct buffer dev_table_buf; ++ char **markers; ++ size_t nmarkers; + }; + + struct elf_info { +@@ -85,6 +88,7 @@ struct elf_info { + Elf_Shdr *sechdrs; + Elf_Sym *symtab_start; + Elf_Sym *symtab_stop; ++ Elf_Section markers_strings_sec; + const char *strtab; + char *modinfo; + unsigned int modinfo_len; Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch (rev 0) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch 2008-07-04 03:01:41 UTC (rev 8) @@ -0,0 +1,1917 @@ +diff --git a/Makefile b/Makefile +index 5bfc101..d495f16 100644 +--- a/Makefile ++++ b/Makefile +@@ -583,7 +583,7 @@ quiet_cmd_vmlinux__ ?= LD $@ + cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ + -T $(vmlinux-lds) $(vmlinux-init) \ + --start-group $(vmlinux-main) --end-group \ +- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) ++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) + + # Generate new vmlinux version + quiet_cmd_vmlinux_version = GEN .version +@@ -688,10 +688,30 @@ $(KALLSYMS): scripts ; + + endif # ifdef CONFIG_KALLSYMS + ++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has ++# relevant sections renamed as per the linker script. 
++quiet_cmd_vmlinux-modpost = LD $@ ++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ ++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ ++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) ++define rule_vmlinux-modpost ++ : ++ +$(call cmd,vmlinux-modpost) ++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ ++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd ++endef ++ ++ + # vmlinux image - including updated kernel symbols +-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE ++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE ++ $(call vmlinux-modpost) + $(call if_changed_rule,vmlinux__) + ++# build vmlinux.o first to catch section mismatch errors early ++$(kallsyms.o): vmlinux.o ++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ++ $(call if_changed_rule,vmlinux-modpost) ++ + # The actual objects are generated when descending, + # make sure no implicit rule kicks in + $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; +diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug +index cf069b7..d39d5c5 100644 +--- a/arch/i386/Kconfig.debug ++++ b/arch/i386/Kconfig.debug +@@ -29,6 +29,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL +diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S +index e8db99c..b846b21 100644 +--- a/arch/i386/kernel/vmlinux.lds.S ++++ b/arch/i386/kernel/vmlinux.lds.S +@@ -53,6 +53,7 @@ SECTIONS + /* writeable */ + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug +index 2782b48..17d4a44 100644 +--- a/arch/ia64/Kconfig.debug ++++ b/arch/ia64/Kconfig.debug +@@ -12,6 +12,12 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. + + choice + prompt "Physical memory granularity" +diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S +index a676e79..c6ea47e 100644 +--- a/arch/ia64/kernel/vmlinux.lds.S ++++ b/arch/ia64/kernel/vmlinux.lds.S +@@ -193,7 +193,7 @@ SECTIONS + + data : { } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) +- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } ++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) +diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug +index 860472a..5c3a9f5 100644 +--- a/arch/ppc64/Kconfig.debug ++++ b/arch/ppc64/Kconfig.debug +@@ -16,6 +16,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL +diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S +index 4103cc1..76d0bf0 100644 +--- a/arch/ppc64/kernel/vmlinux.lds.S ++++ b/arch/ppc64/kernel/vmlinux.lds.S +@@ -118,6 +118,7 @@ SECTIONS + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) ++ MARKER + } + + .opd : { +diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug +index cd8d39f..f39bf8d 100644 +--- a/arch/sparc64/Kconfig.debug ++++ b/arch/sparc64/Kconfig.debug +@@ -21,6 +21,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + config DEBUG_DCFLUSH + bool "D-cache flush debugging" + depends on DEBUG_KERNEL +diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S +index a710d38..d4b3b62 100644 +--- a/arch/sparc64/kernel/vmlinux.lds.S ++++ b/arch/sparc64/kernel/vmlinux.lds.S +@@ -27,6 +27,7 @@ SECTIONS + .data : + { + *(.data) ++ MARKER + CONSTRUCTORS + } + .data1 : { *(.data1) } +diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug +index cb33186..d0260cb 100644 +--- a/arch/x86_64/Kconfig.debug ++++ b/arch/x86_64/Kconfig.debug +@@ -55,6 +55,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config IOMMU_LEAK + bool "IOMMU leak tracing" + depends on DEBUG_KERNEL +diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S +index 053c826..3b3928f 100644 +--- a/arch/x86_64/kernel/vmlinux.lds.S ++++ b/arch/x86_64/kernel/vmlinux.lds.S +@@ -33,6 +33,7 @@ SECTIONS + + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index 181f3d2..c7c5dc6 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -158,7 +158,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); + + // ================= main 802.3ad protocol functions ================== + static int ad_lacpdu_send(struct port *port); +-static int ad_marker_send(struct port *port, struct marker *marker); ++static int ad_marker_send(struct port *port, struct bond_marker *marker); + static void ad_mux_machine(struct port *port); + static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); + static void ad_tx_machine(struct port *port); +@@ -171,8 +171,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast); + static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); + static void ad_enable_collecting_distributing(struct port *port); + static void ad_disable_collecting_distributing(struct port *port); +-static void ad_marker_info_received(struct marker *marker_info, struct port *port); +-static void ad_marker_response_received(struct marker *marker, struct port *port); ++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); ++static void ad_marker_response_received(struct bond_marker *marker, struct port *port); + + + ///////////////////////////////////////////////////////////////////////////////// +@@ -945,12 +945,13 @@ static int ad_lacpdu_send(struct port *port) + * Returns: 0 on success + * < 0 on error + */ +-static int ad_marker_send(struct port *port, struct marker *marker) 
++static int ad_marker_send(struct port *port, struct bond_marker *marker) + { + struct slave *slave = port->slave; + struct sk_buff *skb; +- struct marker_header *marker_header; +- int length = sizeof(struct marker_header); ++ struct bond_marker_header *marker_header; ++ int length = sizeof(struct bond_marker_header); ++ + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length + 16); +@@ -965,7 +966,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + +- marker_header = (struct marker_header *)skb_put(skb, length); ++ marker_header = (struct bond_marker_header *)skb_put(skb, length); + + marker_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it +@@ -1762,7 +1763,7 @@ static void ad_disable_collecting_distributing(struct port *port) + */ + static void ad_marker_info_send(struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + u16 index; + + // fill the marker PDU with the appropriate values +@@ -1795,13 +1796,14 @@ static void ad_marker_info_send(struct port *port) + * @port: the port we're looking at + * + */ +-static void ad_marker_info_received(struct marker *marker_info,struct port *port) ++static void ad_marker_info_received(struct bond_marker *marker_info, ++ struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + + // copy the received marker data to the response marker + //marker = *marker_info; +- memcpy(&marker, marker_info, sizeof(struct marker)); ++ memcpy(&marker, marker_info, sizeof(struct bond_marker)); + // change the marker subtype to marker response + marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; + // send the marker response +@@ -1820,7 +1822,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port + * response for marker PDU's, in 
this stage, but only to respond to marker + * information. + */ +-static void ad_marker_response_received(struct marker *marker, struct port *port) ++static void ad_marker_response_received(struct bond_marker *marker, ++ struct port *port) + { + marker=NULL; // just to satisfy the compiler + port=NULL; // just to satisfy the compiler +@@ -2208,15 +2211,16 @@ void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 leng + case AD_TYPE_MARKER: + // No need to convert fields to Little Endian since we don't use the marker's fields. + +- switch (((struct marker *)lacpdu)->tlv_type) { ++ switch (((struct bond_marker *)lacpdu)->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + dprintk("Received Marker Information on port %d\n", port->actor_port_number); +- ad_marker_info_received((struct marker *)lacpdu, port); ++ ad_marker_info_received((struct bond_marker *)lacpdu, port); ++ + break; + + case AD_MARKER_RESPONSE_SUBTYPE: + dprintk("Received Marker Response on port %d\n", port->actor_port_number); +- ad_marker_response_received((struct marker *)lacpdu, port); ++ ad_marker_response_received((struct bond_marker *)lacpdu, port); + break; + + default: +diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h +index 4c60b17..2bb477e 100644 +--- a/drivers/net/bonding/bond_3ad.h ++++ b/drivers/net/bonding/bond_3ad.h +@@ -105,7 +105,8 @@ typedef enum { + typedef enum { + AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype + AD_MARKER_RESPONSE_SUBTYPE // marker response subtype +-} marker_subtype_t; ++} bond_marker_subtype_t; ++ + + // timers types(43.4.9 in the 802.3ad standard) + typedef enum { +@@ -161,7 +162,7 @@ typedef struct lacpdu_header { + } lacpdu_header_t; + + // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) +-typedef struct marker { ++typedef struct bond_marker { + u8 subtype; // = 0x02 (marker PDU) + u8 version_number; // = 0x01 + u8 tlv_type; // = 0x01 (marker information) +@@ -174,12 
+175,12 @@ typedef struct marker { + u8 tlv_type_terminator; // = 0x00 + u8 terminator_length; // = 0x00 + u8 reserved_90[90]; // = 0 +-} marker_t; ++} bond_marker_t; + +-typedef struct marker_header { ++typedef struct bond_marker_header { + struct ad_header ad_header; +- struct marker marker; +-} marker_header_t; ++ struct bond_marker marker; ++} bond_marker_header_t; + + #pragma pack() + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index bb340cb..fd6e57f 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -6,10 +6,18 @@ + #define VMLINUX_SYMBOL(_sym_) _sym_ + #endif + ++/* Kernel markers : pointers */ ++#define MARKER \ ++ . = ALIGN(8); \ ++ VMLINUX_SYMBOL(__start___markers) = .; \ ++ *(__markers) \ ++ VMLINUX_SYMBOL(__stop___markers) = .; ++ + #define RODATA \ + .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ + *(.rodata) *(.rodata.*) \ + *(__vermagic) /* Kernel version magic */ \ ++ *(__markers_strings) /* Markers: strings */ \ + } \ + \ + .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ +diff --git a/include/linux/marker.h b/include/linux/marker.h +new file mode 100644 +index 0000000..efbc82b +--- /dev/null ++++ b/include/linux/marker.h +@@ -0,0 +1,139 @@ ++#ifndef _LINUX_MARKER_H ++#define _LINUX_MARKER_H ++ ++/* ++ * Code markup for dynamic and static tracing. ++ * ++ * See Documentation/marker.txt. ++ * ++ * (C) Copyright 2006 Mathieu Desnoyers ++ * ++ * This file is released under the GPLv2. ++ * See the file COPYING for more details. ++ */ ++ ++#include ++ ++struct module; ++struct marker; ++ ++/** ++ * marker_probe_func - Type of a marker probe function ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @args: variable argument list pointer. Use a pointer to overcome C's ++ * inability to pass this around as a pointer in a portable manner in ++ * the callee otherwise. ++ * ++ * Type of marker probe functions. 
They receive the mdata and need to parse the ++ * format string to recover the variable argument list. ++ */ ++typedef void marker_probe_func(void *probe_private, void *call_private, ++ const char *fmt, va_list *args); ++ ++struct marker_probe_closure { ++ marker_probe_func *func; /* Callback */ ++ void *probe_private; /* Private probe data */ ++}; ++ ++struct marker { ++ const char *name; /* Marker name */ ++ const char *format; /* Marker format string, describing the ++ * variable argument list. ++ */ ++ char state; /* Marker state. */ ++ char ptype; /* probe type : 0 : single, 1 : multi */ ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++} __attribute__((aligned(8))); ++ ++#ifdef CONFIG_MARKERS ++ ++/* ++ * Note : the empty asm volatile with read constraint is used here instead of a ++ * "used" attribute to fix a gcc 4.1.x bug. ++ * Make sure the alignment of the structure in the __markers section will ++ * not add unwanted padding between the beginning of the section and the ++ * structure. Force alignment to the same alignment as the section start. ++ */ ++#define __trace_mark(name, call_private, format, args...) 
\ ++ do { \ ++ static const char __mstrtab_##name[] \ ++ __attribute__((section("__markers_strings"))) \ ++ = #name "\0" format; \ ++ static struct marker __mark_##name \ ++ __attribute__((section("__markers"), aligned(8))) = \ ++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ ++ 0, 0, marker_probe_cb, \ ++ { __mark_empty_function, NULL}, NULL }; \ ++ __mark_check_format(format, ## args); \ ++ if (unlikely(__mark_##name.state)) { \ ++ (*__mark_##name.call) \ ++ (&__mark_##name, call_private, \ ++ format, ## args); \ ++ } \ ++ } while (0) ++ ++extern void marker_update_probe_range(struct marker *begin, ++ struct marker *end); ++#else /* !CONFIG_MARKERS */ ++#define __trace_mark(name, call_private, format, args...) \ ++ __mark_check_format(format, ## args) ++static inline void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ } ++#endif /* CONFIG_MARKERS */ ++ ++/** ++ * trace_mark - Marker ++ * @name: marker name, not quoted. ++ * @format: format string ++ * @args...: variable argument list ++ * ++ * Places a marker. ++ */ ++#define trace_mark(name, format, args...) \ ++ __trace_mark(name, NULL, format, ## args) ++ ++/** ++ * MARK_NOARGS - Format string for a marker with no argument. ++ */ ++#define MARK_NOARGS " " ++ ++/* To be used for string format validity checking with gcc */ ++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) ++{ ++} ++ ++extern marker_probe_func __mark_empty_function; ++ ++extern void marker_probe_cb(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++extern void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++ ++/* ++ * Connect a probe to a marker. ++ * private data pointer must be a valid allocated memory address, or NULL. 
++ */ ++extern int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private); ++ ++/* ++ * Returns the private data given to marker_probe_register. ++ */ ++extern int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private); ++/* ++ * Unregister a marker by providing the registered private data. ++ */ ++extern int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private); ++ ++extern void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num); ++ ++#endif +diff --git a/include/linux/module.h b/include/linux/module.h +index 8da8948..2ad5efd 100644 +--- a/include/linux/module.h ++++ b/include/linux/module.h +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -328,6 +329,10 @@ struct module + /* The command line arguments (may be mangled). People like + keeping pointers to this stuff */ + char *args; ++#ifdef CONFIG_MARKERS ++ struct marker *markers; ++ unsigned int num_markers; ++#endif + }; + + /* FIXME: It'd be nice to isolate modules during init, too, so they +@@ -448,6 +453,7 @@ int register_module_notifier(struct notifier_block * nb); + int unregister_module_notifier(struct notifier_block * nb); + + extern void print_modules(void); ++extern void module_update_markers(void); + #else /* !CONFIG_MODULES... 
*/ + #define EXPORT_SYMBOL(sym) + #define EXPORT_SYMBOL_GPL(sym) +@@ -537,6 +543,10 @@ static inline int unregister_module_notifier(struct notifier_block * nb) + static inline void print_modules(void) + { + } ++ ++static inline void module_update_markers(void) ++{ ++} + #endif /* CONFIG_MODULES */ + + #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index 91057d6..bcd0acb 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -42,6 +42,19 @@ + #include + #include + ++/* ++ * Prevent the compiler from merging or refetching accesses. The compiler ++ * is also forbidden from reordering successive instances of ACCESS_ONCE(), ++ * but only when the compiler is aware of some particular ordering. One way ++ * to make the compiler aware of ordering is to put the two invocations of ++ * ACCESS_ONCE() in different C statements. ++ * ++ * This macro does absolutely -nothing- to prevent the CPU from reordering, ++ * merging, or refetching absolutely anything at any time. 
++ * ++ */ ++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) ++ + /** + * struct rcu_head - callback structure for use with RCU + * @next: next update requests in a list +@@ -102,6 +115,7 @@ struct rcu_data { + struct rcu_head *donelist; + struct rcu_head **donetail; + int cpu; ++ struct rcu_head barrier; + }; + + DECLARE_PER_CPU(struct rcu_data, rcu_data); +@@ -266,6 +280,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head, + extern void FASTCALL(call_rcu_bh(struct rcu_head *head, + void (*func)(struct rcu_head *head))); + extern void synchronize_kernel(void); +- ++extern void rcu_barrier(void); + #endif /* __KERNEL__ */ + #endif /* __LINUX_RCUPDATE_H */ +diff --git a/kernel/Makefile b/kernel/Makefile +index 0b8c8ca..f8248bc 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o + obj-$(CONFIG_AUDITSYSCALL) += auditsc.o + obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o + obj-$(CONFIG_KPROBES) += kprobes.o ++obj-$(CONFIG_MARKERS) += marker.o + + ifneq ($(CONFIG_IA64),y) + # According to Alan Modra , the -fno-omit-frame-pointer is +diff --git a/kernel/marker.c b/kernel/marker.c +new file mode 100644 +index 0000000..c4c2cd8 +--- /dev/null ++++ b/kernel/marker.c +@@ -0,0 +1,851 @@ ++/* ++ * Copyright (C) 2007 Mathieu Desnoyers ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern struct marker __start___markers[]; ++extern struct marker __stop___markers[]; ++ ++/* Set to 1 to enable marker debug output */ ++const int marker_debug; ++ ++/* ++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin ++ * and module markers and the hash table. ++ */ ++static DEFINE_MUTEX(markers_mutex); ++ ++/* ++ * Marker hash table, containing the active markers. ++ * Protected by module_mutex. ++ */ ++#define MARKER_HASH_BITS 6 ++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) ++ ++/* ++ * Note about RCU : ++ * It is used to make sure every handler has finished using its private data ++ * between two consecutive operation (add or remove) on a given marker. It is ++ * also used to delay the free of multiple probes array until a quiescent state ++ * is reached. ++ * marker entries modifications are protected by the markers_mutex. ++ */ ++struct marker_entry { ++ struct hlist_node hlist; ++ char *format; ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++ int refcount; /* Number of times armed. 0 if disarmed. */ ++ struct rcu_head rcu; ++ void *oldptr; ++ char rcu_pending:1; ++ char ptype:1; ++ char name[0]; /* Contains name'\0'format'\0' */ ++}; ++ ++static struct hlist_head marker_table[MARKER_TABLE_SIZE]; ++ ++/** ++ * __mark_empty_function - Empty probe callback ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @...: variable argument list ++ * ++ * Empty callback provided as a probe to the markers. 
By providing this to a ++ * disabled marker, we make sure the execution flow is always valid even ++ * though the function pointer change and the marker enabling are two distinct ++ * operations that modifies the execution flow of preemptible code. ++ */ ++void __mark_empty_function(void *probe_private, void *call_private, ++ const char *fmt, va_list *args) ++{ ++} ++EXPORT_SYMBOL_GPL(__mark_empty_function); ++ ++/* ++ * marker_probe_cb Callback that prepares the variable argument list for probes. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we ++ * need to put a full smp_rmb() in this branch. This is why we do not use ++ * rcu_dereference() for the pointer read. ++ */ ++void marker_probe_cb(const struct marker *mdata, void *call_private, ++ const char *fmt, ...) ++{ ++ va_list args; ++ char ptype; ++ ++ /* ++ * disabling preemption to make sure the teardown of the callbacks can ++ * be done correctly when they are in modules and they insure RCU read ++ * coherency. ++ */ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ va_start(args, fmt); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ va_end(args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. 
Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. ++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) { ++ va_start(args, fmt); ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ va_end(args); ++ } ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb); ++ ++/* ++ * marker_probe_cb Callback that does not prepare the variable argument list. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Should be connected to markers "MARK_NOARGS". ++ */ ++void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...) ++{ ++ va_list args; /* not initialized */ ++ char ptype; ++ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. 
++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); ++ ++static void free_old_closure(struct rcu_head *head) ++{ ++ struct marker_entry *entry = container_of(head, ++ struct marker_entry, rcu); ++ kfree(entry->oldptr); ++ /* Make sure we free the data before setting the pending flag to 0 */ ++ smp_wmb(); ++ entry->rcu_pending = 0; ++} ++ ++static void debug_print_probes(struct marker_entry *entry) ++{ ++ int i; ++ ++ if (!marker_debug) ++ return; ++ ++ if (!entry->ptype) { ++ printk(KERN_DEBUG "Single probe : %p %p\n", ++ entry->single.func, ++ entry->single.probe_private); ++ } else { ++ for (i = 0; entry->multi[i].func; i++) ++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, ++ entry->multi[i].func, ++ entry->multi[i].probe_private); ++ } ++} ++ ++static struct marker_probe_closure * ++marker_entry_add_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0; ++ struct marker_probe_closure *old, *new; ++ ++ WARN_ON(!probe); ++ ++ debug_print_probes(entry); ++ old = entry->multi; ++ if (!entry->ptype) { ++ if (entry->single.func == probe && ++ entry->single.probe_private == probe_private) ++ return ERR_PTR(-EBUSY); ++ if (entry->single.func == __mark_empty_function) { ++ /* 0 -> 1 probes */ ++ entry->single.func = probe; ++ entry->single.probe_private = probe_private; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* 1 -> 2 probes */ ++ nr_probes = 1; ++ old = NULL; ++ } ++ } else { ++ /* (N -> N+1), (N != 0, 1) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) ++ if (old[nr_probes].func == probe ++ && old[nr_probes].probe_private ++ == probe_private) ++ return ERR_PTR(-EBUSY); ++ } ++ /* + 2 : one for new probe, one for NULL func */ ++ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), ++ GFP_KERNEL); ++ if (new == NULL) ++ return ERR_PTR(-ENOMEM); ++ if (!old) ++ new[0] = entry->single; ++ else ++ memcpy(new, old, ++ nr_probes * sizeof(struct marker_probe_closure)); ++ new[nr_probes].func = probe; ++ new[nr_probes].probe_private = probe_private; ++ entry->refcount = nr_probes + 1; ++ entry->multi = new; ++ entry->ptype = 1; ++ debug_print_probes(entry); ++ return old; ++} ++ ++static struct marker_probe_closure * ++marker_entry_remove_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0, nr_del = 0, i; ++ struct marker_probe_closure *old, *new; ++ ++ old = entry->multi; ++ ++ debug_print_probes(entry); ++ if (!entry->ptype) { ++ /* 0 -> N is an error */ ++ WARN_ON(entry->single.func == __mark_empty_function); ++ /* 1 -> 0 probes */ ++ WARN_ON(probe && entry->single.func != probe); ++ WARN_ON(entry->single.probe_private != probe_private); ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* (N -> M), (N > 1, M >= 0) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { ++ if ((!probe || old[nr_probes].func == probe) ++ && old[nr_probes].probe_private ++ == probe_private) ++ nr_del++; ++ } ++ } ++ ++ if (nr_probes - nr_del == 0) { ++ /* N -> 0, (N > 1) */ ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ } else if (nr_probes - nr_del == 1) { ++ /* N -> 1, (N > 1) */ ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ entry->single = old[i]; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ } else { ++ int j = 0; ++ /* N -> M, (N > 1, M > 1) */ ++ /* + 1 for NULL */ ++ new = kzalloc((nr_probes - nr_del + 1) ++ * sizeof(struct marker_probe_closure), GFP_KERNEL); ++ if (new == NULL) ++ return 
ERR_PTR(-ENOMEM); ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ new[j++] = old[i]; ++ entry->refcount = nr_probes - nr_del; ++ entry->ptype = 1; ++ entry->multi = new; ++ } ++ debug_print_probes(entry); ++ return old; ++} ++ ++/* ++ * Get marker if the marker is present in the marker hash table. ++ * Must be called with markers_mutex held. ++ * Returns NULL if not present. ++ */ ++static struct marker_entry *get_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ u32 hash = jhash(name, strlen(name), 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) ++ return e; ++ } ++ return NULL; ++} ++ ++/* ++ * Add the marker to the marker hash table. Must be called with markers_mutex ++ * held. ++ */ ++static struct marker_entry *add_marker(const char *name, const char *format) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ size_t format_len = 0; ++ u32 hash = jhash(name, name_len-1, 0); ++ ++ if (format) ++ format_len = strlen(format) + 1; ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ printk(KERN_NOTICE ++ "Marker %s busy\n", name); ++ return ERR_PTR(-EBUSY); /* Already there */ ++ } ++ } ++ /* ++ * Using kmalloc here to allocate a variable length element. Could ++ * cause some memory fragmentation if overused. 
++ */ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return ERR_PTR(-ENOMEM); ++ memcpy(&e->name[0], name, name_len); ++ if (format) { ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ } else { ++ e->format = NULL; ++ e->call = marker_probe_cb; ++ } ++ e->single.func = __mark_empty_function; ++ e->single.probe_private = NULL; ++ e->multi = NULL; ++ e->ptype = 0; ++ e->refcount = 0; ++ e->rcu_pending = 0; ++ hlist_add_head(&e->hlist, head); ++ return e; ++} ++ ++/* ++ * Remove the marker from the marker hash table. Must be called with mutex_lock ++ * held. ++ */ ++static int remove_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ int found = 0; ++ size_t len = strlen(name) + 1; ++ u32 hash = jhash(name, len-1, 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) ++ return -ENOENT; ++ if (e->single.func != __mark_empty_function) ++ return -EBUSY; ++ hlist_del(&e->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if (e->rcu_pending) ++ rcu_barrier(); ++ kfree(e); ++ return 0; ++} ++ ++/* ++ * Set the mark_entry format to the format found in the element. 
++ */ ++static int marker_set_format(struct marker_entry **entry, const char *format) ++{ ++ struct marker_entry *e; ++ size_t name_len = strlen((*entry)->name) + 1; ++ size_t format_len = strlen(format) + 1; ++ ++ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return -ENOMEM; ++ memcpy(&e->name[0], (*entry)->name, name_len); ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ e->single = (*entry)->single; ++ e->multi = (*entry)->multi; ++ e->ptype = (*entry)->ptype; ++ e->refcount = (*entry)->refcount; ++ e->rcu_pending = 0; ++ hlist_add_before(&e->hlist, &(*entry)->hlist); ++ hlist_del(&(*entry)->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if ((*entry)->rcu_pending) ++ rcu_barrier(); ++ kfree(*entry); ++ *entry = e; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ return 0; ++} ++ ++/* ++ * Sets the probe callback corresponding to one marker. ++ */ ++static int set_marker(struct marker_entry **entry, struct marker *elem, ++ int active) ++{ ++ int ret; ++ WARN_ON(strcmp((*entry)->name, elem->name) != 0); ++ ++ if ((*entry)->format) { ++ if (strcmp((*entry)->format, elem->format) != 0) { ++ printk(KERN_NOTICE ++ "Format mismatch for probe %s " ++ "(%s), marker (%s)\n", ++ (*entry)->name, ++ (*entry)->format, ++ elem->format); ++ return -EPERM; ++ } ++ } else { ++ ret = marker_set_format(entry, elem->format); ++ if (ret) ++ return ret; ++ } ++ ++ /* ++ * probe_cb setup (statically known) is done here. It is ++ * asynchronous with the rest of execution, therefore we only ++ * pass from a "safe" callback (with argument) to an "unsafe" ++ * callback (does not set arguments). 
++ */ ++ elem->call = (*entry)->call; ++ /* ++ * Sanity check : ++ * We only update the single probe private data when the ptr is ++ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) ++ */ ++ WARN_ON(elem->single.func != __mark_empty_function ++ && elem->single.probe_private ++ != (*entry)->single.probe_private && ++ !elem->ptype); ++ elem->single.probe_private = (*entry)->single.probe_private; ++ /* ++ * Make sure the private data is valid when we update the ++ * single probe ptr. ++ */ ++ smp_wmb(); ++ elem->single.func = (*entry)->single.func; ++ /* ++ * We also make sure that the new probe callbacks array is consistent ++ * before setting a pointer to it. ++ */ ++ rcu_assign_pointer(elem->multi, (*entry)->multi); ++ /* ++ * Update the function or multi probe array pointer before setting the ++ * ptype. ++ */ ++ smp_wmb(); ++ elem->ptype = (*entry)->ptype; ++ elem->state = active; ++ ++ return 0; ++} ++ ++/* ++ * Disable a marker and its probe callback. ++ * Note: only after a synchronize_sched() issued after setting elem->call to the ++ * empty function insures that the original callback is not used anymore. This ++ * insured by preemption disabling around the call site. ++ */ ++static void disable_marker(struct marker *elem) ++{ ++ /* leave "call" as is. It is known statically. */ ++ elem->state = 0; ++ elem->single.func = __mark_empty_function; ++ /* Update the function before setting the ptype */ ++ smp_wmb(); ++ elem->ptype = 0; /* single probe */ ++ /* ++ * Leave the private data and id there, because removal is racy and ++ * should be done only after a synchronize_sched(). These are never used ++ * until the next initialization anyway. ++ */ ++} ++ ++/** ++ * marker_update_probe_range - Update a probe range ++ * @begin: beginning of the range ++ * @end: end of the range ++ * ++ * Updates the probe callback corresponding to a range of markers. 
++ */ ++void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ ++ struct marker *iter; ++ struct marker_entry *mark_entry; ++ ++ mutex_lock(&markers_mutex); ++ for (iter = begin; iter < end; iter++) { ++ mark_entry = get_marker(iter->name); ++ if (mark_entry) { ++ set_marker(&mark_entry, iter, ++ !!mark_entry->refcount); ++ /* ++ * ignore error, continue ++ */ ++ } else { ++ disable_marker(iter); ++ } ++ } ++ mutex_unlock(&markers_mutex); ++} ++ ++/* ++ * Update probes, removing the faulty probes. ++ * Issues a synchronize_sched() when no reference to the module passed ++ * as parameter is found in the probes so the probe module can be ++ * safely unloaded from now on. ++ * ++ * Internal callback only changed before the first probe is connected to it. ++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 ++ * transitions. All other transitions will leave the old private data valid. ++ * This makes the non-atomicity of the callback/private data updates valid. ++ * ++ * "special case" updates : ++ * 0 -> 1 callback ++ * 1 -> 0 callback ++ * 1 -> 2 callbacks ++ * 2 -> 1 callbacks ++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. ++ * Site effect : marker_set_format may delete the marker entry (creating a ++ * replacement). ++ */ ++static void marker_update_probes(void) ++{ ++ /* Core kernel markers */ ++ marker_update_probe_range(__start___markers, __stop___markers); ++ /* Markers in modules. */ ++ module_update_markers(); ++} ++ ++/** ++ * marker_probe_register - Connect a probe to a marker ++ * @name: marker name ++ * @format: format string ++ * @probe: probe handler ++ * @probe_private: probe private data ++ * ++ * private data must be a valid allocated memory address, or NULL. ++ * Returns 0 if ok, error value on error. ++ * The probe address must at least be aligned on the architecture pointer size. 
++ */ ++int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ int ret = 0; ++ struct marker_probe_closure *old; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ if (!entry) { ++ entry = add_marker(name, format); ++ if (IS_ERR(entry)) { ++ ret = PTR_ERR(entry); ++ goto end; ++ } ++ } ++ /* ++ * If we detect that a call_rcu is pending for this marker, ++ * make sure it's executed now. ++ */ ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_add_probe(entry, probe, probe_private); ++ if (IS_ERR(old)) { ++ ret = PTR_ERR(old); ++ goto end; ++ } ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ WARN_ON(!entry); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_register); ++ ++/** ++ * marker_probe_unregister - Disconnect a probe from a marker ++ * @name: marker name ++ * @probe: probe function pointer ++ * @probe_private: probe private data ++ * ++ * Returns the private data given to marker_probe_register, or an ERR_PTR(). ++ * We do not need to call a synchronize_sched to make sure the probes have ++ * finished running before doing a module unload, because the module unload ++ * itself uses stop_machine(), which insures that every preempt disabled section ++ * have finished. 
++ */ ++int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ struct marker_probe_closure *old; ++ int ret = 0; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, probe, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker(name); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister); ++ ++static struct marker_entry * ++get_marker_from_private_data(marker_probe_func *probe, void *probe_private) ++{ ++ struct marker_entry *entry; ++ unsigned int i; ++ struct hlist_head *head; ++ struct hlist_node *node; ++ ++ for (i = 0; i < MARKER_TABLE_SIZE; i++) { ++ head = &marker_table[i]; ++ hlist_for_each_entry(entry, node, head, hlist) { ++ if (!entry->ptype) { ++ if (entry->single.func == probe ++ && entry->single.probe_private ++ == probe_private) ++ return entry; ++ } else { ++ struct marker_probe_closure *closure; ++ closure = entry->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func == probe && ++ closure[i].probe_private ++ == probe_private) ++ return entry; ++ } ++ } ++ } ++ } ++ return NULL; ++} ++ ++/** ++ * marker_probe_unregister_private_data - Disconnect a probe from a marker ++ * @probe: probe function ++ * @probe_private: probe private data ++ * ++ * Unregister a probe by providing the registered private data. ++ * Only removes the first marker found in hash table. ++ * Return 0 on success or error value. 
++ * We do not need to call a synchronize_sched to make sure the probes have ++ * finished running before doing a module unload, because the module unload ++ * itself uses stop_machine(), which insures that every preempt disabled section ++ * have finished. ++ */ ++int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private) ++{ ++ struct marker_entry *entry; ++ int ret = 0; ++ struct marker_probe_closure *old; ++ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ if (!entry) { ++ ret = -ENOENT; ++ goto end; ++ } ++ if (entry->rcu_pending) ++ rcu_barrier(); ++ old = marker_entry_remove_probe(entry, NULL, probe_private); ++ mutex_unlock(&markers_mutex); ++ marker_update_probes(); /* may update entry */ ++ mutex_lock(&markers_mutex); ++ entry = get_marker_from_private_data(probe, probe_private); ++ WARN_ON(!entry); ++ entry->oldptr = old; ++ entry->rcu_pending = 1; ++ /* write rcu_pending before calling the RCU callback */ ++ smp_wmb(); ++ call_rcu(&entry->rcu, free_old_closure); ++ remove_marker(entry->name); /* Ignore busy error message */ ++end: ++ mutex_unlock(&markers_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); ++ ++/** ++ * marker_get_private_data - Get a marker's probe private data ++ * @name: marker name ++ * @probe: probe to match ++ * @num: get the nth matching probe's private data ++ * ++ * Returns the nth private data pointer (starting from 0) matching, or an ++ * ERR_PTR. ++ * Returns the private data pointer, or an ERR_PTR. ++ * The private data pointer should _only_ be dereferenced if the caller is the ++ * owner of the data, or its content could vanish. This is mostly used to ++ * confirm that a caller is the owner of a registered probe. 
++ */ ++void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ u32 hash = jhash(name, name_len-1, 0); ++ int i; ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ if (!e->ptype) { ++ if (num == 0 && e->single.func == probe) ++ return e->single.probe_private; ++ else ++ break; ++ } else { ++ struct marker_probe_closure *closure; ++ int match = 0; ++ closure = e->multi; ++ for (i = 0; closure[i].func; i++) { ++ if (closure[i].func != probe) ++ continue; ++ if (match++ == num) ++ return closure[i].probe_private; ++ } ++ } ++ } ++ } ++ return ERR_PTR(-ENOENT); ++} ++EXPORT_SYMBOL_GPL(marker_get_private_data); +diff --git a/kernel/module.c b/kernel/module.c +index 7f0ccd8..1cd4c54 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -1505,6 +1505,8 @@ static struct module *load_module(void __user *umod, + struct exception_table_entry *extable; + mm_segment_t old_fs; + int gpgsig_ok; ++ unsigned int markersindex; ++ unsigned int markersstringsindex; + + DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", + umod, len, uargs); +@@ -1735,6 +1737,9 @@ static struct module *load_module(void __user *umod, + tainted |= TAINT_FORCED_MODULE; + } + #endif ++ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); ++ markersstringsindex = find_sec(hdr, sechdrs, secstrings, ++ "__markers_strings"); + + /* Now do relocations. 
*/ + for (i = 1; i < hdr->e_shnum; i++) { +@@ -1758,6 +1763,12 @@ static struct module *load_module(void __user *umod, + goto cleanup; + } + ++#ifdef CONFIG_MARKERS ++ mod->markers = (void *)sechdrs[markersindex].sh_addr; ++ mod->num_markers = ++ sechdrs[markersindex].sh_size / sizeof(*mod->markers); ++#endif ++ + /* Set up and sort exception table */ + mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); + mod->extable = extable = (void *)sechdrs[exindex].sh_addr; +@@ -1769,6 +1780,12 @@ static struct module *load_module(void __user *umod, + + add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + ++#ifdef CONFIG_MARKERS ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++#endif ++ + err = module_finalize(hdr, sechdrs, mod); + if (err < 0) + goto cleanup; +@@ -2186,6 +2203,20 @@ void struct_module(struct module *mod) { return; } + EXPORT_SYMBOL(struct_module); + #endif + ++#ifdef CONFIG_MARKERS ++void module_update_markers(void) ++{ ++ struct module *mod; ++ ++ down(&module_mutex); ++ list_for_each_entry(mod, &modules, list) ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++ up(&module_mutex); ++} ++#endif ++ + static int __init modules_init(void) + { + return subsystem_register(&module_subsys); +diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c +index 1b16bfc..20ade31 100644 +--- a/kernel/rcupdate.c ++++ b/kernel/rcupdate.c +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + + /* Definition for rcupdate control block. */ + struct rcu_ctrlblk rcu_ctrlblk = +@@ -98,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head, + local_irq_restore(flags); + } + ++static atomic_t rcu_barrier_cpu_count; ++static DEFINE_MUTEX(rcu_barrier_mutex); ++static struct completion rcu_barrier_completion; ++ + /** + * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * @head: structure to be used for queueing the RCU updates. 
+@@ -410,6 +415,44 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, + return NOTIFY_OK; + } + ++static void rcu_barrier_callback(struct rcu_head *notused) ++{ ++ if (atomic_dec_and_test(&rcu_barrier_cpu_count)) ++ complete(&rcu_barrier_completion); ++} ++ ++/* ++ * Called with preemption disabled, and from cross-cpu IRQ context. ++ */ ++static void rcu_barrier_func(void *notused) ++{ ++ int cpu = smp_processor_id(); ++ struct rcu_data *rdp = &per_cpu(rcu_data, cpu); ++ struct rcu_head *head; ++ ++ head = &rdp->barrier; ++ atomic_inc(&rcu_barrier_cpu_count); ++ call_rcu(head, rcu_barrier_callback); ++} ++ ++/** ++ * rcu_barrier - Wait until all the in-flight RCUs are complete. ++ */ ++void rcu_barrier(void) ++{ ++ BUG_ON(in_interrupt()); ++ /* Take cpucontrol mutex to protect against CPU hotplug */ ++ mutex_lock(&rcu_barrier_mutex); ++ init_completion(&rcu_barrier_completion); ++ atomic_set(&rcu_barrier_cpu_count, 0); ++ on_each_cpu(rcu_barrier_func, NULL, 0, 1); ++ wait_for_completion(&rcu_barrier_completion); ++ mutex_unlock(&rcu_barrier_mutex); ++} ++EXPORT_SYMBOL_GPL(rcu_barrier); ++ ++ ++ + static struct notifier_block __devinitdata rcu_nb = { + .notifier_call = rcu_cpu_notify, + }; +diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost +index b3d31b5..b100a32 100644 +--- a/scripts/Makefile.modpost ++++ b/scripts/Makefile.modpost +@@ -13,6 +13,7 @@ + # 2) modpost is then used to + # 3) create one .mod.c file pr. 
module + # 4) create one Module.symvers file with CRC for all exported symbols ++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers + # 5) compile all .mod.c files + # 6) final link of the module to a file + +@@ -40,6 +41,11 @@ include scripts/Makefile.lib + + symverfile := $(objtree)/Module.symvers + ++kernelmarkersfile := $(objtree)/Module.markers ++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers ++ ++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) ++ + # Step 1), find all modules listed in $(MODVERDIR)/ + __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) + modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) +@@ -53,16 +59,27 @@ quiet_cmd_modpost = MODPOST + cmd_modpost = scripts/mod/modpost \ + $(if $(CONFIG_MODVERSIONS),-m) \ + $(if $(KBUILD_EXTMOD),-i,-o) $(symverfile) \ ++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ ++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ + $(filter-out FORCE,$^) + + .PHONY: __modpost + __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE + $(call cmd,modpost) + ++quiet_cmd_kernel-mod = MODPOST $@ ++ cmd_kernel-mod = $(cmd_modpost) $@ ++ ++vmlinux.o: FORCE ++ $(call cmd,kernel-mod) ++ + # Declare generated files as targets for modpost + $(symverfile): __modpost ; + $(modules:.ko=.mod.c): __modpost ; + ++ifdef CONFIG_MARKERS ++$(markersfile): __modpost ; ++endif + + # Step 5), compile all *.mod.c files + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 2a174e5..c25948c 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -10,7 +10,8 @@ + * + * Usage: modpost vmlinux module1.o module2.o ... 
+ */ +- ++#define _GNU_SOURCE ++#include + #include + #include "modpost.h" + +@@ -289,7 +290,9 @@ parse_elf(struct elf_info *info, const char *filename) + if (strcmp(secstrings+sechdrs[i].sh_name, ".modinfo") == 0) { + info->modinfo = (void *)hdr + sechdrs[i].sh_offset; + info->modinfo_len = sechdrs[i].sh_size; +- } ++ } else if (strcmp(secstrings+sechdrs[i].sh_name, "__markers_strings") == 0) ++ info->markers_strings_sec = i; ++ + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; + +@@ -397,6 +400,63 @@ is_vmlinux(const char *modname) + return strcmp(myname, "vmlinux") == 0; + } + ++static void get_markers(struct elf_info *info, struct module *mod) ++{ ++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; ++ const char *strings = (const char *) info->hdr + sh->sh_offset; ++ const Elf_Sym *sym, *first_sym, *last_sym; ++ size_t n; ++ ++ ++ if (!info->markers_strings_sec) ++ return; ++ ++ /* ++ * First count the strings. We look for all the symbols defined ++ * in the __markers_strings section named __mstrtab_*. For ++ * these local names, the compiler puts a random .NNN suffix on, ++ * so the names don't correspond exactly. ++ */ ++ first_sym = last_sym = NULL; ++ n = 0; ++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ if (first_sym == NULL) ++ first_sym = sym; ++ last_sym = sym; ++ ++n; ++ } ++ ++ if (n == 0) ++ return; ++ /* ++ * Now collect each name and format into a line for the output. ++ * Lines look like: ++ * marker_name vmlinux marker %s format %d ++ * The format string after the second \t can use whitespace. 
++ */ ++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); ++ mod->nmarkers = n; ++ ++ n = 0; ++ for (sym = first_sym; sym <= last_sym; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ const char *name = strings + sym->st_value; ++ const char *fmt = strchr(name, '\0') + 1; ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ mod->markers[n++] = line; ++ } ++} ++ ++ + void + read_symbols(char *modname) + { +@@ -426,6 +486,7 @@ read_symbols(char *modname) + } + maybe_frob_version(modname, info.modinfo, info.modinfo_len, + (void *)info.modinfo - (void *)info.hdr); ++ get_markers(&info, mod); + parse_elf_finish(&info); + + /* Our trick to get versioning for struct_module - it's +@@ -682,6 +743,92 @@ write_dump(const char *fname) + write_if_changed(&buf, fname); + } + ++static void add_marker(struct module *mod, const char *name, const char *fmt) ++{ ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ ++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * ++ sizeof mod->markers[0]))); ++ mod->markers[mod->nmarkers++] = line; ++} ++ ++static void read_markers(const char *fname) ++{ ++ unsigned long size, pos = 0; ++ void *file = grab_file(fname, &size); ++ char *line; ++ ++ if (!file) /* No old markers, silently ignore */ ++ return; ++ ++ while ((line = get_next_line(&pos, file, size))) { ++ char *marker, *modname, *fmt; ++ struct module *mod; ++ ++ marker = line; ++ modname = strchr(marker, '\t'); ++ if (!modname) ++ goto fail; ++ *modname++ = '\0'; ++ fmt = strchr(modname, '\t'); ++ if (!fmt) ++ goto fail; ++ *fmt++ = '\0'; ++ if (*marker == '\0' || *modname == '\0') ++ goto fail; ++ ++ mod = find_module(modname); ++ if (!mod) { ++ if (is_vmlinux(modname)) ++ have_vmlinux = 1; ++ mod = 
new_module(NOFAIL(strdup(modname))); ++ mod->skip = 1; ++ } ++ ++ add_marker(mod, marker, fmt); ++ } ++ return; ++fail: ++ fatal("parse error in markers list file\n"); ++} ++ ++static int compare_strings(const void *a, const void *b) ++{ ++ return strcmp(*(const char **) a, *(const char **) b); ++} ++ ++static void write_markers(const char *fname) ++{ ++ struct buffer buf = { }; ++ struct module *mod; ++ size_t i; ++ ++ for (mod = modules; mod; mod = mod->next) ++ if (mod->markers != NULL) { ++ /* ++ * Sort the strings so we can skip duplicates when ++ * we write them out. ++ */ ++ qsort(mod->markers, mod->nmarkers, ++ sizeof mod->markers[0], &compare_strings); ++ for (i = 0; i < mod->nmarkers; ++i) { ++ char *line = mod->markers[i]; ++ buf_write(&buf, line, strlen(line)); ++ while (i + 1 < mod->nmarkers && ++ !strcmp(mod->markers[i], ++ mod->markers[i + 1])) ++ free(mod->markers[i++]); ++ free(mod->markers[i]); ++ } ++ free(mod->markers); ++ mod->markers = NULL; ++ } ++ ++ write_if_changed(&buf, fname); ++} ++ + int + main(int argc, char **argv) + { +@@ -690,8 +837,10 @@ main(int argc, char **argv) + char fname[SZ]; + char *dump_read = NULL, *dump_write = NULL; + int opt; ++ char *markers_read = NULL; ++ char *markers_write = NULL; + +- while ((opt = getopt(argc, argv, "i:mo:")) != -1) { ++ while ((opt = getopt(argc, argv, "i:mo:M:K:")) != -1) { + switch(opt) { + case 'i': + dump_read = optarg; +@@ -702,6 +851,12 @@ main(int argc, char **argv) + case 'o': + dump_write = optarg; + break; ++ case 'M': ++ markers_write = optarg; ++ break; ++ case 'K': ++ markers_read = optarg; ++ break; + default: + exit(1); + } +@@ -732,6 +887,12 @@ main(int argc, char **argv) + if (dump_write) + write_dump(dump_write); + ++ if (markers_read) ++ read_markers(markers_read); ++ ++ if (markers_write) ++ write_markers(markers_write); ++ + return 0; + } + +diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h +index 4871343..d79d7ea 100644 +--- a/scripts/mod/modpost.h ++++ 
b/scripts/mod/modpost.h +@@ -18,6 +18,7 @@ + #define Elf_Sym Elf32_Sym + #define ELF_ST_BIND ELF32_ST_BIND + #define ELF_ST_TYPE ELF32_ST_TYPE ++#define Elf_Section Elf32_Half + + #else + +@@ -26,7 +27,7 @@ + #define Elf_Sym Elf64_Sym + #define ELF_ST_BIND ELF64_ST_BIND + #define ELF_ST_TYPE ELF64_ST_TYPE +- ++#define Elf_Section Elf64_Half + #endif + + #if KERNEL_ELFDATA != HOST_ELFDATA +@@ -77,6 +78,8 @@ struct module { + int has_init; + int has_cleanup; + struct buffer dev_table_buf; ++ char **markers; ++ size_t nmarkers; + }; + + struct elf_info { +@@ -85,6 +88,7 @@ struct elf_info { + Elf_Shdr *sechdrs; + Elf_Sym *symtab_start; + Elf_Sym *symtab_stop; ++ Elf_Section markers_strings_sec; + const char *strtab; + char *modinfo; + unsigned int modinfo_len; Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch =================================================================== --- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch (rev 0) +++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch 2008-07-04 03:01:41 UTC (rev 8) @@ -0,0 +1,1917 @@ +diff --git a/Makefile b/Makefile +index 06b517d..afa3d2c 100644 +--- a/Makefile ++++ b/Makefile +@@ -583,7 +583,7 @@ quiet_cmd_vmlinux__ ?= LD $@ + cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ + -T $(vmlinux-lds) $(vmlinux-init) \ + --start-group $(vmlinux-main) --end-group \ +- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) ++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) + + # Generate new vmlinux version + quiet_cmd_vmlinux_version = GEN .version +@@ -688,10 +688,30 @@ $(KALLSYMS): scripts ; + + endif # ifdef CONFIG_KALLSYMS + ++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has ++# relevant sections renamed as per the linker script. 
++quiet_cmd_vmlinux-modpost = LD $@ ++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ ++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ ++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^) ++define rule_vmlinux-modpost ++ : ++ +$(call cmd,vmlinux-modpost) ++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ ++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd ++endef ++ ++ + # vmlinux image - including updated kernel symbols +-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE ++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE ++ $(call vmlinux-modpost) + $(call if_changed_rule,vmlinux__) + ++# build vmlinux.o first to catch section mismatch errors early ++$(kallsyms.o): vmlinux.o ++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ++ $(call if_changed_rule,vmlinux-modpost) ++ + # The actual objects are generated when descending, + # make sure no implicit rule kicks in + $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; +diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug +index cf069b7..d39d5c5 100644 +--- a/arch/i386/Kconfig.debug ++++ b/arch/i386/Kconfig.debug +@@ -29,6 +29,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL +diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S +index e8db99c..b846b21 100644 +--- a/arch/i386/kernel/vmlinux.lds.S ++++ b/arch/i386/kernel/vmlinux.lds.S +@@ -53,6 +53,7 @@ SECTIONS + /* writeable */ + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug +index 2782b48..17d4a44 100644 +--- a/arch/ia64/Kconfig.debug ++++ b/arch/ia64/Kconfig.debug +@@ -12,6 +12,12 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. + + choice + prompt "Physical memory granularity" +diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S +index a676e79..c6ea47e 100644 +--- a/arch/ia64/kernel/vmlinux.lds.S ++++ b/arch/ia64/kernel/vmlinux.lds.S +@@ -193,7 +193,7 @@ SECTIONS + + data : { } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) +- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } ++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS } + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) +diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug +index 860472a..5c3a9f5 100644 +--- a/arch/ppc64/Kconfig.debug ++++ b/arch/ppc64/Kconfig.debug +@@ -16,6 +16,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL +diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S +index 4103cc1..76d0bf0 100644 +--- a/arch/ppc64/kernel/vmlinux.lds.S ++++ b/arch/ppc64/kernel/vmlinux.lds.S +@@ -118,6 +118,7 @@ SECTIONS + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) ++ MARKER + } + + .opd : { +diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug +index cd8d39f..f39bf8d 100644 +--- a/arch/sparc64/Kconfig.debug ++++ b/arch/sparc64/Kconfig.debug +@@ -21,6 +21,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. ++ + config DEBUG_DCFLUSH + bool "D-cache flush debugging" + depends on DEBUG_KERNEL +diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S +index a710d38..d4b3b62 100644 +--- a/arch/sparc64/kernel/vmlinux.lds.S ++++ b/arch/sparc64/kernel/vmlinux.lds.S +@@ -27,6 +27,7 @@ SECTIONS + .data : + { + *(.data) ++ MARKER + CONSTRUCTORS + } + .data1 : { *(.data1) } +diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug +index cb33186..d0260cb 100644 +--- a/arch/x86_64/Kconfig.debug ++++ b/arch/x86_64/Kconfig.debug +@@ -55,6 +55,13 @@ config KPROBES + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + ++config MARKERS ++ bool "Activate markers" ++ default y ++ help ++ Place an empty function call at each marker site. Can be ++ dynamically changed for a probe function. 
++ + config IOMMU_LEAK + bool "IOMMU leak tracing" + depends on DEBUG_KERNEL +diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S +index 053c826..3b3928f 100644 +--- a/arch/x86_64/kernel/vmlinux.lds.S ++++ b/arch/x86_64/kernel/vmlinux.lds.S +@@ -33,6 +33,7 @@ SECTIONS + + .data : { /* Data */ + *(.data) ++ MARKER + CONSTRUCTORS + } + +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index 7a4d28d..991a1ef 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -159,7 +159,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator); + + // ================= main 802.3ad protocol functions ================== + static int ad_lacpdu_send(struct port *port); +-static int ad_marker_send(struct port *port, struct marker *marker); ++static int ad_marker_send(struct port *port, struct bond_marker *marker); + static void ad_mux_machine(struct port *port); + static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); + static void ad_tx_machine(struct port *port); +@@ -172,8 +172,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast); + static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); + static void ad_enable_collecting_distributing(struct port *port); + static void ad_disable_collecting_distributing(struct port *port); +-static void ad_marker_info_received(struct marker *marker_info, struct port *port); +-static void ad_marker_response_received(struct marker *marker, struct port *port); ++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); ++static void ad_marker_response_received(struct bond_marker *marker, struct port *port); + + + ///////////////////////////////////////////////////////////////////////////////// +@@ -954,12 +954,13 @@ static int ad_lacpdu_send(struct port *port) + * Returns: 0 on success + * < 0 on error + */ +-static int ad_marker_send(struct port *port, struct marker *marker) 
++static int ad_marker_send(struct port *port, struct bond_marker *marker) + { + struct slave *slave = port->slave; + struct sk_buff *skb; +- struct marker_header *marker_header; +- int length = sizeof(struct marker_header); ++ struct bond_marker_header *marker_header; ++ int length = sizeof(struct bond_marker_header); ++ + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length + 16); +@@ -974,7 +975,7 @@ static int ad_marker_send(struct port *port, struct marker *marker) + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + +- marker_header = (struct marker_header *)skb_put(skb, length); ++ marker_header = (struct bond_marker_header *)skb_put(skb, length); + + marker_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it +@@ -1771,7 +1772,7 @@ static void ad_disable_collecting_distributing(struct port *port) + */ + static void ad_marker_info_send(struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + u16 index; + + // fill the marker PDU with the appropriate values +@@ -1804,13 +1805,14 @@ static void ad_marker_info_send(struct port *port) + * @port: the port we're looking at + * + */ +-static void ad_marker_info_received(struct marker *marker_info,struct port *port) ++static void ad_marker_info_received(struct bond_marker *marker_info, ++ struct port *port) + { +- struct marker marker; ++ struct bond_marker marker; + + // copy the received marker data to the response marker + //marker = *marker_info; +- memcpy(&marker, marker_info, sizeof(struct marker)); ++ memcpy(&marker, marker_info, sizeof(struct bond_marker)); + // change the marker subtype to marker response + marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; + // send the marker response +@@ -1829,7 +1831,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port + * response for marker PDU's, in 
this stage, but only to respond to marker + * information. + */ +-static void ad_marker_response_received(struct marker *marker, struct port *port) ++static void ad_marker_response_received(struct bond_marker *marker, ++ struct port *port) + { + marker=NULL; // just to satisfy the compiler + port=NULL; // just to satisfy the compiler +@@ -2217,15 +2220,16 @@ void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 leng + case AD_TYPE_MARKER: + // No need to convert fields to Little Endian since we don't use the marker's fields. + +- switch (((struct marker *)lacpdu)->tlv_type) { ++ switch (((struct bond_marker *)lacpdu)->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + dprintk("Received Marker Information on port %d\n", port->actor_port_number); +- ad_marker_info_received((struct marker *)lacpdu, port); ++ ad_marker_info_received((struct bond_marker *)lacpdu, port); ++ + break; + + case AD_MARKER_RESPONSE_SUBTYPE: + dprintk("Received Marker Response on port %d\n", port->actor_port_number); +- ad_marker_response_received((struct marker *)lacpdu, port); ++ ad_marker_response_received((struct bond_marker *)lacpdu, port); + break; + + default: +diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h +index 4c60b17..2bb477e 100644 +--- a/drivers/net/bonding/bond_3ad.h ++++ b/drivers/net/bonding/bond_3ad.h +@@ -105,7 +105,8 @@ typedef enum { + typedef enum { + AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype + AD_MARKER_RESPONSE_SUBTYPE // marker response subtype +-} marker_subtype_t; ++} bond_marker_subtype_t; ++ + + // timers types(43.4.9 in the 802.3ad standard) + typedef enum { +@@ -161,7 +162,7 @@ typedef struct lacpdu_header { + } lacpdu_header_t; + + // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) +-typedef struct marker { ++typedef struct bond_marker { + u8 subtype; // = 0x02 (marker PDU) + u8 version_number; // = 0x01 + u8 tlv_type; // = 0x01 (marker information) +@@ -174,12 
+175,12 @@ typedef struct marker { + u8 tlv_type_terminator; // = 0x00 + u8 terminator_length; // = 0x00 + u8 reserved_90[90]; // = 0 +-} marker_t; ++} bond_marker_t; + +-typedef struct marker_header { ++typedef struct bond_marker_header { + struct ad_header ad_header; +- struct marker marker; +-} marker_header_t; ++ struct bond_marker marker; ++} bond_marker_header_t; + + #pragma pack() + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index 165a02f..eaf230e 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -6,10 +6,18 @@ + #define VMLINUX_SYMBOL(_sym_) _sym_ + #endif + ++/* Kernel markers : pointers */ ++#define MARKER \ ++ . = ALIGN(8); \ ++ VMLINUX_SYMBOL(__start___markers) = .; \ ++ *(__markers) \ ++ VMLINUX_SYMBOL(__stop___markers) = .; ++ + #define RODATA \ + .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ + *(.rodata) *(.rodata.*) \ + *(__vermagic) /* Kernel version magic */ \ ++ *(__markers_strings) /* Markers: strings */ \ + } \ + \ + .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ +diff --git a/include/linux/marker.h b/include/linux/marker.h +new file mode 100644 +index 0000000..efbc82b +--- /dev/null ++++ b/include/linux/marker.h +@@ -0,0 +1,139 @@ ++#ifndef _LINUX_MARKER_H ++#define _LINUX_MARKER_H ++ ++/* ++ * Code markup for dynamic and static tracing. ++ * ++ * See Documentation/marker.txt. ++ * ++ * (C) Copyright 2006 Mathieu Desnoyers ++ * ++ * This file is released under the GPLv2. ++ * See the file COPYING for more details. ++ */ ++ ++#include ++ ++struct module; ++struct marker; ++ ++/** ++ * marker_probe_func - Type of a marker probe function ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @args: variable argument list pointer. Use a pointer to overcome C's ++ * inability to pass this around as a pointer in a portable manner in ++ * the callee otherwise. ++ * ++ * Type of marker probe functions. 
They receive the mdata and need to parse the ++ * format string to recover the variable argument list. ++ */ ++typedef void marker_probe_func(void *probe_private, void *call_private, ++ const char *fmt, va_list *args); ++ ++struct marker_probe_closure { ++ marker_probe_func *func; /* Callback */ ++ void *probe_private; /* Private probe data */ ++}; ++ ++struct marker { ++ const char *name; /* Marker name */ ++ const char *format; /* Marker format string, describing the ++ * variable argument list. ++ */ ++ char state; /* Marker state. */ ++ char ptype; /* probe type : 0 : single, 1 : multi */ ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++} __attribute__((aligned(8))); ++ ++#ifdef CONFIG_MARKERS ++ ++/* ++ * Note : the empty asm volatile with read constraint is used here instead of a ++ * "used" attribute to fix a gcc 4.1.x bug. ++ * Make sure the alignment of the structure in the __markers section will ++ * not add unwanted padding between the beginning of the section and the ++ * structure. Force alignment to the same alignment as the section start. ++ */ ++#define __trace_mark(name, call_private, format, args...) 
\ ++ do { \ ++ static const char __mstrtab_##name[] \ ++ __attribute__((section("__markers_strings"))) \ ++ = #name "\0" format; \ ++ static struct marker __mark_##name \ ++ __attribute__((section("__markers"), aligned(8))) = \ ++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ ++ 0, 0, marker_probe_cb, \ ++ { __mark_empty_function, NULL}, NULL }; \ ++ __mark_check_format(format, ## args); \ ++ if (unlikely(__mark_##name.state)) { \ ++ (*__mark_##name.call) \ ++ (&__mark_##name, call_private, \ ++ format, ## args); \ ++ } \ ++ } while (0) ++ ++extern void marker_update_probe_range(struct marker *begin, ++ struct marker *end); ++#else /* !CONFIG_MARKERS */ ++#define __trace_mark(name, call_private, format, args...) \ ++ __mark_check_format(format, ## args) ++static inline void marker_update_probe_range(struct marker *begin, ++ struct marker *end) ++{ } ++#endif /* CONFIG_MARKERS */ ++ ++/** ++ * trace_mark - Marker ++ * @name: marker name, not quoted. ++ * @format: format string ++ * @args...: variable argument list ++ * ++ * Places a marker. ++ */ ++#define trace_mark(name, format, args...) \ ++ __trace_mark(name, NULL, format, ## args) ++ ++/** ++ * MARK_NOARGS - Format string for a marker with no argument. ++ */ ++#define MARK_NOARGS " " ++ ++/* To be used for string format validity checking with gcc */ ++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...) ++{ ++} ++ ++extern marker_probe_func __mark_empty_function; ++ ++extern void marker_probe_cb(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++extern void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...); ++ ++/* ++ * Connect a probe to a marker. ++ * private data pointer must be a valid allocated memory address, or NULL. 
++ */ ++extern int marker_probe_register(const char *name, const char *format, ++ marker_probe_func *probe, void *probe_private); ++ ++/* ++ * Returns the private data given to marker_probe_register. ++ */ ++extern int marker_probe_unregister(const char *name, ++ marker_probe_func *probe, void *probe_private); ++/* ++ * Unregister a marker by providing the registered private data. ++ */ ++extern int marker_probe_unregister_private_data(marker_probe_func *probe, ++ void *probe_private); ++ ++extern void *marker_get_private_data(const char *name, marker_probe_func *probe, ++ int num); ++ ++#endif +diff --git a/include/linux/module.h b/include/linux/module.h +index 8da8948..2ad5efd 100644 +--- a/include/linux/module.h ++++ b/include/linux/module.h +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -328,6 +329,10 @@ struct module + /* The command line arguments (may be mangled). People like + keeping pointers to this stuff */ + char *args; ++#ifdef CONFIG_MARKERS ++ struct marker *markers; ++ unsigned int num_markers; ++#endif + }; + + /* FIXME: It'd be nice to isolate modules during init, too, so they +@@ -448,6 +453,7 @@ int register_module_notifier(struct notifier_block * nb); + int unregister_module_notifier(struct notifier_block * nb); + + extern void print_modules(void); ++extern void module_update_markers(void); + #else /* !CONFIG_MODULES... 
*/ + #define EXPORT_SYMBOL(sym) + #define EXPORT_SYMBOL_GPL(sym) +@@ -537,6 +543,10 @@ static inline int unregister_module_notifier(struct notifier_block * nb) + static inline void print_modules(void) + { + } ++ ++static inline void module_update_markers(void) ++{ ++} + #endif /* CONFIG_MODULES */ + + #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index 91057d6..bcd0acb 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -42,6 +42,19 @@ + #include + #include + ++/* ++ * Prevent the compiler from merging or refetching accesses. The compiler ++ * is also forbidden from reordering successive instances of ACCESS_ONCE(), ++ * but only when the compiler is aware of some particular ordering. One way ++ * to make the compiler aware of ordering is to put the two invocations of ++ * ACCESS_ONCE() in different C statements. ++ * ++ * This macro does absolutely -nothing- to prevent the CPU from reordering, ++ * merging, or refetching absolutely anything at any time. 
++ * ++ */ ++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) ++ + /** + * struct rcu_head - callback structure for use with RCU + * @next: next update requests in a list +@@ -102,6 +115,7 @@ struct rcu_data { + struct rcu_head *donelist; + struct rcu_head **donetail; + int cpu; ++ struct rcu_head barrier; + }; + + DECLARE_PER_CPU(struct rcu_data, rcu_data); +@@ -266,6 +280,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head, + extern void FASTCALL(call_rcu_bh(struct rcu_head *head, + void (*func)(struct rcu_head *head))); + extern void synchronize_kernel(void); +- ++extern void rcu_barrier(void); + #endif /* __KERNEL__ */ + #endif /* __LINUX_RCUPDATE_H */ +diff --git a/kernel/Makefile b/kernel/Makefile +index 0b8c8ca..f8248bc 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o + obj-$(CONFIG_AUDITSYSCALL) += auditsc.o + obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o + obj-$(CONFIG_KPROBES) += kprobes.o ++obj-$(CONFIG_MARKERS) += marker.o + + ifneq ($(CONFIG_IA64),y) + # According to Alan Modra , the -fno-omit-frame-pointer is +diff --git a/kernel/marker.c b/kernel/marker.c +new file mode 100644 +index 0000000..c4c2cd8 +--- /dev/null ++++ b/kernel/marker.c +@@ -0,0 +1,851 @@ ++/* ++ * Copyright (C) 2007 Mathieu Desnoyers ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern struct marker __start___markers[]; ++extern struct marker __stop___markers[]; ++ ++/* Set to 1 to enable marker debug output */ ++const int marker_debug; ++ ++/* ++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin ++ * and module markers and the hash table. ++ */ ++static DEFINE_MUTEX(markers_mutex); ++ ++/* ++ * Marker hash table, containing the active markers. ++ * Protected by module_mutex. ++ */ ++#define MARKER_HASH_BITS 6 ++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) ++ ++/* ++ * Note about RCU : ++ * It is used to make sure every handler has finished using its private data ++ * between two consecutive operation (add or remove) on a given marker. It is ++ * also used to delay the free of multiple probes array until a quiescent state ++ * is reached. ++ * marker entries modifications are protected by the markers_mutex. ++ */ ++struct marker_entry { ++ struct hlist_node hlist; ++ char *format; ++ void (*call)(const struct marker *mdata, /* Probe wrapper */ ++ void *call_private, const char *fmt, ...); ++ struct marker_probe_closure single; ++ struct marker_probe_closure *multi; ++ int refcount; /* Number of times armed. 0 if disarmed. */ ++ struct rcu_head rcu; ++ void *oldptr; ++ char rcu_pending:1; ++ char ptype:1; ++ char name[0]; /* Contains name'\0'format'\0' */ ++}; ++ ++static struct hlist_head marker_table[MARKER_TABLE_SIZE]; ++ ++/** ++ * __mark_empty_function - Empty probe callback ++ * @probe_private: probe private data ++ * @call_private: call site private data ++ * @fmt: format string ++ * @...: variable argument list ++ * ++ * Empty callback provided as a probe to the markers. 
By providing this to a ++ * disabled marker, we make sure the execution flow is always valid even ++ * though the function pointer change and the marker enabling are two distinct ++ * operations that modifies the execution flow of preemptible code. ++ */ ++void __mark_empty_function(void *probe_private, void *call_private, ++ const char *fmt, va_list *args) ++{ ++} ++EXPORT_SYMBOL_GPL(__mark_empty_function); ++ ++/* ++ * marker_probe_cb Callback that prepares the variable argument list for probes. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we ++ * need to put a full smp_rmb() in this branch. This is why we do not use ++ * rcu_dereference() for the pointer read. ++ */ ++void marker_probe_cb(const struct marker *mdata, void *call_private, ++ const char *fmt, ...) ++{ ++ va_list args; ++ char ptype; ++ ++ /* ++ * disabling preemption to make sure the teardown of the callbacks can ++ * be done correctly when they are in modules and they insure RCU read ++ * coherency. ++ */ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ va_start(args, fmt); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ va_end(args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. 
Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. ++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) { ++ va_start(args, fmt); ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ va_end(args); ++ } ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb); ++ ++/* ++ * marker_probe_cb Callback that does not prepare the variable argument list. ++ * @mdata: pointer of type struct marker ++ * @call_private: caller site private data ++ * @fmt: format string ++ * @...: Variable argument list. ++ * ++ * Should be connected to markers "MARK_NOARGS". ++ */ ++void marker_probe_cb_noarg(const struct marker *mdata, ++ void *call_private, const char *fmt, ...) ++{ ++ va_list args; /* not initialized */ ++ char ptype; ++ ++ preempt_disable(); ++ ptype = ACCESS_ONCE(mdata->ptype); ++ if (likely(!ptype)) { ++ marker_probe_func *func; ++ /* Must read the ptype before ptr. They are not data dependant, ++ * so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func = ACCESS_ONCE(mdata->single.func); ++ /* Must read the ptr before private data. They are not data ++ * dependant, so we put an explicit smp_rmb() here. */ ++ smp_rmb(); ++ func(mdata->single.probe_private, call_private, fmt, &args); ++ } else { ++ struct marker_probe_closure *multi; ++ int i; ++ /* ++ * multi points to an array, therefore accessing the array ++ * depends on reading multi. However, even in this case, ++ * we must insure that the pointer is read _before_ the array ++ * data. Same as rcu_dereference, but we need a full smp_rmb() ++ * in the fast path, so put the explicit barrier here. 
++ */ ++ smp_read_barrier_depends(); ++ multi = ACCESS_ONCE(mdata->multi); ++ for (i = 0; multi[i].func; i++) ++ multi[i].func(multi[i].probe_private, call_private, fmt, ++ &args); ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); ++ ++static void free_old_closure(struct rcu_head *head) ++{ ++ struct marker_entry *entry = container_of(head, ++ struct marker_entry, rcu); ++ kfree(entry->oldptr); ++ /* Make sure we free the data before setting the pending flag to 0 */ ++ smp_wmb(); ++ entry->rcu_pending = 0; ++} ++ ++static void debug_print_probes(struct marker_entry *entry) ++{ ++ int i; ++ ++ if (!marker_debug) ++ return; ++ ++ if (!entry->ptype) { ++ printk(KERN_DEBUG "Single probe : %p %p\n", ++ entry->single.func, ++ entry->single.probe_private); ++ } else { ++ for (i = 0; entry->multi[i].func; i++) ++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, ++ entry->multi[i].func, ++ entry->multi[i].probe_private); ++ } ++} ++ ++static struct marker_probe_closure * ++marker_entry_add_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0; ++ struct marker_probe_closure *old, *new; ++ ++ WARN_ON(!probe); ++ ++ debug_print_probes(entry); ++ old = entry->multi; ++ if (!entry->ptype) { ++ if (entry->single.func == probe && ++ entry->single.probe_private == probe_private) ++ return ERR_PTR(-EBUSY); ++ if (entry->single.func == __mark_empty_function) { ++ /* 0 -> 1 probes */ ++ entry->single.func = probe; ++ entry->single.probe_private = probe_private; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* 1 -> 2 probes */ ++ nr_probes = 1; ++ old = NULL; ++ } ++ } else { ++ /* (N -> N+1), (N != 0, 1) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) ++ if (old[nr_probes].func == probe ++ && old[nr_probes].probe_private ++ == probe_private) ++ return ERR_PTR(-EBUSY); ++ } ++ /* + 2 : one for new probe, one for NULL func */ ++ 
new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), ++ GFP_KERNEL); ++ if (new == NULL) ++ return ERR_PTR(-ENOMEM); ++ if (!old) ++ new[0] = entry->single; ++ else ++ memcpy(new, old, ++ nr_probes * sizeof(struct marker_probe_closure)); ++ new[nr_probes].func = probe; ++ new[nr_probes].probe_private = probe_private; ++ entry->refcount = nr_probes + 1; ++ entry->multi = new; ++ entry->ptype = 1; ++ debug_print_probes(entry); ++ return old; ++} ++ ++static struct marker_probe_closure * ++marker_entry_remove_probe(struct marker_entry *entry, ++ marker_probe_func *probe, void *probe_private) ++{ ++ int nr_probes = 0, nr_del = 0, i; ++ struct marker_probe_closure *old, *new; ++ ++ old = entry->multi; ++ ++ debug_print_probes(entry); ++ if (!entry->ptype) { ++ /* 0 -> N is an error */ ++ WARN_ON(entry->single.func == __mark_empty_function); ++ /* 1 -> 0 probes */ ++ WARN_ON(probe && entry->single.func != probe); ++ WARN_ON(entry->single.probe_private != probe_private); ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ debug_print_probes(entry); ++ return NULL; ++ } else { ++ /* (N -> M), (N > 1, M >= 0) probes */ ++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) { ++ if ((!probe || old[nr_probes].func == probe) ++ && old[nr_probes].probe_private ++ == probe_private) ++ nr_del++; ++ } ++ } ++ ++ if (nr_probes - nr_del == 0) { ++ /* N -> 0, (N > 1) */ ++ entry->single.func = __mark_empty_function; ++ entry->refcount = 0; ++ entry->ptype = 0; ++ } else if (nr_probes - nr_del == 1) { ++ /* N -> 1, (N > 1) */ ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ entry->single = old[i]; ++ entry->refcount = 1; ++ entry->ptype = 0; ++ } else { ++ int j = 0; ++ /* N -> M, (N > 1, M > 1) */ ++ /* + 1 for NULL */ ++ new = kzalloc((nr_probes - nr_del + 1) ++ * sizeof(struct marker_probe_closure), GFP_KERNEL); ++ if (new == NULL) ++ return 
ERR_PTR(-ENOMEM); ++ for (i = 0; old[i].func; i++) ++ if ((probe && old[i].func != probe) || ++ old[i].probe_private != probe_private) ++ new[j++] = old[i]; ++ entry->refcount = nr_probes - nr_del; ++ entry->ptype = 1; ++ entry->multi = new; ++ } ++ debug_print_probes(entry); ++ return old; ++} ++ ++/* ++ * Get marker if the marker is present in the marker hash table. ++ * Must be called with markers_mutex held. ++ * Returns NULL if not present. ++ */ ++static struct marker_entry *get_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ u32 hash = jhash(name, strlen(name), 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) ++ return e; ++ } ++ return NULL; ++} ++ ++/* ++ * Add the marker to the marker hash table. Must be called with markers_mutex ++ * held. ++ */ ++static struct marker_entry *add_marker(const char *name, const char *format) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ size_t name_len = strlen(name) + 1; ++ size_t format_len = 0; ++ u32 hash = jhash(name, name_len-1, 0); ++ ++ if (format) ++ format_len = strlen(format) + 1; ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ printk(KERN_NOTICE ++ "Marker %s busy\n", name); ++ return ERR_PTR(-EBUSY); /* Already there */ ++ } ++ } ++ /* ++ * Using kmalloc here to allocate a variable length element. Could ++ * cause some memory fragmentation if overused. 
++ */ ++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, ++ GFP_KERNEL); ++ if (!e) ++ return ERR_PTR(-ENOMEM); ++ memcpy(&e->name[0], name, name_len); ++ if (format) { ++ e->format = &e->name[name_len]; ++ memcpy(e->format, format, format_len); ++ if (strcmp(e->format, MARK_NOARGS) == 0) ++ e->call = marker_probe_cb_noarg; ++ else ++ e->call = marker_probe_cb; ++ trace_mark(core_marker_format, "name %s format %s", ++ e->name, e->format); ++ } else { ++ e->format = NULL; ++ e->call = marker_probe_cb; ++ } ++ e->single.func = __mark_empty_function; ++ e->single.probe_private = NULL; ++ e->multi = NULL; ++ e->ptype = 0; ++ e->refcount = 0; ++ e->rcu_pending = 0; ++ hlist_add_head(&e->hlist, head); ++ return e; ++} ++ ++/* ++ * Remove the marker from the marker hash table. Must be called with mutex_lock ++ * held. ++ */ ++static int remove_marker(const char *name) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct marker_entry *e; ++ int found = 0; ++ size_t len = strlen(name) + 1; ++ u32 hash = jhash(name, len-1, 0); ++ ++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; ++ hlist_for_each_entry(e, node, head, hlist) { ++ if (!strcmp(name, e->name)) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) ++ return -ENOENT; ++ if (e->single.func != __mark_empty_function) ++ return -EBUSY; ++ hlist_del(&e->hlist); ++ /* Make sure the call_rcu has been executed */ ++ if (e->rcu_pending) ++ rcu_barrier(); ++ kfree(e); ++ return 0; ++} ++ ++/* ++ * Set the mark_entry format to the format found in the element. 
++ * A replacement entry holding the name and the format is allocated, takes over
++ * the old entry's probes and its position in the hash chain, and the old entry
++ * is freed. *entry is updated to point to the replacement.
++ * Returns 0 on success, -ENOMEM if the allocation fails.
++ */
++static int marker_set_format(struct marker_entry **entry, const char *format)
++{
++	struct marker_entry *e;
++	size_t name_len = strlen((*entry)->name) + 1;
++	size_t format_len = strlen(format) + 1;
++
++
++	/* Entry header, name and format share one allocation. */
++	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
++			GFP_KERNEL);
++	if (!e)
++		return -ENOMEM;
++	memcpy(&e->name[0], (*entry)->name, name_len);
++	e->format = &e->name[name_len];
++	memcpy(e->format, format, format_len);
++	if (strcmp(e->format, MARK_NOARGS) == 0)
++		e->call = marker_probe_cb_noarg;
++	else
++		e->call = marker_probe_cb;
++	e->single = (*entry)->single;
++	e->multi = (*entry)->multi;
++	e->ptype = (*entry)->ptype;
++	e->refcount = (*entry)->refcount;
++	e->rcu_pending = 0;
++	/* Put the replacement in the chain before unlinking the old entry. */
++	hlist_add_before(&e->hlist, &(*entry)->hlist);
++	hlist_del(&(*entry)->hlist);
++	/* Make sure the call_rcu has been executed */
++	if ((*entry)->rcu_pending)
++		rcu_barrier();
++	kfree(*entry);
++	*entry = e;
++	trace_mark(core_marker_format, "name %s format %s",
++			e->name, e->format);
++	return 0;
++}
++
++/*
++ * Sets the probe callback corresponding to one marker.
++ *
++ * Copies the call, single probe, multi probe array and ptype of *entry into
++ * the marker site elem, and sets elem->state to active.
++ * If the entry has no format yet, it takes the one of the marker site, which
++ * may replace *entry (see marker_set_format()).
++ * Returns 0 on success, -EPERM if the entry format and the marker site format
++ * differ, or the error returned by marker_set_format().
++ */
++static int set_marker(struct marker_entry **entry, struct marker *elem,
++		int active)
++{
++	int ret;
++	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
++
++	if ((*entry)->format) {
++		if (strcmp((*entry)->format, elem->format) != 0) {
++			printk(KERN_NOTICE
++				"Format mismatch for probe %s "
++				"(%s), marker (%s)\n",
++				(*entry)->name,
++				(*entry)->format,
++				elem->format);
++			return -EPERM;
++		}
++	} else {
++		ret = marker_set_format(entry, elem->format);
++		if (ret)
++			return ret;
++	}
++
++	/*
++	 * probe_cb setup (statically known) is done here. It is
++	 * asynchronous with the rest of execution, therefore we only
++	 * pass from a "safe" callback (with argument) to an "unsafe"
++	 * callback (does not set arguments).
++	 */
++	elem->call = (*entry)->call;
++	/*
++	 * Sanity check :
++	 * We only update the single probe private data when the ptr is
++	 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
++	 */
++	WARN_ON(elem->single.func != __mark_empty_function
++		&& elem->single.probe_private
++		!= (*entry)->single.probe_private &&
++		!elem->ptype);
++	elem->single.probe_private = (*entry)->single.probe_private;
++	/*
++	 * Make sure the private data is valid when we update the
++	 * single probe ptr.
++	 */
++	smp_wmb();
++	elem->single.func = (*entry)->single.func;
++	/*
++	 * We also make sure that the new probe callbacks array is consistent
++	 * before setting a pointer to it.
++	 */
++	rcu_assign_pointer(elem->multi, (*entry)->multi);
++	/*
++	 * Update the function or multi probe array pointer before setting the
++	 * ptype.
++	 */
++	smp_wmb();
++	elem->ptype = (*entry)->ptype;
++	elem->state = active;
++
++	return 0;
++}
++
++/*
++ * Disable a marker and its probe callback.
++ * Note: the original callback is guaranteed not to be used anymore only after
++ * a synchronize_sched() issued after the probe function has been set to the
++ * empty function. This is ensured by preemption disabling around the call
++ * site.
++ */
++static void disable_marker(struct marker *elem)
++{
++	/* leave "call" as is. It is known statically. */
++	elem->state = 0;
++	elem->single.func = __mark_empty_function;
++	/* Update the function before setting the ptype */
++	smp_wmb();
++	elem->ptype = 0;	/* single probe */
++	/*
++	 * Leave the private data and id there, because removal is racy and
++	 * should be done only after a synchronize_sched(). These are never used
++	 * until the next initialization anyway.
++	 */
++}
++
++/**
++ * marker_update_probe_range - Update a probe range
++ * @begin: beginning of the range
++ * @end: end of the range
++ *
++ * Updates the probe callback corresponding to a range of markers.
++ * Takes markers_mutex. A marker site whose name has a registered entry gets
++ * the probes of that entry; any other site is disabled.
++ */
++void marker_update_probe_range(struct marker *begin,
++	struct marker *end)
++{
++	struct marker *iter;
++	struct marker_entry *mark_entry;
++
++	mutex_lock(&markers_mutex);
++	for (iter = begin; iter < end; iter++) {
++		mark_entry = get_marker(iter->name);
++		if (mark_entry) {
++			/* The site is active only while probes are connected. */
++			set_marker(&mark_entry, iter,
++					!!mark_entry->refcount);
++			/*
++			 * ignore error, continue
++			 */
++		} else {
++			disable_marker(iter);
++		}
++	}
++	mutex_unlock(&markers_mutex);
++}
++
++/*
++ * Update probes, removing the faulty probes.
++ * Issues a synchronize_sched() when no reference to the module passed
++ * as parameter is found in the probes so the probe module can be
++ * safely unloaded from now on.
++ * NOTE(review): this function takes no parameter and does not itself call
++ * synchronize_sched(); the sentence above looks stale - confirm.
++ *
++ * Internal callback only changed before the first probe is connected to it.
++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
++ * transitions. All other transitions will leave the old private data valid.
++ * This makes the non-atomicity of the callback/private data updates valid.
++ *
++ * "special case" updates :
++ * 0 -> 1 callback
++ * 1 -> 0 callback
++ * 1 -> 2 callbacks
++ * 2 -> 1 callbacks
++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
++ * Side effect : marker_set_format may delete the marker entry (creating a
++ * replacement).
++ */
++static void marker_update_probes(void)
++{
++	/* Core kernel markers */
++	marker_update_probe_range(__start___markers, __stop___markers);
++	/* Markers in modules. */
++	module_update_markers();
++}
++
++/**
++ * marker_probe_register - Connect a probe to a marker
++ * @name: marker name
++ * @format: format string
++ * @probe: probe handler
++ * @probe_private: probe private data
++ *
++ * private data must be a valid allocated memory address, or NULL.
++ * Returns 0 if ok, error value on error.
++ * The probe address must at least be aligned on the architecture pointer size.
++ * May sleep: takes markers_mutex and may wait for pending RCU callbacks.
++ */
++int marker_probe_register(const char *name, const char *format,
++			marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	int ret = 0;
++	struct marker_probe_closure *old;
++
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		entry = add_marker(name, format);
++		if (IS_ERR(entry)) {
++			ret = PTR_ERR(entry);
++			goto end;
++		}
++	}
++	/*
++	 * If we detect that a call_rcu is pending for this marker,
++	 * make sure it's executed now.
++	 */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_add_probe(entry, probe, probe_private);
++	if (IS_ERR(old)) {
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		/*
++		 * The marker was removed while markers_mutex was dropped, so
++		 * there is no entry left to carry the deferred free. Wait for
++		 * the probe callers that may still walk the old array, then
++		 * free it here instead of dereferencing a NULL entry.
++		 */
++		WARN_ON(1);
++		synchronize_sched();
++		kfree(old);
++		ret = -ENOENT;
++		goto end;
++	}
++	/*
++	 * Another update may have queued entry->rcu while the mutex was
++	 * dropped. The same rcu_head must not be queued twice, and oldptr
++	 * must not be overwritten before it has been freed.
++	 */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++end:
++	mutex_unlock(&markers_mutex);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_register);
++
++/**
++ * marker_probe_unregister - Disconnect a probe from a marker
++ * @name: marker name
++ * @probe: probe function pointer
++ * @probe_private: probe private data
++ *
++ * Returns 0 on success, or a negative error value (-ENOENT if no marker of
++ * that name is registered).
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled section
++ * has finished.
++ */
++int marker_probe_unregister(const char *name,
++	marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	struct marker_probe_closure *old;
++	int ret = 0;
++
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		ret = -ENOENT;
++		goto end;
++	}
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_remove_probe(entry, probe, probe_private);
++	if (IS_ERR(old)) {
++		/*
++		 * The new probe array could not be allocated and the entry
++		 * was left unchanged. Never let the error pointer reach
++		 * entry->oldptr, where it would end up in kfree().
++		 */
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		/*
++		 * The marker was removed while markers_mutex was dropped.
++		 * Wait for the probe callers that may still walk the old
++		 * array, then free it here.
++		 */
++		synchronize_sched();
++		kfree(old);
++		goto end;
++	}
++	/*
++	 * Another update may have queued entry->rcu while the mutex was
++	 * dropped. The same rcu_head must not be queued twice.
++	 */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++	remove_marker(name);	/* Ignore busy error message */
++end:
++	mutex_unlock(&markers_mutex);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister);
++
++/*
++ * Find the first marker entry, in hash table order, that has a probe
++ * registered with both this probe function and this private data.
++ * Returns NULL if there is none.
++ */
++static struct marker_entry *
++get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	unsigned int i;
++	struct hlist_head *head;
++	struct hlist_node *node;
++
++	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
++		head = &marker_table[i];
++		hlist_for_each_entry(entry, node, head, hlist) {
++			if (!entry->ptype) {
++				if (entry->single.func == probe
++						&& entry->single.probe_private
++						== probe_private)
++					return entry;
++			} else {
++				struct marker_probe_closure *closure;
++				/* Own index: i is the hash bucket. */
++				unsigned int j;
++
++				closure = entry->multi;
++				for (j = 0; closure[j].func; j++) {
++					if (closure[j].func == probe &&
++							closure[j].probe_private
++							== probe_private)
++						return entry;
++				}
++			}
++		}
++	}
++	return NULL;
++}
++
++/**
++ * marker_probe_unregister_private_data - Disconnect a probe from a marker
++ * @probe: probe function
++ * @probe_private: probe private data
++ *
++ * Unregister a probe by providing the registered private data.
++ * Only removes the first marker found in hash table.
++ * Return 0 on success or error value.
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled section
++ * has finished.
++ */
++int marker_probe_unregister_private_data(marker_probe_func *probe,
++		void *probe_private)
++{
++	struct marker_entry *entry;
++	int ret = 0;
++	struct marker_probe_closure *old;
++	char *name = NULL;
++	size_t name_len;
++
++	mutex_lock(&markers_mutex);
++	entry = get_marker_from_private_data(probe, probe_private);
++	if (!entry) {
++		ret = -ENOENT;
++		goto end;
++	}
++	/*
++	 * Keep a private copy of the marker name. Once the probe is removed
++	 * the entry can no longer be found by (probe, probe_private), and the
++	 * entry itself may be replaced or freed while markers_mutex is
++	 * dropped, so it has to be looked up again by name.
++	 */
++	name_len = strlen(entry->name) + 1;
++	name = kmalloc(name_len, GFP_KERNEL);
++	if (!name) {
++		ret = -ENOMEM;
++		goto end;
++	}
++	memcpy(name, entry->name, name_len);
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_remove_probe(entry, NULL, probe_private);
++	if (IS_ERR(old)) {
++		/* Entry left unchanged; do not hand the error to kfree(). */
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		/*
++		 * The marker was removed while markers_mutex was dropped.
++		 * Wait for the probe callers that may still walk the old
++		 * array, then free it here.
++		 */
++		synchronize_sched();
++		kfree(old);
++		goto end;
++	}
++	/*
++	 * Another update may have queued entry->rcu while the mutex was
++	 * dropped. The same rcu_head must not be queued twice.
++	 */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++	remove_marker(name);	/* Ignore busy error message */
++end:
++	mutex_unlock(&markers_mutex);
++	kfree(name);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
++
++/**
++ * marker_get_private_data - Get a marker's probe private data
++ * @name: marker name
++ * @probe: probe to match
++ * @num: get the nth matching probe's private data
++ *
++ * Returns the nth private data pointer (starting from 0) registered with
++ * @probe on the marker, or ERR_PTR(-ENOENT) if there is no such probe.
++ * The private data pointer should _only_ be dereferenced if the caller is the
++ * owner of the data, or its content could vanish. This is mostly used to
++ * confirm that a caller is the owner of a registered probe.
++ */
++void *marker_get_private_data(const char *name, marker_probe_func *probe,
++		int num)
++{
++	struct hlist_head *head;
++	struct hlist_node *node;
++	struct marker_entry *e;
++	size_t name_len = strlen(name) + 1;
++	u32 hash = jhash(name, name_len-1, 0);
++	int i;
++
++	/*
++	 * NOTE(review): the hash chain and the probe array are read here
++	 * without taking markers_mutex - confirm that callers serialize
++	 * against register/unregister.
++	 */
++	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++	hlist_for_each_entry(e, node, head, hlist) {
++		if (!strcmp(name, e->name)) {
++			if (!e->ptype) {
++				/* Single probe: only index 0 can match. */
++				if (num == 0 && e->single.func == probe)
++					return e->single.probe_private;
++				else
++					break;
++			} else {
++				struct marker_probe_closure *closure;
++				/* Number of probes matching so far. */
++				int match = 0;
++				closure = e->multi;
++				for (i = 0; closure[i].func; i++) {
++					if (closure[i].func != probe)
++						continue;
++					if (match++ == num)
++						return closure[i].probe_private;
++				}
++			}
++		}
++	}
++	return ERR_PTR(-ENOENT);
++}
++EXPORT_SYMBOL_GPL(marker_get_private_data);
+diff --git a/kernel/module.c b/kernel/module.c
+index 624e7ee..ae16b04 100644
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -1507,6 +1507,8 @@ static struct module *load_module(void __user *umod,
+ 	struct exception_table_entry *extable;
+ 	mm_segment_t old_fs;
+ 	int gpgsig_ok;
++	unsigned int markersindex;
++	unsigned int markersstringsindex;
+ 
+ 	DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
+ 	       umod, len, uargs);
+@@ -1739,6 +1741,9 @@ static struct module *load_module(void __user *umod,
+ 		tainted |= TAINT_FORCED_MODULE;
+ 	}
+ #endif
++	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
++	markersstringsindex = find_sec(hdr, sechdrs, secstrings,
++			"__markers_strings");
+ 
+ 	/* Now do relocations.
*/ + for (i = 1; i < hdr->e_shnum; i++) { +@@ -1762,6 +1767,12 @@ static struct module *load_module(void __user *umod, + goto cleanup; + } + ++#ifdef CONFIG_MARKERS ++ mod->markers = (void *)sechdrs[markersindex].sh_addr; ++ mod->num_markers = ++ sechdrs[markersindex].sh_size / sizeof(*mod->markers); ++#endif ++ + /* Set up and sort exception table */ + mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); + mod->extable = extable = (void *)sechdrs[exindex].sh_addr; +@@ -1773,6 +1784,12 @@ static struct module *load_module(void __user *umod, + + add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + ++#ifdef CONFIG_MARKERS ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++#endif ++ + err = module_finalize(hdr, sechdrs, mod); + if (err < 0) + goto cleanup; +@@ -2190,6 +2207,20 @@ void struct_module(struct module *mod) { return; } + EXPORT_SYMBOL(struct_module); + #endif + ++#ifdef CONFIG_MARKERS ++void module_update_markers(void) ++{ ++ struct module *mod; ++ ++ down(&module_mutex); ++ list_for_each_entry(mod, &modules, list) ++ if (!tainted) ++ marker_update_probe_range(mod->markers, ++ mod->markers + mod->num_markers); ++ up(&module_mutex); ++} ++#endif ++ + static int __init modules_init(void) + { + return subsystem_register(&module_subsys); +diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c +index 1b16bfc..20ade31 100644 +--- a/kernel/rcupdate.c ++++ b/kernel/rcupdate.c +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + + /* Definition for rcupdate control block. */ + struct rcu_ctrlblk rcu_ctrlblk = +@@ -98,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head, + local_irq_restore(flags); + } + ++static atomic_t rcu_barrier_cpu_count; ++static DEFINE_MUTEX(rcu_barrier_mutex); ++static struct completion rcu_barrier_completion; ++ + /** + * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * @head: structure to be used for queueing the RCU updates. 
+@@ -410,6 +415,44 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, + return NOTIFY_OK; + } + ++static void rcu_barrier_callback(struct rcu_head *notused) ++{ ++ if (atomic_dec_and_test(&rcu_barrier_cpu_count)) ++ complete(&rcu_barrier_completion); ++} ++ ++/* ++ * Called with preemption disabled, and from cross-cpu IRQ context. ++ */ ++static void rcu_barrier_func(void *notused) ++{ ++ int cpu = smp_processor_id(); ++ struct rcu_data *rdp = &per_cpu(rcu_data, cpu); ++ struct rcu_head *head; ++ ++ head = &rdp->barrier; ++ atomic_inc(&rcu_barrier_cpu_count); ++ call_rcu(head, rcu_barrier_callback); ++} ++ ++/** ++ * rcu_barrier - Wait until all the in-flight RCUs are complete. ++ */ ++void rcu_barrier(void) ++{ ++ BUG_ON(in_interrupt()); ++ /* Take cpucontrol mutex to protect against CPU hotplug */ ++ mutex_lock(&rcu_barrier_mutex); ++ init_completion(&rcu_barrier_completion); ++ atomic_set(&rcu_barrier_cpu_count, 0); ++ on_each_cpu(rcu_barrier_func, NULL, 0, 1); ++ wait_for_completion(&rcu_barrier_completion); ++ mutex_unlock(&rcu_barrier_mutex); ++} ++EXPORT_SYMBOL_GPL(rcu_barrier); ++ ++ ++ + static struct notifier_block __devinitdata rcu_nb = { + .notifier_call = rcu_cpu_notify, + }; +diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost +index b3d31b5..b100a32 100644 +--- a/scripts/Makefile.modpost ++++ b/scripts/Makefile.modpost +@@ -13,6 +13,7 @@ + # 2) modpost is then used to + # 3) create one .mod.c file pr. 
module + # 4) create one Module.symvers file with CRC for all exported symbols ++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers + # 5) compile all .mod.c files + # 6) final link of the module to a file + +@@ -40,6 +41,11 @@ include scripts/Makefile.lib + + symverfile := $(objtree)/Module.symvers + ++kernelmarkersfile := $(objtree)/Module.markers ++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers ++ ++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) ++ + # Step 1), find all modules listed in $(MODVERDIR)/ + __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) + modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) +@@ -53,16 +59,27 @@ quiet_cmd_modpost = MODPOST + cmd_modpost = scripts/mod/modpost \ + $(if $(CONFIG_MODVERSIONS),-m) \ + $(if $(KBUILD_EXTMOD),-i,-o) $(symverfile) \ ++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ ++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \ + $(filter-out FORCE,$^) + + .PHONY: __modpost + __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE + $(call cmd,modpost) + ++quiet_cmd_kernel-mod = MODPOST $@ ++ cmd_kernel-mod = $(cmd_modpost) $@ ++ ++vmlinux.o: FORCE ++ $(call cmd,kernel-mod) ++ + # Declare generated files as targets for modpost + $(symverfile): __modpost ; + $(modules:.ko=.mod.c): __modpost ; + ++ifdef CONFIG_MARKERS ++$(markersfile): __modpost ; ++endif + + # Step 5), compile all *.mod.c files + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 2a174e5..c25948c 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -10,7 +10,8 @@ + * + * Usage: modpost vmlinux module1.o module2.o ... 
+ */ +- ++#define _GNU_SOURCE ++#include + #include + #include "modpost.h" + +@@ -289,7 +290,9 @@ parse_elf(struct elf_info *info, const char *filename) + if (strcmp(secstrings+sechdrs[i].sh_name, ".modinfo") == 0) { + info->modinfo = (void *)hdr + sechdrs[i].sh_offset; + info->modinfo_len = sechdrs[i].sh_size; +- } ++ } else if (strcmp(secstrings+sechdrs[i].sh_name, "__markers_strings") == 0) ++ info->markers_strings_sec = i; ++ + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; + +@@ -397,6 +400,63 @@ is_vmlinux(const char *modname) + return strcmp(myname, "vmlinux") == 0; + } + ++static void get_markers(struct elf_info *info, struct module *mod) ++{ ++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; ++ const char *strings = (const char *) info->hdr + sh->sh_offset; ++ const Elf_Sym *sym, *first_sym, *last_sym; ++ size_t n; ++ ++ ++ if (!info->markers_strings_sec) ++ return; ++ ++ /* ++ * First count the strings. We look for all the symbols defined ++ * in the __markers_strings section named __mstrtab_*. For ++ * these local names, the compiler puts a random .NNN suffix on, ++ * so the names don't correspond exactly. ++ */ ++ first_sym = last_sym = NULL; ++ n = 0; ++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ if (first_sym == NULL) ++ first_sym = sym; ++ last_sym = sym; ++ ++n; ++ } ++ ++ if (n == 0) ++ return; ++ /* ++ * Now collect each name and format into a line for the output. ++ * Lines look like: ++ * marker_name vmlinux marker %s format %d ++ * The format string after the second \t can use whitespace. 
++ */ ++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); ++ mod->nmarkers = n; ++ ++ n = 0; ++ for (sym = first_sym; sym <= last_sym; sym++) ++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && ++ sym->st_shndx == info->markers_strings_sec && ++ !strncmp(info->strtab + sym->st_name, ++ "__mstrtab_", sizeof "__mstrtab_" - 1)) { ++ const char *name = strings + sym->st_value; ++ const char *fmt = strchr(name, '\0') + 1; ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ mod->markers[n++] = line; ++ } ++} ++ ++ + void + read_symbols(char *modname) + { +@@ -426,6 +486,7 @@ read_symbols(char *modname) + } + maybe_frob_version(modname, info.modinfo, info.modinfo_len, + (void *)info.modinfo - (void *)info.hdr); ++ get_markers(&info, mod); + parse_elf_finish(&info); + + /* Our trick to get versioning for struct_module - it's +@@ -682,6 +743,92 @@ write_dump(const char *fname) + write_if_changed(&buf, fname); + } + ++static void add_marker(struct module *mod, const char *name, const char *fmt) ++{ ++ char *line = NULL; ++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); ++ NOFAIL(line); ++ ++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * ++ sizeof mod->markers[0]))); ++ mod->markers[mod->nmarkers++] = line; ++} ++ ++static void read_markers(const char *fname) ++{ ++ unsigned long size, pos = 0; ++ void *file = grab_file(fname, &size); ++ char *line; ++ ++ if (!file) /* No old markers, silently ignore */ ++ return; ++ ++ while ((line = get_next_line(&pos, file, size))) { ++ char *marker, *modname, *fmt; ++ struct module *mod; ++ ++ marker = line; ++ modname = strchr(marker, '\t'); ++ if (!modname) ++ goto fail; ++ *modname++ = '\0'; ++ fmt = strchr(modname, '\t'); ++ if (!fmt) ++ goto fail; ++ *fmt++ = '\0'; ++ if (*marker == '\0' || *modname == '\0') ++ goto fail; ++ ++ mod = find_module(modname); ++ if (!mod) { ++ if (is_vmlinux(modname)) ++ have_vmlinux = 1; ++ mod = 
new_module(NOFAIL(strdup(modname))); ++ mod->skip = 1; ++ } ++ ++ add_marker(mod, marker, fmt); ++ } ++ return; ++fail: ++ fatal("parse error in markers list file\n"); ++} ++ ++static int compare_strings(const void *a, const void *b) ++{ ++ return strcmp(*(const char **) a, *(const char **) b); ++} ++ ++static void write_markers(const char *fname) ++{ ++ struct buffer buf = { }; ++ struct module *mod; ++ size_t i; ++ ++ for (mod = modules; mod; mod = mod->next) ++ if (mod->markers != NULL) { ++ /* ++ * Sort the strings so we can skip duplicates when ++ * we write them out. ++ */ ++ qsort(mod->markers, mod->nmarkers, ++ sizeof mod->markers[0], &compare_strings); ++ for (i = 0; i < mod->nmarkers; ++i) { ++ char *line = mod->markers[i]; ++ buf_write(&buf, line, strlen(line)); ++ while (i + 1 < mod->nmarkers && ++ !strcmp(mod->markers[i], ++ mod->markers[i + 1])) ++ free(mod->markers[i++]); ++ free(mod->markers[i]); ++ } ++ free(mod->markers); ++ mod->markers = NULL; ++ } ++ ++ write_if_changed(&buf, fname); ++} ++ + int + main(int argc, char **argv) + { +@@ -690,8 +837,10 @@ main(int argc, char **argv) + char fname[SZ]; + char *dump_read = NULL, *dump_write = NULL; + int opt; ++ char *markers_read = NULL; ++ char *markers_write = NULL; + +- while ((opt = getopt(argc, argv, "i:mo:")) != -1) { ++ while ((opt = getopt(argc, argv, "i:mo:M:K:")) != -1) { + switch(opt) { + case 'i': + dump_read = optarg; +@@ -702,6 +851,12 @@ main(int argc, char **argv) + case 'o': + dump_write = optarg; + break; ++ case 'M': ++ markers_write = optarg; ++ break; ++ case 'K': ++ markers_read = optarg; ++ break; + default: + exit(1); + } +@@ -732,6 +887,12 @@ main(int argc, char **argv) + if (dump_write) + write_dump(dump_write); + ++ if (markers_read) ++ read_markers(markers_read); ++ ++ if (markers_write) ++ write_markers(markers_write); ++ + return 0; + } + +diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h +index 4871343..d79d7ea 100644 +--- a/scripts/mod/modpost.h ++++ 
b/scripts/mod/modpost.h +@@ -18,6 +18,7 @@ + #define Elf_Sym Elf32_Sym + #define ELF_ST_BIND ELF32_ST_BIND + #define ELF_ST_TYPE ELF32_ST_TYPE ++#define Elf_Section Elf32_Half + + #else + +@@ -26,7 +27,7 @@ + #define Elf_Sym Elf64_Sym + #define ELF_ST_BIND ELF64_ST_BIND + #define ELF_ST_TYPE ELF64_ST_TYPE +- ++#define Elf_Section Elf64_Half + #endif + + #if KERNEL_ELFDATA != HOST_ELFDATA +@@ -77,6 +78,8 @@ struct module { + int has_init; + int has_cleanup; + struct buffer dev_table_buf; ++ char **markers; ++ size_t nmarkers; + }; + + struct elf_info { +@@ -85,6 +88,7 @@ struct elf_info { + Elf_Shdr *sechdrs; + Elf_Sym *symtab_start; + Elf_Sym *symtab_stop; ++ Elf_Section markers_strings_sec; + const char *strtab; + char *modinfo; + unsigned int modinfo_len;