[Kernelpatches-commits] wjhuang commits r6 - in trunk: . wjhuang wjhuang/marker_patches
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Thu Jul 3 19:18:23 PDT 2008
Author: wjhuang
Date: 2008-07-03 19:18:23 -0700 (Thu, 03 Jul 2008)
New Revision: 6
Added:
trunk/wjhuang/
trunk/wjhuang/marker_patches/
trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch
trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch
trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch
trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch
trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch
Log:
created personal directory
Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch
===================================================================
--- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch (rev 0)
+++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5.patch 2008-07-04 02:18:23 UTC (rev 6)
@@ -0,0 +1,1882 @@
+diff --git a/Makefile b/Makefile
+index 7682056..d1b7d78 100644
+--- a/Makefile
++++ b/Makefile
+@@ -601,7 +601,7 @@ quiet_cmd_vmlinux__ ?= LD $@
+ cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \
+ -T $(vmlinux-lds) $(vmlinux-init) \
+ --start-group $(vmlinux-main) --end-group \
+- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^)
++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^)
+
+ # Generate new vmlinux version
+ quiet_cmd_vmlinux_version = GEN .version
+@@ -725,11 +725,30 @@ debug_kallsyms: .tmp_map$(last_kallsyms)
+
+ endif # ifdef CONFIG_KALLSYMS
+
++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has
++# relevant sections renamed as per the linker script.
++quiet_cmd_vmlinux-modpost = LD $@
++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \
++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \
++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^)
++define rule_vmlinux-modpost
++ :
++ +$(call cmd,vmlinux-modpost)
++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@
++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd
++endef
++
+ # vmlinux image - including updated kernel symbols
+-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE
++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE
++ $(call vmlinux-modpost)
+ $(call if_changed_rule,vmlinux__)
+ $(Q)rm -f .old_version
+
++# build vmlinux.o first to catch section mismatch errors early
++$(kallsyms.o): vmlinux.o
++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE
++ $(call if_changed_rule,vmlinux-modpost)
++
+ # The actual objects are generated when descending,
+ # make sure no implicit rule kicks in
+ $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ;
+diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
+index 7dec8f0..73f833c 100644
+--- a/arch/i386/Kconfig
++++ b/arch/i386/Kconfig
+@@ -1205,6 +1205,14 @@ config KPROBES
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
++
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ endmenu
+
+ source "arch/i386/Kconfig.debug"
+diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
+index a44d95d..23d73bb 100644
+--- a/arch/i386/kernel/vmlinux.lds.S
++++ b/arch/i386/kernel/vmlinux.lds.S
+@@ -61,6 +61,7 @@ SECTIONS
+ /* writeable */
+ .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ } :data
+
+diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
+index c45b7b5..aee46db 100644
+--- a/arch/ia64/Kconfig
++++ b/arch/ia64/Kconfig
+@@ -587,6 +587,14 @@ config KPROBES
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
++
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ endmenu
+
+ source "arch/ia64/Kconfig.debug"
+diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
+index 5b0d5f6..9c63f83 100644
+--- a/arch/ia64/kernel/vmlinux.lds.S
++++ b/arch/ia64/kernel/vmlinux.lds.S
+@@ -206,7 +206,7 @@ SECTIONS
+
+ data : { } :data
+ .data : AT(ADDR(.data) - LOAD_OFFSET)
+- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS }
++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS }
+
+ . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */
+ .got : AT(ADDR(.got) - LOAD_OFFSET)
+diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
+index 41024aa..4ab8d59 100644
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -1095,6 +1095,14 @@ config KPROBES
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
++
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ endmenu
+
+ source "arch/powerpc/Kconfig.debug"
+diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
+index c02298a..709bc47 100644
+--- a/arch/powerpc/kernel/vmlinux.lds.S
++++ b/arch/powerpc/kernel/vmlinux.lds.S
+@@ -176,11 +176,13 @@ SECTIONS
+ *(.data)
+ *(.sdata)
+ *(.got.plt) *(.got)
++ MARKER
+ }
+ #else
+ .data : {
+ *(.data .data.rel* .toc1)
+ *(.branch_lt)
++ MARKER
+ }
+
+ .opd : {
+diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
+index 974474a..c81600c 100644
+--- a/arch/s390/Kconfig
++++ b/arch/s390/Kconfig
+@@ -505,6 +505,14 @@ config KPROBES
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
++
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ endmenu
+
+ source "arch/s390/Kconfig.debug"
+diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
+index df0c16a..69826b1 100644
+--- a/arch/s390/kernel/vmlinux.lds.S
++++ b/arch/s390/kernel/vmlinux.lds.S
+@@ -46,6 +46,7 @@ SECTIONS
+
+ .data : { /* Data */
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ }
+
+diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
+index b627f8d..e0467d1 100644
+--- a/arch/sparc64/Kconfig
++++ b/arch/sparc64/Kconfig
+@@ -427,6 +427,14 @@ config KPROBES
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
++
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ endmenu
+
+ source "arch/sparc64/Kconfig.debug"
+diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
+index b097379..1f10e43 100644
+--- a/arch/sparc64/kernel/vmlinux.lds.S
++++ b/arch/sparc64/kernel/vmlinux.lds.S
+@@ -27,6 +27,7 @@ SECTIONS
+ .data :
+ {
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ }
+ .data1 : { *(.data1) }
+diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
+index d284a9a..e556e06 100644
+--- a/arch/x86_64/Kconfig
++++ b/arch/x86_64/Kconfig
+@@ -704,6 +704,14 @@ config KPROBES
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
++
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ endmenu
+
+ source "arch/x86_64/Kconfig.debug"
+diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
+index f17185f..a33251a 100644
+--- a/arch/x86_64/kernel/vmlinux.lds.S
++++ b/arch/x86_64/kernel/vmlinux.lds.S
+@@ -67,6 +67,7 @@ SECTIONS
+ /* Data */
+ .data : AT(ADDR(.data) - LOAD_OFFSET) {
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ } :data
+
+diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
+index 6a40707..9b0fab5 100644
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -125,7 +125,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator);
+
+ // ================= main 802.3ad protocol functions ==================
+ static int ad_lacpdu_send(struct port *port);
+-static int ad_marker_send(struct port *port, struct marker *marker);
++static int ad_marker_send(struct port *port, struct bond_marker *marker);
+ static void ad_mux_machine(struct port *port);
+ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
+ static void ad_tx_machine(struct port *port);
+@@ -138,8 +138,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast);
+ static void ad_initialize_lacpdu(struct lacpdu *Lacpdu);
+ static void ad_enable_collecting_distributing(struct port *port);
+ static void ad_disable_collecting_distributing(struct port *port);
+-static void ad_marker_info_received(struct marker *marker_info, struct port *port);
+-static void ad_marker_response_received(struct marker *marker, struct port *port);
++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port);
++static void ad_marker_response_received(struct bond_marker *marker, struct port *port);
+
+
+ /////////////////////////////////////////////////////////////////////////////////
+@@ -903,12 +903,13 @@ static int ad_lacpdu_send(struct port *port)
+ * Returns: 0 on success
+ * < 0 on error
+ */
+-static int ad_marker_send(struct port *port, struct marker *marker)
++static int ad_marker_send(struct port *port, struct bond_marker *marker)
+ {
+ struct slave *slave = port->slave;
+ struct sk_buff *skb;
+- struct marker_header *marker_header;
+- int length = sizeof(struct marker_header);
++ struct bond_marker_header *marker_header;
++ int length = sizeof(struct bond_marker_header);
++
+ struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR;
+
+ skb = dev_alloc_skb(length + 16);
+@@ -923,7 +924,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
+ skb->nh.raw = skb->data + ETH_HLEN;
+ skb->protocol = PKT_TYPE_LACPDU;
+
+- marker_header = (struct marker_header *)skb_put(skb, length);
++ marker_header = (struct bond_marker_header *)skb_put(skb, length);
+
+ marker_header->ad_header.destination_address = lacpdu_multicast_address;
+ /* Note: source addres is set to be the member's PERMANENT address, because we use it
+@@ -1723,7 +1724,7 @@ static void ad_disable_collecting_distributing(struct port *port)
+ */
+ static void ad_marker_info_send(struct port *port)
+ {
+- struct marker marker;
++ struct bond_marker marker;
+ u16 index;
+
+ // fill the marker PDU with the appropriate values
+@@ -1756,13 +1757,14 @@ static void ad_marker_info_send(struct port *port)
+ * @port: the port we're looking at
+ *
+ */
+-static void ad_marker_info_received(struct marker *marker_info,struct port *port)
++static void ad_marker_info_received(struct bond_marker *marker_info,
++ struct port *port)
+ {
+- struct marker marker;
++ struct bond_marker marker;
+
+ // copy the received marker data to the response marker
+ //marker = *marker_info;
+- memcpy(&marker, marker_info, sizeof(struct marker));
++ memcpy(&marker, marker_info, sizeof(struct bond_marker));
+ // change the marker subtype to marker response
+ marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE;
+ // send the marker response
+@@ -1781,7 +1783,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port
+ * response for marker PDU's, in this stage, but only to respond to marker
+ * information.
+ */
+-static void ad_marker_response_received(struct marker *marker, struct port *port)
++static void ad_marker_response_received(struct bond_marker *marker,
++ struct port *port)
+ {
+ marker=NULL; // just to satisfy the compiler
+ port=NULL; // just to satisfy the compiler
+@@ -2179,15 +2182,16 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u
+ case AD_TYPE_MARKER:
+ // No need to convert fields to Little Endian since we don't use the marker's fields.
+
+- switch (((struct marker *)lacpdu)->tlv_type) {
++ switch (((struct bond_marker *)lacpdu)->tlv_type) {
+ case AD_MARKER_INFORMATION_SUBTYPE:
+ dprintk("Received Marker Information on port %d\n", port->actor_port_number);
+- ad_marker_info_received((struct marker *)lacpdu, port);
++ ad_marker_info_received((struct bond_marker *)lacpdu, port);
++
+ break;
+
+ case AD_MARKER_RESPONSE_SUBTYPE:
+ dprintk("Received Marker Response on port %d\n", port->actor_port_number);
+- ad_marker_response_received((struct marker *)lacpdu, port);
++ ad_marker_response_received((struct bond_marker *)lacpdu, port);
+ break;
+
+ default:
+diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
+index 6ad5ad6..bf93e7e 100644
+--- a/drivers/net/bonding/bond_3ad.h
++++ b/drivers/net/bonding/bond_3ad.h
+@@ -92,7 +92,8 @@ typedef enum {
+ typedef enum {
+ AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype
+ AD_MARKER_RESPONSE_SUBTYPE // marker response subtype
+-} marker_subtype_t;
++} bond_marker_subtype_t;
++
+
+ // timers types(43.4.9 in the 802.3ad standard)
+ typedef enum {
+@@ -148,7 +149,7 @@ typedef struct lacpdu_header {
+ } lacpdu_header_t;
+
+ // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard)
+-typedef struct marker {
++typedef struct bond_marker {
+ u8 subtype; // = 0x02 (marker PDU)
+ u8 version_number; // = 0x01
+ u8 tlv_type; // = 0x01 (marker information)
+@@ -161,12 +162,12 @@ typedef struct marker {
+ u8 tlv_type_terminator; // = 0x00
+ u8 terminator_length; // = 0x00
+ u8 reserved_90[90]; // = 0
+-} marker_t;
++} bond_marker_t;
+
+-typedef struct marker_header {
++typedef struct bond_marker_header {
+ struct ad_header ad_header;
+- struct marker marker;
+-} marker_header_t;
++ struct bond_marker marker;
++} bond_marker_header_t;
+
+ #pragma pack()
+
+diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h
+index 5102c6b..aed4049 100644
+--- a/drivers/scsi/qla4xxx/ql4_fw.h
++++ b/drivers/scsi/qla4xxx/ql4_fw.h
+@@ -744,7 +744,7 @@ struct continuation_t1_entry {
+ #define ET_CONTINUE ET_CONT_T1
+
+ /* Marker entry structure*/
+-struct marker_entry {
++struct qla4_marker_entry {
+ struct qla4_header hdr; /* 00-03 */
+
+ uint32_t system_defined; /* 04-07 */
+diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c
+index 4e532b4..43860eb 100644
+--- a/drivers/scsi/qla4xxx/ql4_iocb.c
++++ b/drivers/scsi/qla4xxx/ql4_iocb.c
+@@ -65,7 +65,8 @@ int qla4xxx_get_req_pkt(struct scsi_qla_host *ha,
+ int qla4xxx_send_marker_iocb(struct scsi_qla_host *ha,
+ struct ddb_entry *ddb_entry, int lun)
+ {
+- struct marker_entry *marker_entry;
++ struct qla4_marker_entry *marker_entry;
++
+ unsigned long flags = 0;
+ uint8_t status = QLA_SUCCESS;
+
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index b9e964c..a175f1e 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -9,12 +9,21 @@
+ /* Align . to a 8 byte boundary equals to maximum function alignment. */
+ #define ALIGN_FUNCTION() . = ALIGN(8)
+
++/* Kernel markers : pointers */
++#define MARKER \
++ . = ALIGN(8); \
++ VMLINUX_SYMBOL(__start___markers) = .; \
++ *(__markers) \
++ VMLINUX_SYMBOL(__stop___markers) = .;
++
++
+ #define RODATA \
+ . = ALIGN(4096); \
+ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__start_rodata) = .; \
+ *(.rodata) *(.rodata.*) \
+ *(__vermagic) /* Kernel version magic */ \
++ *(__markers_strings) /* Markers: strings */ \
+ } \
+ \
+ .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \
+diff --git a/include/linux/marker.h b/include/linux/marker.h
+new file mode 100644
+index 0000000..efbc82b
+--- /dev/null
++++ b/include/linux/marker.h
+@@ -0,0 +1,139 @@
++#ifndef _LINUX_MARKER_H
++#define _LINUX_MARKER_H
++
++/*
++ * Code markup for dynamic and static tracing.
++ *
++ * See Documentation/marker.txt.
++ *
++ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers at polymtl.ca>
++ *
++ * This file is released under the GPLv2.
++ * See the file COPYING for more details.
++ */
++
++#include <linux/types.h>
++
++struct module;
++struct marker;
++
++/**
++ * marker_probe_func - Type of a marker probe function
++ * @probe_private: probe private data
++ * @call_private: call site private data
++ * @fmt: format string
++ * @args: variable argument list pointer. Use a pointer to overcome C's
++ * inability to pass this around as a pointer in a portable manner in
++ * the callee otherwise.
++ *
++ * Type of marker probe functions. They receive the mdata and need to parse the
++ * format string to recover the variable argument list.
++ */
++typedef void marker_probe_func(void *probe_private, void *call_private,
++ const char *fmt, va_list *args);
++
++struct marker_probe_closure {
++ marker_probe_func *func; /* Callback */
++ void *probe_private; /* Private probe data */
++};
++
++struct marker {
++ const char *name; /* Marker name */
++ const char *format; /* Marker format string, describing the
++ * variable argument list.
++ */
++ char state; /* Marker state. */
++ char ptype; /* probe type : 0 : single, 1 : multi */
++ void (*call)(const struct marker *mdata, /* Probe wrapper */
++ void *call_private, const char *fmt, ...);
++ struct marker_probe_closure single;
++ struct marker_probe_closure *multi;
++} __attribute__((aligned(8)));
++
++#ifdef CONFIG_MARKERS
++
++/*
++ * Note : the empty asm volatile with read constraint is used here instead of a
++ * "used" attribute to fix a gcc 4.1.x bug.
++ * Make sure the alignment of the structure in the __markers section will
++ * not add unwanted padding between the beginning of the section and the
++ * structure. Force alignment to the same alignment as the section start.
++ */
++#define __trace_mark(name, call_private, format, args...) \
++ do { \
++ static const char __mstrtab_##name[] \
++ __attribute__((section("__markers_strings"))) \
++ = #name "\0" format; \
++ static struct marker __mark_##name \
++ __attribute__((section("__markers"), aligned(8))) = \
++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \
++ 0, 0, marker_probe_cb, \
++ { __mark_empty_function, NULL}, NULL }; \
++ __mark_check_format(format, ## args); \
++ if (unlikely(__mark_##name.state)) { \
++ (*__mark_##name.call) \
++ (&__mark_##name, call_private, \
++ format, ## args); \
++ } \
++ } while (0)
++
++extern void marker_update_probe_range(struct marker *begin,
++ struct marker *end);
++#else /* !CONFIG_MARKERS */
++#define __trace_mark(name, call_private, format, args...) \
++ __mark_check_format(format, ## args)
++static inline void marker_update_probe_range(struct marker *begin,
++ struct marker *end)
++{ }
++#endif /* CONFIG_MARKERS */
++
++/**
++ * trace_mark - Marker
++ * @name: marker name, not quoted.
++ * @format: format string
++ * @args...: variable argument list
++ *
++ * Places a marker.
++ */
++#define trace_mark(name, format, args...) \
++ __trace_mark(name, NULL, format, ## args)
++
++/**
++ * MARK_NOARGS - Format string for a marker with no argument.
++ */
++#define MARK_NOARGS " "
++
++/* To be used for string format validity checking with gcc */
++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...)
++{
++}
++
++extern marker_probe_func __mark_empty_function;
++
++extern void marker_probe_cb(const struct marker *mdata,
++ void *call_private, const char *fmt, ...);
++extern void marker_probe_cb_noarg(const struct marker *mdata,
++ void *call_private, const char *fmt, ...);
++
++/*
++ * Connect a probe to a marker.
++ * private data pointer must be a valid allocated memory address, or NULL.
++ */
++extern int marker_probe_register(const char *name, const char *format,
++ marker_probe_func *probe, void *probe_private);
++
++/*
++ * Returns the private data given to marker_probe_register.
++ */
++extern int marker_probe_unregister(const char *name,
++ marker_probe_func *probe, void *probe_private);
++/*
++ * Unregister a marker by providing the registered private data.
++ */
++extern int marker_probe_unregister_private_data(marker_probe_func *probe,
++ void *probe_private);
++
++extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
++ int num);
++
++#endif
+diff --git a/include/linux/module.h b/include/linux/module.h
+index c458418..627afb9 100644
+--- a/include/linux/module.h
++++ b/include/linux/module.h
+@@ -17,6 +17,7 @@
+ #include <linux/stringify.h>
+ #include <linux/kobject.h>
+ #include <linux/moduleparam.h>
++#include <linux/marker.h>
+ #include <asm/local.h>
+
+ #include <asm/module.h>
+@@ -348,6 +349,10 @@ struct module
+ /* The command line arguments (may be mangled). People like
+ keeping pointers to this stuff */
+ char *args;
++#ifdef CONFIG_MARKERS
++ struct marker *markers;
++ unsigned int num_markers;
++#endif
+ };
+
+ /* FIXME: It'd be nice to isolate modules during init, too, so they
+@@ -467,6 +472,7 @@ int register_module_notifier(struct notifier_block * nb);
+ int unregister_module_notifier(struct notifier_block * nb);
+
+ extern void print_modules(void);
++extern void module_update_markers(void);
+
+ struct device_driver;
+ void module_add_driver(struct module *, struct device_driver *);
+@@ -569,6 +575,10 @@ static inline void print_modules(void)
+ {
+ }
+
++static inline void module_update_markers(void)
++{
++}
++
+ struct device_driver;
+ struct module;
+
+diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
+index b4ca73d..5f12d1b 100644
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -42,6 +42,19 @@
+ #include <linux/cpumask.h>
+ #include <linux/seqlock.h>
+
++/*
++ * Prevent the compiler from merging or refetching accesses. The compiler
++ * is also forbidden from reordering successive instances of ACCESS_ONCE(),
++ * but only when the compiler is aware of some particular ordering. One way
++ * to make the compiler aware of ordering is to put the two invocations of
++ * ACCESS_ONCE() in different C statements.
++ *
++ * This macro does absolutely -nothing- to prevent the CPU from reordering,
++ * merging, or refetching absolutely anything at any time.
++ *
++ */
++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
++
+ /**
+ * struct rcu_head - callback structure for use with RCU
+ * @next: next update requests in a list
+diff --git a/kernel/Makefile b/kernel/Makefile
+index ed4af9c..b4ad7a7 100644
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -50,6 +50,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
+ obj-$(CONFIG_RELAY) += relay.o
+ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
+ obj-$(CONFIG_TASKSTATS) += taskstats.o
++obj-$(CONFIG_MARKERS) += marker.o
+ obj-$(CONFIG_UTRACE) += utrace.o
+
+ ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
+diff --git a/kernel/marker.c b/kernel/marker.c
+new file mode 100644
+index 0000000..c4c2cd8
+--- /dev/null
++++ b/kernel/marker.c
+@@ -0,0 +1,851 @@
++/*
++ * Copyright (C) 2007 Mathieu Desnoyers
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ */
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/types.h>
++#include <linux/jhash.h>
++#include <linux/list.h>
++#include <linux/rcupdate.h>
++#include <linux/marker.h>
++#include <linux/err.h>
++
++extern struct marker __start___markers[];
++extern struct marker __stop___markers[];
++
++/* Set to 1 to enable marker debug output */
++const int marker_debug;
++
++/*
++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
++ * and module markers and the hash table.
++ */
++static DEFINE_MUTEX(markers_mutex);
++
++/*
++ * Marker hash table, containing the active markers.
++ * Protected by markers_mutex.
++ */
++#define MARKER_HASH_BITS 6
++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
++
++/*
++ * Note about RCU :
++ * It is used to make sure every handler has finished using its private data
++ * between two consecutive operations (add or remove) on a given marker. It is
++ * also used to delay the free of multiple probes array until a quiescent state
++ * is reached.
++ * marker entries modifications are protected by the markers_mutex.
++ */
++struct marker_entry {
++ struct hlist_node hlist;
++ char *format;
++ void (*call)(const struct marker *mdata, /* Probe wrapper */
++ void *call_private, const char *fmt, ...);
++ struct marker_probe_closure single;
++ struct marker_probe_closure *multi;
++ int refcount; /* Number of times armed. 0 if disarmed. */
++ struct rcu_head rcu;
++ void *oldptr;
++ char rcu_pending:1;
++ char ptype:1;
++ char name[0]; /* Contains name'\0'format'\0' */
++};
++
++static struct hlist_head marker_table[MARKER_TABLE_SIZE];
++
++/**
++ * __mark_empty_function - Empty probe callback
++ * @probe_private: probe private data
++ * @call_private: call site private data
++ * @fmt: format string
++ * @...: variable argument list
++ *
++ * Empty callback provided as a probe to the markers. By providing this to a
++ * disabled marker, we make sure the execution flow is always valid even
++ * though the function pointer change and the marker enabling are two distinct
++ * operations that modify the execution flow of preemptible code.
++ */
++void __mark_empty_function(void *probe_private, void *call_private,
++ const char *fmt, va_list *args)
++{
++}
++EXPORT_SYMBOL_GPL(__mark_empty_function);
++
++/*
++ * marker_probe_cb Callback that prepares the variable argument list for probes.
++ * @mdata: pointer of type struct marker
++ * @call_private: caller site private data
++ * @fmt: format string
++ * @...: Variable argument list.
++ *
++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we
++ * need to put a full smp_rmb() in this branch. This is why we do not use
++ * rcu_dereference() for the pointer read.
++ */
++void marker_probe_cb(const struct marker *mdata, void *call_private,
++ const char *fmt, ...)
++{
++ va_list args;
++ char ptype;
++
++ /*
++ * disabling preemption to make sure the teardown of the callbacks can
++ * be done correctly when they are in modules and they ensure RCU read
++ * coherency.
++ */
++ preempt_disable();
++ ptype = ACCESS_ONCE(mdata->ptype);
++ if (likely(!ptype)) {
++ marker_probe_func *func;
++ /* Must read the ptype before ptr. They are not data dependent,
++ * so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func = ACCESS_ONCE(mdata->single.func);
++ /* Must read the ptr before private data. They are not data
++ * dependent, so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ va_start(args, fmt);
++ func(mdata->single.probe_private, call_private, fmt, &args);
++ va_end(args);
++ } else {
++ struct marker_probe_closure *multi;
++ int i;
++ /*
++ * multi points to an array, therefore accessing the array
++ * depends on reading multi. However, even in this case,
++ * we must ensure that the pointer is read _before_ the array
++ * data. Same as rcu_dereference, but we need a full smp_rmb()
++ * in the fast path, so put the explicit barrier here.
++ */
++ smp_read_barrier_depends();
++ multi = ACCESS_ONCE(mdata->multi);
++ for (i = 0; multi[i].func; i++) {
++ va_start(args, fmt);
++ multi[i].func(multi[i].probe_private, call_private, fmt,
++ &args);
++ va_end(args);
++ }
++ }
++ preempt_enable();
++}
++EXPORT_SYMBOL_GPL(marker_probe_cb);
++
++/*
++ * marker_probe_cb_noarg Callback that does not prepare the variable argument list.
++ * @mdata: pointer of type struct marker
++ * @call_private: caller site private data
++ * @fmt: format string
++ * @...: Variable argument list.
++ *
++ * Should be connected to markers "MARK_NOARGS".
++ */
++void marker_probe_cb_noarg(const struct marker *mdata,
++ void *call_private, const char *fmt, ...)
++{
++ va_list args; /* not initialized */
++ char ptype;
++
++ preempt_disable();
++ ptype = ACCESS_ONCE(mdata->ptype);
++ if (likely(!ptype)) {
++ marker_probe_func *func;
++ /* Must read the ptype before ptr. They are not data dependent,
++ * so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func = ACCESS_ONCE(mdata->single.func);
++ /* Must read the ptr before private data. They are not data
++ * dependent, so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func(mdata->single.probe_private, call_private, fmt, &args);
++ } else {
++ struct marker_probe_closure *multi;
++ int i;
++ /*
++ * multi points to an array, therefore accessing the array
++ * depends on reading multi. However, even in this case,
++ * we must ensure that the pointer is read _before_ the array
++ * data. Same as rcu_dereference, but we need a full smp_rmb()
++ * in the fast path, so put the explicit barrier here.
++ */
++ smp_read_barrier_depends();
++ multi = ACCESS_ONCE(mdata->multi);
++ for (i = 0; multi[i].func; i++)
++ multi[i].func(multi[i].probe_private, call_private, fmt,
++ &args);
++ }
++ preempt_enable();
++}
++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
++
++static void free_old_closure(struct rcu_head *head)
++{
++ struct marker_entry *entry = container_of(head,
++ struct marker_entry, rcu);
++ kfree(entry->oldptr);
++ /* Make sure we free the data before setting the pending flag to 0 */
++ smp_wmb();
++ entry->rcu_pending = 0;
++}
++
++static void debug_print_probes(struct marker_entry *entry)
++{
++ int i;
++
++ if (!marker_debug)
++ return;
++
++ if (!entry->ptype) {
++ printk(KERN_DEBUG "Single probe : %p %p\n",
++ entry->single.func,
++ entry->single.probe_private);
++ } else {
++ for (i = 0; entry->multi[i].func; i++)
++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
++ entry->multi[i].func,
++ entry->multi[i].probe_private);
++ }
++}
++
++static struct marker_probe_closure *
++marker_entry_add_probe(struct marker_entry *entry,
++ marker_probe_func *probe, void *probe_private)
++{
++ int nr_probes = 0;
++ struct marker_probe_closure *old, *new;
++
++ WARN_ON(!probe);
++
++ debug_print_probes(entry);
++ old = entry->multi;
++ if (!entry->ptype) {
++ if (entry->single.func == probe &&
++ entry->single.probe_private == probe_private)
++ return ERR_PTR(-EBUSY);
++ if (entry->single.func == __mark_empty_function) {
++ /* 0 -> 1 probes */
++ entry->single.func = probe;
++ entry->single.probe_private = probe_private;
++ entry->refcount = 1;
++ entry->ptype = 0;
++ debug_print_probes(entry);
++ return NULL;
++ } else {
++ /* 1 -> 2 probes */
++ nr_probes = 1;
++ old = NULL;
++ }
++ } else {
++ /* (N -> N+1), (N != 0, 1) probes */
++ for (nr_probes = 0; old[nr_probes].func; nr_probes++)
++ if (old[nr_probes].func == probe
++ && old[nr_probes].probe_private
++ == probe_private)
++ return ERR_PTR(-EBUSY);
++ }
++ /* + 2 : one for new probe, one for NULL func */
++ new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
++ GFP_KERNEL);
++ if (new == NULL)
++ return ERR_PTR(-ENOMEM);
++ if (!old)
++ new[0] = entry->single;
++ else
++ memcpy(new, old,
++ nr_probes * sizeof(struct marker_probe_closure));
++ new[nr_probes].func = probe;
++ new[nr_probes].probe_private = probe_private;
++ entry->refcount = nr_probes + 1;
++ entry->multi = new;
++ entry->ptype = 1;
++ debug_print_probes(entry);
++ return old;
++}
++
++static struct marker_probe_closure *
++marker_entry_remove_probe(struct marker_entry *entry,
++ marker_probe_func *probe, void *probe_private)
++{
++ int nr_probes = 0, nr_del = 0, i;
++ struct marker_probe_closure *old, *new;
++
++ old = entry->multi;
++
++ debug_print_probes(entry);
++ if (!entry->ptype) {
++ /* 0 -> N is an error */
++ WARN_ON(entry->single.func == __mark_empty_function);
++ /* 1 -> 0 probes */
++ WARN_ON(probe && entry->single.func != probe);
++ WARN_ON(entry->single.probe_private != probe_private);
++ entry->single.func = __mark_empty_function;
++ entry->refcount = 0;
++ entry->ptype = 0;
++ debug_print_probes(entry);
++ return NULL;
++ } else {
++ /* (N -> M), (N > 1, M >= 0) probes */
++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
++ if ((!probe || old[nr_probes].func == probe)
++ && old[nr_probes].probe_private
++ == probe_private)
++ nr_del++;
++ }
++ }
++
++ if (nr_probes - nr_del == 0) {
++ /* N -> 0, (N > 1) */
++ entry->single.func = __mark_empty_function;
++ entry->refcount = 0;
++ entry->ptype = 0;
++ } else if (nr_probes - nr_del == 1) {
++ /* N -> 1, (N > 1) */
++ for (i = 0; old[i].func; i++)
++ if ((probe && old[i].func != probe) ||
++ old[i].probe_private != probe_private)
++ entry->single = old[i];
++ entry->refcount = 1;
++ entry->ptype = 0;
++ } else {
++ int j = 0;
++ /* N -> M, (N > 1, M > 1) */
++ /* + 1 for NULL */
++ new = kzalloc((nr_probes - nr_del + 1)
++ * sizeof(struct marker_probe_closure), GFP_KERNEL);
++ if (new == NULL)
++ return ERR_PTR(-ENOMEM);
++ for (i = 0; old[i].func; i++)
++ if ((probe && old[i].func != probe) ||
++ old[i].probe_private != probe_private)
++ new[j++] = old[i];
++ entry->refcount = nr_probes - nr_del;
++ entry->ptype = 1;
++ entry->multi = new;
++ }
++ debug_print_probes(entry);
++ return old;
++}
++
++/*
++ * Get marker if the marker is present in the marker hash table.
++ * Must be called with markers_mutex held.
++ * Returns NULL if not present.
++ */
++static struct marker_entry *get_marker(const char *name)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ u32 hash = jhash(name, strlen(name), 0);
++
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name))
++ return e;
++ }
++ return NULL;
++}
++
++/*
++ * Add the marker to the marker hash table. Must be called with markers_mutex
++ * held.
++ */
++static struct marker_entry *add_marker(const char *name, const char *format)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ size_t name_len = strlen(name) + 1;
++ size_t format_len = 0;
++ u32 hash = jhash(name, name_len-1, 0);
++
++ if (format)
++ format_len = strlen(format) + 1;
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name)) {
++ printk(KERN_NOTICE
++ "Marker %s busy\n", name);
++ return ERR_PTR(-EBUSY); /* Already there */
++ }
++ }
++ /*
++ * Using kmalloc here to allocate a variable length element. Could
++ * cause some memory fragmentation if overused.
++ */
++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
++ GFP_KERNEL);
++ if (!e)
++ return ERR_PTR(-ENOMEM);
++ memcpy(&e->name[0], name, name_len);
++ if (format) {
++ e->format = &e->name[name_len];
++ memcpy(e->format, format, format_len);
++ if (strcmp(e->format, MARK_NOARGS) == 0)
++ e->call = marker_probe_cb_noarg;
++ else
++ e->call = marker_probe_cb;
++ trace_mark(core_marker_format, "name %s format %s",
++ e->name, e->format);
++ } else {
++ e->format = NULL;
++ e->call = marker_probe_cb;
++ }
++ e->single.func = __mark_empty_function;
++ e->single.probe_private = NULL;
++ e->multi = NULL;
++ e->ptype = 0;
++ e->refcount = 0;
++ e->rcu_pending = 0;
++ hlist_add_head(&e->hlist, head);
++ return e;
++}
++
++/*
++ * Remove the marker from the marker hash table. Must be called with
++ * markers_mutex held. Returns -ENOENT if absent, -EBUSY if still in use.
++ */
++static int remove_marker(const char *name)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ int found = 0;
++ size_t len = strlen(name) + 1;
++ u32 hash = jhash(name, len-1, 0);
++
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name)) {
++ found = 1;
++ break;
++ }
++ }
++ if (!found)
++ return -ENOENT;
++ if (e->single.func != __mark_empty_function)
++ return -EBUSY;
++ hlist_del(&e->hlist);
++ /* Wait for a pending free_old_closure() before freeing its rcu_head */
++ if (e->rcu_pending)
++ rcu_barrier();
++ kfree(e);
++ return 0;
++}
++
++/*
++ * Set the format of *entry by swapping in a reallocated copy; updates *entry.
++ */
++static int marker_set_format(struct marker_entry **entry, const char *format)
++{
++ struct marker_entry *e;
++ size_t name_len = strlen((*entry)->name) + 1;
++ size_t format_len = strlen(format) + 1;
++
++
++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
++ GFP_KERNEL);
++ if (!e)
++ return -ENOMEM;
++ memcpy(&e->name[0], (*entry)->name, name_len);
++ e->format = &e->name[name_len];
++ memcpy(e->format, format, format_len);
++ if (strcmp(e->format, MARK_NOARGS) == 0)
++ e->call = marker_probe_cb_noarg;
++ else
++ e->call = marker_probe_cb;
++ e->single = (*entry)->single;
++ e->multi = (*entry)->multi;
++ e->ptype = (*entry)->ptype;
++ e->refcount = (*entry)->refcount;
++ e->rcu_pending = 0;
++ hlist_add_before(&e->hlist, &(*entry)->hlist);
++ hlist_del(&(*entry)->hlist);
++ /* Wait for a pending free_old_closure() before freeing the old entry */
++ if ((*entry)->rcu_pending)
++ rcu_barrier();
++ kfree(*entry);
++ *entry = e;
++ trace_mark(core_marker_format, "name %s format %s",
++ e->name, e->format);
++ return 0;
++}
++
++/*
++ * Sets the probe callback corresponding to one marker.
++ */
++static int set_marker(struct marker_entry **entry, struct marker *elem,
++ int active)
++{
++ int ret;
++ WARN_ON(strcmp((*entry)->name, elem->name) != 0);
++
++ if ((*entry)->format) {
++ if (strcmp((*entry)->format, elem->format) != 0) {
++ printk(KERN_NOTICE
++ "Format mismatch for probe %s "
++ "(%s), marker (%s)\n",
++ (*entry)->name,
++ (*entry)->format,
++ elem->format);
++ return -EPERM;
++ }
++ } else {
++ ret = marker_set_format(entry, elem->format);
++ if (ret)
++ return ret;
++ }
++
++ /*
++ * probe_cb setup (statically known) is done here. It is
++ * asynchronous with the rest of execution, therefore we only
++ * pass from a "safe" callback (with argument) to an "unsafe"
++ * callback (does not set arguments).
++ */
++ elem->call = (*entry)->call;
++ /*
++ * Sanity check :
++ * We only update the single probe private data when the ptr is
++ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
++ */
++ WARN_ON(elem->single.func != __mark_empty_function
++ && elem->single.probe_private
++ != (*entry)->single.probe_private &&
++ !elem->ptype);
++ elem->single.probe_private = (*entry)->single.probe_private;
++ /*
++ * Make sure the private data is valid when we update the
++ * single probe ptr.
++ */
++ smp_wmb();
++ elem->single.func = (*entry)->single.func;
++ /*
++ * We also make sure that the new probe callbacks array is consistent
++ * before setting a pointer to it.
++ */
++ rcu_assign_pointer(elem->multi, (*entry)->multi);
++ /*
++ * Update the function or multi probe array pointer before setting the
++ * ptype.
++ */
++ smp_wmb();
++ elem->ptype = (*entry)->ptype;
++ elem->state = active;
++
++ return 0;
++}
++
++/*
++ * Disable a marker and its probe callback.
++ * Note: only a synchronize_sched() issued after setting elem->single.func to
++ * the empty function ensures that the original callback is not used anymore.
++ * This relies on preemption being disabled around the call site.
++ */
++static void disable_marker(struct marker *elem)
++{
++ /* leave "call" as is. It is known statically. */
++ elem->state = 0;
++ elem->single.func = __mark_empty_function;
++ /* Update the function before setting the ptype */
++ smp_wmb();
++ elem->ptype = 0; /* single probe */
++ /*
++ * Leave the private data and id there, because removal is racy and
++ * should be done only after a synchronize_sched(). These are never used
++ * until the next initialization anyway.
++ */
++}
++
++/**
++ * marker_update_probe_range - Update a probe range
++ * @begin: beginning of the range
++ * @end: end of the range
++ *
++ * Updates the probe callback corresponding to a range of markers.
++ */
++void marker_update_probe_range(struct marker *begin,
++ struct marker *end)
++{
++ struct marker *iter;
++ struct marker_entry *mark_entry;
++
++ mutex_lock(&markers_mutex);
++ for (iter = begin; iter < end; iter++) {
++ mark_entry = get_marker(iter->name);
++ if (mark_entry) {
++ set_marker(&mark_entry, iter,
++ !!mark_entry->refcount);
++ /*
++ * ignore error, continue
++ */
++ } else {
++ disable_marker(iter);
++ }
++ }
++ mutex_unlock(&markers_mutex);
++}
++
++/*
++ * Update probes: bring every marker site in line with the marker hash table,
++ * first for the core kernel markers, then for the markers in modules.
++ * Takes no parameter and issues no synchronize_sched() itself; callers queue
++ * the freeing of old probe arrays with call_rcu() after it returns.
++ *
++ * Internal callback only changed before the first probe is connected to it.
++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
++ * transitions. All other transitions will leave the old private data valid.
++ * This makes the non-atomicity of the callback/private data updates valid.
++ *
++ * "special case" updates :
++ * 0 -> 1 callback
++ * 1 -> 0 callback
++ * 1 -> 2 callbacks
++ * 2 -> 1 callbacks
++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
++ * Side effect : marker_set_format may delete the marker entry (creating a
++ * replacement).
++ */
++static void marker_update_probes(void)
++{
++ /* Core kernel markers */
++ marker_update_probe_range(__start___markers, __stop___markers);
++ /* Markers in modules. */
++ module_update_markers();
++}
++
++/**
++ * marker_probe_register - Connect a probe to a marker
++ * @name: marker name
++ * @format: format string
++ * @probe: probe handler
++ * @probe_private: probe private data
++ *
++ * private data must be a valid allocated memory address, or NULL.
++ * Returns 0 if ok, error value on error: -EBUSY if the (probe, probe_private)
++ * pair is already connected, -ENOMEM on allocation failure, -ENOENT if the
++ * marker entry disappeared while markers_mutex was dropped.
++ * The probe address must at least be aligned on the architecture pointer size.
++ */
++int marker_probe_register(const char *name, const char *format,
++ marker_probe_func *probe, void *probe_private)
++{
++ struct marker_entry *entry;
++ int ret = 0;
++ struct marker_probe_closure *old;
++
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name);
++ if (!entry) {
++ entry = add_marker(name, format);
++ if (IS_ERR(entry)) {
++ ret = PTR_ERR(entry);
++ goto end;
++ }
++ }
++ /*
++ * If we detect that a call_rcu is pending for this marker,
++ * make sure it's executed now.
++ */
++ if (entry->rcu_pending)
++ rcu_barrier();
++ old = marker_entry_add_probe(entry, probe, probe_private);
++ if (IS_ERR(old)) {
++ ret = PTR_ERR(old);
++ goto end;
++ }
++ mutex_unlock(&markers_mutex);
++ marker_update_probes(); /* may update entry */
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name);
++ if (!entry) {
++ /*
++ * The entry went away while the mutex was dropped, so there is
++ * no rcu_head to queue the old array on. Warn, wait for the
++ * marker call sites that may still use it, then free it here
++ * instead of dereferencing a NULL entry.
++ */
++ WARN_ON(1);
++ synchronize_sched();
++ kfree(old);
++ ret = -ENOENT;
++ goto end;
++ }
++ /* Do not requeue entry->rcu while an earlier callback is pending. */
++ if (entry->rcu_pending)
++ rcu_barrier();
++ entry->oldptr = old;
++ entry->rcu_pending = 1;
++ /* write rcu_pending before calling the RCU callback */
++ smp_wmb();
++ call_rcu(&entry->rcu, free_old_closure);
++end:
++ mutex_unlock(&markers_mutex);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_register);
++
++/**
++ * marker_probe_unregister - Disconnect a probe from a marker
++ * @name: marker name
++ * @probe: probe function pointer
++ * @probe_private: probe private data
++ *
++ * Returns 0 on success, -ENOENT if no marker entry named @name exists (or it
++ * disappeared while markers_mutex was dropped), or -ENOMEM if the reduced
++ * probe array could not be allocated.
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled section
++ * has finished.
++ */
++int marker_probe_unregister(const char *name,
++ marker_probe_func *probe, void *probe_private)
++{
++ struct marker_entry *entry;
++ struct marker_probe_closure *old;
++ int ret = 0;
++
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name);
++ if (!entry) {
++ ret = -ENOENT;
++ goto end;
++ }
++ if (entry->rcu_pending)
++ rcu_barrier();
++ old = marker_entry_remove_probe(entry, probe, probe_private);
++ if (IS_ERR(old)) {
++ /* Nothing was changed; never hand an ERR_PTR to kfree(). */
++ ret = PTR_ERR(old);
++ goto end;
++ }
++ mutex_unlock(&markers_mutex);
++ marker_update_probes(); /* may update entry */
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name);
++ if (!entry) {
++ /*
++ * The entry went away while the mutex was dropped, so there is
++ * no rcu_head to queue the old array on. Wait for the marker
++ * call sites that may still use it, then free it directly.
++ */
++ synchronize_sched();
++ kfree(old);
++ ret = -ENOENT;
++ goto end;
++ }
++ /* Do not requeue entry->rcu while an earlier callback is pending. */
++ if (entry->rcu_pending)
++ rcu_barrier();
++ entry->oldptr = old;
++ entry->rcu_pending = 1;
++ /* write rcu_pending before calling the RCU callback */
++ smp_wmb();
++ call_rcu(&entry->rcu, free_old_closure);
++ remove_marker(name); /* Ignore busy error message */
++end:
++ mutex_unlock(&markers_mutex);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister);
++
++/*
++ * Look up the first marker entry that has the (probe, probe_private) closure
++ * connected, scanning every bucket of the marker hash table.
++ * Called with markers_mutex held by marker_probe_unregister_private_data().
++ * Returns NULL if no entry matches.
++ */
++static struct marker_entry *
++get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
++{
++ struct marker_entry *entry;
++ unsigned int i, j;
++ struct hlist_head *head;
++ struct hlist_node *node;
++
++ for (i = 0; i < MARKER_TABLE_SIZE; i++) {
++ head = &marker_table[i];
++ hlist_for_each_entry(entry, node, head, hlist) {
++ if (!entry->ptype) {
++ if (entry->single.func == probe
++ && entry->single.probe_private
++ == probe_private)
++ return entry;
++ } else {
++ struct marker_probe_closure *closure;
++ closure = entry->multi;
++ /*
++ * Walk the closures with their own index: reusing
++ * the bucket index i here would make the outer loop
++ * skip or revisit hash buckets.
++ */
++ for (j = 0; closure[j].func; j++) {
++ if (closure[j].func == probe &&
++ closure[j].probe_private
++ == probe_private)
++ return entry;
++ }
++ }
++ }
++ }
++ return NULL;
++}
++
++/**
++ * marker_probe_unregister_private_data - Disconnect a probe from a marker
++ * @probe: probe function
++ * @probe_private: probe private data
++ *
++ * Unregister a probe by providing the registered private data.
++ * Only removes the first marker found in hash table.
++ * Return 0 on success or error value: -ENOENT if no marker carries the
++ * (probe, probe_private) closure or the entry disappeared while markers_mutex
++ * was dropped, -ENOMEM on allocation failure.
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled section
++ * has finished.
++ */
++int marker_probe_unregister_private_data(marker_probe_func *probe,
++ void *probe_private)
++{
++ struct marker_entry *entry;
++ struct marker_probe_closure *old;
++ char *name;
++ size_t name_len;
++ int ret = 0;
++
++ mutex_lock(&markers_mutex);
++ entry = get_marker_from_private_data(probe, probe_private);
++ if (!entry) {
++ ret = -ENOENT;
++ goto end;
++ }
++ /*
++ * Keep a private copy of the marker name. Once the closure has been
++ * removed the entry can no longer be found through
++ * (probe, probe_private), and marker_update_probes() may replace the
++ * entry, so it has to be looked up again by name.
++ */
++ name_len = strlen(entry->name) + 1;
++ name = kmalloc(name_len, GFP_KERNEL);
++ if (!name) {
++ ret = -ENOMEM;
++ goto end;
++ }
++ memcpy(name, entry->name, name_len);
++ if (entry->rcu_pending)
++ rcu_barrier();
++ old = marker_entry_remove_probe(entry, NULL, probe_private);
++ if (IS_ERR(old)) {
++ /* Nothing was changed; never hand an ERR_PTR to kfree(). */
++ ret = PTR_ERR(old);
++ goto free_name;
++ }
++ mutex_unlock(&markers_mutex);
++ marker_update_probes(); /* may update entry */
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name);
++ if (!entry) {
++ /*
++ * The entry went away while the mutex was dropped, so there is
++ * no rcu_head to queue the old array on. Wait for the marker
++ * call sites that may still use it, then free it directly.
++ */
++ synchronize_sched();
++ kfree(old);
++ ret = -ENOENT;
++ goto free_name;
++ }
++ /* Do not requeue entry->rcu while an earlier callback is pending. */
++ if (entry->rcu_pending)
++ rcu_barrier();
++ entry->oldptr = old;
++ entry->rcu_pending = 1;
++ /* write rcu_pending before calling the RCU callback */
++ smp_wmb();
++ call_rcu(&entry->rcu, free_old_closure);
++ remove_marker(name); /* Ignore busy error message */
++free_name:
++ kfree(name);
++end:
++ mutex_unlock(&markers_mutex);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
++
++/**
++ * marker_get_private_data - Get a marker's probe private data
++ * @name: marker name
++ * @probe: probe to match
++ * @num: get the nth matching probe's private data
++ *
++ * Returns the private data pointer of the nth (starting from 0) probe of
++ * marker @name whose function is @probe, or ERR_PTR(-ENOENT) if there is no
++ * such probe.
++ * The private data pointer should _only_ be dereferenced if the caller is the
++ * owner of the data, or its content could vanish. This is mostly used to
++ * confirm that a caller is the owner of a registered probe.
++ */
++void *marker_get_private_data(const char *name, marker_probe_func *probe,
++ int num)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ size_t name_len = strlen(name) + 1;
++ u32 hash = jhash(name, name_len-1, 0);
++ int i;
++
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name)) {
++ if (!e->ptype) {
++ if (num == 0 && e->single.func == probe)
++ return e->single.probe_private;
++ else
++ break;
++ } else {
++ struct marker_probe_closure *closure;
++ int match = 0;
++ closure = e->multi;
++ for (i = 0; closure[i].func; i++) {
++ if (closure[i].func != probe)
++ continue;
++ if (match++ == num)
++ return closure[i].probe_private;
++ }
++ }
++ }
++ }
++ return ERR_PTR(-ENOENT);
++}
++EXPORT_SYMBOL_GPL(marker_get_private_data);
+diff --git a/kernel/module.c b/kernel/module.c
+index f9a5987..ac99222 100644
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -1488,6 +1488,8 @@ static struct module *load_module(void __user *umod,
+ unsigned int unusedcrcindex;
+ unsigned int unusedgplindex;
+ unsigned int unusedgplcrcindex;
++ unsigned int markersindex;
++ unsigned int markersstringsindex;
+ struct module *mod;
+ long err = 0;
+ void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+@@ -1756,6 +1758,9 @@ static struct module *load_module(void __user *umod,
+ add_taint(TAINT_FORCED_MODULE);
+ }
+ #endif
++ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
++ markersstringsindex = find_sec(hdr, sechdrs, secstrings,
++ "__markers_strings");
+
+ /* Now do relocations. */
+ for (i = 1; i < hdr->e_shnum; i++) {
+@@ -1778,6 +1783,11 @@ static struct module *load_module(void __user *umod,
+ if (err < 0)
+ goto cleanup;
+ }
++#ifdef CONFIG_MARKERS
++ mod->markers = (void *)sechdrs[markersindex].sh_addr;
++ mod->num_markers =
++ sechdrs[markersindex].sh_size / sizeof(*mod->markers);
++#endif
+
+ /* Find duplicate symbols */
+ err = verify_export_symbols(mod);
+@@ -1796,6 +1806,11 @@ static struct module *load_module(void __user *umod,
+
+ add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+
++#ifdef CONFIG_MARKERS
++ if (!tainted)
++ marker_update_probe_range(mod->markers,
++ mod->markers + mod->num_markers);
++#endif
+ err = module_finalize(hdr, sechdrs, mod);
+ if (err < 0)
+ goto cleanup;
+@@ -2259,3 +2274,18 @@ EXPORT_SYMBOL(module_remove_driver);
+ void struct_module(struct module *mod) { return; }
+ EXPORT_SYMBOL(struct_module);
+ #endif
++
++#ifdef CONFIG_MARKERS
++void module_update_markers(void)
++{
++ struct module *mod;
++
++ mutex_lock(&module_mutex);
++ list_for_each_entry(mod, &modules, list)
++ if (!tainted)
++ marker_update_probe_range(mod->markers,
++ mod->markers + mod->num_markers);
++ mutex_unlock(&module_mutex);
++}
++#endif
++
+diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
+index 0a64688..886b70c 100644
+--- a/scripts/Makefile.modpost
++++ b/scripts/Makefile.modpost
+@@ -13,6 +13,7 @@
+ # 2) modpost is then used to
+ # 3) create one <module>.mod.c file pr. module
+ # 4) create one Module.symvers file with CRC for all exported symbols
++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
+ # 5) compile all <module>.mod.c files
+ # 6) final link of the module to a <module.ko> file
+
+@@ -41,6 +42,10 @@ include scripts/Makefile.lib
+
+ kernelsymfile := $(objtree)/Module.symvers
+ modulesymfile := $(KBUILD_EXTMOD)/Module.symvers
++kernelmarkersfile := $(objtree)/Module.markers
++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
++
++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
+
+ # Step 1), find all modules listed in $(MODVERDIR)/
+ __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
+@@ -58,16 +63,27 @@ quiet_cmd_modpost = MODPOST
+ $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \
+ $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \
+ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \
++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \
+ $(filter-out FORCE,$^)
+
+ PHONY += __modpost
+ __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE
+ $(call cmd,modpost)
+
++quiet_cmd_kernel-mod = MODPOST $@
++ cmd_kernel-mod = $(cmd_modpost) $@
++
++vmlinux.o: FORCE
++ $(call cmd,kernel-mod)
++
+ # Declare generated files as targets for modpost
+ $(symverfile): __modpost ;
+ $(modules:.ko=.mod.c): __modpost ;
+
++ifdef CONFIG_MARKERS
++$(markersfile): __modpost ;
++endif
+
+ # Step 5), compile all *.mod.c files
+
+diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
+index dfde0e8..81e5910 100644
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -10,7 +10,8 @@
+ *
+ * Usage: modpost vmlinux module1.o module2.o ...
+ */
+-
++#define _GNU_SOURCE
++#include <stdio.h>
+ #include <ctype.h>
+ #include "modpost.h"
+ #include "../../include/linux/license.h"
+@@ -385,6 +386,8 @@ static void parse_elf(struct elf_info *info, const char *filename)
+ info->export_unused_gpl_sec = i;
+ else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
+ info->export_gpl_future_sec = i;
++ else if (strcmp(secname, "__markers_strings") == 0)
++ info->markers_strings_sec = i;
+
+ if (sechdrs[i].sh_type != SHT_SYMTAB)
+ continue;
+@@ -859,6 +862,62 @@ static void check_sec_ref(struct module *mod, const char *modname,
+ }
+ }
+
++static void get_markers(struct elf_info *info, struct module *mod)
++{
++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec];
++ const char *strings = (const char *) info->hdr + sh->sh_offset;
++ const Elf_Sym *sym, *first_sym, *last_sym;
++ size_t n;
++
++ if (!info->markers_strings_sec)
++ return;
++
++ /*
++ * First count the strings. We look for all the symbols defined
++ * in the __markers_strings section named __mstrtab_*. For
++ * these local names, the compiler puts a random .NNN suffix on,
++ * so the names don't correspond exactly.
++ */
++ first_sym = last_sym = NULL;
++ n = 0;
++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++)
++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
++ sym->st_shndx == info->markers_strings_sec &&
++ !strncmp(info->strtab + sym->st_name,
++ "__mstrtab_", sizeof "__mstrtab_" - 1)) {
++ if (first_sym == NULL)
++ first_sym = sym;
++ last_sym = sym;
++ ++n;
++ }
++
++ if (n == 0)
++ return;
++
++ /*
++ * Now collect each name and format into a line for the output.
++ * Lines are tab-separated (name, module, format) and look like:
++ * marker_name\tvmlinux\tmarker %s format %d
++ * The format string after the second \t can use whitespace.
++ */
++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n));
++ mod->nmarkers = n;
++
++ n = 0;
++ for (sym = first_sym; sym <= last_sym; sym++)
++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
++ sym->st_shndx == info->markers_strings_sec &&
++ !strncmp(info->strtab + sym->st_name,
++ "__mstrtab_", sizeof "__mstrtab_" - 1)) {
++ const char *name = strings + sym->st_value;
++ const char *fmt = strchr(name, '\0') + 1;
++ char *line = NULL;
++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
++ NOFAIL(line);
++ mod->markers[n++] = line;
++ }
++}
++
+ /**
+ * Functions used only during module init is marked __init and is stored in
+ * a .init.text section. Likewise data is marked __initdata and stored in
+@@ -1061,6 +1120,8 @@ static void read_symbols(char *modname)
+ get_src_version(modname, mod->srcversion,
+ sizeof(mod->srcversion)-1);
+
++ get_markers(&info, mod);
++
+ parse_elf_finish(&info);
+
+ /* Our trick to get versioning for struct_module - it's
+@@ -1394,6 +1455,93 @@ static void write_dump(const char *fname)
+ write_if_changed(&buf, fname);
+ }
+
++static void add_marker(struct module *mod, const char *name, const char *fmt)
++{
++ char *line = NULL;
++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
++ NOFAIL(line);
++
++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
++ sizeof mod->markers[0])));
++ mod->markers[mod->nmarkers++] = line;
++}
++
++static void read_markers(const char *fname)
++{
++ unsigned long size, pos = 0;
++ void *file = grab_file(fname, &size);
++ char *line;
++
++ if (!file) /* No old markers, silently ignore */
++ return;
++
++ while ((line = get_next_line(&pos, file, size))) {
++ char *marker, *modname, *fmt;
++ struct module *mod;
++
++ marker = line;
++ modname = strchr(marker, '\t');
++ if (!modname)
++ goto fail;
++ *modname++ = '\0';
++ fmt = strchr(modname, '\t');
++ if (!fmt)
++ goto fail;
++ *fmt++ = '\0';
++ if (*marker == '\0' || *modname == '\0')
++ goto fail;
++
++ mod = find_module(modname);
++ if (!mod) {
++ if (is_vmlinux(modname))
++ have_vmlinux = 1;
++ mod = new_module(NOFAIL(strdup(modname)));
++ mod->skip = 1;
++ }
++
++ add_marker(mod, marker, fmt);
++ }
++ return;
++fail:
++ fatal("parse error in markers list file\n");
++}
++
++static int compare_strings(const void *a, const void *b)
++{
++ return strcmp(*(const char **) a, *(const char **) b);
++}
++
++static void write_markers(const char *fname)
++{
++ struct buffer buf = { };
++ struct module *mod;
++ size_t i;
++
++ for (mod = modules; mod; mod = mod->next)
++ if ((!external_module || !mod->skip) && mod->markers != NULL) {
++ /*
++ * Sort the strings so we can skip duplicates when
++ * we write them out.
++ */
++ qsort(mod->markers, mod->nmarkers,
++ sizeof mod->markers[0], &compare_strings);
++ for (i = 0; i < mod->nmarkers; ++i) {
++ char *line = mod->markers[i];
++ buf_write(&buf, line, strlen(line));
++ while (i + 1 < mod->nmarkers &&
++ !strcmp(mod->markers[i],
++ mod->markers[i + 1]))
++ free(mod->markers[i++]);
++ free(mod->markers[i]);
++ }
++ free(mod->markers);
++ mod->markers = NULL;
++ }
++
++ write_if_changed(&buf, fname);
++}
++
++
+ int main(int argc, char **argv)
+ {
+ struct module *mod;
+@@ -1402,8 +1550,10 @@ int main(int argc, char **argv)
+ char *kernel_read = NULL, *module_read = NULL;
+ char *dump_write = NULL;
+ int opt;
++ char *markers_read = NULL;
++ char *markers_write = NULL;
+
+- while ((opt = getopt(argc, argv, "i:I:mo:a")) != -1) {
++ while ((opt = getopt(argc, argv, "i:I:mo:aM:K:")) != -1) {
+ switch(opt) {
+ case 'i':
+ kernel_read = optarg;
+@@ -1421,6 +1571,12 @@ int main(int argc, char **argv)
+ case 'a':
+ all_versions = 1;
+ break;
++ case 'M':
++ markers_write = optarg;
++ break;
++ case 'K':
++ markers_read = optarg;
++ break;
+ default:
+ exit(1);
+ }
+@@ -1460,5 +1616,11 @@ int main(int argc, char **argv)
+ if (dump_write)
+ write_dump(dump_write);
+
++ if (markers_read)
++ read_markers(markers_read);
++
++ if (markers_write)
++ write_markers(markers_write);
++
+ return 0;
+ }
+diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
+index d398c61..27b05e6 100644
+--- a/scripts/mod/modpost.h
++++ b/scripts/mod/modpost.h
+@@ -107,6 +107,8 @@ struct module {
+ int has_init;
+ int has_cleanup;
+ struct buffer dev_table_buf;
++ char **markers;
++ size_t nmarkers;
+ char srcversion[25];
+ };
+
+@@ -121,6 +123,7 @@ struct elf_info {
+ Elf_Section export_gpl_sec;
+ Elf_Section export_unused_gpl_sec;
+ Elf_Section export_gpl_future_sec;
++ Elf_Section markers_strings_sec;
+ const char *strtab;
+ char *modinfo;
+ unsigned int modinfo_len;
Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch
===================================================================
--- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch (rev 0)
+++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.18.el5u1.patch 2008-07-04 02:18:23 UTC (rev 6)
@@ -0,0 +1,1882 @@
+diff --git a/Makefile b/Makefile
+index 93bffdf..59693ac 100644
+--- a/Makefile
++++ b/Makefile
+@@ -603,7 +603,7 @@ quiet_cmd_vmlinux__ ?= LD $@
+ cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \
+ -T $(vmlinux-lds) $(vmlinux-init) \
+ --start-group $(vmlinux-main) --end-group \
+- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^)
++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^)
+
+ # Generate new vmlinux version
+ quiet_cmd_vmlinux_version = GEN .version
+@@ -727,11 +727,30 @@ debug_kallsyms: .tmp_map$(last_kallsyms)
+
+ endif # ifdef CONFIG_KALLSYMS
+
++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has
++# relevant sections renamed as per the linker script.
++quiet_cmd_vmlinux-modpost = LD $@
++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \
++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \
++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^)
++define rule_vmlinux-modpost
++ :
++ +$(call cmd,vmlinux-modpost)
++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@
++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd
++endef
++
+ # vmlinux image - including updated kernel symbols
+-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE
++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE
++ $(call vmlinux-modpost)
+ $(call if_changed_rule,vmlinux__)
+ $(Q)rm -f .old_version
+
++# build vmlinux.o first to catch section mismatch errors early
++$(kallsyms.o): vmlinux.o
++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE
++ $(call if_changed_rule,vmlinux-modpost)
++
+ # The actual objects are generated when descending,
+ # make sure no implicit rule kicks in
+ $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ;
+diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
+index d04fcc6..ad73af5 100644
+--- a/arch/i386/Kconfig
++++ b/arch/i386/Kconfig
+@@ -1212,6 +1212,14 @@ config KPROBES
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
++
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ endmenu
+
+ source "arch/i386/Kconfig.debug"
+diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
+index 7f4ca6b..3978095 100644
+--- a/arch/i386/kernel/vmlinux.lds.S
++++ b/arch/i386/kernel/vmlinux.lds.S
+@@ -67,6 +67,7 @@ SECTIONS
+ . = ALIGN(4096);
+ .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ } :data
+
+diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
+index f916fba..b81a36b 100644
+--- a/arch/ia64/Kconfig
++++ b/arch/ia64/Kconfig
+@@ -591,6 +591,14 @@ config KPROBES
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
++
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ endmenu
+
+ source "arch/ia64/Kconfig.debug"
+diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
+index 69cba94..2c0a5c9 100644
+--- a/arch/ia64/kernel/vmlinux.lds.S
++++ b/arch/ia64/kernel/vmlinux.lds.S
+@@ -207,7 +207,7 @@ SECTIONS
+
+ data : { } :data
+ .data : AT(ADDR(.data) - LOAD_OFFSET)
+- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS }
++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS }
+
+ . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */
+ .got : AT(ADDR(.got) - LOAD_OFFSET)
+diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
+index 7295252..fb8a2b9 100644
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -1091,6 +1091,14 @@ config KPROBES
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
++
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ endmenu
+
+ source "arch/powerpc/Kconfig.debug"
+diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
+index c02298a..709bc47 100644
+--- a/arch/powerpc/kernel/vmlinux.lds.S
++++ b/arch/powerpc/kernel/vmlinux.lds.S
+@@ -176,11 +176,13 @@ SECTIONS
+ *(.data)
+ *(.sdata)
+ *(.got.plt) *(.got)
++ MARKER
+ }
+ #else
+ .data : {
+ *(.data .data.rel* .toc1)
+ *(.branch_lt)
++ MARKER
+ }
+
+ .opd : {
+diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
+index d121403..d8d276a 100644
+--- a/arch/s390/Kconfig
++++ b/arch/s390/Kconfig
+@@ -503,6 +503,14 @@ config KPROBES
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
++
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ endmenu
+
+ source "arch/s390/Kconfig.debug"
+diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
+index af9e69a..8443156 100644
+--- a/arch/s390/kernel/vmlinux.lds.S
++++ b/arch/s390/kernel/vmlinux.lds.S
+@@ -46,6 +46,7 @@ SECTIONS
+
+ .data : { /* Data */
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ }
+
+diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
+index b627f8d..e0467d1 100644
+--- a/arch/sparc64/Kconfig
++++ b/arch/sparc64/Kconfig
+@@ -427,6 +427,14 @@ config KPROBES
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
++
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ endmenu
+
+ source "arch/sparc64/Kconfig.debug"
+diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
+index b097379..1f10e43 100644
+--- a/arch/sparc64/kernel/vmlinux.lds.S
++++ b/arch/sparc64/kernel/vmlinux.lds.S
+@@ -27,6 +27,7 @@ SECTIONS
+ .data :
+ {
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ }
+ .data1 : { *(.data1) }
+diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
+index 651d6cb..0e6570c 100644
+--- a/arch/x86_64/Kconfig
++++ b/arch/x86_64/Kconfig
+@@ -711,6 +711,14 @@ config KPROBES
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
++
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ endmenu
+
+ source "arch/x86_64/Kconfig.debug"
+diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
+index 57e3255..867fce1 100644
+--- a/arch/x86_64/kernel/vmlinux.lds.S
++++ b/arch/x86_64/kernel/vmlinux.lds.S
+@@ -68,6 +68,7 @@ SECTIONS
+ /* Data */
+ .data : AT(ADDR(.data) - LOAD_OFFSET) {
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ } :data
+
+diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
+index 3fb354d..e884942 100644
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -126,7 +126,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator);
+
+ // ================= main 802.3ad protocol functions ==================
+ static int ad_lacpdu_send(struct port *port);
+-static int ad_marker_send(struct port *port, struct marker *marker);
++static int ad_marker_send(struct port *port, struct bond_marker *marker);
+ static void ad_mux_machine(struct port *port);
+ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
+ static void ad_tx_machine(struct port *port);
+@@ -139,8 +139,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast);
+ static void ad_initialize_lacpdu(struct lacpdu *Lacpdu);
+ static void ad_enable_collecting_distributing(struct port *port);
+ static void ad_disable_collecting_distributing(struct port *port);
+-static void ad_marker_info_received(struct marker *marker_info, struct port *port);
+-static void ad_marker_response_received(struct marker *marker, struct port *port);
++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port);
++static void ad_marker_response_received(struct bond_marker *marker, struct port *port);
+
+
+ /////////////////////////////////////////////////////////////////////////////////
+@@ -912,12 +912,13 @@ static int ad_lacpdu_send(struct port *port)
+ * Returns: 0 on success
+ * < 0 on error
+ */
+-static int ad_marker_send(struct port *port, struct marker *marker)
++static int ad_marker_send(struct port *port, struct bond_marker *marker)
+ {
+ struct slave *slave = port->slave;
+ struct sk_buff *skb;
+- struct marker_header *marker_header;
+- int length = sizeof(struct marker_header);
++ struct bond_marker_header *marker_header;
++ int length = sizeof(struct bond_marker_header);
++
+ struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR;
+
+ skb = dev_alloc_skb(length + 16);
+@@ -932,7 +933,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
+ skb->nh.raw = skb->data + ETH_HLEN;
+ skb->protocol = PKT_TYPE_LACPDU;
+
+- marker_header = (struct marker_header *)skb_put(skb, length);
++ marker_header = (struct bond_marker_header *)skb_put(skb, length);
+
+ marker_header->ad_header.destination_address = lacpdu_multicast_address;
+ /* Note: source addres is set to be the member's PERMANENT address, because we use it
+@@ -1732,7 +1733,7 @@ static void ad_disable_collecting_distributing(struct port *port)
+ */
+ static void ad_marker_info_send(struct port *port)
+ {
+- struct marker marker;
++ struct bond_marker marker;
+ u16 index;
+
+ // fill the marker PDU with the appropriate values
+@@ -1765,13 +1766,14 @@ static void ad_marker_info_send(struct port *port)
+ * @port: the port we're looking at
+ *
+ */
+-static void ad_marker_info_received(struct marker *marker_info,struct port *port)
++static void ad_marker_info_received(struct bond_marker *marker_info,
++ struct port *port)
+ {
+- struct marker marker;
++ struct bond_marker marker;
+
+ // copy the received marker data to the response marker
+ //marker = *marker_info;
+- memcpy(&marker, marker_info, sizeof(struct marker));
++ memcpy(&marker, marker_info, sizeof(struct bond_marker));
+ // change the marker subtype to marker response
+ marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE;
+ // send the marker response
+@@ -1790,7 +1792,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port
+ * response for marker PDU's, in this stage, but only to respond to marker
+ * information.
+ */
+-static void ad_marker_response_received(struct marker *marker, struct port *port)
++static void ad_marker_response_received(struct bond_marker *marker,
++ struct port *port)
+ {
+ marker=NULL; // just to satisfy the compiler
+ port=NULL; // just to satisfy the compiler
+@@ -2187,15 +2190,16 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u
+ case AD_TYPE_MARKER:
+ // No need to convert fields to Little Endian since we don't use the marker's fields.
+
+- switch (((struct marker *)lacpdu)->tlv_type) {
++ switch (((struct bond_marker *)lacpdu)->tlv_type) {
+ case AD_MARKER_INFORMATION_SUBTYPE:
+ dprintk("Received Marker Information on port %d\n", port->actor_port_number);
+- ad_marker_info_received((struct marker *)lacpdu, port);
++ ad_marker_info_received((struct bond_marker *)lacpdu, port);
++
+ break;
+
+ case AD_MARKER_RESPONSE_SUBTYPE:
+ dprintk("Received Marker Response on port %d\n", port->actor_port_number);
+- ad_marker_response_received((struct marker *)lacpdu, port);
++ ad_marker_response_received((struct bond_marker *)lacpdu, port);
+ break;
+
+ default:
+diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
+index 6ad5ad6..bf93e7e 100644
+--- a/drivers/net/bonding/bond_3ad.h
++++ b/drivers/net/bonding/bond_3ad.h
+@@ -92,7 +92,8 @@ typedef enum {
+ typedef enum {
+ AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype
+ AD_MARKER_RESPONSE_SUBTYPE // marker response subtype
+-} marker_subtype_t;
++} bond_marker_subtype_t;
++
+
+ // timers types(43.4.9 in the 802.3ad standard)
+ typedef enum {
+@@ -148,7 +149,7 @@ typedef struct lacpdu_header {
+ } lacpdu_header_t;
+
+ // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard)
+-typedef struct marker {
++typedef struct bond_marker {
+ u8 subtype; // = 0x02 (marker PDU)
+ u8 version_number; // = 0x01
+ u8 tlv_type; // = 0x01 (marker information)
+@@ -161,12 +162,12 @@ typedef struct marker {
+ u8 tlv_type_terminator; // = 0x00
+ u8 terminator_length; // = 0x00
+ u8 reserved_90[90]; // = 0
+-} marker_t;
++} bond_marker_t;
+
+-typedef struct marker_header {
++typedef struct bond_marker_header {
+ struct ad_header ad_header;
+- struct marker marker;
+-} marker_header_t;
++ struct bond_marker marker;
++} bond_marker_header_t;
+
+ #pragma pack()
+
+diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h
+index 8c6f9b6..efc3d79 100644
+--- a/drivers/scsi/qla4xxx/ql4_fw.h
++++ b/drivers/scsi/qla4xxx/ql4_fw.h
+@@ -671,7 +671,7 @@ struct continuation_t1_entry {
+ #define ET_CONTINUE ET_CONT_T1
+
+ /* Marker entry structure*/
+-struct marker_entry {
++struct qla4_marker_entry {
+ struct qla4_header hdr; /* 00-03 */
+
+ uint32_t system_defined; /* 04-07 */
+diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c
+index d45e4d0..b85a484 100644
+--- a/drivers/scsi/qla4xxx/ql4_iocb.c
++++ b/drivers/scsi/qla4xxx/ql4_iocb.c
+@@ -70,7 +70,8 @@ int qla4xxx_get_req_pkt(struct scsi_qla_host *ha,
+ int qla4xxx_send_marker_iocb(struct scsi_qla_host *ha,
+ struct ddb_entry *ddb_entry, int lun)
+ {
+- struct marker_entry *marker_entry;
++ struct qla4_marker_entry *marker_entry;
++
+ unsigned long flags = 0;
+ uint8_t status = QLA_SUCCESS;
+
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index b9e964c..a175f1e 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -9,12 +9,21 @@
+ /* Align . to a 8 byte boundary equals to maximum function alignment. */
+ #define ALIGN_FUNCTION() . = ALIGN(8)
+
++/* Kernel markers : pointers */
++#define MARKER \
++ . = ALIGN(8); \
++ VMLINUX_SYMBOL(__start___markers) = .; \
++ *(__markers) \
++ VMLINUX_SYMBOL(__stop___markers) = .;
++
++
+ #define RODATA \
+ . = ALIGN(4096); \
+ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__start_rodata) = .; \
+ *(.rodata) *(.rodata.*) \
+ *(__vermagic) /* Kernel version magic */ \
++ *(__markers_strings) /* Markers: strings */ \
+ } \
+ \
+ .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \
+diff --git a/include/linux/marker.h b/include/linux/marker.h
+new file mode 100644
+index 0000000..efbc82b
+--- /dev/null
++++ b/include/linux/marker.h
+@@ -0,0 +1,139 @@
++#ifndef _LINUX_MARKER_H
++#define _LINUX_MARKER_H
++
++/*
++ * Code markup for dynamic and static tracing.
++ *
++ * See Documentation/marker.txt.
++ *
++ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers at polymtl.ca>
++ *
++ * This file is released under the GPLv2.
++ * See the file COPYING for more details.
++ */
++
++#include <linux/types.h>
++
++struct module;
++struct marker;
++
++/**
++ * marker_probe_func - Type of a marker probe function
++ * @probe_private: probe private data
++ * @call_private: call site private data
++ * @fmt: format string
++ * @args: variable argument list pointer. Use a pointer to overcome C's
++ * inability to pass this around as a pointer in a portable manner in
++ * the callee otherwise.
++ *
++ * Type of marker probe functions. They receive the probe and call site private
++ * data and need to parse the format string to recover the argument list.
++ */
++typedef void marker_probe_func(void *probe_private, void *call_private,
++ const char *fmt, va_list *args);
++
++struct marker_probe_closure {
++ marker_probe_func *func; /* Callback */
++ void *probe_private; /* Private probe data */
++};
++
++struct marker {
++ const char *name; /* Marker name */
++ const char *format; /* Marker format string, describing the
++ * variable argument list.
++ */
++ char state; /* Marker state. */
++ char ptype; /* probe type : 0 : single, 1 : multi */
++ void (*call)(const struct marker *mdata, /* Probe wrapper */
++ void *call_private, const char *fmt, ...);
++ struct marker_probe_closure single;
++ struct marker_probe_closure *multi;
++} __attribute__((aligned(8)));
++
++#ifdef CONFIG_MARKERS
++
++/*
++ * Note : the empty asm volatile with read constraint is used here instead of a
++ * "used" attribute to fix a gcc 4.1.x bug.
++ * Make sure the alignment of the structure in the __markers section will
++ * not add unwanted padding between the beginning of the section and the
++ * structure. Force alignment to the same alignment as the section start.
++ */
++#define __trace_mark(name, call_private, format, args...) \
++ do { \
++ static const char __mstrtab_##name[] \
++ __attribute__((section("__markers_strings"))) \
++ = #name "\0" format; \
++ static struct marker __mark_##name \
++ __attribute__((section("__markers"), aligned(8))) = \
++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \
++ 0, 0, marker_probe_cb, \
++ { __mark_empty_function, NULL}, NULL }; \
++ __mark_check_format(format, ## args); \
++ if (unlikely(__mark_##name.state)) { \
++ (*__mark_##name.call) \
++ (&__mark_##name, call_private, \
++ format, ## args); \
++ } \
++ } while (0)
++
++extern void marker_update_probe_range(struct marker *begin,
++ struct marker *end);
++#else /* !CONFIG_MARKERS */
++#define __trace_mark(name, call_private, format, args...) \
++ __mark_check_format(format, ## args)
++static inline void marker_update_probe_range(struct marker *begin,
++ struct marker *end)
++{ }
++#endif /* CONFIG_MARKERS */
++
++/**
++ * trace_mark - Marker
++ * @name: marker name, not quoted.
++ * @format: format string
++ * @args...: variable argument list
++ *
++ * Places a marker.
++ */
++#define trace_mark(name, format, args...) \
++ __trace_mark(name, NULL, format, ## args)
++
++/**
++ * MARK_NOARGS - Format string for a marker with no argument.
++ */
++#define MARK_NOARGS " "
++
++/* To be used for string format validity checking with gcc */
++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...)
++{
++}
++
++extern marker_probe_func __mark_empty_function;
++
++extern void marker_probe_cb(const struct marker *mdata,
++ void *call_private, const char *fmt, ...);
++extern void marker_probe_cb_noarg(const struct marker *mdata,
++ void *call_private, const char *fmt, ...);
++
++/*
++ * Connect a probe to a marker.
++ * private data pointer must be a valid allocated memory address, or NULL.
++ */
++extern int marker_probe_register(const char *name, const char *format,
++ marker_probe_func *probe, void *probe_private);
++
++/*
++ * Returns the private data given to marker_probe_register.
++ */
++extern int marker_probe_unregister(const char *name,
++ marker_probe_func *probe, void *probe_private);
++/*
++ * Unregister a marker by providing the registered private data.
++ */
++extern int marker_probe_unregister_private_data(marker_probe_func *probe,
++ void *probe_private);
++
++extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
++ int num);
++
++#endif
+diff --git a/include/linux/module.h b/include/linux/module.h
+index 0460634..de5b51e 100644
+--- a/include/linux/module.h
++++ b/include/linux/module.h
+@@ -17,6 +17,7 @@
+ #include <linux/stringify.h>
+ #include <linux/kobject.h>
+ #include <linux/moduleparam.h>
++#include <linux/marker.h>
+ #include <asm/local.h>
+
+ #include <asm/module.h>
+@@ -353,6 +354,10 @@ struct module
+ /* The command line arguments (may be mangled). People like
+ keeping pointers to this stuff */
+ char *args;
++#ifdef CONFIG_MARKERS
++ struct marker *markers;
++ unsigned int num_markers;
++#endif
+ };
+
+ /* FIXME: It'd be nice to isolate modules during init, too, so they
+@@ -472,6 +477,7 @@ int register_module_notifier(struct notifier_block * nb);
+ int unregister_module_notifier(struct notifier_block * nb);
+
+ extern void print_modules(void);
++extern void module_update_markers(void);
+
+ struct device_driver;
+ void module_add_driver(struct module *, struct device_driver *);
+@@ -574,6 +580,10 @@ static inline void print_modules(void)
+ {
+ }
+
++static inline void module_update_markers(void)
++{
++}
++
+ struct device_driver;
+ struct module;
+
+diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
+index b4ca73d..5f12d1b 100644
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -42,6 +42,19 @@
+ #include <linux/cpumask.h>
+ #include <linux/seqlock.h>
+
++/*
++ * Prevent the compiler from merging or refetching accesses. The compiler
++ * is also forbidden from reordering successive instances of ACCESS_ONCE(),
++ * but only when the compiler is aware of some particular ordering. One way
++ * to make the compiler aware of ordering is to put the two invocations of
++ * ACCESS_ONCE() in different C statements.
++ *
++ * This macro does absolutely -nothing- to prevent the CPU from reordering,
++ * merging, or refetching absolutely anything at any time.
++ *
++ */
++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
++
+ /**
+ * struct rcu_head - callback structure for use with RCU
+ * @next: next update requests in a list
+diff --git a/kernel/Makefile b/kernel/Makefile
+index ef832fa..5879e8b 100644
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -50,6 +50,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
+ obj-$(CONFIG_RELAY) += relay.o
+ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
+ obj-$(CONFIG_TASKSTATS) += taskstats.o
++obj-$(CONFIG_MARKERS) += marker.o
+ obj-$(CONFIG_UTRACE) += utrace.o
+ obj-$(CONFIG_PTRACE) += ptrace.o
+
+diff --git a/kernel/marker.c b/kernel/marker.c
+new file mode 100644
+index 0000000..c4c2cd8
+--- /dev/null
++++ b/kernel/marker.c
+@@ -0,0 +1,851 @@
++/*
++ * Copyright (C) 2007 Mathieu Desnoyers
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ */
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/types.h>
++#include <linux/jhash.h>
++#include <linux/list.h>
++#include <linux/rcupdate.h>
++#include <linux/marker.h>
++#include <linux/err.h>
++
++extern struct marker __start___markers[];
++extern struct marker __stop___markers[];
++
++/* Set to 1 to enable marker debug output */
++const int marker_debug;
++
++/*
++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
++ * and module markers and the hash table.
++ */
++static DEFINE_MUTEX(markers_mutex);
++
++/*
++ * Marker hash table, containing the active markers.
++ * Protected by markers_mutex.
++ */
++#define MARKER_HASH_BITS 6
++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
++
++/*
++ * Note about RCU :
++ * It is used to make sure every handler has finished using its private data
++ * between two consecutive operation (add or remove) on a given marker. It is
++ * also used to delay the free of multiple probes array until a quiescent state
++ * is reached.
++ * marker entries modifications are protected by the markers_mutex.
++ */
++struct marker_entry {
++ struct hlist_node hlist;
++ char *format;
++ void (*call)(const struct marker *mdata, /* Probe wrapper */
++ void *call_private, const char *fmt, ...);
++ struct marker_probe_closure single;
++ struct marker_probe_closure *multi;
++ int refcount; /* Number of times armed. 0 if disarmed. */
++ struct rcu_head rcu;
++ void *oldptr;
++ char rcu_pending:1;
++ char ptype:1;
++ char name[0]; /* Contains name'\0'format'\0' */
++};
++
++static struct hlist_head marker_table[MARKER_TABLE_SIZE];
++
++/**
++ * __mark_empty_function - Empty probe callback
++ * @probe_private: probe private data
++ * @call_private: call site private data
++ * @fmt: format string
++ * @...: variable argument list
++ *
++ * Empty callback provided as a probe to the markers. By providing this to a
++ * disabled marker, we make sure the execution flow is always valid even
++ * though the function pointer change and the marker enabling are two distinct
++ * operations that modifies the execution flow of preemptible code.
++ */
++void __mark_empty_function(void *probe_private, void *call_private,
++ const char *fmt, va_list *args)
++{
++}
++EXPORT_SYMBOL_GPL(__mark_empty_function);
++
++/*
++ * marker_probe_cb Callback that prepares the variable argument list for probes.
++ * @mdata: pointer of type struct marker
++ * @call_private: caller site private data
++ * @fmt: format string
++ * @...: Variable argument list.
++ *
++ * Calls the single probe when ptype is 0, else every entry of the multi array
++ * up to its NULL func terminator. The reads are ordered with explicit
++ * barriers rather than rcu_dereference(); preemption is off during the calls.
++ */
++void marker_probe_cb(const struct marker *mdata, void *call_private,
++ const char *fmt, ...)
++{
++ va_list args;
++ char ptype;
++
++ /*
++ * disabling preemption to make sure the teardown of the callbacks can
++ * be done correctly when they are in modules and they ensure RCU read
++ * coherency.
++ */
++ preempt_disable();
++ ptype = ACCESS_ONCE(mdata->ptype);
++ if (likely(!ptype)) {
++ marker_probe_func *func;
++ /* Must read the ptype before ptr. They are not data dependent,
++ * so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func = ACCESS_ONCE(mdata->single.func);
++ /* Must read the ptr before private data. They are not data
++ * dependent, so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ va_start(args, fmt);
++ func(mdata->single.probe_private, call_private, fmt, &args);
++ va_end(args);
++ } else {
++ struct marker_probe_closure *multi;
++ int i;
++ /*
++ * Read ptype before multi (smp_rmb), then order the multi
++ * pointer load before the dependent array reads: the
++ * dependency barrier has to follow the pointer load, as in
++ * rcu_dereference().
++ */
++ smp_rmb();
++ multi = ACCESS_ONCE(mdata->multi);
++ smp_read_barrier_depends();
++ for (i = 0; multi[i].func; i++) {
++ va_start(args, fmt);
++ multi[i].func(multi[i].probe_private, call_private, fmt,
++ &args);
++ va_end(args);
++ }
++ }
++ preempt_enable();
++}
++EXPORT_SYMBOL_GPL(marker_probe_cb);
++
++/*
++ * marker_probe_cb_noarg Callback that does not prepare the variable argument list.
++ * @mdata: pointer of type struct marker
++ * @call_private: caller site private data
++ * @fmt: format string
++ * @...: Variable argument list.
++ *
++ * Should be connected to markers "MARK_NOARGS".
++ */
++void marker_probe_cb_noarg(const struct marker *mdata,
++ void *call_private, const char *fmt, ...)
++{
++ va_list args; /* not initialized */
++ char ptype;
++
++ preempt_disable();
++ ptype = ACCESS_ONCE(mdata->ptype);
++ if (likely(!ptype)) {
++ marker_probe_func *func;
++ /* Must read the ptype before ptr. They are not data dependent,
++ * so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func = ACCESS_ONCE(mdata->single.func);
++ /* Must read the ptr before private data. They are not data
++ * dependent, so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func(mdata->single.probe_private, call_private, fmt, &args);
++ } else {
++ struct marker_probe_closure *multi;
++ int i;
++ /*
++ * Read ptype before multi (smp_rmb), then order the multi
++ * pointer load before the dependent array reads: the
++ * dependency barrier has to follow the pointer load, as in
++ * rcu_dereference().
++ */
++ smp_rmb();
++ multi = ACCESS_ONCE(mdata->multi);
++ smp_read_barrier_depends();
++ for (i = 0; multi[i].func; i++)
++ multi[i].func(multi[i].probe_private, call_private, fmt,
++ &args);
++ }
++ preempt_enable();
++}
++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
++
++static void free_old_closure(struct rcu_head *head)
++{
++ struct marker_entry *entry = container_of(head,
++ struct marker_entry, rcu);
++ kfree(entry->oldptr);
++ /* Make sure we free the data before setting the pending flag to 0 */
++ smp_wmb();
++ entry->rcu_pending = 0;
++}
++
++static void debug_print_probes(struct marker_entry *entry)
++{
++ int i;
++
++ if (!marker_debug)
++ return;
++
++ if (!entry->ptype) {
++ printk(KERN_DEBUG "Single probe : %p %p\n",
++ entry->single.func,
++ entry->single.probe_private);
++ } else {
++ for (i = 0; entry->multi[i].func; i++)
++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
++ entry->multi[i].func,
++ entry->multi[i].probe_private);
++ }
++}
++
++static struct marker_probe_closure *
++marker_entry_add_probe(struct marker_entry *entry,
++ marker_probe_func *probe, void *probe_private)
++{
++ int nr_probes = 0;
++ struct marker_probe_closure *old, *new;
++
++ WARN_ON(!probe);
++
++ debug_print_probes(entry);
++ old = entry->multi;
++ if (!entry->ptype) {
++ if (entry->single.func == probe &&
++ entry->single.probe_private == probe_private)
++ return ERR_PTR(-EBUSY);
++ if (entry->single.func == __mark_empty_function) {
++ /* 0 -> 1 probes */
++ entry->single.func = probe;
++ entry->single.probe_private = probe_private;
++ entry->refcount = 1;
++ entry->ptype = 0;
++ debug_print_probes(entry);
++ return NULL;
++ } else {
++ /* 1 -> 2 probes */
++ nr_probes = 1;
++ old = NULL;
++ }
++ } else {
++ /* (N -> N+1), (N != 0, 1) probes */
++ for (nr_probes = 0; old[nr_probes].func; nr_probes++)
++ if (old[nr_probes].func == probe
++ && old[nr_probes].probe_private
++ == probe_private)
++ return ERR_PTR(-EBUSY);
++ }
++ /* + 2 : one for new probe, one for NULL func */
++ new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
++ GFP_KERNEL);
++ if (new == NULL)
++ return ERR_PTR(-ENOMEM);
++ if (!old)
++ new[0] = entry->single;
++ else
++ memcpy(new, old,
++ nr_probes * sizeof(struct marker_probe_closure));
++ new[nr_probes].func = probe;
++ new[nr_probes].probe_private = probe_private;
++ entry->refcount = nr_probes + 1;
++ entry->multi = new;
++ entry->ptype = 1;
++ debug_print_probes(entry);
++ return old;
++}
++
++static struct marker_probe_closure *
++marker_entry_remove_probe(struct marker_entry *entry,
++ marker_probe_func *probe, void *probe_private)
++{
++ int nr_probes = 0, nr_del = 0, i;
++ struct marker_probe_closure *old, *new;
++ /* A NULL probe matches every closure whose private data matches. */
++ old = entry->multi;
++ /* Returns old multi array, NULL (single probe) or ERR_PTR(-ENOMEM). */
++ debug_print_probes(entry);
++ if (!entry->ptype) {
++ /* Removing from an entry that has no probe is an error */
++ WARN_ON(entry->single.func == __mark_empty_function);
++ /* 1 -> 0 probes; a mismatch only warns, the probe is still cleared */
++ WARN_ON(probe && entry->single.func != probe);
++ WARN_ON(entry->single.probe_private != probe_private);
++ entry->single.func = __mark_empty_function;
++ entry->refcount = 0;
++ entry->ptype = 0;
++ debug_print_probes(entry);
++ return NULL;
++ } else {
++ /* (N -> M), (N > 1, M >= 0) probes: count closures and matches */
++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
++ if ((!probe || old[nr_probes].func == probe)
++ && old[nr_probes].probe_private
++ == probe_private)
++ nr_del++;
++ }
++ }
++
++ if (nr_probes - nr_del == 0) {
++ /* N -> 0, (N > 1) */
++ entry->single.func = __mark_empty_function;
++ entry->refcount = 0;
++ entry->ptype = 0;
++ } else if (nr_probes - nr_del == 1) {
++ /* N -> 1, (N > 1): the one non-matching closure becomes single */
++ for (i = 0; old[i].func; i++)
++ if ((probe && old[i].func != probe) ||
++ old[i].probe_private != probe_private)
++ entry->single = old[i];
++ entry->refcount = 1;
++ entry->ptype = 0;
++ } else {
++ int j = 0;
++ /* N -> M, (N > 1, M > 1): copy the survivors to a new array */
++ /* + 1 for NULL */
++ new = kzalloc((nr_probes - nr_del + 1)
++ * sizeof(struct marker_probe_closure), GFP_KERNEL);
++ if (new == NULL)
++ return ERR_PTR(-ENOMEM); /* entry is left unchanged */
++ for (i = 0; old[i].func; i++)
++ if ((probe && old[i].func != probe) ||
++ old[i].probe_private != probe_private)
++ new[j++] = old[i];
++ entry->refcount = nr_probes - nr_del;
++ entry->ptype = 1;
++ entry->multi = new;
++ }
++ debug_print_probes(entry);
++ return old;
++}
++
++/*
++ * Get marker if the marker is present in the marker hash table.
++ * Must be called with markers_mutex held.
++ * Returns NULL if not present.
++ */
++static struct marker_entry *get_marker(const char *name)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ u32 hash = jhash(name, strlen(name), 0); /* NUL is not hashed */
++ /* The low MARKER_HASH_BITS bits of the hash select the bucket. */
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name))
++ return e;
++ }
++ return NULL;
++}
++
++/*
++ * Add the marker to the marker hash table; markers_mutex must be held.
++ * Returns the new entry, ERR_PTR(-EBUSY) if it exists, or ERR_PTR(-ENOMEM).
++ */
++static struct marker_entry *add_marker(const char *name, const char *format)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ size_t name_len = strlen(name) + 1; /* includes the NUL */
++ size_t format_len = 0; /* stays 0 when no format is given */
++ u32 hash = jhash(name, name_len-1, 0);
++
++ if (format)
++ format_len = strlen(format) + 1;
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name)) {
++ printk(KERN_NOTICE
++ "Marker %s busy\n", name);
++ return ERR_PTR(-EBUSY); /* Already there */
++ }
++ }
++ /*
++ * Using kmalloc here to allocate a variable length element. Could
++ * cause some memory fragmentation if overused.
++ */
++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
++ GFP_KERNEL);
++ if (!e)
++ return ERR_PTR(-ENOMEM);
++ memcpy(&e->name[0], name, name_len);
++ if (format) {
++ e->format = &e->name[name_len]; /* stored right after the name */
++ memcpy(e->format, format, format_len);
++ if (strcmp(e->format, MARK_NOARGS) == 0)
++ e->call = marker_probe_cb_noarg;
++ else
++ e->call = marker_probe_cb;
++ trace_mark(core_marker_format, "name %s format %s",
++ e->name, e->format);
++ } else {
++ e->format = NULL; /* set_marker() fills it in via marker_set_format() */
++ e->call = marker_probe_cb;
++ }
++ e->single.func = __mark_empty_function; /* starts with no probe */
++ e->single.probe_private = NULL;
++ e->multi = NULL;
++ e->ptype = 0;
++ e->refcount = 0;
++ e->rcu_pending = 0;
++ hlist_add_head(&e->hlist, head);
++ return e;
++}
++
++/*
++ * Remove the marker from the marker hash table; markers_mutex must be held.
++ * Returns 0, -ENOENT if not found, or -EBUSY if a probe is still installed.
++ */
++static int remove_marker(const char *name)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ int found = 0;
++ size_t len = strlen(name) + 1;
++ u32 hash = jhash(name, len-1, 0); /* hash over the name without its NUL */
++
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name)) {
++ found = 1;
++ break;
++ }
++ }
++ if (!found)
++ return -ENOENT;
++ if (e->single.func != __mark_empty_function)
++ return -EBUSY; /* only single.func is tested here */
++ hlist_del(&e->hlist);
++ /* Make sure the call_rcu has been executed */
++ if (e->rcu_pending)
++ rcu_barrier();
++ kfree(e);
++ return 0;
++}
++
++/*
++ * Replace *entry by a copy carrying the given format; the old entry is freed.
++ */
++static int marker_set_format(struct marker_entry **entry, const char *format)
++{
++ struct marker_entry *e;
++ size_t name_len = strlen((*entry)->name) + 1;
++ size_t format_len = strlen(format) + 1;
++
++ /* Returns 0, or -ENOMEM (in which case *entry is left untouched). */
++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
++ GFP_KERNEL);
++ if (!e)
++ return -ENOMEM;
++ memcpy(&e->name[0], (*entry)->name, name_len);
++ e->format = &e->name[name_len]; /* format is stored right after the name */
++ memcpy(e->format, format, format_len);
++ if (strcmp(e->format, MARK_NOARGS) == 0)
++ e->call = marker_probe_cb_noarg;
++ else
++ e->call = marker_probe_cb;
++ e->single = (*entry)->single; /* carry the probe state over */
++ e->multi = (*entry)->multi;
++ e->ptype = (*entry)->ptype;
++ e->refcount = (*entry)->refcount;
++ e->rcu_pending = 0;
++ hlist_add_before(&e->hlist, &(*entry)->hlist); /* same bucket position */
++ hlist_del(&(*entry)->hlist);
++ /* Make sure the call_rcu has been executed */
++ if ((*entry)->rcu_pending)
++ rcu_barrier();
++ kfree(*entry);
++ *entry = e; /* the caller's pointer now refers to the replacement */
++ trace_mark(core_marker_format, "name %s format %s",
++ e->name, e->format);
++ return 0;
++}
++
++/*
++ * Sets the probe callback corresponding to one marker.
++ */
++static int set_marker(struct marker_entry **entry, struct marker *elem,
++ int active)
++{
++ int ret;
++ WARN_ON(strcmp((*entry)->name, elem->name) != 0);
++ /* Returns 0, -EPERM on format mismatch, or marker_set_format()'s error. */
++ if ((*entry)->format) {
++ if (strcmp((*entry)->format, elem->format) != 0) {
++ printk(KERN_NOTICE
++ "Format mismatch for probe %s "
++ "(%s), marker (%s)\n",
++ (*entry)->name,
++ (*entry)->format,
++ elem->format);
++ return -EPERM; /* elem is left unchanged */
++ }
++ } else {
++ ret = marker_set_format(entry, elem->format); /* may replace *entry */
++ if (ret)
++ return ret;
++ }
++
++ /*
++ * probe_cb setup (statically known) is done here. It is
++ * asynchronous with the rest of execution, therefore we only
++ * pass from a "safe" callback (with argument) to an "unsafe"
++ * callback (does not set arguments).
++ */
++ elem->call = (*entry)->call;
++ /*
++ * Sanity check :
++ * We only update the single probe private data when the ptr is
++ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
++ */
++ WARN_ON(elem->single.func != __mark_empty_function
++ && elem->single.probe_private
++ != (*entry)->single.probe_private &&
++ !elem->ptype);
++ elem->single.probe_private = (*entry)->single.probe_private;
++ /*
++ * Make sure the private data is valid when we update the
++ * single probe ptr.
++ */
++ smp_wmb();
++ elem->single.func = (*entry)->single.func;
++ /*
++ * We also make sure that the new probe callbacks array is consistent
++ * before setting a pointer to it.
++ */
++ rcu_assign_pointer(elem->multi, (*entry)->multi);
++ /*
++ * Update the function or multi probe array pointer before setting the
++ * ptype.
++ */
++ smp_wmb();
++ elem->ptype = (*entry)->ptype;
++ elem->state = active; /* written last, after func/multi/ptype */
++
++ return 0;
++}
++
++/*
++ * Disable a marker and its probe callback.
++ * Note: only a synchronize_sched() issued after setting elem->single.func to
++ * the empty function ensures that the original callback is not used anymore.
++ * This is ensured by preemption disabling around the call site.
++ */
++static void disable_marker(struct marker *elem)
++{
++ /* leave "call" as is. It is known statically. */
++ elem->state = 0;
++ elem->single.func = __mark_empty_function;
++ /* Update the function before setting the ptype */
++ smp_wmb();
++ elem->ptype = 0; /* single probe */
++ /*
++ * Leave the private data and id there, because removal is racy and
++ * should be done only after a synchronize_sched(). These are never used
++ * until the next initialization anyway.
++ */
++}
++
++/**
++ * marker_update_probe_range - Update a probe range
++ * @begin: beginning of the range
++ * @end: end of the range (exclusive)
++ *
++ * Updates the probe callback corresponding to a range of markers.
++ */
++void marker_update_probe_range(struct marker *begin,
++ struct marker *end)
++{
++ struct marker *iter;
++ struct marker_entry *mark_entry;
++ /* Takes markers_mutex itself for the whole walk. */
++ mutex_lock(&markers_mutex);
++ for (iter = begin; iter < end; iter++) {
++ mark_entry = get_marker(iter->name);
++ if (mark_entry) {
++ set_marker(&mark_entry, iter,
++ !!mark_entry->refcount); /* active iff refcount != 0 */
++ /*
++ * ignore error, continue
++ */
++ } else {
++ disable_marker(iter); /* no registered entry of that name */
++ }
++ }
++ mutex_unlock(&markers_mutex);
++}
++
++/*
++ * Update probes, removing the faulty probes.
++ * Issues a synchronize_sched() when no reference to the module passed
++ * as parameter is found in the probes so the probe module can be
++ * safely unloaded from now on.
++ * NOTE(review): no parameter or synchronize_sched() call is visible here.
++ * Internal callback only changed before the first probe is connected to it.
++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
++ * transitions. All other transitions will leave the old private data valid.
++ * This makes the non-atomicity of the callback/private data updates valid.
++ *
++ * "special case" updates :
++ * 0 -> 1 callback
++ * 1 -> 0 callback
++ * 1 -> 2 callbacks
++ * 2 -> 1 callbacks
++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
++ * Side effect : marker_set_format may delete the marker entry (creating a
++ * replacement).
++ */
++static void marker_update_probes(void)
++{
++ /* Core kernel markers */
++ marker_update_probe_range(__start___markers, __stop___markers);
++ /* Markers in modules. */
++ module_update_markers();
++}
++
++/**
++ * marker_probe_register - Connect a probe to a marker
++ * @name: marker name
++ * @format: format string (only used when the entry is newly created)
++ * @probe: probe handler
++ * @probe_private: probe private data
++ *
++ * private data must be a valid allocated memory address, or NULL.
++ * Returns 0 if ok, error value on error.
++ * The probe address must at least be aligned on the architecture pointer size.
++ */
++int marker_probe_register(const char *name, const char *format,
++ marker_probe_func *probe, void *probe_private)
++{
++ struct marker_entry *entry;
++ int ret = 0;
++ struct marker_probe_closure *old;
++
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name);
++ if (!entry) {
++ entry = add_marker(name, format);
++ if (IS_ERR(entry)) {
++ ret = PTR_ERR(entry);
++ goto end;
++ }
++ }
++ /*
++ * If we detect that a call_rcu is pending for this marker,
++ * make sure it's executed now.
++ */
++ if (entry->rcu_pending)
++ rcu_barrier();
++ old = marker_entry_add_probe(entry, probe, probe_private);
++ if (IS_ERR(old)) {
++ ret = PTR_ERR(old); /* a newly added entry stays in the table */
++ goto end;
++ }
++ mutex_unlock(&markers_mutex); /* marker_update_probe_range() locks it */
++ marker_update_probes(); /* may update entry */
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name); /* look up again: entry may have been replaced */
++ WARN_ON(!entry); /* NOTE(review): entry is dereferenced below regardless */
++ entry->oldptr = old; /* previous closure array, left for the RCU callback */
++ entry->rcu_pending = 1;
++ /* write rcu_pending before calling the RCU callback */
++ smp_wmb();
++ call_rcu(&entry->rcu, free_old_closure);
++end:
++ mutex_unlock(&markers_mutex);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_register);
++
++/**
++ * marker_probe_unregister - Disconnect a probe from a marker
++ * @name: marker name
++ * @probe: probe function pointer
++ * @probe_private: probe private data
++ *
++ * Returns 0 on success, or -ENOENT if no marker of that name is registered.
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled section
++ * has finished.
++ */
++int marker_probe_unregister(const char *name,
++ marker_probe_func *probe, void *probe_private)
++{
++ struct marker_entry *entry;
++ struct marker_probe_closure *old;
++ int ret = 0;
++
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name);
++ if (!entry) {
++ ret = -ENOENT;
++ goto end;
++ }
++ if (entry->rcu_pending)
++ rcu_barrier();
++ old = marker_entry_remove_probe(entry, probe, probe_private);
++ mutex_unlock(&markers_mutex); /* NOTE(review): old may be ERR_PTR(-ENOMEM) */
++ marker_update_probes(); /* may update entry */
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name); /* NOTE(review): result is not checked for NULL */
++ entry->oldptr = old; /* previous closure array, left for the RCU callback */
++ entry->rcu_pending = 1;
++ /* write rcu_pending before calling the RCU callback */
++ smp_wmb();
++ call_rcu(&entry->rcu, free_old_closure);
++ remove_marker(name); /* Ignore busy error message */
++end:
++ mutex_unlock(&markers_mutex);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister);
++
++static struct marker_entry * /* first matching entry, or NULL if none */
++get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
++{
++ struct marker_entry *entry;
++ unsigned int i, j;
++ struct hlist_head *head;
++ struct hlist_node *node;
++ /* Scan every bucket for a closure matching both probe and probe_private. */
++ for (i = 0; i < MARKER_TABLE_SIZE; i++) {
++ head = &marker_table[i];
++ hlist_for_each_entry(entry, node, head, hlist) {
++ if (!entry->ptype) {
++ if (entry->single.func == probe
++ && entry->single.probe_private
++ == probe_private)
++ return entry;
++ } else {
++ struct marker_probe_closure *closure;
++ closure = entry->multi; /* array ends at a NULL func */
++ for (j = 0; closure[j].func; j++) { /* j: i is the bucket index */
++ if (closure[j].func == probe &&
++ closure[j].probe_private
++ == probe_private)
++ return entry;
++ }
++ }
++ }
++ }
++ return NULL;
++}
++
++/**
++ * marker_probe_unregister_private_data - Disconnect a probe from a marker
++ * @probe: probe function
++ * @probe_private: probe private data
++ *
++ * Unregister a probe by providing the registered private data.
++ * Only removes the first marker found in hash table.
++ * Return 0 on success or error value.
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled section
++ * has finished.
++ */
++int marker_probe_unregister_private_data(marker_probe_func *probe,
++ void *probe_private)
++{
++ struct marker_entry *entry;
++ int ret = 0;
++ struct marker_probe_closure *old;
++
++ mutex_lock(&markers_mutex);
++ entry = get_marker_from_private_data(probe, probe_private);
++ if (!entry) {
++ ret = -ENOENT;
++ goto end;
++ }
++ if (entry->rcu_pending)
++ rcu_barrier();
++ old = marker_entry_remove_probe(entry, NULL, probe_private); /* NULL: any func */
++ mutex_unlock(&markers_mutex);
++ marker_update_probes(); /* may update entry */
++ mutex_lock(&markers_mutex);
++ entry = get_marker_from_private_data(probe, probe_private);
++ WARN_ON(!entry); /* NOTE(review): the removed closure no longer matches */
++ entry->oldptr = old; /* NOTE(review): dereferenced even if entry is NULL */
++ entry->rcu_pending = 1;
++ /* write rcu_pending before calling the RCU callback */
++ smp_wmb();
++ call_rcu(&entry->rcu, free_old_closure);
++ remove_marker(entry->name); /* Ignore busy error message */
++end:
++ mutex_unlock(&markers_mutex);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
++
++/**
++ * marker_get_private_data - Get a marker's probe private data
++ * @name: marker name
++ * @probe: probe to match
++ * @num: get the nth matching probe's private data
++ *
++ * Returns the private data pointer of the nth closure (counting from 0)
++ * whose probe function is @probe, or ERR_PTR(-ENOENT) if the marker is
++ * not registered or has fewer than num + 1 such closures.
++ * The private data pointer should _only_ be dereferenced if the caller is the
++ * owner of the data, or its content could vanish. This is mostly used to
++ * confirm that a caller is the owner of a registered probe.
++ */
++void *marker_get_private_data(const char *name, marker_probe_func *probe,
++ int num)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ size_t name_len = strlen(name) + 1;
++ u32 hash = jhash(name, name_len-1, 0);
++ int i;
++ /* NOTE(review): markers_mutex is not taken here -- confirm callers do. */
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name)) {
++ if (!e->ptype) {
++ if (num == 0 && e->single.func == probe)
++ return e->single.probe_private;
++ else
++ break; /* single probe: only num == 0 can match */
++ } else {
++ struct marker_probe_closure *closure;
++ int match = 0; /* closures seen so far whose func is probe */
++ closure = e->multi;
++ for (i = 0; closure[i].func; i++) {
++ if (closure[i].func != probe)
++ continue;
++ if (match++ == num)
++ return closure[i].probe_private;
++ }
++ }
++ }
++ }
++ return ERR_PTR(-ENOENT);
++}
++EXPORT_SYMBOL_GPL(marker_get_private_data);
+diff --git a/kernel/module.c b/kernel/module.c
+index f9a5987..ac99222 100644
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -1488,6 +1488,8 @@ static struct module *load_module(void __user *umod,
+ unsigned int unusedcrcindex;
+ unsigned int unusedgplindex;
+ unsigned int unusedgplcrcindex;
++ unsigned int markersindex;
++ unsigned int markersstringsindex;
+ struct module *mod;
+ long err = 0;
+ void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+@@ -1756,6 +1758,9 @@ static struct module *load_module(void __user *umod,
+ add_taint(TAINT_FORCED_MODULE);
+ }
+ #endif
++ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
++ markersstringsindex = find_sec(hdr, sechdrs, secstrings,
++ "__markers_strings");
+
+ /* Now do relocations. */
+ for (i = 1; i < hdr->e_shnum; i++) {
+@@ -1778,6 +1783,11 @@ static struct module *load_module(void __user *umod,
+ if (err < 0)
+ goto cleanup;
+ }
++#ifdef CONFIG_MARKERS
++ mod->markers = (void *)sechdrs[markersindex].sh_addr;
++ mod->num_markers =
++ sechdrs[markersindex].sh_size / sizeof(*mod->markers);
++#endif
+
+ /* Find duplicate symbols */
+ err = verify_export_symbols(mod);
+@@ -1796,6 +1806,11 @@ static struct module *load_module(void __user *umod,
+
+ add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+
++#ifdef CONFIG_MARKERS
++ if (!tainted)
++ marker_update_probe_range(mod->markers,
++ mod->markers + mod->num_markers);
++#endif
+ err = module_finalize(hdr, sechdrs, mod);
+ if (err < 0)
+ goto cleanup;
+@@ -2259,3 +2274,18 @@ EXPORT_SYMBOL(module_remove_driver);
+ void struct_module(struct module *mod) { return; }
+ EXPORT_SYMBOL(struct_module);
+ #endif
++
++#ifdef CONFIG_MARKERS
++void module_update_markers(void)
++{
++ struct module *mod;
++ /* Refresh the markers of every module on the modules list. */
++ mutex_lock(&module_mutex);
++ list_for_each_entry(mod, &modules, list)
++ if (!tainted) /* NOTE(review): not a per-module (mod->...) test -- confirm */
++ marker_update_probe_range(mod->markers,
++ mod->markers + mod->num_markers);
++ mutex_unlock(&module_mutex);
++}
++#endif
++
+diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
+index 0a64688..886b70c 100644
+--- a/scripts/Makefile.modpost
++++ b/scripts/Makefile.modpost
+@@ -13,6 +13,7 @@
+ # 2) modpost is then used to
+ # 3) create one <module>.mod.c file pr. module
+ # 4) create one Module.symvers file with CRC for all exported symbols
++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
+ # 5) compile all <module>.mod.c files
+ # 6) final link of the module to a <module.ko> file
+
+@@ -41,6 +42,10 @@ include scripts/Makefile.lib
+
+ kernelsymfile := $(objtree)/Module.symvers
+ modulesymfile := $(KBUILD_EXTMOD)/Module.symvers
++kernelmarkersfile := $(objtree)/Module.markers
++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
++
++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
+
+ # Step 1), find all modules listed in $(MODVERDIR)/
+ __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
+@@ -58,16 +63,27 @@ quiet_cmd_modpost = MODPOST
+ $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \
+ $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \
+ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \
++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \
+ $(filter-out FORCE,$^)
+
+ PHONY += __modpost
+ __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE
+ $(call cmd,modpost)
+
++quiet_cmd_kernel-mod = MODPOST $@
++ cmd_kernel-mod = $(cmd_modpost) $@
++
++vmlinux.o: FORCE
++ $(call cmd,kernel-mod)
++
+ # Declare generated files as targets for modpost
+ $(symverfile): __modpost ;
+ $(modules:.ko=.mod.c): __modpost ;
+
++ifdef CONFIG_MARKERS
++$(markersfile): __modpost ;
++endif
+
+ # Step 5), compile all *.mod.c files
+
+diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
+index 0ec3321..383f310 100644
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -10,7 +10,8 @@
+ *
+ * Usage: modpost vmlinux module1.o module2.o ...
+ */
+-
++#define _GNU_SOURCE
++#include <stdio.h>
+ #include <ctype.h>
+ #include "modpost.h"
+ #include "../../include/linux/license.h"
+@@ -385,6 +386,8 @@ static void parse_elf(struct elf_info *info, const char *filename)
+ info->export_unused_gpl_sec = i;
+ else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
+ info->export_gpl_future_sec = i;
++ else if (strcmp(secname, "__markers_strings") == 0)
++ info->markers_strings_sec = i;
+
+ if (sechdrs[i].sh_type != SHT_SYMTAB)
+ continue;
+@@ -900,6 +903,62 @@ static void check_sec_ref(struct module *mod, const char *modname,
+ }
+ }
+
++static void get_markers(struct elf_info *info, struct module *mod)
++{
++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec];
++ const char *strings = (const char *) info->hdr + sh->sh_offset;
++ const Elf_Sym *sym, *first_sym, *last_sym;
++ size_t n;
++ /* Fill mod->markers with one output line per __mstrtab_* symbol. */
++ if (!info->markers_strings_sec) /* index 0: section was not recorded */
++ return;
++
++ /*
++ * First count the strings. We look for all the symbols defined
++ * in the __markers_strings section named __mstrtab_*. For
++ * these local names, the compiler puts a random .NNN suffix on,
++ * so the names don't correspond exactly.
++ */
++ first_sym = last_sym = NULL;
++ n = 0;
++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++)
++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
++ sym->st_shndx == info->markers_strings_sec &&
++ !strncmp(info->strtab + sym->st_name,
++ "__mstrtab_", sizeof "__mstrtab_" - 1)) {
++ if (first_sym == NULL)
++ first_sym = sym;
++ last_sym = sym;
++ ++n;
++ }
++
++ if (n == 0)
++ return;
++
++ /*
++ * Now collect each name and format into a line for the output.
++ * Lines look like:
++ * marker_name vmlinux marker %s format %d
++ * The format string after the second \t can use whitespace.
++ */
++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n));
++ mod->nmarkers = n;
++
++ n = 0;
++ for (sym = first_sym; sym <= last_sym; sym++) /* same filter as above */
++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
++ sym->st_shndx == info->markers_strings_sec &&
++ !strncmp(info->strtab + sym->st_name,
++ "__mstrtab_", sizeof "__mstrtab_" - 1)) {
++ const char *name = strings + sym->st_value;
++ const char *fmt = strchr(name, '\0') + 1; /* just past name's NUL */
++ char *line = NULL;
++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
++ NOFAIL(line); /* NOTE(review): asprintf's return value is unchecked */
++ mod->markers[n++] = line;
++ }
++}
++
+ /**
+ * Functions used only during module init is marked __init and is stored in
+ * a .init.text section. Likewise data is marked __initdata and stored in
+@@ -1103,6 +1162,8 @@ static void read_symbols(char *modname)
+ get_src_version(modname, mod->srcversion,
+ sizeof(mod->srcversion)-1);
+
++ get_markers(&info, mod);
++
+ parse_elf_finish(&info);
+
+ /* Our trick to get versioning for struct_module - it's
+@@ -1436,6 +1497,93 @@ static void write_dump(const char *fname)
+ write_if_changed(&buf, fname);
+ }
+
++static void add_marker(struct module *mod, const char *name, const char *fmt)
++{
++ char *line = NULL;
++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
++ NOFAIL(line); /* NOTE(review): asprintf's return value is unchecked */
++ /* Grow mod->markers by one slot and append the formatted line. */
++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
++ sizeof mod->markers[0])));
++ mod->markers[mod->nmarkers++] = line;
++}
++
++static void read_markers(const char *fname)
++{
++ unsigned long size, pos = 0;
++ void *file = grab_file(fname, &size);
++ char *line;
++ /* Load an existing markers list; each line is marker, module, format. */
++ if (!file) /* No old markers, silently ignore */
++ return;
++
++ while ((line = get_next_line(&pos, file, size))) {
++ char *marker, *modname, *fmt;
++ struct module *mod;
++
++ marker = line;
++ modname = strchr(marker, '\t');
++ if (!modname)
++ goto fail;
++ *modname++ = '\0'; /* split in place at the first tab */
++ fmt = strchr(modname, '\t');
++ if (!fmt)
++ goto fail;
++ *fmt++ = '\0'; /* the rest of the line is the format */
++ if (*marker == '\0' || *modname == '\0')
++ goto fail;
++
++ mod = find_module(modname);
++ if (!mod) {
++ if (is_vmlinux(modname))
++ have_vmlinux = 1;
++ mod = new_module(NOFAIL(strdup(modname)));
++ mod->skip = 1; /* module known only from the markers file */
++ }
++
++ add_marker(mod, marker, fmt);
++ }
++ return; /* NOTE(review): the grab_file() buffer is not released here */
++fail:
++ fatal("parse error in markers list file\n");
++}
++
++static int compare_strings(const void *a, const void *b)
++{
++ return strcmp(*(const char **) a, *(const char **) b); /* elems: char * */
++}
++
++static void write_markers(const char *fname)
++{
++ struct buffer buf = { };
++ struct module *mod;
++ size_t i;
++ /* Emit each module's marker lines, sorted and deduplicated, to fname. */
++ for (mod = modules; mod; mod = mod->next)
++ if ((!external_module || !mod->skip) && mod->markers != NULL) {
++ /*
++ * Sort the strings so we can skip duplicates when
++ * we write them out.
++ */
++ qsort(mod->markers, mod->nmarkers,
++ sizeof mod->markers[0], &compare_strings);
++ for (i = 0; i < mod->nmarkers; ++i) {
++ char *line = mod->markers[i];
++ buf_write(&buf, line, strlen(line));
++ while (i + 1 < mod->nmarkers &&
++ !strcmp(mod->markers[i],
++ mod->markers[i + 1]))
++ free(mod->markers[i++]); /* drop a duplicate */
++ free(mod->markers[i]); /* last copy of the line just written */
++ }
++ free(mod->markers);
++ mod->markers = NULL; /* all lines were freed above */
++ }
++
++ write_if_changed(&buf, fname);
++}
++
++
+ int main(int argc, char **argv)
+ {
+ struct module *mod;
+@@ -1444,8 +1592,10 @@ int main(int argc, char **argv)
+ char *kernel_read = NULL, *module_read = NULL;
+ char *dump_write = NULL;
+ int opt;
++ char *markers_read = NULL;
++ char *markers_write = NULL;
+
+- while ((opt = getopt(argc, argv, "i:I:mo:a")) != -1) {
++ while ((opt = getopt(argc, argv, "i:I:mo:aM:K:")) != -1) {
+ switch(opt) {
+ case 'i':
+ kernel_read = optarg;
+@@ -1463,6 +1613,12 @@ int main(int argc, char **argv)
+ case 'a':
+ all_versions = 1;
+ break;
++ case 'M':
++ markers_write = optarg;
++ break;
++ case 'K':
++ markers_read = optarg;
++ break;
+ default:
+ exit(1);
+ }
+@@ -1502,5 +1658,11 @@ int main(int argc, char **argv)
+ if (dump_write)
+ write_dump(dump_write);
+
++ if (markers_read)
++ read_markers(markers_read);
++
++ if (markers_write)
++ write_markers(markers_write);
++
+ return 0;
+ }
+diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
+index d398c61..27b05e6 100644
+--- a/scripts/mod/modpost.h
++++ b/scripts/mod/modpost.h
+@@ -107,6 +107,8 @@ struct module {
+ int has_init;
+ int has_cleanup;
+ struct buffer dev_table_buf;
++ char **markers;
++ size_t nmarkers;
+ char srcversion[25];
+ };
+
+@@ -121,6 +123,7 @@ struct elf_info {
+ Elf_Section export_gpl_sec;
+ Elf_Section export_unused_gpl_sec;
+ Elf_Section export_gpl_future_sec;
++ Elf_Section markers_strings_sec;
+ const char *strtab;
+ char *modinfo;
+ unsigned int modinfo_len;
Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch
===================================================================
--- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch (rev 0)
+++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u4.patch 2008-07-04 02:18:23 UTC (rev 6)
@@ -0,0 +1,1917 @@
+diff --git a/Makefile b/Makefile
+index ce04b1f..f059302 100644
+--- a/Makefile
++++ b/Makefile
+@@ -581,7 +581,7 @@ quiet_cmd_vmlinux__ ?= LD $@
+ cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \
+ -T $(vmlinux-lds) $(vmlinux-init) \
+ --start-group $(vmlinux-main) --end-group \
+- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^)
++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^)
+
+ # Generate new vmlinux version
+ quiet_cmd_vmlinux_version = GEN .version
+@@ -686,10 +686,30 @@ $(KALLSYMS): scripts ;
+
+ endif # ifdef CONFIG_KALLSYMS
+
++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has
++# relevant sections renamed as per the linker script.
++quiet_cmd_vmlinux-modpost = LD $@
++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \
++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \
++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^)
++define rule_vmlinux-modpost
++ :
++ +$(call cmd,vmlinux-modpost)
++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@
++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd
++endef
++
++
+ # vmlinux image - including updated kernel symbols
+-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE
++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE
++ $(call vmlinux-modpost)
+ $(call if_changed_rule,vmlinux__)
+
++# build vmlinux.o first to catch section mismatch errors early
++$(kallsyms.o): vmlinux.o
++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE
++ $(call if_changed_rule,vmlinux-modpost)
++
+ # The actual objects are generated when descending,
+ # make sure no implicit rule kicks in
+ $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ;
+diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
+index cf069b7..d39d5c5 100644
+--- a/arch/i386/Kconfig.debug
++++ b/arch/i386/Kconfig.debug
+@@ -29,6 +29,13 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ config DEBUG_STACK_USAGE
+ bool "Stack utilization instrumentation"
+ depends on DEBUG_KERNEL
+diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
+index d6cc9c0..7bbda2e 100644
+--- a/arch/i386/kernel/vmlinux.lds.S
++++ b/arch/i386/kernel/vmlinux.lds.S
+@@ -52,6 +52,7 @@ SECTIONS
+ /* writeable */
+ .data : { /* Data */
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ }
+
+diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug
+index 2782b48..17d4a44 100644
+--- a/arch/ia64/Kconfig.debug
++++ b/arch/ia64/Kconfig.debug
+@@ -12,6 +12,12 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
+
+ choice
+ prompt "Physical memory granularity"
+diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
+index a676e79..c6ea47e 100644
+--- a/arch/ia64/kernel/vmlinux.lds.S
++++ b/arch/ia64/kernel/vmlinux.lds.S
+@@ -193,7 +193,7 @@ SECTIONS
+
+ data : { } :data
+ .data : AT(ADDR(.data) - LOAD_OFFSET)
+- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS }
++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS }
+
+ . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */
+ .got : AT(ADDR(.got) - LOAD_OFFSET)
+diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug
+index 860472a..5c3a9f5 100644
+--- a/arch/ppc64/Kconfig.debug
++++ b/arch/ppc64/Kconfig.debug
+@@ -16,6 +16,13 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ config DEBUG_STACK_USAGE
+ bool "Stack utilization instrumentation"
+ depends on DEBUG_KERNEL
+diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S
+index 4103cc1..76d0bf0 100644
+--- a/arch/ppc64/kernel/vmlinux.lds.S
++++ b/arch/ppc64/kernel/vmlinux.lds.S
+@@ -118,6 +118,7 @@ SECTIONS
+ .data : {
+ *(.data .data.rel* .toc1)
+ *(.branch_lt)
++ MARKER
+ }
+
+ .opd : {
+diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug
+index cd8d39f..f39bf8d 100644
+--- a/arch/sparc64/Kconfig.debug
++++ b/arch/sparc64/Kconfig.debug
+@@ -21,6 +21,13 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ config DEBUG_DCFLUSH
+ bool "D-cache flush debugging"
+ depends on DEBUG_KERNEL
+diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
+index a710d38..d4b3b62 100644
+--- a/arch/sparc64/kernel/vmlinux.lds.S
++++ b/arch/sparc64/kernel/vmlinux.lds.S
+@@ -27,6 +27,7 @@ SECTIONS
+ .data :
+ {
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ }
+ .data1 : { *(.data1) }
+diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
+index cb33186..d0260cb 100644
+--- a/arch/x86_64/Kconfig.debug
++++ b/arch/x86_64/Kconfig.debug
+@@ -55,6 +55,13 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ config IOMMU_LEAK
+ bool "IOMMU leak tracing"
+ depends on DEBUG_KERNEL
+diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
+index f656f19..12ce116 100644
+--- a/arch/x86_64/kernel/vmlinux.lds.S
++++ b/arch/x86_64/kernel/vmlinux.lds.S
+@@ -33,6 +33,7 @@ SECTIONS
+
+ .data : { /* Data */
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ }
+
+diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
+index 54fb0a0..cbdcd93 100644
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -158,7 +158,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator);
+
+ // ================= main 802.3ad protocol functions ==================
+ static int ad_lacpdu_send(struct port *port);
+-static int ad_marker_send(struct port *port, struct marker *marker);
++static int ad_marker_send(struct port *port, struct bond_marker *marker);
+ static void ad_mux_machine(struct port *port);
+ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
+ static void ad_tx_machine(struct port *port);
+@@ -171,8 +171,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast);
+ static void ad_initialize_lacpdu(struct lacpdu *Lacpdu);
+ static void ad_enable_collecting_distributing(struct port *port);
+ static void ad_disable_collecting_distributing(struct port *port);
+-static void ad_marker_info_received(struct marker *marker_info, struct port *port);
+-static void ad_marker_response_received(struct marker *marker, struct port *port);
++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port);
++static void ad_marker_response_received(struct bond_marker *marker, struct port *port);
+
+
+ /////////////////////////////////////////////////////////////////////////////////
+@@ -945,12 +945,13 @@ static int ad_lacpdu_send(struct port *port)
+ * Returns: 0 on success
+ * < 0 on error
+ */
+-static int ad_marker_send(struct port *port, struct marker *marker)
++static int ad_marker_send(struct port *port, struct bond_marker *marker)
+ {
+ struct slave *slave = port->slave;
+ struct sk_buff *skb;
+- struct marker_header *marker_header;
+- int length = sizeof(struct marker_header);
++ struct bond_marker_header *marker_header;
++ int length = sizeof(struct bond_marker_header);
++
+ struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR;
+
+ skb = dev_alloc_skb(length + 16);
+@@ -965,7 +966,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
+ skb->nh.raw = skb->data + ETH_HLEN;
+ skb->protocol = PKT_TYPE_LACPDU;
+
+- marker_header = (struct marker_header *)skb_put(skb, length);
++ marker_header = (struct bond_marker_header *)skb_put(skb, length);
+
+ marker_header->ad_header.destination_address = lacpdu_multicast_address;
+ /* Note: source addres is set to be the member's PERMANENT address, because we use it
+@@ -1762,7 +1763,7 @@ static void ad_disable_collecting_distributing(struct port *port)
+ */
+ static void ad_marker_info_send(struct port *port)
+ {
+- struct marker marker;
++ struct bond_marker marker;
+ u16 index;
+
+ // fill the marker PDU with the appropriate values
+@@ -1795,13 +1796,14 @@ static void ad_marker_info_send(struct port *port)
+ * @port: the port we're looking at
+ *
+ */
+-static void ad_marker_info_received(struct marker *marker_info,struct port *port)
++static void ad_marker_info_received(struct bond_marker *marker_info,
++ struct port *port)
+ {
+- struct marker marker;
++ struct bond_marker marker;
+
+ // copy the received marker data to the response marker
+ //marker = *marker_info;
+- memcpy(&marker, marker_info, sizeof(struct marker));
++ memcpy(&marker, marker_info, sizeof(struct bond_marker));
+ // change the marker subtype to marker response
+ marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE;
+ // send the marker response
+@@ -1820,7 +1822,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port
+ * response for marker PDU's, in this stage, but only to respond to marker
+ * information.
+ */
+-static void ad_marker_response_received(struct marker *marker, struct port *port)
++static void ad_marker_response_received(struct bond_marker *marker,
++ struct port *port)
+ {
+ marker=NULL; // just to satisfy the compiler
+ port=NULL; // just to satisfy the compiler
+@@ -2208,15 +2211,16 @@ void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 leng
+ case AD_TYPE_MARKER:
+ // No need to convert fields to Little Endian since we don't use the marker's fields.
+
+- switch (((struct marker *)lacpdu)->tlv_type) {
++ switch (((struct bond_marker *)lacpdu)->tlv_type) {
+ case AD_MARKER_INFORMATION_SUBTYPE:
+ dprintk("Received Marker Information on port %d\n", port->actor_port_number);
+- ad_marker_info_received((struct marker *)lacpdu, port);
++ ad_marker_info_received((struct bond_marker *)lacpdu, port);
++
+ break;
+
+ case AD_MARKER_RESPONSE_SUBTYPE:
+ dprintk("Received Marker Response on port %d\n", port->actor_port_number);
+- ad_marker_response_received((struct marker *)lacpdu, port);
++ ad_marker_response_received((struct bond_marker *)lacpdu, port);
+ break;
+
+ default:
+diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
+index 4119f0f..7e770ff 100644
+--- a/drivers/net/bonding/bond_3ad.h
++++ b/drivers/net/bonding/bond_3ad.h
+@@ -105,7 +105,8 @@ typedef enum {
+ typedef enum {
+ AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype
+ AD_MARKER_RESPONSE_SUBTYPE // marker response subtype
+-} marker_subtype_t;
++} bond_marker_subtype_t;
++
+
+ // timers types(43.4.9 in the 802.3ad standard)
+ typedef enum {
+@@ -161,7 +162,7 @@ typedef struct lacpdu_header {
+ } lacpdu_header_t;
+
+ // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard)
+-typedef struct marker {
++typedef struct bond_marker {
+ u8 subtype; // = 0x02 (marker PDU)
+ u8 version_number; // = 0x01
+ u8 tlv_type; // = 0x01 (marker information)
+@@ -174,12 +175,12 @@ typedef struct marker {
+ u8 tlv_type_terminator; // = 0x00
+ u8 terminator_length; // = 0x00
+ u8 reserved_90[90]; // = 0
+-} marker_t;
++} bond_marker_t;
+
+-typedef struct marker_header {
++typedef struct bond_marker_header {
+ struct ad_header ad_header;
+- struct marker marker;
+-} marker_header_t;
++ struct bond_marker marker;
++} bond_marker_header_t;
+
+ #pragma pack()
+
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index bb340cb..fd6e57f 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -6,10 +6,18 @@
+ #define VMLINUX_SYMBOL(_sym_) _sym_
+ #endif
+
++/* Kernel markers : pointers */
++#define MARKER \
++ . = ALIGN(8); \
++ VMLINUX_SYMBOL(__start___markers) = .; \
++ *(__markers) \
++ VMLINUX_SYMBOL(__stop___markers) = .;
++
+ #define RODATA \
+ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \
+ *(.rodata) *(.rodata.*) \
+ *(__vermagic) /* Kernel version magic */ \
++ *(__markers_strings) /* Markers: strings */ \
+ } \
+ \
+ .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \
+diff --git a/include/linux/marker.h b/include/linux/marker.h
+new file mode 100644
+index 0000000..efbc82b
+--- /dev/null
++++ b/include/linux/marker.h
+@@ -0,0 +1,139 @@
++#ifndef _LINUX_MARKER_H
++#define _LINUX_MARKER_H
++
++/*
++ * Code markup for dynamic and static tracing.
++ *
++ * See Documentation/marker.txt.
++ *
++ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers at polymtl.ca>
++ *
++ * This file is released under the GPLv2.
++ * See the file COPYING for more details.
++ */
++
++#include <linux/types.h>
++
++struct module;
++struct marker;
++
++/**
++ * marker_probe_func - Type of a marker probe function
++ * @probe_private: probe private data
++ * @call_private: call site private data
++ * @fmt: format string
++ * @args: variable argument list pointer. Use a pointer to overcome C's
++ * inability to pass this around as a pointer in a portable manner in
++ * the callee otherwise.
++ *
++ * Type of marker probe functions. They receive the private data and need to
++ * parse the format string to recover the variable argument list.
++ */
++typedef void marker_probe_func(void *probe_private, void *call_private,
++ const char *fmt, va_list *args);
++
++struct marker_probe_closure {
++ marker_probe_func *func; /* Callback */
++ void *probe_private; /* Private probe data */
++};
++
++struct marker {
++ const char *name; /* Marker name */
++ const char *format; /* Marker format string, describing the
++ * variable argument list.
++ */
++ char state; /* Marker state. */
++ char ptype; /* probe type : 0 : single, 1 : multi */
++ void (*call)(const struct marker *mdata, /* Probe wrapper */
++ void *call_private, const char *fmt, ...);
++ struct marker_probe_closure single;
++ struct marker_probe_closure *multi;
++} __attribute__((aligned(8)));
++
++#ifdef CONFIG_MARKERS
++
++/*
++ * Note : the empty asm volatile with read constraint is used here instead of a
++ * "used" attribute to fix a gcc 4.1.x bug.
++ * Make sure the alignment of the structure in the __markers section will
++ * not add unwanted padding between the beginning of the section and the
++ * structure. Force alignment to the same alignment as the section start.
++ */
++#define __trace_mark(name, call_private, format, args...) \
++ do { \
++ static const char __mstrtab_##name[] \
++ __attribute__((section("__markers_strings"))) \
++ = #name "\0" format; \
++ static struct marker __mark_##name \
++ __attribute__((section("__markers"), aligned(8))) = \
++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \
++ 0, 0, marker_probe_cb, \
++ { __mark_empty_function, NULL}, NULL }; \
++ __mark_check_format(format, ## args); \
++ if (unlikely(__mark_##name.state)) { \
++ (*__mark_##name.call) \
++ (&__mark_##name, call_private, \
++ format, ## args); \
++ } \
++ } while (0)
++
++extern void marker_update_probe_range(struct marker *begin,
++ struct marker *end);
++#else /* !CONFIG_MARKERS */
++#define __trace_mark(name, call_private, format, args...) \
++ __mark_check_format(format, ## args)
++static inline void marker_update_probe_range(struct marker *begin,
++ struct marker *end)
++{ }
++#endif /* CONFIG_MARKERS */
++
++/**
++ * trace_mark - Marker
++ * @name: marker name, not quoted.
++ * @format: format string
++ * @args...: variable argument list
++ *
++ * Places a marker.
++ */
++#define trace_mark(name, format, args...) \
++ __trace_mark(name, NULL, format, ## args)
++
++/**
++ * MARK_NOARGS - Format string for a marker with no argument.
++ */
++#define MARK_NOARGS " "
++
++/* To be used for string format validity checking with gcc */
++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...)
++{
++}
++
++extern marker_probe_func __mark_empty_function;
++
++extern void marker_probe_cb(const struct marker *mdata,
++ void *call_private, const char *fmt, ...);
++extern void marker_probe_cb_noarg(const struct marker *mdata,
++ void *call_private, const char *fmt, ...);
++
++/*
++ * Connect a probe to a marker.
++ * private data pointer must be a valid allocated memory address, or NULL.
++ */
++extern int marker_probe_register(const char *name, const char *format,
++ marker_probe_func *probe, void *probe_private);
++
++/*
++ * Disconnect a probe, identified by function and private data, from a marker.
++ */
++extern int marker_probe_unregister(const char *name,
++ marker_probe_func *probe, void *probe_private);
++/*
++ * Unregister a marker by providing the registered private data.
++ */
++extern int marker_probe_unregister_private_data(marker_probe_func *probe,
++ void *probe_private);
++
++extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
++ int num);
++
++#endif
+diff --git a/include/linux/module.h b/include/linux/module.h
+index 8da8948..2ad5efd 100644
+--- a/include/linux/module.h
++++ b/include/linux/module.h
+@@ -18,6 +18,7 @@
+ #include <linux/stringify.h>
+ #include <linux/kobject.h>
+ #include <linux/moduleparam.h>
++#include <linux/marker.h>
+ #include <asm/local.h>
+
+ #include <asm/module.h>
+@@ -328,6 +329,10 @@ struct module
+ /* The command line arguments (may be mangled). People like
+ keeping pointers to this stuff */
+ char *args;
++#ifdef CONFIG_MARKERS
++ struct marker *markers;
++ unsigned int num_markers;
++#endif
+ };
+
+ /* FIXME: It'd be nice to isolate modules during init, too, so they
+@@ -448,6 +453,7 @@ int register_module_notifier(struct notifier_block * nb);
+ int unregister_module_notifier(struct notifier_block * nb);
+
+ extern void print_modules(void);
++extern void module_update_markers(void);
+ #else /* !CONFIG_MODULES... */
+ #define EXPORT_SYMBOL(sym)
+ #define EXPORT_SYMBOL_GPL(sym)
+@@ -537,6 +543,10 @@ static inline int unregister_module_notifier(struct notifier_block * nb)
+ static inline void print_modules(void)
+ {
+ }
++
++static inline void module_update_markers(void)
++{
++}
+ #endif /* CONFIG_MODULES */
+
+ #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x)
+diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
+index 91057d6..bcd0acb 100644
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -42,6 +42,19 @@
+ #include <linux/cpumask.h>
+ #include <linux/seqlock.h>
+
++/*
++ * Prevent the compiler from merging or refetching accesses. The compiler
++ * is also forbidden from reordering successive instances of ACCESS_ONCE(),
++ * but only when the compiler is aware of some particular ordering. One way
++ * to make the compiler aware of ordering is to put the two invocations of
++ * ACCESS_ONCE() in different C statements.
++ *
++ * This macro does absolutely -nothing- to prevent the CPU from reordering,
++ * merging, or refetching absolutely anything at any time.
++ *
++ */
++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
++
+ /**
+ * struct rcu_head - callback structure for use with RCU
+ * @next: next update requests in a list
+@@ -102,6 +115,7 @@ struct rcu_data {
+ struct rcu_head *donelist;
+ struct rcu_head **donetail;
+ int cpu;
++ struct rcu_head barrier;
+ };
+
+ DECLARE_PER_CPU(struct rcu_data, rcu_data);
+@@ -266,6 +280,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head,
+ extern void FASTCALL(call_rcu_bh(struct rcu_head *head,
+ void (*func)(struct rcu_head *head)));
+ extern void synchronize_kernel(void);
+-
++extern void rcu_barrier(void);
+ #endif /* __KERNEL__ */
+ #endif /* __LINUX_RCUPDATE_H */
+diff --git a/kernel/Makefile b/kernel/Makefile
+index 0b8c8ca..f8248bc 100644
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o
+ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+ obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o
+ obj-$(CONFIG_KPROBES) += kprobes.o
++obj-$(CONFIG_MARKERS) += marker.o
+
+ ifneq ($(CONFIG_IA64),y)
+ # According to Alan Modra <alan at linuxcare.com.au>, the -fno-omit-frame-pointer is
+diff --git a/kernel/marker.c b/kernel/marker.c
+new file mode 100644
+index 0000000..c4c2cd8
+--- /dev/null
++++ b/kernel/marker.c
+@@ -0,0 +1,851 @@
++/*
++ * Copyright (C) 2007 Mathieu Desnoyers
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ */
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/types.h>
++#include <linux/jhash.h>
++#include <linux/list.h>
++#include <linux/rcupdate.h>
++#include <linux/marker.h>
++#include <linux/err.h>
++
++extern struct marker __start___markers[];
++extern struct marker __stop___markers[];
++
++/* Set to 1 to enable marker debug output */
++const int marker_debug;
++
++/*
++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
++ * and module markers and the hash table.
++ */
++static DEFINE_MUTEX(markers_mutex);
++
++/*
++ * Marker hash table, containing the active markers.
++ * Protected by markers_mutex.
++ */
++#define MARKER_HASH_BITS 6
++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
++
++/*
++ * Note about RCU :
++ * It is used to make sure every handler has finished using its private data
++ * between two consecutive operations (add or remove) on a given marker. It is
++ * also used to delay the free of multiple probes array until a quiescent state
++ * is reached.
++ * Marker entry modifications are protected by the markers_mutex.
++ */
++struct marker_entry {
++ struct hlist_node hlist;
++ char *format;
++ void (*call)(const struct marker *mdata, /* Probe wrapper */
++ void *call_private, const char *fmt, ...);
++ struct marker_probe_closure single;
++ struct marker_probe_closure *multi;
++ int refcount; /* Number of times armed. 0 if disarmed. */
++ struct rcu_head rcu;
++ void *oldptr;
++ char rcu_pending:1;
++ char ptype:1;
++ char name[0]; /* Contains name'\0'format'\0' */
++};
++
++static struct hlist_head marker_table[MARKER_TABLE_SIZE];
++
++/**
++ * __mark_empty_function - Empty probe callback
++ * @probe_private: probe private data
++ * @call_private: call site private data
++ * @fmt: format string
++ * @args: pointer to the variable argument list
++ *
++ * Empty callback provided as a probe to the markers. By providing this to a
++ * disabled marker, we make sure the execution flow is always valid even
++ * though the function pointer change and the marker enabling are two distinct
++ * operations that modify the execution flow of preemptible code.
++ */
++void __mark_empty_function(void *probe_private, void *call_private,
++ const char *fmt, va_list *args)
++{
++}
++EXPORT_SYMBOL_GPL(__mark_empty_function);
++
++/*
++ * marker_probe_cb Callback that prepares the variable argument list for probes.
++ * @mdata: pointer of type struct marker
++ * @call_private: caller site private data
++ * @fmt: format string
++ * @...: Variable argument list.
++ *
++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we
++ * need to put a full smp_rmb() in this branch. This is why we do not use
++ * rcu_dereference() for the pointer read.
++ */
++void marker_probe_cb(const struct marker *mdata, void *call_private,
++ const char *fmt, ...)
++{
++ va_list args;
++ char ptype;
++
++ /*
++ * disabling preemption to make sure the teardown of the callbacks can
++ * be done correctly when they are in modules and they ensure RCU read
++ * coherency.
++ */
++ preempt_disable();
++ ptype = ACCESS_ONCE(mdata->ptype);
++ if (likely(!ptype)) {
++ marker_probe_func *func;
++ /* Must read the ptype before ptr. They are not data dependent,
++ * so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func = ACCESS_ONCE(mdata->single.func);
++ /* Must read the ptr before private data. They are not data
++ * dependent, so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ va_start(args, fmt);
++ func(mdata->single.probe_private, call_private, fmt, &args);
++ va_end(args);
++ } else {
++ struct marker_probe_closure *multi;
++ int i;
++ /* Read mdata->ptype before mdata->multi: not data dependent. */
++ smp_rmb();
++ multi = ACCESS_ONCE(mdata->multi);
++ /*
++ * multi points to an array, so the array accesses below depend
++ * on the pointer value. The barrier must follow the pointer
++ * read so the pointer is read _before_ the array data.
++ */
++ smp_read_barrier_depends();
++ for (i = 0; multi[i].func; i++) {
++ va_start(args, fmt);
++ multi[i].func(multi[i].probe_private, call_private, fmt,
++ &args);
++ va_end(args);
++ }
++ }
++ preempt_enable();
++}
++EXPORT_SYMBOL_GPL(marker_probe_cb);
++
++/*
++ * marker_probe_cb_noarg Callback that does not prepare the variable argument list.
++ * @mdata: pointer of type struct marker
++ * @call_private: caller site private data
++ * @fmt: format string
++ * @...: Variable argument list.
++ *
++ * Should be connected to markers "MARK_NOARGS".
++ */
++void marker_probe_cb_noarg(const struct marker *mdata,
++ void *call_private, const char *fmt, ...)
++{
++ va_list args; /* not initialized */
++ char ptype;
++
++ preempt_disable();
++ ptype = ACCESS_ONCE(mdata->ptype);
++ if (likely(!ptype)) {
++ marker_probe_func *func;
++ /* Must read the ptype before ptr. They are not data dependent,
++ * so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func = ACCESS_ONCE(mdata->single.func);
++ /* Must read the ptr before private data. They are not data
++ * dependent, so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func(mdata->single.probe_private, call_private, fmt, &args);
++ } else {
++ struct marker_probe_closure *multi;
++ int i;
++ /* Read mdata->ptype before mdata->multi: not data dependent. */
++ smp_rmb();
++ multi = ACCESS_ONCE(mdata->multi);
++ /*
++ * multi points to an array, so the array accesses below depend
++ * on the pointer value. The barrier must follow the pointer
++ * read so the pointer is read _before_ the array data.
++ */
++ smp_read_barrier_depends();
++ for (i = 0; multi[i].func; i++)
++ multi[i].func(multi[i].probe_private, call_private, fmt,
++ &args);
++ }
++ preempt_enable();
++}
++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
++
++static void free_old_closure(struct rcu_head *head)
++{
++ struct marker_entry *entry = container_of(head,
++ struct marker_entry, rcu);
++ kfree(entry->oldptr);
++ /* Make sure we free the data before setting the pending flag to 0 */
++ smp_wmb();
++ entry->rcu_pending = 0;
++}
++
++static void debug_print_probes(struct marker_entry *entry)
++{
++ int i;
++
++ if (!marker_debug)
++ return;
++
++ if (!entry->ptype) {
++ printk(KERN_DEBUG "Single probe : %p %p\n",
++ entry->single.func,
++ entry->single.probe_private);
++ } else {
++ for (i = 0; entry->multi[i].func; i++)
++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
++ entry->multi[i].func,
++ entry->multi[i].probe_private);
++ }
++}
++
++static struct marker_probe_closure *
++marker_entry_add_probe(struct marker_entry *entry,
++ marker_probe_func *probe, void *probe_private)
++{
++ int nr_probes = 0;
++ struct marker_probe_closure *old, *new;
++
++ WARN_ON(!probe);
++
++ debug_print_probes(entry);
++ old = entry->multi;
++ if (!entry->ptype) {
++ if (entry->single.func == probe &&
++ entry->single.probe_private == probe_private)
++ return ERR_PTR(-EBUSY);
++ if (entry->single.func == __mark_empty_function) {
++ /* 0 -> 1 probes */
++ entry->single.func = probe;
++ entry->single.probe_private = probe_private;
++ entry->refcount = 1;
++ entry->ptype = 0;
++ debug_print_probes(entry);
++ return NULL;
++ } else {
++ /* 1 -> 2 probes */
++ nr_probes = 1;
++ old = NULL;
++ }
++ } else {
++ /* (N -> N+1), (N != 0, 1) probes */
++ for (nr_probes = 0; old[nr_probes].func; nr_probes++)
++ if (old[nr_probes].func == probe
++ && old[nr_probes].probe_private
++ == probe_private)
++ return ERR_PTR(-EBUSY);
++ }
++ /* + 2 : one for new probe, one for NULL func */
++ new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
++ GFP_KERNEL);
++ if (new == NULL)
++ return ERR_PTR(-ENOMEM);
++ if (!old)
++ new[0] = entry->single;
++ else
++ memcpy(new, old,
++ nr_probes * sizeof(struct marker_probe_closure));
++ new[nr_probes].func = probe;
++ new[nr_probes].probe_private = probe_private;
++ entry->refcount = nr_probes + 1;
++ entry->multi = new;
++ entry->ptype = 1;
++ debug_print_probes(entry);
++ return old;
++}
++
++static struct marker_probe_closure *
++marker_entry_remove_probe(struct marker_entry *entry,
++ marker_probe_func *probe, void *probe_private)
++{
++ int nr_probes = 0, nr_del = 0, i;
++ struct marker_probe_closure *old, *new;
++
++ old = entry->multi;
++
++ debug_print_probes(entry);
++ if (!entry->ptype) {
++ /* 0 -> N is an error */
++ WARN_ON(entry->single.func == __mark_empty_function);
++ /* 1 -> 0 probes */
++ WARN_ON(probe && entry->single.func != probe);
++ WARN_ON(entry->single.probe_private != probe_private);
++ entry->single.func = __mark_empty_function;
++ entry->refcount = 0;
++ entry->ptype = 0;
++ debug_print_probes(entry);
++ return NULL;
++ } else {
++ /* (N -> M), (N > 1, M >= 0) probes */
++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
++ if ((!probe || old[nr_probes].func == probe)
++ && old[nr_probes].probe_private
++ == probe_private)
++ nr_del++;
++ }
++ }
++
++ if (nr_probes - nr_del == 0) {
++ /* N -> 0, (N > 1) */
++ entry->single.func = __mark_empty_function;
++ entry->refcount = 0;
++ entry->ptype = 0;
++ } else if (nr_probes - nr_del == 1) {
++ /* N -> 1, (N > 1) */
++ for (i = 0; old[i].func; i++)
++ if ((probe && old[i].func != probe) ||
++ old[i].probe_private != probe_private)
++ entry->single = old[i];
++ entry->refcount = 1;
++ entry->ptype = 0;
++ } else {
++ int j = 0;
++ /* N -> M, (N > 1, M > 1) */
++ /* + 1 for NULL */
++ new = kzalloc((nr_probes - nr_del + 1)
++ * sizeof(struct marker_probe_closure), GFP_KERNEL);
++ if (new == NULL)
++ return ERR_PTR(-ENOMEM);
++ for (i = 0; old[i].func; i++)
++ if ((probe && old[i].func != probe) ||
++ old[i].probe_private != probe_private)
++ new[j++] = old[i];
++ entry->refcount = nr_probes - nr_del;
++ entry->ptype = 1;
++ entry->multi = new;
++ }
++ debug_print_probes(entry);
++ return old;
++}
++
++/*
++ * Look up the marker entry registered under @name in the marker hash table.
++ * The caller must hold markers_mutex.
++ * Returns the matching entry, or NULL when no entry carries this name.
++ */
++static struct marker_entry *get_marker(const char *name)
++{
++	struct marker_entry *entry;
++	struct hlist_node *pos;
++	struct hlist_head *bucket;
++	u32 key;
++
++	key = jhash(name, strlen(name), 0);
++	bucket = &marker_table[key & ((1 << MARKER_HASH_BITS)-1)];
++	hlist_for_each_entry(entry, pos, bucket, hlist) {
++		if (strcmp(name, entry->name) == 0)
++			return entry;
++	}
++	return NULL;
++}
++
++/*
++ * Allocate an entry for @name (with an optional @format) and insert it in the
++ * marker hash table. Must be called with markers_mutex held.
++ * Returns the new entry, ERR_PTR(-EBUSY) when an entry with the same name is
++ * already present, or ERR_PTR(-ENOMEM) when the allocation fails.
++ */
++static struct marker_entry *add_marker(const char *name, const char *format)
++{
++	size_t name_len = strlen(name) + 1;
++	size_t format_len = format ? strlen(format) + 1 : 0;
++	u32 key = jhash(name, name_len-1, 0);
++	struct hlist_head *bucket;
++	struct hlist_node *pos;
++	struct marker_entry *entry;
++
++	bucket = &marker_table[key & ((1 << MARKER_HASH_BITS)-1)];
++	hlist_for_each_entry(entry, pos, bucket, hlist) {
++		if (strcmp(name, entry->name) != 0)
++			continue;
++		printk(KERN_NOTICE
++			"Marker %s busy\n", name);
++		return ERR_PTR(-EBUSY);	/* Already there */
++	}
++	/*
++	 * The name and the format string live in the same kmalloc'ed chunk as
++	 * the entry itself, hence the variable allocation size. Could cause
++	 * some memory fragmentation if overused.
++	 */
++	entry = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
++			GFP_KERNEL);
++	if (!entry)
++		return ERR_PTR(-ENOMEM);
++	memcpy(&entry->name[0], name, name_len);
++	if (!format) {
++		entry->format = NULL;
++		entry->call = marker_probe_cb;
++	} else {
++		/* the format copy sits right behind the name copy */
++		entry->format = &entry->name[name_len];
++		memcpy(entry->format, format, format_len);
++		if (strcmp(entry->format, MARK_NOARGS) != 0)
++			entry->call = marker_probe_cb;
++		else
++			entry->call = marker_probe_cb_noarg;
++		trace_mark(core_marker_format, "name %s format %s",
++				entry->name, entry->format);
++	}
++	/* a fresh entry starts with no probe connected */
++	entry->single.func = __mark_empty_function;
++	entry->single.probe_private = NULL;
++	entry->multi = NULL;
++	entry->ptype = 0;
++	entry->refcount = 0;
++	entry->rcu_pending = 0;
++	hlist_add_head(&entry->hlist, bucket);
++	return entry;
++}
++
++/*
++ * Unlink the entry named @name from the marker hash table and free it.
++ * Must be called with markers_mutex held.
++ * Returns 0 on success, -ENOENT when no such entry exists, or -EBUSY when the
++ * entry still has a probe connected (it is then left in place).
++ */
++static int remove_marker(const char *name)
++{
++	struct marker_entry *entry = get_marker(name);
++
++	if (!entry)
++		return -ENOENT;
++	if (entry->single.func != __mark_empty_function)
++		return -EBUSY;
++	hlist_del(&entry->hlist);
++	/* Make sure the call_rcu has been executed */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	kfree(entry);
++	return 0;
++}
++
++/*
++ * Give the entry *entry the format string @format.
++ *
++ * The format is stored in the same allocation as the entry, so a replacement
++ * entry is allocated, takes over the probe state of the old one and its place
++ * in the hash list, and the old entry is freed. *entry is updated to point to
++ * the replacement.
++ * Returns 0 on success or -ENOMEM (in which case *entry is left untouched).
++ */
++static int marker_set_format(struct marker_entry **entry, const char *format)
++{
++	struct marker_entry *stale = *entry;
++	struct marker_entry *fresh;
++	size_t name_len = strlen(stale->name) + 1;
++	size_t format_len = strlen(format) + 1;
++
++	fresh = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
++			GFP_KERNEL);
++	if (!fresh)
++		return -ENOMEM;
++
++	/* name first, format right behind it */
++	memcpy(&fresh->name[0], stale->name, name_len);
++	fresh->format = &fresh->name[name_len];
++	memcpy(fresh->format, format, format_len);
++	if (strcmp(fresh->format, MARK_NOARGS) != 0)
++		fresh->call = marker_probe_cb;
++	else
++		fresh->call = marker_probe_cb_noarg;
++
++	/* inherit the probe state of the entry being replaced */
++	fresh->single = stale->single;
++	fresh->multi = stale->multi;
++	fresh->ptype = stale->ptype;
++	fresh->refcount = stale->refcount;
++	fresh->rcu_pending = 0;
++
++	/* swap the entries in the hash list */
++	hlist_add_before(&fresh->hlist, &stale->hlist);
++	hlist_del(&stale->hlist);
++	/* Make sure the call_rcu has been executed */
++	if (stale->rcu_pending)
++		rcu_barrier();
++	kfree(stale);
++	*entry = fresh;
++	trace_mark(core_marker_format, "name %s format %s",
++			fresh->name, fresh->format);
++	return 0;
++}
++
++/*
++ * Sets the probe callback corresponding to one marker.
++ *
++ * Copies the call and probe state held in *entry into the marker site @elem
++ * and sets elem->state to @active. When the entry has no format yet it takes
++ * the format of @elem through marker_set_format(), which may replace *entry.
++ * Returns 0 on success, -EPERM when the entry and the marker site disagree on
++ * the format string, or the error returned by marker_set_format().
++ */
++static int set_marker(struct marker_entry **entry, struct marker *elem,
++ int active)
++{
++ int ret;
++ WARN_ON(strcmp((*entry)->name, elem->name) != 0);
++
++ if ((*entry)->format) {
++ /* a format is already recorded: the marker site must match it */
++ if (strcmp((*entry)->format, elem->format) != 0) {
++ printk(KERN_NOTICE
++ "Format mismatch for probe %s "
++ "(%s), marker (%s)\n",
++ (*entry)->name,
++ (*entry)->format,
++ elem->format);
++ return -EPERM;
++ }
++ } else {
++ /* first marker site seen for this entry: adopt its format */
++ ret = marker_set_format(entry, elem->format);
++ if (ret)
++ return ret;
++ }
++
++ /*
++ * probe_cb setup (statically known) is done here. It is
++ * asynchronous with the rest of execution, therefore we only
++ * pass from a "safe" callback (with argument) to an "unsafe"
++ * callback (does not set arguments).
++ */
++ elem->call = (*entry)->call;
++ /*
++ * Sanity check :
++ * We only update the single probe private data when the ptr is
++ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
++ */
++ WARN_ON(elem->single.func != __mark_empty_function
++ && elem->single.probe_private
++ != (*entry)->single.probe_private &&
++ !elem->ptype);
++ elem->single.probe_private = (*entry)->single.probe_private;
++ /*
++ * Make sure the private data is valid when we update the
++ * single probe ptr.
++ */
++ smp_wmb();
++ elem->single.func = (*entry)->single.func;
++ /*
++ * We also make sure that the new probe callbacks array is consistent
++ * before setting a pointer to it.
++ */
++ rcu_assign_pointer(elem->multi, (*entry)->multi);
++ /*
++ * Update the function or multi probe array pointer before setting the
++ * ptype.
++ */
++ smp_wmb();
++ elem->ptype = (*entry)->ptype;
++ elem->state = active;
++
++ return 0;
++}
++
++/*
++ * Disable a marker and its probe callback.
++ * Note: only a synchronize_sched() issued after setting elem->call to the
++ * empty function ensures that the original callback is not used anymore. This
++ * is ensured by preemption disabling around the call site.
++ */
++static void disable_marker(struct marker *elem)
++{
++ /* leave "call" as is. It is known statically. */
++ elem->state = 0;
++ elem->single.func = __mark_empty_function;
++ /* Update the function before setting the ptype */
++ smp_wmb();
++ elem->ptype = 0; /* single probe */
++ /*
++ * Leave the private data and id there, because removal is racy and
++ * should be done only after a synchronize_sched(). These are never used
++ * until the next initialization anyway.
++ */
++}
++
++/**
++ * marker_update_probe_range - Update a probe range
++ * @begin: beginning of the range
++ * @end: end of the range (exclusive)
++ *
++ * Walks the markers in [@begin, @end) with markers_mutex held. A marker whose
++ * name has an entry in the hash table gets the probe state of that entry and
++ * is activated when the entry has at least one probe; a marker without entry
++ * is disabled.
++ */
++void marker_update_probe_range(struct marker *begin,
++		struct marker *end)
++{
++	struct marker *site = begin;
++
++	mutex_lock(&markers_mutex);
++	while (site < end) {
++		struct marker_entry *entry = get_marker(site->name);
++
++		if (!entry)
++			disable_marker(site);
++		else
++			/* a failure is ignored, carry on with the next one */
++			set_marker(&entry, site, !!entry->refcount);
++		site++;
++	}
++	mutex_unlock(&markers_mutex);
++}
++
++/*
++ * Update probes, removing the faulty probes.
++ * Issues a synchronize_sched() when no reference to the module passed
++ * as parameter is found in the probes so the probe module can be
++ * safely unloaded from now on.
++ * NOTE(review): this function takes no parameter and no synchronize_sched()
++ * call is visible in its body - the paragraph above looks inherited from
++ * another version of the code; confirm before relying on it.
++ *
++ * Internal callback only changed before the first probe is connected to it.
++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
++ * transitions. All other transitions will leave the old private data valid.
++ * This makes the non-atomicity of the callback/private data updates valid.
++ *
++ * "special case" updates :
++ * 0 -> 1 callback
++ * 1 -> 0 callback
++ * 1 -> 2 callbacks
++ * 2 -> 1 callbacks
++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
++ * Side effect : marker_set_format may delete the marker entry (creating a
++ * replacement).
++ */
++static void marker_update_probes(void)
++{
++ /* Core kernel markers */
++ marker_update_probe_range(__start___markers, __stop___markers);
++ /* Markers in modules. */
++ module_update_markers();
++}
++
++/**
++ * marker_probe_register - Connect a probe to a marker
++ * @name: marker name
++ * @format: format string
++ * @probe: probe handler
++ * @probe_private: probe private data
++ *
++ * private data must be a valid allocated memory address, or NULL.
++ * Returns 0 if ok, error value on error: the error of add_marker() or
++ * marker_entry_add_probe(), or -ENOENT if the entry vanished while the markers
++ * were being updated.
++ * The probe address must at least be aligned on the architecture pointer size.
++ */
++int marker_probe_register(const char *name, const char *format,
++		marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	int ret = 0;
++	struct marker_probe_closure *old;
++
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		entry = add_marker(name, format);
++		if (IS_ERR(entry)) {
++			ret = PTR_ERR(entry);
++			goto end;
++		}
++	}
++	/*
++	 * If we detect that a call_rcu is pending for this marker,
++	 * make sure it's executed now.
++	 */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_add_probe(entry, probe, probe_private);
++	if (IS_ERR(old)) {
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	/* the update takes markers_mutex itself, so drop it meanwhile */
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		/*
++		 * Not expected, the entry holds our probe. Bail out instead
++		 * of dereferencing NULL. The old closure array is leaked on
++		 * purpose: without an entry there is no rcu head to defer
++		 * its release with.
++		 */
++		WARN_ON(1);
++		ret = -ENOENT;
++		goto end;
++	}
++	/*
++	 * The mutex was released above: another caller may have queued the
++	 * rcu head of this entry in the meantime. Wait for it before reusing
++	 * oldptr and the rcu head.
++	 */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++end:
++	mutex_unlock(&markers_mutex);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_register);
++
++/**
++ * marker_probe_unregister - Disconnect a probe from a marker
++ * @name: marker name
++ * @probe: probe function pointer
++ * @probe_private: probe private data
++ *
++ * Returns 0 on success, -ENOENT if no marker entry is named @name (or if it
++ * disappeared while the markers were being updated), or the error reported by
++ * marker_entry_remove_probe().
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled
++ * section has finished.
++ */
++int marker_probe_unregister(const char *name,
++		marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	struct marker_probe_closure *old;
++	int ret = 0;
++
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		ret = -ENOENT;
++		goto end;
++	}
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_remove_probe(entry, probe, probe_private);
++	if (IS_ERR(old)) {
++		/*
++		 * Removal may fail (allocation of the shrunk closure array).
++		 * The error pointer must never be stored in oldptr.
++		 */
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	/* the update takes markers_mutex itself, so drop it meanwhile */
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		/* removed by someone else while the mutex was released */
++		ret = -ENOENT;
++		goto end;
++	}
++	/*
++	 * Another caller may have queued the rcu head of this entry while
++	 * the mutex was released: wait for it before reusing it.
++	 */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++	remove_marker(name);	/* Ignore busy error message */
++end:
++	mutex_unlock(&markers_mutex);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister);
++
++/*
++ * Scan the whole marker hash table for the first entry that has @probe
++ * connected with @probe_private as private data.
++ * The callers in this file hold markers_mutex.
++ * Returns the entry, or NULL when no entry matches.
++ */
++static struct marker_entry *
++get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
++{
++	struct marker_entry *entry;
++	unsigned int i, j;
++	struct hlist_head *head;
++	struct hlist_node *node;
++
++	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
++		head = &marker_table[i];
++		hlist_for_each_entry(entry, node, head, hlist) {
++			if (!entry->ptype) {
++				if (entry->single.func == probe
++						&& entry->single.probe_private
++						== probe_private)
++					return entry;
++			} else {
++				struct marker_probe_closure *closure;
++				closure = entry->multi;
++				/*
++				 * Separate index: reusing the bucket index
++				 * here would corrupt the table walk.
++				 */
++				for (j = 0; closure[j].func; j++) {
++					if (closure[j].func == probe &&
++							closure[j].probe_private
++							== probe_private)
++						return entry;
++				}
++			}
++		}
++	}
++	return NULL;
++}
++
++/**
++ * marker_probe_unregister_private_data - Disconnect a probe from a marker
++ * @probe: probe function
++ * @probe_private: probe private data
++ *
++ * Unregister a probe by providing the registered private data.
++ * Only removes the first marker found in hash table.
++ * Return 0 on success or error value: -ENOENT when no marker has this probe
++ * and private data (or when the marker disappeared while the markers were
++ * being updated), -ENOMEM, or the error of marker_entry_remove_probe().
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled
++ * section has finished.
++ */
++int marker_probe_unregister_private_data(marker_probe_func *probe,
++		void *probe_private)
++{
++	struct marker_entry *entry;
++	int ret = 0;
++	struct marker_probe_closure *old;
++	char *name = NULL;
++	size_t name_len;
++
++	mutex_lock(&markers_mutex);
++	entry = get_marker_from_private_data(probe, probe_private);
++	if (!entry) {
++		ret = -ENOENT;
++		goto end;
++	}
++	/*
++	 * Keep a private copy of the marker name. Once the probe is removed
++	 * the entry cannot be found through its private data anymore, and
++	 * the entry itself (name included) may be replaced while the mutex is
++	 * released below.
++	 */
++	name_len = strlen(entry->name) + 1;
++	name = kmalloc(name_len, GFP_KERNEL);
++	if (!name) {
++		ret = -ENOMEM;
++		goto end;
++	}
++	memcpy(name, entry->name, name_len);
++	if (entry->rcu_pending)
++		rcu_barrier();
++	old = marker_entry_remove_probe(entry, NULL, probe_private);
++	if (IS_ERR(old)) {
++		/* never store an error pointer in oldptr */
++		ret = PTR_ERR(old);
++		goto end;
++	}
++	/* the update takes markers_mutex itself, so drop it meanwhile */
++	mutex_unlock(&markers_mutex);
++	marker_update_probes();		/* may update entry */
++	mutex_lock(&markers_mutex);
++	entry = get_marker(name);
++	if (!entry) {
++		/* removed by someone else while the mutex was released */
++		ret = -ENOENT;
++		goto end;
++	}
++	/*
++	 * Another caller may have queued the rcu head of this entry while
++	 * the mutex was released: wait for it before reusing it.
++	 */
++	if (entry->rcu_pending)
++		rcu_barrier();
++	entry->oldptr = old;
++	entry->rcu_pending = 1;
++	/* write rcu_pending before calling the RCU callback */
++	smp_wmb();
++	call_rcu(&entry->rcu, free_old_closure);
++	remove_marker(name);	/* Ignore busy error message */
++end:
++	mutex_unlock(&markers_mutex);
++	kfree(name);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
++
++/**
++ * marker_get_private_data - Get a marker's probe private data
++ * @name: marker name
++ * @probe: probe to match
++ * @num: get the nth matching probe's private data
++ *
++ * Returns the private data pointer of the nth probe (starting from 0) of the
++ * marker @name whose function is @probe, or ERR_PTR(-ENOENT) when there is no
++ * such marker or no such probe.
++ * The private data pointer should _only_ be dereferenced if the caller is the
++ * owner of the data, or its content could vanish. This is mostly used to
++ * confirm that a caller is the owner of a registered probe.
++ */
++void *marker_get_private_data(const char *name, marker_probe_func *probe,
++		int num)
++{
++	struct marker_entry *entry;
++	struct hlist_node *pos;
++	struct hlist_head *bucket;
++	u32 key = jhash(name, strlen(name), 0);
++
++	bucket = &marker_table[key & ((1 << MARKER_HASH_BITS)-1)];
++	hlist_for_each_entry(entry, pos, bucket, hlist) {
++		struct marker_probe_closure *closure;
++		int idx, seen;
++
++		if (strcmp(name, entry->name) != 0)
++			continue;
++		if (!entry->ptype) {
++			/* single probe: only the match number 0 can exist */
++			if (num != 0 || entry->single.func != probe)
++				break;
++			return entry->single.probe_private;
++		}
++		/* probe array, terminated by a NULL function pointer */
++		closure = entry->multi;
++		seen = 0;
++		for (idx = 0; closure[idx].func; idx++) {
++			if (closure[idx].func == probe && seen++ == num)
++				return closure[idx].probe_private;
++		}
++	}
++	return ERR_PTR(-ENOENT);
++}
++EXPORT_SYMBOL_GPL(marker_get_private_data);
+diff --git a/kernel/module.c b/kernel/module.c
+index 18b39bc..096c3dc 100644
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -1504,6 +1504,8 @@ static struct module *load_module(void __user *umod,
+ void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+ struct exception_table_entry *extable;
+ int gpgsig_ok;
++ unsigned int markersindex;
++ unsigned int markersstringsindex;
+
+ DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
+ umod, len, uargs);
+@@ -1734,6 +1736,9 @@ static struct module *load_module(void __user *umod,
+ tainted |= TAINT_FORCED_MODULE;
+ }
+ #endif
++ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
++ markersstringsindex = find_sec(hdr, sechdrs, secstrings,
++ "__markers_strings");
+
+ /* Now do relocations. */
+ for (i = 1; i < hdr->e_shnum; i++) {
+@@ -1757,6 +1762,12 @@ static struct module *load_module(void __user *umod,
+ goto cleanup;
+ }
+
++#ifdef CONFIG_MARKERS
++ mod->markers = (void *)sechdrs[markersindex].sh_addr;
++ mod->num_markers =
++ sechdrs[markersindex].sh_size / sizeof(*mod->markers);
++#endif
++
+ /* Set up and sort exception table */
+ mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable);
+ mod->extable = extable = (void *)sechdrs[exindex].sh_addr;
+@@ -1768,6 +1779,12 @@ static struct module *load_module(void __user *umod,
+
+ add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+
++#ifdef CONFIG_MARKERS
++ if (!tainted)
++ marker_update_probe_range(mod->markers,
++ mod->markers + mod->num_markers);
++#endif
++
+ err = module_finalize(hdr, sechdrs, mod);
+ if (err < 0)
+ goto cleanup;
+@@ -2175,6 +2192,20 @@ void struct_module(struct module *mod) { return; }
+ EXPORT_SYMBOL(struct_module);
+ #endif
+
++#ifdef CONFIG_MARKERS
++/*
++ * Refresh the markers of every module on the modules list, with module_mutex
++ * held. Nothing is updated while the kernel tainted flags are set.
++ */
++void module_update_markers(void)
++{
++	struct module *m;
++
++	down(&module_mutex);
++	list_for_each_entry(m, &modules, list) {
++		if (tainted)
++			continue;
++		marker_update_probe_range(m->markers,
++				m->markers + m->num_markers);
++	}
++	up(&module_mutex);
++}
++#endif
++
+ static int __init modules_init(void)
+ {
+ return subsystem_register(&module_subsys);
+diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
+index 1b16bfc..20ade31 100644
+--- a/kernel/rcupdate.c
++++ b/kernel/rcupdate.c
+@@ -46,6 +46,7 @@
+ #include <linux/notifier.h>
+ #include <linux/rcupdate.h>
+ #include <linux/cpu.h>
++#include <linux/mutex.h>
+
+ /* Definition for rcupdate control block. */
+ struct rcu_ctrlblk rcu_ctrlblk =
+@@ -98,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head,
+ local_irq_restore(flags);
+ }
+
++static atomic_t rcu_barrier_cpu_count;
++static DEFINE_MUTEX(rcu_barrier_mutex);
++static struct completion rcu_barrier_completion;
++
+ /**
+ * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
+ * @head: structure to be used for queueing the RCU updates.
+@@ -410,6 +415,44 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self,
+ return NOTIFY_OK;
+ }
+
++/*
++ * RCU callback queued by rcu_barrier_func(): drops one count and wakes up the
++ * rcu_barrier() waiter when the count reaches zero.
++ */
++static void rcu_barrier_callback(struct rcu_head *notused)
++{
++	if (!atomic_dec_and_test(&rcu_barrier_cpu_count))
++		return;
++	complete(&rcu_barrier_completion);
++}
++
++/*
++ * Per-cpu step of rcu_barrier(): accounts for this cpu and queues the barrier
++ * callback on its rcu_data.
++ * Called with preemption disabled, and from cross-cpu IRQ context.
++ */
++static void rcu_barrier_func(void *notused)
++{
++	struct rcu_data *rdp = &per_cpu(rcu_data, smp_processor_id());
++
++	atomic_inc(&rcu_barrier_cpu_count);
++	call_rcu(&rdp->barrier, rcu_barrier_callback);
++}
++
++/**
++ * rcu_barrier - Wait until all the in-flight RCUs are complete.
++ *
++ * Queues rcu_barrier_callback() through rcu_barrier_func() on each cpu and
++ * sleeps until the last of these callbacks has run. Must not be called from
++ * interrupt context (BUG_ON below).
++ */
++void rcu_barrier(void)
++{
++ BUG_ON(in_interrupt());
++ /*
++ * rcu_barrier_mutex serializes the callers: they all share the
++ * rcu_barrier_cpu_count counter and the rcu_barrier_completion.
++ * NOTE(review): the original comment spoke of taking the cpucontrol
++ * mutex against CPU hotplug, which this code does not do - confirm
++ * that hotplug is handled elsewhere.
++ */
++ mutex_lock(&rcu_barrier_mutex);
++ init_completion(&rcu_barrier_completion);
++ atomic_set(&rcu_barrier_cpu_count, 0);
++ /* presumably (retry = 0, wait = 1) - verify against on_each_cpu() */
++ on_each_cpu(rcu_barrier_func, NULL, 0, 1);
++ wait_for_completion(&rcu_barrier_completion);
++ mutex_unlock(&rcu_barrier_mutex);
++}
++EXPORT_SYMBOL_GPL(rcu_barrier);
++
++
++
+ static struct notifier_block __devinitdata rcu_nb = {
+ .notifier_call = rcu_cpu_notify,
+ };
+diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
+index b3d31b5..b100a32 100644
+--- a/scripts/Makefile.modpost
++++ b/scripts/Makefile.modpost
+@@ -13,6 +13,7 @@
+ # 2) modpost is then used to
+ # 3) create one <module>.mod.c file pr. module
+ # 4) create one Module.symvers file with CRC for all exported symbols
++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
+ # 5) compile all <module>.mod.c files
+ # 6) final link of the module to a <module.ko> file
+
+@@ -40,6 +41,11 @@ include scripts/Makefile.lib
+
+ symverfile := $(objtree)/Module.symvers
+
++kernelmarkersfile := $(objtree)/Module.markers
++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
++
++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
++
+ # Step 1), find all modules listed in $(MODVERDIR)/
+ __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
+ modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o)))
+@@ -53,16 +59,27 @@ quiet_cmd_modpost = MODPOST
+ cmd_modpost = scripts/mod/modpost \
+ $(if $(CONFIG_MODVERSIONS),-m) \
+ $(if $(KBUILD_EXTMOD),-i,-o) $(symverfile) \
++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \
+ $(filter-out FORCE,$^)
+
+ .PHONY: __modpost
+ __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE
+ $(call cmd,modpost)
+
++quiet_cmd_kernel-mod = MODPOST $@
++ cmd_kernel-mod = $(cmd_modpost) $@
++
++vmlinux.o: FORCE
++ $(call cmd,kernel-mod)
++
+ # Declare generated files as targets for modpost
+ $(symverfile): __modpost ;
+ $(modules:.ko=.mod.c): __modpost ;
+
++ifdef CONFIG_MARKERS
++$(markersfile): __modpost ;
++endif
+
+ # Step 5), compile all *.mod.c files
+
+diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
+index 2a174e5..c25948c 100644
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -10,7 +10,8 @@
+ *
+ * Usage: modpost vmlinux module1.o module2.o ...
+ */
+-
++#define _GNU_SOURCE
++#include <stdio.h>
+ #include <ctype.h>
+ #include "modpost.h"
+
+@@ -289,7 +290,9 @@ parse_elf(struct elf_info *info, const char *filename)
+ if (strcmp(secstrings+sechdrs[i].sh_name, ".modinfo") == 0) {
+ info->modinfo = (void *)hdr + sechdrs[i].sh_offset;
+ info->modinfo_len = sechdrs[i].sh_size;
+- }
++ } else if (strcmp(secstrings+sechdrs[i].sh_name, "__markers_strings") == 0)
++ info->markers_strings_sec = i;
++
+ if (sechdrs[i].sh_type != SHT_SYMTAB)
+ continue;
+
+@@ -397,6 +400,63 @@ is_vmlinux(const char *modname)
+ return strcmp(myname, "vmlinux") == 0;
+ }
+
++/*
++ * Collect the markers defined by the object described by @info into
++ * mod->markers, one "name\tmodule\tformat\n" line per marker.
++ * Does nothing when the object has no __markers_strings section or no
++ * __mstrtab_* symbol in it.
++ */
++static void get_markers(struct elf_info *info, struct module *mod)
++{
++	const Elf_Shdr *sh;
++	const char *strings;
++	const Elf_Sym *sym, *first_sym, *last_sym;
++	size_t n;
++
++	if (!info->markers_strings_sec)
++		return;
++
++	/* only look at the section once we know it exists */
++	sh = &info->sechdrs[info->markers_strings_sec];
++	strings = (const char *) info->hdr + sh->sh_offset;
++
++	/*
++	 * First count the strings. We look for all the symbols defined
++	 * in the __markers_strings section named __mstrtab_*. For
++	 * these local names, the compiler puts a random .NNN suffix on,
++	 * so the names don't correspond exactly.
++	 */
++	first_sym = last_sym = NULL;
++	n = 0;
++	for (sym = info->symtab_start; sym < info->symtab_stop; sym++)
++		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
++		    sym->st_shndx == info->markers_strings_sec &&
++		    !strncmp(info->strtab + sym->st_name,
++			     "__mstrtab_", sizeof "__mstrtab_" - 1)) {
++			if (first_sym == NULL)
++				first_sym = sym;
++			last_sym = sym;
++			++n;
++		}
++
++	if (n == 0)
++		return;
++	/*
++	 * Now collect each name and format into a line for the output.
++	 * Lines look like:
++	 * marker_name vmlinux marker %s format %d
++	 * The format string after the second \t can use whitespace.
++	 */
++	mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n));
++	mod->nmarkers = n;
++
++	n = 0;
++	for (sym = first_sym; sym <= last_sym; sym++)
++		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
++		    sym->st_shndx == info->markers_strings_sec &&
++		    !strncmp(info->strtab + sym->st_name,
++			     "__mstrtab_", sizeof "__mstrtab_" - 1)) {
++			/* the format string follows the name's NUL byte */
++			const char *name = strings + sym->st_value;
++			const char *fmt = strchr(name, '\0') + 1;
++			char *line = NULL;
++			/*
++			 * On failure asprintf leaves the pointer undefined,
++			 * so the return value is what must be tested.
++			 */
++			if (asprintf(&line, "%s\t%s\t%s\n",
++				     name, mod->name, fmt) < 0)
++				line = NULL;
++			NOFAIL(line);
++			mod->markers[n++] = line;
++		}
++}
++
++
+ void
+ read_symbols(char *modname)
+ {
+@@ -426,6 +486,7 @@ read_symbols(char *modname)
+ }
+ maybe_frob_version(modname, info.modinfo, info.modinfo_len,
+ (void *)info.modinfo - (void *)info.hdr);
++ get_markers(&info, mod);
+ parse_elf_finish(&info);
+
+ /* Our trick to get versioning for struct_module - it's
+@@ -682,6 +743,92 @@ write_dump(const char *fname)
+ write_if_changed(&buf, fname);
+ }
+
++/*
++ * Append the line "name\tmodule\tfmt\n" to the marker list of @mod.
++ * Allocation failures are fatal (NOFAIL).
++ */
++static void add_marker(struct module *mod, const char *name, const char *fmt)
++{
++	char *line = NULL;
++	/*
++	 * On failure asprintf leaves the pointer undefined, so the return
++	 * value is what must be tested.
++	 */
++	if (asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt) < 0)
++		line = NULL;
++	NOFAIL(line);
++
++	mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
++						     sizeof mod->markers[0])));
++	mod->markers[mod->nmarkers++] = line;
++}
++
++/*
++ * Load the markers listed in the file @fname, one
++ * "marker\tmodule\tformat" line each, and attach them to their module
++ * (created and flagged to be skipped when it is not known yet).
++ * A file that cannot be read is silently ignored; a malformed line is fatal.
++ */
++static void read_markers(const char *fname)
++{
++	unsigned long size, pos = 0;
++	void *file = grab_file(fname, &size);
++	char *line;
++
++	if (!file)		/* No old markers, silently ignore */
++		return;
++
++	for (line = get_next_line(&pos, file, size); line;
++	     line = get_next_line(&pos, file, size)) {
++		char *marker = line;
++		char *modname, *fmt;
++		struct module *mod;
++
++		/* cut the line in place at its first two tabs */
++		modname = strchr(marker, '\t');
++		if (modname == NULL)
++			goto fail;
++		*modname = '\0';
++		modname++;
++		fmt = strchr(modname, '\t');
++		if (fmt == NULL)
++			goto fail;
++		*fmt = '\0';
++		fmt++;
++		if (marker[0] == '\0' || modname[0] == '\0')
++			goto fail;
++
++		mod = find_module(modname);
++		if (mod == NULL) {
++			if (is_vmlinux(modname))
++				have_vmlinux = 1;
++			mod = new_module(NOFAIL(strdup(modname)));
++			mod->skip = 1;
++		}
++
++		add_marker(mod, marker, fmt);
++	}
++	return;
++fail:
++	fatal("parse error in markers list file\n");
++}
++
++/* qsort() comparator for an array of C strings: strcmp() order. */
++static int compare_strings(const void *a, const void *b)
++{
++	const char *lhs = *(const char **) a;
++	const char *rhs = *(const char **) b;
++
++	return strcmp(lhs, rhs);
++}
++
++/*
++ * Write the marker lines of all modules to the file @fname (through
++ * write_if_changed), each module's lines sorted and without duplicates.
++ * The marker lists are freed along the way and mod->markers is reset to NULL.
++ */
++static void write_markers(const char *fname)
++{
++ struct buffer buf = { };
++ struct module *mod;
++ size_t i;
++
++ for (mod = modules; mod; mod = mod->next)
++ if (mod->markers != NULL) {
++ /*
++ * Sort the strings so we can skip duplicates when
++ * we write them out.
++ */
++ qsort(mod->markers, mod->nmarkers,
++ sizeof mod->markers[0], &compare_strings);
++ for (i = 0; i < mod->nmarkers; ++i) {
++ char *line = mod->markers[i];
++ buf_write(&buf, line, strlen(line));
++ /*
++ * Free each line that equals its successor and step
++ * over it; i ends on the last copy of the run, which
++ * is freed right after the loop.
++ */
++ while (i + 1 < mod->nmarkers &&
++ !strcmp(mod->markers[i],
++ mod->markers[i + 1]))
++ free(mod->markers[i++]);
++ free(mod->markers[i]);
++ }
++ free(mod->markers);
++ mod->markers = NULL;
++ }
++
++ write_if_changed(&buf, fname);
++}
++
+ int
+ main(int argc, char **argv)
+ {
+@@ -690,8 +837,10 @@ main(int argc, char **argv)
+ char fname[SZ];
+ char *dump_read = NULL, *dump_write = NULL;
+ int opt;
++ char *markers_read = NULL;
++ char *markers_write = NULL;
+
+- while ((opt = getopt(argc, argv, "i:mo:")) != -1) {
++ while ((opt = getopt(argc, argv, "i:mo:M:K:")) != -1) {
+ switch(opt) {
+ case 'i':
+ dump_read = optarg;
+@@ -702,6 +851,12 @@ main(int argc, char **argv)
+ case 'o':
+ dump_write = optarg;
+ break;
++ case 'M':
++ markers_write = optarg;
++ break;
++ case 'K':
++ markers_read = optarg;
++ break;
+ default:
+ exit(1);
+ }
+@@ -732,6 +887,12 @@ main(int argc, char **argv)
+ if (dump_write)
+ write_dump(dump_write);
+
++ if (markers_read)
++ read_markers(markers_read);
++
++ if (markers_write)
++ write_markers(markers_write);
++
+ return 0;
+ }
+
+diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
+index 4871343..d79d7ea 100644
+--- a/scripts/mod/modpost.h
++++ b/scripts/mod/modpost.h
+@@ -18,6 +18,7 @@
+ #define Elf_Sym Elf32_Sym
+ #define ELF_ST_BIND ELF32_ST_BIND
+ #define ELF_ST_TYPE ELF32_ST_TYPE
++#define Elf_Section Elf32_Half
+
+ #else
+
+@@ -26,7 +27,7 @@
+ #define Elf_Sym Elf64_Sym
+ #define ELF_ST_BIND ELF64_ST_BIND
+ #define ELF_ST_TYPE ELF64_ST_TYPE
+-
++#define Elf_Section Elf64_Half
+ #endif
+
+ #if KERNEL_ELFDATA != HOST_ELFDATA
+@@ -77,6 +78,8 @@ struct module {
+ int has_init;
+ int has_cleanup;
+ struct buffer dev_table_buf;
++ char **markers;
++ size_t nmarkers;
+ };
+
+ struct elf_info {
+@@ -85,6 +88,7 @@ struct elf_info {
+ Elf_Shdr *sechdrs;
+ Elf_Sym *symtab_start;
+ Elf_Sym *symtab_stop;
++ Elf_Section markers_strings_sec;
+ const char *strtab;
+ char *modinfo;
+ unsigned int modinfo_len;
Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch
===================================================================
--- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch (rev 0)
+++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u5.patch 2008-07-04 02:18:23 UTC (rev 6)
@@ -0,0 +1,1917 @@
+diff --git a/Makefile b/Makefile
+index 5bfc101..d495f16 100644
+--- a/Makefile
++++ b/Makefile
+@@ -583,7 +583,7 @@ quiet_cmd_vmlinux__ ?= LD $@
+ cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \
+ -T $(vmlinux-lds) $(vmlinux-init) \
+ --start-group $(vmlinux-main) --end-group \
+- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^)
++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^)
+
+ # Generate new vmlinux version
+ quiet_cmd_vmlinux_version = GEN .version
+@@ -688,10 +688,30 @@ $(KALLSYMS): scripts ;
+
+ endif # ifdef CONFIG_KALLSYMS
+
++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has
++# relevant sections renamed as per the linker script.
++quiet_cmd_vmlinux-modpost = LD $@
++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \
++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \
++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^)
++define rule_vmlinux-modpost
++ :
++ +$(call cmd,vmlinux-modpost)
++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@
++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd
++endef
++
++
+ # vmlinux image - including updated kernel symbols
+-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE
++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE
++ $(call vmlinux-modpost)
+ $(call if_changed_rule,vmlinux__)
+
++# build vmlinux.o first to catch section mismatch errors early
++$(kallsyms.o): vmlinux.o
++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE
++ $(call if_changed_rule,vmlinux-modpost)
++
+ # The actual objects are generated when descending,
+ # make sure no implicit rule kicks in
+ $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ;
+diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
+index cf069b7..d39d5c5 100644
+--- a/arch/i386/Kconfig.debug
++++ b/arch/i386/Kconfig.debug
+@@ -29,6 +29,13 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ config DEBUG_STACK_USAGE
+ bool "Stack utilization instrumentation"
+ depends on DEBUG_KERNEL
+diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
+index e8db99c..b846b21 100644
+--- a/arch/i386/kernel/vmlinux.lds.S
++++ b/arch/i386/kernel/vmlinux.lds.S
+@@ -53,6 +53,7 @@ SECTIONS
+ /* writeable */
+ .data : { /* Data */
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ }
+
+diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug
+index 2782b48..17d4a44 100644
+--- a/arch/ia64/Kconfig.debug
++++ b/arch/ia64/Kconfig.debug
+@@ -12,6 +12,12 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
+
+ choice
+ prompt "Physical memory granularity"
+diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
+index a676e79..c6ea47e 100644
+--- a/arch/ia64/kernel/vmlinux.lds.S
++++ b/arch/ia64/kernel/vmlinux.lds.S
+@@ -193,7 +193,7 @@ SECTIONS
+
+ data : { } :data
+ .data : AT(ADDR(.data) - LOAD_OFFSET)
+- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS }
++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS }
+
+ . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */
+ .got : AT(ADDR(.got) - LOAD_OFFSET)
+diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug
+index 860472a..5c3a9f5 100644
+--- a/arch/ppc64/Kconfig.debug
++++ b/arch/ppc64/Kconfig.debug
+@@ -16,6 +16,13 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ config DEBUG_STACK_USAGE
+ bool "Stack utilization instrumentation"
+ depends on DEBUG_KERNEL
+diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S
+index 4103cc1..76d0bf0 100644
+--- a/arch/ppc64/kernel/vmlinux.lds.S
++++ b/arch/ppc64/kernel/vmlinux.lds.S
+@@ -118,6 +118,7 @@ SECTIONS
+ .data : {
+ *(.data .data.rel* .toc1)
+ *(.branch_lt)
++ MARKER
+ }
+
+ .opd : {
+diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug
+index cd8d39f..f39bf8d 100644
+--- a/arch/sparc64/Kconfig.debug
++++ b/arch/sparc64/Kconfig.debug
+@@ -21,6 +21,13 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ config DEBUG_DCFLUSH
+ bool "D-cache flush debugging"
+ depends on DEBUG_KERNEL
+diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
+index a710d38..d4b3b62 100644
+--- a/arch/sparc64/kernel/vmlinux.lds.S
++++ b/arch/sparc64/kernel/vmlinux.lds.S
+@@ -27,6 +27,7 @@ SECTIONS
+ .data :
+ {
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ }
+ .data1 : { *(.data1) }
+diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
+index cb33186..d0260cb 100644
+--- a/arch/x86_64/Kconfig.debug
++++ b/arch/x86_64/Kconfig.debug
+@@ -55,6 +55,13 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ config IOMMU_LEAK
+ bool "IOMMU leak tracing"
+ depends on DEBUG_KERNEL
+diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
+index 053c826..3b3928f 100644
+--- a/arch/x86_64/kernel/vmlinux.lds.S
++++ b/arch/x86_64/kernel/vmlinux.lds.S
+@@ -33,6 +33,7 @@ SECTIONS
+
+ .data : { /* Data */
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ }
+
+diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
+index 181f3d2..c7c5dc6 100644
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -158,7 +158,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator);
+
+ // ================= main 802.3ad protocol functions ==================
+ static int ad_lacpdu_send(struct port *port);
+-static int ad_marker_send(struct port *port, struct marker *marker);
++static int ad_marker_send(struct port *port, struct bond_marker *marker);
+ static void ad_mux_machine(struct port *port);
+ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
+ static void ad_tx_machine(struct port *port);
+@@ -171,8 +171,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast);
+ static void ad_initialize_lacpdu(struct lacpdu *Lacpdu);
+ static void ad_enable_collecting_distributing(struct port *port);
+ static void ad_disable_collecting_distributing(struct port *port);
+-static void ad_marker_info_received(struct marker *marker_info, struct port *port);
+-static void ad_marker_response_received(struct marker *marker, struct port *port);
++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port);
++static void ad_marker_response_received(struct bond_marker *marker, struct port *port);
+
+
+ /////////////////////////////////////////////////////////////////////////////////
+@@ -945,12 +945,13 @@ static int ad_lacpdu_send(struct port *port)
+ * Returns: 0 on success
+ * < 0 on error
+ */
+-static int ad_marker_send(struct port *port, struct marker *marker)
++static int ad_marker_send(struct port *port, struct bond_marker *marker)
+ {
+ struct slave *slave = port->slave;
+ struct sk_buff *skb;
+- struct marker_header *marker_header;
+- int length = sizeof(struct marker_header);
++ struct bond_marker_header *marker_header;
++ int length = sizeof(struct bond_marker_header);
++
+ struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR;
+
+ skb = dev_alloc_skb(length + 16);
+@@ -965,7 +966,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
+ skb->nh.raw = skb->data + ETH_HLEN;
+ skb->protocol = PKT_TYPE_LACPDU;
+
+- marker_header = (struct marker_header *)skb_put(skb, length);
++ marker_header = (struct bond_marker_header *)skb_put(skb, length);
+
+ marker_header->ad_header.destination_address = lacpdu_multicast_address;
+ /* Note: source addres is set to be the member's PERMANENT address, because we use it
+@@ -1762,7 +1763,7 @@ static void ad_disable_collecting_distributing(struct port *port)
+ */
+ static void ad_marker_info_send(struct port *port)
+ {
+- struct marker marker;
++ struct bond_marker marker;
+ u16 index;
+
+ // fill the marker PDU with the appropriate values
+@@ -1795,13 +1796,14 @@ static void ad_marker_info_send(struct port *port)
+ * @port: the port we're looking at
+ *
+ */
+-static void ad_marker_info_received(struct marker *marker_info,struct port *port)
++static void ad_marker_info_received(struct bond_marker *marker_info,
++ struct port *port)
+ {
+- struct marker marker;
++ struct bond_marker marker;
+
+ // copy the received marker data to the response marker
+ //marker = *marker_info;
+- memcpy(&marker, marker_info, sizeof(struct marker));
++ memcpy(&marker, marker_info, sizeof(struct bond_marker));
+ // change the marker subtype to marker response
+ marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE;
+ // send the marker response
+@@ -1820,7 +1822,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port
+ * response for marker PDU's, in this stage, but only to respond to marker
+ * information.
+ */
+-static void ad_marker_response_received(struct marker *marker, struct port *port)
++static void ad_marker_response_received(struct bond_marker *marker,
++ struct port *port)
+ {
+ marker=NULL; // just to satisfy the compiler
+ port=NULL; // just to satisfy the compiler
+@@ -2208,15 +2211,16 @@ void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 leng
+ case AD_TYPE_MARKER:
+ // No need to convert fields to Little Endian since we don't use the marker's fields.
+
+- switch (((struct marker *)lacpdu)->tlv_type) {
++ switch (((struct bond_marker *)lacpdu)->tlv_type) {
+ case AD_MARKER_INFORMATION_SUBTYPE:
+ dprintk("Received Marker Information on port %d\n", port->actor_port_number);
+- ad_marker_info_received((struct marker *)lacpdu, port);
++ ad_marker_info_received((struct bond_marker *)lacpdu, port);
++
+ break;
+
+ case AD_MARKER_RESPONSE_SUBTYPE:
+ dprintk("Received Marker Response on port %d\n", port->actor_port_number);
+- ad_marker_response_received((struct marker *)lacpdu, port);
++ ad_marker_response_received((struct bond_marker *)lacpdu, port);
+ break;
+
+ default:
+diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
+index 4c60b17..2bb477e 100644
+--- a/drivers/net/bonding/bond_3ad.h
++++ b/drivers/net/bonding/bond_3ad.h
+@@ -105,7 +105,8 @@ typedef enum {
+ typedef enum {
+ AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype
+ AD_MARKER_RESPONSE_SUBTYPE // marker response subtype
+-} marker_subtype_t;
++} bond_marker_subtype_t;
++
+
+ // timers types(43.4.9 in the 802.3ad standard)
+ typedef enum {
+@@ -161,7 +162,7 @@ typedef struct lacpdu_header {
+ } lacpdu_header_t;
+
+ // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard)
+-typedef struct marker {
++typedef struct bond_marker {
+ u8 subtype; // = 0x02 (marker PDU)
+ u8 version_number; // = 0x01
+ u8 tlv_type; // = 0x01 (marker information)
+@@ -174,12 +175,12 @@ typedef struct marker {
+ u8 tlv_type_terminator; // = 0x00
+ u8 terminator_length; // = 0x00
+ u8 reserved_90[90]; // = 0
+-} marker_t;
++} bond_marker_t;
+
+-typedef struct marker_header {
++typedef struct bond_marker_header {
+ struct ad_header ad_header;
+- struct marker marker;
+-} marker_header_t;
++ struct bond_marker marker;
++} bond_marker_header_t;
+
+ #pragma pack()
+
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index bb340cb..fd6e57f 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -6,10 +6,18 @@
+ #define VMLINUX_SYMBOL(_sym_) _sym_
+ #endif
+
++/* Kernel markers : pointers */
++#define MARKER \
++ . = ALIGN(8); \
++ VMLINUX_SYMBOL(__start___markers) = .; \
++ *(__markers) \
++ VMLINUX_SYMBOL(__stop___markers) = .;
++
+ #define RODATA \
+ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \
+ *(.rodata) *(.rodata.*) \
+ *(__vermagic) /* Kernel version magic */ \
++ *(__markers_strings) /* Markers: strings */ \
+ } \
+ \
+ .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \
+diff --git a/include/linux/marker.h b/include/linux/marker.h
+new file mode 100644
+index 0000000..efbc82b
+--- /dev/null
++++ b/include/linux/marker.h
+@@ -0,0 +1,139 @@
++#ifndef _LINUX_MARKER_H
++#define _LINUX_MARKER_H
++
++/*
++ * Code markup for dynamic and static tracing.
++ *
++ * See Documentation/marker.txt.
++ *
++ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers at polymtl.ca>
++ *
++ * This file is released under the GPLv2.
++ * See the file COPYING for more details.
++ */
++
++#include <linux/types.h>
++
++struct module;
++struct marker;
++
++/**
++ * marker_probe_func - Type of a marker probe function
++ * @probe_private: probe private data
++ * @call_private: call site private data
++ * @fmt: format string
++ * @args: variable argument list pointer. Use a pointer to overcome C's
++ * inability to pass this around as a pointer in a portable manner in
++ * the callee otherwise.
++ *
++ * Type of marker probe functions. They receive the probe and call site private
++ * data and need to parse the format string to recover the variable argument list.
++ */
++typedef void marker_probe_func(void *probe_private, void *call_private,
++ const char *fmt, va_list *args);
++
++struct marker_probe_closure {
++ marker_probe_func *func; /* Callback */
++ void *probe_private; /* Private probe data */
++};
++
++struct marker {
++ const char *name; /* Marker name */
++ const char *format; /* Marker format string, describing the
++ * variable argument list.
++ */
++ char state; /* Marker state. */
++ char ptype; /* probe type : 0 : single, 1 : multi */
++ void (*call)(const struct marker *mdata, /* Probe wrapper */
++ void *call_private, const char *fmt, ...);
++ struct marker_probe_closure single;
++ struct marker_probe_closure *multi;
++} __attribute__((aligned(8)));
++
++#ifdef CONFIG_MARKERS
++
++/*
++ * Note : the empty asm volatile with read constraint is used here instead of a
++ * "used" attribute to fix a gcc 4.1.x bug.
++ * Make sure the alignment of the structure in the __markers section will
++ * not add unwanted padding between the beginning of the section and the
++ * structure. Force alignment to the same alignment as the section start.
++ */
++#define __trace_mark(name, call_private, format, args...) \
++ do { \
++ static const char __mstrtab_##name[] \
++ __attribute__((section("__markers_strings"))) \
++ = #name "\0" format; \
++ static struct marker __mark_##name \
++ __attribute__((section("__markers"), aligned(8))) = \
++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \
++ 0, 0, marker_probe_cb, \
++ { __mark_empty_function, NULL}, NULL }; \
++ __mark_check_format(format, ## args); \
++ if (unlikely(__mark_##name.state)) { \
++ (*__mark_##name.call) \
++ (&__mark_##name, call_private, \
++ format, ## args); \
++ } \
++ } while (0)
++
++extern void marker_update_probe_range(struct marker *begin,
++ struct marker *end);
++#else /* !CONFIG_MARKERS */
++#define __trace_mark(name, call_private, format, args...) \
++ __mark_check_format(format, ## args)
++static inline void marker_update_probe_range(struct marker *begin,
++ struct marker *end)
++{ }
++#endif /* CONFIG_MARKERS */
++
++/**
++ * trace_mark - Marker
++ * @name: marker name, not quoted.
++ * @format: format string
++ * @args...: variable argument list
++ *
++ * Places a marker.
++ */
++#define trace_mark(name, format, args...) \
++ __trace_mark(name, NULL, format, ## args)
++
++/**
++ * MARK_NOARGS - Format string for a marker with no argument.
++ */
++#define MARK_NOARGS " "
++
++/* To be used for string format validity checking with gcc */
++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...)
++{
++}
++
++extern marker_probe_func __mark_empty_function;
++
++extern void marker_probe_cb(const struct marker *mdata,
++ void *call_private, const char *fmt, ...);
++extern void marker_probe_cb_noarg(const struct marker *mdata,
++ void *call_private, const char *fmt, ...);
++
++/*
++ * Connect a probe to a marker.
++ * private data pointer must be a valid allocated memory address, or NULL.
++ */
++extern int marker_probe_register(const char *name, const char *format,
++ marker_probe_func *probe, void *probe_private);
++
++/*
++ * Disconnect a probe, identified by function and private data, from a marker.
++ */
++extern int marker_probe_unregister(const char *name,
++ marker_probe_func *probe, void *probe_private);
++/*
++ * Unregister a marker by providing the registered private data.
++ */
++extern int marker_probe_unregister_private_data(marker_probe_func *probe,
++ void *probe_private);
++
++extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
++ int num);
++
++#endif
+diff --git a/include/linux/module.h b/include/linux/module.h
+index 8da8948..2ad5efd 100644
+--- a/include/linux/module.h
++++ b/include/linux/module.h
+@@ -18,6 +18,7 @@
+ #include <linux/stringify.h>
+ #include <linux/kobject.h>
+ #include <linux/moduleparam.h>
++#include <linux/marker.h>
+ #include <asm/local.h>
+
+ #include <asm/module.h>
+@@ -328,6 +329,10 @@ struct module
+ /* The command line arguments (may be mangled). People like
+ keeping pointers to this stuff */
+ char *args;
++#ifdef CONFIG_MARKERS
++ struct marker *markers;
++ unsigned int num_markers;
++#endif
+ };
+
+ /* FIXME: It'd be nice to isolate modules during init, too, so they
+@@ -448,6 +453,7 @@ int register_module_notifier(struct notifier_block * nb);
+ int unregister_module_notifier(struct notifier_block * nb);
+
+ extern void print_modules(void);
++extern void module_update_markers(void);
+ #else /* !CONFIG_MODULES... */
+ #define EXPORT_SYMBOL(sym)
+ #define EXPORT_SYMBOL_GPL(sym)
+@@ -537,6 +543,10 @@ static inline int unregister_module_notifier(struct notifier_block * nb)
+ static inline void print_modules(void)
+ {
+ }
++
++static inline void module_update_markers(void)
++{
++}
+ #endif /* CONFIG_MODULES */
+
+ #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x)
+diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
+index 91057d6..bcd0acb 100644
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -42,6 +42,19 @@
+ #include <linux/cpumask.h>
+ #include <linux/seqlock.h>
+
++/*
++ * Prevent the compiler from merging or refetching accesses. The compiler
++ * is also forbidden from reordering successive instances of ACCESS_ONCE(),
++ * but only when the compiler is aware of some particular ordering. One way
++ * to make the compiler aware of ordering is to put the two invocations of
++ * ACCESS_ONCE() in different C statements.
++ *
++ * This macro does absolutely -nothing- to prevent the CPU from reordering,
++ * merging, or refetching absolutely anything at any time.
++ *
++ */
++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
++
+ /**
+ * struct rcu_head - callback structure for use with RCU
+ * @next: next update requests in a list
+@@ -102,6 +115,7 @@ struct rcu_data {
+ struct rcu_head *donelist;
+ struct rcu_head **donetail;
+ int cpu;
++ struct rcu_head barrier;
+ };
+
+ DECLARE_PER_CPU(struct rcu_data, rcu_data);
+@@ -266,6 +280,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head,
+ extern void FASTCALL(call_rcu_bh(struct rcu_head *head,
+ void (*func)(struct rcu_head *head)));
+ extern void synchronize_kernel(void);
+-
++extern void rcu_barrier(void);
+ #endif /* __KERNEL__ */
+ #endif /* __LINUX_RCUPDATE_H */
+diff --git a/kernel/Makefile b/kernel/Makefile
+index 0b8c8ca..f8248bc 100644
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o
+ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+ obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o
+ obj-$(CONFIG_KPROBES) += kprobes.o
++obj-$(CONFIG_MARKERS) += marker.o
+
+ ifneq ($(CONFIG_IA64),y)
+ # According to Alan Modra <alan at linuxcare.com.au>, the -fno-omit-frame-pointer is
+diff --git a/kernel/marker.c b/kernel/marker.c
+new file mode 100644
+index 0000000..c4c2cd8
+--- /dev/null
++++ b/kernel/marker.c
+@@ -0,0 +1,851 @@
++/*
++ * Copyright (C) 2007 Mathieu Desnoyers
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ */
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/types.h>
++#include <linux/jhash.h>
++#include <linux/list.h>
++#include <linux/rcupdate.h>
++#include <linux/marker.h>
++#include <linux/err.h>
++
++extern struct marker __start___markers[];
++extern struct marker __stop___markers[];
++
++/* Set to 1 to enable marker debug output */
++const int marker_debug;
++
++/*
++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
++ * and module markers and the hash table.
++ */
++static DEFINE_MUTEX(markers_mutex);
++
++/*
++ * Marker hash table, containing the active markers.
++ * Protected by markers_mutex.
++ */
++#define MARKER_HASH_BITS 6
++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
++
++/*
++ * Note about RCU :
++ * It is used to make sure every handler has finished using its private data
++ * between two consecutive operations (add or remove) on a given marker. It is
++ * also used to delay the free of multiple probes array until a quiescent state
++ * is reached.
++ * marker entries modifications are protected by the markers_mutex.
++ */
++struct marker_entry {
++ struct hlist_node hlist;
++ char *format;
++ void (*call)(const struct marker *mdata, /* Probe wrapper */
++ void *call_private, const char *fmt, ...);
++ struct marker_probe_closure single;
++ struct marker_probe_closure *multi;
++ int refcount; /* Number of times armed. 0 if disarmed. */
++ struct rcu_head rcu;
++ void *oldptr;
++ char rcu_pending:1;
++ char ptype:1;
++ char name[0]; /* Contains name'\0'format'\0' */
++};
++
++static struct hlist_head marker_table[MARKER_TABLE_SIZE];
++
++/**
++ * __mark_empty_function - Empty probe callback
++ * @probe_private: probe private data
++ * @call_private: call site private data
++ * @fmt: format string
++ * @args: variable argument list pointer
++ *
++ * Empty callback provided as a probe to the markers. By providing this to a
++ * disabled marker, we make sure the execution flow is always valid even
++ * though the function pointer change and the marker enabling are two distinct
++ * operations that modify the execution flow of preemptible code.
++ */
++void __mark_empty_function(void *probe_private, void *call_private,
++ const char *fmt, va_list *args)
++{
++}
++EXPORT_SYMBOL_GPL(__mark_empty_function);
++
++/*
++ * marker_probe_cb Callback that prepares the variable argument list for probes.
++ * @mdata: pointer of type struct marker
++ * @call_private: caller site private data
++ * @fmt: format string
++ * @...: Variable argument list.
++ *
++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we
++ * need to put a full smp_rmb() in this branch. This is why we do not use
++ * rcu_dereference() for the pointer read.
++ */
++void marker_probe_cb(const struct marker *mdata, void *call_private,
++ const char *fmt, ...)
++{
++ va_list args;
++ char ptype;
++
++ /*
++ * Disable preemption to make sure the teardown of the callbacks can
++ * be done correctly when they are in modules, and to ensure RCU read
++ * coherency.
++ */
++ preempt_disable();
++ ptype = ACCESS_ONCE(mdata->ptype);
++ if (likely(!ptype)) {
++ marker_probe_func *func;
++ /* Must read the ptype before ptr. They are not data dependent,
++ * so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func = ACCESS_ONCE(mdata->single.func);
++ /* Must read the ptr before private data. They are not data
++ * dependent, so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ va_start(args, fmt);
++ func(mdata->single.probe_private, call_private, fmt, &args);
++ va_end(args);
++ } else {
++ struct marker_probe_closure *multi;
++ int i;
++ /*
++ * multi points to an array, therefore accessing the array
++ * depends on reading multi. However, even in this case,
++ * we must ensure that the pointer is read _before_ the array
++ * data. Same as rcu_dereference, but we need a full smp_rmb()
++ * in the fast path, so put the explicit barrier here.
++ */
++ smp_read_barrier_depends();
++ multi = ACCESS_ONCE(mdata->multi);
++ for (i = 0; multi[i].func; i++) {
++ va_start(args, fmt);
++ multi[i].func(multi[i].probe_private, call_private, fmt,
++ &args);
++ va_end(args);
++ }
++ }
++ preempt_enable();
++}
++EXPORT_SYMBOL_GPL(marker_probe_cb);
++
++/*
++ * marker_probe_cb_noarg Callback that does not prepare the variable argument list.
++ * @mdata: pointer of type struct marker
++ * @call_private: caller site private data
++ * @fmt: format string
++ * @...: Variable argument list.
++ *
++ * Should be connected to markers "MARK_NOARGS".
++ */
++void marker_probe_cb_noarg(const struct marker *mdata,
++ void *call_private, const char *fmt, ...)
++{
++ va_list args; /* not initialized */
++ char ptype;
++
++ preempt_disable();
++ ptype = ACCESS_ONCE(mdata->ptype);
++ if (likely(!ptype)) {
++ marker_probe_func *func;
++ /* Must read the ptype before ptr. They are not data dependent,
++ * so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func = ACCESS_ONCE(mdata->single.func);
++ /* Must read the ptr before private data. They are not data
++ * dependent, so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func(mdata->single.probe_private, call_private, fmt, &args);
++ } else {
++ struct marker_probe_closure *multi;
++ int i;
++ /*
++ * multi points to an array, therefore accessing the array
++ * depends on reading multi. However, even in this case,
++ * we must ensure that the pointer is read _before_ the array
++ * data. Same as rcu_dereference, but we need a full smp_rmb()
++ * in the fast path, so put the explicit barrier here.
++ */
++ smp_read_barrier_depends();
++ multi = ACCESS_ONCE(mdata->multi);
++ for (i = 0; multi[i].func; i++)
++ multi[i].func(multi[i].probe_private, call_private, fmt,
++ &args);
++ }
++ preempt_enable();
++}
++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
++
++static void free_old_closure(struct rcu_head *head)
++{
++ struct marker_entry *entry = container_of(head,
++ struct marker_entry, rcu);
++ kfree(entry->oldptr);
++ /* Make sure we free the data before setting the pending flag to 0 */
++ smp_wmb();
++ entry->rcu_pending = 0;
++}
++
++static void debug_print_probes(struct marker_entry *entry)
++{
++ int i;
++
++ if (!marker_debug)
++ return;
++
++ if (!entry->ptype) {
++ printk(KERN_DEBUG "Single probe : %p %p\n",
++ entry->single.func,
++ entry->single.probe_private);
++ } else {
++ for (i = 0; entry->multi[i].func; i++)
++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
++ entry->multi[i].func,
++ entry->multi[i].probe_private);
++ }
++}
++
++static struct marker_probe_closure *
++marker_entry_add_probe(struct marker_entry *entry,
++ marker_probe_func *probe, void *probe_private)
++{
++ int nr_probes = 0;
++ struct marker_probe_closure *old, *new;
++
++ WARN_ON(!probe);
++
++ debug_print_probes(entry);
++ old = entry->multi;
++ if (!entry->ptype) {
++ if (entry->single.func == probe &&
++ entry->single.probe_private == probe_private)
++ return ERR_PTR(-EBUSY);
++ if (entry->single.func == __mark_empty_function) {
++ /* 0 -> 1 probes */
++ entry->single.func = probe;
++ entry->single.probe_private = probe_private;
++ entry->refcount = 1;
++ entry->ptype = 0;
++ debug_print_probes(entry);
++ return NULL;
++ } else {
++ /* 1 -> 2 probes */
++ nr_probes = 1;
++ old = NULL;
++ }
++ } else {
++ /* (N -> N+1), (N != 0, 1) probes */
++ for (nr_probes = 0; old[nr_probes].func; nr_probes++)
++ if (old[nr_probes].func == probe
++ && old[nr_probes].probe_private
++ == probe_private)
++ return ERR_PTR(-EBUSY);
++ }
++ /* + 2 : one for new probe, one for NULL func */
++ new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
++ GFP_KERNEL);
++ if (new == NULL)
++ return ERR_PTR(-ENOMEM);
++ if (!old)
++ new[0] = entry->single;
++ else
++ memcpy(new, old,
++ nr_probes * sizeof(struct marker_probe_closure));
++ new[nr_probes].func = probe;
++ new[nr_probes].probe_private = probe_private;
++ entry->refcount = nr_probes + 1;
++ entry->multi = new;
++ entry->ptype = 1;
++ debug_print_probes(entry);
++ return old;
++}
++
++static struct marker_probe_closure *
++marker_entry_remove_probe(struct marker_entry *entry,
++ marker_probe_func *probe, void *probe_private)
++{
++ int nr_probes = 0, nr_del = 0, i;
++ struct marker_probe_closure *old, *new;
++
++ old = entry->multi;
++
++ debug_print_probes(entry);
++ if (!entry->ptype) {
++ /* 0 -> N is an error */
++ WARN_ON(entry->single.func == __mark_empty_function);
++ /* 1 -> 0 probes */
++ WARN_ON(probe && entry->single.func != probe);
++ WARN_ON(entry->single.probe_private != probe_private);
++ entry->single.func = __mark_empty_function;
++ entry->refcount = 0;
++ entry->ptype = 0;
++ debug_print_probes(entry);
++ return NULL;
++ } else {
++ /* (N -> M), (N > 1, M >= 0) probes */
++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
++ if ((!probe || old[nr_probes].func == probe)
++ && old[nr_probes].probe_private
++ == probe_private)
++ nr_del++;
++ }
++ }
++
++ if (nr_probes - nr_del == 0) {
++ /* N -> 0, (N > 1) */
++ entry->single.func = __mark_empty_function;
++ entry->refcount = 0;
++ entry->ptype = 0;
++ } else if (nr_probes - nr_del == 1) {
++ /* N -> 1, (N > 1) */
++ for (i = 0; old[i].func; i++)
++ if ((probe && old[i].func != probe) ||
++ old[i].probe_private != probe_private)
++ entry->single = old[i];
++ entry->refcount = 1;
++ entry->ptype = 0;
++ } else {
++ int j = 0;
++ /* N -> M, (N > 1, M > 1) */
++ /* + 1 for NULL */
++ new = kzalloc((nr_probes - nr_del + 1)
++ * sizeof(struct marker_probe_closure), GFP_KERNEL);
++ if (new == NULL)
++ return ERR_PTR(-ENOMEM);
++ for (i = 0; old[i].func; i++)
++ if ((probe && old[i].func != probe) ||
++ old[i].probe_private != probe_private)
++ new[j++] = old[i];
++ entry->refcount = nr_probes - nr_del;
++ entry->ptype = 1;
++ entry->multi = new;
++ }
++ debug_print_probes(entry);
++ return old;
++}
++
++/*
++ * Get marker if the marker is present in the marker hash table.
++ * Must be called with markers_mutex held.
++ * Returns NULL if not present.
++ */
++static struct marker_entry *get_marker(const char *name)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ u32 hash = jhash(name, strlen(name), 0);
++
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name))
++ return e;
++ }
++ return NULL;
++}
++
++/*
++ * Add the marker to the marker hash table. Must be called with markers_mutex
++ * held.
++ */
++static struct marker_entry *add_marker(const char *name, const char *format)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ size_t name_len = strlen(name) + 1;
++ size_t format_len = 0;
++ u32 hash = jhash(name, name_len-1, 0);
++
++ if (format)
++ format_len = strlen(format) + 1;
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name)) {
++ printk(KERN_NOTICE
++ "Marker %s busy\n", name);
++ return ERR_PTR(-EBUSY); /* Already there */
++ }
++ }
++ /*
++ * Using kmalloc here to allocate a variable length element. Could
++ * cause some memory fragmentation if overused.
++ */
++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
++ GFP_KERNEL);
++ if (!e)
++ return ERR_PTR(-ENOMEM);
++ memcpy(&e->name[0], name, name_len);
++ if (format) {
++ e->format = &e->name[name_len];
++ memcpy(e->format, format, format_len);
++ if (strcmp(e->format, MARK_NOARGS) == 0)
++ e->call = marker_probe_cb_noarg;
++ else
++ e->call = marker_probe_cb;
++ trace_mark(core_marker_format, "name %s format %s",
++ e->name, e->format);
++ } else {
++ e->format = NULL;
++ e->call = marker_probe_cb;
++ }
++ e->single.func = __mark_empty_function;
++ e->single.probe_private = NULL;
++ e->multi = NULL;
++ e->ptype = 0;
++ e->refcount = 0;
++ e->rcu_pending = 0;
++ hlist_add_head(&e->hlist, head);
++ return e;
++}
++
++/*
++ * Remove the marker from the marker hash table. Must be called with
++ * markers_mutex held.
++ */
++static int remove_marker(const char *name)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ int found = 0;
++ size_t len = strlen(name) + 1;
++ u32 hash = jhash(name, len-1, 0);
++
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name)) {
++ found = 1;
++ break;
++ }
++ }
++ if (!found)
++ return -ENOENT;
++ if (e->single.func != __mark_empty_function)
++ return -EBUSY;
++ hlist_del(&e->hlist);
++ /* Make sure the call_rcu has been executed */
++ if (e->rcu_pending)
++ rcu_barrier();
++ kfree(e);
++ return 0;
++}
++
++/*
++ * Set the marker_entry format to the element's format; reallocates *entry.
++ */
++static int marker_set_format(struct marker_entry **entry, const char *format)
++{
++ struct marker_entry *e;
++ size_t name_len = strlen((*entry)->name) + 1;
++ size_t format_len = strlen(format) + 1;
++
++
++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
++ GFP_KERNEL);
++ if (!e)
++ return -ENOMEM;
++ memcpy(&e->name[0], (*entry)->name, name_len);
++ e->format = &e->name[name_len];
++ memcpy(e->format, format, format_len);
++ if (strcmp(e->format, MARK_NOARGS) == 0)
++ e->call = marker_probe_cb_noarg;
++ else
++ e->call = marker_probe_cb;
++ e->single = (*entry)->single;
++ e->multi = (*entry)->multi;
++ e->ptype = (*entry)->ptype;
++ e->refcount = (*entry)->refcount;
++ e->rcu_pending = 0;
++ hlist_add_before(&e->hlist, &(*entry)->hlist);
++ hlist_del(&(*entry)->hlist);
++ /* Make sure the call_rcu has been executed */
++ if ((*entry)->rcu_pending)
++ rcu_barrier();
++ kfree(*entry);
++ *entry = e;
++ trace_mark(core_marker_format, "name %s format %s",
++ e->name, e->format);
++ return 0;
++}
++
++/*
++ * Sets the probe callback corresponding to one marker.
++ */
++static int set_marker(struct marker_entry **entry, struct marker *elem,
++ int active)
++{
++ int ret;
++ WARN_ON(strcmp((*entry)->name, elem->name) != 0);
++
++ if ((*entry)->format) {
++ if (strcmp((*entry)->format, elem->format) != 0) {
++ printk(KERN_NOTICE
++ "Format mismatch for probe %s "
++ "(%s), marker (%s)\n",
++ (*entry)->name,
++ (*entry)->format,
++ elem->format);
++ return -EPERM;
++ }
++ } else {
++ ret = marker_set_format(entry, elem->format);
++ if (ret)
++ return ret;
++ }
++
++ /*
++ * probe_cb setup (statically known) is done here. It is
++ * asynchronous with the rest of execution, therefore we only
++ * pass from a "safe" callback (with argument) to an "unsafe"
++ * callback (does not set arguments).
++ */
++ elem->call = (*entry)->call;
++ /*
++ * Sanity check :
++ * We only update the single probe private data when the ptr is
++ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
++ */
++ WARN_ON(elem->single.func != __mark_empty_function
++ && elem->single.probe_private
++ != (*entry)->single.probe_private &&
++ !elem->ptype);
++ elem->single.probe_private = (*entry)->single.probe_private;
++ /*
++ * Make sure the private data is valid when we update the
++ * single probe ptr.
++ */
++ smp_wmb();
++ elem->single.func = (*entry)->single.func;
++ /*
++ * We also make sure that the new probe callbacks array is consistent
++ * before setting a pointer to it.
++ */
++ rcu_assign_pointer(elem->multi, (*entry)->multi);
++ /*
++ * Update the function or multi probe array pointer before setting the
++ * ptype.
++ */
++ smp_wmb();
++ elem->ptype = (*entry)->ptype;
++ elem->state = active;
++
++ return 0;
++}
++
++/*
++ * Disable a marker and its probe callback.
++ * Note: only a synchronize_sched() issued after setting elem->single.func to
++ * the empty function ensures that the original callback is not used anymore.
++ * This is ensured by preemption disabling around the call site.
++ */
++static void disable_marker(struct marker *elem)
++{
++ /* leave "call" as is. It is known statically. */
++ elem->state = 0;
++ elem->single.func = __mark_empty_function;
++ /* Update the function before setting the ptype */
++ smp_wmb();
++ elem->ptype = 0; /* single probe */
++ /*
++ * Leave the private data and id there, because removal is racy and
++ * should be done only after a synchronize_sched(). These are never used
++ * until the next initialization anyway.
++ */
++}
++
++/**
++ * marker_update_probe_range - Update a probe range
++ * @begin: beginning of the range
++ * @end: end of the range
++ *
++ * Updates the probe callback corresponding to a range of markers.
++ */
++void marker_update_probe_range(struct marker *begin,
++ struct marker *end)
++{
++ struct marker *iter;
++ struct marker_entry *mark_entry;
++
++ mutex_lock(&markers_mutex);
++ for (iter = begin; iter < end; iter++) {
++ mark_entry = get_marker(iter->name);
++ if (mark_entry) {
++ set_marker(&mark_entry, iter,
++ !!mark_entry->refcount);
++ /*
++ * ignore error, continue
++ */
++ } else {
++ disable_marker(iter);
++ }
++ }
++ mutex_unlock(&markers_mutex);
++}
++
++/*
++ * Update probes, removing the faulty probes.
++ * Issues a synchronize_sched() when no reference to the module passed
++ * as parameter is found in the probes so the probe module can be
++ * safely unloaded from now on.
++ *
++ * Internal callback only changed before the first probe is connected to it.
++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
++ * transitions. All other transitions will leave the old private data valid.
++ * This makes the non-atomicity of the callback/private data updates valid.
++ *
++ * "special case" updates :
++ * 0 -> 1 callback
++ * 1 -> 0 callback
++ * 1 -> 2 callbacks
++ * 2 -> 1 callbacks
++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
++ * Side effect: marker_set_format may delete the marker entry (creating a
++ * replacement).
++ */
++static void marker_update_probes(void)
++{
++ /* Core kernel markers */
++ marker_update_probe_range(__start___markers, __stop___markers);
++ /* Markers in modules. */
++ module_update_markers();
++}
++
++/**
++ * marker_probe_register - Connect a probe to a marker
++ * @name: marker name
++ * @format: format string
++ * @probe: probe handler
++ * @probe_private: probe private data
++ *
++ * private data must be a valid allocated memory address, or NULL.
++ * Returns 0 if ok, error value on error.
++ * The probe address must at least be aligned on the architecture pointer size.
++ */
++int marker_probe_register(const char *name, const char *format,
++ marker_probe_func *probe, void *probe_private)
++{
++ struct marker_entry *entry;
++ int ret = 0;
++ struct marker_probe_closure *old;
++
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name);
++ if (!entry) {
++ entry = add_marker(name, format);
++ if (IS_ERR(entry)) {
++ ret = PTR_ERR(entry);
++ goto end;
++ }
++ }
++ /*
++ * If we detect that a call_rcu is pending for this marker,
++ * make sure it's executed now.
++ */
++ if (entry->rcu_pending)
++ rcu_barrier();
++ old = marker_entry_add_probe(entry, probe, probe_private);
++ if (IS_ERR(old)) {
++ ret = PTR_ERR(old);
++ goto end;
++ }
++ mutex_unlock(&markers_mutex);
++ marker_update_probes(); /* may update entry */
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name);
++ WARN_ON(!entry);
++ entry->oldptr = old;
++ entry->rcu_pending = 1;
++ /* write rcu_pending before calling the RCU callback */
++ smp_wmb();
++ call_rcu(&entry->rcu, free_old_closure);
++end:
++ mutex_unlock(&markers_mutex);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_register);
++
++/**
++ * marker_probe_unregister - Disconnect a probe from a marker
++ * @name: marker name
++ * @probe: probe function pointer
++ * @probe_private: probe private data
++ *
++ * Returns 0 if ok, error value on error. No synchronize_sched() is needed
++ * before module unload: stop_machine() there waits for preempt-off sections.
++ */
++int marker_probe_unregister(const char *name,
++ marker_probe_func *probe, void *probe_private)
++{
++ struct marker_entry *entry;
++ struct marker_probe_closure *old;
++ int ret = 0;
++
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name);
++ if (!entry) {
++ ret = -ENOENT;
++ goto end;
++ }
++ if (entry->rcu_pending)
++ rcu_barrier();
++ old = marker_entry_remove_probe(entry, probe, probe_private);
++ if (IS_ERR(old)) { /* never queue an error pointer as oldptr below */
++ ret = PTR_ERR(old);
++ goto end;
++ }
++ mutex_unlock(&markers_mutex);
++ marker_update_probes(); /* may update entry */
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name);
++ entry->oldptr = old;
++ entry->rcu_pending = 1;
++ smp_wmb(); /* write rcu_pending before calling the RCU callback */
++ call_rcu(&entry->rcu, free_old_closure);
++ remove_marker(name); /* Ignore busy error message */
++end:
++ mutex_unlock(&markers_mutex);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister);
++
++static struct marker_entry *
++get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
++{ /* returns the first entry with a (probe, probe_private) closure, or NULL */
++ struct marker_entry *entry;
++ unsigned int i, j; /* i: hash bucket, j: closure slot (must not share) */
++ struct hlist_head *head;
++ struct hlist_node *node;
++
++ for (i = 0; i < MARKER_TABLE_SIZE; i++) {
++ head = &marker_table[i];
++ hlist_for_each_entry(entry, node, head, hlist) {
++ if (!entry->ptype) {
++ if (entry->single.func == probe
++ && entry->single.probe_private
++ == probe_private)
++ return entry;
++ } else {
++ struct marker_probe_closure *closure;
++ closure = entry->multi;
++ for (j = 0; closure[j].func; j++) {
++ if (closure[j].func == probe &&
++ closure[j].probe_private
++ == probe_private)
++ return entry;
++ }
++ }
++ }
++ }
++ return NULL;
++}
++
++/**
++ * marker_probe_unregister_private_data - Disconnect a probe from a marker
++ * @probe: probe function
++ * @probe_private: probe private data
++ *
++ * Unregister a probe by providing the registered private data.
++ * Only removes the first marker found in hash table.
++ * Return 0 on success or error value. No synchronize_sched() is needed
++ * before module unload: stop_machine() there waits for preempt-off sections.
++ */
++int marker_probe_unregister_private_data(marker_probe_func *probe,
++ void *probe_private)
++{
++ struct marker_entry *entry;
++ int ret = 0;
++ struct marker_probe_closure *old;
++
++ mutex_lock(&markers_mutex);
++ entry = get_marker_from_private_data(probe, probe_private);
++ if (!entry) {
++ ret = -ENOENT;
++ goto end;
++ }
++ if (entry->rcu_pending)
++ rcu_barrier();
++ old = marker_entry_remove_probe(entry, NULL, probe_private);
++ if (IS_ERR(old)) { /* never queue an error pointer as oldptr below */
++ ret = PTR_ERR(old);
++ goto end;
++ }
++ mutex_unlock(&markers_mutex);
++ marker_update_probes(); /* may update entry */
++ mutex_lock(&markers_mutex);
++ entry = get_marker_from_private_data(probe, probe_private);
++ WARN_ON(!entry); /* NOTE(review): closure already removed; can be NULL */
++ entry->oldptr = old;
++ entry->rcu_pending = 1;
++ smp_wmb(); /* write rcu_pending before calling the RCU callback */
++ call_rcu(&entry->rcu, free_old_closure);
++ remove_marker(entry->name); /* Ignore busy error message */
++end:
++ mutex_unlock(&markers_mutex);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
++
++/**
++ * marker_get_private_data - Get a marker's probe private data
++ * @name: marker name
++ * @probe: probe to match
++ * @num: get the nth matching probe's private data
++ *
++ * Returns the private data pointer of the nth probe (counting from 0) on
++ * marker @name whose handler is @probe, or ERR_PTR(-ENOENT) if there is no
++ * such probe.
++ * The private data pointer should _only_ be dereferenced if the caller is the
++ * owner of the data, or its content could vanish. This is mostly used to
++ * confirm that a caller is the owner of a registered probe.
++ */
++void *marker_get_private_data(const char *name, marker_probe_func *probe,
++ int num)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ size_t name_len = strlen(name) + 1;
++ u32 hash = jhash(name, name_len-1, 0);
++ int i;
++
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name)) {
++ if (!e->ptype) {
++ if (num == 0 && e->single.func == probe)
++ return e->single.probe_private;
++ else
++ break;
++ } else {
++ struct marker_probe_closure *closure;
++ int match = 0;
++ closure = e->multi;
++ for (i = 0; closure[i].func; i++) {
++ if (closure[i].func != probe)
++ continue;
++ if (match++ == num)
++ return closure[i].probe_private;
++ }
++ }
++ }
++ }
++ return ERR_PTR(-ENOENT);
++}
++EXPORT_SYMBOL_GPL(marker_get_private_data);
+diff --git a/kernel/module.c b/kernel/module.c
+index 7f0ccd8..1cd4c54 100644
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -1505,6 +1505,8 @@ static struct module *load_module(void __user *umod,
+ struct exception_table_entry *extable;
+ mm_segment_t old_fs;
+ int gpgsig_ok;
++ unsigned int markersindex;
++ unsigned int markersstringsindex;
+
+ DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
+ umod, len, uargs);
+@@ -1735,6 +1737,9 @@ static struct module *load_module(void __user *umod,
+ tainted |= TAINT_FORCED_MODULE;
+ }
+ #endif
++ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
++ markersstringsindex = find_sec(hdr, sechdrs, secstrings,
++ "__markers_strings");
+
+ /* Now do relocations. */
+ for (i = 1; i < hdr->e_shnum; i++) {
+@@ -1758,6 +1763,12 @@ static struct module *load_module(void __user *umod,
+ goto cleanup;
+ }
+
++#ifdef CONFIG_MARKERS
++ mod->markers = (void *)sechdrs[markersindex].sh_addr;
++ mod->num_markers =
++ sechdrs[markersindex].sh_size / sizeof(*mod->markers);
++#endif
++
+ /* Set up and sort exception table */
+ mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable);
+ mod->extable = extable = (void *)sechdrs[exindex].sh_addr;
+@@ -1769,6 +1780,12 @@ static struct module *load_module(void __user *umod,
+
+ add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+
++#ifdef CONFIG_MARKERS
++ if (!tainted)
++ marker_update_probe_range(mod->markers,
++ mod->markers + mod->num_markers);
++#endif
++
+ err = module_finalize(hdr, sechdrs, mod);
+ if (err < 0)
+ goto cleanup;
+@@ -2186,6 +2203,20 @@ void struct_module(struct module *mod) { return; }
+ EXPORT_SYMBOL(struct_module);
+ #endif
+
++#ifdef CONFIG_MARKERS
++void module_update_markers(void)
++{
++ struct module *mod;
++
++ down(&module_mutex);
++ list_for_each_entry(mod, &modules, list)
++ if (!tainted)
++ marker_update_probe_range(mod->markers,
++ mod->markers + mod->num_markers);
++ up(&module_mutex);
++}
++#endif
++
+ static int __init modules_init(void)
+ {
+ return subsystem_register(&module_subsys);
+diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
+index 1b16bfc..20ade31 100644
+--- a/kernel/rcupdate.c
++++ b/kernel/rcupdate.c
+@@ -46,6 +46,7 @@
+ #include <linux/notifier.h>
+ #include <linux/rcupdate.h>
+ #include <linux/cpu.h>
++#include <linux/mutex.h>
+
+ /* Definition for rcupdate control block. */
+ struct rcu_ctrlblk rcu_ctrlblk =
+@@ -98,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head,
+ local_irq_restore(flags);
+ }
+
++static atomic_t rcu_barrier_cpu_count;
++static DEFINE_MUTEX(rcu_barrier_mutex);
++static struct completion rcu_barrier_completion;
++
+ /**
+ * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
+ * @head: structure to be used for queueing the RCU updates.
+@@ -410,6 +415,44 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self,
+ return NOTIFY_OK;
+ }
+
++static void rcu_barrier_callback(struct rcu_head *notused)
++{
++ if (atomic_dec_and_test(&rcu_barrier_cpu_count))
++ complete(&rcu_barrier_completion);
++}
++
++/*
++ * Called with preemption disabled, and from cross-cpu IRQ context.
++ */
++static void rcu_barrier_func(void *notused)
++{
++ int cpu = smp_processor_id();
++ struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
++ struct rcu_head *head;
++
++ head = &rdp->barrier;
++ atomic_inc(&rcu_barrier_cpu_count);
++ call_rcu(head, rcu_barrier_callback);
++}
++
++/**
++ * rcu_barrier - Wait until all the in-flight RCUs are complete.
++ */
++void rcu_barrier(void)
++{
++ BUG_ON(in_interrupt());
++ /* Serialize callers: the completion and CPU count are shared statics */
++ mutex_lock(&rcu_barrier_mutex);
++ init_completion(&rcu_barrier_completion);
++ atomic_set(&rcu_barrier_cpu_count, 0);
++ on_each_cpu(rcu_barrier_func, NULL, 0, 1); /* queue one callback per CPU */
++ wait_for_completion(&rcu_barrier_completion); /* last callback completes it */
++ mutex_unlock(&rcu_barrier_mutex);
++}
++EXPORT_SYMBOL_GPL(rcu_barrier);
++
++
++
+ static struct notifier_block __devinitdata rcu_nb = {
+ .notifier_call = rcu_cpu_notify,
+ };
+diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
+index b3d31b5..b100a32 100644
+--- a/scripts/Makefile.modpost
++++ b/scripts/Makefile.modpost
+@@ -13,6 +13,7 @@
+ # 2) modpost is then used to
+ # 3) create one <module>.mod.c file pr. module
+ # 4) create one Module.symvers file with CRC for all exported symbols
++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
+ # 5) compile all <module>.mod.c files
+ # 6) final link of the module to a <module.ko> file
+
+@@ -40,6 +41,11 @@ include scripts/Makefile.lib
+
+ symverfile := $(objtree)/Module.symvers
+
++kernelmarkersfile := $(objtree)/Module.markers
++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
++
++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
++
+ # Step 1), find all modules listed in $(MODVERDIR)/
+ __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
+ modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o)))
+@@ -53,16 +59,27 @@ quiet_cmd_modpost = MODPOST
+ cmd_modpost = scripts/mod/modpost \
+ $(if $(CONFIG_MODVERSIONS),-m) \
+ $(if $(KBUILD_EXTMOD),-i,-o) $(symverfile) \
++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \
+ $(filter-out FORCE,$^)
+
+ .PHONY: __modpost
+ __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE
+ $(call cmd,modpost)
+
++quiet_cmd_kernel-mod = MODPOST $@
++ cmd_kernel-mod = $(cmd_modpost) $@
++
++vmlinux.o: FORCE
++ $(call cmd,kernel-mod)
++
+ # Declare generated files as targets for modpost
+ $(symverfile): __modpost ;
+ $(modules:.ko=.mod.c): __modpost ;
+
++ifdef CONFIG_MARKERS
++$(markersfile): __modpost ;
++endif
+
+ # Step 5), compile all *.mod.c files
+
+diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
+index 2a174e5..c25948c 100644
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -10,7 +10,8 @@
+ *
+ * Usage: modpost vmlinux module1.o module2.o ...
+ */
+-
++#define _GNU_SOURCE
++#include <stdio.h>
+ #include <ctype.h>
+ #include "modpost.h"
+
+@@ -289,7 +290,9 @@ parse_elf(struct elf_info *info, const char *filename)
+ if (strcmp(secstrings+sechdrs[i].sh_name, ".modinfo") == 0) {
+ info->modinfo = (void *)hdr + sechdrs[i].sh_offset;
+ info->modinfo_len = sechdrs[i].sh_size;
+- }
++ } else if (strcmp(secstrings+sechdrs[i].sh_name, "__markers_strings") == 0)
++ info->markers_strings_sec = i;
++
+ if (sechdrs[i].sh_type != SHT_SYMTAB)
+ continue;
+
+@@ -397,6 +400,63 @@ is_vmlinux(const char *modname)
+ return strcmp(myname, "vmlinux") == 0;
+ }
+
++static void get_markers(struct elf_info *info, struct module *mod)
++{
++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec];
++ const char *strings = (const char *) info->hdr + sh->sh_offset;
++ const Elf_Sym *sym, *first_sym, *last_sym;
++ size_t n;
++
++
++ if (!info->markers_strings_sec)
++ return;
++
++ /*
++ * First count the strings. We look for all the symbols defined
++ * in the __markers_strings section named __mstrtab_*. For
++ * these local names, the compiler puts a random .NNN suffix on,
++ * so the names don't correspond exactly.
++ */
++ first_sym = last_sym = NULL;
++ n = 0;
++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++)
++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
++ sym->st_shndx == info->markers_strings_sec &&
++ !strncmp(info->strtab + sym->st_name,
++ "__mstrtab_", sizeof "__mstrtab_" - 1)) {
++ if (first_sym == NULL)
++ first_sym = sym;
++ last_sym = sym;
++ ++n;
++ }
++
++ if (n == 0)
++ return;
++ /*
++ * Now collect each name and format into a line for the output.
++ * Lines look like:
++ * marker_name vmlinux marker %s format %d
++ * The format string after the second \t can use whitespace.
++ */
++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n));
++ mod->nmarkers = n;
++
++ n = 0;
++ for (sym = first_sym; sym <= last_sym; sym++)
++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
++ sym->st_shndx == info->markers_strings_sec &&
++ !strncmp(info->strtab + sym->st_name,
++ "__mstrtab_", sizeof "__mstrtab_" - 1)) {
++ const char *name = strings + sym->st_value;
++ const char *fmt = strchr(name, '\0') + 1;
++ char *line = NULL;
++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
++ NOFAIL(line);
++ mod->markers[n++] = line;
++ }
++}
++
++
+ void
+ read_symbols(char *modname)
+ {
+@@ -426,6 +486,7 @@ read_symbols(char *modname)
+ }
+ maybe_frob_version(modname, info.modinfo, info.modinfo_len,
+ (void *)info.modinfo - (void *)info.hdr);
++ get_markers(&info, mod);
+ parse_elf_finish(&info);
+
+ /* Our trick to get versioning for struct_module - it's
+@@ -682,6 +743,92 @@ write_dump(const char *fname)
+ write_if_changed(&buf, fname);
+ }
+
++static void add_marker(struct module *mod, const char *name, const char *fmt)
++{
++ char *line = NULL;
++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
++ NOFAIL(line);
++
++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
++ sizeof mod->markers[0])));
++ mod->markers[mod->nmarkers++] = line;
++}
++
++static void read_markers(const char *fname)
++{
++ unsigned long size, pos = 0;
++ void *file = grab_file(fname, &size);
++ char *line;
++
++ if (!file) /* No old markers, silently ignore */
++ return;
++
++ while ((line = get_next_line(&pos, file, size))) {
++ char *marker, *modname, *fmt;
++ struct module *mod;
++
++ marker = line;
++ modname = strchr(marker, '\t');
++ if (!modname)
++ goto fail;
++ *modname++ = '\0';
++ fmt = strchr(modname, '\t');
++ if (!fmt)
++ goto fail;
++ *fmt++ = '\0';
++ if (*marker == '\0' || *modname == '\0')
++ goto fail;
++
++ mod = find_module(modname);
++ if (!mod) {
++ if (is_vmlinux(modname))
++ have_vmlinux = 1;
++ mod = new_module(NOFAIL(strdup(modname)));
++ mod->skip = 1;
++ }
++
++ add_marker(mod, marker, fmt);
++ }
++ return;
++fail:
++ fatal("parse error in markers list file\n");
++}
++
++static int compare_strings(const void *a, const void *b)
++{
++ return strcmp(*(const char **) a, *(const char **) b);
++}
++
++static void write_markers(const char *fname)
++{
++ struct buffer buf = { };
++ struct module *mod;
++ size_t i;
++
++ for (mod = modules; mod; mod = mod->next)
++ if (mod->markers != NULL) {
++ /*
++ * Sort the strings so we can skip duplicates when
++ * we write them out.
++ */
++ qsort(mod->markers, mod->nmarkers,
++ sizeof mod->markers[0], &compare_strings);
++ for (i = 0; i < mod->nmarkers; ++i) {
++ char *line = mod->markers[i];
++ buf_write(&buf, line, strlen(line));
++ while (i + 1 < mod->nmarkers &&
++ !strcmp(mod->markers[i],
++ mod->markers[i + 1]))
++ free(mod->markers[i++]);
++ free(mod->markers[i]);
++ }
++ free(mod->markers);
++ mod->markers = NULL;
++ }
++
++ write_if_changed(&buf, fname);
++}
++
+ int
+ main(int argc, char **argv)
+ {
+@@ -690,8 +837,10 @@ main(int argc, char **argv)
+ char fname[SZ];
+ char *dump_read = NULL, *dump_write = NULL;
+ int opt;
++ char *markers_read = NULL;
++ char *markers_write = NULL;
+
+- while ((opt = getopt(argc, argv, "i:mo:")) != -1) {
++ while ((opt = getopt(argc, argv, "i:mo:M:K:")) != -1) {
+ switch(opt) {
+ case 'i':
+ dump_read = optarg;
+@@ -702,6 +851,12 @@ main(int argc, char **argv)
+ case 'o':
+ dump_write = optarg;
+ break;
++ case 'M':
++ markers_write = optarg;
++ break;
++ case 'K':
++ markers_read = optarg;
++ break;
+ default:
+ exit(1);
+ }
+@@ -732,6 +887,12 @@ main(int argc, char **argv)
+ if (dump_write)
+ write_dump(dump_write);
+
++ if (markers_read)
++ read_markers(markers_read);
++
++ if (markers_write)
++ write_markers(markers_write);
++
+ return 0;
+ }
+
+diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
+index 4871343..d79d7ea 100644
+--- a/scripts/mod/modpost.h
++++ b/scripts/mod/modpost.h
+@@ -18,6 +18,7 @@
+ #define Elf_Sym Elf32_Sym
+ #define ELF_ST_BIND ELF32_ST_BIND
+ #define ELF_ST_TYPE ELF32_ST_TYPE
++#define Elf_Section Elf32_Half
+
+ #else
+
+@@ -26,7 +27,7 @@
+ #define Elf_Sym Elf64_Sym
+ #define ELF_ST_BIND ELF64_ST_BIND
+ #define ELF_ST_TYPE ELF64_ST_TYPE
+-
++#define Elf_Section Elf64_Half
+ #endif
+
+ #if KERNEL_ELFDATA != HOST_ELFDATA
+@@ -77,6 +78,8 @@ struct module {
+ int has_init;
+ int has_cleanup;
+ struct buffer dev_table_buf;
++ char **markers;
++ size_t nmarkers;
+ };
+
+ struct elf_info {
+@@ -85,6 +88,7 @@ struct elf_info {
+ Elf_Shdr *sechdrs;
+ Elf_Sym *symtab_start;
+ Elf_Sym *symtab_stop;
++ Elf_Section markers_strings_sec;
+ const char *strtab;
+ char *modinfo;
+ unsigned int modinfo_len;
Added: trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch
===================================================================
--- trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch (rev 0)
+++ trunk/wjhuang/marker_patches/linux-kernel-markers-full-2.6.9.el4u6.patch 2008-07-04 02:18:23 UTC (rev 6)
@@ -0,0 +1,1917 @@
+diff --git a/Makefile b/Makefile
+index 06b517d..afa3d2c 100644
+--- a/Makefile
++++ b/Makefile
+@@ -583,7 +583,7 @@ quiet_cmd_vmlinux__ ?= LD $@
+ cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \
+ -T $(vmlinux-lds) $(vmlinux-init) \
+ --start-group $(vmlinux-main) --end-group \
+- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^)
++ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^)
+
+ # Generate new vmlinux version
+ quiet_cmd_vmlinux_version = GEN .version
+@@ -688,10 +688,30 @@ $(KALLSYMS): scripts ;
+
+ endif # ifdef CONFIG_KALLSYMS
+
++# Do modpost on a prelinked vmlinux. The finally linked vmlinux has
++# relevant sections renamed as per the linker script.
++quiet_cmd_vmlinux-modpost = LD $@
++ cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \
++ $(vmlinux-init) --start-group $(vmlinux-main) --end-group \
++ $(filter-out $(vmlinux-init) $(vmlinux-main) $(vmlinux-lds) FORCE ,$^)
++define rule_vmlinux-modpost
++ :
++ +$(call cmd,vmlinux-modpost)
++ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@
++ $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd
++endef
++
++
+ # vmlinux image - including updated kernel symbols
+-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE
++vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE
++ $(call vmlinux-modpost)
+ $(call if_changed_rule,vmlinux__)
+
++# build vmlinux.o first to catch section mismatch errors early
++$(kallsyms.o): vmlinux.o
++vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE
++ $(call if_changed_rule,vmlinux-modpost)
++
+ # The actual objects are generated when descending,
+ # make sure no implicit rule kicks in
+ $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ;
+diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
+index cf069b7..d39d5c5 100644
+--- a/arch/i386/Kconfig.debug
++++ b/arch/i386/Kconfig.debug
+@@ -29,6 +29,13 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ config DEBUG_STACK_USAGE
+ bool "Stack utilization instrumentation"
+ depends on DEBUG_KERNEL
+diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
+index e8db99c..b846b21 100644
+--- a/arch/i386/kernel/vmlinux.lds.S
++++ b/arch/i386/kernel/vmlinux.lds.S
+@@ -53,6 +53,7 @@ SECTIONS
+ /* writeable */
+ .data : { /* Data */
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ }
+
+diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug
+index 2782b48..17d4a44 100644
+--- a/arch/ia64/Kconfig.debug
++++ b/arch/ia64/Kconfig.debug
+@@ -12,6 +12,12 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
+
+ choice
+ prompt "Physical memory granularity"
+diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
+index a676e79..c6ea47e 100644
+--- a/arch/ia64/kernel/vmlinux.lds.S
++++ b/arch/ia64/kernel/vmlinux.lds.S
+@@ -193,7 +193,7 @@ SECTIONS
+
+ data : { } :data
+ .data : AT(ADDR(.data) - LOAD_OFFSET)
+- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS }
++ { *(.data) *(.data1) *(.gnu.linkonce.d*) MARKER CONSTRUCTORS }
+
+ . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */
+ .got : AT(ADDR(.got) - LOAD_OFFSET)
+diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug
+index 860472a..5c3a9f5 100644
+--- a/arch/ppc64/Kconfig.debug
++++ b/arch/ppc64/Kconfig.debug
+@@ -16,6 +16,13 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ config DEBUG_STACK_USAGE
+ bool "Stack utilization instrumentation"
+ depends on DEBUG_KERNEL
+diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S
+index 4103cc1..76d0bf0 100644
+--- a/arch/ppc64/kernel/vmlinux.lds.S
++++ b/arch/ppc64/kernel/vmlinux.lds.S
+@@ -118,6 +118,7 @@ SECTIONS
+ .data : {
+ *(.data .data.rel* .toc1)
+ *(.branch_lt)
++ MARKER
+ }
+
+ .opd : {
+diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug
+index cd8d39f..f39bf8d 100644
+--- a/arch/sparc64/Kconfig.debug
++++ b/arch/sparc64/Kconfig.debug
+@@ -21,6 +21,13 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ config DEBUG_DCFLUSH
+ bool "D-cache flush debugging"
+ depends on DEBUG_KERNEL
+diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
+index a710d38..d4b3b62 100644
+--- a/arch/sparc64/kernel/vmlinux.lds.S
++++ b/arch/sparc64/kernel/vmlinux.lds.S
+@@ -27,6 +27,7 @@ SECTIONS
+ .data :
+ {
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ }
+ .data1 : { *(.data1) }
+diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
+index cb33186..d0260cb 100644
+--- a/arch/x86_64/Kconfig.debug
++++ b/arch/x86_64/Kconfig.debug
+@@ -55,6 +55,13 @@ config KPROBES
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
++config MARKERS
++ bool "Activate markers"
++ default y
++ help
++ Place an empty function call at each marker site. Can be
++ dynamically changed for a probe function.
++
+ config IOMMU_LEAK
+ bool "IOMMU leak tracing"
+ depends on DEBUG_KERNEL
+diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
+index 053c826..3b3928f 100644
+--- a/arch/x86_64/kernel/vmlinux.lds.S
++++ b/arch/x86_64/kernel/vmlinux.lds.S
+@@ -33,6 +33,7 @@ SECTIONS
+
+ .data : { /* Data */
+ *(.data)
++ MARKER
+ CONSTRUCTORS
+ }
+
+diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
+index 7a4d28d..991a1ef 100644
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -159,7 +159,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator);
+
+ // ================= main 802.3ad protocol functions ==================
+ static int ad_lacpdu_send(struct port *port);
+-static int ad_marker_send(struct port *port, struct marker *marker);
++static int ad_marker_send(struct port *port, struct bond_marker *marker);
+ static void ad_mux_machine(struct port *port);
+ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
+ static void ad_tx_machine(struct port *port);
+@@ -172,8 +172,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast);
+ static void ad_initialize_lacpdu(struct lacpdu *Lacpdu);
+ static void ad_enable_collecting_distributing(struct port *port);
+ static void ad_disable_collecting_distributing(struct port *port);
+-static void ad_marker_info_received(struct marker *marker_info, struct port *port);
+-static void ad_marker_response_received(struct marker *marker, struct port *port);
++static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port);
++static void ad_marker_response_received(struct bond_marker *marker, struct port *port);
+
+
+ /////////////////////////////////////////////////////////////////////////////////
+@@ -954,12 +954,13 @@ static int ad_lacpdu_send(struct port *port)
+ * Returns: 0 on success
+ * < 0 on error
+ */
+-static int ad_marker_send(struct port *port, struct marker *marker)
++static int ad_marker_send(struct port *port, struct bond_marker *marker)
+ {
+ struct slave *slave = port->slave;
+ struct sk_buff *skb;
+- struct marker_header *marker_header;
+- int length = sizeof(struct marker_header);
++ struct bond_marker_header *marker_header;
++ int length = sizeof(struct bond_marker_header);
++
+ struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR;
+
+ skb = dev_alloc_skb(length + 16);
+@@ -974,7 +975,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
+ skb->nh.raw = skb->data + ETH_HLEN;
+ skb->protocol = PKT_TYPE_LACPDU;
+
+- marker_header = (struct marker_header *)skb_put(skb, length);
++ marker_header = (struct bond_marker_header *)skb_put(skb, length);
+
+ marker_header->ad_header.destination_address = lacpdu_multicast_address;
+ /* Note: source addres is set to be the member's PERMANENT address, because we use it
+@@ -1771,7 +1772,7 @@ static void ad_disable_collecting_distributing(struct port *port)
+ */
+ static void ad_marker_info_send(struct port *port)
+ {
+- struct marker marker;
++ struct bond_marker marker;
+ u16 index;
+
+ // fill the marker PDU with the appropriate values
+@@ -1804,13 +1805,14 @@ static void ad_marker_info_send(struct port *port)
+ * @port: the port we're looking at
+ *
+ */
+-static void ad_marker_info_received(struct marker *marker_info,struct port *port)
++static void ad_marker_info_received(struct bond_marker *marker_info,
++ struct port *port)
+ {
+- struct marker marker;
++ struct bond_marker marker;
+
+ // copy the received marker data to the response marker
+ //marker = *marker_info;
+- memcpy(&marker, marker_info, sizeof(struct marker));
++ memcpy(&marker, marker_info, sizeof(struct bond_marker));
+ // change the marker subtype to marker response
+ marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE;
+ // send the marker response
+@@ -1829,7 +1831,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port
+ * response for marker PDU's, in this stage, but only to respond to marker
+ * information.
+ */
+-static void ad_marker_response_received(struct marker *marker, struct port *port)
++static void ad_marker_response_received(struct bond_marker *marker,
++ struct port *port)
+ {
+ marker=NULL; // just to satisfy the compiler
+ port=NULL; // just to satisfy the compiler
+@@ -2217,15 +2220,16 @@ void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 leng
+ case AD_TYPE_MARKER:
+ // No need to convert fields to Little Endian since we don't use the marker's fields.
+
+- switch (((struct marker *)lacpdu)->tlv_type) {
++ switch (((struct bond_marker *)lacpdu)->tlv_type) {
+ case AD_MARKER_INFORMATION_SUBTYPE:
+ dprintk("Received Marker Information on port %d\n", port->actor_port_number);
+- ad_marker_info_received((struct marker *)lacpdu, port);
++ ad_marker_info_received((struct bond_marker *)lacpdu, port);
++
+ break;
+
+ case AD_MARKER_RESPONSE_SUBTYPE:
+ dprintk("Received Marker Response on port %d\n", port->actor_port_number);
+- ad_marker_response_received((struct marker *)lacpdu, port);
++ ad_marker_response_received((struct bond_marker *)lacpdu, port);
+ break;
+
+ default:
+diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
+index 4c60b17..2bb477e 100644
+--- a/drivers/net/bonding/bond_3ad.h
++++ b/drivers/net/bonding/bond_3ad.h
+@@ -105,7 +105,8 @@ typedef enum {
+ typedef enum {
+ AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype
+ AD_MARKER_RESPONSE_SUBTYPE // marker response subtype
+-} marker_subtype_t;
++} bond_marker_subtype_t;
++
+
+ // timers types(43.4.9 in the 802.3ad standard)
+ typedef enum {
+@@ -161,7 +162,7 @@ typedef struct lacpdu_header {
+ } lacpdu_header_t;
+
+ // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard)
+-typedef struct marker {
++typedef struct bond_marker {
+ u8 subtype; // = 0x02 (marker PDU)
+ u8 version_number; // = 0x01
+ u8 tlv_type; // = 0x01 (marker information)
+@@ -174,12 +175,12 @@ typedef struct marker {
+ u8 tlv_type_terminator; // = 0x00
+ u8 terminator_length; // = 0x00
+ u8 reserved_90[90]; // = 0
+-} marker_t;
++} bond_marker_t;
+
+-typedef struct marker_header {
++typedef struct bond_marker_header {
+ struct ad_header ad_header;
+- struct marker marker;
+-} marker_header_t;
++ struct bond_marker marker;
++} bond_marker_header_t;
+
+ #pragma pack()
+
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index 165a02f..eaf230e 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -6,10 +6,18 @@
+ #define VMLINUX_SYMBOL(_sym_) _sym_
+ #endif
+
++/* Kernel markers : pointers */
++#define MARKER \
++ . = ALIGN(8); \
++ VMLINUX_SYMBOL(__start___markers) = .; \
++ *(__markers) \
++ VMLINUX_SYMBOL(__stop___markers) = .;
++
+ #define RODATA \
+ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \
+ *(.rodata) *(.rodata.*) \
+ *(__vermagic) /* Kernel version magic */ \
++ *(__markers_strings) /* Markers: strings */ \
+ } \
+ \
+ .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \
+diff --git a/include/linux/marker.h b/include/linux/marker.h
+new file mode 100644
+index 0000000..efbc82b
+--- /dev/null
++++ b/include/linux/marker.h
+@@ -0,0 +1,139 @@
++#ifndef _LINUX_MARKER_H
++#define _LINUX_MARKER_H
++
++/*
++ * Code markup for dynamic and static tracing.
++ *
++ * See Documentation/marker.txt.
++ *
++ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers at polymtl.ca>
++ *
++ * This file is released under the GPLv2.
++ * See the file COPYING for more details.
++ */
++
++#include <linux/types.h>
++
++struct module;
++struct marker;
++
++/**
++ * marker_probe_func - Type of a marker probe function
++ * @probe_private: probe private data
++ * @call_private: call site private data
++ * @fmt: format string
++ * @args: variable argument list pointer. Use a pointer to overcome C's
++ * inability to pass this around as a pointer in a portable manner in
++ * the callee otherwise.
++ *
++ * Type of marker probe functions. They receive the mdata and need to parse the
++ * format string to recover the variable argument list.
++ */
++typedef void marker_probe_func(void *probe_private, void *call_private,
++ const char *fmt, va_list *args);
++
++struct marker_probe_closure {
++ marker_probe_func *func; /* Callback */
++ void *probe_private; /* Private probe data */
++};
++
++struct marker {
++ const char *name; /* Marker name */
++ const char *format; /* Marker format string, describing the
++ * variable argument list.
++ */
++ char state; /* Marker state. */
++ char ptype; /* probe type : 0 : single, 1 : multi */
++ void (*call)(const struct marker *mdata, /* Probe wrapper */
++ void *call_private, const char *fmt, ...);
++ struct marker_probe_closure single;
++ struct marker_probe_closure *multi;
++} __attribute__((aligned(8)));
++
++#ifdef CONFIG_MARKERS
++
++/*
++ * Note : the empty asm volatile with read constraint is used here instead of a
++ * "used" attribute to fix a gcc 4.1.x bug.
++ * Make sure the alignment of the structure in the __markers section will
++ * not add unwanted padding between the beginning of the section and the
++ * structure. Force alignment to the same alignment as the section start.
++ */
++#define __trace_mark(name, call_private, format, args...) \
++ do { \
++ static const char __mstrtab_##name[] \
++ __attribute__((section("__markers_strings"))) \
++ = #name "\0" format; \
++ static struct marker __mark_##name \
++ __attribute__((section("__markers"), aligned(8))) = \
++ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \
++ 0, 0, marker_probe_cb, \
++ { __mark_empty_function, NULL}, NULL }; \
++ __mark_check_format(format, ## args); \
++ if (unlikely(__mark_##name.state)) { \
++ (*__mark_##name.call) \
++ (&__mark_##name, call_private, \
++ format, ## args); \
++ } \
++ } while (0)
++
++extern void marker_update_probe_range(struct marker *begin,
++ struct marker *end);
++#else /* !CONFIG_MARKERS */
++#define __trace_mark(name, call_private, format, args...) \
++ __mark_check_format(format, ## args)
++static inline void marker_update_probe_range(struct marker *begin,
++ struct marker *end)
++{ }
++#endif /* CONFIG_MARKERS */
++
++/**
++ * trace_mark - Marker
++ * @name: marker name, not quoted.
++ * @format: format string
++ * @args...: variable argument list
++ *
++ * Places a marker.
++ */
++#define trace_mark(name, format, args...) \
++ __trace_mark(name, NULL, format, ## args)
++
++/**
++ * MARK_NOARGS - Format string for a marker with no argument.
++ */
++#define MARK_NOARGS " "
++
++/* To be used for string format validity checking with gcc */
++static inline void __attribute__ ((format (printf, 1, 2))) __mark_check_format(const char *fmt, ...)
++{
++}
++
++extern marker_probe_func __mark_empty_function;
++
++extern void marker_probe_cb(const struct marker *mdata,
++ void *call_private, const char *fmt, ...);
++extern void marker_probe_cb_noarg(const struct marker *mdata,
++ void *call_private, const char *fmt, ...);
++
++/*
++ * Connect a probe to a marker.
++ * private data pointer must be a valid allocated memory address, or NULL.
++ */
++extern int marker_probe_register(const char *name, const char *format,
++ marker_probe_func *probe, void *probe_private);
++
++/*
++ * Disconnect the probe and private data given to marker_probe_register.
++ */
++extern int marker_probe_unregister(const char *name,
++ marker_probe_func *probe, void *probe_private);
++/*
++ * Unregister a marker by providing the registered private data.
++ */
++extern int marker_probe_unregister_private_data(marker_probe_func *probe,
++ void *probe_private);
++
++extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
++ int num);
++
++#endif
+diff --git a/include/linux/module.h b/include/linux/module.h
+index 8da8948..2ad5efd 100644
+--- a/include/linux/module.h
++++ b/include/linux/module.h
+@@ -18,6 +18,7 @@
+ #include <linux/stringify.h>
+ #include <linux/kobject.h>
+ #include <linux/moduleparam.h>
++#include <linux/marker.h>
+ #include <asm/local.h>
+
+ #include <asm/module.h>
+@@ -328,6 +329,10 @@ struct module
+ /* The command line arguments (may be mangled). People like
+ keeping pointers to this stuff */
+ char *args;
++#ifdef CONFIG_MARKERS
++ struct marker *markers;
++ unsigned int num_markers;
++#endif
+ };
+
+ /* FIXME: It'd be nice to isolate modules during init, too, so they
+@@ -448,6 +453,7 @@ int register_module_notifier(struct notifier_block * nb);
+ int unregister_module_notifier(struct notifier_block * nb);
+
+ extern void print_modules(void);
++extern void module_update_markers(void);
+ #else /* !CONFIG_MODULES... */
+ #define EXPORT_SYMBOL(sym)
+ #define EXPORT_SYMBOL_GPL(sym)
+@@ -537,6 +543,10 @@ static inline int unregister_module_notifier(struct notifier_block * nb)
+ static inline void print_modules(void)
+ {
+ }
++
++static inline void module_update_markers(void)
++{
++}
+ #endif /* CONFIG_MODULES */
+
+ #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x)
+diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
+index 91057d6..bcd0acb 100644
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -42,6 +42,19 @@
+ #include <linux/cpumask.h>
+ #include <linux/seqlock.h>
+
++/*
++ * Prevent the compiler from merging or refetching accesses. The compiler
++ * is also forbidden from reordering successive instances of ACCESS_ONCE(),
++ * but only when the compiler is aware of some particular ordering. One way
++ * to make the compiler aware of ordering is to put the two invocations of
++ * ACCESS_ONCE() in different C statements.
++ *
++ * This macro does absolutely -nothing- to prevent the CPU from reordering,
++ * merging, or refetching absolutely anything at any time.
++ *
++ */
++#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
++
+ /**
+ * struct rcu_head - callback structure for use with RCU
+ * @next: next update requests in a list
+@@ -102,6 +115,7 @@ struct rcu_data {
+ struct rcu_head *donelist;
+ struct rcu_head **donetail;
+ int cpu;
++ struct rcu_head barrier;
+ };
+
+ DECLARE_PER_CPU(struct rcu_data, rcu_data);
+@@ -266,6 +280,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head,
+ extern void FASTCALL(call_rcu_bh(struct rcu_head *head,
+ void (*func)(struct rcu_head *head)));
+ extern void synchronize_kernel(void);
+-
++extern void rcu_barrier(void);
+ #endif /* __KERNEL__ */
+ #endif /* __LINUX_RCUPDATE_H */
+diff --git a/kernel/Makefile b/kernel/Makefile
+index 0b8c8ca..f8248bc 100644
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o
+ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+ obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o
+ obj-$(CONFIG_KPROBES) += kprobes.o
++obj-$(CONFIG_MARKERS) += marker.o
+
+ ifneq ($(CONFIG_IA64),y)
+ # According to Alan Modra <alan at linuxcare.com.au>, the -fno-omit-frame-pointer is
+diff --git a/kernel/marker.c b/kernel/marker.c
+new file mode 100644
+index 0000000..c4c2cd8
+--- /dev/null
++++ b/kernel/marker.c
+@@ -0,0 +1,851 @@
++/*
++ * Copyright (C) 2007 Mathieu Desnoyers
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ */
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/types.h>
++#include <linux/jhash.h>
++#include <linux/list.h>
++#include <linux/rcupdate.h>
++#include <linux/marker.h>
++#include <linux/err.h>
++
++extern struct marker __start___markers[];
++extern struct marker __stop___markers[];
++
++/* Set to 1 to enable marker debug output */
++const int marker_debug;
++
++/*
++ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
++ * and module markers and the hash table.
++ */
++static DEFINE_MUTEX(markers_mutex);
++
++/*
++ * Marker hash table, containing the active markers.
++ * Protected by markers_mutex.
++ */
++#define MARKER_HASH_BITS 6
++#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
++
++/*
++ * Note about RCU :
++ * It is used to make sure every handler has finished using its private data
++ * between two consecutive operation (add or remove) on a given marker. It is
++ * also used to delay the free of multiple probes array until a quiescent state
++ * is reached.
++ * marker entries modifications are protected by the markers_mutex.
++ */
++struct marker_entry {
++ struct hlist_node hlist;
++ char *format;
++ void (*call)(const struct marker *mdata, /* Probe wrapper */
++ void *call_private, const char *fmt, ...);
++ struct marker_probe_closure single;
++ struct marker_probe_closure *multi;
++ int refcount; /* Number of times armed. 0 if disarmed. */
++ struct rcu_head rcu;
++ void *oldptr;
++ char rcu_pending:1;
++ char ptype:1;
++ char name[0]; /* Contains name'\0'format'\0' */
++};
++
++static struct hlist_head marker_table[MARKER_TABLE_SIZE];
++
++/**
++ * __mark_empty_function - Empty probe callback
++ * @probe_private: probe private data
++ * @call_private: call site private data
++ * @fmt: format string
++ * @...: variable argument list
++ *
++ * Empty callback provided as a probe to the markers. By providing this to a
++ * disabled marker, we make sure the execution flow is always valid even
++ * though the function pointer change and the marker enabling are two distinct
++ * operations that modify the execution flow of preemptible code.
++ */
++void __mark_empty_function(void *probe_private, void *call_private,
++ const char *fmt, va_list *args)
++{
++}
++EXPORT_SYMBOL_GPL(__mark_empty_function);
++
++/*
++ * marker_probe_cb Callback that prepares the variable argument list for probes.
++ * @mdata: pointer of type struct marker
++ * @call_private: caller site private data
++ * @fmt: format string
++ * @...: Variable argument list.
++ *
++ * Since we do not use "typical" pointer based RCU in the 1 argument case, we
++ * need to put a full smp_rmb() in this branch. This is why we do not use
++ * rcu_dereference() for the pointer read.
++ */
++void marker_probe_cb(const struct marker *mdata, void *call_private,
++ const char *fmt, ...)
++{
++ va_list args;
++ char ptype;
++
++ /*
++ * disabling preemption to make sure the teardown of the callbacks can
++ * be done correctly when they are in modules and they ensure RCU read
++ * coherency.
++ */
++ preempt_disable();
++ ptype = ACCESS_ONCE(mdata->ptype);
++ if (likely(!ptype)) {
++ marker_probe_func *func;
++ /* Must read the ptype before ptr. They are not data dependent,
++ * so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func = ACCESS_ONCE(mdata->single.func);
++ /* Must read the ptr before private data. They are not data
++ * dependent, so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ va_start(args, fmt);
++ func(mdata->single.probe_private, call_private, fmt, &args);
++ va_end(args);
++ } else {
++ struct marker_probe_closure *multi;
++ int i;
++ /* Read mdata->ptype before mdata->multi. */
++ smp_rmb();
++ multi = ACCESS_ONCE(mdata->multi);
++ /*
++ * The array elements are reached through the multi pointer,
++ * so the pointer must be read _before_ the array data. Same
++ * as rcu_dereference(): the barrier goes after the load.
++ */
++ smp_read_barrier_depends();
++ for (i = 0; multi[i].func; i++) {
++ va_start(args, fmt);
++ multi[i].func(multi[i].probe_private, call_private, fmt,
++ &args);
++ va_end(args);
++ }
++ }
++ preempt_enable();
++}
++EXPORT_SYMBOL_GPL(marker_probe_cb);
++
++/*
++ * marker_probe_cb_noarg Callback that does not prepare the variable argument list.
++ * @mdata: pointer of type struct marker
++ * @call_private: caller site private data
++ * @fmt: format string
++ * @...: Variable argument list.
++ *
++ * Should be connected to markers "MARK_NOARGS".
++ */
++void marker_probe_cb_noarg(const struct marker *mdata,
++ void *call_private, const char *fmt, ...)
++{
++ va_list args; /* not initialized */
++ char ptype;
++
++ preempt_disable();
++ ptype = ACCESS_ONCE(mdata->ptype);
++ if (likely(!ptype)) {
++ marker_probe_func *func;
++ /* Must read the ptype before ptr. They are not data dependent,
++ * so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func = ACCESS_ONCE(mdata->single.func);
++ /* Must read the ptr before private data. They are not data
++ * dependent, so we put an explicit smp_rmb() here. */
++ smp_rmb();
++ func(mdata->single.probe_private, call_private, fmt, &args);
++ } else {
++ struct marker_probe_closure *multi;
++ int i;
++ /* Read mdata->ptype before mdata->multi. */
++ smp_rmb();
++ multi = ACCESS_ONCE(mdata->multi);
++ /*
++ * The array elements are reached through the multi pointer,
++ * so the pointer must be read _before_ the array data. Same
++ * as rcu_dereference(): the barrier goes after the load.
++ */
++ smp_read_barrier_depends();
++ for (i = 0; multi[i].func; i++)
++ multi[i].func(multi[i].probe_private, call_private, fmt,
++ &args);
++ }
++ preempt_enable();
++}
++EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
++
++static void free_old_closure(struct rcu_head *head)
++{
++ struct marker_entry *entry = container_of(head,
++ struct marker_entry, rcu);
++ kfree(entry->oldptr);
++ /* Make sure we free the data before setting the pending flag to 0 */
++ smp_wmb();
++ entry->rcu_pending = 0;
++}
++
++static void debug_print_probes(struct marker_entry *entry)
++{
++ int i;
++
++ if (!marker_debug)
++ return;
++
++ if (!entry->ptype) {
++ printk(KERN_DEBUG "Single probe : %p %p\n",
++ entry->single.func,
++ entry->single.probe_private);
++ } else {
++ for (i = 0; entry->multi[i].func; i++)
++ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
++ entry->multi[i].func,
++ entry->multi[i].probe_private);
++ }
++}
++
++static struct marker_probe_closure *
++marker_entry_add_probe(struct marker_entry *entry,
++ marker_probe_func *probe, void *probe_private)
++{
++ int nr_probes = 0;
++ struct marker_probe_closure *old, *new;
++
++ WARN_ON(!probe);
++
++ debug_print_probes(entry);
++ old = entry->multi;
++ if (!entry->ptype) {
++ if (entry->single.func == probe &&
++ entry->single.probe_private == probe_private)
++ return ERR_PTR(-EBUSY);
++ if (entry->single.func == __mark_empty_function) {
++ /* 0 -> 1 probes */
++ entry->single.func = probe;
++ entry->single.probe_private = probe_private;
++ entry->refcount = 1;
++ entry->ptype = 0;
++ debug_print_probes(entry);
++ return NULL;
++ } else {
++ /* 1 -> 2 probes */
++ nr_probes = 1;
++ old = NULL;
++ }
++ } else {
++ /* (N -> N+1), (N != 0, 1) probes */
++ for (nr_probes = 0; old[nr_probes].func; nr_probes++)
++ if (old[nr_probes].func == probe
++ && old[nr_probes].probe_private
++ == probe_private)
++ return ERR_PTR(-EBUSY);
++ }
++ /* + 2 : one for new probe, one for NULL func */
++ new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
++ GFP_KERNEL);
++ if (new == NULL)
++ return ERR_PTR(-ENOMEM);
++ if (!old)
++ new[0] = entry->single;
++ else
++ memcpy(new, old,
++ nr_probes * sizeof(struct marker_probe_closure));
++ new[nr_probes].func = probe;
++ new[nr_probes].probe_private = probe_private;
++ entry->refcount = nr_probes + 1;
++ entry->multi = new;
++ entry->ptype = 1;
++ debug_print_probes(entry);
++ return old;
++}
++
++static struct marker_probe_closure *
++marker_entry_remove_probe(struct marker_entry *entry,
++ marker_probe_func *probe, void *probe_private)
++{
++ int nr_probes = 0, nr_del = 0, i;
++ struct marker_probe_closure *old, *new;
++
++ old = entry->multi;
++
++ debug_print_probes(entry);
++ if (!entry->ptype) {
++ /* 0 -> N is an error */
++ WARN_ON(entry->single.func == __mark_empty_function);
++ /* 1 -> 0 probes */
++ WARN_ON(probe && entry->single.func != probe);
++ WARN_ON(entry->single.probe_private != probe_private);
++ entry->single.func = __mark_empty_function;
++ entry->refcount = 0;
++ entry->ptype = 0;
++ debug_print_probes(entry);
++ return NULL;
++ } else {
++ /* (N -> M), (N > 1, M >= 0) probes */
++ for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
++ if ((!probe || old[nr_probes].func == probe)
++ && old[nr_probes].probe_private
++ == probe_private)
++ nr_del++;
++ }
++ }
++
++ if (nr_probes - nr_del == 0) {
++ /* N -> 0, (N > 1) */
++ entry->single.func = __mark_empty_function;
++ entry->refcount = 0;
++ entry->ptype = 0;
++ } else if (nr_probes - nr_del == 1) {
++ /* N -> 1, (N > 1) */
++ for (i = 0; old[i].func; i++)
++ if ((probe && old[i].func != probe) ||
++ old[i].probe_private != probe_private)
++ entry->single = old[i];
++ entry->refcount = 1;
++ entry->ptype = 0;
++ } else {
++ int j = 0;
++ /* N -> M, (N > 1, M > 1) */
++ /* + 1 for NULL */
++ new = kzalloc((nr_probes - nr_del + 1)
++ * sizeof(struct marker_probe_closure), GFP_KERNEL);
++ if (new == NULL)
++ return ERR_PTR(-ENOMEM);
++ for (i = 0; old[i].func; i++)
++ if ((probe && old[i].func != probe) ||
++ old[i].probe_private != probe_private)
++ new[j++] = old[i];
++ entry->refcount = nr_probes - nr_del;
++ entry->ptype = 1;
++ entry->multi = new;
++ }
++ debug_print_probes(entry);
++ return old;
++}
++
++/*
++ * Look up a marker entry by name in the marker hash table.
++ * The caller must hold markers_mutex.
++ * Returns the matching entry, or NULL when the name is not present.
++ */
++static struct marker_entry *get_marker(const char *name)
++{
++ struct marker_entry *entry;
++ struct hlist_node *pos;
++ struct hlist_head *bucket;
++ u32 key = jhash(name, strlen(name), 0);
++
++ bucket = &marker_table[key & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(entry, pos, bucket, hlist) {
++ if (strcmp(name, entry->name) == 0)
++ return entry;
++ }
++ return NULL;
++}
++
++/*
++ * Allocate a marker entry for @name and add it to the marker hash table.
++ * Returns the entry, ERR_PTR(-EBUSY) or ERR_PTR(-ENOMEM). Hold markers_mutex.
++ */
++static struct marker_entry *add_marker(const char *name, const char *format)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ size_t name_len = strlen(name) + 1; /* length including the NUL */
++ size_t format_len = 0;
++ u32 hash = jhash(name, name_len-1, 0); /* the NUL is not hashed */
++
++ if (format)
++ format_len = strlen(format) + 1;
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name)) {
++ printk(KERN_NOTICE
++ "Marker %s busy\n", name);
++ return ERR_PTR(-EBUSY); /* Already there */
++ }
++ }
++ /*
++ * Using kmalloc here to allocate a variable length element. Could
++ * cause some memory fragmentation if overused.
++ */
++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
++ GFP_KERNEL);
++ if (!e)
++ return ERR_PTR(-ENOMEM);
++ memcpy(&e->name[0], name, name_len);
++ if (format) {
++ e->format = &e->name[name_len]; /* format is stored after the name */
++ memcpy(e->format, format, format_len);
++ if (strcmp(e->format, MARK_NOARGS) == 0)
++ e->call = marker_probe_cb_noarg;
++ else
++ e->call = marker_probe_cb;
++ trace_mark(core_marker_format, "name %s format %s",
++ e->name, e->format);
++ } else {
++ e->format = NULL;
++ e->call = marker_probe_cb;
++ }
++ e->single.func = __mark_empty_function; /* no probe connected yet */
++ e->single.probe_private = NULL;
++ e->multi = NULL;
++ e->ptype = 0;
++ e->refcount = 0;
++ e->rcu_pending = 0;
++ hlist_add_head(&e->hlist, head);
++ return e;
++}
++
++/*
++ * Remove the marker from the marker hash table and free it. Must be called
++ * with markers_mutex held. Returns 0, -ENOENT or -EBUSY.
++ */
++static int remove_marker(const char *name)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ int found = 0;
++ size_t len = strlen(name) + 1;
++ u32 hash = jhash(name, len-1, 0);
++
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name)) {
++ found = 1;
++ break;
++ }
++ }
++ if (!found)
++ return -ENOENT; /* no entry with this name */
++ if (e->single.func != __mark_empty_function)
++ return -EBUSY; /* single probe slot is still in use */
++ hlist_del(&e->hlist);
++ /* Make sure the call_rcu has been executed */
++ if (e->rcu_pending)
++ rcu_barrier();
++ kfree(e);
++ return 0;
++}
++
++/*
++ * Replace *entry with a new copy that carries @format; the old one is freed.
++ */
++static int marker_set_format(struct marker_entry **entry, const char *format)
++{
++ struct marker_entry *e;
++ size_t name_len = strlen((*entry)->name) + 1;
++ size_t format_len = strlen(format) + 1;
++
++
++ e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
++ GFP_KERNEL);
++ if (!e)
++ return -ENOMEM; /* *entry is left untouched */
++ memcpy(&e->name[0], (*entry)->name, name_len);
++ e->format = &e->name[name_len]; /* format is stored after the name */
++ memcpy(e->format, format, format_len);
++ if (strcmp(e->format, MARK_NOARGS) == 0)
++ e->call = marker_probe_cb_noarg;
++ else
++ e->call = marker_probe_cb;
++ e->single = (*entry)->single;
++ e->multi = (*entry)->multi;
++ e->ptype = (*entry)->ptype;
++ e->refcount = (*entry)->refcount;
++ e->rcu_pending = 0;
++ hlist_add_before(&e->hlist, &(*entry)->hlist); /* link the copy first */
++ hlist_del(&(*entry)->hlist);
++ /* Make sure the call_rcu has been executed */
++ if ((*entry)->rcu_pending)
++ rcu_barrier();
++ kfree(*entry);
++ *entry = e; /* the caller's pointer now refers to the copy */
++ trace_mark(core_marker_format, "name %s format %s",
++ e->name, e->format);
++ return 0;
++}
++
++/*
++ * Copy *entry's probe setup into marker @elem. Returns 0, -EPERM or -ENOMEM.
++ */
++static int set_marker(struct marker_entry **entry, struct marker *elem,
++ int active)
++{
++ int ret;
++ WARN_ON(strcmp((*entry)->name, elem->name) != 0);
++
++ if ((*entry)->format) {
++ if (strcmp((*entry)->format, elem->format) != 0) {
++ printk(KERN_NOTICE
++ "Format mismatch for probe %s "
++ "(%s), marker (%s)\n",
++ (*entry)->name,
++ (*entry)->format,
++ elem->format);
++ return -EPERM; /* formats differ: @elem is left untouched */
++ }
++ } else {
++ ret = marker_set_format(entry, elem->format); /* may replace *entry */
++ if (ret)
++ return ret;
++ }
++
++ /*
++ * probe_cb setup (statically known) is done here. It is
++ * asynchronous with the rest of execution, therefore we only
++ * pass from a "safe" callback (with argument) to an "unsafe"
++ * callback (does not set arguments).
++ */
++ elem->call = (*entry)->call;
++ /*
++ * Sanity check :
++ * We only update the single probe private data when the ptr is
++ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
++ */
++ WARN_ON(elem->single.func != __mark_empty_function
++ && elem->single.probe_private
++ != (*entry)->single.probe_private &&
++ !elem->ptype);
++ elem->single.probe_private = (*entry)->single.probe_private;
++ /*
++ * Make sure the private data is valid when we update the
++ * single probe ptr.
++ */
++ smp_wmb();
++ elem->single.func = (*entry)->single.func;
++ /*
++ * We also make sure that the new probe callbacks array is consistent
++ * before setting a pointer to it.
++ */
++ rcu_assign_pointer(elem->multi, (*entry)->multi);
++ /*
++ * Update the function or multi probe array pointer before setting the
++ * ptype.
++ */
++ smp_wmb();
++ elem->ptype = (*entry)->ptype;
++ elem->state = active;
++
++ return 0;
++}
++
++/*
++ * Disable a marker and its probe callback.
++ * Note: the original callback is only guaranteed to be unused after a
++ * synchronize_sched() issued once the empty function has been installed.
++ * This is ensured by preemption being disabled around the call site.
++ */
++static void disable_marker(struct marker *elem)
++{
++ /* leave "call" as is. It is known statically. */
++ elem->state = 0;
++ elem->single.func = __mark_empty_function;
++ /* Update the function before setting the ptype */
++ smp_wmb();
++ elem->ptype = 0; /* single probe */
++ /*
++ * Leave the private data and id there, because removal is racy and
++ * should be done only after a synchronize_sched(). These are never used
++ * until the next initialization anyway.
++ */
++}
++
++/**
++ * marker_update_probe_range - Update a probe range
++ * @begin: beginning of the range
++ * @end: end of the range
++ *
++ * Updates every marker in [@begin, @end) while holding markers_mutex.
++ */
++void marker_update_probe_range(struct marker *begin,
++ struct marker *end)
++{
++ struct marker *iter;
++ struct marker_entry *mark_entry;
++
++ mutex_lock(&markers_mutex);
++ for (iter = begin; iter < end; iter++) {
++ mark_entry = get_marker(iter->name);
++ if (mark_entry) {
++ set_marker(&mark_entry, iter,
++ !!mark_entry->refcount); /* active iff refcount != 0 */
++ /*
++ * ignore error, continue
++ */
++ } else {
++ disable_marker(iter); /* no entry registered for this name */
++ }
++ }
++ mutex_unlock(&markers_mutex);
++}
++
++/*
++ * Update probes: re-sync every marker site, in the core kernel and in
++ * modules, with the marker hash table.
++ * NOTE(review): takes no module parameter and does not itself call
++ * synchronize_sched(); do not rely on either from this function.
++ *
++ * Internal callback only changed before the first probe is connected to it.
++ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
++ * transitions. All other transitions will leave the old private data valid.
++ * This makes the non-atomicity of the callback/private data updates valid.
++ *
++ * "special case" updates :
++ * 0 -> 1 callback
++ * 1 -> 0 callback
++ * 1 -> 2 callbacks
++ * 2 -> 1 callbacks
++ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
++ * Side effect : marker_set_format may delete the marker entry (creating a
++ * replacement).
++ */
++static void marker_update_probes(void)
++{
++ /* Core kernel markers */
++ marker_update_probe_range(__start___markers, __stop___markers);
++ /* Markers in modules. */
++ module_update_markers();
++}
++
++/**
++ * marker_probe_register - Connect a probe to a marker
++ * @name: marker name
++ * @format: format string
++ * @probe: probe handler
++ * @probe_private: probe private data
++ *
++ * private data must be a valid allocated memory address, or NULL.
++ * Returns 0 if ok, or a negative errno value on error.
++ * The probe address must at least be aligned on the architecture pointer size.
++ */
++int marker_probe_register(const char *name, const char *format,
++ marker_probe_func *probe, void *probe_private)
++{
++ struct marker_entry *entry;
++ int ret = 0;
++ struct marker_probe_closure *old;
++
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name);
++ if (!entry) {
++ entry = add_marker(name, format);
++ if (IS_ERR(entry)) {
++ ret = PTR_ERR(entry);
++ goto end;
++ }
++ }
++ /*
++ * If we detect that a call_rcu is pending for this marker,
++ * make sure it's executed now.
++ */
++ if (entry->rcu_pending)
++ rcu_barrier();
++ old = marker_entry_add_probe(entry, probe, probe_private);
++ if (IS_ERR(old)) {
++ ret = PTR_ERR(old);
++ goto end; /* NOTE(review): an entry added above stays in the table */
++ }
++ mutex_unlock(&markers_mutex);
++ marker_update_probes(); /* may update entry */
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name); /* look up again: the entry may be replaced */
++ WARN_ON(!entry); /* NOTE(review): a NULL entry is still dereferenced below */
++ entry->oldptr = old;
++ entry->rcu_pending = 1;
++ /* write rcu_pending before calling the RCU callback */
++ smp_wmb();
++ call_rcu(&entry->rcu, free_old_closure);
++end:
++ mutex_unlock(&markers_mutex);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_register);
++
++/**
++ * marker_probe_unregister - Disconnect a probe from a marker
++ * @name: marker name
++ * @probe: probe function pointer
++ * @probe_private: probe private data
++ *
++ * Returns 0 on success, or -ENOENT when no marker entry is named @name.
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled section
++ * has finished.
++ */
++int marker_probe_unregister(const char *name,
++ marker_probe_func *probe, void *probe_private)
++{
++ struct marker_entry *entry;
++ struct marker_probe_closure *old;
++ int ret = 0;
++
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name);
++ if (!entry) {
++ ret = -ENOENT;
++ goto end;
++ }
++ if (entry->rcu_pending)
++ rcu_barrier();
++ /* NOTE(review): unlike the register path, no IS_ERR() check on old */
++ old = marker_entry_remove_probe(entry, probe, probe_private);
++ mutex_unlock(&markers_mutex);
++ marker_update_probes(); /* may update entry */
++ mutex_lock(&markers_mutex);
++ entry = get_marker(name); /* NOTE(review): not NULL-checked before use */
++ entry->oldptr = old;
++ entry->rcu_pending = 1;
++ /* write rcu_pending before calling the RCU callback */
++ smp_wmb();
++ call_rcu(&entry->rcu, free_old_closure);
++ remove_marker(name); /* Ignore busy error message */
++end:
++ mutex_unlock(&markers_mutex);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister);
++
++/*
++ * First entry with @probe/@probe_private connected, or NULL. Hold markers_mutex.
++ */
++static struct marker_entry *
++get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
++{
++ struct marker_entry *entry;
++ unsigned int i, j;
++ struct hlist_head *head;
++ struct hlist_node *node;
++
++ for (i = 0; i < MARKER_TABLE_SIZE; i++) {
++ head = &marker_table[i];
++ hlist_for_each_entry(entry, node, head, hlist) {
++ if (!entry->ptype) {
++ if (entry->single.func == probe &&
++ entry->single.probe_private == probe_private)
++ return entry;
++ } else {
++ struct marker_probe_closure *closure = entry->multi;
++ for (j = 0; closure[j].func; j++) { /* j: must not clobber i */
++ if (closure[j].func == probe &&
++ closure[j].probe_private == probe_private)
++ return entry;
++ }
++ }
++ }
++ }
++ return NULL;
++}
++
++/**
++ * marker_probe_unregister_private_data - Disconnect a probe from a marker
++ * @probe: probe function
++ * @probe_private: probe private data
++ *
++ * Unregister a probe by providing the registered private data.
++ * Only removes the first marker found in hash table.
++ * Returns 0 on success, or -ENOENT when no marker has this probe connected.
++ * We do not need to call a synchronize_sched to make sure the probes have
++ * finished running before doing a module unload, because the module unload
++ * itself uses stop_machine(), which ensures that every preempt disabled section
++ * has finished.
++ */
++int marker_probe_unregister_private_data(marker_probe_func *probe,
++ void *probe_private)
++{
++ struct marker_entry *entry;
++ int ret = 0;
++ struct marker_probe_closure *old;
++
++ mutex_lock(&markers_mutex);
++ entry = get_marker_from_private_data(probe, probe_private);
++ if (!entry) {
++ ret = -ENOENT;
++ goto end;
++ }
++ if (entry->rcu_pending)
++ rcu_barrier();
++ /* NOTE(review): NULL probe is passed, unlike marker_probe_unregister() */
++ old = marker_entry_remove_probe(entry, NULL, probe_private);
++ mutex_unlock(&markers_mutex);
++ marker_update_probes(); /* may update entry */
++ mutex_lock(&markers_mutex);
++ /* NOTE(review): the probe was removed above; can this entry still match? */
++ entry = get_marker_from_private_data(probe, probe_private);
++ WARN_ON(!entry); /* NOTE(review): a NULL entry is still dereferenced below */
++ entry->oldptr = old;
++ entry->rcu_pending = 1;
++ /* write rcu_pending before calling the RCU callback */
++ smp_wmb();
++ call_rcu(&entry->rcu, free_old_closure);
++ remove_marker(entry->name); /* Ignore busy error message */
++end:
++ mutex_unlock(&markers_mutex);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
++
++/**
++ * marker_get_private_data - Get a marker's probe private data
++ * @name: marker name
++ * @probe: probe to match
++ * @num: get the nth matching probe's private data
++ *
++ * Returns the private data pointer of the nth probe (counting from 0) that
++ * matches @probe on marker @name, or ERR_PTR(-ENOENT) when there is no such
++ * probe or no such marker.
++ * The private data pointer should _only_ be dereferenced if the caller is the
++ * owner of the data, or its content could vanish. This is mostly used to
++ * confirm that a caller is the owner of a registered probe.
++ */
++void *marker_get_private_data(const char *name, marker_probe_func *probe,
++ int num)
++{
++ struct hlist_head *head;
++ struct hlist_node *node;
++ struct marker_entry *e;
++ size_t name_len = strlen(name) + 1;
++ u32 hash = jhash(name, name_len-1, 0);
++ int i;
++
++ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
++ /* NOTE(review): markers_mutex is not taken here; confirm callers hold it */
++ hlist_for_each_entry(e, node, head, hlist) {
++ if (!strcmp(name, e->name)) {
++ if (!e->ptype) {
++ if (num == 0 && e->single.func == probe)
++ return e->single.probe_private;
++ else
++ break; /* the single probe is not the one asked for */
++ } else {
++ struct marker_probe_closure *closure;
++ int match = 0; /* matching closures seen so far */
++ closure = e->multi;
++ for (i = 0; closure[i].func; i++) {
++ if (closure[i].func != probe)
++ continue;
++ if (match++ == num)
++ return closure[i].probe_private;
++ }
++ }
++ }
++ }
++ return ERR_PTR(-ENOENT);
++}
++EXPORT_SYMBOL_GPL(marker_get_private_data);
+diff --git a/kernel/module.c b/kernel/module.c
+index 624e7ee..ae16b04 100644
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -1507,6 +1507,8 @@ static struct module *load_module(void __user *umod,
+ struct exception_table_entry *extable;
+ mm_segment_t old_fs;
+ int gpgsig_ok;
++ unsigned int markersindex;
++ unsigned int markersstringsindex;
+
+ DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
+ umod, len, uargs);
+@@ -1739,6 +1741,9 @@ static struct module *load_module(void __user *umod,
+ tainted |= TAINT_FORCED_MODULE;
+ }
+ #endif
++ markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
++ markersstringsindex = find_sec(hdr, sechdrs, secstrings,
++ "__markers_strings");
+
+ /* Now do relocations. */
+ for (i = 1; i < hdr->e_shnum; i++) {
+@@ -1762,6 +1767,12 @@ static struct module *load_module(void __user *umod,
+ goto cleanup;
+ }
+
++#ifdef CONFIG_MARKERS
++ mod->markers = (void *)sechdrs[markersindex].sh_addr;
++ mod->num_markers =
++ sechdrs[markersindex].sh_size / sizeof(*mod->markers);
++#endif
++
+ /* Set up and sort exception table */
+ mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable);
+ mod->extable = extable = (void *)sechdrs[exindex].sh_addr;
+@@ -1773,6 +1784,12 @@ static struct module *load_module(void __user *umod,
+
+ add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+
++#ifdef CONFIG_MARKERS
++ if (!tainted)
++ marker_update_probe_range(mod->markers,
++ mod->markers + mod->num_markers);
++#endif
++
+ err = module_finalize(hdr, sechdrs, mod);
+ if (err < 0)
+ goto cleanup;
+@@ -2190,6 +2207,20 @@ void struct_module(struct module *mod) { return; }
+ EXPORT_SYMBOL(struct_module);
+ #endif
+
++#ifdef CONFIG_MARKERS
++void module_update_markers(void)
++{
++ struct module *mod;
++
++ down(&module_mutex); /* presumably guards the modules list walked below */
++ list_for_each_entry(mod, &modules, list)
++ if (!tainted) /* plain 'tainted', not a per-module field */
++ marker_update_probe_range(mod->markers,
++ mod->markers + mod->num_markers);
++ up(&module_mutex);
++}
++#endif
++
+ static int __init modules_init(void)
+ {
+ return subsystem_register(&module_subsys);
+diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
+index 1b16bfc..20ade31 100644
+--- a/kernel/rcupdate.c
++++ b/kernel/rcupdate.c
+@@ -46,6 +46,7 @@
+ #include <linux/notifier.h>
+ #include <linux/rcupdate.h>
+ #include <linux/cpu.h>
++#include <linux/mutex.h>
+
+ /* Definition for rcupdate control block. */
+ struct rcu_ctrlblk rcu_ctrlblk =
+@@ -98,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head,
+ local_irq_restore(flags);
+ }
+
++static atomic_t rcu_barrier_cpu_count;
++static DEFINE_MUTEX(rcu_barrier_mutex);
++static struct completion rcu_barrier_completion;
++
+ /**
+ * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
+ * @head: structure to be used for queueing the RCU updates.
+@@ -410,6 +415,44 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self,
+ return NOTIFY_OK;
+ }
+
++static void rcu_barrier_callback(struct rcu_head *notused)
++{
++ if (atomic_dec_and_test(&rcu_barrier_cpu_count)) /* count reached zero */
++ complete(&rcu_barrier_completion); /* wakes the rcu_barrier() waiter */
++}
++
++/*
++ * Called with preemption disabled, and from cross-cpu IRQ context.
++ */
++static void rcu_barrier_func(void *notused)
++{
++ int cpu = smp_processor_id();
++ struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
++ struct rcu_head *head;
++
++ head = &rdp->barrier; /* rcu_head taken from this CPU's rcu_data */
++ atomic_inc(&rcu_barrier_cpu_count); /* one more callback outstanding */
++ call_rcu(head, rcu_barrier_callback);
++}
++
++/**
++ * rcu_barrier - Wait until all the in-flight RCUs are complete.
++ */
++void rcu_barrier(void)
++{
++ BUG_ON(in_interrupt());
++ /* rcu_barrier_mutex serializes use of the shared count and completion */
++ mutex_lock(&rcu_barrier_mutex);
++ init_completion(&rcu_barrier_completion);
++ atomic_set(&rcu_barrier_cpu_count, 0);
++ on_each_cpu(rcu_barrier_func, NULL, 0, 1); /* queue a callback per CPU */
++ wait_for_completion(&rcu_barrier_completion);
++ mutex_unlock(&rcu_barrier_mutex);
++}
++EXPORT_SYMBOL_GPL(rcu_barrier);
++
++
++
+ static struct notifier_block __devinitdata rcu_nb = {
+ .notifier_call = rcu_cpu_notify,
+ };
+diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
+index b3d31b5..b100a32 100644
+--- a/scripts/Makefile.modpost
++++ b/scripts/Makefile.modpost
+@@ -13,6 +13,7 @@
+ # 2) modpost is then used to
+ # 3) create one <module>.mod.c file pr. module
+ # 4) create one Module.symvers file with CRC for all exported symbols
++# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
+ # 5) compile all <module>.mod.c files
+ # 6) final link of the module to a <module.ko> file
+
+@@ -40,6 +41,11 @@ include scripts/Makefile.lib
+
+ symverfile := $(objtree)/Module.symvers
+
++kernelmarkersfile := $(objtree)/Module.markers
++modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
++
++markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
++
+ # Step 1), find all modules listed in $(MODVERDIR)/
+ __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
+ modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o)))
+@@ -53,16 +59,27 @@ quiet_cmd_modpost = MODPOST
+ cmd_modpost = scripts/mod/modpost \
+ $(if $(CONFIG_MODVERSIONS),-m) \
+ $(if $(KBUILD_EXTMOD),-i,-o) $(symverfile) \
++ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
++ $(if $(CONFIG_MARKERS),-M $(markersfile)) \
+ $(filter-out FORCE,$^)
+
+ .PHONY: __modpost
+ __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE
+ $(call cmd,modpost)
+
++quiet_cmd_kernel-mod = MODPOST $@
++ cmd_kernel-mod = $(cmd_modpost) $@
++
++vmlinux.o: FORCE
++ $(call cmd,kernel-mod)
++
+ # Declare generated files as targets for modpost
+ $(symverfile): __modpost ;
+ $(modules:.ko=.mod.c): __modpost ;
+
++ifdef CONFIG_MARKERS
++$(markersfile): __modpost ;
++endif
+
+ # Step 5), compile all *.mod.c files
+
+diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
+index 2a174e5..c25948c 100644
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -10,7 +10,8 @@
+ *
+ * Usage: modpost vmlinux module1.o module2.o ...
+ */
+-
++#define _GNU_SOURCE
++#include <stdio.h>
+ #include <ctype.h>
+ #include "modpost.h"
+
+@@ -289,7 +290,9 @@ parse_elf(struct elf_info *info, const char *filename)
+ if (strcmp(secstrings+sechdrs[i].sh_name, ".modinfo") == 0) {
+ info->modinfo = (void *)hdr + sechdrs[i].sh_offset;
+ info->modinfo_len = sechdrs[i].sh_size;
+- }
++ } else if (strcmp(secstrings+sechdrs[i].sh_name, "__markers_strings") == 0)
++ info->markers_strings_sec = i;
++
+ if (sechdrs[i].sh_type != SHT_SYMTAB)
+ continue;
+
+@@ -397,6 +400,63 @@ is_vmlinux(const char *modname)
+ return strcmp(myname, "vmlinux") == 0;
+ }
+
++static void get_markers(struct elf_info *info, struct module *mod)
++{
++ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec];
++ const char *strings = (const char *) info->hdr + sh->sh_offset;
++ const Elf_Sym *sym, *first_sym, *last_sym;
++ size_t n;
++
++
++ if (!info->markers_strings_sec)
++ return; /* 0: presumably no __markers_strings section was recorded */
++
++ /*
++ * First count the strings. We look for all the symbols defined
++ * in the __markers_strings section named __mstrtab_*. For
++ * these local names, the compiler puts a random .NNN suffix on,
++ * so the names don't correspond exactly.
++ */
++ first_sym = last_sym = NULL;
++ n = 0;
++ for (sym = info->symtab_start; sym < info->symtab_stop; sym++)
++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
++ sym->st_shndx == info->markers_strings_sec &&
++ !strncmp(info->strtab + sym->st_name,
++ "__mstrtab_", sizeof "__mstrtab_" - 1)) {
++ if (first_sym == NULL)
++ first_sym = sym;
++ last_sym = sym;
++ ++n;
++ }
++
++ if (n == 0)
++ return;
++ /*
++ * Now collect each name and format into a line for the output.
++ * Lines look like:
++ * marker_name vmlinux marker %s format %d
++ * The format string after the second \t can use whitespace.
++ */
++ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n));
++ mod->nmarkers = n;
++
++ n = 0;
++ for (sym = first_sym; sym <= last_sym; sym++) /* same filter as above */
++ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
++ sym->st_shndx == info->markers_strings_sec &&
++ !strncmp(info->strtab + sym->st_name,
++ "__mstrtab_", sizeof "__mstrtab_" - 1)) {
++ const char *name = strings + sym->st_value;
++ const char *fmt = strchr(name, '\0') + 1; /* format follows the name's NUL */
++ char *line = NULL;
++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
++ NOFAIL(line);
++ mod->markers[n++] = line;
++ }
++}
++
++
+ void
+ read_symbols(char *modname)
+ {
+@@ -426,6 +486,7 @@ read_symbols(char *modname)
+ }
+ maybe_frob_version(modname, info.modinfo, info.modinfo_len,
+ (void *)info.modinfo - (void *)info.hdr);
++ get_markers(&info, mod);
+ parse_elf_finish(&info);
+
+ /* Our trick to get versioning for struct_module - it's
+@@ -682,6 +743,92 @@ write_dump(const char *fname)
+ write_if_changed(&buf, fname);
+ }
+
++static void add_marker(struct module *mod, const char *name, const char *fmt)
++{
++ char *line = NULL; /* becomes "name\tmodule\tformat\n" */
++ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
++ NOFAIL(line);
++
++ /* grow the per-module array by one slot and append the new line */
++ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
++ sizeof mod->markers[0])));
++ mod->markers[mod->nmarkers++] = line;
++}
++
++static void read_markers(const char *fname)
++{
++ unsigned long size, pos = 0;
++ void *file = grab_file(fname, &size); /* NOTE(review): not released in this function */
++ char *line;
++
++ if (!file) /* No old markers, silently ignore */
++ return;
++
++ while ((line = get_next_line(&pos, file, size))) {
++ char *marker, *modname, *fmt;
++ struct module *mod;
++
++ marker = line; /* expected layout: marker \t module \t format */
++ modname = strchr(marker, '\t');
++ if (!modname)
++ goto fail;
++ *modname++ = '\0'; /* terminate the marker field in place */
++ fmt = strchr(modname, '\t');
++ if (!fmt)
++ goto fail;
++ *fmt++ = '\0'; /* terminate the module field in place */
++ if (*marker == '\0' || *modname == '\0')
++ goto fail;
++
++ mod = find_module(modname);
++ if (!mod) {
++ if (is_vmlinux(modname))
++ have_vmlinux = 1;
++ mod = new_module(NOFAIL(strdup(modname)));
++ mod->skip = 1;
++ }
++
++ add_marker(mod, marker, fmt);
++ }
++ return;
++fail:
++ fatal("parse error in markers list file\n");
++}
++
++static int compare_strings(const void *a, const void *b)
++{
++ return strcmp(*(const char **) a, *(const char **) b); /* a, b point at char * slots */
++}
++
++static void write_markers(const char *fname)
++{
++ struct buffer buf = { };
++ struct module *mod;
++ size_t i;
++
++ for (mod = modules; mod; mod = mod->next)
++ if (mod->markers != NULL) {
++ /*
++ * Sort the strings so we can skip duplicates when
++ * we write them out.
++ */
++ qsort(mod->markers, mod->nmarkers,
++ sizeof mod->markers[0], &compare_strings);
++ for (i = 0; i < mod->nmarkers; ++i) {
++ char *line = mod->markers[i];
++ buf_write(&buf, line, strlen(line));
++ while (i + 1 < mod->nmarkers && /* free and skip duplicates */
++ !strcmp(mod->markers[i],
++ mod->markers[i + 1]))
++ free(mod->markers[i++]);
++ free(mod->markers[i]); /* last copy of the line just written */
++ }
++ free(mod->markers);
++ mod->markers = NULL;
++ }
++
++ write_if_changed(&buf, fname);
++}
++
+ int
+ main(int argc, char **argv)
+ {
+@@ -690,8 +837,10 @@ main(int argc, char **argv)
+ char fname[SZ];
+ char *dump_read = NULL, *dump_write = NULL;
+ int opt;
++ char *markers_read = NULL;
++ char *markers_write = NULL;
+
+- while ((opt = getopt(argc, argv, "i:mo:")) != -1) {
++ while ((opt = getopt(argc, argv, "i:mo:M:K:")) != -1) {
+ switch(opt) {
+ case 'i':
+ dump_read = optarg;
+@@ -702,6 +851,12 @@ main(int argc, char **argv)
+ case 'o':
+ dump_write = optarg;
+ break;
++ case 'M':
++ markers_write = optarg;
++ break;
++ case 'K':
++ markers_read = optarg;
++ break;
+ default:
+ exit(1);
+ }
+@@ -732,6 +887,12 @@ main(int argc, char **argv)
+ if (dump_write)
+ write_dump(dump_write);
+
++ if (markers_read)
++ read_markers(markers_read);
++
++ if (markers_write)
++ write_markers(markers_write);
++
+ return 0;
+ }
+
+diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
+index 4871343..d79d7ea 100644
+--- a/scripts/mod/modpost.h
++++ b/scripts/mod/modpost.h
+@@ -18,6 +18,7 @@
+ #define Elf_Sym Elf32_Sym
+ #define ELF_ST_BIND ELF32_ST_BIND
+ #define ELF_ST_TYPE ELF32_ST_TYPE
++#define Elf_Section Elf32_Half
+
+ #else
+
+@@ -26,7 +27,7 @@
+ #define Elf_Sym Elf64_Sym
+ #define ELF_ST_BIND ELF64_ST_BIND
+ #define ELF_ST_TYPE ELF64_ST_TYPE
+-
++#define Elf_Section Elf64_Half
+ #endif
+
+ #if KERNEL_ELFDATA != HOST_ELFDATA
+@@ -77,6 +78,8 @@ struct module {
+ int has_init;
+ int has_cleanup;
+ struct buffer dev_table_buf;
++ char **markers;
++ size_t nmarkers;
+ };
+
+ struct elf_info {
+@@ -85,6 +88,7 @@ struct elf_info {
+ Elf_Shdr *sechdrs;
+ Elf_Sym *symtab_start;
+ Elf_Sym *symtab_stop;
++ Elf_Section markers_strings_sec;
+ const char *strtab;
+ char *modinfo;
+ unsigned int modinfo_len;
More information about the Kernelpatches-commits
mailing list