Hello,
Thanks for your email.
Most of the setup looks correct. However, the problem might be on the Linux kernel side.
I assume you are using petalinux as your build system. If so, you must be using this driver: https://github.com/Xilinx/linux-xlnx/blob/xlnx_rebase_v6.6_LTS_2024.2/driver...
As far as I know, the above driver does not have coredump support, so coredump won't work out of the box from PetaLinux.
The following is one possible workaround:
Coredump support is available only in the upstream driver: https://git.kernel.org/pub/scm/linux/kernel/git/remoteproc/linux.git/commit/...
For now, you can port the above patch to the downstream driver (zynqmp_r5_remoteproc.c) and enable coredump support.
Thanks, Tanmay
On 1/17/25 10:22 AM, siva via Openamp-rp wrote:
Hello community,
I am trying to get a trace buffer set up and working in the event of a crash on the RPU R5F cores. I am using a Xilinx Zynq MP board with both Cortex-R5F cores in split mode. I built a minimal RPU firmware application in which I try to force a crash and get the trace details from the trace buffer.
What I have done so far (with help from Xilinx support personnel):
- Built the libmetal latest version 1.7.0 from https://github.com/OpenAMP/libmetal
- Built the openamp latest version 1.7.0 from https://github.com/OpenAMP/open-amp
- Built the RPU firmware app with both of these libraries; I had to build and link both to use the metal debug feature.
In the app, I create a resource table for the trace buffer as follows:
/*
 * Resource table handed to the host: one rpmsg virtio device (with its two
 * vrings) and one trace buffer entry.  The offset[] array records where each
 * top-level entry lives inside this structure.
 */
struct remote_resource_table __resource resources = {
	.version = 1,
	.num = NUM_TABLE_ENTRIES,
	.reserved = {0, 0},
	.offset = {
		offsetof(struct remote_resource_table, rpmsg_vdev),
		offsetof(struct remote_resource_table, rsc_trace),
	},

	/* Virtio device entry */
	.rpmsg_vdev = {
		.type = RSC_VDEV,
		.id = VIRTIO_ID_RPMSG_,
		.notifyid = 31,
		.dfeatures = RPMSG_VDEV_DFEATURES,
		.gfeatures = 0,
		.config_len = 0,
		.status = 0,
		.num_of_vrings = NUM_VRINGS,
		.reserved = {0, 0},
	},

	/* Vring rsc entry - part of vdev rsc entry */
	.rpmsg_vring0 = {RING_TX, VRING_ALIGN, VRING_SIZE, 1, 0},
	.rpmsg_vring1 = {RING_RX, VRING_ALIGN, VRING_SIZE, 2, 0},

	/* trace buffer for logs, accessible via debugfs */
	.rsc_trace = {
		.type = RSC_TRACE,
		.da = (unsigned int)rsc_trace_buf,
		.len = sizeof(rsc_trace_buf),
		.reserved = 0,
		.name = "r5_trace",
	},
};
and in linker file, added the section in ddr as below:
/* Define Memories in the system */
/*
 * Memory regions available to the linker.  Each entry gives the region's
 * start address (ORIGIN) and size in bytes (LENGTH).
 */
MEMORY {
	psu_ocm_ram_0_MEM_0 : ORIGIN = 0xFFFC0000, LENGTH = 0x40000
	psu_r5_0_atcm_MEM_0 : ORIGIN = 0x0, LENGTH = 0x10000
	psu_r5_0_btcm_MEM_0 : ORIGIN = 0x20000, LENGTH = 0x10000
	/* 0x3ed00000..0x3ed3ffff; the .resource_table output section is placed in this region */
	psu_r5_ddr_0_MEM_0 : ORIGIN = 0x3ed00000, LENGTH = 0x40000
}
/* Specify the default entry point to the program */
ENTRY(_boot)
/* Define the sections, and where they are mapped in memory */
SECTIONS { .resource_table 0x3ed20000 : { . = ALIGN(4); *(.resource_table) } > psu_r5_ddr_0_MEM_0
Now, when I try to create a core dump file by starting and running the RPU app from userspace, after disabling recovery and enabling coredump, I don't see any trace info about the coredump in the trace file other than the print statements I added. See below.
echo "disabled" > /sys/kernel/debug/remoteproc/remoteproc0/recovery
echo "enabled" > /sys/kernel/debug/remoteproc/remoteproc0/enabled
root@zcu102-zynqmp:/sys/kernel/debug/remoteproc/remoteproc0# ls -altr total 0 drwxr-xr-x 4 root root 0 Jan 1 1970 .. -r-------- 1 root root 0 Jan 14 18:05 resource_table -rw------- 1 root root 0 Jan 14 18:05 recovery -r-------- 1 root root 0 Jan 14 18:05 name --w------- 1 root root 0 Jan 14 18:05 crash -rw------- 1 root root 0 Jan 14 18:05 coredump -r-------- 1 root root 0 Jan 14 18:05 carveout_memories drwxr-xr-x 2 root root 0 Jan 14 18:05 . -r-------- 1 root root 0 Jan 14 18:10 trace0 root@zcu102-zynqmp:/sys/kernel/debug/remoteproc/remoteproc0# cat trace0 0 L7 registered generic bus 1 L6 MCS-sp: c_buf,c_len = 0x3ed3a7ec,40962 L6 MCS-sp: platform_create_proc()3 L6 ML:platform_create_proc() 4 L6 MCS-sp: rsc_table, rsc_size = 0x3ed20000, 0x1005 L6 MCS-sp: metal_device_open(generic, poll_dev, 0x23b4) 6 L6 MCS-sp: poll{name,bus,chn_mask} = poll_dev,generic,0x10000007 L6 MCS-sp: lpa,lda= 0x3ed20000,0xffffffff 8 L6 MCS-sp: mem= 0x34d8 9 L6 MCS-sp: tmpio= 0x3518 10 L6 MCS-sp: lpa,lda= 0x3ed40000,0xffffffff 11 L6 MCS-sp: mem= 0x3558 12 L6 MCS-sp: tmpio= 0x3598 13 L6 MCS-sp: Initialize remoteproc successfully.14 L6 MCS-sp: creating remoteproc virtio rproc 0x3ed3a7b4 15 L6 MCS-sp: initializing rpmsg shared buffer pool 16 L6 MCS-sp: initializing rpmsg vdev 17 L6 MCS-sp: trying to coredump root@zcu102-zynqmp:/sys/kernel/debug/remoteproc/remoteproc0#
I also tried with the "recovery" state enabled, with no luck. Any pointers on what I may be missing here? Hopefully I can get some help from the OpenAMP community; I appreciate the help! The copied file rpu.c is the main file, where init_platform() is called to initialize the platform and init_system() is called; platform.c defines these functions along with the resource table.
Thanks Siva
rpu.c file:
extern struct rpmsg_device *
platform_create_rpmsg_vdev(void *platform, uint32_t vdev_index, uint32_t role,
			   void (*rst_cb)(struct virtio_device *vdev),
			   rpmsg_ns_bind_cb ns_bind_cb);

/**
 * Firmware entry point for the crash/trace experiment.
 *
 * Initializes the platform, creates the rpmsg virtio device, then idles in
 * 10 us steps; once the sweep counter reaches 0x400000 it logs a marker and
 * deliberately faults the core.
 *
 * @return -1 if the platform could not be initialized; otherwise the
 *         function does not return.
 */
int main(void)
{
	unsigned long sweep_count = 0;
	void *platform = NULL;
	struct rpmsg_device *rpdev;

	/*
	 * init_platform() leaves *platform untouched on failure, so passing the
	 * pointer on without checking would read an indeterminate value.
	 */
	if (init_platform(&platform) != 0)
		return -1;

	rpdev = platform_create_rpmsg_vdev(platform, 0, VIRTIO_DEV_DEVICE,
					   NULL, NULL);
	if (!rpdev) {
		/* Logged only; the crash test below still runs without rpmsg. */
		metal_log(METAL_LOG_INFO, "MCS-sp: Failed to create rpmsg virtio device.\r\n");
		metal_log(METAL_LOG_INFO, "MCS-sp: RPU reboot is required to recover\r\n");
	}

	while (1) {
		usleep(10);
		if (sweep_count++ >= 0x400000) {
			printf("MCS-sp: trying to coredump");
			metal_log(METAL_LOG_INFO, "MCS-sp: trying to coredump\r\n");
			sleep(1);
			/*
			 * Force the fault explicitly.  The previous constant
			 * expression (1/0) is undefined behaviour in C, so the
			 * compiler may emit anything for it, and whether an
			 * integer divide by zero traps at all depends on how the
			 * CPU is configured.  A trap instruction faults
			 * deterministically.
			 */
			__builtin_trap();
		}
	}

	return 0;
}
platform.c file snippet
static struct remoteproc * platform_create_proc(uint32_t proc_index, uint32_t rsc_index) { void *rsc_table; uint32_t rsc_size; int32_t ret; metal_phys_addr_t pa;
(void) proc_index; rsc_table = get_resource_table(rsc_index, &rsc_size); metal_log(METAL_LOG_INFO,"MCS-sp: rsc_table, rsc_size = %#x, %#x", rsc_table, rsc_size);
/* Register IPI device */ if (metal_register_generic_device(&kick_device)) return NULL;
/* Initialize remoteproc instance */ if (!remoteproc_init(&rproc_inst, &zynqmp_r5_a53_proc_ops, &rproc_priv)) return NULL;
metal_log(METAL_LOG_INFO,"MCS-sp: poll{name,bus,chn_mask} = %s,%s,%#x", rproc_priv.kick_dev_name, rproc_priv.kick_dev_bus_name, IPI_CHN_BITMASK); /* * Mmap shared memories * Or shall we constraint that they will be set as carved out * in the resource table? */ /* mmap resource table */ pa = (metal_phys_addr_t)rsc_table; (void *)remoteproc_mmap(&rproc_inst, &pa, NULL, rsc_size, NORM_NSHARED_NCACHE|PRIV_RW_USER_RW, &rproc_inst.rsc_io); /* mmap shared memory */ pa = SHARED_MEM_PA; (void *)remoteproc_mmap(&rproc_inst, &pa, NULL, SHARED_MEM_SIZE, NORM_NSHARED_NCACHE|PRIV_RW_USER_RW, NULL);
/* parse resource table to remoteproc */ ret = remoteproc_set_rsc_table(&rproc_inst, rsc_table, rsc_size); if (ret != 0) { metal_log(METAL_LOG_INFO,"MCS-sp: Failed to initialize remoteproc"); remoteproc_remove(&rproc_inst); return NULL; } metal_log(METAL_LOG_INFO,"MCS-sp: Initialize remoteproc successfully.");
return &rproc_inst; }
int32_t init_system(void);
struct rpmsg_device *
platform_create_rpmsg_vdev(void *platform, uint32_t vdev_index, uint32_t role,
			   void (*rst_cb)(struct virtio_device *vdev),
			   rpmsg_ns_bind_cb ns_bind_cb);

/**
 * Initialize the system and create the remoteproc instance.
 *
 * @param platform  out: receives the remoteproc pointer on success; left
 *                  untouched on failure
 *
 * @return 0 on success, -EINVAL if platform is NULL or the remoteproc
 *         instance could not be created
 */
int init_platform(void **platform)
{
	unsigned long proc_id = 0;
	unsigned long rsc_id = 0;
	struct remoteproc *rproc;

	/* metal_log setup is in init_system */
	if (!platform) {
		/* note the space after the comma: the two literals are concatenated */
		metal_log(METAL_LOG_INFO, "MCS-sp: Failed to initialize platform, "
					  "NULL pointer to store platform data.\r\n");
		return -EINVAL;
	}

	/* The result is deliberately ignored, as before; setup continues regardless. */
	(void)init_system();

	metal_log(METAL_LOG_INFO, "MCS-sp: platform_create_proc()\r\n");
	ML_INFO("ML:platform_create_proc()\r\n");

	rproc = platform_create_proc(proc_id, rsc_id);
	if (!rproc) {
		metal_log(METAL_LOG_INFO, "MCS-sp: Failed to create remoteproc device.\r\n");
		return -EINVAL;
	}

	*platform = rproc;
	return 0;
}
/**
 * Platform teardown: disables the caches.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * proper prototype; an empty list leaves the parameters unspecified in
 * pre-C23 C.
 */
void cleanup_platform(void)
{
	disable_caches();
}
/**
 * Create the rpmsg virtio device on top of an initialized remoteproc.
 *
 * @param platform    remoteproc instance produced by init_platform()
 * @param vdev_index  index of the virtio device in the resource table
 * @param role        virtio role passed to remoteproc_create_virtio()
 * @param rst_cb      virtio reset callback, may be NULL
 * @param ns_bind_cb  name-service bind callback, may be NULL
 *
 * @return the rpmsg device on success, NULL on failure.
 *
 * The returned device is obtained from the rpmsg_virtio_device allocated
 * here, so that allocation must stay alive for as long as the caller uses
 * the device.  The previous code freed it on the success path right before
 * returning, handing the caller a pointer into freed memory.
 */
struct rpmsg_device *
platform_create_rpmsg_vdev(void *platform, uint32_t vdev_index, uint32_t role,
			   void (*rst_cb)(struct virtio_device *vdev),
			   rpmsg_ns_bind_cb ns_bind_cb)
{
	struct remoteproc *rproc = platform;
	struct rpmsg_virtio_device *rpmsg_vdev;
	struct virtio_device *vdev;
	void *shbuf;
	struct metal_io_region *shbuf_io;
	int32_t ret;

	if (!rproc)
		return NULL;

	rpmsg_vdev = metal_allocate_memory(sizeof(*rpmsg_vdev));
	if (!rpmsg_vdev)
		return NULL;

	shbuf_io = remoteproc_get_io_with_pa(rproc, SHARED_MEM_PA);
	if (!shbuf_io)
		goto err1;
	shbuf = metal_io_phys_to_virt(shbuf_io,
				      SHARED_MEM_PA + SHARED_BUF_OFFSET);

	metal_log(METAL_LOG_INFO, "MCS-sp: creating remoteproc virtio rproc %p\r\n", rproc);
	/* TODO: can we have a wrapper for the following two functions? */
	vdev = remoteproc_create_virtio(rproc, vdev_index, role, rst_cb);
	if (!vdev) {
		metal_log(METAL_LOG_INFO, "MCS-sp: failed remoteproc_create_virtio\r\n");
		goto err1;
	}

	metal_log(METAL_LOG_INFO, "MCS-sp: initializing rpmsg shared buffer pool\r\n");
	/* Only RPMsg virtio master needs to initialize the shared buffers pool */
	rpmsg_virtio_init_shm_pool(&shpool, shbuf,
				   (SHARED_MEM_SIZE - SHARED_BUF_OFFSET));

	metal_log(METAL_LOG_INFO, "MCS-sp: initializing rpmsg vdev\r\n");
	/* RPMsg virtio device can set shared buffers pool argument to NULL */
	ret = rpmsg_init_vdev(rpmsg_vdev, vdev, ns_bind_cb, shbuf_io, &shpool);
	if (ret != 0) {
		metal_log(METAL_LOG_INFO, "MCS-sp: failed rpmsg_init_vdev\r\n");
		goto err2;
	}

	/* Ownership of rpmsg_vdev passes to the caller via the returned device. */
	return rpmsg_virtio_get_rpmsg_device(rpmsg_vdev);

err2:
	remoteproc_remove_virtio(rproc, vdev);
err1:
	metal_free_memory(rpmsg_vdev);
	return NULL;
}
/*
 * Store one character in the circular trace buffer, wrapping the write
 * position back to the start once it has reached the buffer length.
 */
static void rsc_trace_putchar(char c)
{
	if (circ.c_pos >= circ.c_len) {
		circ.c_pos = 0;
	}

	circ.c_buf[circ.c_pos] = c;
	circ.c_pos++;
}
/*
 * libmetal log handler: formats one record as "<count> L<level> <message>"
 * into a local buffer, copies it character by character into the circular
 * trace buffer, and echoes it with xil_printf().
 *
 * Messages that do not fit in the 128-byte buffer are truncated.
 */
static void rsc_trace_logger(enum metal_log_level level,
			     const char *format, ...)
{
	char msg[128];
	const char *p;
	size_t copied;
	int len;
	va_list args;

	/* prefix "cnt L6 ": record count and log level */
	len = snprintf(msg, sizeof(msg), "%lu L%u ",
		       (unsigned long)circ.c_cnt, (unsigned int)level);
	if (len < 0 || (size_t)len >= sizeof(msg))
		len = 0;
	circ.c_cnt++;

	va_start(args, format);
	vsnprintf(msg + len, sizeof(msg) - (size_t)len, format, args);
	va_end(args);

	/* copy at most sizeof(msg) to the circular buffer */
	for (copied = 0, p = msg; *p && copied < sizeof(msg); ++copied, ++p)
		rsc_trace_putchar(*p);

	/* Remove this xil_printf to stop printing to console */
	xil_printf("%s", msg);
}
/*
 * Report the trace buffer: stores its size in *len and returns its address.
 */
char *get_rsc_trace_info(uint32_t *len)
{
	char *const trace_start = rsc_trace_buf;

	*len = sizeof(rsc_trace_buf);

	return trace_start;
}

/*
 * Report the resource table: stores its size in *len and returns its
 * address.  rsc_id is ignored.
 */
void *get_resource_table (uint32_t rsc_id, uint32_t *len)
{
	void *const table = &resources;

	(void)rsc_id;
	*len = sizeof(resources);

	return table;
}
/**
 * Bring up libmetal logging, the interrupt controller and the libmetal
 * Xilinx IRQ controller.
 *
 * When a trace buffer is available, log records are routed to it through
 * rsc_trace_logger at debug level.
 *
 * @return 0 on success; otherwise the non-zero result of metal_init() or
 *         metal_xlnx_irq_init()
 */
int32_t init_system(void)
{
	int32_t ret;
	struct metal_init_params metal_param = METAL_INIT_DEFAULTS;

	circ.c_buf = get_rsc_trace_info(&circ.c_len);
	if (circ.c_buf && circ.c_len) {
		metal_param.log_handler = &rsc_trace_logger;
		metal_param.log_level = METAL_LOG_DEBUG;
		circ.c_pos = circ.c_cnt = 0;
	}

	/* Low level abstraction layer for openamp initialization */
	ret = metal_init(&metal_param);
	if (ret != 0) {
		/* Nothing below can work without libmetal; report the failure. */
		return ret;
	}

	/* configure the global interrupt controller */
	app_gic_initialize();

	/* Initialize metal Xilinx IRQ controller */
	ret = metal_xlnx_irq_init();
	if (ret != 0) {
		metal_log(METAL_LOG_INFO, "MCS-sp: metal_xlnx_irq_init failed.\r\n");
	}

	metal_log(METAL_LOG_INFO, "MCS-sp: c_buf,c_len = %p,%lu\r\n",
		  (void *)circ.c_buf, (unsigned long)circ.c_len);
	return ret;
}