linux/drivers/ptp/ptp_clock.c

550 lines
13 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PTP 1588 clock support
*
* Copyright (C) 2010 OMICRON electronics GmbH
*/
#include <linux/idr.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/posix-clock.h>
#include <linux/pps_kernel.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/debugfs.h>
ptp: introduce ptp auxiliary worker Many PTP drivers required to perform some asynchronous or periodic work, like periodically handling PHC counter overflow or handle delayed timestamp for RX/TX network packets. In most of the cases, such work is implemented using workqueues. Unfortunately, Kernel workqueues might introduce significant delay in work scheduling under high system load and on -RT, which could cause misbehavior of PTP drivers due to internal counter overflow, for example, and there is no way to tune its execution policy and priority manuallly. Hence, The kthread_worker can be used insted of workqueues, as it create separte named kthread for each worker and its its execution policy and priority can be configured using chrt tool. This prblem was reported for two drivers TI CPSW CPTS and dp83640, so instead of modifying each of these driver it was proposed to add PTP auxiliary worker to the PHC subsystem. The patch adds PTP auxiliary worker in PHC subsystem using kthread_worker and kthread_delayed_work and introduces two new PHC subsystem APIs: - long (*do_aux_work)(struct ptp_clock_info *ptp) callback in ptp_clock_info structure, which driver should assign if it require to perform asynchronous or periodic work. Driver should return the delay of the PTP next auxiliary work scheduling time (>=0) or negative value in case further scheduling is not required. - int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) which allows schedule PTP auxiliary work. The name of kthread_worker thread corresponds PTP PHC device name "ptp%d". Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-07-29 06:30:02 +08:00
#include <uapi/linux/sched/types.h>
#include "ptp_private.h"
#define PTP_MAX_ALARMS 4
#define PTP_PPS_DEFAULTS (PPS_CAPTUREASSERT | PPS_OFFSETASSERT)
#define PTP_PPS_EVENT PPS_CAPTUREASSERT
#define PTP_PPS_MODE (PTP_PPS_DEFAULTS | PPS_CANWAIT | PPS_TSFMT_TSPEC)
const struct class ptp_class = {
.name = "ptp",
.dev_groups = ptp_groups
};
/* private globals */
static dev_t ptp_devt;
static DEFINE_IDA(ptp_clocks_map);
/* time stamp event queue operations */
static inline int queue_free(struct timestamp_event_queue *q)
{
return PTP_MAX_TIMESTAMPS - queue_cnt(q) - 1;
}
static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
struct ptp_clock_event *src)
{
struct ptp_extts_event *dst;
struct timespec64 offset_ts;
unsigned long flags;
s64 seconds;
u32 remainder;
if (src->type == PTP_CLOCK_EXTTS) {
seconds = div_u64_rem(src->timestamp, 1000000000, &remainder);
} else if (src->type == PTP_CLOCK_EXTOFF) {
offset_ts = ns_to_timespec64(src->offset);
seconds = offset_ts.tv_sec;
remainder = offset_ts.tv_nsec;
} else {
WARN(1, "%s: unknown type %d\n", __func__, src->type);
return;
}
spin_lock_irqsave(&queue->lock, flags);
dst = &queue->buf[queue->tail];
dst->index = src->index;
dst->flags = PTP_EXTTS_EVENT_VALID;
dst->t.sec = seconds;
dst->t.nsec = remainder;
if (src->type == PTP_CLOCK_EXTOFF)
dst->flags |= PTP_EXT_OFFSET;
/* Both WRITE_ONCE() are paired with READ_ONCE() in queue_cnt() */
if (!queue_free(queue))
WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS);
WRITE_ONCE(queue->tail, (queue->tail + 1) % PTP_MAX_TIMESTAMPS);
spin_unlock_irqrestore(&queue->lock, flags);
}
/* posix clock implementation */
static int ptp_clock_getres(struct posix_clock *pc, struct timespec64 *tp)
{
tp->tv_sec = 0;
tp->tv_nsec = 1;
return 0;
}
static int ptp_clock_settime(struct posix_clock *pc, const struct timespec64 *tp)
{
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
if (ptp_clock_freerun(ptp)) {
pr_err("ptp: physical clock is free running\n");
return -EBUSY;
}
return ptp->info->settime64(ptp->info, tp);
}
static int ptp_clock_gettime(struct posix_clock *pc, struct timespec64 *tp)
{
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
int err;
if (ptp->info->gettimex64)
err = ptp->info->gettimex64(ptp->info, tp, NULL);
else
err = ptp->info->gettime64(ptp->info, tp);
return err;
}
static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
{
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
struct ptp_clock_info *ops;
int err = -EOPNOTSUPP;
if (ptp_clock_freerun(ptp)) {
pr_err("ptp: physical clock is free running\n");
return -EBUSY;
}
ops = ptp->info;
if (tx->modes & ADJ_SETOFFSET) {
struct timespec64 ts;
ktime_t kt;
s64 delta;
ts.tv_sec = tx->time.tv_sec;
ts.tv_nsec = tx->time.tv_usec;
if (!(tx->modes & ADJ_NANO))
ts.tv_nsec *= 1000;
if ((unsigned long) ts.tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
kt = timespec64_to_ktime(ts);
delta = ktime_to_ns(kt);
err = ops->adjtime(ops, delta);
} else if (tx->modes & ADJ_FREQUENCY) {
long ppb = scaled_ppm_to_ppb(tx->freq);
if (ppb > ops->max_adj || ppb < -ops->max_adj)
return -ERANGE;
err = ops->adjfine(ops, tx->freq);
ptp->dialed_frequency = tx->freq;
} else if (tx->modes & ADJ_OFFSET) {
if (ops->adjphase) {
s32 max_phase_adj = ops->getmaxphase(ops);
s32 offset = tx->offset;
if (!(tx->modes & ADJ_NANO))
offset *= NSEC_PER_USEC;
if (offset > max_phase_adj || offset < -max_phase_adj)
return -ERANGE;
err = ops->adjphase(ops, offset);
}
} else if (tx->modes == 0) {
tx->freq = ptp->dialed_frequency;
err = 0;
}
return err;
}
static struct posix_clock_operations ptp_clock_ops = {
.owner = THIS_MODULE,
.clock_adjtime = ptp_clock_adjtime,
.clock_gettime = ptp_clock_gettime,
.clock_getres = ptp_clock_getres,
.clock_settime = ptp_clock_settime,
.ioctl = ptp_ioctl,
.open = ptp_open,
.release = ptp_release,
.poll = ptp_poll,
.read = ptp_read,
};
ptp: fix the race between the release of ptp_clock and cdev In a case when a ptp chardev (like /dev/ptp0) is open but an underlying device is removed, closing this file leads to a race. This reproduces easily in a kvm virtual machine: ts# cat openptp0.c int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); } ts# uname -r 5.5.0-rc3-46cf053e ts# cat /proc/cmdline ... slub_debug=FZP ts# modprobe ptp_kvm ts# ./openptp0 & [1] 670 opened /dev/ptp0, sleeping 10s... ts# rmmod ptp_kvm ts# ls /dev/ptp* ls: cannot access '/dev/ptp*': No such file or directory ts# ...woken up [ 48.010809] general protection fault: 0000 [#1] SMP [ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25 [ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... [ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80 [ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202 [ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0 [ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b [ 48.019470] ... ^^^ a slub poison [ 48.023854] Call Trace: [ 48.024050] __fput+0x21f/0x240 [ 48.024288] task_work_run+0x79/0x90 [ 48.024555] do_exit+0x2af/0xab0 [ 48.024799] ? vfs_write+0x16a/0x190 [ 48.025082] do_group_exit+0x35/0x90 [ 48.025387] __x64_sys_exit_group+0xf/0x10 [ 48.025737] do_syscall_64+0x3d/0x130 [ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 48.026479] RIP: 0033:0x7f53b12082f6 [ 48.026792] ... [ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm] [ 48.045001] Fixing recursive fault but reboot is needed! This happens in: static void __fput(struct file *file) { ... if (file->f_op->release) file->f_op->release(inode, file); <<< cdev is kfree'd here if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(mode & FMODE_PATH))) { cdev_put(inode->i_cdev); <<< cdev fields are accessed here Namely: __fput() posix_clock_release() kref_put(&clk->kref, delete_clock) <<< the last reference delete_clock() delete_ptp_clock() kfree(ptp) <<< cdev is embedded in ptp cdev_put module_put(p->owner) <<< *p is kfree'd, bang! Here cdev is embedded in posix_clock which is embedded in ptp_clock. The race happens because ptp_clock's lifetime is controlled by two refcounts: kref and cdev.kobj in posix_clock. This is wrong. Make ptp_clock's sysfs device a parent of cdev with cdev_device_add() created especially for such cases. This way the parent device with its ptp_clock is not released until all references to the cdev are released. This adds a requirement that an initialized but not exposed struct device should be provided to posix_clock_register() by a caller instead of a simple dev_t. This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix the race between the release of watchdog_core_data and cdev"). See details of the implementation in the commit 233ed09d7fda ("chardev: add helper function to register char devs with a struct device"). Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u Analyzed-by: Stephen Johnston <sjohnsto@redhat.com> Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com> Signed-off-by: Vladis Dronov <vdronov@redhat.com> Acked-by: Richard Cochran <richardcochran@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-27 10:26:27 +08:00
static void ptp_clock_release(struct device *dev)
{
ptp: fix the race between the release of ptp_clock and cdev In a case when a ptp chardev (like /dev/ptp0) is open but an underlying device is removed, closing this file leads to a race. This reproduces easily in a kvm virtual machine: ts# cat openptp0.c int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); } ts# uname -r 5.5.0-rc3-46cf053e ts# cat /proc/cmdline ... slub_debug=FZP ts# modprobe ptp_kvm ts# ./openptp0 & [1] 670 opened /dev/ptp0, sleeping 10s... ts# rmmod ptp_kvm ts# ls /dev/ptp* ls: cannot access '/dev/ptp*': No such file or directory ts# ...woken up [ 48.010809] general protection fault: 0000 [#1] SMP [ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25 [ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... [ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80 [ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202 [ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0 [ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b [ 48.019470] ... ^^^ a slub poison [ 48.023854] Call Trace: [ 48.024050] __fput+0x21f/0x240 [ 48.024288] task_work_run+0x79/0x90 [ 48.024555] do_exit+0x2af/0xab0 [ 48.024799] ? vfs_write+0x16a/0x190 [ 48.025082] do_group_exit+0x35/0x90 [ 48.025387] __x64_sys_exit_group+0xf/0x10 [ 48.025737] do_syscall_64+0x3d/0x130 [ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 48.026479] RIP: 0033:0x7f53b12082f6 [ 48.026792] ... [ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm] [ 48.045001] Fixing recursive fault but reboot is needed! This happens in: static void __fput(struct file *file) { ... if (file->f_op->release) file->f_op->release(inode, file); <<< cdev is kfree'd here if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(mode & FMODE_PATH))) { cdev_put(inode->i_cdev); <<< cdev fields are accessed here Namely: __fput() posix_clock_release() kref_put(&clk->kref, delete_clock) <<< the last reference delete_clock() delete_ptp_clock() kfree(ptp) <<< cdev is embedded in ptp cdev_put module_put(p->owner) <<< *p is kfree'd, bang! Here cdev is embedded in posix_clock which is embedded in ptp_clock. The race happens because ptp_clock's lifetime is controlled by two refcounts: kref and cdev.kobj in posix_clock. This is wrong. Make ptp_clock's sysfs device a parent of cdev with cdev_device_add() created especially for such cases. This way the parent device with its ptp_clock is not released until all references to the cdev are released. This adds a requirement that an initialized but not exposed struct device should be provided to posix_clock_register() by a caller instead of a simple dev_t. This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix the race between the release of watchdog_core_data and cdev"). See details of the implementation in the commit 233ed09d7fda ("chardev: add helper function to register char devs with a struct device"). Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u Analyzed-by: Stephen Johnston <sjohnsto@redhat.com> Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com> Signed-off-by: Vladis Dronov <vdronov@redhat.com> Acked-by: Richard Cochran <richardcochran@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-27 10:26:27 +08:00
struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev);
struct timestamp_event_queue *tsevq;
unsigned long flags;
ptp_cleanup_pin_groups(ptp);
kfree(ptp->vclock_index);
mutex_destroy(&ptp->pincfg_mux);
mutex_destroy(&ptp->n_vclocks_mux);
/* Delete first entry */
spin_lock_irqsave(&ptp->tsevqs_lock, flags);
tsevq = list_first_entry(&ptp->tsevqs, struct timestamp_event_queue,
qlist);
list_del(&tsevq->qlist);
spin_unlock_irqrestore(&ptp->tsevqs_lock, flags);
bitmap_free(tsevq->mask);
kfree(tsevq);
debugfs_remove(ptp->debugfs_root);
ida_free(&ptp_clocks_map, ptp->index);
kfree(ptp);
}
static int ptp_getcycles64(struct ptp_clock_info *info, struct timespec64 *ts)
{
if (info->getcyclesx64)
return info->getcyclesx64(info, ts, NULL);
else
return info->gettime64(info, ts);
}
ptp: introduce ptp auxiliary worker Many PTP drivers required to perform some asynchronous or periodic work, like periodically handling PHC counter overflow or handle delayed timestamp for RX/TX network packets. In most of the cases, such work is implemented using workqueues. Unfortunately, Kernel workqueues might introduce significant delay in work scheduling under high system load and on -RT, which could cause misbehavior of PTP drivers due to internal counter overflow, for example, and there is no way to tune its execution policy and priority manuallly. Hence, The kthread_worker can be used insted of workqueues, as it create separte named kthread for each worker and its its execution policy and priority can be configured using chrt tool. This prblem was reported for two drivers TI CPSW CPTS and dp83640, so instead of modifying each of these driver it was proposed to add PTP auxiliary worker to the PHC subsystem. The patch adds PTP auxiliary worker in PHC subsystem using kthread_worker and kthread_delayed_work and introduces two new PHC subsystem APIs: - long (*do_aux_work)(struct ptp_clock_info *ptp) callback in ptp_clock_info structure, which driver should assign if it require to perform asynchronous or periodic work. Driver should return the delay of the PTP next auxiliary work scheduling time (>=0) or negative value in case further scheduling is not required. - int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) which allows schedule PTP auxiliary work. The name of kthread_worker thread corresponds PTP PHC device name "ptp%d". Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-07-29 06:30:02 +08:00
static void ptp_aux_kworker(struct kthread_work *work)
{
struct ptp_clock *ptp = container_of(work, struct ptp_clock,
aux_work.work);
struct ptp_clock_info *info = ptp->info;
long delay;
delay = info->do_aux_work(info);
if (delay >= 0)
kthread_queue_delayed_work(ptp->kworker, &ptp->aux_work, delay);
}
/* public interface */
struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
struct device *parent)
{
struct ptp_clock *ptp;
struct timestamp_event_queue *queue = NULL;
int err = 0, index, major = MAJOR(ptp_devt);
char debugfsname[16];
size_t size;
if (info->n_alarm > PTP_MAX_ALARMS)
return ERR_PTR(-EINVAL);
/* Initialize a clock structure. */
err = -ENOMEM;
ptp = kzalloc(sizeof(struct ptp_clock), GFP_KERNEL);
if (ptp == NULL)
goto no_memory;
index = ida_alloc_max(&ptp_clocks_map, MINORMASK, GFP_KERNEL);
if (index < 0) {
err = index;
goto no_slot;
}
ptp->clock.ops = ptp_clock_ops;
ptp->info = info;
ptp->devid = MKDEV(major, index);
ptp->index = index;
INIT_LIST_HEAD(&ptp->tsevqs);
queue = kzalloc(sizeof(*queue), GFP_KERNEL);
if (!queue)
goto no_memory_queue;
list_add_tail(&queue->qlist, &ptp->tsevqs);
spin_lock_init(&ptp->tsevqs_lock);
queue->mask = bitmap_alloc(PTP_MAX_CHANNELS, GFP_KERNEL);
if (!queue->mask)
goto no_memory_bitmap;
bitmap_set(queue->mask, 0, PTP_MAX_CHANNELS);
spin_lock_init(&queue->lock);
mutex_init(&ptp->pincfg_mux);
mutex_init(&ptp->n_vclocks_mux);
init_waitqueue_head(&ptp->tsev_wq);
if (ptp->info->getcycles64 || ptp->info->getcyclesx64) {
ptp->has_cycles = true;
if (!ptp->info->getcycles64 && ptp->info->getcyclesx64)
ptp->info->getcycles64 = ptp_getcycles64;
} else {
/* Free running cycle counter not supported, use time. */
ptp->info->getcycles64 = ptp_getcycles64;
if (ptp->info->gettimex64)
ptp->info->getcyclesx64 = ptp->info->gettimex64;
if (ptp->info->getcrosststamp)
ptp->info->getcrosscycles = ptp->info->getcrosststamp;
}
ptp: introduce ptp auxiliary worker Many PTP drivers required to perform some asynchronous or periodic work, like periodically handling PHC counter overflow or handle delayed timestamp for RX/TX network packets. In most of the cases, such work is implemented using workqueues. Unfortunately, Kernel workqueues might introduce significant delay in work scheduling under high system load and on -RT, which could cause misbehavior of PTP drivers due to internal counter overflow, for example, and there is no way to tune its execution policy and priority manuallly. Hence, The kthread_worker can be used insted of workqueues, as it create separte named kthread for each worker and its its execution policy and priority can be configured using chrt tool. This prblem was reported for two drivers TI CPSW CPTS and dp83640, so instead of modifying each of these driver it was proposed to add PTP auxiliary worker to the PHC subsystem. The patch adds PTP auxiliary worker in PHC subsystem using kthread_worker and kthread_delayed_work and introduces two new PHC subsystem APIs: - long (*do_aux_work)(struct ptp_clock_info *ptp) callback in ptp_clock_info structure, which driver should assign if it require to perform asynchronous or periodic work. Driver should return the delay of the PTP next auxiliary work scheduling time (>=0) or negative value in case further scheduling is not required. - int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) which allows schedule PTP auxiliary work. The name of kthread_worker thread corresponds PTP PHC device name "ptp%d". Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-07-29 06:30:02 +08:00
if (ptp->info->do_aux_work) {
kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker);
ptp->kworker = kthread_create_worker(0, "ptp%d", ptp->index);
ptp: introduce ptp auxiliary worker Many PTP drivers required to perform some asynchronous or periodic work, like periodically handling PHC counter overflow or handle delayed timestamp for RX/TX network packets. In most of the cases, such work is implemented using workqueues. Unfortunately, Kernel workqueues might introduce significant delay in work scheduling under high system load and on -RT, which could cause misbehavior of PTP drivers due to internal counter overflow, for example, and there is no way to tune its execution policy and priority manuallly. Hence, The kthread_worker can be used insted of workqueues, as it create separte named kthread for each worker and its its execution policy and priority can be configured using chrt tool. This prblem was reported for two drivers TI CPSW CPTS and dp83640, so instead of modifying each of these driver it was proposed to add PTP auxiliary worker to the PHC subsystem. The patch adds PTP auxiliary worker in PHC subsystem using kthread_worker and kthread_delayed_work and introduces two new PHC subsystem APIs: - long (*do_aux_work)(struct ptp_clock_info *ptp) callback in ptp_clock_info structure, which driver should assign if it require to perform asynchronous or periodic work. Driver should return the delay of the PTP next auxiliary work scheduling time (>=0) or negative value in case further scheduling is not required. - int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) which allows schedule PTP auxiliary work. The name of kthread_worker thread corresponds PTP PHC device name "ptp%d". Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-07-29 06:30:02 +08:00
if (IS_ERR(ptp->kworker)) {
err = PTR_ERR(ptp->kworker);
pr_err("failed to create ptp aux_worker %d\n", err);
goto kworker_err;
}
}
/* PTP virtual clock is being registered under physical clock */
if (parent && parent->class && parent->class->name &&
strcmp(parent->class->name, "ptp") == 0)
ptp->is_virtual_clock = true;
if (!ptp->is_virtual_clock) {
ptp->max_vclocks = PTP_DEFAULT_MAX_VCLOCKS;
size = sizeof(int) * ptp->max_vclocks;
ptp->vclock_index = kzalloc(size, GFP_KERNEL);
if (!ptp->vclock_index) {
err = -ENOMEM;
goto no_mem_for_vclocks;
}
}
err = ptp_populate_pin_groups(ptp);
if (err)
goto no_pin_groups;
/* Register a new PPS source. */
if (info->pps) {
struct pps_source_info pps;
memset(&pps, 0, sizeof(pps));
snprintf(pps.name, PPS_MAX_NAME_LEN, "ptp%d", index);
pps.mode = PTP_PPS_MODE;
pps.owner = info->owner;
ptp->pps_source = pps_register_source(&pps, PTP_PPS_DEFAULTS);
if (IS_ERR(ptp->pps_source)) {
err = PTR_ERR(ptp->pps_source);
pr_err("failed to register pps source\n");
goto no_pps;
}
ptp->pps_source->lookup_cookie = ptp;
}
ptp: fix the race between the release of ptp_clock and cdev In a case when a ptp chardev (like /dev/ptp0) is open but an underlying device is removed, closing this file leads to a race. This reproduces easily in a kvm virtual machine: ts# cat openptp0.c int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); } ts# uname -r 5.5.0-rc3-46cf053e ts# cat /proc/cmdline ... slub_debug=FZP ts# modprobe ptp_kvm ts# ./openptp0 & [1] 670 opened /dev/ptp0, sleeping 10s... ts# rmmod ptp_kvm ts# ls /dev/ptp* ls: cannot access '/dev/ptp*': No such file or directory ts# ...woken up [ 48.010809] general protection fault: 0000 [#1] SMP [ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25 [ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... [ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80 [ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202 [ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0 [ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b [ 48.019470] ... ^^^ a slub poison [ 48.023854] Call Trace: [ 48.024050] __fput+0x21f/0x240 [ 48.024288] task_work_run+0x79/0x90 [ 48.024555] do_exit+0x2af/0xab0 [ 48.024799] ? vfs_write+0x16a/0x190 [ 48.025082] do_group_exit+0x35/0x90 [ 48.025387] __x64_sys_exit_group+0xf/0x10 [ 48.025737] do_syscall_64+0x3d/0x130 [ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 48.026479] RIP: 0033:0x7f53b12082f6 [ 48.026792] ... [ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm] [ 48.045001] Fixing recursive fault but reboot is needed! This happens in: static void __fput(struct file *file) { ... if (file->f_op->release) file->f_op->release(inode, file); <<< cdev is kfree'd here if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(mode & FMODE_PATH))) { cdev_put(inode->i_cdev); <<< cdev fields are accessed here Namely: __fput() posix_clock_release() kref_put(&clk->kref, delete_clock) <<< the last reference delete_clock() delete_ptp_clock() kfree(ptp) <<< cdev is embedded in ptp cdev_put module_put(p->owner) <<< *p is kfree'd, bang! Here cdev is embedded in posix_clock which is embedded in ptp_clock. The race happens because ptp_clock's lifetime is controlled by two refcounts: kref and cdev.kobj in posix_clock. This is wrong. Make ptp_clock's sysfs device a parent of cdev with cdev_device_add() created especially for such cases. This way the parent device with its ptp_clock is not released until all references to the cdev are released. This adds a requirement that an initialized but not exposed struct device should be provided to posix_clock_register() by a caller instead of a simple dev_t. This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix the race between the release of watchdog_core_data and cdev"). See details of the implementation in the commit 233ed09d7fda ("chardev: add helper function to register char devs with a struct device"). Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u Analyzed-by: Stephen Johnston <sjohnsto@redhat.com> Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com> Signed-off-by: Vladis Dronov <vdronov@redhat.com> Acked-by: Richard Cochran <richardcochran@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-27 10:26:27 +08:00
/* Initialize a new device of our class in our clock structure. */
device_initialize(&ptp->dev);
ptp->dev.devt = ptp->devid;
ptp->dev.class = &ptp_class;
ptp: fix the race between the release of ptp_clock and cdev In a case when a ptp chardev (like /dev/ptp0) is open but an underlying device is removed, closing this file leads to a race. This reproduces easily in a kvm virtual machine: ts# cat openptp0.c int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); } ts# uname -r 5.5.0-rc3-46cf053e ts# cat /proc/cmdline ... slub_debug=FZP ts# modprobe ptp_kvm ts# ./openptp0 & [1] 670 opened /dev/ptp0, sleeping 10s... ts# rmmod ptp_kvm ts# ls /dev/ptp* ls: cannot access '/dev/ptp*': No such file or directory ts# ...woken up [ 48.010809] general protection fault: 0000 [#1] SMP [ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25 [ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... [ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80 [ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202 [ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0 [ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b [ 48.019470] ... ^^^ a slub poison [ 48.023854] Call Trace: [ 48.024050] __fput+0x21f/0x240 [ 48.024288] task_work_run+0x79/0x90 [ 48.024555] do_exit+0x2af/0xab0 [ 48.024799] ? vfs_write+0x16a/0x190 [ 48.025082] do_group_exit+0x35/0x90 [ 48.025387] __x64_sys_exit_group+0xf/0x10 [ 48.025737] do_syscall_64+0x3d/0x130 [ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 48.026479] RIP: 0033:0x7f53b12082f6 [ 48.026792] ... [ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm] [ 48.045001] Fixing recursive fault but reboot is needed! This happens in: static void __fput(struct file *file) { ... if (file->f_op->release) file->f_op->release(inode, file); <<< cdev is kfree'd here if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(mode & FMODE_PATH))) { cdev_put(inode->i_cdev); <<< cdev fields are accessed here Namely: __fput() posix_clock_release() kref_put(&clk->kref, delete_clock) <<< the last reference delete_clock() delete_ptp_clock() kfree(ptp) <<< cdev is embedded in ptp cdev_put module_put(p->owner) <<< *p is kfree'd, bang! Here cdev is embedded in posix_clock which is embedded in ptp_clock. The race happens because ptp_clock's lifetime is controlled by two refcounts: kref and cdev.kobj in posix_clock. This is wrong. Make ptp_clock's sysfs device a parent of cdev with cdev_device_add() created especially for such cases. This way the parent device with its ptp_clock is not released until all references to the cdev are released. This adds a requirement that an initialized but not exposed struct device should be provided to posix_clock_register() by a caller instead of a simple dev_t. This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix the race between the release of watchdog_core_data and cdev"). See details of the implementation in the commit 233ed09d7fda ("chardev: add helper function to register char devs with a struct device"). Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u Analyzed-by: Stephen Johnston <sjohnsto@redhat.com> Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com> Signed-off-by: Vladis Dronov <vdronov@redhat.com> Acked-by: Richard Cochran <richardcochran@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-27 10:26:27 +08:00
ptp->dev.parent = parent;
ptp->dev.groups = ptp->pin_attr_groups;
ptp->dev.release = ptp_clock_release;
dev_set_drvdata(&ptp->dev, ptp);
dev_set_name(&ptp->dev, "ptp%d", ptp->index);
/* Create a posix clock and link it to the device. */
err = posix_clock_register(&ptp->clock, &ptp->dev);
if (err) {
if (ptp->pps_source)
pps_unregister_source(ptp->pps_source);
if (ptp->kworker)
kthread_destroy_worker(ptp->kworker);
put_device(&ptp->dev);
pr_err("failed to create posix clock\n");
return ERR_PTR(err);
}
/* Debugfs initialization */
snprintf(debugfsname, sizeof(debugfsname), "ptp%d", ptp->index);
ptp->debugfs_root = debugfs_create_dir(debugfsname, NULL);
return ptp;
no_pps:
ptp_cleanup_pin_groups(ptp);
no_pin_groups:
kfree(ptp->vclock_index);
no_mem_for_vclocks:
ptp: introduce ptp auxiliary worker Many PTP drivers required to perform some asynchronous or periodic work, like periodically handling PHC counter overflow or handle delayed timestamp for RX/TX network packets. In most of the cases, such work is implemented using workqueues. Unfortunately, Kernel workqueues might introduce significant delay in work scheduling under high system load and on -RT, which could cause misbehavior of PTP drivers due to internal counter overflow, for example, and there is no way to tune its execution policy and priority manuallly. Hence, The kthread_worker can be used insted of workqueues, as it create separte named kthread for each worker and its its execution policy and priority can be configured using chrt tool. This prblem was reported for two drivers TI CPSW CPTS and dp83640, so instead of modifying each of these driver it was proposed to add PTP auxiliary worker to the PHC subsystem. The patch adds PTP auxiliary worker in PHC subsystem using kthread_worker and kthread_delayed_work and introduces two new PHC subsystem APIs: - long (*do_aux_work)(struct ptp_clock_info *ptp) callback in ptp_clock_info structure, which driver should assign if it require to perform asynchronous or periodic work. Driver should return the delay of the PTP next auxiliary work scheduling time (>=0) or negative value in case further scheduling is not required. - int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) which allows schedule PTP auxiliary work. The name of kthread_worker thread corresponds PTP PHC device name "ptp%d". Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-07-29 06:30:02 +08:00
if (ptp->kworker)
kthread_destroy_worker(ptp->kworker);
kworker_err:
mutex_destroy(&ptp->pincfg_mux);
mutex_destroy(&ptp->n_vclocks_mux);
bitmap_free(queue->mask);
no_memory_bitmap:
list_del(&queue->qlist);
kfree(queue);
no_memory_queue:
ida_free(&ptp_clocks_map, index);
no_slot:
kfree(ptp);
no_memory:
return ERR_PTR(err);
}
EXPORT_SYMBOL(ptp_clock_register);
static int unregister_vclock(struct device *dev, void *data)
{
struct ptp_clock *ptp = dev_get_drvdata(dev);
ptp_vclock_unregister(info_to_vclock(ptp->info));
return 0;
}
int ptp_clock_unregister(struct ptp_clock *ptp)
{
if (ptp_vclock_in_use(ptp)) {
device_for_each_child(&ptp->dev, NULL, unregister_vclock);
}
ptp->defunct = 1;
wake_up_interruptible(&ptp->tsev_wq);
ptp: introduce ptp auxiliary worker Many PTP drivers required to perform some asynchronous or periodic work, like periodically handling PHC counter overflow or handle delayed timestamp for RX/TX network packets. In most of the cases, such work is implemented using workqueues. Unfortunately, Kernel workqueues might introduce significant delay in work scheduling under high system load and on -RT, which could cause misbehavior of PTP drivers due to internal counter overflow, for example, and there is no way to tune its execution policy and priority manuallly. Hence, The kthread_worker can be used insted of workqueues, as it create separte named kthread for each worker and its its execution policy and priority can be configured using chrt tool. This prblem was reported for two drivers TI CPSW CPTS and dp83640, so instead of modifying each of these driver it was proposed to add PTP auxiliary worker to the PHC subsystem. The patch adds PTP auxiliary worker in PHC subsystem using kthread_worker and kthread_delayed_work and introduces two new PHC subsystem APIs: - long (*do_aux_work)(struct ptp_clock_info *ptp) callback in ptp_clock_info structure, which driver should assign if it require to perform asynchronous or periodic work. Driver should return the delay of the PTP next auxiliary work scheduling time (>=0) or negative value in case further scheduling is not required. - int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) which allows schedule PTP auxiliary work. The name of kthread_worker thread corresponds PTP PHC device name "ptp%d". Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-07-29 06:30:02 +08:00
if (ptp->kworker) {
kthread_cancel_delayed_work_sync(&ptp->aux_work);
kthread_destroy_worker(ptp->kworker);
}
/* Release the clock's resources. */
if (ptp->pps_source)
pps_unregister_source(ptp->pps_source);
posix_clock_unregister(&ptp->clock);
return 0;
}
EXPORT_SYMBOL(ptp_clock_unregister);
void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event)
{
struct timestamp_event_queue *tsevq;
struct pps_event_time evt;
unsigned long flags;
switch (event->type) {
case PTP_CLOCK_ALARM:
break;
case PTP_CLOCK_EXTTS:
case PTP_CLOCK_EXTOFF:
/* Enqueue timestamp on selected queues */
spin_lock_irqsave(&ptp->tsevqs_lock, flags);
list_for_each_entry(tsevq, &ptp->tsevqs, qlist) {
if (test_bit((unsigned int)event->index, tsevq->mask))
enqueue_external_timestamp(tsevq, event);
}
spin_unlock_irqrestore(&ptp->tsevqs_lock, flags);
wake_up_interruptible(&ptp->tsev_wq);
break;
case PTP_CLOCK_PPS:
pps_get_ts(&evt);
pps_event(ptp->pps_source, &evt, PTP_PPS_EVENT, NULL);
break;
case PTP_CLOCK_PPSUSR:
pps_event(ptp->pps_source, &event->pps_times,
PTP_PPS_EVENT, NULL);
break;
}
}
EXPORT_SYMBOL(ptp_clock_event);
int ptp_clock_index(struct ptp_clock *ptp)
{
return ptp->index;
}
EXPORT_SYMBOL(ptp_clock_index);
int ptp_find_pin(struct ptp_clock *ptp,
enum ptp_pin_function func, unsigned int chan)
{
struct ptp_pin_desc *pin = NULL;
int i;
for (i = 0; i < ptp->info->n_pins; i++) {
if (ptp->info->pin_config[i].func == func &&
ptp->info->pin_config[i].chan == chan) {
pin = &ptp->info->pin_config[i];
break;
}
}
return pin ? i : -1;
}
EXPORT_SYMBOL(ptp_find_pin);
int ptp_find_pin_unlocked(struct ptp_clock *ptp,
enum ptp_pin_function func, unsigned int chan)
{
int result;
mutex_lock(&ptp->pincfg_mux);
result = ptp_find_pin(ptp, func, chan);
mutex_unlock(&ptp->pincfg_mux);
return result;
}
EXPORT_SYMBOL(ptp_find_pin_unlocked);
ptp: introduce ptp auxiliary worker Many PTP drivers required to perform some asynchronous or periodic work, like periodically handling PHC counter overflow or handle delayed timestamp for RX/TX network packets. In most of the cases, such work is implemented using workqueues. Unfortunately, Kernel workqueues might introduce significant delay in work scheduling under high system load and on -RT, which could cause misbehavior of PTP drivers due to internal counter overflow, for example, and there is no way to tune its execution policy and priority manuallly. Hence, The kthread_worker can be used insted of workqueues, as it create separte named kthread for each worker and its its execution policy and priority can be configured using chrt tool. This prblem was reported for two drivers TI CPSW CPTS and dp83640, so instead of modifying each of these driver it was proposed to add PTP auxiliary worker to the PHC subsystem. The patch adds PTP auxiliary worker in PHC subsystem using kthread_worker and kthread_delayed_work and introduces two new PHC subsystem APIs: - long (*do_aux_work)(struct ptp_clock_info *ptp) callback in ptp_clock_info structure, which driver should assign if it require to perform asynchronous or periodic work. Driver should return the delay of the PTP next auxiliary work scheduling time (>=0) or negative value in case further scheduling is not required. - int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) which allows schedule PTP auxiliary work. The name of kthread_worker thread corresponds PTP PHC device name "ptp%d". Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-07-29 06:30:02 +08:00
int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay)
{
return kthread_mod_delayed_work(ptp->kworker, &ptp->aux_work, delay);
}
EXPORT_SYMBOL(ptp_schedule_worker);
void ptp_cancel_worker_sync(struct ptp_clock *ptp)
{
kthread_cancel_delayed_work_sync(&ptp->aux_work);
}
EXPORT_SYMBOL(ptp_cancel_worker_sync);
/* module operations */
static void __exit ptp_exit(void)
{
class_unregister(&ptp_class);
unregister_chrdev_region(ptp_devt, MINORMASK + 1);
ida_destroy(&ptp_clocks_map);
}
static int __init ptp_init(void)
{
int err;
err = class_register(&ptp_class);
if (err) {
pr_err("ptp: failed to allocate class\n");
return err;
}
err = alloc_chrdev_region(&ptp_devt, 0, MINORMASK + 1, "ptp");
if (err < 0) {
pr_err("ptp: failed to allocate device region\n");
goto no_region;
}
pr_info("PTP clock support registered\n");
return 0;
no_region:
class_unregister(&ptp_class);
return err;
}
subsys_initcall(ptp_init);
module_exit(ptp_exit);
MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>");
MODULE_DESCRIPTION("PTP clocks support");
MODULE_LICENSE("GPL");