213 lines
5.3 KiB
C
213 lines
5.3 KiB
C
|
/*
|
||
|
* itmt.c: Support Intel Turbo Boost Max Technology 3.0
|
||
|
*
|
||
|
* (C) Copyright 2016 Intel Corporation
|
||
|
* Author: Tim Chen <tim.c.chen@linux.intel.com>
|
||
|
*
|
||
|
* This program is free software; you can redistribute it and/or
|
||
|
* modify it under the terms of the GNU General Public License
|
||
|
* as published by the Free Software Foundation; version 2
|
||
|
* of the License.
|
||
|
*
|
||
|
* On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
|
||
|
* the maximum turbo frequencies of some cores in a CPU package may be
|
||
|
* higher than for the other cores in the same package. In that case,
|
||
|
* better performance can be achieved by making the scheduler prefer
|
||
|
* to run tasks on the CPUs with higher max turbo frequencies.
|
||
|
*
|
||
|
* This file provides functions and data structures for enabling the
|
||
|
* scheduler to favor scheduling on cores that can be boosted to a higher
|
||
|
* frequency under ITMT.
|
||
|
*/
|
||
|
|
||
|
#include <linux/sched.h>
|
||
|
#include <linux/cpumask.h>
|
||
|
#include <linux/cpuset.h>
|
||
|
#include <linux/mutex.h>
|
||
|
#include <linux/sysctl.h>
|
||
|
#include <linux/nodemask.h>
|
||
|
|
||
|
/*
 * Held by the sysctl handler and by sched_set_itmt_support() /
 * sched_clear_itmt_support() while they touch the ITMT capability flag,
 * the sysctl value and the sysctl registration.
 */
static DEFINE_MUTEX(itmt_update_mutex);
|
||
|
/*
 * Per-cpu priority value: written by sched_set_itmt_core_prio() and
 * returned by arch_asym_cpu_priority().
 */
DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
|
||
|
|
||
|
/* Boolean to track if system has ITMT capabilities */
|
||
|
/*
 * Set by sched_set_itmt_support() and cleared by
 * sched_clear_itmt_support(), both under itmt_update_mutex.
 */
static bool __read_mostly sched_itmt_capable;
|
||
|
|
||
|
/*
|
||
|
* Boolean to control whether we want to move processes to cpu capable
|
||
|
* of higher turbo frequency for cpus supporting Intel Turbo Boost Max
|
||
|
* Technology 3.0.
|
||
|
*
|
||
|
* It can be set via /proc/sys/kernel/sched_itmt_enabled
|
||
|
*/
|
||
|
/*
 * Forced to 1 by sched_set_itmt_support() and to 0 by
 * sched_clear_itmt_support(); otherwise updated through the
 * "sched_itmt_enabled" sysctl handler.
 */
unsigned int __read_mostly sysctl_sched_itmt_enabled;
|
||
|
|
||
|
static int sched_itmt_update_handler(struct ctl_table *table, int write,
|
||
|
void __user *buffer, size_t *lenp,
|
||
|
loff_t *ppos)
|
||
|
{
|
||
|
unsigned int old_sysctl;
|
||
|
int ret;
|
||
|
|
||
|
mutex_lock(&itmt_update_mutex);
|
||
|
|
||
|
if (!sched_itmt_capable) {
|
||
|
mutex_unlock(&itmt_update_mutex);
|
||
|
return -EINVAL;
|
||
|
}
|
||
|
|
||
|
old_sysctl = sysctl_sched_itmt_enabled;
|
||
|
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
||
|
|
||
|
if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
|
||
|
x86_topology_update = true;
|
||
|
rebuild_sched_domains();
|
||
|
}
|
||
|
|
||
|
mutex_unlock(&itmt_update_mutex);
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
static unsigned int zero;
|
||
|
static unsigned int one = 1;
|
||
|
static struct ctl_table itmt_kern_table[] = {
|
||
|
{
|
||
|
.procname = "sched_itmt_enabled",
|
||
|
.data = &sysctl_sched_itmt_enabled,
|
||
|
.maxlen = sizeof(unsigned int),
|
||
|
.mode = 0644,
|
||
|
.proc_handler = sched_itmt_update_handler,
|
||
|
.extra1 = &zero,
|
||
|
.extra2 = &one,
|
||
|
},
|
||
|
{}
|
||
|
};
|
||
|
|
||
|
static struct ctl_table itmt_root_table[] = {
|
||
|
{
|
||
|
.procname = "kernel",
|
||
|
.mode = 0555,
|
||
|
.child = itmt_kern_table,
|
||
|
},
|
||
|
{}
|
||
|
};
|
||
|
|
||
|
/*
 * Result of register_sysctl_table() in sched_set_itmt_support(); reset to
 * NULL once sched_clear_itmt_support() has unregistered the table.
 */
static struct ctl_table_header *itmt_sysctl_header;
|
||
|
|
||
|
/**
|
||
|
* sched_set_itmt_support() - Indicate platform supports ITMT
|
||
|
*
|
||
|
* This function is used by the OS to indicate to scheduler that the platform
|
||
|
* is capable of supporting the ITMT feature.
|
||
|
*
|
||
|
* The current scheme has the pstate driver detects if the system
|
||
|
* is ITMT capable and call sched_set_itmt_support.
|
||
|
*
|
||
|
* This must be done only after sched_set_itmt_core_prio
|
||
|
* has been called to set the cpus' priorities.
|
||
|
* It must not be called with cpu hot plug lock
|
||
|
* held as we need to acquire the lock to rebuild sched domains
|
||
|
* later.
|
||
|
*
|
||
|
* Return: 0 on success
|
||
|
*/
|
||
|
int sched_set_itmt_support(void)
|
||
|
{
|
||
|
mutex_lock(&itmt_update_mutex);
|
||
|
|
||
|
if (sched_itmt_capable) {
|
||
|
mutex_unlock(&itmt_update_mutex);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
itmt_sysctl_header = register_sysctl_table(itmt_root_table);
|
||
|
if (!itmt_sysctl_header) {
|
||
|
mutex_unlock(&itmt_update_mutex);
|
||
|
return -ENOMEM;
|
||
|
}
|
||
|
|
||
|
sched_itmt_capable = true;
|
||
|
|
||
|
sysctl_sched_itmt_enabled = 1;
|
||
|
|
||
|
x86_topology_update = true;
|
||
|
rebuild_sched_domains();
|
||
|
|
||
|
mutex_unlock(&itmt_update_mutex);
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* sched_clear_itmt_support() - Revoke platform's support of ITMT
|
||
|
*
|
||
|
* This function is used by the OS to indicate that it has
|
||
|
* revoked the platform's support of ITMT feature.
|
||
|
*
|
||
|
* It must not be called with cpu hot plug lock
|
||
|
* held as we need to acquire the lock to rebuild sched domains
|
||
|
* later.
|
||
|
*/
|
||
|
void sched_clear_itmt_support(void)
|
||
|
{
|
||
|
mutex_lock(&itmt_update_mutex);
|
||
|
|
||
|
if (!sched_itmt_capable) {
|
||
|
mutex_unlock(&itmt_update_mutex);
|
||
|
return;
|
||
|
}
|
||
|
sched_itmt_capable = false;
|
||
|
|
||
|
if (itmt_sysctl_header) {
|
||
|
unregister_sysctl_table(itmt_sysctl_header);
|
||
|
itmt_sysctl_header = NULL;
|
||
|
}
|
||
|
|
||
|
if (sysctl_sched_itmt_enabled) {
|
||
|
/* disable sched_itmt if we are no longer ITMT capable */
|
||
|
sysctl_sched_itmt_enabled = 0;
|
||
|
x86_topology_update = true;
|
||
|
rebuild_sched_domains();
|
||
|
}
|
||
|
|
||
|
mutex_unlock(&itmt_update_mutex);
|
||
|
}
|
||
|
|
||
|
int arch_asym_cpu_priority(int cpu)
|
||
|
{
|
||
|
return per_cpu(sched_core_priority, cpu);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* sched_set_itmt_core_prio() - Set CPU priority based on ITMT
|
||
|
* @prio: Priority of cpu core
|
||
|
* @core_cpu: The cpu number associated with the core
|
||
|
*
|
||
|
* The pstate driver will find out the max boost frequency
|
||
|
* and call this function to set a priority proportional
|
||
|
* to the max boost frequency. CPU with higher boost
|
||
|
* frequency will receive higher priority.
|
||
|
*
|
||
|
* No need to rebuild sched domain after updating
|
||
|
* the CPU priorities. The sched domains have no
|
||
|
* dependency on CPU priorities.
|
||
|
*/
|
||
|
void sched_set_itmt_core_prio(int prio, int core_cpu)
|
||
|
{
|
||
|
int cpu, i = 1;
|
||
|
|
||
|
for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
|
||
|
int smt_prio;
|
||
|
|
||
|
/*
|
||
|
* Ensure that the siblings are moved to the end
|
||
|
* of the priority chain and only used when
|
||
|
* all other high priority cpus are out of capacity.
|
||
|
*/
|
||
|
smt_prio = prio * smp_num_siblings / i;
|
||
|
per_cpu(sched_core_priority, cpu) = smt_prio;
|
||
|
i++;
|
||
|
}
|
||
|
}
|