/*	$NetBSD$ */

/*-
 * Copyright (c) 2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jukka Ruohonen.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_cpufreq.c,v 1.15 2011/09/02 22:25:08 Exp $");

#include <sys/param.h>
#include <sys/cpu.h>
#include <sys/cpufreq.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/time.h>
#include <sys/xcall.h>

/*
 * Internal helpers.  cpufreq_get_max() and cpufreq_get_min() assert
 * that cpu_lock is held by the caller.  cpufreq_latency() is entered
 * with cpu_lock held, but releases and re-acquires it around every
 * measurement.
 */
static int		  cpufreq_latency(void);
static uint16_t		  cpufreq_get_max(void);
static uint16_t		  cpufreq_get_min(void);

/*
 * The currently registered backend, or NULL when there is none.
 * Only one backend can be registered at a time.
 */
static struct cpufreq_if *cpufreq_if __read_mostly = NULL;

int
cpufreq_register(struct cpufreq_if *cif)
{
	size_t count, i, j, k;
	uint16_t m;
	int rv;

	KASSERT(cif != NULL);
	KASSERT(cif->get_freq != NULL);
	KASSERT(cif->set_freq != NULL);
	KASSERT(cif->state_count > 0);
	KASSERT(cif->state_count < CPUFREQ_STATE_MAX);

	mutex_enter(&cpu_lock);

	if (cpufreq_if != NULL) {
		mutex_exit(&cpu_lock);
		return EALREADY;
	}

	mutex_exit(&cpu_lock);
	cpufreq_if = kmem_zalloc(sizeof(*cif), KM_SLEEP);

	if (cpufreq_if == NULL)
		return ENOMEM;

	mutex_enter(&cpu_lock);

	cpufreq_if->cookie = cif->cookie;
	cpufreq_if->get_freq = cif->get_freq;
	cpufreq_if->set_freq = cif->set_freq;

	(void)strlcpy(cpufreq_if->name, cif->name, sizeof(cif->name));

	/*
	 * Sanity check the values and verify descending order.
	 */
	for (count = i = 0; i < cif->state_count; i++) {

		if (cif->state[i].freq == 0 || cif->state[i].freq > 9999)
			continue;

		for (j = k = 0; j < i; j++) {

			if (cif->state[i].freq >= cif->state[j].freq) {
				k = 1;
				break;
			}
		}

		if (k != 0)
			continue;

		count++;
		cpufreq_if->state[i].freq = cif->state[i].freq;
		cpufreq_if->state[i].power = cif->state[i].power;
	}

	cpufreq_if->state_count = count;

	if (cpufreq_if->state_count == 0) {
		mutex_exit(&cpu_lock);
		cpufreq_deregister();
		return ENODEV;
	}

	rv = cpufreq_latency();

	if (rv != 0) {
		mutex_exit(&cpu_lock);
		cpufreq_deregister();
		return rv;
	}

	m = cpufreq_get_max();
	mutex_exit(&cpu_lock);
	cpufreq_set_all(m);

	return 0;
}

void
cpufreq_deregister(void)
{

	mutex_enter(&cpu_lock);

	if (cpufreq_if == NULL) {
		mutex_exit(&cpu_lock);
		return;
	}

	mutex_exit(&cpu_lock);
	kmem_free(cpufreq_if, sizeof(*cpufreq_if));
	cpufreq_if = NULL;
}

static int
cpufreq_latency(void)
{
	struct timespec nta, ntb;
	const size_t n = 20;
	size_t i, j;
	uint64_t s;

	/*
	 * For each state, sample the average transition
	 * latency required to set the state for all CPUs.
	 * Few rounds are required to even the possible
	 * caching done in the backend.
	 */
	for (i = 0; i < cpufreq_if->state_count; i++) {

		for (s = 0, j = 0; j < n; j++) {

			nta.tv_sec = nta.tv_nsec = 0;
			ntb.tv_sec = ntb.tv_nsec = 0;

			nanotime(&nta);
			mutex_exit(&cpu_lock);
			cpufreq_set_all(cpufreq_if->state[i].freq);
			mutex_enter(&cpu_lock);
			nanotime(&ntb);
			timespecsub(&ntb, &nta, &ntb);

			if (ntb.tv_sec != 0 ||
			    ntb.tv_nsec > CPUFREQ_LATENCY_MAX)
				continue;

			if (s >= UINT64_MAX - CPUFREQ_LATENCY_MAX)
				break;

			s += ntb.tv_nsec;
		}

		/*
		 * Consider the backend unsuitable if
		 * the transition latency was too high.
		 */
		if (s == 0)
			return EMSGSIZE;

		cpufreq_if->state[i].latency = s / n;
	}

	return 0;
}

/*
 * cpufreq_suspend:
 *
 *	Record the current frequency of the CPU "ci" in the backend's
 *	saved_freq member and switch the CPU to the lowest registered
 *	frequency.  Does nothing when no backend is registered.
 */
void
cpufreq_suspend(struct cpu_info *ci)
{
	struct cpufreq_if *cif;
	uint16_t saved = 0;
	uint16_t l;

	mutex_enter(&cpu_lock);
	cif = cpufreq_if;

	if (__predict_false(cif == NULL)) {
		mutex_exit(&cpu_lock);
		return;
	}

	cif->saved_freq = 0;
	l = cpufreq_get_min();
	mutex_exit(&cpu_lock);

	/*
	 * cpufreq_get() takes cpu_lock itself, so it must be called
	 * unlocked.  Read into a local instead of into the backend
	 * structure: the backend may be deregistered and freed while
	 * the lock is not held.
	 */
	cpufreq_get(ci, &saved);

	mutex_enter(&cpu_lock);
	cif = cpufreq_if;

	if (__predict_true(cif != NULL))
		cif->saved_freq = saved;

	mutex_exit(&cpu_lock);

	cpufreq_set(ci, l);
}

void
cpufreq_resume(struct cpu_info *ci)
{
	struct cpufreq_if *cif;

	mutex_enter(&cpu_lock);
	cif = cpufreq_if;

	if (__predict_false(cif == NULL)) {
		mutex_exit(&cpu_lock);
		return;
	}

	if (__predict_false(cif->saved_freq == 0)) {
		mutex_exit(&cpu_lock);
		return;
	}

	mutex_exit(&cpu_lock);
	cpufreq_set(ci, cif->saved_freq);
}

/*
 * cpufreq_get:
 *
 *	Run the backend's get_freq function on the CPU "ci" as a
 *	cross-call, passing the backend cookie and "freq" as the
 *	arguments, and wait for it to complete.  cpu_lock is held
 *	for the duration of the call.
 *
 *	When no backend is registered, "*freq" is left untouched.
 */
void
cpufreq_get(struct cpu_info *ci, uint16_t *freq)
{
	struct cpufreq_if *backend;
	uint64_t ticket;

	mutex_enter(&cpu_lock);

	backend = cpufreq_if;

	if (__predict_false(backend == NULL))
		goto out;

	ticket = xc_unicast(0, (*backend->get_freq),
	    backend->cookie, freq, ci);
	xc_wait(ticket);
out:
	mutex_exit(&cpu_lock);
}

static uint16_t
cpufreq_get_max(void)
{
	struct cpufreq_if *cif = cpufreq_if;

	KASSERT(cif != NULL);
	KASSERT(mutex_owned(&cpu_lock) != 0);

	return cif->state[0].freq;
}

/*
 * cpufreq_get_min:
 *
 *	Return the frequency of the last counted state of the
 *	registered backend.  cpufreq_register() only accepts states in
 *	descending order, so this is meant to be the lowest frequency.
 *
 *	A backend must be registered and cpu_lock must be held.
 */
static uint16_t
cpufreq_get_min(void)
{
	struct cpufreq_if *cif = cpufreq_if;
	int last;

	KASSERT(cif != NULL);
	KASSERT(mutex_owned(&cpu_lock) != 0);

	last = cif->state_count - 1;

	return cif->state[last].freq;
}

int
cpufreq_get_if(struct cpufreq_if *cif)
{

	mutex_enter(&cpu_lock);

	if (__predict_false(cpufreq_if == NULL)) {
		mutex_exit(&cpu_lock);
		return ENODEV;
	}

	memcpy(cif, cpufreq_if, sizeof(*cif));
	mutex_exit(&cpu_lock);

	return 0;
}

/*
 * cpufreq_set:
 *
 *	Run the backend's set_freq function on the CPU "ci" as a
 *	cross-call, passing the backend cookie and a pointer to
 *	"freq" as the arguments.  Does nothing when no backend is
 *	registered.  cpu_lock is held for the duration of the call.
 */
void
cpufreq_set(struct cpu_info *ci, uint16_t freq)
{
	struct cpufreq_if *backend;
	uint64_t ticket;

	mutex_enter(&cpu_lock);

	backend = cpufreq_if;

	if (__predict_false(backend == NULL))
		goto out;

	/*
	 * The cross-call receives the address of the local "freq",
	 * so it is waited for before returning.
	 */
	ticket = xc_unicast(0, (*backend->set_freq),
	    backend->cookie, &freq, ci);
	xc_wait(ticket);
out:
	mutex_exit(&cpu_lock);
}

/*
 * cpufreq_set_all:
 *
 *	Run the backend's set_freq function as a broadcast
 *	cross-call, passing the backend cookie and a pointer to
 *	"freq" as the arguments.  Does nothing when no backend is
 *	registered.  cpu_lock is held for the duration of the call.
 */
void
cpufreq_set_all(uint16_t freq)
{
	struct cpufreq_if *backend;
	uint64_t ticket;

	mutex_enter(&cpu_lock);

	backend = cpufreq_if;

	if (__predict_false(backend == NULL))
		goto out;

	/*
	 * The cross-call receives the address of the local "freq",
	 * so it is waited for before returning.
	 */
	ticket = xc_broadcast(0, (*backend->set_freq),
	    backend->cookie, &freq);
	xc_wait(ticket);
out:
	mutex_exit(&cpu_lock);
}

/*-
 * Copyright (c) 2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jukka Ruohonen.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#ifndef	_SYS_CPUFREQ_H_
#define	_SYS_CPUFREQ_H_

#ifndef _KERNEL
#include <stdbool.h>
#endif

#ifdef _KERNEL
#ifndef _SYS_XCALL_H_
#include <sys/xcall.h>
#endif
#endif

/*
 * Limits.  CPUFREQ_NAME_MAX is the size of the backend name buffer.
 * CPUFREQ_STATE_MAX is the size of the state array; cpufreq_register()
 * asserts that a backend supplies fewer states than this.
 * CPUFREQ_LATENCY_MAX is compared against a nanosecond value when
 * latencies are sampled; samples above it are discarded.
 */
#define CPUFREQ_NAME_MAX	16
#define CPUFREQ_STATE_MAX	255		/* Maximum number of states */
#define CPUFREQ_LATENCY_MAX	UINT16_MAX	/* Maximum per-CPU latency  */

/*
 * A single frequency state.  The frequency and power are supplied by
 * the backend; the latency is filled in by the kernel when the
 * backend is registered.
 */
struct cpufreq_state {
	uint16_t		 freq;		/* MHz  */
	uint16_t		 power;		/* mW   */
	uint32_t		 latency;	/* nsec */
};

/*
 * The cpufreq interface: describes a backend and is also embedded in
 * cpustate_t for the cpuctl ioctls.
 *
 *	name		backend name
 *	state		states, expected in descending frequency order
 *	state_count	number of entries used in "state"
 *	state_target	frequency requested via IOC_CPU_SETSTATE;
 *			zero means that no change is requested
 *	mp		when true, IOC_CPU_SETSTATE sets the frequency
 *			of the given CPU only, otherwise of all CPUs
 *
 * Kernel only:
 *
 *	cookie		first argument passed to get_freq and set_freq
 *	get_freq	cross-call function; the second argument is a
 *			pointer to the uint16_t to be filled in
 *	set_freq	cross-call function; the second argument is a
 *			pointer to the uint16_t frequency to be set
 *	saved_freq	frequency recorded by cpufreq_suspend() and
 *			restored by cpufreq_resume(); zero if none
 *
 * NOTE(review): the kernel-only members make the size of this
 * structure differ between the kernel and userland.  As the structure
 * is embedded in cpustate_t, whose size is encoded in the ioctl
 * commands, this looks like an ABI mismatch -- to be confirmed.
 */
struct cpufreq_if {
	char			 name[CPUFREQ_NAME_MAX];
	struct cpufreq_state	 state[CPUFREQ_STATE_MAX];
	uint16_t		 state_count;
	uint16_t		 state_target;
	bool			 mp;

#ifdef _KERNEL
	void			*cookie;
	xcfunc_t		 get_freq;
	xcfunc_t		 set_freq;
	uint16_t		 saved_freq;
#endif	/* _KERNEL */
};

#ifdef _KERNEL
/*
 * Kernel interface.
 *
 * cpufreq_register() and cpufreq_deregister() install and remove the
 * single backend.  cpufreq_suspend() and cpufreq_resume() save and
 * restore the frequency of a CPU.  cpufreq_get(), cpufreq_set() and
 * cpufreq_set_all() call into the backend by means of cross-calls,
 * and cpufreq_get_if() returns a copy of the registered interface.
 *
 * All of these acquire cpu_lock and must therefore be called without
 * it being held.
 */
int	cpufreq_register(struct cpufreq_if *);
void	cpufreq_deregister(void);
void	cpufreq_suspend(struct cpu_info *);
void	cpufreq_resume(struct cpu_info *);
void	cpufreq_get(struct cpu_info *, uint16_t *);
int	cpufreq_get_if(struct cpufreq_if *);
void	cpufreq_set(struct cpu_info *, uint16_t);
void	cpufreq_set_all(uint16_t);
#endif	/* _KERNEL */

#endif /* _SYS_CPUFREQ_H_ */

Index: kern_cpu.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_cpu.c,v
retrieving revision 1.51
diff -u -p -r1.51 kern_cpu.c
--- kern_cpu.c	11 Sep 2011 14:54:49 -0000	1.51
+++ kern_cpu.c	26 Sep 2011 15:58:20 -0000
@@ -177,8 +177,9 @@ int
 cpuctl_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
 {
 	CPU_INFO_ITERATOR cii;
-	cpustate_t *cs;
+	struct cpufreq_if cif;
 	struct cpu_info *ci;
+	cpustate_t *cs;
 	int error, i;
 	u_int id;
 
@@ -201,6 +202,19 @@ cpuctl_ioctl(dev_t dev, u_long cmd, void
 		}
 		error = cpu_setintr(ci, cs->cs_intr);
 		error = cpu_setstate(ci, cs->cs_online);
+
+		if (cs->cs_cpufreq.state_target == 0)
+			break;
+
+		mutex_exit(&cpu_lock);
+
+		if (cs->cs_cpufreq.mp != false)
+			cpufreq_set(ci, cs->cs_cpufreq.state_target);
+		else {
+			cpufreq_set_all(cs->cs_cpufreq.state_target);
+		}
+
+		mutex_enter(&cpu_lock);
 		break;
 
 	case IOC_CPU_GETSTATE:
@@ -208,6 +222,7 @@ cpuctl_ioctl(dev_t dev, u_long cmd, void
 			cs = data;
 		id = cs->cs_id;
 		memset(cs, 0, sizeof(*cs));
+		memset(&cif, 0, sizeof(struct cpufreq_if));
 		cs->cs_id = id;
 		if (cs->cs_id >= maxcpus ||
 		    (ci = cpu_lookup(id)) == NULL) {
@@ -227,6 +242,26 @@ cpuctl_ioctl(dev_t dev, u_long cmd, void
 		    (ci->ci_schedstate.spc_lastmod >> 32);
 		cs->cs_intrcnt = cpu_intr_count(ci) + 1;
 		cs->cs_hwid = ci->ci_cpuid;
+
+		mutex_exit(&cpu_lock);
+		error = cpufreq_get_if(&cif);
+		mutex_enter(&cpu_lock);
+
+		if (error != 0) {
+			error = 0;
+			break;
+		}
+
+		cs->cs_cpufreq.mp = cif.mp;
+		cs->cs_cpufreq.state_count = cif.state_count;
+		strlcpy(cs->cs_cpufreq.name, cif.name, sizeof(cif.name));
+
+		for (i = 0; i < cif.state_count; i++) {
+			cs->cs_cpufreq.state[i].freq = cif.state[i].freq;
+			cs->cs_cpufreq.state[i].power = cif.state[i].power;
+			cs->cs_cpufreq.state[i].latency = cif.state[i].latency;
+		}
+
 		break;
 
 	case IOC_CPU_MAPID:
? cpufreq.h
Index: Makefile
===================================================================
RCS file: /cvsroot/src/sys/sys/Makefile,v
retrieving revision 1.137
diff -u -p -r1.137 Makefile
--- Makefile	7 Aug 2011 13:33:02 -0000	1.137
+++ Makefile	26 Sep 2011 15:58:36 -0000
@@ -8,7 +8,7 @@ INCS=	acct.h agpio.h aio.h ansi.h aout_m
 	bitops.h bootblock.h bswap.h buf.h \
 	callback.h callout.h cdefs.h cdefs_aout.h \
 	cdefs_elf.h cdio.h chio.h clockctl.h condvar.h conf.h core.h \
-	cpuio.h ctype_bits.h ctype_inline.h \
+	cpufreq.h cpuio.h ctype_bits.h ctype_inline.h \
 	device.h device_if.h \
 	dir.h dirent.h \
 	disk.h disklabel.h disklabel_acorn.h disklabel_gpt.h disklabel_rdb.h \
Index: cpuio.h
===================================================================
RCS file: /cvsroot/src/sys/sys/cpuio.h,v
retrieving revision 1.5
diff -u -p -r1.5 cpuio.h
--- cpuio.h	11 Sep 2011 14:54:49 -0000	1.5
+++ cpuio.h	26 Sep 2011 15:58:36 -0000
@@ -35,6 +35,7 @@
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/ioccom.h>
+#include <sys/cpufreq.h>
 
 #ifndef _KERNEL
 #include <stdbool.h>
@@ -45,16 +46,17 @@
  * are better returned via autoconf.
  */
 typedef struct cpustate {
-	u_int		cs_id;		/* matching ci_cpuid */
-	bool		cs_online;	/* running unbound LWPs */
-	bool		cs_intr;	/* fielding interrupts */
-	bool		cs_unused[2];	/* reserved */
-	int32_t		cs_lastmod;	/* time of last state change */
-	char		cs_name[16];	/* reserved */
-	int32_t		cs_lastmodhi;	/* time of last state change */
-	uint32_t	cs_intrcnt;	/* count of interrupt handlers + 1 */
-	uint32_t	cs_hwid;	/* hardware id */
-	uint32_t	cs_reserved;	/* reserved */
+	u_int		  cs_id;	/* matching ci_cpuid */
+	bool		  cs_online;	/* running unbound LWPs */
+	bool		  cs_intr;	/* fielding interrupts */
+	bool		  cs_unused[2];	/* reserved */
+	int32_t		  cs_lastmod;	/* time of last state change */
+	char		  cs_name[16];	/* reserved */
+	int32_t		  cs_lastmodhi;	/* time of last state change */
+	uint32_t	  cs_intrcnt;	/* count of interrupt handlers + 1 */
+	uint32_t	  cs_hwid;	/* hardware id */
+	uint32_t	  cs_reserved;	/* reserved */
+	struct cpufreq_if cs_cpufreq;	/* cpufreq(9) */
 } cpustate_t;
 
 #define	IOC_CPU_SETSTATE	_IOW('c', 0, cpustate_t)
Index: files
===================================================================
RCS file: /cvsroot/src/sys/conf/files,v
retrieving revision 1.1027
diff -u -p -r1.1027 files
--- files	19 Sep 2011 08:53:30 -0000	1.1027
+++ files	26 Sep 2011 15:59:31 -0000
@@ -1525,6 +1525,7 @@ file	kern/subr_autoconf.c
 file	kern/subr_blist.c		vmswap
 file	kern/subr_bufq.c
 file	kern/subr_callback.c
+file	kern/subr_cpufreq.c
 file	kern/subr_copy.c
 file	kern/subr_debug.c		debug
 file	kern/subr_device.c