GGAP-RBF + Q-Learning
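An agent learns a left-to-right path across a 465x465 bitmap (black background, random white rectangles) with Q-learning, using a GGAP-RBF network to approximate the Q-function. The greedy path is drawn every 200 of the 1000 training episodes, in increasingly bright red.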
/**
* Copyright uwi ( http://wonderfl.net/user/uwi )
* MIT License ( http://www.opensource.org/licenses/mit-license.php )
* Downloaded from: http://wonderfl.net/c/nk6n
*/
// forked from uwi's Q-learning test
package {
    import flash.utils.*;
    import flash.display.*;
    import flash.text.*;
    import flash.geom.*;
    // I could have sworn I told it to cross through the brightest areas it could...
    // Just under 10 seconds, and this is what I get!
    // @see http://www.soft.ics.keio.ac.jp/~okuno/docs/rinko1.ppt
    // @see http://www.topic.ad.jp/sice/papers/229/229-11.pdf
    public class FlashTest extends Sprite {
        public function FlashTest() {
            // black canvas; the white rectangles are the "bright" areas to cross
            var bmd : BitmapData = new BitmapData(465, 465, false, 0x000000);
            addChild(new Bitmap(bmd));
            var tf : TextField = new TextField();
            addChild(tf);
            tf.textColor = 0xffffff;
            tf.height = 465;
            // bmd.perlinNoise(465, 465, 6, 0, true, false, 7, true);
            var i : int;
            for(i = 0;i < 100;i++){
                bmd.fillRect(
                    new Rectangle(Math.random() * 465, Math.random() * 465, Math.random() * 50, Math.random() * 50),
                    0xffffff
                );
            }
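            // The bitmap is sampled on a 93x93 grid (one cell per 5 pixels).
            // A state encodes (column t, row) as t * 93 + row; the three
            // actions move the row by -1, 0 or +1 while t advances one column
            // per step. Note that the reward below is 1 on black pixels and
            // 0 on white ones, so as written the learner is actually drawn
            // to the dark areas.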
            var ql : QLearner = new QLearner(93 * 93, 3,
                // transition: advance one column, shift the row by action - 1
                function(state : int, action : int) : int {
                    return state + (action - 1) + 93;
                },
                // reward at column t: 1 - brightness of the sampled pixel,
                // and 0 once the path leaves the bitmap
                function(state : int, t : int) : Number {
                    var y : int = (state - t * 93) * 5;
                    // tf.appendText("re" + y + "\n");
                    if(y < 0 || y > 465)return 0;
                    return 1.0 - bmd.getPixel(t * 5, y) / 0xffffff;
                },
                93 / 2,  // initial state: column 0, middle row
                93,      // steps per episode: one per column
                0.8,     // learning rate alpha
                0.8      // discount factor gamma
            );
            ql.init();
            var s : int = getTimer();
            var w : Array = [];
            for(i = 0;i < 1000;i++){
                // snapshot the greedy path every 200 episodes
                if(i % 200 == 0)w.push(ql.getOptimizedWay());
                if(i % 2000 == 0)tf.appendText("" + ql.getMaxQ() + "\n");
                ql.learn();
                // tf.appendText("" + ql.learn() + "\n");
            }
            tf.appendText("" + ql.getMaxQ() + "\n");
            var g : int = getTimer();
            tf.appendText("" + (g - s) + " ms");
            w.push(ql.getOptimizedWay());
            // draw the snapshots: earlier paths in darker red, later ones brighter
            for(var j : int = 0;j < w.length;j++){
                var state : int = 230;  // 46 * 5, the initial row in pixels
                var c : uint = (j / w.length * 127 + 128) << 16;
                for(i = 0;i < 93;i++){
                    bmd.setPixel(i * 5, state, c);
                    state += (w[j][i] - 1) * 5;
                }
            }
        }
    }
}
class RBFNetwork
{
    // each neuron is { x : Array (centre), w : Number (weight), sigma : Number (width) }
    protected var _neurons : Array;

    public function RBFNetwork()
    {
        _neurons = [];
    }

    // squared Euclidean distance between two vectors
    public function d2(a : Array, b : Array) : Number
    {
        var ret : Number = 0.0;
        for(var i : int = 0;i < a.length;i++){
            ret += (a[i] - b[i]) * (a[i] - b[i]);
        }
        return ret;
    }

    // network output: weighted sum of Gaussian basis functions
    public function f(x : Array) : Number
    {
        var ret : Number = 0;
        for(var j : int = 0;j < _neurons.length;j++){
            var n : Object = _neurons[j];
            var r2 : Number = d2(n.x, x);
            ret += n.w * Math.exp(-r2 / n.sigma);
        }
        return ret;
    }
}
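// The output above is f(x) = sum_j w_j exp(-|x - c_j|^2 / sigma_j): each
// neuron contributes a Gaussian bump centred at its stored c_j. Note that
// sigma_j divides the squared distance directly here, rather than the
// 2 sigma^2 of the textbook Gaussian.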
class GGAPRBF extends RBFNetwork
{
    private var _delta : Number = 0.01;    // distance criterion for growing
    private var _epsilon : Number = 0.01;  // error criterion for growing
    private var _zeta : Number = 1.0;      // width scale for new neurons
    private var _eta : Number = 0.2;       // significance threshold for pruning

    public function GGAPRBF()
    {
        super();
    }

    public function learn(x : Array, y : Number) : void
    {
        // find the neuron nearest to the sample
        var minj : int = -1;
        var mind : Number = Number.MAX_VALUE;
        for(var j : int = 0;j < _neurons.length;j++){
            var n : Object = _neurons[j];
            var r2 : Number = d2(n.x, x);
            // distance criterion: the sample is too close to an existing centre
            if(r2 < _delta)return;
            if(r2 < mind){
                mind = r2;
                minj = j;
            }
        }
        // error criterion: the prediction is already good enough
        var fx : Number = f(x);
        if(Math.abs(y - fx) < _epsilon)return;
        // grow: a new neuron centred at x absorbs the residual error, with a
        // width proportional to the distance to its nearest neighbour
        _neurons.push({
            x : x.concat(),
            w : y - fx,
            sigma : mind * _zeta
        });
        if(minj != -1){
            var nj : Object = _neurons[minj];
            // prune the nearest neuron if its relative contribution is insignificant
            if(Math.abs(nj.w / f(nj.x)) < _eta){
                if(minj == _neurons.length - 1){
                    _neurons.pop();
                }else{
                    // swap-remove: move the last neuron into the freed slot
                    _neurons[minj] = _neurons.pop();
                }
            }
        }
    }
}
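// GGAP-RBF in brief (after the papers linked above): the network grows only
// when a sample is both far from every existing centre (_delta) and badly
// predicted (_epsilon), and the nearest neuron is pruned when its relative
// significance drops below _eta, keeping the network small. A minimal usage
// sketch, separate from the demo (the 1D target is purely illustrative):
//
//   var net : GGAPRBF = new GGAPRBF();
//   for(var k : int = 0;k < 1000;k++){
//       var x : Number = Math.random() * 2 - 1;
//       net.learn([x], Math.sin(Math.PI * x));
//   }
//   // net.f([0.5]) should now be close to sin(Math.PI * 0.5) = 1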
class QLearner
{
    private var _reward : Function;
    private var _update : Function;
    private var _Q : GGAPRBF;
    private var _nState : int;
    private var _nAction : int;
    private var _alpha : Number;
    private var _gamma : Number;
    private var _iniState : int;
    private var _tlim : int;

    public function QLearner(nState : int, nAction : int, update : Function, reward : Function, iniState : int, tlim : int, alpha : Number = 0.1, gamma : Number = 0.1)
    {
        _nState = nState;
        _nAction = nAction;
        _reward = reward;
        _update = update;
        _alpha = alpha;
        _gamma = gamma;
        _iniState = iniState;
        _tlim = tlim;
    }

    public function init() : void
    {
        _Q = new GGAPRBF();
    }
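    // One episode: starting from _iniState, pick actions uniformly at random
    // (pure exploration) and move the approximated Q-value toward
    //   Q(s,a) <- (1 - alpha) Q(s,a) + alpha (r + gamma max_a' Q(s',a'))
    // with a step size that decays as 1 / (t + 10).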
    public function learn() : Array
    {
        var ret : Array = [];
        var state : int = _iniState;
        for(var t : int = 0;t < _tlim - 1;t++){
            // random action selection
            var a : int = Math.random() * _nAction;
            var r : Number = _reward(state, t);
            var alpha : Number = _alpha / (t + 10);
            var nex : int = _update(state, a);
            // max Q over the actions available at the next state
            var maxq : Number = 0;
            for(var aa : int = 0;aa < _nAction;aa++){
                var q : Number = _Q.f(qvec(nex, aa));
                if(maxq < q)maxq = q;
            }
            _Q.learn(qvec(state, a), (1 - alpha) * _Q.f(qvec(state, a)) + alpha * (r + _gamma * maxq));
            state = nex;
            ret.push(state);
        }
        return ret;
    }
    // feature vector for the RBF network: column and row scaled to [-1, 1],
    // action shifted to {-1, 0, 1}
    private function qvec(state : int, a : int) : Array
    {
        return [(int(state / 93) * 5) / 232.5 - 1, ((state % 93) * 5) / 232.5 - 1, a - 1];
    }

    // greedy rollout: at each step follow the action with the highest Q-value
    public function getOptimizedWay() : Array
    {
        var ret : Array = new Array(_tlim);
        var state : int = _iniState;
        for(var t : int = 0;t < _tlim;t++){
            var maxq : Number = Number.NEGATIVE_INFINITY;
            var maxaa : int = 0;
            for(var aa : int = 0;aa < _nAction;aa++){
                var v : Number = _Q.f(qvec(state, aa));
                if(maxq < v){
                    maxq = v;
                    maxaa = aa;
                }
            }
            state = _update(state, maxaa);
            ret[t] = maxaa;
        }
        return ret;
    }
    // discounted sum of the greedy Q-values along the greedy path
    public function getMaxQ() : Number
    {
        var ret : Number = 0;
        var state : int = _iniState;
        for(var t : int = 0;t < _tlim;t++){
            var maxq : Number = Number.NEGATIVE_INFINITY;
            var maxaa : int = 0;
            for(var aa : int = 0;aa < _nAction;aa++){
                var v : Number = _Q.f(qvec(state, aa));
                if(maxq < v){
                    maxq = v;
                    maxaa = aa;
                }
            }
            state = _update(state, maxaa);
            ret += Math.pow(_gamma, t) * maxq;
        }
        return ret;
    }
}