//  Copyright (c) 2019 National Institute of Advanced Industrial Science and Technology (AIST), All Rights Reserved.
//  Author: Yuuji Ichisugi
/*

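Prototype of a model concerned with the values of logic rules.
(The original Japanese description was damaged by a character-encoding
error; this English summary is approximate and should be confirmed
against a correctly encoded copy of the file.)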

 */

package tmm1;

import java.awt.Panel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.concurrent.ForkJoinTask;
import java.util.stream.Collectors;

import static tmm1.TMM2v4.Action.*;

import lab.Lab;
import lab.Lab.LabCode;

public class TMM2v4 {
    /**
     * Program entry point. Registers this class with {@code Lab}, prints the
     * current value of {@code Lab.selectableClasses}, then hands
     * {@code TMM2Main1.class} to a fresh {@code LabCode}.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        Lab.addSelectableClass(TMM2v4.class);
        String selectable = Lab.selectableClasses + "";
        System.out.println(selectable);

        new LabCode().main(TMM2Main1.class);
    }
    /**
     * Kinds of action a rule can select. In the rule DSL of this file,
     * {@code Call} and {@code Set} are the two actions that carry a state
     * pattern as parameter; {@code Return} and {@code Fail} carry none.
     * The declaration order is part of the enum's contract (ordinals).
     */
    public static enum Action {
        /** Action with a state-pattern parameter (built by {@code call(...)}). */
        Call,
        /** Parameterless action. */
        Return,
        /** Action with a state-pattern parameter (built by {@code set(...)}). */
        Set,
        /** Parameterless action used by the catch-all rules. */
        Fail
    }
    // Symbols
    // Every symbol is an interned String stored as Object. Rule.match compares
    // pattern elements and values by reference (== / !=), so symbols must be
    // the canonical interned instances.
    // V: first element of the (V, variable, value) triples used in goals and rules.
    public static final Object V = "V".intern();
    // T, F: values that appear in the environments' value tables.
    public static final Object T = "T".intern();
    public static final Object F = "F".intern();
    // A, B, C, D: variable symbols returned by the environments' getVarTable().
    public static final Object A = "A".intern();
    public static final Object B = "B".intern();
    public static final Object C = "C".intern();
    public static final Object D = "D".intern();
    // Abstract syntax node for DSL
    /**
     * DSL syntax node for a state: an ordered list of elements (symbols,
     * wildcards or pattern variables).
     */
    public static class StateN {
        /** Elements of the state, in positional order. */
        public List<Object> elems;

        /**
         * @param vars the elements of the state; the list is stored as given,
         *             not copied
         */
        public StateN(List<Object> vars) {
            elems = vars;
        }

        /**
         * Renders the state as {@code s(e1, e2, ..., )}; every element,
         * including the last one, is followed by {@code ", "}.
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("s(");
            for (Object elem : elems) {
                text.append(elem.toString()).append(", ");
            }
            return text.append(")").toString();
        }
    }
    /** DSL syntax node wrapping the state pattern passed to a {@code Call} action. */
    public static class CallN {
        /** State pattern used as the parameter of the call. */
        public StateN m;

        /** @param m state pattern used as the parameter of the call */
        public CallN(StateN m) {
            this.m = m;
        }

        /** Renders the node as {@code call(<m>)}. */
        @Override
        public String toString() {
            return new StringBuilder("call(").append(m).append(")").toString();
        }
    }
    /** DSL syntax node wrapping the state pattern passed to a {@code Set} action. */
    public static class SetN {
        /** State pattern used as the parameter of the set. */
        public StateN m;

        /** @param m state pattern used as the parameter of the set */
        public SetN(StateN m) {
            this.m = m;
        }

        /** Renders the node as {@code set(<m>)}. */
        @Override
        public String toString() {
            return new StringBuilder("set(").append(m).append(")").toString();
        }
    }
    /**
     * DSL syntax node for one rule: a state pattern, a goal pattern and the
     * action chosen when both match.
     */
    public static class RuleN {
        /** State pattern. */
        public StateN s;
        /** Goal pattern. */
        public StateN g;
        /** Action attached to the rule. */
        public ActionN a;

        /**
         * @param s state pattern
         * @param g goal pattern
         * @param a action attached to the rule
         */
        public RuleN(StateN s, StateN g, ActionN a) {
            this.s = s;
            this.g = g;
            this.a = a;
        }

        /** Renders the node as {@code rule(<s>, <g>, <a>)}. */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("rule(");
            text.append(s).append(", ");
            text.append(g).append(", ");
            text.append(a).append(")");
            return text.toString();
        }
    }
    /** DSL syntax node for an action together with its optional state parameter. */
    public static class ActionN {
        /** Kind of action. */
        public Action a;
        /** State-pattern parameter of the action, or {@code null} when it has none. */
        public StateN m;

        /**
         * @param a kind of action
         * @param m state-pattern parameter, or {@code null} when the action has none
         */
        public ActionN(Action a, StateN m) {
            this.a = a;
            this.m = m;
        }

        /**
         * Renders the action name alone when there is no parameter, otherwise
         * {@code <action>(<m>)}.
         */
        @Override
        public String toString() {
            String label = a.toString();
            return (m == null) ? label : label + "(" + m + ")";
        }
    }
    /**
     * Pattern variable of the rule DSL.
     *
     * <p>Instances are used as identity-based map keys when a rule is compiled
     * ({@code Rule.vmap}), so {@code equals}/{@code hashCode} are deliberately
     * not overridden: two variables with the same name are still distinct.
     */
    public static class VariableN {
        /** Name of the variable, used for display only. */
        public String name;

        /** @param name display name of the variable */
        public VariableN(String name){ this.name = name; }

        /**
         * Returns the variable's name so that printed states and rules show
         * {@code x} instead of an identity hash; this mirrors
         * {@code Rule.PatternVariable.toString()}.
         */
        @Override
        public String toString() { return ""+ name; }
    }
    /**
     * Base class for rule sets written in a small embedded DSL.
     *
     * <p>A state is a 9-element vector written as three triples, here called
     * the a-, b- and c-triple. Subclasses implement {@link #makeRules()},
     * which must assign {@link #ruleList} before calling any of the
     * rule-adding helpers (they append to it without checking for null).
     */
    public static abstract class RuleCode extends Lab.Code {
        // Pattern variables available to rule definitions.
        VariableN a0 = new VariableN("a0");
        VariableN a1 = new VariableN("a1");
        VariableN a2 = new VariableN("a2");
        VariableN b0 = new VariableN("b0");
        VariableN b1 = new VariableN("b1");
        VariableN b2 = new VariableN("b2");
        VariableN c0 = new VariableN("c0");
        VariableN c1 = new VariableN("c1");
        VariableN c2 = new VariableN("c2");
        VariableN x = new VariableN("x");
        VariableN y = new VariableN("y");

        /** Wildcard symbol (same object as {@code Rule.WILDCARD}). */
        public static final String __ = Rule.WILDCARD; // Two underscores.
        /** Symbol "O", used to fill the triples a rule does not use. */
        final Object O = "O".intern(); // Capital "o"

        /** Rules collected so far, in the order they were added. */
        List<RuleN> ruleList;

        /** Builds a state pattern from nine elements (a-, b-, c-triple). */
        public StateN s(Object a0, Object a1, Object a2,
                Object b0, Object b1, Object b2,
                Object c0, Object c1, Object c2) {
            List<Object> elems = Arrays.asList(a0, a1, a2, b0, b1, b2, c0, c1, c2);
            return new StateN(elems);
        }

        /** Builds the parameter of a {@code Call} action from nine elements. */
        public CallN call(Object a0, Object a1, Object a2,
                Object b0, Object b1, Object b2,
                Object c0, Object c1, Object c2) {
            List<Object> elems = Arrays.asList(a0, a1, a2, b0, b1, b2, c0, c1, c2);
            return new CallN(new StateN(elems));
        }

        /** Builds the parameter of a {@code Set} action from nine elements. */
        public SetN set(Object a0, Object a1, Object a2,
                Object b0, Object b1, Object b2,
                Object c0, Object c1, Object c2) {
            List<Object> elems = Arrays.asList(a0, a1, a2, b0, b1, b2, c0, c1, c2);
            return new SetN(new StateN(elems));
        }

        /** Adds a rule whose action has no parameter. */
        public void q(StateN s, StateN g, Action a) {
            ActionN bare = new ActionN(a, null);
            ruleList.add(new RuleN(s, g, bare));
        }

        /** Adds a rule whose action {@code a} takes the state pattern {@code m}. */
        public void q(StateN s, StateN g, Action a, StateN m) {
            ActionN withParam = new ActionN(a, m);
            ruleList.add(new RuleN(s, g, withParam));
        }

        /** Adds a rule whose action is {@code Call} with the pattern held by {@code c}. */
        public void q(StateN s, StateN g, CallN c) {
            ActionN callAction = new ActionN(Action.Call, c.m);
            ruleList.add(new RuleN(s, g, callAction));
        }

        /** Adds a rule whose action is {@code Set} with the pattern held by {@code c}. */
        public void q(StateN s, StateN g, SetN c) {
            ActionN setAction = new ActionN(Action.Set, c.m);
            ruleList.add(new RuleN(s, g, setAction));
        }

        /**
         * Axiom {@code a0(a1,a2).}: from any state, when the goal's c-triple
         * is (a0,a1,a2), set the c-triple to (a0,a1,a2).
         */
        public void axiom(Object a0, Object a1, Object a2) {
            StateN anyState = s(__,__,__, __,__,__, __,__,__);
            StateN goal = s(O,O,O, O,O,O, a0,a1,a2);
            q(anyState, goal, set(O,O,O, O,O,O, a0,a1,a2));
        }

        /**
         * Inference rule {@code c0(c1,c2) :- a0(a1,a2), b0(b1,b2).}
         * Adds seven rules in three groups, one group per goal pattern.
         */
        public void inferenceRule2(Object c0, Object c1, Object c2,
                Object a0, Object a1, Object a2,
                Object b0, Object b1, Object b2) {
            // Goal: the conclusion in the c-triple.
            StateN conclusion = s(O,O,O, O,O,O, c0,c1,c2);
            q(s(__,__,__, __,__,__, __,__,__), conclusion,
                    call(a0,a1,a2, O,O,O, O,O,O));
            q(s(a0,a1,a2, O,O,O, O,O,O), conclusion,
                    call(a0,a1,a2, b0,b1,b2, O,O,O));
            q(s(a0,a1,a2, b0,b1,b2, O,O,O), conclusion,
                    set(O,O,O, O,O,O, c0,c1,c2));

            // Goal: the first premise in the a-triple.
            StateN firstPremise = s(a0,a1,a2, O,O,O, O,O,O);
            q(s(__,__,__, __,__,__, __,__,__), firstPremise,
                    call(O,O,O, O,O,O, a0,a1,a2));
            q(s(O,O,O, O,O,O, a0,a1,a2), firstPremise,
                    set(a0,a1,a2, O,O,O, O,O,O));

            // Goal: both premises in the a- and b-triples.
            StateN bothPremises = s(a0,a1,a2, b0,b1,b2, O,O,O);
            q(s(a0,a1,a2, O,O,O, O,O,O), bothPremises,
                    call(O,O,O, O,O,O, b0,b1,b2));
            q(s(O,O,O, O,O,O, b0,b1,b2), bothPremises,
                    set(a0,a1,a2, b0,b1,b2, O,O,O));
        }

        /**
         * Inference rule {@code c0(c1,c2) :- a0(a1,a2).}
         * Note: this method has no b0/b1/b2 parameters, so those names would
         * refer to the pattern-variable fields; take care not to use them here.
         */
        public void inferenceRule1(Object c0, Object c1, Object c2,
                Object a0, Object a1, Object a2) {
            StateN conclusion = s(O,O,O, O,O,O, c0,c1,c2);
            q(s(__,__,__, __,__,__, __,__,__), conclusion,
                    call(O,O,O, O,O,O, a0,a1,a2));
            q(s(O,O,O, O,O,O, a0,a1,a2), conclusion,
                    set(O,O,O, O,O,O, c0,c1,c2));
        }

        /**
         * Adds a rule that uses only the c-triple of each vector: state
         * c-triple (a0,a1,a2), goal c-triple (b0,b1,b2), action {@code Set}
         * with c-triple (c0,c1,c2). All other positions are O.
         */
        public void q1set(
                Object a0, Object a1, Object a2,
                Object b0, Object b1, Object b2,
                Object c0, Object c1, Object c2) {
            StateN state = s(O,O,O, O,O,O, a0,a1,a2);
            StateN goal = s(O,O,O, O,O,O, b0,b1,b2);
            q(state, goal, set(O,O,O, O,O,O, c0,c1,c2));
        }

        /**
         * Same layout as {@link #q1set}, but the action is {@code Call} with
         * c-triple (c0,c1,c2).
         */
        public void q1call(
                Object a0, Object a1, Object a2,
                Object b0, Object b1, Object b2,
                Object c0, Object c1, Object c2) {
            StateN state = s(O,O,O, O,O,O, a0,a1,a2);
            StateN goal = s(O,O,O, O,O,O, b0,b1,b2);
            q(state, goal, call(O,O,O, O,O,O, c0,c1,c2));
        }

        /**
         * Shorthand for {@code q1set(__,__,__, a0,a1,a2, a0,a1,a2)}: whatever
         * the state's c-triple is, a goal (a0,a1,a2) is answered by setting
         * exactly (a0,a1,a2).
         */
        public void a1(Object a0, Object a1, Object a2) {
            q1set(__,__,__, a0,a1,a2, a0,a1,a2);
        }

        /**
         * Builds the rule set. The implementations in this file assign a new
         * list to {@link #ruleList}, add rules to it and return it.
         */
        public abstract List<RuleN> makeRules();
    }
    //--------------------------------------------------
    //--------------------------------------------------
    // Environment table
    // Declares the possible combinations of values of all variables.
    /**
     * Base class of the environments defined in this file. An environment
     * supplies the variable symbols, the table of possible value rows, the
     * per-episode initialisation and the rule set to use.
     */
    public static abstract class EnvCode extends Lab.Code {
        /** Returns the variable symbols of this environment. */
        public abstract Object[] getVarTable();
        /**
         * Returns the possible value rows; in the implementations of this file
         * each row has one value per variable, in getVarTable() order.
         */
        public abstract Object[][] getValTable();
        /**
         * Prepares one episode. The implementations in this file choose
         * world.currentEnv, fill world.isVisibleVar and call
         * agent.setStartAndGoal(start, goal).
         */
        public abstract void initEpisode(TMM2Main1.World world, TMM2Main1.Agent agent);
        /** Returns the rule set associated with this environment. */
        public abstract RuleCode getRuleCode();
    }
    /**
     * Environment with four variables A, B, C, D and four possible value rows.
     * Each episode picks the value row, the visibility of every variable and
     * the target variable with Lab.irand.
     */
    public static class Env1 extends EnvCode {
        /** Returns the variable symbols A, B, C, D. */
        public Object[] getVarTable() {
            return new Object[] { A, B, C, D, }; 
        }
        /** Returns the possible value rows; columns follow the order A, B, C, D. */
        public Object[][] getValTable() {
            Object[][] ret = {
                    // A, B, C, D
                    { T, T, F, T, },
                    { T, F, F, F, },
                    { F, T, T, F, },
                    { F, F, F, T, },
            };
            return ret;
        }
        /**
         * Initialises one episode: chooses the value row, the visibility flags
         * and the target variable, then gives the agent its start state and goal.
         */
        public void initEpisode(TMM2Main1.World world, TMM2Main1.Agent agent){
            // Pick one of the possible rows of variable values at random.
            world.currentEnv = world.envValTable[Lab.irand(world.envValTable.length)];
            // Randomly decide the visibility of each variable.
            for (int i = 0; i < world.isVisibleVar.length; i++) {
                world.isVisibleVar[i] = (Lab.irand(2) == 0); // true : false = 50% : 50%
            }
            // Choose the variable to be inferred; it is placed in the goal.
            Object targetVar = world.envVarTable[Lab.irand(world.envVarTable.length)];

            // Give the agent the initial values of state s and goal g.
            final Object O = "O".intern();
            // Start state: all nine positions are O.
            State start = new State(new Object[] {O,O,O, O,O,O, O,O,O});  
            //goal = new State(new Object[] {O,O,O, O,O,O, "Goal".intern(),O,O});
            // Goal: last triple is (V, targetVar, PHI); the value position is left as PHI.
            State goal = new State(new Object[] {O,O,O, O,O,O, V,targetVar,Rule.PHI});
            agent.setStartAndGoal(start, goal);
        }
        // Rule set obtained from the Lab panel under the key "makeRuleCode".
        // NOTE(review): RuleTest1.class is presumably the default or base class
        // of the selectable rule codes - confirm against panel.getCode.
        public RuleCode makeRuleCode = panel.getCode("makeRuleCode", 
                RuleTest1.class);
        /** Returns the rule set held in makeRuleCode. */
        public RuleCode getRuleCode() { return makeRuleCode; }
        
        /** Hand-written rule set for Env1. */
        public static class RuleTest1 extends RuleCode {
            /** Creates a fresh rule list, adds the rules below in order and returns it. */
            public List<RuleN> makeRules(){
                ruleList = new ArrayList<>();
                // Catch-all: any state and any goal may select Fail.
                q(s(__,__,__, __,__,__, __,__,__), s(__,__,__, __,__,__, __,__,__),
                        Fail);
                // Arbitrarily hand-written logic rules.
                // a1(V,X,v): for goal (V,X,v), set (V,X,v) regardless of the state.
                a1(V,A,T);
                a1(V,B,T);
                a1(V,B,F);
                a1(V,C,T);
                a1(V,C,F);
                // q1call(state, goal, sub): with the given state and goal triples,
                // call with triple `sub`.
                q1call(__,__,__, V,A,F, V,B,T);
                q1call(__,__,__, V,A,T, V,C,F);
                q1call(__,__,__, V,A,F, V,D,T);
                q1call(__,__,__, __,__,__, V,C,F);
                // q1set(state, goal, result): with the given state and goal triples,
                // set triple `result`.
                q1set(V,C,F, V,A,T, V,A,T);
                q1set(V,C,F, V,B,T, V,B,T);
                q1set(V,C,F, V,D,T, V,D,T);
                q1set(V,D,T, V,A,F, V,A,F);
                q1set(V,D,T, V,B,F, V,B,F);
                q1set(V,D,T, V,C,F, V,C,F);
                return ruleList;
            }
        }
    }
    /**
     * Environment with two variables whose value rows are {T, F} and {F, T},
     * i.e. B always differs from A. A is always visible and the target
     * variable is B.
     */
    public static class Env6not extends EnvCode {
        /** Returns the variable symbols A and B. */
        public Object[] getVarTable() {
            return new Object[] { A, B, }; 
        }
        /** Returns the possible value rows; columns follow the order A, B. */
        public Object[][] getValTable() {
            Object[][] ret = {
                    // A, B
                    { T, F, },
                    { F, T, },
            };
            return ret;
        }
        /**
         * Initialises one episode: chooses the value row, sets the visibility
         * of A and B, and gives the agent a goal asking for the value of B.
         */
        public void initEpisode(TMM2Main1.World world, TMM2Main1.Agent agent){
            // Pick one of the possible rows of variable values at random.
            world.currentEnv = world.envValTable[Lab.irand(world.envValTable.length)];
            world.isVisibleVar[0] = true;
            // B is visible when Lab.rand() is below the panel value "Visible rate".
            // NOTE(review): the arguments 0.5f, 0, 1 are presumably default, min
            // and max - confirm against panel.getFloat.
            world.isVisibleVar[1] = Lab.rand() < panel.getFloat("Visible rate", 0.5f, 0, 1);
            //world.isVisibleVar[1] = Lab.irand(2) == 0; // true : false = 50% : 50%
            // The panel flag overrides the draw above and hides B.
            if (panel.flag("Always var B is invisible.", false)) {
                world.isVisibleVar[1] = false;
            }
            Object targetVar = world.envVarTable[1]; // B

            // Give the agent the initial values of state s and goal g.
            final Object O = "O".intern();
            // Start state: all nine positions are O.
            State start = new State(new Object[] {O,O,O, O,O,O, O,O,O});  
            //goal = new State(new Object[] {O,O,O, O,O,O, "Goal".intern(),O,O});
            // Goal: last triple is (V, targetVar, PHI); the value position is left as PHI.
            State goal = new State(new Object[] {O,O,O, O,O,O, V,targetVar,Rule.PHI});
            agent.setStartAndGoal(start, goal);
        }
        /** Common base type of the rule sets of this environment. */
        public static abstract class  RuleCode2 extends RuleCode {}
        // Rule set obtained from the Lab panel under the key "makeRuleCode".
        // NOTE(review): presumably the panel offers the subclasses of RuleCode2 -
        // confirm against panel.getCode.
        public RuleCode makeRuleCode = panel.getCode("makeRuleCode", 
                RuleCode2.class);
        /** Returns the rule set held in makeRuleCode. */
        public RuleCode getRuleCode() { return makeRuleCode; }

        /**
         * Rule set using call/set pairs between A and B; the first half is
         * consistent with the value table, the second half is marked
         * "bad rules" by the author.
         */
        public static class Rule1 extends RuleCode2 {
            /** Creates a fresh rule list, adds the rules below in order and returns it. */
            public List<RuleN> makeRules(){
                ruleList = new ArrayList<>();
                // Catch-all: any state and any goal may select Fail.
                q(s(__,__,__, __,__,__, __,__,__), s(__,__,__, __,__,__, __,__,__),
                        Fail);
                // For goal (V,X,v), set (V,X,v) regardless of the state.
                a1(V,A,T);
                a1(V,A,F);
                a1(V,B,T);
                a1(V,B,F);
                // Pairs: for a goal, call the other variable with the opposite
                // value, then set the goal once that value is in the state.
                q1call(__,__,__, V,A,T, V,B,F);
                q1set(V,B,F, V,A,T, V,A,T);
                q1call(__,__,__, V,A,F, V,B,T);
                q1set(V,B,T, V,A,F, V,A,F);
                q1call(__,__,__, V,B,T, V,A,F);
                q1set(V,A,F, V,B,T, V,B,T);
                q1call(__,__,__, V,B,F, V,A,T);
                q1set(V,A,T, V,B,F, V,B,F);
                // bad rules
                // These pair equal values of A and B, which no row of the value table has.
                q1call(__,__,__, V,A,F, V,B,F);
                q1set(V,B,F, V,A,F, V,A,F);
                q1call(__,__,__, V,A,T, V,B,T);
                q1set(V,B,T, V,A,T, V,A,T);
                q1call(__,__,__, V,B,F, V,A,F);
                q1set(V,A,F, V,B,F, V,B,F);
                q1call(__,__,__, V,B,T, V,A,T);
                q1set(V,A,T, V,B,T, V,B,T);
                return ruleList;
            }
        }
        // Author's note (approximate translation of a garbled comment): the
        // correct-answer rate reaches 1, but it does not behave as intended.
        public static class Rule2 extends RuleCode2 {
            /** Creates a fresh rule list, adds the rules below in order and returns it. */
            public List<RuleN> makeRules(){
                ruleList = new ArrayList<>();
                // Catch-all: any state and any goal may select Fail.
                q(s(__,__,__, __,__,__, __,__,__), s(__,__,__, __,__,__, __,__,__),
                        Fail);
                // Goal (V,A,x): set (V,A,x), reusing the pattern variable x.
                q1set(__,__,__, V,A,x, V,A,x);
                // A is a visible variable.
                q1call(__,__,__, V,B,__, V,A,__);
                q1set(V,A,F, V,B,T, V,B,T);
                q1set(V,A,T, V,B,F, V,B,F);
                // bad rules
                q1call(__,__,__, V,B,F, V,A,F);
                q1set(V,A,F, V,B,F, V,B,F);
                q1call(__,__,__, V,B,T, V,A,T);
                q1set(V,A,T, V,B,T, V,B,T);
                return ruleList;
            }
        }
        /** Variant that uses only Set actions (no Call) besides the Fail catch-all. */
        public static class Rule3 extends RuleCode2 {
            /** Creates a fresh rule list, adds the rules below in order and returns it. */
            public List<RuleN> makeRules(){
                ruleList = new ArrayList<>();
                // Catch-all: any state and any goal may select Fail.
                q(s(__,__,__, __,__,__, __,__,__), s(__,__,__, __,__,__, __,__,__),
                        Fail);
                // A is a visible variable.
                q1set(__,__,__, V,B,T, V,A,F);
                q1set(V,A,F, V,B,T, V,B,T);
                q1set(__,__,__, V,B,F, V,A,T);
                q1set(V,A,T, V,B,F, V,B,F);
                // bad rules
                q1set(__,__,__, V,B,F, V,A,F); // not bad !
                q1set(V,A,F, V,B,F, V,B,F);
                q1set(__,__,__, V,B,T, V,A,T); // not bad !
                q1set(V,A,T, V,B,T, V,B,T);
                return ruleList;
            }
        }
        /** Set-only variant without the Fail catch-all; the goal value is a wildcard in the first rule. */
        public static class Rule4 extends RuleCode2 {
            /** Creates a fresh rule list, adds the rules below in order and returns it. */
            public List<RuleN> makeRules(){
                ruleList = new ArrayList<>();
                q1set(__,__,__, V,B,__, V,A,__);
                q1set(V,A,F, V,B,T, V,B,T);
                q1set(V,A,T, V,B,F, V,B,F);
                // bad rules
                q1set(V,A,F, V,B,F, V,B,F);
                q1set(V,A,T, V,B,T, V,B,T);
                return ruleList;
            }
        }
        /** Like Rule4, but the goal value is a wildcard in every rule, plus two state-independent guesses. */
        public static class Rule5 extends RuleCode2 {
            /** Creates a fresh rule list, adds the rules below in order and returns it. */
            public List<RuleN> makeRules(){
                ruleList = new ArrayList<>();
                q1set(__,__,__, V,B,__, V,A,__);
//                q1set(V,A,F, V,B,T, V,B,T);
//                q1set(V,A,T, V,B,F, V,B,F);
//                // bad rules
//                q1set(V,A,F, V,B,F, V,B,F);
//                q1set(V,A,T, V,B,T, V,B,T);
                q1set(V,A,F, V,B,__, V,B,T);
                q1set(V,A,T, V,B,__, V,B,F);
                // bad rules
                q1set(V,A,F, V,B,__, V,B,F);
                q1set(V,A,T, V,B,__, V,B,T);
                // Author's note (approximate translation of a garbled comment):
                // depending on the settings, the usage frequency of these blind
                // guesses does not drop to 0.
                q1set(__,__,__, V,B,__, V,B,F);
                q1set(__,__,__, V,B,__, V,B,T);
                // Author's note on the disabled rule below; the original comment is
                // garbled. It appears to concern the current learning method and the
                // value of a rule without penalty - TODO recover the exact wording.
                //q1set(__,__,__, V,B,__, V,B,__);
                return ruleList;
            }
        }
    }
    /**
     * Environment with four variables: A and B are always visible, C holds a
     * symbol naming the (A, B) pair (FF, FT, TF, TT) and D equals A XOR B in
     * every row of the value table. The target variable is D.
     */
    public static class Env7xor2 extends EnvCode {
        // Values of C: one interned symbol per combination of A and B.
        public static final Object FF = "FF".intern();
        public static final Object FT = "FT".intern();
        public static final Object TF = "TF".intern();
        public static final Object TT = "TT".intern();
        
        /** Returns the variable symbols A, B, C, D. */
        public Object[] getVarTable() {
            return new Object[] { A, B, C, D}; 
        }
        /** Returns the possible value rows; columns follow the order A, B, C, D. */
        public Object[][] getValTable() {
            Object[][] ret = {
                    // A, B, C, D
                    { F, F, FF, F, },
                    { F, T, FT, T, },
                    { T, F, TF, T, },
                    { T, T, TT, F, },
            };
            return ret;
        }
        /**
         * Initialises one episode: chooses the value row, makes A and B
         * visible, draws the visibility of C and D, and gives the agent a
         * goal asking for the value of D.
         */
        public void initEpisode(TMM2Main1.World world, TMM2Main1.Agent agent){
            // Pick one of the possible rows of variable values at random.
            world.currentEnv = world.envValTable[Lab.irand(world.envValTable.length)];
            world.isVisibleVar[0] = true;
            world.isVisibleVar[1] = true;
            //world.isVisibleVar[2] = false;
            world.isVisibleVar[2] = Lab.irand(2) == 0; // true : false = 50% : 50%
            world.isVisibleVar[3] = Lab.irand(2) == 0; // true : false = 50% : 50%
            Object targetVar = world.envVarTable[3]; // D

            // Give the agent the initial values of state s and goal g.
            final Object O = "O".intern();
            // Start state: all nine positions are O.
            State start = new State(new Object[] {O,O,O, O,O,O, O,O,O});  
            //goal = new State(new Object[] {O,O,O, O,O,O, "Goal".intern(),O,O});
            // Goal: last triple is (V, targetVar, PHI); the value position is left as PHI.
            State goal = new State(new Object[] {O,O,O, O,O,O, V,targetVar,Rule.PHI});
            agent.setStartAndGoal(start, goal);
        }
        /** Common base type of the rule sets of this environment. */
        public static abstract class  RuleCode2 extends RuleCode {}
        // Rule set obtained from the Lab panel under the key "makeRuleCode".
        // NOTE(review): presumably the panel offers the subclasses of RuleCode2 -
        // confirm against panel.getCode.
        public RuleCode makeRuleCode = panel.getCode("makeRuleCode", 
                RuleCode2.class);
        /** Returns the rule set held in makeRuleCode. */
        public RuleCode getRuleCode() { return makeRuleCode; }

        /**
         * One group of five rules per row of the value table: call C with the
         * row's pair symbol, chain A, B and C through Set rules, then set D.
         */
        public static class Rule1 extends RuleCode2 {
            /** Creates a fresh rule list, adds the rules below in order and returns it. */
            public List<RuleN> makeRules(){
                ruleList = new ArrayList<>();

                // Author's note on the disabled alternative below; the original
                // comment is garbled (it appears to be a remark about rule value) -
                // TODO recover the exact wording.
//                // Other solutions
//                q1set(__,__,__, V,C,FF, V,B,F);
//                q1set(V,B,F, V,C,FF, V,A,F);
//                q1set(V,A,F, V,C,FF, V,C,FF);
                
                // F,F -> F
                q1call(__,__,__, V,D,__, V,C,FF);
                q1set(__,__,__, V,C,FF, V,A,F);
                q1set(V,A,F, V,C,FF, V,B,F);
                q1set(V,B,F, V,C,FF, V,C,FF);
                q1set(V,C,FF, V,D,__, V,D,F);

                // F,T -> T
                q1call(__,__,__, V,D,__, V,C,FT);
                q1set(__,__,__, V,C,FT, V,A,F);
                q1set(V,A,F, V,C,FT, V,B,T);
                q1set(V,B,T, V,C,FT, V,C,FT);
                q1set(V,C,FT, V,D,__, V,D,T);
                
                // T,F -> T
                q1call(__,__,__, V,D,__, V,C,TF);
                q1set(__,__,__, V,C,TF, V,A,T);
                q1set(V,A,T, V,C,TF, V,B,F);
                q1set(V,B,F, V,C,TF, V,C,TF);
                q1set(V,C,TF, V,D,__, V,D,T);
                
                // T,T -> F
                q1call(__,__,__, V,D,__, V,C,TT);
                q1set(__,__,__, V,C,TT, V,A,T);
                q1set(V,A,T, V,C,TT, V,B,T);
                q1set(V,B,T, V,C,TT, V,C,TT);
                q1set(V,C,TT, V,D,__, V,D,F);

                return ruleList;
            }
        }
    }
    /**
     * Environment with three variables where C equals A XOR B in every row of
     * the value table. A and B are always visible; the target variable is C.
     */
    public static class Env8xor3 extends EnvCode {
        
        /** Returns the variable symbols A, B, C. */
        public Object[] getVarTable() {
            return new Object[] { A, B, C }; 
        }
        /** Returns the possible value rows; columns follow the order A, B, C. */
        public Object[][] getValTable() {
            Object[][] ret = {
                    // A, B, C
                    { F, F, F, },
                    { F, T, T, },
                    { T, F, T, },
                    { T, T, F, },
            };
            return ret;
        }
        /**
         * Initialises one episode: chooses the value row, makes A and B
         * visible, draws the visibility of C, and gives the agent a goal
         * asking for the value of C.
         */
        public void initEpisode(TMM2Main1.World world, TMM2Main1.Agent agent){
            // Pick one of the possible rows of variable values at random.
            world.currentEnv = world.envValTable[Lab.irand(world.envValTable.length)];
            world.isVisibleVar[0] = true;
            world.isVisibleVar[1] = true;
            // C is visible when Lab.rand() is below the panel value "Visible rate".
            // NOTE(review): the arguments 0.5f, 0, 1 are presumably default, min
            // and max - confirm against panel.getFloat.
            world.isVisibleVar[2] = Lab.rand() < panel.getFloat("Visible rate", 0.5f, 0, 1);
            Object targetVar = world.envVarTable[2]; // C

            // Give the agent the initial values of state s and goal g.
            final Object O = "O".intern();
            // Start state: all nine positions are O.
            State start = new State(new Object[] {O,O,O, O,O,O, O,O,O});  
            //goal = new State(new Object[] {O,O,O, O,O,O, "Goal".intern(),O,O});
            // Goal: last triple is (V, targetVar, PHI); the value position is left as PHI.
            State goal = new State(new Object[] {O,O,O, O,O,O, V,targetVar,Rule.PHI});
            agent.setStartAndGoal(start, goal);
        }
        /** Common base type of the rule sets of this environment. */
        public static abstract class  RuleCode2 extends RuleCode {}
        // Rule set obtained from the Lab panel under the key "makeRuleCode".
        // NOTE(review): presumably the panel offers the subclasses of RuleCode2 -
        // confirm against panel.getCode.
        public RuleCode makeRuleCode = panel.getCode("makeRuleCode", 
                RuleCode2.class);
        /** Returns the rule set held in makeRuleCode. */
        public RuleCode getRuleCode() { return makeRuleCode; }

        // Computes C = A XOR B. No bad rule.
        public static class Rule1 extends RuleCode2 {
            /** Creates a fresh rule list, adds the rules below in order and returns it. */
            public List<RuleN> makeRules(){
                ruleList = new ArrayList<>();
                // Goals (V,A,__) and (V,B,__): set the same triple from any state.
                q(s(__,__,__, __,__,__, __,__,__), 
                        s(O,O,O, O,O,O,  V,A,__), 
                        set(O,O,O, O,O,O, V,A,__));
                q(s(__,__,__, __,__,__, __,__,__), 
                        s(O,O,O, O,O,O, V,B,__), 
                        set(O,O,O, O,O,O, V,B,__));

                { 
                    // Goal: the value of C.
                    StateN g = s(O,O,O, O,O,O, V,C,__);
                    // First call A, then call the (A, B) pair carrying A's value in x.
                    q(s(__,__,__, __,__,__, __,__,__), g, 
                            call(O,O,O, O,O,O, V,A,__));
                    q(s(O,O,O, O,O,O,  V,A,x), g, 
                            call(O,O,O, V,A,x, V,B,__));
                    // XOR truth table: one Set rule per (A, B) combination.
                    q(s(O,O,O, V,A,F, V,B,F), g, 
                            set(O,O,O, O,O,O, V,C,F));
                    q(s(O,O,O, V,A,F, V,B,T), g, 
                            set(O,O,O, O,O,O, V,C,T));
                    q(s(O,O,O, V,A,T, V,B,F), g, 
                            set(O,O,O, O,O,O, V,C,T));
                    q(s(O,O,O, V,A,T, V,B,T), g, 
                            set(O,O,O, O,O,O, V,C,F));
                    // bad rule
//                    q(s(O,O,O, V,A,T, V,B,T), g, 
//                            set(O,O,O, O,O,O, V,C,T));
                }
                { 
                    // Goal: A's value x in the middle triple and B in the last triple.
                    StateN g = s(O,O,O, V,A,x, V,B,__);
                    q(s(O,O,O, O,O,O,  V,A,x), g, 
                            call(O,O,O, O,O,O,  V,B,__));
                    q(s(O,O,O, O,O,O,  V,B,y), g, 
                            set(O,O,O, V,A,x, V,B,y));
            }

                return ruleList;
            }
        }
        /** Same as Rule1 with the bad rule enabled and three further rules added for goal C. */
        public static class Rule4 extends RuleCode2 {
            /** Creates a fresh rule list, adds the rules below in order and returns it. */
            public List<RuleN> makeRules(){
                ruleList = new ArrayList<>();
                // Goals (V,A,__) and (V,B,__): set the same triple from any state.
                q(s(__,__,__, __,__,__, __,__,__), 
                        s(O,O,O, O,O,O,  V,A,__), 
                        set(O,O,O, O,O,O, V,A,__));
                q(s(__,__,__, __,__,__, __,__,__), 
                        s(O,O,O, O,O,O, V,B,__), 
                        set(O,O,O, O,O,O, V,B,__));

                { 
                    // Goal: the value of C.
                    StateN g = s(O,O,O, O,O,O, V,C,__);
                    q(s(__,__,__, __,__,__, __,__,__), g, 
                            call(O,O,O, O,O,O, V,A,__));
                    q(s(O,O,O, O,O,O,  V,A,x), g, 
                            call(O,O,O, V,A,x, V,B,__));
                    // XOR truth table: one Set rule per (A, B) combination.
                    q(s(O,O,O, V,A,F, V,B,F), g, 
                            set(O,O,O, O,O,O, V,C,F));
                    q(s(O,O,O, V,A,F, V,B,T), g, 
                            set(O,O,O, O,O,O, V,C,T));
                    q(s(O,O,O, V,A,T, V,B,F), g, 
                            set(O,O,O, O,O,O, V,C,T));
                    q(s(O,O,O, V,A,T, V,B,T), g, 
                            set(O,O,O, O,O,O, V,C,F));
                    // bad rule
                    // Contradicts the table row T,T -> F above.
                    q(s(O,O,O, V,A,T, V,B,T), g, 
                            set(O,O,O, O,O,O, V,C,T));
                    q(s(O,O,O, O,O,O,  V,A,x), g, 
                            set(O,O,O, O,O,O, V,C,F)); // Author's note (approximate translation): of little value.
                    q(s(O,O,O, O,O,O,  V,A,x), g, 
                            call(O,O,O, O,O,O, V,B,__));
                    q(s(O,O,O, O,O,O,  V,B,x), g, 
                            call(O,O,O, O,O,O, V,C,x));
                }
                { 
                    // Goal: A's value x in the middle triple and B in the last triple.
                    StateN g = s(O,O,O, V,A,x, V,B,__);
                    q(s(O,O,O, O,O,O,  V,A,x), g, 
                            call(O,O,O, O,O,O,  V,B,__));
                    q(s(O,O,O, O,O,O,  V,B,y), g, 
                            set(O,O,O, V,A,x, V,B,y));
            }

                return ruleList;
            }
        }
        // Which of A and B is computed first is left open.
        // Author's note (approximate translation of a garbled comment): the
        // correct-answer rate does not go up.
        public static class Rule3 extends RuleCode2 {
            /** Creates a fresh rule list, adds the rules below in order and returns it. */
            public List<RuleN> makeRules(){
                ruleList = new ArrayList<>();
                // Goals (V,A,__) and (V,B,__): set the same triple from any state.
                q(s(__,__,__, __,__,__, __,__,__), 
                        s(O,O,O, O,O,O,  V,A,__), 
                        set(O,O,O, O,O,O, V,A,__));
                q(s(__,__,__, __,__,__, __,__,__), 
                        s(O,O,O, O,O,O, V,B,__), 
                        set(O,O,O, O,O,O, V,B,__));

                { 
                    // Goal: the value of C.
                    StateN g = s(O,O,O, O,O,O, V,C,__);
                    // A first
                    q(s(__,__,__, __,__,__, __,__,__), g, 
                            call(O,O,O, O,O,O, V,A,__));
                    q(s(O,O,O, O,O,O,  V,A,x), g, 
                            call(O,O,O, V,A,x, V,B,__));
                    // B first
                    q(s(__,__,__, __,__,__, __,__,__), g, 
                            call(O,O,O, O,O,O, V,B,__));
                    q(s(O,O,O, O,O,O,  V,B,x), g, 
                            call(O,O,O, V,A,__, V,B,x));

                    // XOR truth table: one Set rule per (A, B) combination.
                    q(s(O,O,O, V,A,F, V,B,F), g, 
                            set(O,O,O, O,O,O, V,C,F));
                    q(s(O,O,O, V,A,F, V,B,T), g, 
                            set(O,O,O, O,O,O, V,C,T));
                    q(s(O,O,O, V,A,T, V,B,F), g, 
                            set(O,O,O, O,O,O, V,C,T));
                    q(s(O,O,O, V,A,T, V,B,T), g, 
                            set(O,O,O, O,O,O, V,C,F));
                    // bad rule
//                    q(s(O,O,O, V,A,T, V,B,T), g, 
//                            set(O,O,O, O,O,O, V,C,T));
                }
                { // A first
                    StateN g = s(O,O,O, V,A,x, V,B,__);
                    q(s(O,O,O, O,O,O,  V,A,x), g, 
                            call(O,O,O, O,O,O,  V,B,__));
                    q(s(O,O,O, O,O,O,  V,B,y), g, 
                            set(O,O,O, V,A,x, V,B,y));
                }
                { // B first
                    StateN g = s(O,O,O, V,A,__, V,B,x);
                    q(s(O,O,O, O,O,O,  V,B,x), g, 
                            call(O,O,O, O,O,O,  V,A,__));
                    q(s(O,O,O, O,O,O,  V,A,y), g, 
                            set(O,O,O, V,A,y, V,B,x));
                }

                return ruleList;
            }
        }
    }
    // An environment kept as simple as possible, for debugging.
    /**
     * Environment with four variables and a single value row
     * (A=V1, B=V2, C=V3, D=V4). All variables are visible; the target is D.
     */
    public static class Env5 extends EnvCode {
        // Values of the single row, one distinct interned symbol per variable.
        public static final Object V1 = "V1".intern();
        public static final Object V2 = "V2".intern();
        public static final Object V3 = "V3".intern();
        public static final Object V4 = "V4".intern();
        
        /** Returns the variable symbols A, B, C, D. */
        public Object[] getVarTable() {
            return new Object[] { A, B, C, D}; 
        }
        /** Returns the single possible value row; columns follow the order A, B, C, D. */
        public Object[][] getValTable() {
            Object[][] ret = {
                    // A, B, C, D
                    { V1, V2, V3, V4 },
            };
            return ret;
        }
        /**
         * Initialises one episode: selects the value row, makes every
         * variable visible, and gives the agent a goal asking for the value of D.
         */
        public void initEpisode(TMM2Main1.World world, TMM2Main1.Agent agent){
            // Pick one of the possible rows of variable values at random
            // (this environment declares only one row).
            world.currentEnv = world.envValTable[Lab.irand(world.envValTable.length)];
            world.isVisibleVar[0] = true;
            world.isVisibleVar[1] = true;
            world.isVisibleVar[2] = true;
            //world.isVisibleVar[3] = Lab.irand(2) == 0; // true : false = 50% : 50%
            world.isVisibleVar[3] = true; // For now, make all variables visible.
            Object targetVar = world.envVarTable[3]; // D

            // Give the agent the initial values of state s and goal g.
            final Object O = "O".intern();
            // Start state: all nine positions are O.
            State start = new State(new Object[] {O,O,O, O,O,O, O,O,O});  
            //goal = new State(new Object[] {O,O,O, O,O,O, "Goal".intern(),O,O});
            // Goal: last triple is (V, targetVar, PHI); the value position is left as PHI.
            State goal = new State(new Object[] {O,O,O, O,O,O, V,targetVar,Rule.PHI});
            agent.setStartAndGoal(start, goal);
        }
        /** Common base type of the rule sets of this environment. */
        public static abstract class  RuleCode2 extends RuleCode {}
        // Rule set obtained from the Lab panel under the key "makeRuleCode".
        // NOTE(review): presumably the panel offers the subclasses of RuleCode2 -
        // confirm against panel.getCode.
        public RuleCode makeRuleCode = panel.getCode("makeRuleCode", 
                RuleCode2.class);
        /** Returns the rule set held in makeRuleCode. */
        public RuleCode getRuleCode() { return makeRuleCode; }

        // Author's note (approximate translation of a garbled comment): works;
        // the value of the wrong rule goes down.
        public static class Rule1 extends RuleCode2 {
            /** Creates a fresh rule list, adds the rules below in order and returns it. */
            public List<RuleN> makeRules(){
                ruleList = new ArrayList<>();
                // Catch-all: any state and any goal may select Fail.
                q(s(__,__,__, __,__,__, __,__,__), s(__,__,__, __,__,__, __,__,__),
                        Fail);
                // Call chain from D back to A ...
                q1call(__,__,__, V,D,V4, V,C,V3);
                q1call(__,__,__, V,D,V3, V,C,V2); // bad rule
                q1call(__,__,__, V,C,V3, V,B,V2);
                q1call(__,__,__, V,B,V2, V,A,V1);
                // ... and Set chain from A forward to D.
                q1set(__,__,__, V,A,V1, V,A,V1);
                q1set(V,A,V1, V,B,V2, V,B,V2);
                q1set(V,B,V2, V,C,V3, V,C,V3);
                q1set(V,C,V3, V,D,V4, V,D,V4);
                
                return ruleList;
            }
        }
        // Author's note: the original comment is garbled; it appears to say
        // that this rule set is for learning rule values - TODO confirm.
        public static class Rule2 extends RuleCode2 {
            /** Creates a fresh rule list, adds the rules below in order and returns it. */
            public List<RuleN> makeRules(){
                ruleList = new ArrayList<>();
                // Catch-all: any state and any goal may select Fail.
                q(s(__,__,__, __,__,__, __,__,__), s(__,__,__, __,__,__, __,__,__),
                        Fail);
                // Two competing answers for goal (V,D,V4): V4 and V3.
                q1set(__,__,__, V,D,V4, V,D,V4);
                q1set(__,__,__, V,D,V4, V,D,V3); // bad rule
                
                return ruleList;
            }
        }
        /** One call from D to C, two competing Set rules for C, then the Set rule for D. */
        public static class Rule3 extends RuleCode2 {
            /** Creates a fresh rule list, adds the rules below in order and returns it. */
            public List<RuleN> makeRules(){
                ruleList = new ArrayList<>();
                // Catch-all: any state and any goal may select Fail.
                q(s(__,__,__, __,__,__, __,__,__), s(__,__,__, __,__,__, __,__,__),
                        Fail);
                q1call(__,__,__, V,D,V4, V,C,V3);
                q1set(__,__,__, V,C,V3, V,C,V1); // bad rule
                q1set(__,__,__, V,C,V3, V,C,V3);
                q1set(V,C,V3, V,D,V4, V,D,V4);
                
                return ruleList;
            }
        }
    }

    //--------------------------------------------------
    /**
     * A rule that abstracts Q(s,g,a).
     * It is selected by pattern-matching against a vector of values.
     * Usage:
     *   r = new Rule(ruleN);
     *   boolean matched = r.match(vals);
     *   if (matched){
     *      // Access to the last matching results.
     *      Action a = r.getAction();
     *      State s = new State(r.getActionParam());
     *   }
     */
    public static class Rule {
        /**
         * Q value of this rule.
         */
        public float q;
        /**
         * Counter for demo.
         */
        public int useCounter = 0;
        /**
         * Number of variables appeared in this Rule.
         * The constructor computes it as the number of distinct pattern
         * variables in s and g plus the number of wildcard positions in them.
         */
        public int numVars;
        // Sentinel for "not bound yet": a freshly allocated object that can never
        // be identical (==) to any value being matched.
        public static final Object UNBOUND = new Object[]{"UNBOUND"};
        // Default value substituted for wildcards and for variables that are still
        // unbound when the action parameter is built; match() also accepts it as a
        // value at any non-wildcard position.
        public static final Object PHI = "PHI".intern();
        // Wildcard
        public static final String WILDCARD = "__".intern(); // Two underscores.
        // Bindings of the pattern variables, indexed by PatternVariable.id;
        // reset at the start of every match().
        public Object[] env;
        public Object[] patternVec; // Concatenated pattern of s and g.
        public Action action;
        public Object[] actionPatternVec;  // Pattern of m of action C_m.
        // Next id handed out to a new PatternVariable by transStateN().
        public int idCounter = 0;
        // VariableN -> PatternVariable mapping used while the rule is being built;
        // the RuleN constructor sets it to null when it is done.
        public Map<VariableN,PatternVariable> vmap = new HashMap<>(); 
        public Rule(RuleN ruleN){
            // ruleN ƂɃp^[\zB
            List<Object> elems = transStateN(ruleN.s);
            elems.addAll(transStateN(ruleN.g));
            numVars = vmap.size() + 
                    (int)elems.stream()
                    .filter(e -> e == WILDCARD)
                    .count();
            patternVec = elems.toArray();
            action = ruleN.a.a;
            if (action == Action.Call || action == Action.Set){
                actionPatternVec = transStateN(ruleN.a.m).toArray();
            }
            env = new Object[vmap.size()];
            vmap = null;
        }
        public Rule(){
           // Implicitly called from ReturnRule().
        }
        /**
         * Translates the elements of a state node into pattern elements.
         *
         * <p>Strings are replaced by their interned instance so they can be
         * compared by reference. Each {@code VariableN} is replaced by a
         * {@code PatternVariable}; the same variable always maps to the same
         * {@code PatternVariable} (and therefore the same id) within this rule.
         * Integers are accepted only in the range -128..127, where boxed
         * values can be compared with {@code ==}. Everything else is kept as is.
         *
         * @param s state node to translate
         * @return a new mutable list with one translated element per element of {@code s}
         */
        public List<Object> transStateN(StateN s){
            List<Object> translated = new ArrayList<>();
            for (Object raw : s.elems) {
                Object elem = (raw instanceof String) ? ((String) raw).intern() : raw;
                Object out = elem;
                if (elem instanceof VariableN) {
                    VariableN var = (VariableN) elem;
                    PatternVariable known = vmap.get(var);
                    if (known == null) {
                        // First occurrence: allocate the next id for this variable.
                        known = new PatternVariable(var.name, idCounter++);
                        vmap.put(var, known);
                    }
                    out = known;
                } else if (elem instanceof Integer) {
                    int value = (Integer) elem;
                    // Accepts only small integers that can be compared with == operator.
                    Lab.assertTrue(value >= -128 && value <= 127);
                }
                translated.add(out);
            }
            return translated;
        }
        /**
         * Matches a vector of values against {@code patternVec}.
         *
         * <p>All variable bindings are cleared first. Wildcard positions accept
         * anything. A pattern variable is bound to the value at its first
         * occurrence and must be identical to that binding afterwards; any
         * other pattern element must be identical to the value. A value equal
         * to {@code PHI} is accepted at every position. Comparison is by
         * reference. Bindings made before a mismatch are left in {@code env}.
         *
         * @param vals values to match; must have the same length as the pattern
         * @return {@code true} when every position is accepted
         */
        public boolean match(Object[] vals){
            Lab.assertTrue(vals.length == patternVec.length);
            Arrays.fill(env, UNBOUND);
            for (int pos = 0; pos < vals.length; pos++) {
                Object pattern = patternVec[pos];
                if (pattern == WILDCARD) {
                    continue;
                }
                Object actual = vals[pos];
                Object expected;
                if (pattern instanceof PatternVariable) {
                    int slot = ((PatternVariable) pattern).id;
                    if (env[slot] == UNBOUND) {
                        env[slot] = actual;
                    }
                    expected = env[slot];
                } else {
                    expected = pattern;
                }
                boolean accepted = (actual == PHI) || (actual == expected);
                if (!accepted) {
                    return false;
                }
            }
            return true;
        }
        /**
         * Returns the action attached to this rule.
         *
         * @return the value of the {@code action} field
         */
        public Action getAction(){
            return this.action;
        }
        /**
         * Instantiates the action parameter with the bindings of the last
         * {@link #match(Object[])}.
         *
         * <p>Works on a copy of {@code actionPatternVec}: wildcards and pattern
         * variables that are still unbound become {@code PHI}, bound pattern
         * variables become their bound value, all other elements are kept.
         * Only meaningful for {@code Call} and {@code Set} rules.
         *
         * @return a new array holding the instantiated parameter
         */
        public Object[] getActionParam(){
            Lab.assertTrue(action == Action.Call || action == Action.Set);
            Object[] params = Arrays.copyOf(actionPatternVec, actionPatternVec.length);
            for (int pos = 0; pos < params.length; pos++) {
                Object elem = params[pos];
                if (elem == WILDCARD) {
                    params[pos] = PHI;
                    continue;
                }
                if (!(elem instanceof PatternVariable)) {
                    continue;
                }
                Object bound = env[((PatternVariable) elem).id];
                params[pos] = (bound == UNBOUND) ? PHI : bound;
            }
            return params;
        }
        /**
         * Renders the rule as {@code rule(<pattern>,<action>,<action pattern>,).q = <q>},
         * with a trailing comma after every element.
         */
        public String toString(){
            StringBuilder text = new StringBuilder("rule(");
            for (Object p : patternVec) {
                text.append(p).append(",");
            }
            text.append(action).append(",");
            if (actionPatternVec != null){
                for (Object p : actionPatternVec) {
                    text.append(p).append(",");
                }
            }
            text.append(").q = ").append(q);
            return text.toString();
        }
        /**
         * A named variable appearing in a rule pattern.
         * {@code id} is used as the index into the rule's binding array.
         */
        public static class PatternVariable {
            String name;
            int id;

            public PatternVariable(String name, int id){
                this.id = id;
                this.name = name;
            }

            /** Returns the variable name only (the id is not printed). */
            public String toString() {
                return String.valueOf(name);
            }
        }
        // Shared instance used for Action.Return. Callers refer to it as
        // Rule.returnRule and compare against it by reference (==).
        public static final Rule returnRule = new ReturnRule();
    }
    public static class ReturnRule extends Rule {
        public ReturnRule(){
            action = Action.Return;
            q = 0; // Q(g,g,RET) == 0
        }
        public String toString(){
            return "rule(Return).q = "+ q;
        }
    }
    /**
     * Data structure used to represent s, g and m in Q(s,g,C_m):
     * a plain vector of values.
     */
    public static class State {
        public Object[] values;

        public State(Object[] values) { this.values = values; }

        /** Returns the underlying array itself (not a copy). */
        public Object[] getVec(){
            return values;
        }

        /**
         * Checks whether this state has reached the subgoal state {@code x}.
         * Positions where {@code x} holds {@code Rule.PHI} match any value;
         * all other positions must hold the identical object ({@code ==}).
         *
         * @param x subgoal to compare against; must have the same length
         * @return {@code true} if every non-PHI position of {@code x} is matched
         */
        public boolean satisfies(State x){
            Object[] target = x.values;
            Lab.assertTrue(values.length == target.length);
            for (int k = 0; k < target.length; k++) {
                Object wanted = target[k];
                if (wanted == Rule.PHI) {
                    continue;
                }
                if (values[k] != wanted) {
                    return false;
                }
            }
            return true;
        }

        /** Renders as {@code State(v0,v1,...,)} with a trailing comma. */
        public String toString(){
            StringBuilder text = new StringBuilder("State(");
            for (Object v : values) {
                text.append(v.toString()).append(",");
            }
            return text.append(")").toString();
        }
    }
    
    
    //--------------------------------------------------
    public static class TMM2Main1 extends Lab.MainCode {
        //public int maxEpisodes = panel.getInt("max episodes", 1000000, 1, 100000);
        // Step limit per episode: World.main() ends an episode as a timeout once it is reached.
        public int maxSteps = panel.getInt("max steps", 100, 1, 10000);
        // Step size multiplied with the error term when Agent.update() adjusts a rule's q.
        public float alpha = panel.getFloat("alpha", 0.1f, 0, 1);
        // Reward assigned by Agent.takeAction() for a Call and for a successful Set.
        public float mChangeReward = panel.getFloat("m change R", -1, -10, 0);
        // Not referenced anywhere in the visible part of this class.
        public lab.Lab.WTextArea qView = null;
        // Environment definition: supplies the variable/value tables, initEpisode() and the rule code.
        public EnvCode envCode = panel.getCode("EnvCode", EnvCode.class);
        // Provides makeRules(), whose results Agent.initTable() wraps into Rule objects.
        public RuleCode ruleCode = envCode.getRuleCode();
        
        //  main
        /**
         * Entry point: creates a {@code World} and runs either the manual
         * test loop (panel flag "test main") or the normal learning loop.
         */
        public void main() {
            World world = new World();
            boolean manualTest = panel.flag("test main", false);
            if (!manualTest) {
                world.main();
                return;
            }
            world.testMainLoop();
        }
        
        public class Agent {
            // "new" triple: state, subgoal and rule produced by the latest
            // takeAction()/chooseAction() calls.
            public State newS; // state
            public State newG; // subgoal
            public Rule newR; // rule 
            // "old" triple: the values from the previous step; update() copies new* into old*.
            public State oldS;
            public State oldG;
            public Rule oldR;
            // Instantiated parameter of the most recently chosen Call/Set rule (set in chooseAction()).
            public State actionParamState; 
            // Reward produced by the last takeAction().
            public float reward;
            // Stack of suspended subgoals: Call pushes the current one, Return pops it.
            public Stack<State> stack;
            // Episode start state and top-level goal (set by setStartAndGoal()).
            public State start, goal;
            // True when the last takeAction() ended in a failure.
            public boolean failedFlag;
            // State the agent is put back into after a failure.
            public State failedState;
            public World world;
            // All rules built by initTable().
            public List<Rule> rules;
            // NOTE(review): read from the panel but not used in the visible code.
            public float initVal = panel.getFloat("Table init value", 0, -50, 0);
            public float beta = panel.getFloat("beta", 1, 0.01f, 100); // for softmax
            //
            /**
             * Creates an agent bound to {@code world} and builds its rule table.
             *
             * @param world the world this agent acts in
             */
            public Agent(World world){
                this.world = world;
                this.initTable();
            }
            /** Intentionally empty; does nothing in this implementation. */
            public void initEnv() {
            }
            /**
             * Builds {@code rules} by wrapping every rule definition returned by
             * {@code ruleCode.makeRules()} into a {@code Rule}, then prints each
             * rule to standard output.
             * The q values are not touched here; each rule keeps whatever its
             * constructor assigned.
             */
            public void initTable() {
                rules = ruleCode.makeRules()
                        .stream()
                        .map(ruleN -> new Rule(ruleN))
                        .collect(Collectors.toList());
                for (Rule rule : rules) {
                    System.out.println(rule);
                }
            }
            /**
             * Installs the start state and top-level goal of an episode.
             * Both the "old" and "new" state/goal are set to them, the failure
             * fallback state becomes {@code s}, and the rule history is reset.
             *
             * @param s start state
             * @param g top-level goal
             */
            public void setStartAndGoal(State s, State g){
                start = s;
                newS = s;
                oldS = s;

                goal = g;
                newG = g;
                oldG = g;

                failedState = s;
                initHist();
            }
            /**
             * Starts an episode: empties the subgoal stack, chooses the first
             * rule, makes it the current rule and records it in the history.
             */
            public void chooseFirstAction(){
                stack = new Stack<>();
                chooseAction();
                oldR = newR;
                addToHist(newR);
            }
            // 
            // Reward assigned by takeAction() when a Set fails or a Fail rule is executed.
            public float failPenalty = panel.getFloat("failPenalty", -3, -100, 0);
            /**
             * Executes the current rule {@code oldR} and fills in
             * {@code newS}, {@code newG}, {@code reward} and {@code failedFlag}.
             * <ul>
             *   <li>Return: state unchanged, subgoal popped from the stack, reward 0.</li>
             *   <li>Call: state unchanged, current subgoal pushed,
             *       {@code actionParamState} becomes the subgoal.</li>
             *   <li>Set: the world is asked to observe {@code actionParamState};
             *       a {@code null} answer is treated as a failure.</li>
             *   <li>Fail: unconditional failure.</li>
             * </ul>
             * When the "Action log" flag is on, the action is also written to the
             * view panel, indented by the current stack depth.
             */
            public void takeAction(){
                Action action = oldR.getAction();
                failedFlag = false;

                boolean hasParam = (action == Action.Call || action == Action.Set);
                if (panel.flag("Action log", true)) {
                    StringBuilder line = new StringBuilder();
                    // Two spaces of indentation per pending subgoal.
                    for (int depth = stack.size(); depth > 0; depth--) {
                        line.append("  ");
                    }
                    line.append(action.toString());
                    if (hasParam) {
                        line.append('(');
                        Object[] params = actionParamState.values;
                        for (int k = 0; k < params.length; k++) {
                            if (k > 0) {
                                line.append(',');
                            }
                            line.append(params[k].toString());
                        }
                        line.append(')');
                    }
                    env.viewPanel.println("Action log", line.toString());
                }

                if (action == Action.Return){
                    newS = oldS;
                    newG = stack.pop();
                    reward = 0;
                } else if (action == Action.Call){
                    newS = oldS;
                    stack.push(oldG);
                    newG = actionParamState;
                    reward = mChangeReward;
                } else if (action == Action.Set){
                    State observed = world.observeVal(actionParamState);
                    if (observed == null){
                        resetAfterFailure();
                    } else {
                        newS = observed;
                        newG = oldG;
                        reward = mChangeReward;
                    }
                } else if (action == Action.Fail){
                    resetAfterFailure();
                } else {
                    // No other action kinds are expected.
                    Lab.assertTrue(false);
                    reward = world.takePrimitiveAction(action, this);
                    newG = oldG;
                }
            }

            /** Puts the agent back to the failure state with the top-level goal and applies the penalty. */
            private void resetAfterFailure(){
                failedFlag = true;
                newS = failedState;
                newG = goal;
                stack.clear();
                reward = failPenalty;
            }
            /**
             * Chooses the next rule {@code newR} for the pair ({@code newS}, {@code newG}).
             * <p>
             * If the state already satisfies the subgoal, the shared return rule
             * is chosen. Otherwise one of the matching rules is sampled by
             * softmax over their priorities, and for Call/Set rules
             * {@code actionParamState} is instantiated from the rule's bindings.
             *
             * @throws Error if no rule matches
             */
            public void chooseAction(){
                if (newS.satisfies(newG)){
                    newR = Rule.returnRule;
                    return;
                }

                List<Rule> candidates = selectMatchedRules(newS, newG);
                float[] priorities = calcRulePriorities(candidates);
                if (priorities.length == 0){
                    throw new Error("No action selected: (news,newG)="+ 
                            newS+ ", "+ newG);
                }

                // Pick one rule by softmax.
                int chosen = softmax(priorities);

                if (panel.flag("Show matched rules", true)){
                    int n = candidates.size();
                    for (int k = 0; k < n; k++) {
                        env.viewPanel.println("matched", k+ ":"+ candidates.get(k));
                    }
                    for (int k = 0; k < priorities.length; k++) {
                        env.viewPanel.println("priority", k+ ":"+ priorities[k]);
                    }
                    for (int k = 0; k < probTable.length; k++) {
                        env.viewPanel.println("probTable", k+ ":"+ probTable[k]);
                    }
                }

                newR = candidates.get(chosen);
                boolean needsParam = newR.action == Action.Call || newR.action == Action.Set;
                if (needsParam) {
                    actionParamState = new State(newR.getActionParam());
                }
            }
            /**
             * Returns the rules whose pattern matches the concatenation of the
             * values of {@code s} followed by the values of {@code g}.
             * {@code match} is invoked on every rule, in list order.
             *
             * @param s state
             * @param g subgoal
             * @return a new list of the matching rules, in their original order
             */
            public List<Rule> selectMatchedRules(State s, State g){
                // Build one array holding the values of s followed by those of g.
                int sLen = s.values.length;
                int gLen = g.values.length;
                Object[] vals = new Object[sLen + gLen];
                System.arraycopy(s.values, 0, vals, 0, sLen);
                System.arraycopy(g.values, 0, vals, sLen, gLen);

                // Collect the rules that match (s,g).
                List<Rule> matched = new ArrayList<>();
                for (Rule rule : rules) {
                    if (rule.match(vals)) {
                        matched.add(rule);
                    }
                }
                return matched;
            }
            // Amount subtracted from a rule's priority per pattern variable (see calcRulePriorities()).
            public float genericityPenalty = panel.getFloat("gen penalty", 100, 0, 100);
            /**
             * Computes the selection priority of each rule as
             * {@code q - genericityPenalty * numVars}, so that rules with fewer
             * variables are preferred.
             *
             * @param matched rules to score
             * @return priorities, index-aligned with {@code matched}
             */
            public float[] calcRulePriorities(List<Rule> matched){
                float[] priorities = new float[matched.size()];
                int k = 0;
                for (Rule rule : matched) {
                    priorities[k++] = rule.q - genericityPenalty * rule.numVars;
                }
                return priorities;
            }
            // Currently not used.
            //public boolean eTrace = panel.flag("eTrace", false); 
            /**
             * Updates the q value of the rule that was just executed
             * ({@code oldR}) and then shifts the "new" state/goal/rule into
             * the "old" slots.
             * <ul>
             *   <li>The shared return rule is never updated.</li>
             *   <li>After a failure the target is
             *       {@code reward + evalValue(newG, newS)}.</li>
             *   <li>Otherwise the target is {@code reward + newR.q + vg}, where
             *       {@code vg} is 0 if the subgoal object is unchanged and
             *       {@code evalValue(oldG, newG)} if it changed.</li>
             * </ul>
             */
            public void update() {
                if (oldR != Rule.returnRule){
                    float delta;
                    if (failedFlag){
                        delta = reward + evalValue(newG, newS) - oldR.q;
                    } else {
                        // V_g(g'): only relevant when the subgoal changed.
                        float vg = (oldG == newG) ? 0 : evalValue(oldG, newG);
                        delta = reward + newR.q - oldR.q + vg;
                    }
                    oldR.q += alpha * delta;
                }

                oldS = newS;
                oldG = newG;
                oldR = newR;
            }
            // When true, evalValue() uses the q of the highest-priority rule instead of the softmax-weighted sum.
            public boolean approxValueEvalFlag = panel.flag("approxValueEvalFlag", false);
            /**
             * Returns V_g(s), estimated from the rules matching (s, g).
             * <p>
             * In approximate mode the q of the rule with the highest priority is
             * returned. Otherwise the result is the sum of q weighted by the
             * softmax probabilities of the priorities.
             * Side effects: re-runs {@code match} on every rule and overwrites
             * {@code probTable} in exact mode.
             *
             * @param g subgoal
             * @param s state
             */
            public float evalValue(State g, State s){
                List<Rule> candidates = selectMatchedRules(s, g);
                float[] priorities = calcRulePriorities(candidates);
                if (approxValueEvalFlag){
                    // V_g(s) \approx max_a Q(s,g,a)
                    return candidates.get(Lab.argmax(priorities)).q;
                }
                // V_g(s) = \Sigma_a \pi((s,g),a)Q(s,g,a)
                calcProbTable(priorities, 0, priorities.length);
                float expected = 0;
                for (int k = 0; k < probTable.length; k++) {
                    float ruleQ = candidates.get(k).q;
                    if (ruleQ == Float.NEGATIVE_INFINITY){
                        // Skipped to avoid 0 * -Infinity = NaN.
                        continue;
                    }
                    expected += probTable[k] * ruleQ;
                }
                return expected;
            }
            
            // Eligibility trace (history of recently used rules)
            // Decay factor applied per step back in updateWithEligibilityTrace().
            public float lambda = panel.getFloat("lambda", 0.9f, 0, 1);
            // Maximum number of history entries kept after compaction in addToHist().
            public int histSize = panel.getInt("histSize", 100, 1, 100);
            // History buffer; allocated with 2 * histSize slots by initHist().
            public Rule histR[];
            // Index of the next free slot in histR.
            public int hTop;
            // This method should be called before starting each episode.
            /** Discards the rule history: allocates a fresh buffer of {@code 2 * histSize} slots. */
            public void initHist(){
                histR = new Rule[histSize * 2];
                hTop = 0;
            }
            /**
             * Appends {@code r} to the rule history. When the buffer becomes
             * full, the newer half is moved to the front so that only the last
             * {@code histSize} entries are kept.
             *
             * @param r rule to record
             */
            public void addToHist(Rule r){
                histR[hTop++] = r;
                if (hTop < histR.length) {
                    return;
                }
                // Forget histories older than histSize.
                System.arraycopy(histR, histSize, histR, 0, histSize);
                hTop = histSize;
            }
            // Current eTrace implementation does not work.
            /**
             * Applies {@code delta} to the most recent rules in the history,
             * newest first, scaling it by {@code lambda} for each step back.
             * At most {@code histSize} entries are updated.
             * <p>
             * Does nothing when the history is empty ({@code hTop == 0});
             * previously that case indexed {@code histR[-1]}.
             *
             * @param delta error term to distribute
             */
            public void updateWithEligibilityTrace(float delta){
                float d = delta;
                int newest = hTop - 1;
                for (int back = 0; back < histSize && newest - back >= 0; back++) {
                    histR[newest - back].q += alpha * d;
                    d *= lambda;
                }
            }
            
            // Softmax
            // Scratch table of selection probabilities, overwritten by every calcProbTable() call.
            public double[] probTable = new double[0]; /** \pi(a) \in [0,1] */
            /** Samples an index from the whole of {@code q}; see {@link #softmax(float[], int, int)}. */
            public int softmax(float[] q){
                return softmax(q, 0, q.length);
            }

            /**
             * Samples an index in {@code [from, to)} with probability given by
             * the softmax of {@code q} (as computed by {@code calcProbTable}).
             * A random number from {@code Lab.rand()} is compared against the
             * running sum of probabilities; if the sum never exceeds it,
             * {@code to - 1} is returned.
             *
             * @param q    priorities
             * @param from first index (inclusive)
             * @param to   last index (exclusive)
             * @return the sampled index
             */
            public int softmax(float[] q, int from, int to){
                calcProbTable(q, from, to);
                final float threshold = Lab.rand();
                double cumulative = 0;
                int k = from;
                while (k < to) {
                    cumulative += probTable[k];
                    if (cumulative > threshold) {
                        Lab.assertTrue(q[k] != Float.NEGATIVE_INFINITY);
                        return k;
                    }
                    k++;
                }
                // Fallback: the running sum never exceeded the random threshold.
                int last = to - 1;
                Lab.assertTrue(cumulative - 0.001f < 1);
                Lab.assertTrue(q[last] != Float.NEGATIVE_INFINITY);
                return last;
            }
            /**
             * Fills {@code probTable[from..to)} with softmax probabilities:
             * \pi((s,g),a) = exp(beta * Q(s,g,a)) / \Sigma_a' exp(beta * Q(s,g,a'))
             * <p>
             * {@code probTable} is reallocated when its length differs from
             * {@code q.length}. The maximum returned by {@code Lab.max(q)} is
             * subtracted before exponentiation to avoid overflow, which leaves
             * the ratios unchanged:
             * exp(a-c)/\Sigma_i exp(ai-c) = exp(a)/\Sigma_i exp(ai)
             *
             * @param q    priorities
             * @param from first index (inclusive)
             * @param to   last index (exclusive)
             */
            public void calcProbTable(float[] q, int from, int to){
                if (probTable.length != q.length){
                    probTable = new double[q.length];
                }
                final float offset = Lab.max(q);
                double norm = 0;
                for (int k = from; k < to; k++){
                    double weight = Math.exp(beta * (q[k] - offset));
                    probTable[k] = weight;
                    norm += weight;
                }
                Lab.assertTrue(norm > 0);
                for (int k = from; k < to; k++){
                    probTable[k] /= norm;
                }
            }
            
            /**
             * Returns {@code true} when the subgoal stack is empty and the
             * newly chosen rule is a Return, i.e. the top-level goal is reached.
             */
            public boolean achieved() {
                if (!stack.isEmpty()) {
                    return false;
                }
                return newR.getAction() == Action.Return;
            }
        }
        //--------------------------------------------------
        // Refreshed from the panel at the start of every episode in World.main();
        // enables the per-step and per-episode calls to visualizeAgentState().
        public boolean visualizeFlag;
        public class World {
            // The single agent acting in this world; created in main()/testMainLoop().
            public Agent agent;
            //
            // Environment variables; index i here corresponds to currentEnv[i] and isVisibleVar[i].
            public Object[] envVarTable = envCode.getVarTable();
            // Value table from the environment code.
            // NOTE(review): presumably one row per possible assignment of all variables — confirm in EnvCode.
            public Object[][] envValTable = envCode.getValTable();
            // Actual value of each environment variable in the current episode.
            // NOTE(review): not assigned in this class; presumably set by envCode.initEpisode().
            public Object[] currentEnv;
            // Whether observeVal() may reveal/check the actual value of variable i.
            public boolean[] isVisibleVar = new boolean[envVarTable.length];
            // Number of episodes per statistics window in main().
            public int scoreBin = panel.getInt("scoreBin", 10, 1, 1000);
            //
            /** Creates a world; all set-up happens in the field initializers. */
            public World(){
            }
            /**
             * Runs the learning loop; never returns.
             * <p>
             * Each iteration of the outer loop is one episode: panel parameters
             * are refreshed, {@code envCode.initEpisode} prepares the episode,
             * and the agent alternates takeAction / chooseAction / update until
             * it reports {@code achieved()} or {@code maxSteps} is reached
             * (counted as a timeout). Episodes that finish are checked with
             * {@code checkInferredVal}.
             * <p>
             * Every {@code scoreBin} episodes the correct rate (over episodes
             * that did not time out) and the timeout rate are plotted and the
             * counters are reset. If every episode in the window timed out, the
             * correct rate is plotted as 0 instead of the NaN that
             * {@code 0f / 0} would produce.
             */
            public void main(){
                agent = new Agent(this);
                int episodes = 0;
                int timeoutEpisodes = 0;
                int correctResults = 0; 
                agent.rules.forEach(r -> r.useCounter = 0);
                for (;;){
                    env.viewPanel.print1("episodes=", ""+ episodes++);
                    // Re-read tunable parameters so panel changes take effect per episode.
                    visualizeFlag = panel.flag("visualizeFlag", true);
                    alpha = panel.getFloat("alpha", 0.1f, 0, 1);
                    agent.beta = panel.getFloat("beta", 1, 0.01f, 100);
                    panel.speedControl("Episode loop", 0);
                    envCode.initEpisode(this, agent);
                    printEpisode();
                    agent.chooseFirstAction();
                    int steps = 0;
                    boolean timeoutFlag = false;
                    System.out.println("goal: "+ agent.goal);
                    System.out.println("Start loop.");
                    while (! agent.achieved()){
                        if (steps++ >= maxSteps) {
                            System.out.println("timeout");
                            timeoutFlag = true;
                            break;
                        }
                        env.viewPanel.print1("steps=", ""+ steps);
                        if (visualizeFlag){
                            panel.speedControl("Step loop", 1);
                            visualizeAgentState();
                        }
                        if (panel.button("Print rules")) {
                            env.viewPanel.setText("Rules", "");
                            env.viewPanel.println("Rules", "---------------");
                            agent.rules.forEach(r -> env.viewPanel.println("Rules", ""+ r));
                        }
                        
                        agent.takeAction(); agent.oldR.useCounter++;
                        agent.chooseAction();
                        agent.update();
                    }
                    System.out.println("End. steps="+ steps);
                    if (panel.flag("Action log", true)) {
                        env.viewPanel.println("Action log", "Done. -------------------------");
                    }
                    if (timeoutFlag) {
                        timeoutEpisodes++;
                    } else {
                        System.out.println("result: "+ agent.oldS);
                        boolean correct = checkInferredVal(agent.oldS);
                        System.out.println("checkInferredVal: "+ correct);
                        if (correct) correctResults++;
                    }
                    if (episodes >= scoreBin) {
                        // Guard against 0f / 0 == NaN when no episode completed in this window.
                        int completedEpisodes = episodes - timeoutEpisodes;
                        float correctRate = (completedEpisodes > 0)
                                ? (correctResults + 0f) / completedEpisodes
                                : 0f;
                        env.viewPanel.plotWithFixedY("Correct Rate", 
                                correctRate,
                                0, 1);
                        env.viewPanel.plotWithFixedY("Timeout Rate", 
                                (timeoutEpisodes + 0f) / episodes,
                                0, 1);
                        if (panel.flag("Plot Use count", false)) {
                            for (int i = 0; i < agent.rules.size(); i++) {
                                env.viewPanel.plot("Use count "+ i, 
                                        agent.rules.get(i).useCounter);
                            }
                            agent.rules.forEach(r -> r.useCounter = 0);
                        }

                        episodes = timeoutEpisodes = correctResults = 0;
                    }
                    if (visualizeFlag){
                        visualizeAgentState();
                    }
                }
            }
            /**
             * Writes the agent's current situation to the view panel:
             * the subgoal stack ("Goals"), a log entry with stack, state,
             * subgoal and rule ("Log"), and a plot of every rule's q ("rule.q").
             */
            public void visualizeAgentState(){
                final String goalsLabel = "Goals";
                env.viewPanel.setText(goalsLabel, ""); // Clear text.
                // Pending subgoals from bottom to top, then the current one.
                for (State pending : agent.stack) {
                    env.viewPanel.println(goalsLabel, String.valueOf(pending));
                }
                env.viewPanel.println(goalsLabel, String.valueOf(agent.oldG));

                final String logLabel = "Log";
                env.viewPanel.println(logLabel, "---");
                StringBuilder stackLine = new StringBuilder();
                stackLine.append("stack size=").append(agent.stack.size()).append(":");
                // Stack contents from top to bottom.
                for (int k = agent.stack.size() - 1; k >= 0; k--) {
                    stackLine.append(agent.stack.get(k)).append(", ");
                }
                env.viewPanel.println(logLabel, stackLine.toString());
                env.viewPanel.println(logLabel, "s,g="+ agent.oldS+
                        ", "+ agent.oldG);
                env.viewPanel.println(logLabel, String.valueOf(agent.oldR));

                env.viewPanel.scatterPlotFixedY("rule.q", 0, 0, -10, 0);// dummy
                env.viewPanel.resetGraphData("rule.q");
                for (Rule rule : agent.rules) {
                    env.viewPanel.plot("rule.q", rule.q);
                }
            }
            /**
             * Old manual test loop; never returns.
             * Creates one panel button per {@code Action} value and, on every
             * pass, forwards each pressed button to {@code takePrimitiveAction}.
             */
            public void testMainLoop(){
                agent = new Agent(this);
                envCode.initEpisode(this, agent);
                Action[] allActions = Action.values();
                // First pass: make sure every button exists on the panel.
                for (Action a : allActions) {
                    panel.button(a.name());
                }
                while (true) {
                    panel.speedControl("World mainLoop", 100);
                    for (Action a : Action.values()) {
                        boolean pressed = panel.button(a.name());
                        if (pressed){
                            takePrimitiveAction(a, agent);
                        }
                    }
                }
            }
//            public void initEpisode(){
//                // Randomly pick one of the possible combinations of variable values.
//                currentEnv = envValTable[Lab.irand(envValTable.length)];
//                // Randomly set the visibility of each variable.
//                for (int i = 0; i < isVisibleVar.length; i++) {
//                    isVisibleVar[i] = (Lab.irand(2) == 0); // true : false = 50% : 50%
//                }
//                // Decide which variable is to be inferred. It is placed in the goal.
//                Object targetVar = envVarTable[Lab.irand(envVarTable.length)];
//
//                // Give the agent the initial state s and the goal g.
//                final Object O = "O".intern();
//                State start = new State(new Object[] {O,O,O, O,O,O, O,O,O});  
//                //goal = new State(new Object[] {O,O,O, O,O,O, "Goal".intern(),O,O});
//                State goal = new State(new Object[] {O,O,O, O,O,O, V,targetVar,Rule.PHI});
//                agent.setStartAndGoal(start, goal);
//            }
            /**
             * Prints the episode set-up to standard output: the variable
             * table, the current environment values and the visibility flags.
             */
            public void printEpisode() {
                System.out.println("envVarTable: "+ Arrays.asList(envVarTable));
                System.out.println("currentEnv: "+ Arrays.asList(currentEnv));
                StringBuilder flags = new StringBuilder("isVisibleVar: [");
                for (boolean visible : isVisibleVar) {
                    flags.append(visible).append(", ");
                }
                flags.append("]");
                System.out.println(flags.toString());
            }
            /**
             * Executes Set(V,X,Y) and returns the resulting state.
             * <p>
             * The last three elements of the state vector are read as
             * (V, X, Y). In the returned state, Y is replaced as follows:
             * <pre>
             * X is a visible variable:
             *     Y is PHI:
             *         the actual value of X.
             *     Y is a value other than PHI:
             *         Y equals the actual value of X:      Y.
             *         Y differs from the actual value of X: null (causes a fail).
             * X is an invisible variable:
             *     Y (if Y is PHI it stays PHI).
             * </pre>
             * BUG: for Set(P(x) and Q(y)) the check against the environment
             * state is incomplete.
             *
             * @param state requested state; not modified
             * @return a new state, or {@code null} if the requested value
             *         contradicts the visible actual value
             * @throws Error if X is not found in {@code envVarTable}
             */
            public State observeVal(State state) {
                Object[] vec = state.getVec();
                Object[] result = vec.clone();
                int n = vec.length;
                Lab.assertTrue(vec[n - 3] == V);
                Object var = vec[n - 2];
                Object val = vec[n - 1];

                // Locate the variable by reference equality.
                int idx = -1;
                for (int k = 0; k < envVarTable.length; k++) {
                    if (envVarTable[k] == var) {
                        idx = k;
                        break;
                    }
                }
                if (idx < 0) {
                    throw new Error();
                }

                Object observed = val;
                if (isVisibleVar[idx]) {
                    Object actual = currentEnv[idx];
                    if (val == Rule.PHI) {
                        observed = actual;
                    } else if (actual != val) {
                        return null;
                    }
                }
                result[n - 1] = observed;
                return new State(result);
            }
//            public boolean observeVal(State state) {
//                Object[] vec = state.getVec();
//                Lab.assertTrue(vec[vec.length - 3] == V);
//                Object var = vec[vec.length-2];
//                Object val = vec[vec.length-1];
//                for (int i = 0; i < envVarTable.length; i++) {
//                    if (envVarTable[i] == var) {
//                        return (! isVisibleVar[i]) || currentEnv[i] == val;
//                    }
//                }
//                throw new Error();
//            }
            // Checks whether the inferred value of the variable matches its actual value in the environment.
            /**
             * Compares the value stored in the last element of {@code state}
             * with the actual environment value of the variable named in the
             * second-to-last element (reference comparison).
             *
             * @param state state whose last three elements are (V, variable, value)
             * @return {@code true} if the value is the variable's actual value
             * @throws Error if the variable is not found in {@code envVarTable}
             */
            public boolean checkInferredVal(State state) {
                Object[] vec = state.getVec();
                int n = vec.length;
                Lab.assertTrue(vec[n - 3] == V);
                Object var = vec[n - 2];
                Object val = vec[n - 1];
                int k = 0;
                while (k < envVarTable.length) {
                    if (envVarTable[k] == var) {
                        return currentEnv[k] == val;
                    }
                    k++;
                }
                throw new Error();
            }
            
            
            /**
             * Placeholder for primitive actions: ignores its arguments and
             * always returns a reward of 0.
             */
            public float takePrimitiveAction(Action action, Agent a) {
                return 0;
            }
            /** Placeholder: ignores {@code agent} and always returns {@code null}. */
            public State observe(Agent agent) {
                State nothing = null;
                return nothing;
            }
        }
    }
}
