Q-Learning Source Code Analysis
import java.util.Random;

/**
 * Classic tabular Q-learning demo on a 6-state graph.
 *
 * <p>The reward matrix R defines the graph: R[s][a] == -1 means no edge from
 * state s to state a, 0 means a traversable edge, and 100 marks an edge that
 * enters the goal state (state 5). Training runs episodes from every initial
 * state, updating Q[s][a] = R[s][a] + GAMMA * max_a' Q[a][a']; testing then
 * greedily follows the learned Q matrix and prints the route to the goal.
 */
public class QLearning1 {
    private static final int Q_SIZE = 6;          // number of states (and actions)
    private static final double GAMMA = 0.8;      // discount factor
    private static final int ITERATIONS = 10;     // training sweeps over all start states
    private static final int[] INITIAL_STATES = {1, 3, 5, 2, 4, 0};

    // R[s][a]: -1 = no edge, 0 = edge, 100 = edge into the goal state (5).
    // NOTE(review): the source text had "10 0" in row 4 — reconstructed as 100
    // to match the symmetric goal-entry rewards in rows 1 and 5.
    private static final int[][] R = {
        {-1, -1, -1, -1,  0,  -1},
        {-1, -1, -1,  0, -1, 100},
        {-1, -1, -1,  0, -1,  -1},
        {-1,  0,  0, -1,  0,  -1},
        { 0, -1, -1,  0, -1, 100},
        {-1,  0, -1, -1,  0, 100}};

    // Single shared RNG: the original allocated "new Random()" on every call
    // to getRandomAction, which is wasteful and can degrade randomness.
    private static final Random RANDOM = new Random();

    private static int[][] q = new int[Q_SIZE][Q_SIZE];
    private static int currentState = 0;

    /** Trains the Q matrix, then prints its final values. */
    private static void train() {
        initialize();

        // Perform training, starting at all initial states.
        for (int j = 0; j < ITERATIONS; j++) {
            for (int i = 0; i < Q_SIZE; i++) {
                episode(INITIAL_STATES[i]);
            }
        }

        System.out.println("Q Matrix values:");
        for (int i = 0; i < Q_SIZE; i++) {
            for (int j = 0; j < Q_SIZE; j++) {
                System.out.print(q[i][j] + ",\t");
            }
            System.out.print("\n");
        }
        System.out.print("\n");
    }

    /** Greedily follows the learned Q matrix from each initial state to the goal. */
    private static void test() {
        System.out.println("Shortest routes from initial states:");
        for (int i = 0; i < Q_SIZE; i++) {
            currentState = INITIAL_STATES[i];
            int newState;
            do {
                newState = maximum(currentState, true);
                System.out.print(currentState + ", ");
                currentState = newState;
            } while (currentState < 5);
            System.out.print("5\n");
        }
    }

    /**
     * Runs one training episode from {@code initialState} until the goal
     * state (5) is reached, then a few extra updates for convergence.
     */
    private static void episode(final int initialState) {
        currentState = initialState;

        // Travel from state to state until the goal state is reached.
        // BUGFIX: the original looped while (currentState == 5), which
        // contradicts the stated intent "until goal state is reached" and
        // ended every episode after a single random step.
        do {
            chooseAnAction();
        } while (currentState != 5);

        // Having reached the goal, run through the set once more for convergence.
        for (int i = 0; i < Q_SIZE; i++) {
            chooseAnAction();
        }
    }

    /** Picks a random valid action, applies the Q-update, and moves there. */
    private static void chooseAnAction() {
        // Randomly choose a possible action connected to the current state.
        int possibleAction = getRandomAction(Q_SIZE);

        if (R[currentState][possibleAction] >= 0) {
            q[currentState][possibleAction] = reward(possibleAction);
            currentState = possibleAction;
        }
    }

    /**
     * Returns a uniformly random action in [0, upperBound) that is connected
     * to the current state (i.e. R[currentState][action] > -1).
     */
    private static int getRandomAction(final int upperBound) {
        while (true) {
            int action = RANDOM.nextInt(upperBound);
            if (R[currentState][action] > -1) {
                return action;
            }
        }
    }

    /** Resets the Q matrix to all zeros. */
    private static void initialize() {
        for (int i = 0; i < Q_SIZE; i++) {
            for (int j = 0; j < Q_SIZE; j++) {
                q[i][j] = 0;
            }
        }
    }

    /**
     * Single-pass argmax over row {@code state} of the Q matrix (the original
     * used an equivalent but convoluted multi-pass "found new winner" loop).
     *
     * @param returnIndexOnly true to return the best action index,
     *                        false to return the best Q value itself
     */
    private static int maximum(final int state, final boolean returnIndexOnly) {
        int winner = 0;
        for (int i = 1; i < Q_SIZE; i++) {
            if (q[state][i] > q[state][winner]) {
                winner = i;
            }
        }
        return returnIndexOnly ? winner : q[state][winner];
    }

    /** Q-learning target: immediate reward plus discounted best future value. */
    private static int reward(final int action) {
        return (int) (R[currentState][action] + GAMMA * maximum(action, false));
    }

    public static void main(String[] args) {
        train();
        test();
    }
}
Reinforcement Learning: Q-Learning Algorithm (Part 3)