Reinforcement Learning q-learning Algorithm Learning-3

Source: Internet
Author: User

Q-learning source code analysis.
import java.util.Random;

/**
 * Tabular Q-learning demo on a fixed six-state graph.
 *
 * <p>States are numbered 0..5 and an "action" is simply the index of the state to move to.
 * {@link #R} holds the immediate reward of each move, with -1 marking moves that are not
 * allowed. Training fills the integer table {@code q} using
 * {@code q[s][a] = (int) (R[s][a] + GAMMA * max(q[a][*]))}; afterwards the greedy route from
 * every initial state is printed.
 *
 * <p>All state is static and mutable, so the class is not safe for concurrent use.
 */
public class qlearning1 {

    /** Number of states; also the number of actions, because an action is a target state. */
    private static final int Q_SIZE = 6;

    /** Discount factor applied to the best value of the successor state. */
    private static final double GAMMA = 0.8;

    /** Number of passes over {@link #INITIAL_STATES} made during training. */
    private static final int ITERATIONS = 10;

    /** State at which an episode's walk stops and at which printed routes end. */
    private static final int GOAL_STATE = 5;

    /** Start state of each episode (training) and of each printed route (testing). */
    private static final int[] INITIAL_STATES = {1, 3, 5, 2, 4, 0};

    /** Reward matrix: {@code R[from][to]}, -1 where the move is not allowed. */
    private static final int[][] R = {
        {-1, -1, -1, -1,  0,  -1},
        {-1, -1, -1,  0, -1, 100},
        {-1, -1, -1,  0, -1,  -1},
        {-1,  0,  0, -1,  0,  -1},
        { 0, -1, -1,  0, -1, 100},
        {-1,  0, -1, -1,  0, 100},
    };

    /** Single shared generator; the listing built a new {@code Random} for every draw. */
    private static final Random RANDOM = new Random();

    /** Learned action values, {@code q[state][action]}. */
    private static int[][] q = new int[Q_SIZE][Q_SIZE];

    /** State the agent currently occupies; read and written by most methods below. */
    private static int currentState = 0;

    /** Resets the Q table, runs all training episodes, then prints the resulting table. */
    private static void train() {
        initialize();

        // Perform training, starting at all initial states.
        for (int iteration = 0; iteration < ITERATIONS; iteration++) {
            for (int i = 0; i < Q_SIZE; i++) {
                episode(INITIAL_STATES[i]);
            }
        }

        System.out.println("Q Matrix Values:");
        for (int i = 0; i < Q_SIZE; i++) {
            for (int j = 0; j < Q_SIZE; j++) {
                System.out.print(q[i][j] + ",\t");
            }
            System.out.print("\n");
        }
        System.out.print("\n");
    }

    /**
     * Prints, for every initial state, the route obtained by repeatedly taking the
     * highest-valued action until the goal state is reached.
     */
    private static void test() {
        // Perform tests, starting at all initial states.
        System.out.println("Shortest routes from initial states:");
        for (int i = 0; i < Q_SIZE; i++) {
            currentState = INITIAL_STATES[i];
            int steps = 0;
            do {
                int newState = maximum(currentState, true);
                System.out.print(currentState + ",");
                currentState = newState;
                steps++;
                // The greedy policy is deterministic, so a walk that has not reached the goal
                // after Q_SIZE moves has revisited a state and would otherwise loop forever.
            } while (currentState != GOAL_STATE && steps < Q_SIZE);

            if (currentState == GOAL_STATE) {
                System.out.print("5\n");
            } else {
                System.out.print("(no route found)\n");
            }
        }
    }

    /**
     * Runs one training episode.
     *
     * @param initialState state the agent is placed in before the walk starts
     */
    private static void episode(final int initialState) {
        currentState = initialState;

        // Travel from state to state until the goal state is reached. The listing tested
        // "== 5" here, which ended the walk after one move from any non-goal state.
        do {
            chooseAnAction();
        } while (currentState != GOAL_STATE);

        // When currentState = 5, run through the set once more for convergence.
        for (int i = 0; i < Q_SIZE; i++) {
            chooseAnAction();
        }
    }

    /** Takes one random allowed move, updating the Q entry for it before moving. */
    private static void chooseAnAction() {
        // getRandomAction only returns moves with R >= 0, so no further check is needed.
        final int possibleAction = getRandomAction(Q_SIZE);
        // reward() reads currentState, so the table must be updated before the move is made.
        q[currentState][possibleAction] = reward(possibleAction);
        currentState = possibleAction;
    }

    /**
     * Draws random actions until one is allowed from the current state.
     *
     * @param upperBound exclusive upper bound of the action indices to draw from
     * @return an action {@code a} with {@code R[currentState][a] > -1}
     */
    private static int getRandomAction(final int upperBound) {
        int action;
        do {
            // Random value between 0 (inclusive) and upperBound (exclusive).
            action = RANDOM.nextInt(upperBound);
        } while (R[currentState][action] <= -1);
        return action;
    }

    /** Sets every entry of the Q table to zero. */
    private static void initialize() {
        for (int i = 0; i < Q_SIZE; i++) {
            for (int j = 0; j < Q_SIZE; j++) {
                q[i][j] = 0;
            }
        }
    }

    /**
     * Finds the best action of a state according to the Q table.
     *
     * @param state row of the Q table to inspect
     * @param returnIndexOnly if {@code true} the winning action index is returned,
     *     otherwise its Q value
     * @return index or value of the largest entry in {@code q[state]}; ties go to the
     *     lowest index
     */
    private static int maximum(final int state, final boolean returnIndexOnly) {
        int winner = 0;
        // One pass with a strict comparison yields the first maximal index, which is what the
        // listing's repeat-until-stable scan also settled on.
        for (int i = 1; i < Q_SIZE; i++) {
            if (q[state][i] > q[state][winner]) {
                winner = i;
            }
        }
        return returnIndexOnly ? winner : q[state][winner];
    }

    /**
     * Computes the new Q value for moving from the current state to {@code action}.
     *
     * @param action target state of the move
     * @return immediate reward plus the discounted best value of the target state,
     *     truncated to {@code int}
     */
    private static int reward(final int action) {
        return (int) (R[currentState][action] + (GAMMA * maximum(action, false)));
    }

    /**
     * Trains the table and prints the learned values and greedy routes.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        train();
        test();
    }
}

Reinforcement Learning q-learning Algorithm Learning-3

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.