Q-Learning Source Code Analysis
import java.util.Random;

/**
 * Classic tabular Q-learning demo on a 6-state graph.
 *
 * <p>The reward matrix R defines the graph: R[s][a] == -1 means no edge from
 * state s to state a, 0 means a traversable edge, and 100 marks an edge that
 * enters the goal state (state 5). Training runs episodes from every initial
 * state, updating Q[s][a] = R[s][a] + GAMMA * max_a' Q[a][a']; testing then
 * greedily follows the learned Q matrix and prints the route to the goal.
 */
public class QLearning1 {
    private static final int Q_SIZE = 6;          // number of states (and actions)
    private static final double GAMMA = 0.8;      // discount factor
    private static final int ITERATIONS = 10;     // training sweeps over all start states
    private static final int[] INITIAL_STATES = {1, 3, 5, 2, 4, 0};

    // R[s][a]: -1 = no edge, 0 = edge, 100 = edge into the goal state (5).
    // NOTE(review): the source text had "10 0" in row 4 — reconstructed as 100
    // to match the symmetric goal-entry rewards in rows 1 and 5.
    private static final int[][] R = {
        {-1, -1, -1, -1,  0,  -1},
        {-1, -1, -1,  0, -1, 100},
        {-1, -1, -1,  0, -1,  -1},
        {-1,  0,  0, -1,  0,  -1},
        { 0, -1, -1,  0, -1, 100},
        {-1,  0, -1, -1,  0, 100}};

    // Single shared RNG: the original allocated "new Random()" on every call
    // to getRandomAction, which is wasteful and can degrade randomness.
    private static final Random RANDOM = new Random();

    private static int[][] q = new int[Q_SIZE][Q_SIZE];
    private static int currentState = 0;

    /** Trains the Q matrix, then prints its final values. */
    private static void train() {
        initialize();

        // Perform training, starting at all initial states.
        for (int j = 0; j < ITERATIONS; j++) {
            for (int i = 0; i < Q_SIZE; i++) {
                episode(INITIAL_STATES[i]);
            }
        }

        System.out.println("Q Matrix values:");
        for (int i = 0; i < Q_SIZE; i++) {
            for (int j = 0; j < Q_SIZE; j++) {
                System.out.print(q[i][j] + ",\t");
            }
            System.out.print("\n");
        }
        System.out.print("\n");
    }

    /** Greedily follows the learned Q matrix from each initial state to the goal. */
    private static void test() {
        System.out.println("Shortest routes from initial states:");
        for (int i = 0; i < Q_SIZE; i++) {
            currentState = INITIAL_STATES[i];
            int newState;
            do {
                newState = maximum(currentState, true);
                System.out.print(currentState + ", ");
                currentState = newState;
            } while (currentState < 5);
            System.out.print("5\n");
        }
    }

    /**
     * Runs one training episode from {@code initialState} until the goal
     * state (5) is reached, then a few extra updates for convergence.
     */
    private static void episode(final int initialState) {
        currentState = initialState;

        // Travel from state to state until the goal state is reached.
        // BUGFIX: the original looped while (currentState == 5), which
        // contradicts the stated intent "until goal state is reached" and
        // ended every episode after a single random step.
        do {
            chooseAnAction();
        } while (currentState != 5);

        // Having reached the goal, run through the set once more for convergence.
        for (int i = 0; i < Q_SIZE; i++) {
            chooseAnAction();
        }
    }

    /** Picks a random valid action, applies the Q-update, and moves there. */
    private static void chooseAnAction() {
        // Randomly choose a possible action connected to the current state.
        int possibleAction = getRandomAction(Q_SIZE);

        if (R[currentState][possibleAction] >= 0) {
            q[currentState][possibleAction] = reward(possibleAction);
            currentState = possibleAction;
        }
    }

    /**
     * Returns a uniformly random action in [0, upperBound) that is connected
     * to the current state (i.e. R[currentState][action] > -1).
     */
    private static int getRandomAction(final int upperBound) {
        while (true) {
            int action = RANDOM.nextInt(upperBound);
            if (R[currentState][action] > -1) {
                return action;
            }
        }
    }

    /** Resets the Q matrix to all zeros. */
    private static void initialize() {
        for (int i = 0; i < Q_SIZE; i++) {
            for (int j = 0; j < Q_SIZE; j++) {
                q[i][j] = 0;
            }
        }
    }

    /**
     * Single-pass argmax over row {@code state} of the Q matrix (the original
     * used an equivalent but convoluted multi-pass "found new winner" loop).
     *
     * @param returnIndexOnly true to return the best action index,
     *                        false to return the best Q value itself
     */
    private static int maximum(final int state, final boolean returnIndexOnly) {
        int winner = 0;
        for (int i = 1; i < Q_SIZE; i++) {
            if (q[state][i] > q[state][winner]) {
                winner = i;
            }
        }
        return returnIndexOnly ? winner : q[state][winner];
    }

    /** Q-learning target: immediate reward plus discounted best future value. */
    private static int reward(final int action) {
        return (int) (R[currentState][action] + GAMMA * maximum(action, false));
    }

    public static void main(String[] args) {
        train();
        test();
    }
}
Reinforcement Learning: Q-Learning Algorithm (Part 3)