@@ -0,0 +1,516 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+"""
+Tabular (table-lookup) Q-learning used to auto-tune the gains of a PID controller.
+
+The agent nudges Kp, Ki and Kd step by step, drives a stepper motor with the resulting
+PID output, and is rewarded according to the rise time, overshoot and settling time it
+observes (states listed in N_STATES, actions in ACTIONS).
+
+Adapted from the table-lookup Q-learning example at:
+https://morvanzhou.github.io/tutorials/
+"""
+import numpy as np
+import pandas as pd
+import time
+import json
+
+#from DFRobot_RaspberryPi_A02YYUW import DFRobot_A02_Distance as Board
+import paho.mqtt.client as mqtt
+import RPi.GPIO as GPIO
+from time import sleep
+
+np.random.seed(2)  # reproducible
+
+# Stepper motor setup (step/direction driver on BCM pins)
+GPIO.setmode(GPIO.BCM)
+DIR = 23    # direction pin
+STEP = 24   # step pin
+CW = 1
+CCW = 0
+GPIO.setup(DIR, GPIO.OUT)
+GPIO.setup(STEP, GPIO.OUT)
+GPIO.output(DIR, CW)
+
+# States: buckets of rise time, overshoot and settling time measured after a trial run
+N_STATES = ['raise_time<1','1<=raise_time<2','2<=raise_time<4','raise_time>4','0<=overshoot<0.33','0.33<overshoot<1','10<=setingtime<20','20<=setingtime<30']
+goal = 320          # target value for the measurement x
+ACTIONS = ['kp+1','kp+0.1','kp+0.01', 'kp+0','kp-0.01','kp-0.1','kp-1','ki+0.1','ki+0.01', 'ki+0','ki-0.01','ki-0.1','kd+0.1', 'kd+0','kd-0.1']     # available actions
+EPSILON = 0.9       # greedy policy
+ALPHA = 0.1         # learning rate
+GAMMA = 0.9         # discount factor
+MAX_EPISODES = 1    # maximum episodes
+FRESH_TIME = 0.1    # refresh time for one move
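+# EPSILON drives the epsilon-greedy action choice in choose_action*(), ALPHA is the
+# Q-table learning rate, and GAMMA is the discount factor used in rl()'s update step.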
+
+kp = 0.0
+ki = 0.0
+kd = 0.0
+count = 50      # control iterations per trial run
+x = 0           # latest measurement received over MQTT
+jsonmsg = ""
+S_ = ""
+
+def step(direction, pulses):
+    # Send `pulses` step pulses to the driver in the given direction (CW=1, CCW=0)
+    GPIO.output(DIR, direction)
+    for _ in range(pulses):
+        # Set the step pin high
+        GPIO.output(STEP, GPIO.HIGH)
+        sleep(0.001)    # pulse width; dictates how fast the stepper motor runs
+        # Set the step pin low
+        GPIO.output(STEP, GPIO.LOW)
+        sleep(0.001)
+
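+# Note: rl() and test_pid() first issue 7750 steps from the home position, and the
+# train*() loops only keep stepping while abs(rtz) < 7750, so the net travel stays
+# roughly within one stroke and is undone at the end of each trial.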
+def on_connect(client, userdata, flags, rc):
+    print("Connected with result code " + str(rc))
+    client.subscribe("b8:27:eb:eb:21:13/Log", qos=2)
+
+
+# Called whenever a message arrives from the broker
+def on_message(client, userdata, msg):
+    global jsonmsg
+    global x
+    # decode the payload as UTF-8 so any Chinese text in it is readable
+    payload = msg.payload.decode('utf-8')
+    jsonmsg = json.loads(payload)
+    x = int(jsonmsg['x'])
+
+
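+# Positional PID law: output = Kp*e + Ki*sum(e) + Kd*(e - e_prev).  The integral is
+# accumulated after the output is computed, so the sum used at step k covers the
+# errors up to step k-1.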
+class PIDController:
+    def __init__(self, Kp, Ki, Kd):
+        self.Kp = Kp
+        self.Ki = Ki
+        self.Kd = Kd
+        self.last_error = 0
+        self.integral = 0
+
+    def control(self, error):
+        output = self.Kp * error + self.Ki * self.integral + self.Kd * (error - self.last_error)
+        self.integral += error
+        self.last_error = error
+        return output
+
+
+class Any_System:
+    def __init__(self, goal):
+        self.target = goal
+        self.current = 0
+
+    def update(self, control_signal):
+        self.current += control_signal
+        return self.current
+
+    def get_error(self):
+        return self.target - self.current
+
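+# train(), train2() and train3() each run up to `num_iterations` control steps against
+# the real plant (the measurement x published over MQTT), then map the observed rise
+# time, overshoot or settling time to a state in N_STATES and a reward R.  rtz tracks
+# the net number of motor steps issued so the motor can be driven back afterwards.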
+def train(controller, system, num_iterations):
+    # Rise-time trial: count control steps until the measurement reaches the target
+    global jsonmsg
+    global x
+    global S_, R
+    raise_time = 0
+    cont = 0
+    rtz = 0
+    for _ in range(num_iterations):
+        #error = system.get_error()
+        raise_time += 1
+        current = x
+        error = system.target - current     # error against the real measurement
+        output = controller.control(error)
+        output = output * 4
+        print(raise_time, current, output)
+        if abs(rtz) < 7750:
+            if output > 0:
+                step(1, int(output))
+                rtz -= int(output)
+            else:
+                step(0, int(-1 * output))
+                rtz += int(-1 * output)
+        time.sleep(0.5)
+        #control_signal=controller.control(error)
+        #current=system.update(control_signal)
+        if (current - system.target) >= 0:
+            cont = raise_time
+            break
+        else:
+            cont = 50
+    if cont <= 10:
+        S_ = N_STATES[0]
+        R = 5
+        print('raise_time success')
+    elif (10 < cont) and (cont <= 20):
+        S_ = N_STATES[1]
+        R = ((15 - cont) / 10)
+    elif (20 < cont) and (cont <= 40):
+        S_ = N_STATES[2]
+        R = ((30 - cont) / 10)
+    else:
+        S_ = N_STATES[3]
+        R = -(error / 100)
+    # drive the motor back by the accumulated offset
+    if rtz > 0:
+        step(1, int(rtz))
+    else:
+        step(0, int(-1 * rtz))
+    time.sleep(3)
+    print("rtz:", rtz)
+    return S_, R
+
+def train2(controller, system, num_iterations):
+    # Overshoot trial: track the peak value reached while the controller runs
+    global x
+    global S_, R
+    ot = 0
+    over_time = 0
+    over_shoot = 0
+    rtz = 0
+    for _ in range(num_iterations):
+        #error = system.get_error()
+        #current = 23.5 - (board.getDistance() / 10)
+        current = x
+        value = current
+        error = system.target - current     # error against the real measurement
+        output = controller.control(error)
+        output = output * 4
+        if abs(rtz) < 7750:
+            if output > 0:
+                step(1, int(output))
+                rtz -= int(output)
+            else:
+                step(0, int(-1 * output))
+                rtz += int(-1 * output)
+        time.sleep(0.5)
+        over_time += 1
+        if value > ot:
+            ot = value
+        print(over_time, ot, output, rtz)
+    over_shoot = float(abs(ot - system.target)) / 320
+    print("overshoot", str(over_shoot))
+    if over_shoot >= 0 and over_shoot < 0.0625:
+        print('overshoot success')
+        S_ = N_STATES[4]
+        R = 5
+    elif (0.0625 <= over_shoot) and (over_shoot < 1):
+        S_ = N_STATES[5]
+        R = -1 * over_shoot
+    else:
+        S_ = N_STATES[0]
+        R = 0
+    # drive the motor back by the accumulated offset
+    if rtz > 0:
+        step(1, int(rtz))
+    else:
+        step(0, int(-1 * rtz))
+    time.sleep(3)
+    print("rtz:", rtz)
+    return S_, R
+
+def train3(controller, system, num_iterations):
+    # Settling-time trial: count steps until the response first enters the band [target, target+5]
+    global x
+    global S_, R
+    cont = 0
+    setingtime = 0
+    con = 0
+    rtz = 0
+    for _ in range(num_iterations):
+        cont = cont + 1
+        current = x
+        error = system.target - current     # error against the real measurement
+        output = controller.control(error)
+        output = output * 4
+        if abs(rtz) < 7750:
+            if output > 0:
+                step(1, int(output))
+                rtz -= int(output)
+            else:
+                step(0, int(-1 * output))
+                rtz += int(-1 * output)
+        time.sleep(0.5)
+        print(cont, current, output)
+        if ((-1 * error) >= 0) and ((-1 * error) <= 5) and (con == 0):
+            setingtime = cont
+            con = 1
+        elif con == 0:
+            setingtime = 40
+    print(setingtime)
+    if setingtime >= 0 and setingtime < 10:
+        S_ = N_STATES[6]
+        R = 10
+        print('setingtime success')
+        # log the successful gains
+        with open('pid.txt', 'a') as f:
+            f.write('kp:')
+            f.write(str(controller.Kp))
+            f.write('ki:')
+            f.write(str(controller.Ki))
+            f.write('kd:')
+            f.write(str(controller.Kd))
+            f.write('\r\n')
+    elif (10 <= setingtime) and (setingtime <= 40):
+        S_ = N_STATES[7]
+        R = ((25 - setingtime) / 10)
+    else:
+        S_ = N_STATES[4]
+        R = -(error / 100)
+    # drive the motor back by the accumulated offset
+    if rtz > 0:
+        step(1, int(rtz))
+    else:
+        step(0, int(-1 * rtz))
+    time.sleep(3)
+    print("rtz:", rtz)
+    return S_, R
+
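+# The Q-table is a pandas DataFrame indexed by N_STATES with one column per action in
+# ACTIONS, e.g. q_table.loc['raise_time<1', 'kp+0.1'] is the learned value of nudging
+# Kp by +0.1 in that state.  A previously saved table is reloaded from /home/pi/pid.csv.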
+def build_q_table(n_states, actions):
+    try:
+        # resume from a previously saved Q-table if one exists
+        table = pd.read_csv("/home/pi/pid.csv", index_col=0)
+    except Exception:
+        table = pd.DataFrame(
+            np.zeros((len(n_states), len(actions))),    # q_table initial values
+            columns=actions, index=n_states,            # actions' names
+        )
+    print(table)    # show table
+    return table
+
+
+def choose_action(state, q_table):
+    # Epsilon-greedy action selection for the rise-time states (explores over the Kp actions)
+    state_actions = q_table.loc[state, :]
+    if (np.random.uniform() > EPSILON) or ((state_actions == 0).all()):     # act non-greedy, or this state has no values yet
+        ACT = ['kp+1', 'kp+0.1', 'kp+0.01', 'kp+0', 'kp-0.01', 'kp-0.1', 'kp-1']
+        action_name = np.random.choice(ACT)
+    else:   # act greedy
+        action_name = state_actions.idxmax()    # idxmax replaces argmax, which means something different in newer pandas
+    return action_name
+
+def choose_action1(state, q_table):
+    # Epsilon-greedy action selection for the overshoot states (explores over the Kp actions)
+    state_actions = q_table.loc[state, :]
+    if (np.random.uniform() > EPSILON) or ((state_actions == 0).all()):     # act non-greedy, or this state has no values yet
+        ACT = ['kp+1', 'kp+0.1', 'kp+0.01', 'kp+0', 'kp-0.01', 'kp-0.1', 'kp-1']
+        action_name = np.random.choice(ACT)
+    else:   # act greedy
+        action_name = state_actions.idxmax()
+    return action_name
+
+def choose_action2(state, q_table):
+    # Epsilon-greedy action selection for the settling-time states (explores over the Ki actions)
+    state_actions = q_table.loc[state, :]
+    if (np.random.uniform() > EPSILON) or ((state_actions == 0).all()):     # act non-greedy, or this state has no values yet
+        ACT = ['ki+0.1', 'ki+0.01', 'ki+0', 'ki-0.1', 'ki-0.01']
+        action_name = np.random.choice(ACT)
+    else:   # act greedy
+        action_name = state_actions.idxmax()
+        print(action_name)
+    return action_name
+
+def pid(kp):
+    # Rise-time trial with a P-only controller
+    global S_
+    R = 0
+    print("raisetime")
+    pid_controller = PIDController(kp, 0.0, 0.0)
+    any_system = Any_System(goal)
+    S_, R = train(pid_controller, any_system, count)
+    print('kp:', kp)
+    return S_, R
+
+def pid1(kp):
+    # Overshoot trial with a P-only controller
+    global S_
+    R = 0
+    print("overshoot")
+    pid_controller = PIDController(kp, 0.0, 0.0)
+    any_system = Any_System(goal)
+    S_, R = train2(pid_controller, any_system, count)
+    print('kp:', kp)
+    return S_, R
+
+def pid2(kp, ki, kd):
+    # Settling-time trial with the full PID controller
+    global S_
+    R = 0
+    print("setingtime")
+    pid_controller = PIDController(kp, ki, kd)
+    any_system = Any_System(goal)
+    S_, R = train3(pid_controller, any_system, count)
+    print('kp:', kp, 'ki', ki, 'kd', kd)
+    return S_, R
+
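+# get_env_feedback*() translate an action string such as 'kp+0.1' into the matching
+# gain change and then run the corresponding trial (rise time, overshoot or settling
+# time) to obtain the next state S_ and reward R.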
+def get_env_feedback(S, A):
+    # Apply the chosen Kp adjustment, run a rise-time trial, and return the next state and reward
+    global kp, S_
+    R = 0
+    if A == 'kp+1':
+        kp += 1
+        S_, R = pid(kp)
+    elif A == 'kp+0.1':
+        kp += 0.1
+        S_, R = pid(kp)
+    elif A == 'kp+0.01':
+        kp += 0.01
+        S_, R = pid(kp)
+    elif A == 'kp+0':
+        kp = kp + 0
+        S_, R = pid(kp)
+    elif A == 'kp-0.01':
+        kp -= 0.01
+        S_, R = pid(kp)
+    elif A == 'kp-0.1':
+        kp -= 0.1
+        S_, R = pid(kp)
+    elif A == 'kp-1':
+        kp -= 1
+        S_, R = pid(kp)
+
+    return S_, R
+
+def get_env_feedback1(S, A):
+    # Apply the chosen Kp adjustment, run an overshoot trial, and return the next state and reward
+    global kp, S_
+    R = 0
+    if A == 'kp+1':
+        kp += 1
+        S_, R = pid1(kp)
+    elif A == 'kp+0.1':
+        kp += 0.1
+        S_, R = pid1(kp)
+    elif A == 'kp+0.01':
+        kp += 0.01
+        S_, R = pid1(kp)
+    elif A == 'kp+0':
+        kp = kp + 0
+        S_, R = pid1(kp)
+    elif A == 'kp-0.01':
+        kp -= 0.01
+        S_, R = pid1(kp)
+    elif A == 'kp-0.1':
+        kp -= 0.1
+        S_, R = pid1(kp)
+    elif A == 'kp-1':
+        kp -= 1
+        S_, R = pid1(kp)
+    return S_, R
+
+def get_env_feedback2(S, A):
+    # Apply the chosen Ki/Kd adjustment, run a settling-time trial, and return the next state and reward
+    global ki
+    global kp
+    global kd, S_
+    R = 0
+    if A == 'ki+0.1':
+        ki += 0.1
+        S_, R = pid2(kp, ki, kd)
+    elif A == 'ki+0.01':
+        ki += 0.01
+        S_, R = pid2(kp, ki, kd)
+    elif A == 'ki+0':
+        ki = ki + 0
+        S_, R = pid2(kp, ki, kd)
+    elif A == 'ki-0.1':
+        ki -= 0.1
+        S_, R = pid2(kp, ki, kd)
+    elif A == 'ki-0.01':
+        ki -= 0.01
+        S_, R = pid2(kp, ki, kd)
+    elif A == 'kd+0.1':
+        kd += 0.1
+        S_, R = pid2(kp, ki, kd)
+    elif A == 'kd+0':
+        kd = kd + 0
+        S_, R = pid2(kp, ki, kd)
+    elif A == 'kd-0.1':
+        kd -= 0.1
+        S_, R = pid2(kp, ki, kd)
+    return S_, R
+
+def update_env(S, episode, step_counter):
+    # Progress display; currently a no-op because the prints are commented out
+    interaction = 'Episode %s: raise_time= %s' % (episode + 1, S)
+    #print('\r{}'.format(interaction), end='')
+    #print('Episode %s: raise_time= %s\r\n' % (episode + 1,S))
+
+
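+# rl() learns in three stages: tune Kp on the rise-time states, then on the overshoot
+# states, and finally tune Ki/Kd on the settling-time states.  Every stage applies the
+# tabular Q-learning update
+#     q_table.loc[S, A] += ALPHA * (R + GAMMA * q_table.loc[S_, :].max() - q_table.loc[S, A])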
+def rl():
+    global x
+    # main part of the RL loop
+    q_table = build_q_table(N_STATES, ACTIONS)
+    for episode in range(MAX_EPISODES):
+        S = N_STATES[3]
+        is_terminated = False
+        step(1, 7750)   # move the mechanism into its working position before the episode
+        time.sleep(5)
+        while not is_terminated:
+            #update_env(S, episode, step_counter)
+            if S == N_STATES[3] or S == N_STATES[2] or S == N_STATES[1] or S == N_STATES[0]:
+                A = choose_action(S, q_table)
+                S_, R = get_env_feedback(S, A)      # take action & get next state and reward
+                q_predict = q_table.loc[S, A]
+                q_target = R + GAMMA * q_table.loc[S_, :].max()     # next state is not terminal
+                q_table.loc[S, A] += ALPHA * (q_target - q_predict)     # update
+                print(q_table)
+                S = S_  # move to next state
+                #update_env(S, episode, step_counter)
+                #step_counter += 1
+                if S == N_STATES[0]:
+                    S = N_STATES[5]     # rise time is good enough: switch to the overshoot states
+            elif S == N_STATES[4] or S == N_STATES[5]:
+                A = choose_action1(S, q_table)
+                S_, R = get_env_feedback1(S, A)     # take action & get next state and reward
+                q_predict = q_table.loc[S, A]
+                print(q_table)
+                q_target = R + GAMMA * q_table.loc[S_, :].max()     # next state is not terminal
+                q_table.loc[S, A] += ALPHA * (q_target - q_predict)     # update
+                S = S_  # move to next state
+                #update_env(S, episode, step_counter)
+                #step_counter += 1
+                if S == N_STATES[4]:
+                    S = N_STATES[7]     # overshoot is good enough: switch to the settling-time states
+            elif S == N_STATES[6] or S == N_STATES[7]:
+                A = choose_action2(S, q_table)
+                S_, R = get_env_feedback2(S, A)     # take action & get next state and reward
+                q_predict = q_table.loc[S, A]
+                print(q_table)
+                q_target = R + GAMMA * q_table.loc[S_, :].max()     # next state is not terminal
+                q_table.loc[S, A] += ALPHA * (q_target - q_predict)     # update
+                S = S_  # move to next state
+                if S == N_STATES[6]:
+                    is_terminated = True    # settling time is good enough: episode finished
+                #update_env(S, episode, step_counter )
+    return q_table
+
+
+def test_pid():
+    # Run a fixed-gain controller (Kp = 5.32, Ki = Kd = 0) against the live measurement
+    global x
+    step(1, 7750)
+    time.sleep(5)
+    p_prev_error = 0
+    p_integral = 0
+    # ----------------------------------------
+    while True:
+        current = x
+        p_error = 320 - current
+        p_output = 5.32 * p_error + 0.0 * p_integral + 0.00 * (p_error - p_prev_error)
+        p_integral += p_error
+        p_prev_error = p_error
+        if p_output > 0:
+            step(1, int(p_output))
+        else:
+            step(0, int(-1 * p_output))
+        time.sleep(0.1)
+
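+# Entry point: connect to the MQTT broker that publishes the measurement x, then either
+# run the Q-learning loop and save the learned table (set a = "a") or run the fixed-gain
+# test controller.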
+
+if __name__ == "__main__":
+    client = mqtt.Client()
+    client.username_pw_set(username='aisky-client', password='aiskyc')
+    # register the callbacks before connecting so the CONNACK and messages are handled
+    client.on_connect = on_connect
+    client.on_message = on_message
+    client.connect("60.250.156.234", 1883, 60)
+    client.loop_start()
+    a = "T"
+    if a == "a":
+        q_table = rl()
+        print('\r\nQ-table:\n')
+        print(q_table)
+        q_table.to_csv("/home/pi/pid.csv")
+    else:
+        test_pid()