# CTQL.py — Q-learning-based PID auto-tuner for a PCA9685-driven servo.
  1. #!/usr/bin/env python
  2. # -*- coding: UTF-8 -*-
  3. """
  4. A simple example for Reinforcement Learning using table lookup Q-learning method.
  5. An agent "o" is on the left of a 1 dimensional world, the treasure is on the rightmost location.
  6. Run this program and to see how the agent will improve its strategy of finding the treasure.
  7. View more on my tutorial page: https://morvanzhou.github.io/tutorials/
  8. """
  9. import numpy as np
  10. import pandas as pd
  11. import time
  12. import smbus
  13. import math
  14. import sympy
  15. from sympy import asin, cos, sin, acos,tan ,atan
  16. #from DFRobot_RaspberryPi_A02YYUW import DFRobot_A02_Distance as Board
  17. import paho.mqtt.client as mqtt
  18. import json
np.random.seed(2) # reproducible
# Discrete Q-learning states: rise-time buckets (0-3), overshoot buckets (4-5)
# and settling-time buckets (6-7) used by the three tuning phases below.
N_STATES = ['raise_time<1','1<=raise_time<2','2<=raise_time<4','raise_time>4','0<=overshoot<0.33','0.33<overshoot<1','10<=setingtime<20','20<=setingtime<30'] ## 1:time<5 2:5.05<time<5.25 3:5.25<time<5.5 4:time>5.5
goal=320 # target position the controller drives the measurement toward
ACTIONS = ['kp+1','kp+0.1','kp+0.01', 'kp+0','kp-0.01','kp-0.1','kp-1','ki+0.1','ki+0.01', 'ki+0','ki-0.01','ki-0.1','kd+0.1', 'kd+0','kd-0.1'] # available actions: gain increments/decrements
EPSILON = 0.9 # greedy policy threshold (explore when uniform() > EPSILON)
ALPHA = 0.1 # learning rate
GAMMA = 0.9 # discount factor
MAX_EPISODES =1 # maximum episodes
FRESH_TIME = 0.1 # fresh time for one move
# PID gains adjusted by the Q-learning agent
kp=0.0
ki=0.0
kd=0.0
count=50 # control steps per evaluation phase (train/train2/train3)
x=0 # latest position received over MQTT (updated in on_message)
jsonmsg="" # last decoded MQTT JSON payload
pwm_1=1600 # neutral servo pulse width (microseconds)
S_="" # next state, shared between the feedback helpers
# Accumulated state for the hand-tuned loop in coordinate_pwm_pid()
p_prev_error=0
p_integral=0
  38. def on_connect(client, userdata, flags, rc):
  39. print("Connected with result code " + str(rc))
  40. client.subscribe("b8:27:eb:eb:21:13/Log", qos=2)
# Action to take when a message arrives from the MQTT broker.
  42. def on_message(client, userdata, msg):
  43. # 轉換編碼utf-8才看得懂中文
  44. global jsonmsg
  45. global x
  46. msg.payload = msg.payload.decode('utf-8')
  47. jsonmsg = json.loads(msg.payload)
  48. x=int(jsonmsg['x'])
class PCA9685:
    """Driver for the PCA9685 16-channel 12-bit I2C PWM controller.

    Used here at 50 Hz to generate servo pulses.  Register addresses below
    follow the PCA9685 datasheet.
    """
    # Registers/etc.
    __SUBADR1 = 0x02
    __SUBADR2 = 0x03
    __SUBADR3 = 0x04
    __MODE1 = 0x00
    __PRESCALE = 0xFE
    __LED0_ON_L = 0x06
    __LED0_ON_H = 0x07
    __LED0_OFF_L = 0x08
    __LED0_OFF_H = 0x09
    __ALLLED_ON_L = 0xFA
    __ALLLED_ON_H = 0xFB
    __ALLLED_OFF_L = 0xFC
    __ALLLED_OFF_H = 0xFD

    def __init__(self, address=0x60, debug=False):
        """Open I2C bus 1 and reset the chip (MODE1 = 0x00, normal mode)."""
        self.bus = smbus.SMBus(1)
        self.address = address
        self.debug = debug
        if (self.debug):
            print("Reseting PCA9685")
        self.write(self.__MODE1, 0x00)

    def write(self, reg, value):
        "Writes an 8-bit value to the specified register/address"
        self.bus.write_byte_data(self.address, reg, value)
        if (self.debug):
            print("I2C: Write 0x%02X to register 0x%02X" % (value, reg))

    def read(self, reg):
        "Read an unsigned byte from the I2C device"
        result = self.bus.read_byte_data(self.address, reg)
        if (self.debug):
            print("I2C: Device 0x%02X returned 0x%02X from reg 0x%02X" % (self.address, result & 0xFF, reg))
        return result

    def setPWMFreq(self, freq):
        "Sets the PWM frequency"
        # Prescale = round(25MHz / (4096 * freq)) - 1, per the datasheet.
        prescaleval = 25000000.0 # 25MHz
        prescaleval /= 4096.0 # 12-bit
        prescaleval /= float(freq)
        prescaleval -= 1.0
        if (self.debug):
            print("Setting PWM frequency to %d Hz" % freq)
            print("Estimated pre-scale: %d" % prescaleval)
        prescale = math.floor(prescaleval + 0.5)
        if (self.debug):
            print("Final pre-scale: %d" % prescale)
        # PRESCALE can only be written while the oscillator is in sleep mode.
        oldmode = self.read(self.__MODE1);
        newmode = (oldmode & 0x7F) | 0x10 # sleep
        self.write(self.__MODE1, newmode) # go to sleep
        self.write(self.__PRESCALE, int(math.floor(prescale)))
        self.write(self.__MODE1, oldmode)
        time.sleep(0.005)
        # Set the RESTART bit so PWM channels resume with the new prescale.
        self.write(self.__MODE1, oldmode | 0x80)

    def setPWM(self, channel, on, off):
        "Sets a single PWM channel"
        # Each channel has 4 registers: ON low/high, OFF low/high (12-bit counts).
        self.write(self.__LED0_ON_L + 4 * channel, on & 0xFF)
        self.write(self.__LED0_ON_H + 4 * channel, on >> 8)
        self.write(self.__LED0_OFF_L + 4 * channel, off & 0xFF)
        self.write(self.__LED0_OFF_H + 4 * channel, off >> 8)
        if (self.debug):
            print("channel: %d LED_ON: %d LED_OFF: %d" % (channel, on, off))

    def setServoPulse(self, channel, pulse):
        "Sets the Servo Pulse,The PWM frequency must be 50HZ"
        # Convert microseconds to 12-bit ticks of the 20000us (50 Hz) period.
        pulse = pulse * 4096 / 20000 # PWM frequency is 50HZ,the period is 20000us
        self.setPWM(channel, 0, int(pulse))
  113. class PIDController:
  114. def __init__(self, Kp, Ki, Kd):
  115. self.Kp = Kp
  116. self.Ki = Ki
  117. self.Kd = Kd
  118. self.last_error = 0
  119. self.integral = 0
  120. def control(self, error):
  121. output = self.Kp * error + self.Ki * self.integral + self.Kd * (error - self.last_error)
  122. self.integral += error
  123. self.last_error = error
  124. return output
  125. class Any_System:
  126. def __init__(self, goal):
  127. self.target = goal
  128. self.current = 0
  129. def update(self, control_singal):
  130. self.current += control_singal
  131. return self.current
  132. def get_error(self):
  133. return self.target - self.current
  134. def train(controller,system,num_iterations):
  135. global jsonmsg
  136. global x
  137. global S_,R
  138. errors=[]
  139. raise_time=0
  140. cont=0
  141. for _ in range(num_iterations):
  142. #error = system.get_error()
  143. raise_time+=1
  144. current=x
  145. print(raise_time,current)
  146. error=system.target-current # 真實訊號
  147. output=controller.control(error)
  148. current+= output
  149. pwm_3=0.6156*current+1396
  150. if(pwm_3<1500):
  151. pwm_3=1500
  152. elif(pwm_3>1750):
  153. pwm_3=1750
  154. pwm.setServoPulse(14,pwm_3) #底部馬達置中
  155. time.sleep(0.5)
  156. #control_signal=controller.control(error)
  157. #current=system.update(control_signal)
  158. if ((current-system.target)>0):
  159. cont=raise_time
  160. break
  161. else:
  162. cont=50
  163. if cont<=10:
  164. S_= N_STATES[0]
  165. R=5
  166. print('raise_time success')
  167. elif (10<cont) and (cont<=20):
  168. S_= N_STATES[1]
  169. R=2
  170. elif (20< cont) and (cont <= 40):
  171. S_= N_STATES[2]
  172. R=1
  173. else:
  174. S_= N_STATES[3]
  175. R=1.5-(error/100)
  176. return S_,R
  177. def train2(controller,system,num_iterations):
  178. global x
  179. global S_,R
  180. errors=[]
  181. current_arr=[]
  182. ot=0
  183. over_time=0
  184. over_shoot=0
  185. for _ in range(num_iterations):
  186. #error = system.get_error()
  187. #current = 23.5 - (board.getDistance() / 10)
  188. current = x
  189. value=current
  190. error = system.target - current # 真實訊號
  191. output=controller.control(error)
  192. current+= output
  193. pwm_3=0.6156*current+1396
  194. if(pwm_3<1500):
  195. pwm_3=1500
  196. elif(pwm_3>1750):
  197. pwm_3=1750
  198. pwm.setServoPulse(14,pwm_3) #底部馬達置中
  199. time.sleep(0.5)
  200. over_time+=1
  201. if(value>ot):
  202. ot=value
  203. print(over_time,ot)
  204. over_shoot=float(abs(ot-system.target))/320
  205. print("overshoot",str(over_shoot))
  206. if over_shoot>=0 and over_shoot < 0.0625:
  207. print('overshoot success')
  208. S_ = N_STATES[4]
  209. R = 5
  210. elif (0.0625 <= over_shoot) and (over_shoot <1):
  211. S_ = N_STATES[5]
  212. R = -1*over_shoot
  213. else:
  214. S_ = N_STATES[0]
  215. R = 0
  216. return S_, R
  217. def train3(controller,system,num_iterations):
  218. global x
  219. global S_,R
  220. errors=[]
  221. cont=0
  222. setingtime=0
  223. con=0
  224. print("3")
  225. for _ in range(num_iterations):
  226. cont=cont+1
  227. current = x
  228. error = system.target - current # 真實訊號
  229. output=controller.control(error)
  230. current+= output
  231. pwm_3=0.6156*current+1396
  232. if(pwm_3<1500):
  233. pwm_3=1500
  234. elif(pwm_3>1750):
  235. pwm_3=1750
  236. pwm.setServoPulse(14,pwm_3) #底部馬達置中
  237. time.sleep(0.5)
  238. print(cont,error)
  239. if ((-1*error)>=0) and ((-1*error)<=5) and (con==0):
  240. setingtime =cont
  241. con=1
  242. elif(con==0):
  243. setingtime=40
  244. print(setingtime)
  245. if setingtime>=0 and setingtime < 10:
  246. S_ = N_STATES[6]
  247. R = 10
  248. print('setingtime success')
  249. with open('pid.txt', 'a') as f:
  250. f.write('kp:')
  251. f.write(str(controller.Kp))
  252. f.write('ki:')
  253. f.write(str(controller.Ki))
  254. f.write('kd:')
  255. f.write(str(controller.Kd))
  256. f.write('\r\n')
  257. elif (10 <= setingtime) and (setingtime <= 40):
  258. S_ = N_STATES[7]
  259. R = 1.5 -1*(error/100)
  260. else:
  261. S_ = N_STATES[4]
  262. R = -1
  263. return S_, R
  264. def build_q_table(n_states, actions):
  265. try:
  266. table = pd.read_csv("/home/pi/pid.csv",index_col=0)
  267. except:
  268. table = pd.DataFrame(
  269. np.zeros((len(n_states), len(actions))), # q_table initial values
  270. columns=actions, index=n_states, # actions's name
  271. )
  272. print(table) # show table
  273. return table
  274. def choose_action(state, q_table):
  275. # This is how to choose an action
  276. state_actions = q_table.loc[state, :]
  277. if (np.random.uniform() > EPSILON) or ((state_actions == 0).all()): # act non-greedy or state-action have no value
  278. ACT = ['kp+1', 'kp+0.1', 'kp+0.01', 'kp+0', 'kp-0.01', 'kp-0.1', 'kp-1']
  279. action_name = np.random.choice(ACT)
  280. else: # act greedy
  281. action_name = state_actions.idxmax() # replace argmax to idxmax as argmax means a different function in newer version of pandas
  282. return action_name
  283. def choose_action1(state, q_table):
  284. # This is how to choose an action
  285. state_actions = q_table.loc[state, :]
  286. if (np.random.uniform() > EPSILON) or ((state_actions == 0).all()): # act non-greedy or state-action have no value
  287. ACT = ['kp+1', 'kp+0.1', 'kp+0.01', 'kp+0', 'kp-0.01', 'kp-0.1', 'kp-1']
  288. action_name = np.random.choice(ACT)
  289. else: # act greedy
  290. action_name = state_actions.idxmax() # replace argmax to idxmax as argmax means a different function in newer version of pandas
  291. return action_name
  292. def choose_action2(state, q_table):
  293. # This is how to choose an action
  294. state_actions = q_table.loc[state, :]
  295. print("3")
  296. if (np.random.uniform() > EPSILON) or ((state_actions == 0).all()): # act non-greedy or state-action have no value
  297. ACT = [ 'ki+0.1', 'ki+0.01', 'ki+0', 'ki-0.1', 'ki-0.01']
  298. action_name = np.random.choice(ACT)
  299. else: # act greedy
  300. action_name = state_actions.idxmax() # replace argmax to idxmax as argmax means a different function in newer version of pandas
  301. print(action_name)
  302. return action_name
  303. def pid(kp):
  304. global goal,count
  305. global S_
  306. R=0
  307. print("raisetime")
  308. pid_controller = PIDController(kp,0.0,0.0)
  309. any_system = Any_System(goal)
  310. S_,R = train(pid_controller, any_system,count)
  311. print('kp:',kp)
  312. return S_,R
  313. def pid1(kp):
  314. global S_
  315. R=0
  316. print("overshoot")
  317. pid_controller = PIDController(kp, 0.0, 0.0)
  318. any_system = Any_System(goal)
  319. S_, R = train2(pid_controller, any_system, count)
  320. print('kp:', kp)
  321. return S_,R
  322. def pid2(kp,ki,kd):
  323. global S_
  324. R=0
  325. print("setingtime")
  326. pid_controller = PIDController(kp,ki,kd)
  327. any_system = Any_System(goal)
  328. S_, R = train3(pid_controller, any_system, count)
  329. print('kp:', kp,'ki',ki,'kd',kd)
  330. return S_,R
  331. def get_env_feedback(S, A):
  332. # This is how agent will interact with the environment
  333. global kp,S_
  334. R=0
  335. if A == 'kp+1': # move right
  336. kp+=1
  337. S_,R=pid(kp)
  338. elif A == 'kp+0.1': # move right
  339. kp+=0.1
  340. S_,R=pid(kp)
  341. elif A == 'kp+0.01': # move right
  342. kp+=0.01
  343. S_,R=pid(kp)
  344. elif A=='kp+0':
  345. kp=kp+0
  346. S_,R= pid(kp)
  347. elif A == 'kp-0.01': # move right
  348. kp-=0.01
  349. S_,R=pid(kp)
  350. elif A == 'kp-0.1': # move right
  351. kp-=0.1
  352. S_,R=pid(kp)
  353. elif A == 'kp-1':
  354. kp-=1
  355. S_,R= pid(kp)
  356. return S_, R
  357. def get_env_feedback1(S, A):
  358. # This is how agent will interact with the environment
  359. global kp,S_
  360. R=0
  361. if A == 'kp+1': # move right
  362. kp+=1
  363. S_,R=pid1(kp)
  364. elif A == 'kp+0.1': # move right
  365. kp+=0.1
  366. S_,R=pid1(kp)
  367. elif A == 'kp+0.01': # move right
  368. kp+=0.01
  369. S_,R=pid1(kp)
  370. elif A=='kp+0':
  371. kp=kp+0
  372. S_,R= pid1(kp)
  373. elif A == 'kp-0.01': # move right
  374. kp-=0.01
  375. S_,R=pid1(kp)
  376. elif A == 'kp-0.1': # move right
  377. kp-=0.1
  378. S_,R=pid1(kp)
  379. elif A == 'kp-1': # move right
  380. kp-=1
  381. S_,R=pid1(kp)
  382. return S_, R
  383. def get_env_feedback2(S, A):
  384. # This is how agent will interact with the environment
  385. global ki
  386. global kp
  387. global kd,S_
  388. R=0
  389. if A == 'ki+0.1': # move right
  390. ki+=0.1
  391. S_,R=pid2(kp,ki,kd)
  392. elif A == 'ki+0.01': # move right
  393. ki+=0.01
  394. S_,R=pid2(kp,ki,kd)
  395. elif A=='ki+0':
  396. ki=ki+0
  397. S_,R= pid2(kp,ki,kd)
  398. elif A == 'ki-0.1': # move right
  399. ki-=0.1
  400. S_,R=pid2(kp,ki,kd)
  401. elif A == 'ki-0.01': # move right
  402. ki-=0.01
  403. S_,R=pid2(kp,ki,kd)
  404. elif A == 'kd+0.1': # move right
  405. kd+=0.1
  406. S_,R=pid2(kp,ki,kd)
  407. elif A=='kd+0':
  408. kd=kd+0
  409. S_,R= pid2(kp,ki,kd)
  410. elif A == 'kd-0.1': # move right
  411. kd-=0.1
  412. S_,R=pid2(kp,ki,kd)
  413. return S_, R
def update_env(S, episode, step_counter):
    """Render the environment status (currently disabled).

    All printing is commented out, so this builds the (unused) status
    string and is effectively a no-op kept for debugging.
    """
    # This is how environment be updated
    interaction = 'Episode %s: raise_time= %s' % (episode + 1,S)
    #print('\r{}'.format(interaction), end='')
    #print('Episode %s: raise_time= %s\r\n' % (episode + 1,S))
def rl():
    """Main Q-learning loop: tune kp (rise time), kp (overshoot), then ki/kd.

    Walks a three-phase state machine — rise-time states 0-3, overshoot
    states 4-5, settling-time states 6-7 — performing a standard tabular
    Q-update after every action.  Terminates an episode once the fast
    settling state (N_STATES[6]) is reached.  Returns the learned Q-table.
    """
    global x,y,z
    # main part of RL loop
    q_table = build_q_table(N_STATES, ACTIONS)
    for episode in range(MAX_EPISODES):
        S = N_STATES[3]  # start in the worst rise-time state
        is_terminated = False
        pwm.setServoPulse(14, 1600)  # center the base motor
        time.sleep(5)
        while not is_terminated:
            #update_env(S, episode, step_counter)
            if S==N_STATES[3] or S==N_STATES[2] or S==N_STATES[1] or S==N_STATES[0]:
                # Phase 1: rise time — adjust kp only.
                pwm.setServoPulse(14, 1600)  # center the base motor
                time.sleep(3)
                A = choose_action(S, q_table)
                S_, R = get_env_feedback(S, A) # take action & get next state and reward
                q_predict = q_table.loc[S, A]
                q_target = R + GAMMA * q_table.loc[S_, :].max() # next state is not terminal
                q_table.loc[S, A] += ALPHA * (q_target - q_predict) # update
                print(q_table)
                S = S_ # move to next state
                #update_env(S, episode, step_counter)
                #step_counter += 1
                if S==N_STATES[0]:
                    # Rise time solved: jump into the overshoot phase.
                    S=N_STATES[5]
            elif S == N_STATES[4] or S == N_STATES[5]:
                # Phase 2: overshoot — still adjusting kp.
                pwm.setServoPulse(14, 1600)  # center the base motor
                time.sleep(3)
                A = choose_action1(S, q_table)
                S_, R = get_env_feedback1(S, A) # take action & get next state and reward
                q_predict = q_table.loc[S, A]
                print(q_table)
                q_target = R + GAMMA * q_table.loc[S_, :].max() # next state is not terminal
                q_table.loc[S, A] += ALPHA * (q_target - q_predict) # update
                S = S_ # move to next state
                #update_env(S, episode, step_counter)
                #step_counter += 1
                if S==N_STATES[4]:
                    # Overshoot solved: jump into the settling-time phase.
                    S=N_STATES[7]
            elif S == N_STATES[6] or S == N_STATES[7] :
                # Phase 3: settling time — adjust ki/kd.
                pwm.setServoPulse(14, 1600)  # center the base motor
                print("ok")
                time.sleep(3)
                A = choose_action2(S, q_table)
                S_, R = get_env_feedback2(S, A) # take action & get next state and reward
                q_predict = q_table.loc[S, A]
                print(q_table)
                q_target = R + GAMMA * q_table.loc[S_, :].max() # next state is not terminal
                q_table.loc[S, A] += ALPHA * (q_target - q_predict) # update
                S = S_ # move to next state
                if S == N_STATES[6]:
                    # Fast settling achieved — episode is done.
                    is_terminated = True
                #update_env(S, episode, step_counter )
    return q_table
def coordinate_pwm_pid():
    """Run a fixed-gain PID loop forever, steering the servo toward x = 320.

    Reads the measured position from the MQTT-updated global ``x`` every
    0.1 s and drives servo channel 14 with the clamped pulse.
    NOTE(review): gains are hard-coded (Kp≈2.06, Ki=0.1, Kd=0) — presumably
    the output of a previous Q-learning run; confirm.
    """
    global p_prev_error,p_integral,x
    # ----------------------------------------
    # Compute the servo command for the joint each cycle.
    while True:
        current= x
        p_error=320-current
        p_output=2.0599999999999987*p_error+ 0.1*p_integral+0.00*(p_error - p_prev_error)
        p_integral +=p_error
        p_prev_error=p_error
        current+= p_output
        # Map the commanded position to a servo pulse (same mapping as train*).
        pwm_1=0.6156*current+1396
        print(pwm_1)
        # Clamp to the mechanically safe pulse range.
        if(pwm_1<1500):
            pwm_1=1500
        elif(pwm_1>1750):
            pwm_1=1750
        pwm.setServoPulse(14,pwm_1)
        time.sleep(0.1)
if __name__ == "__main__":
    # Hardware setup: PCA9685 PWM driver at I2C address 0x60, 50 Hz for servos.
    pwm = PCA9685(0x60, debug=False)
    pwm.setPWMFreq(50)
    #board = Board()
    #dis_min = 0 #Minimum ranging threshold: 0mm
    #dis_max = 4500 #Highest ranging threshold: 4500mm
    #board.set_dis_range(dis_min, dis_max)
    # MQTT setup: position updates arrive on the Log topic (see on_message).
    # NOTE(review): broker IP and credentials are hard-coded — move to config.
    client = mqtt.Client()
    client.username_pw_set(username='aisky-client', password='aiskyc')
    client.connect("60.250.156.234", 1883, 60)
    # NOTE(review): callbacks assigned after connect(); they still fire because
    # the network loop only starts below, but convention is to assign first.
    client.on_connect = on_connect
    client.on_message = on_message
    client.loop_start()
    #a = raw_input("input:")
    a="t"  # "a" = run the Q-learning tuner; anything else = fixed-gain loop
    if (a=="a"):
        q_table = rl()
        print('\r\nQ-table:\n')
        print(q_table)
        q_table.to_csv("/home/pi/pid.csv")
    else:
        coordinate_pwm_pid()