Tuesday, July 31, 2018

Linux Clock - clk_hw_register_composite

note for myself:
clk_register_composite -> clk_hw_register_composite

/*
 * struct clk flavour of clk_hw_register_composite(): forwards every argument
 * unchanged and returns the clk pointer held by the registered clk_hw, or the
 * registration error re-cast to a struct clk pointer.
 */
struct clk *clk_register_composite(struct device *dev, const char *name,
   const char * const *parent_names, int num_parents,
   struct clk_hw *mux_hw, const struct clk_ops *mux_ops,
   struct clk_hw *rate_hw, const struct clk_ops *rate_ops,
   struct clk_hw *gate_hw, const struct clk_ops *gate_ops,
   unsigned long flags)
{
 struct clk_hw *composite_hw = clk_hw_register_composite(dev, name,
   parent_names, num_parents, mux_hw, mux_ops,
   rate_hw, rate_ops, gate_hw, gate_ops, flags);

 return IS_ERR(composite_hw) ? ERR_CAST(composite_hw) : composite_hw->clk;
}

Key functions:
/*
 * Register a composite clock assembled from up to three caller-supplied
 * parts: a mux (parent selection), a rate part and a gate (enable/disable).
 * A part is used only when both its clk_hw and its clk_ops are non-NULL.
 *
 * Returns the clk_hw embedded in the newly allocated struct clk_composite,
 * or an ERR_PTR():
 *   -ENOMEM  the composite could not be allocated
 *   -EINVAL  a supplied part lacks a mandatory op (mux: get_parent;
 *            rate: recalc_rate; gate: is_enabled, enable and disable)
 *   or whatever error clk_hw_register() returned.
 */
struct clk_hw *clk_hw_register_composite(struct device *dev, const char *name,
   const char * const *parent_names, int num_parents,
   struct clk_hw *mux_hw, const struct clk_ops *mux_ops,
   struct clk_hw *rate_hw, const struct clk_ops *rate_ops,
   struct clk_hw *gate_hw, const struct clk_ops *gate_ops,
   unsigned long flags)
{
 struct clk_hw *hw;
 struct clk_init_data init;
 struct clk_composite *composite;
 struct clk_ops *clk_composite_ops;
 int ret;

 /* Zeroed allocation: every op and sub-hw pointer starts out NULL. */
 composite = kzalloc(sizeof(*composite), GFP_KERNEL);
 if (!composite)
  return ERR_PTR(-ENOMEM);

 init.name = name;
 init.flags = flags | CLK_IS_BASIC;
 init.parent_names = parent_names;
 init.num_parents = num_parents;
 hw = &composite->hw;

 /* The ops table lives inside the composite and is filled in below. */
 clk_composite_ops = &composite->ops;

 /* Mux part: get_parent is mandatory, the other ops are optional. */
 if (mux_hw && mux_ops) {
  if (!mux_ops->get_parent) {
   hw = ERR_PTR(-EINVAL);
   goto err;
  }

  composite->mux_hw = mux_hw;
  composite->mux_ops = mux_ops;
  clk_composite_ops->get_parent = clk_composite_get_parent;
  if (mux_ops->set_parent)
   clk_composite_ops->set_parent = clk_composite_set_parent;
  if (mux_ops->determine_rate)
   clk_composite_ops->determine_rate = clk_composite_determine_rate;
 }

 /* Rate part: recalc_rate is mandatory. */
 if (rate_hw && rate_ops) {
  if (!rate_ops->recalc_rate) {
   hw = ERR_PTR(-EINVAL);
   goto err;
  }
  clk_composite_ops->recalc_rate = clk_composite_recalc_rate;

  /* determine_rate takes precedence over round_rate when both exist. */
  if (rate_ops->determine_rate)
   clk_composite_ops->determine_rate =
    clk_composite_determine_rate;
  else if (rate_ops->round_rate)
   clk_composite_ops->round_rate =
    clk_composite_round_rate;

  /* .set_rate requires either .round_rate or .determine_rate */
  if (rate_ops->set_rate) {
   if (rate_ops->determine_rate || rate_ops->round_rate)
    clk_composite_ops->set_rate =
      clk_composite_set_rate;
   else
    /* Only warns: registration continues without .set_rate. */
    WARN(1, "%s: missing round_rate op is required\n",
      __func__);
  }

  composite->rate_hw = rate_hw;
  composite->rate_ops = rate_ops;
 }

 /* Combined op only when both a settable mux and a settable rate exist. */
 if (mux_hw && mux_ops && rate_hw && rate_ops) {
  if (mux_ops->set_parent && rate_ops->set_rate)
   clk_composite_ops->set_rate_and_parent =
   clk_composite_set_rate_and_parent;
 }

 /* Gate part: all three of is_enabled/enable/disable are mandatory. */
 if (gate_hw && gate_ops) {
  if (!gate_ops->is_enabled || !gate_ops->enable ||
      !gate_ops->disable) {
   hw = ERR_PTR(-EINVAL);
   goto err;
  }

  composite->gate_hw = gate_hw;
  composite->gate_ops = gate_ops;
  clk_composite_ops->is_enabled = clk_composite_is_enabled;
  clk_composite_ops->enable = clk_composite_enable;
  clk_composite_ops->disable = clk_composite_disable;
 }

 init.ops = clk_composite_ops;
 /*
  * init is a stack variable; NOTE(review): this presumably relies on
  * clk_hw_register() not keeping the pointer after it returns - confirm.
  */
 composite->hw.init = &init;

 ret = clk_hw_register(dev, hw);
 if (ret) {
  hw = ERR_PTR(ret);
  goto err;
 }

 /* Point every sub-hw that is in use at the composite's struct clk. */
 if (composite->mux_hw)
  composite->mux_hw->clk = hw->clk;

 if (composite->rate_hw)
  composite->rate_hw->clk = hw->clk;

 if (composite->gate_hw)
  composite->gate_hw->clk = hw->clk;

 return hw;

err:
 /* hw already holds the ERR_PTR to return; only the composite is freed. */
 kfree(composite);
 return hw;
}
The composite function is rather large, so decode it part by part.
A composite clock is a combination of a mux, a rate (divider) part and a gate.
The mux, rate and gate structs themselves are nothing to worry about here: they are created by the caller and passed in from outside.

what these do:
clk_composite_ops->is_enabled = clk_composite_is_enabled;
clk_composite_ops->enable = clk_composite_enable;
clk_composite_ops->disable = clk_composite_disable;
From a rough look at the clk_composite_* functions, these three ops do the clock gating.

example usage at drivers/clk/clk-stm32h7.c#L1323
  /* Register the 3 output dividers */
  for (odf = 0; odf < 3; odf++) {
   int idx = n * 3 + odf;

   get_cfg_composite_div(&odf_clk_gcfg, &stm32_odf[n][odf],
     &c_cfg, &stm32rcc_lock);

   hws[ODF_BANK + idx] = clk_hw_register_composite(NULL,
     stm32_odf[n][odf].name,
     stm32_odf[n][odf].parent_name,
     stm32_odf[n][odf].num_parents,
     c_cfg.mux_hw, c_cfg.mux_ops,
     c_cfg.div_hw, c_cfg.div_ops,
     c_cfg.gate_hw, c_cfg.gate_ops,
     stm32_odf[n][odf].flags);
  }

All the c_cfg information comes from "stm32_odf" and is filled in by "get_cfg_composite_div", while the ops are assigned in "odf_clk_gcfg".
1.
/*
 * Generic config for the ODF composites: only the divider and gate entries
 * are set (both with flags 0); no mux entry is given.
 */
static struct composite_clk_gcfg odf_clk_gcfg = {
 M_CFG_DIV(&odf_divider_ops, 0),
 M_CFG_GATE(&odf_gate_ops, 0),
};
2.
/*
 * Designated-initializer helpers for struct composite_clk_gcfg: each expands
 * to one member (.div or .gate) pointing at a compound literal that holds
 * the flags followed by the ops.
 */
#define M_CFG_DIV(_rate_ops, _rate_flags)\
 .div = &(struct composite_clk_gcfg_t) {_rate_flags, _rate_ops}

#define M_CFG_GATE(_gate_ops, _gate_flags)\
 .gate = &(struct composite_clk_gcfg_t) { _gate_flags, _gate_ops}
3.
/*
 * General config definition of a composite clock
 * (the rate part is a clock divider only).
 */
struct composite_clk_gcfg {
 struct composite_clk_gcfg_t *mux;
 struct composite_clk_gcfg_t *div;
 struct composite_clk_gcfg_t *gate;
};

"stm32_odf"

/*
 * Build one composite clock config entry for an output divider:
 *   - no mux (.mux = NULL) and exactly one parent (_parent)
 *   - divider field described by (_rate_offset, _rate_shift, _rate_width)
 *   - gate bit described by (_gate_offset, _bit_idx)
 *   - _name and _flags are stored as given
 */
#define M_ODF_F(_name, _parent, _gate_offset,  _bit_idx, _rate_offset,\
  _rate_shift, _rate_width, _flags)\
{\
 .mux = NULL,\
 .div = &(struct muxdiv_cfg) {_rate_offset, _rate_shift, _rate_width},\
 .gate = &(struct gate_cfg) {_gate_offset, _bit_idx },\
 .name = _name,\
 .parent_name = &(const char *) {_parent},\
 .num_parents = 1,\
 .flags = _flags,\
}

static const struct composite_clk_cfg stm32_odf[3][3] = {
 {
  M_ODF_F("pll1_p", "vco1", RCC_PLLCFGR, 16, RCC_PLL1DIVR,  9, 7,
    CLK_IGNORE_UNUSED),
  M_ODF_F("pll1_q", "vco1", RCC_PLLCFGR, 17, RCC_PLL1DIVR, 16, 7,
    CLK_IGNORE_UNUSED),
  M_ODF_F("pll1_r", "vco1", RCC_PLLCFGR, 18, RCC_PLL1DIVR, 24, 7,
    CLK_IGNORE_UNUSED),
 },
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
_gate_offset,  _bit_idx == RCC_PLLCFGR, 16:
Bit 16 DIVP1EN: PLL1 DIVP divider output enable
Set and reset by software to enable the pll1_p_ck output of the PLL1.
0: pll1_p_ck output is disabled
1: pll1_p_ck output is enabled (default after reset)
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
_rate_offset, _rate_shift, _rate_width == RCC_PLL1DIVR,  9, 7:
Bits 15:9 DIVP1[6:0]: PLL1 DIVP division factor
Set and reset by software to control the frequency of the pll1_p_ck clock.
0000000: Not allowed
0000001: pll1_p_ck = vco1_ck / 2 (default after reset)
0000010: Not allowed
0000011: pll1_p_ck = vco1_ck / 4
...
1111111: pll1_p_ck = vco1_ck / 128
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define CLK_IGNORE_UNUSED BIT(3) /* do not gate even if unused */
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

Only the register and bit information of each particular function is passed in. I cannot find where the actual settings are made.

I found one  PLLs Initialization Flowchart


It seems to be registering the PLL output clock, which consists of a gate and a divider. I am still not sure where the actual settings are done - maybe on the U-Boot side, I guess.


Sunday, July 22, 2018

Linux Clock - clk_hw_register_mux_table

note for myself:

For mux, I still don't understand most of it.
The frame:
/**
 * struct clk_mux - multiplexer clock
 *
 * @hw:  handle between common and hardware-specific interfaces
 * @reg: register controlling multiplexer
 * @table: array of register values corresponding to the parent index
 *  (may be NULL, in which case the value is derived from the index)
 * @shift: shift to multiplexer bit field
 * @mask: mask of multiplexer bit field (unshifted; applied after @shift)
 * @flags: hardware-specific flags
 * @lock: register lock (may be NULL)
 *
 * Clock with multiple selectable parents.  clk_mux_ops implements
 * .get_parent, .set_parent and .determine_rate; the read-only variant
 * clk_mux_ro_ops implements .get_parent only.
 *
 * Flags:
 * CLK_MUX_INDEX_ONE - register index starts at 1, not 0
 * CLK_MUX_INDEX_BIT - register index is a single bit (power of two)
 * CLK_MUX_HIWORD_MASK - The mux settings are only in lower 16-bit of this
 * register, and mask of mux bits are in higher 16-bit of this register.
 * While setting the mux bits, higher 16-bit should also be updated to
 * indicate changing mux bits.
 * CLK_MUX_READ_ONLY - The mux can't be changed; clk_mux_ro_ops is used.
 * CLK_MUX_ROUND_CLOSEST - Use the parent rate that is closest to the desired
 * frequency.
 */
struct clk_mux {
 struct clk_hw hw;
 void __iomem *reg;
 u32  *table;
 u32  mask;
 u8  shift;
 u8  flags;
 spinlock_t *lock;
};

#define to_clk_mux(_hw) container_of(_hw, struct clk_mux, hw)

#define CLK_MUX_INDEX_ONE  BIT(0)
#define CLK_MUX_INDEX_BIT  BIT(1)
#define CLK_MUX_HIWORD_MASK  BIT(2)
#define CLK_MUX_READ_ONLY  BIT(3) /* mux can't be changed */
#define CLK_MUX_ROUND_CLOSEST  BIT(4)

extern const struct clk_ops clk_mux_ops;
extern const struct clk_ops clk_mux_ro_ops;

struct clk *clk_register_mux(struct device *dev, const char *name,
  const char * const *parent_names, u8 num_parents,
  unsigned long flags,
  void __iomem *reg, u8 shift, u8 width,
  u8 clk_mux_flags, spinlock_t *lock);
struct clk_hw *clk_hw_register_mux(struct device *dev, const char *name,
  const char * const *parent_names, u8 num_parents,
  unsigned long flags,
  void __iomem *reg, u8 shift, u8 width,
  u8 clk_mux_flags, spinlock_t *lock);

struct clk *clk_register_mux_table(struct device *dev, const char *name,
  const char * const *parent_names, u8 num_parents,
  unsigned long flags,
  void __iomem *reg, u8 shift, u32 mask,
  u8 clk_mux_flags, u32 *table, spinlock_t *lock);
struct clk_hw *clk_hw_register_mux_table(struct device *dev, const char *name,
  const char * const *parent_names, u8 num_parents,
  unsigned long flags,
  void __iomem *reg, u8 shift, u32 mask,
  u8 clk_mux_flags, u32 *table, spinlock_t *lock);

int clk_mux_val_to_index(struct clk_hw *hw, u32 *table, unsigned int flags,
    unsigned int val);
unsigned int clk_mux_index_to_val(u32 *table, unsigned int flags, u8 index);

void clk_unregister_mux(struct clk *clk);
void clk_hw_unregister_mux(struct clk_hw *hw);

And from "drivers/clk/clk-mux.c"
clk_register_mux -> clk_register_mux_table -> clk_hw_register_mux_table
clk_hw_register_mux ->  clk_hw_register_mux_table

Let's see the differences:
/*
 * struct clk flavour of clk_hw_register_mux_table(): same arguments, but the
 * caller gets hw->clk back, or the registration error re-cast to struct clk *.
 */
struct clk *clk_register_mux_table(struct device *dev, const char *name,
  const char * const *parent_names, u8 num_parents,
  unsigned long flags,
  void __iomem *reg, u8 shift, u32 mask,
  u8 clk_mux_flags, u32 *table, spinlock_t *lock)
{
 struct clk_hw *mux_hw;

 mux_hw = clk_hw_register_mux_table(dev, name, parent_names,
        num_parents, flags, reg, shift, mask,
        clk_mux_flags, table, lock);
 if (!IS_ERR(mux_hw))
  return mux_hw->clk;

 return ERR_CAST(mux_hw);
}
EXPORT_SYMBOL_GPL(clk_register_mux_table);

/*
 * Register a mux described by a field width rather than a mask: the width is
 * turned into a mask of that many low bits and no value table is used.
 */
struct clk *clk_register_mux(struct device *dev, const char *name,
  const char * const *parent_names, u8 num_parents,
  unsigned long flags,
  void __iomem *reg, u8 shift, u8 width,
  u8 clk_mux_flags, spinlock_t *lock)
{
 u32 field_mask = BIT(width) - 1;
 u32 *no_table = NULL;

 return clk_register_mux_table(dev, name, parent_names, num_parents,
          flags, reg, shift, field_mask,
          clk_mux_flags, no_table, lock);
}
EXPORT_SYMBOL_GPL(clk_register_mux);

/*
 * clk_hw flavour of the width-based mux registration: builds a mask of
 * "width" low bits and registers the mux without a value table.
 */
struct clk_hw *clk_hw_register_mux(struct device *dev, const char *name,
  const char * const *parent_names, u8 num_parents,
  unsigned long flags,
  void __iomem *reg, u8 shift, u8 width,
  u8 clk_mux_flags, spinlock_t *lock)
{
 u32 field_mask;

 field_mask = BIT(width) - 1;

 return clk_hw_register_mux_table(dev, name, parent_names,
      num_parents, flags, reg, shift,
      field_mask, clk_mux_flags, NULL, lock);
}
EXPORT_SYMBOL_GPL(clk_hw_register_mux);

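clk_register_mux and clk_hw_register_mux:
both compute "u32 mask = BIT(width) - 1;"
and pass NULL as the table to xxx_register_mux_table

clk_register_mux_table and clk_hw_register_mux_table:
almost the same, but clk_register_mux_table is only a wrapper: it calls clk_hw_register_mux_table, checks the result with IS_ERR and returns hw->clk (a struct clk *) instead of the struct clk_hw *.

So "clk_hw_register_mux_table" is the function doing all the work, and it is the one I should study: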
/*
 * Allocate a struct clk_mux, fill it from the arguments and register it.
 *
 * Returns the clk_hw embedded in the new mux, or an ERR_PTR():
 *   -EINVAL  CLK_MUX_HIWORD_MASK is set but the field does not fit in the
 *            low 16 bits
 *   -ENOMEM  the mux could not be allocated
 *   or the error returned by clk_hw_register() (the mux is freed first).
 */
struct clk_hw *clk_hw_register_mux_table(struct device *dev, const char *name,
  const char * const *parent_names, u8 num_parents,
  unsigned long flags,
  void __iomem *reg, u8 shift, u32 mask,
  u8 clk_mux_flags, u32 *table, spinlock_t *lock)
{
 struct clk_init_data init;
 struct clk_mux *mux;
 int ret;

 /* With a hiword mask the whole field has to sit below bit 16. */
 if (clk_mux_flags & CLK_MUX_HIWORD_MASK) {
  u8 width = fls(mask) - ffs(mask) + 1;

  if (width + shift > 16) {
   pr_err("mux value exceeds LOWORD field\n");
   return ERR_PTR(-EINVAL);
  }
 }

 mux = kzalloc(sizeof(*mux), GFP_KERNEL);
 if (!mux)
  return ERR_PTR(-ENOMEM);

 /* Hardware description of the multiplexer field */
 mux->reg = reg;
 mux->shift = shift;
 mux->mask = mask;
 mux->table = table;
 mux->flags = clk_mux_flags;
 mux->lock = lock;

 /* Registration data; read-only muxes get the reduced ops table */
 init.name = name;
 init.ops = (clk_mux_flags & CLK_MUX_READ_ONLY) ?
   &clk_mux_ro_ops : &clk_mux_ops;
 init.flags = flags | CLK_IS_BASIC;
 init.parent_names = parent_names;
 init.num_parents = num_parents;
 mux->hw.init = &init;

 ret = clk_hw_register(dev, &mux->hw);
 if (ret) {
  kfree(mux);
  return ERR_PTR(ret);
 }

 return &mux->hw;
}
EXPORT_SYMBOL_GPL(clk_hw_register_mux_table);
I copied everything here for easier viewing.
There are two ops tables; this code is self-explanatory:
if (clk_mux_flags & CLK_MUX_READ_ONLY)
init.ops = &clk_mux_ro_ops;
else
init.ops = &clk_mux_ops;


/* Ops for a writable mux: parent can be read, changed and used for rate selection. */
const struct clk_ops clk_mux_ops = {
 .get_parent = clk_mux_get_parent,
 .set_parent = clk_mux_set_parent,
 .determine_rate = clk_mux_determine_rate,
};
EXPORT_SYMBOL_GPL(clk_mux_ops);

/* Ops for a read-only mux: only the current parent can be queried. */
const struct clk_ops clk_mux_ro_ops = {
 .get_parent = clk_mux_get_parent,
};
EXPORT_SYMBOL_GPL(clk_mux_ro_ops);

clk_mux_set_parent:
/*
 * Select parent number "index" by writing the matching value into the mux
 * bit field of mux->reg.  Always returns 0.
 */
static int clk_mux_set_parent(struct clk_hw *hw, u8 index)
{
 struct clk_mux *mux = to_clk_mux(hw);
 /* Translate the parent index into the raw field value (done before locking). */
 u32 val = clk_mux_index_to_val(mux->table, mux->flags, index);
 unsigned long flags = 0;
 u32 reg;

 /* The lock is optional; NOTE(review): __acquire() looks like a static-checker annotation only - confirm. */
 if (mux->lock)
  spin_lock_irqsave(mux->lock, flags);
 else
  __acquire(mux->lock);

 if (mux->flags & CLK_MUX_HIWORD_MASK) {
  /* No read-back: start from the field mask placed in the upper 16 bits. */
  reg = mux->mask << (mux->shift + 16);
 } else {
  /* Read-modify-write: clear the current field, keep all other bits. */
  reg = clk_readl(mux->reg);
  reg &= ~(mux->mask << mux->shift);
 }
 val = val << mux->shift;
 reg |= val;
 clk_writel(reg, mux->reg);

 if (mux->lock)
  spin_unlock_irqrestore(mux->lock, flags);
 else
  __release(mux->lock);

 return 0;
}

clk_mux_index_to_val:

/*
 * Map a parent index to the value to program into the mux field.
 *
 * With a table the value is simply table[index].  Without one the value is
 * the index itself, turned into a single set bit when CLK_MUX_INDEX_BIT is
 * given and then incremented when CLK_MUX_INDEX_ONE is given.
 */
unsigned int clk_mux_index_to_val(u32 *table, unsigned int flags, u8 index)
{
 unsigned int field_val;

 if (table)
  return table[index];

 field_val = (flags & CLK_MUX_INDEX_BIT) ? (1 << index) : index;
 if (flags & CLK_MUX_INDEX_ONE)
  field_val++;

 return field_val;
}
EXPORT_SYMBOL_GPL(clk_mux_index_to_val);
4 ways to obtain the val:
1. from the predefined table: val = table[index];
2. CLK_MUX_INDEX_BIT: one bit corresponds to one source, val = 1 << index
3. CLK_MUX_INDEX_ONE: register values start at 1, so val = index + 1
4. no table and no flag: the index itself is used directly as the register value

After the val is obtained, it is a normal write to the particular register.
val = val << mux->shift;
reg |= val;
clk_writel(reg, mux->reg);

clk_mux_get_parent:
now is how to get the parent clock.

/*
 * Read the mux bit field from the register and translate it into a parent
 * index via clk_mux_val_to_index().
 */
static u8 clk_mux_get_parent(struct clk_hw *hw)
{
 struct clk_mux *mux = to_clk_mux(hw);
 u32 field = (clk_readl(mux->reg) >> mux->shift) & mux->mask;

 return clk_mux_val_to_index(hw, mux->table, mux->flags, field);
}

// all information is at clk_mux_val_to_index

/*
 * Map a raw mux field value back to a parent index.
 *
 * With a table, the index is the position of the first entry equal to val.
 * Without one, a non-zero val is reduced to the position of its lowest set
 * bit for CLK_MUX_INDEX_BIT and decremented for CLK_MUX_INDEX_ONE.
 *
 * Returns the index, or -EINVAL when val matches no table entry or the
 * resulting index is not below the number of parents.
 */
int clk_mux_val_to_index(struct clk_hw *hw, u32 *table, unsigned int flags,
    unsigned int val)
{
 int num_parents = clk_hw_get_num_parents(hw);
 int idx;

 if (!table) {
  if (val && (flags & CLK_MUX_INDEX_BIT))
   val = ffs(val) - 1;

  if (val && (flags & CLK_MUX_INDEX_ONE))
   val--;

  if (val >= num_parents)
   return -EINVAL;

  return val;
 }

 for (idx = 0; idx < num_parents; idx++) {
  if (table[idx] == val)
   return idx;
 }

 return -EINVAL;
}
EXPORT_SYMBOL_GPL(clk_mux_val_to_index);
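Again 4 ways to do the mapping (the reverse of clk_mux_index_to_val):
1. look the val up in the table and return its position - easiest to understand
2. CLK_MUX_INDEX_BIT: return the index of the least significant set bit; for example, if only bit 4 is set, ffs returns 5 and "val = ffs(val) - 1" = 4.
3. CLK_MUX_INDEX_ONE: register index starts at 1, not 0, so val is decremented.
4. otherwise the register value is the index itself (register index starts at 0).

I need to read reference code to understand how to use this mux register.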

Linux Clock - clk_hw_register_custom_function

note for myself:

The driver clk-stm32h7.c creates its own clock registration function - clk_register_ready_gate
/*
 * Allocate and register a single-parent "ready gate": a clk_gate controlled
 * by bit_idx of reg, plus the index of the ready bit (bit_rdy) that the
 * ready_gate_clk_ops poll.
 *
 * Returns the gate's clk_hw, ERR_PTR(-ENOMEM) when the allocation fails, or
 * the clk_hw_register() error (the allocation is freed first).
 */
static struct clk_hw *clk_register_ready_gate(struct device *dev,
  const char *name, const char *parent_name,
  void __iomem *reg, u8 bit_idx, u8 bit_rdy,
  unsigned long flags, spinlock_t *lock)
{
 struct clk_init_data init = { NULL };
 struct stm32_ready_gate *rgate;
 int ret;

 rgate = kzalloc(sizeof(*rgate), GFP_KERNEL);
 if (!rgate)
  return ERR_PTR(-ENOMEM);

 /* Hardware description: enable bit and ready bit in the same register */
 rgate->gate.reg = reg;
 rgate->gate.bit_idx = bit_idx;
 rgate->gate.lock = lock;
 rgate->bit_rdy = bit_rdy;

 /* Registration data */
 init.name = name;
 init.ops = &ready_gate_clk_ops;
 init.flags = flags;
 init.parent_names = &parent_name;
 init.num_parents = 1;
 rgate->gate.hw.init = &init;

 ret = clk_hw_register(dev, &rgate->gate.hw);
 if (ret) {
  kfree(rgate);
  return ERR_PTR(ret);
 }

 return &rgate->gate.hw;
}

clk_hw_register is the common function used to register a clock.

What is needed to create your own clock function?
static const struct clk_ops
/*
 * Ops of the ready gate: enable/disable are the driver's own wrappers that
 * also poll the ready bit, while is_enabled reuses clk_gate_is_enabled.
 */
static const struct clk_ops ready_gate_clk_ops = {
 .enable  = ready_gate_clk_enable,
 .disable = ready_gate_clk_disable,
 .is_enabled = clk_gate_is_enabled,
};
enable, disable, is_enabled are needed.

Look at ready_gate_clk_enable
/*
 * Enable the gate through clk_gate_ops, then poll the ready bit until it is
 * set or RGATE_TIMEOUT polls (100us apart) have elapsed.
 *
 * Returns 0 when the gate was already enabled or the ready bit got set, and
 * 1 when the polling timed out.
 * NOTE(review): a timeout is reported as positive 1, not a negative errno;
 * check how callers interpret a non-zero, non-negative result.
 */
static int ready_gate_clk_enable(struct clk_hw *hw)
{
 struct clk_gate *gate = to_clk_gate(hw);
 struct stm32_ready_gate *rgate = to_ready_gate_clk(gate);
 int bit_status;
 unsigned int timeout = RGATE_TIMEOUT;

 /* Already on: nothing to do and nothing to wait for. */
 if (clk_gate_ops.is_enabled(hw))
  return 0;

 clk_gate_ops.enable(hw);

 /* We can't use readl_poll_timeout() because we could block if
  * someone enables this clock before the clocksource changes:
  * only the jiffies counter is available then, jiffies are incremented
  * by interrupts, and the enable op must not be interrupted.
  */
 do {
  /* bit_status is 1 while the ready bit is still clear. */
  bit_status = !(readl(gate->reg) & BIT(rgate->bit_rdy));

  if (bit_status)
   udelay(100);

 } while (bit_status && --timeout);

 return bit_status;
}

It does the following:
1. checks whether the clock is already enabled (and returns early if so),
2. enables the clock,
3. polls the register until the ready bit is set (or the timeout expires).

Same goes to ready_gate_clk_disable:
/*
 * Disable the gate through clk_gate_ops, then poll until the ready bit has
 * cleared or the retry budget (RGATE_TIMEOUT polls, 100us apart) is used up.
 * Does nothing when the gate is already disabled.
 */
static void ready_gate_clk_disable(struct clk_hw *hw)
{
 struct clk_gate *gate = to_clk_gate(hw);
 struct stm32_ready_gate *rgate = to_ready_gate_clk(gate);
 unsigned int retries = RGATE_TIMEOUT;

 if (!clk_gate_ops.is_enabled(hw))
  return;

 clk_gate_ops.disable(hw);

 /* Keep polling for as long as the ready flag is still set. */
 while (readl(gate->reg) & BIT(rgate->bit_rdy)) {
  udelay(100);

  if (!--retries)
   break;
 }
}

Almost the same:
1. checks whether the clock is already disabled (and returns early if so),
2. disables the clock,
3. polls the register until the ready bit is cleared (or the timeout expires).

One thing to note: both
ready_gate_clk_enable and
ready_gate_clk_disable
use the existing "clk_gate_ops.enable(hw);" and "clk_gate_ops.disable(hw);" to enable and disable the clock.
On top of that, they add a layer of checking for whether the clock is ready or not.

Now, where is "clk_register_ready_gate" used?

 hws[HSE_CK] = clk_register_ready_gate(NULL,
    "hse_ck",
    hse_clk,
    RCC_CR + base,
    16, 17,
    0,
    &stm32rcc_lock);

 hws[LSE_CK] = clk_register_ready_gate(NULL,
    "lse_ck",
    lse_clk,
    RCC_BDCR + base,
    0, 1,
    0,
    &stm32rcc_lock);

Go read databook for RCC_CR and RCC_BDCR.
RCC_CR:
Bit 17 HSERDY: HSE clock ready flag
0: HSE clock is not ready (default after reset)
1: HSE clock is ready
Bit 16 HSEON: HSE clock enable
0: HSE is OFF (default after reset)
1: HSE is ON

RCC_BDCR:
Bit 1 LSERDY: LSE oscillator ready
0: LSE oscillator not ready (default after backup domain reset)
1: LSE oscillator ready
Bit 0 LSEON: LSE oscillator enabled
0: LSE oscillator OFF (default after backup domain reset)
1: LSE oscillator ON

From here, we know that "clk_register_ready_gate" still performs normal clock gating using the default gate-clock framework, but it adds a wait to allow the clock to stabilize. Once the clock has stabilized, this is indicated by the ready bit.

Ah, I couldn't understand the code without the databook.
Most drivers in mainline Linux don't come with a publicly available databook.
So I am thankful when an MCU manufacturer opens up its databook.
STM is the one to thank in this case. Thank you.