rv1126-uboot/drivers/mmc/dw_mmc.c

690 lines
16 KiB
C
Raw Normal View History

/*
* (C) Copyright 2012 SAMSUNG Electronics
* Jaehoon Chung <jh80.chung@samsung.com>
* Rajeshawari Shinde <rajeshwari.s@samsung.com>
*
* SPDX-License-Identifier: GPL-2.0+
*/
#include <bouncebuf.h>
#include <common.h>
#include <errno.h>
#include <malloc.h>
#include <memalign.h>
#include <mmc.h>
#include <dwmmc.h>
#define PAGE_SIZE 4096
mmc: dwmmc: Add stride PIO for better burst mode support This patch enables stride PIO for better burst mode support on some rockchip platforms, for instance rk3128, etc. It uses ldm and stm to make multiple accessing from CPU under some ARM architecture. Design Note: 1) Carefully test it beyond the armv7(m) core since I don't test them! If you disassemble the code, dwmci_memcpy_fromio may be changed to use ldmia.w and stmia.w, as well as some other optimization for push/pop stack operation, but they are all fine! 2) Do *NOT* remove noinline and __volatile attributes as you could fall into trouble by the optimization of GCC. And please invent new attributes if you use *OTHER* compilers which have their own keywords claiming the function to be a "plase don't inline the function and please don't reorder the groups". 3) If you want to use this feature for other rockchip platforms, you could append new config in rockchip_dw_mmc.c. But now we only have CONFIG_ROCKCHIP_RK3128 which means we just enable it for rk312x platforms. 4) Stride PIO is determined by both of host->stride_pio and the data payload. The intention of it is to make the block accessing faster. So now only enable it for who's data payload is larger(or equal to)than 512 Bytes. 5) MAX_STRIDE means we support burst MAX_STRIDE * 4 Bytes per stride. Of course you could change that, but please read the comment before it in the code and do it carefully. How to test? 1) Prepare a memory, for instance, 0x70000000 on RK3126c. => mw 0x70000000 0x55aa55aa 0x200 2) Check it to see if 0x70000000 ~ 0x700000200 are all 0x55aa55aa => md 0x70000000 0x200 70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U .... 3) Fetch data of 0x70000000 ~ 0x700000200 and wrrite them to eMMC, for instance, LBA 0x10000, and blk count is 1 as one blk means 0x200 bytes. => mmc write 0x70000000 0x10000 1 MMC write: dev # 0, block # 65536, count 1 ... 1 blocks written: OK 4) Clean the merory buffer and double check it => mw 0x70000000 0x0 0x200 => md 0x70000000 0x200 70000000: 00000000 00000000 00000000 00000000 ................ 70000010: 00000000 00000000 00000000 00000000 ................ 70000020: 00000000 00000000 00000000 00000000 ................ .... 5) Well, now let's read back the data from eMMC from the LBA we wrote before. => mmc read 0x70000000 0x10000 1 MMC read: dev # 0, block # 65536, count 1 ... 1 blocks read: OK 6) We expect the 0x70000000 ~ 0x700000200 should be 0x55aa55aa instead of 0x0 which is cleared by step 4). => md 0x70000000 0x200 70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U .... Great, we see that the write and read eMMC are finished well and the data payload is correct as expected! Change-Id: I9b68c335449550b95f1a8f5841d46821346e45af Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
2017-10-09 09:51:29 +00:00
/*
* Currently it supports read/write up to 8*8*4 Bytes per
* stride as a burst mode. Please note that if you change
* MAX_STRIDE, you should also update dwmci_memcpy_fromio
* to augment the groups of {ldm, stm}.
*/
#define MAX_STRIDE 64
#if CONFIG_ARM && CONFIG_CPU_V7
void noinline dwmci_memcpy_fromio(void *buffer, void *fifo_addr)
{
__asm__ __volatile__ (
"push {r2, r3, r4, r5, r6, r7, r8, r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"pop {r2, r3, r4, r5, r6,r7,r8,r9}\n"
:::"memory"
);
}
void noinline dwmci_memcpy_toio(void *buffer, void *fifo_addr)
{
__asm__ __volatile__ (
"push {r2, r3, r4, r5, r6, r7, r8, r9}\n"
"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"pop {r2, r3, r4, r5, r6,r7,r8,r9}\n"
:::"memory"
);
mmc: dwmmc: Add stride PIO for better burst mode support This patch enables stride PIO for better burst mode support on some rockchip platforms, for instance rk3128, etc. It uses ldm and stm to make multiple accessing from CPU under some ARM architecture. Design Note: 1) Carefully test it beyond the armv7(m) core since I don't test them! If you disassemble the code, dwmci_memcpy_fromio may be changed to use ldmia.w and stmia.w, as well as some other optimization for push/pop stack operation, but they are all fine! 2) Do *NOT* remove noinline and __volatile attributes as you could fall into trouble by the optimization of GCC. And please invent new attributes if you use *OTHER* compilers which have their own keywords claiming the function to be a "plase don't inline the function and please don't reorder the groups". 3) If you want to use this feature for other rockchip platforms, you could append new config in rockchip_dw_mmc.c. But now we only have CONFIG_ROCKCHIP_RK3128 which means we just enable it for rk312x platforms. 4) Stride PIO is determined by both of host->stride_pio and the data payload. The intention of it is to make the block accessing faster. So now only enable it for who's data payload is larger(or equal to)than 512 Bytes. 5) MAX_STRIDE means we support burst MAX_STRIDE * 4 Bytes per stride. Of course you could change that, but please read the comment before it in the code and do it carefully. How to test? 1) Prepare a memory, for instance, 0x70000000 on RK3126c. => mw 0x70000000 0x55aa55aa 0x200 2) Check it to see if 0x70000000 ~ 0x700000200 are all 0x55aa55aa => md 0x70000000 0x200 70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U .... 3) Fetch data of 0x70000000 ~ 0x700000200 and wrrite them to eMMC, for instance, LBA 0x10000, and blk count is 1 as one blk means 0x200 bytes. => mmc write 0x70000000 0x10000 1 MMC write: dev # 0, block # 65536, count 1 ... 1 blocks written: OK 4) Clean the merory buffer and double check it => mw 0x70000000 0x0 0x200 => md 0x70000000 0x200 70000000: 00000000 00000000 00000000 00000000 ................ 70000010: 00000000 00000000 00000000 00000000 ................ 70000020: 00000000 00000000 00000000 00000000 ................ .... 5) Well, now let's read back the data from eMMC from the LBA we wrote before. => mmc read 0x70000000 0x10000 1 MMC read: dev # 0, block # 65536, count 1 ... 1 blocks read: OK 6) We expect the 0x70000000 ~ 0x700000200 should be 0x55aa55aa instead of 0x0 which is cleared by step 4). => md 0x70000000 0x200 70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U .... Great, we see that the write and read eMMC are finished well and the data payload is correct as expected! Change-Id: I9b68c335449550b95f1a8f5841d46821346e45af Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
2017-10-09 09:51:29 +00:00
}
#else
void dwmci_memcpy_fromio(void *buffer, void *fifo_addr) {};
void dwmci_memcpy_toio(void *buffer, void *fifo_addr) {};
#endif
static int dwmci_wait_reset(struct dwmci_host *host, u32 value)
{
unsigned long timeout = 1000;
u32 ctrl;
dwmci_writel(host, DWMCI_CTRL, value);
while (timeout--) {
ctrl = dwmci_readl(host, DWMCI_CTRL);
if (!(ctrl & DWMCI_RESET_ALL))
return 1;
}
return 0;
}
static void dwmci_set_idma_desc(struct dwmci_idmac *idmac,
u32 desc0, u32 desc1, u32 desc2)
{
struct dwmci_idmac *desc = idmac;
desc->flags = desc0;
desc->cnt = desc1;
desc->addr = desc2;
desc->next_addr = (ulong)desc + sizeof(struct dwmci_idmac);
}
static void dwmci_prepare_data(struct dwmci_host *host,
struct mmc_data *data,
struct dwmci_idmac *cur_idmac,
void *bounce_buffer)
{
unsigned long ctrl;
unsigned int i = 0, flags, cnt, blk_cnt;
ulong data_start, data_end;
blk_cnt = data->blocks;
dwmci_wait_reset(host, DWMCI_CTRL_FIFO_RESET);
data_start = (ulong)cur_idmac;
dwmci_writel(host, DWMCI_DBADDR, (ulong)cur_idmac);
do {
flags = DWMCI_IDMAC_OWN | DWMCI_IDMAC_CH ;
flags |= (i == 0) ? DWMCI_IDMAC_FS : 0;
if (blk_cnt <= 8) {
flags |= DWMCI_IDMAC_LD;
cnt = data->blocksize * blk_cnt;
} else
cnt = data->blocksize * 8;
dwmci_set_idma_desc(cur_idmac, flags, cnt,
(ulong)bounce_buffer + (i * PAGE_SIZE));
if (blk_cnt <= 8)
break;
blk_cnt -= 8;
cur_idmac++;
i++;
} while(1);
data_end = (ulong)cur_idmac;
flush_dcache_range(data_start, data_end + ARCH_DMA_MINALIGN);
ctrl = dwmci_readl(host, DWMCI_CTRL);
ctrl |= DWMCI_IDMAC_EN | DWMCI_DMA_EN;
dwmci_writel(host, DWMCI_CTRL, ctrl);
ctrl = dwmci_readl(host, DWMCI_BMOD);
ctrl |= DWMCI_BMOD_IDMAC_FB | DWMCI_BMOD_IDMAC_EN;
dwmci_writel(host, DWMCI_BMOD, ctrl);
dwmci_writel(host, DWMCI_BLKSIZ, data->blocksize);
dwmci_writel(host, DWMCI_BYTCNT, data->blocksize * data->blocks);
}
static int dwmci_data_transfer(struct dwmci_host *host, struct mmc_data *data)
{
int ret = 0;
u32 timeout = 240000;
u32 status, ctrl, mask, size, i, len = 0;
u32 *buf = NULL;
ulong start = get_timer(0);
u32 fifo_depth = (((host->fifoth_val & RX_WMARK_MASK) >>
RX_WMARK_SHIFT) + 1) * 2;
mmc: dwmmc: Add stride PIO for better burst mode support This patch enables stride PIO for better burst mode support on some rockchip platforms, for instance rk3128, etc. It uses ldm and stm to make multiple accessing from CPU under some ARM architecture. Design Note: 1) Carefully test it beyond the armv7(m) core since I don't test them! If you disassemble the code, dwmci_memcpy_fromio may be changed to use ldmia.w and stmia.w, as well as some other optimization for push/pop stack operation, but they are all fine! 2) Do *NOT* remove noinline and __volatile attributes as you could fall into trouble by the optimization of GCC. And please invent new attributes if you use *OTHER* compilers which have their own keywords claiming the function to be a "plase don't inline the function and please don't reorder the groups". 3) If you want to use this feature for other rockchip platforms, you could append new config in rockchip_dw_mmc.c. But now we only have CONFIG_ROCKCHIP_RK3128 which means we just enable it for rk312x platforms. 4) Stride PIO is determined by both of host->stride_pio and the data payload. The intention of it is to make the block accessing faster. So now only enable it for who's data payload is larger(or equal to)than 512 Bytes. 5) MAX_STRIDE means we support burst MAX_STRIDE * 4 Bytes per stride. Of course you could change that, but please read the comment before it in the code and do it carefully. How to test? 1) Prepare a memory, for instance, 0x70000000 on RK3126c. => mw 0x70000000 0x55aa55aa 0x200 2) Check it to see if 0x70000000 ~ 0x700000200 are all 0x55aa55aa => md 0x70000000 0x200 70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U .... 3) Fetch data of 0x70000000 ~ 0x700000200 and wrrite them to eMMC, for instance, LBA 0x10000, and blk count is 1 as one blk means 0x200 bytes. => mmc write 0x70000000 0x10000 1 MMC write: dev # 0, block # 65536, count 1 ... 1 blocks written: OK 4) Clean the merory buffer and double check it => mw 0x70000000 0x0 0x200 => md 0x70000000 0x200 70000000: 00000000 00000000 00000000 00000000 ................ 70000010: 00000000 00000000 00000000 00000000 ................ 70000020: 00000000 00000000 00000000 00000000 ................ .... 5) Well, now let's read back the data from eMMC from the LBA we wrote before. => mmc read 0x70000000 0x10000 1 MMC read: dev # 0, block # 65536, count 1 ... 1 blocks read: OK 6) We expect the 0x70000000 ~ 0x700000200 should be 0x55aa55aa instead of 0x0 which is cleared by step 4). => md 0x70000000 0x200 70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U .... Great, we see that the write and read eMMC are finished well and the data payload is correct as expected! Change-Id: I9b68c335449550b95f1a8f5841d46821346e45af Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
2017-10-09 09:51:29 +00:00
bool stride;
size = data->blocksize * data->blocks / 4;
mmc: dwmmc: Add stride PIO for better burst mode support This patch enables stride PIO for better burst mode support on some rockchip platforms, for instance rk3128, etc. It uses ldm and stm to make multiple accessing from CPU under some ARM architecture. Design Note: 1) Carefully test it beyond the armv7(m) core since I don't test them! If you disassemble the code, dwmci_memcpy_fromio may be changed to use ldmia.w and stmia.w, as well as some other optimization for push/pop stack operation, but they are all fine! 2) Do *NOT* remove noinline and __volatile attributes as you could fall into trouble by the optimization of GCC. And please invent new attributes if you use *OTHER* compilers which have their own keywords claiming the function to be a "plase don't inline the function and please don't reorder the groups". 3) If you want to use this feature for other rockchip platforms, you could append new config in rockchip_dw_mmc.c. But now we only have CONFIG_ROCKCHIP_RK3128 which means we just enable it for rk312x platforms. 4) Stride PIO is determined by both of host->stride_pio and the data payload. The intention of it is to make the block accessing faster. So now only enable it for who's data payload is larger(or equal to)than 512 Bytes. 5) MAX_STRIDE means we support burst MAX_STRIDE * 4 Bytes per stride. Of course you could change that, but please read the comment before it in the code and do it carefully. How to test? 1) Prepare a memory, for instance, 0x70000000 on RK3126c. => mw 0x70000000 0x55aa55aa 0x200 2) Check it to see if 0x70000000 ~ 0x700000200 are all 0x55aa55aa => md 0x70000000 0x200 70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U .... 3) Fetch data of 0x70000000 ~ 0x700000200 and wrrite them to eMMC, for instance, LBA 0x10000, and blk count is 1 as one blk means 0x200 bytes. => mmc write 0x70000000 0x10000 1 MMC write: dev # 0, block # 65536, count 1 ... 1 blocks written: OK 4) Clean the merory buffer and double check it => mw 0x70000000 0x0 0x200 => md 0x70000000 0x200 70000000: 00000000 00000000 00000000 00000000 ................ 70000010: 00000000 00000000 00000000 00000000 ................ 70000020: 00000000 00000000 00000000 00000000 ................ .... 5) Well, now let's read back the data from eMMC from the LBA we wrote before. => mmc read 0x70000000 0x10000 1 MMC read: dev # 0, block # 65536, count 1 ... 1 blocks read: OK 6) We expect the 0x70000000 ~ 0x700000200 should be 0x55aa55aa instead of 0x0 which is cleared by step 4). => md 0x70000000 0x200 70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U .... Great, we see that the write and read eMMC are finished well and the data payload is correct as expected! Change-Id: I9b68c335449550b95f1a8f5841d46821346e45af Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
2017-10-09 09:51:29 +00:00
/* Still use legacy PIO mode if size < 512(128 * 4) Bytes */
stride = host->stride_pio && size > 128;
if (data->flags == MMC_DATA_READ)
buf = (unsigned int *)data->dest;
else
buf = (unsigned int *)data->src;
for (;;) {
mask = dwmci_readl(host, DWMCI_RINTSTS);
/* Error during data transfer. */
if (mask & (DWMCI_DATA_ERR | DWMCI_DATA_TOUT)) {
debug("%s: DATA ERROR!\n", __func__);
dwmci_wait_reset(host, DWMCI_RESET_ALL);
dwmci_writel(host, DWMCI_CMD, DWMCI_CMD_PRV_DAT_WAIT |
DWMCI_CMD_UPD_CLK | DWMCI_CMD_START);
do {
status = dwmci_readl(host, DWMCI_CMD);
if (timeout-- < 0)
ret = -ETIMEDOUT;
} while (status & DWMCI_CMD_START);
if (!host->fifo_mode) {
ctrl = dwmci_readl(host, DWMCI_BMOD);
ctrl |= DWMCI_BMOD_IDMAC_RESET;
dwmci_writel(host, DWMCI_BMOD, ctrl);
}
ret = -EINVAL;
break;
}
if (host->fifo_mode && size) {
len = 0;
if (data->flags == MMC_DATA_READ &&
(mask & DWMCI_INTMSK_RXDR)) {
while (size) {
len = dwmci_readl(host, DWMCI_STATUS);
len = (len >> DWMCI_FIFO_SHIFT) &
DWMCI_FIFO_MASK;
len = min(size, len);
mmc: dwmmc: Add stride PIO for better burst mode support This patch enables stride PIO for better burst mode support on some rockchip platforms, for instance rk3128, etc. It uses ldm and stm to make multiple accessing from CPU under some ARM architecture. Design Note: 1) Carefully test it beyond the armv7(m) core since I don't test them! If you disassemble the code, dwmci_memcpy_fromio may be changed to use ldmia.w and stmia.w, as well as some other optimization for push/pop stack operation, but they are all fine! 2) Do *NOT* remove noinline and __volatile attributes as you could fall into trouble by the optimization of GCC. And please invent new attributes if you use *OTHER* compilers which have their own keywords claiming the function to be a "plase don't inline the function and please don't reorder the groups". 3) If you want to use this feature for other rockchip platforms, you could append new config in rockchip_dw_mmc.c. But now we only have CONFIG_ROCKCHIP_RK3128 which means we just enable it for rk312x platforms. 4) Stride PIO is determined by both of host->stride_pio and the data payload. The intention of it is to make the block accessing faster. So now only enable it for who's data payload is larger(or equal to)than 512 Bytes. 5) MAX_STRIDE means we support burst MAX_STRIDE * 4 Bytes per stride. Of course you could change that, but please read the comment before it in the code and do it carefully. How to test? 1) Prepare a memory, for instance, 0x70000000 on RK3126c. => mw 0x70000000 0x55aa55aa 0x200 2) Check it to see if 0x70000000 ~ 0x700000200 are all 0x55aa55aa => md 0x70000000 0x200 70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U .... 3) Fetch data of 0x70000000 ~ 0x700000200 and wrrite them to eMMC, for instance, LBA 0x10000, and blk count is 1 as one blk means 0x200 bytes. => mmc write 0x70000000 0x10000 1 MMC write: dev # 0, block # 65536, count 1 ... 1 blocks written: OK 4) Clean the merory buffer and double check it => mw 0x70000000 0x0 0x200 => md 0x70000000 0x200 70000000: 00000000 00000000 00000000 00000000 ................ 70000010: 00000000 00000000 00000000 00000000 ................ 70000020: 00000000 00000000 00000000 00000000 ................ .... 5) Well, now let's read back the data from eMMC from the LBA we wrote before. => mmc read 0x70000000 0x10000 1 MMC read: dev # 0, block # 65536, count 1 ... 1 blocks read: OK 6) We expect the 0x70000000 ~ 0x700000200 should be 0x55aa55aa instead of 0x0 which is cleared by step 4). => md 0x70000000 0x200 70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U .... Great, we see that the write and read eMMC are finished well and the data payload is correct as expected! Change-Id: I9b68c335449550b95f1a8f5841d46821346e45af Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
2017-10-09 09:51:29 +00:00
if (!stride) {
/* Legacy pio mode */
for (i = 0; i < len; i++)
*buf++ = dwmci_readl(host, DWMCI_DATA);
goto read_again;
}
/* dwmci_memcpy_fromio now bursts 256 Bytes once */
if (len < MAX_STRIDE)
continue;
for (i = 0; i < len / MAX_STRIDE; i++) {
dwmci_memcpy_fromio(buf, host->ioaddr + DWMCI_DATA);
buf += MAX_STRIDE;
}
len = i * MAX_STRIDE;
read_again:
size = size > len ? (size - len) : 0;
}
dwmci_writel(host, DWMCI_RINTSTS,
DWMCI_INTMSK_RXDR);
} else if (data->flags == MMC_DATA_WRITE &&
(mask & DWMCI_INTMSK_TXDR)) {
while (size) {
len = dwmci_readl(host, DWMCI_STATUS);
len = fifo_depth - ((len >>
DWMCI_FIFO_SHIFT) &
DWMCI_FIFO_MASK);
len = min(size, len);
mmc: dwmmc: Add stride PIO for better burst mode support This patch enables stride PIO for better burst mode support on some rockchip platforms, for instance rk3128, etc. It uses ldm and stm to make multiple accessing from CPU under some ARM architecture. Design Note: 1) Carefully test it beyond the armv7(m) core since I don't test them! If you disassemble the code, dwmci_memcpy_fromio may be changed to use ldmia.w and stmia.w, as well as some other optimization for push/pop stack operation, but they are all fine! 2) Do *NOT* remove noinline and __volatile attributes as you could fall into trouble by the optimization of GCC. And please invent new attributes if you use *OTHER* compilers which have their own keywords claiming the function to be a "plase don't inline the function and please don't reorder the groups". 3) If you want to use this feature for other rockchip platforms, you could append new config in rockchip_dw_mmc.c. But now we only have CONFIG_ROCKCHIP_RK3128 which means we just enable it for rk312x platforms. 4) Stride PIO is determined by both of host->stride_pio and the data payload. The intention of it is to make the block accessing faster. So now only enable it for who's data payload is larger(or equal to)than 512 Bytes. 5) MAX_STRIDE means we support burst MAX_STRIDE * 4 Bytes per stride. Of course you could change that, but please read the comment before it in the code and do it carefully. How to test? 1) Prepare a memory, for instance, 0x70000000 on RK3126c. => mw 0x70000000 0x55aa55aa 0x200 2) Check it to see if 0x70000000 ~ 0x700000200 are all 0x55aa55aa => md 0x70000000 0x200 70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U .... 3) Fetch data of 0x70000000 ~ 0x700000200 and wrrite them to eMMC, for instance, LBA 0x10000, and blk count is 1 as one blk means 0x200 bytes. => mmc write 0x70000000 0x10000 1 MMC write: dev # 0, block # 65536, count 1 ... 1 blocks written: OK 4) Clean the merory buffer and double check it => mw 0x70000000 0x0 0x200 => md 0x70000000 0x200 70000000: 00000000 00000000 00000000 00000000 ................ 70000010: 00000000 00000000 00000000 00000000 ................ 70000020: 00000000 00000000 00000000 00000000 ................ .... 5) Well, now let's read back the data from eMMC from the LBA we wrote before. => mmc read 0x70000000 0x10000 1 MMC read: dev # 0, block # 65536, count 1 ... 1 blocks read: OK 6) We expect the 0x70000000 ~ 0x700000200 should be 0x55aa55aa instead of 0x0 which is cleared by step 4). => md 0x70000000 0x200 70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U 70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa .U.U.U.U.U.U.U.U .... Great, we see that the write and read eMMC are finished well and the data payload is correct as expected! Change-Id: I9b68c335449550b95f1a8f5841d46821346e45af Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
2017-10-09 09:51:29 +00:00
if (!stride) {
for (i = 0; i < len; i++)
dwmci_writel(host, DWMCI_DATA,
*buf++);
goto write_again;
}
/* dwmci_memcpy_toio now bursts 256 Bytes once */
if (len < MAX_STRIDE)
continue;
for (i = 0; i < len / MAX_STRIDE; i++) {
dwmci_memcpy_toio(buf, host->ioaddr + DWMCI_DATA);
buf += MAX_STRIDE;
}
len = i * MAX_STRIDE;
write_again:
size = size > len ? (size - len) : 0;
}
dwmci_writel(host, DWMCI_RINTSTS,
DWMCI_INTMSK_TXDR);
}
}
/* Data arrived correctly. */
if (mask & DWMCI_INTMSK_DTO) {
ret = 0;
break;
}
/* Check for timeout. */
if (get_timer(start) > timeout) {
debug("%s: Timeout waiting for data!\n",
__func__);
ret = -ETIMEDOUT;
break;
}
}
dwmci_writel(host, DWMCI_RINTSTS, mask);
return ret;
}
static int dwmci_set_transfer_mode(struct dwmci_host *host,
struct mmc_data *data)
{
unsigned long mode;
mode = DWMCI_CMD_DATA_EXP;
if (data->flags & MMC_DATA_WRITE)
mode |= DWMCI_CMD_RW;
return mode;
}
#ifdef CONFIG_DM_MMC
static int dwmci_send_cmd(struct udevice *dev, struct mmc_cmd *cmd,
struct mmc_data *data)
{
struct mmc *mmc = mmc_get_mmc_dev(dev);
#else
static int dwmci_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
struct mmc_data *data)
{
#endif
struct dwmci_host *host = mmc->priv;
ALLOC_CACHE_ALIGN_BUFFER(struct dwmci_idmac, cur_idmac,
data ? DIV_ROUND_UP(data->blocks, 8) : 0);
int ret = 0, flags = 0, i;
unsigned int timeout = 500;
u32 retry = 100000;
u32 mask, ctrl;
ulong start = get_timer(0);
struct bounce_buffer bbstate;
while (dwmci_readl(host, DWMCI_STATUS) & DWMCI_BUSY) {
if (get_timer(start) > timeout) {
debug("%s: Timeout on data busy\n", __func__);
return -ETIMEDOUT;
}
}
dwmci_writel(host, DWMCI_RINTSTS, DWMCI_INTMSK_ALL);
if (data) {
if (host->fifo_mode) {
dwmci_writel(host, DWMCI_BLKSIZ, data->blocksize);
dwmci_writel(host, DWMCI_BYTCNT,
data->blocksize * data->blocks);
dwmci_wait_reset(host, DWMCI_CTRL_FIFO_RESET);
} else {
if (data->flags == MMC_DATA_READ) {
bounce_buffer_start(&bbstate, (void*)data->dest,
data->blocksize *
data->blocks, GEN_BB_WRITE);
} else {
bounce_buffer_start(&bbstate, (void*)data->src,
data->blocksize *
data->blocks, GEN_BB_READ);
}
dwmci_prepare_data(host, data, cur_idmac,
bbstate.bounce_buffer);
}
}
dwmci_writel(host, DWMCI_CMDARG, cmd->cmdarg);
if (data)
flags = dwmci_set_transfer_mode(host, data);
if ((cmd->resp_type & MMC_RSP_136) && (cmd->resp_type & MMC_RSP_BUSY))
return -1;
if (cmd->cmdidx == MMC_CMD_STOP_TRANSMISSION)
flags |= DWMCI_CMD_ABORT_STOP;
else
flags |= DWMCI_CMD_PRV_DAT_WAIT;
if (cmd->resp_type & MMC_RSP_PRESENT) {
flags |= DWMCI_CMD_RESP_EXP;
if (cmd->resp_type & MMC_RSP_136)
flags |= DWMCI_CMD_RESP_LENGTH;
}
if (cmd->resp_type & MMC_RSP_CRC)
flags |= DWMCI_CMD_CHECK_CRC;
flags |= (cmd->cmdidx | DWMCI_CMD_START | DWMCI_CMD_USE_HOLD_REG);
debug("Sending CMD%d\n",cmd->cmdidx);
dwmci_writel(host, DWMCI_CMD, flags);
for (i = 0; i < retry; i++) {
mask = dwmci_readl(host, DWMCI_RINTSTS);
if (mask & DWMCI_INTMSK_CDONE) {
if (!data)
dwmci_writel(host, DWMCI_RINTSTS, mask);
break;
}
}
if (i == retry) {
debug("%s: Timeout.\n", __func__);
return -ETIMEDOUT;
}
if (mask & DWMCI_INTMSK_RTO) {
/*
* Timeout here is not necessarily fatal. (e)MMC cards
* will splat here when they receive CMD55 as they do
* not support this command and that is exactly the way
* to tell them apart from SD cards. Thus, this output
* below shall be debug(). eMMC cards also do not favor
* CMD8, please keep that in mind.
*/
debug("%s: Response Timeout.\n", __func__);
return -ETIMEDOUT;
} else if (mask & DWMCI_INTMSK_RE) {
debug("%s: Response Error.\n", __func__);
return -EIO;
}
if (cmd->resp_type & MMC_RSP_PRESENT) {
if (cmd->resp_type & MMC_RSP_136) {
cmd->response[0] = dwmci_readl(host, DWMCI_RESP3);
cmd->response[1] = dwmci_readl(host, DWMCI_RESP2);
cmd->response[2] = dwmci_readl(host, DWMCI_RESP1);
cmd->response[3] = dwmci_readl(host, DWMCI_RESP0);
} else {
cmd->response[0] = dwmci_readl(host, DWMCI_RESP0);
}
}
if (data) {
ret = dwmci_data_transfer(host, data);
/* only dma mode need it */
if (!host->fifo_mode) {
ctrl = dwmci_readl(host, DWMCI_CTRL);
ctrl &= ~(DWMCI_DMA_EN);
dwmci_writel(host, DWMCI_CTRL, ctrl);
bounce_buffer_stop(&bbstate);
}
}
udelay(100);
return ret;
}
static int dwmci_setup_bus(struct dwmci_host *host, u32 freq)
{
u32 div, status;
int timeout = 10000;
unsigned long sclk;
if (freq == 0)
return 0;
/*
* If host->get_mmc_clk isn't defined,
* then assume that host->bus_hz is source clock value.
* host->bus_hz should be set by user.
*/
if (host->get_mmc_clk)
sclk = host->get_mmc_clk(host, freq);
else if (host->bus_hz)
sclk = host->bus_hz;
else {
debug("%s: Didn't get source clock value.\n", __func__);
return -EINVAL;
}
if (sclk == freq)
div = 0; /* bypass mode */
else
div = DIV_ROUND_UP(sclk, 2 * freq);
dwmci_writel(host, DWMCI_CLKENA, 0);
dwmci_writel(host, DWMCI_CLKSRC, 0);
dwmci_writel(host, DWMCI_CLKDIV, div);
dwmci_writel(host, DWMCI_CMD, DWMCI_CMD_PRV_DAT_WAIT |
DWMCI_CMD_UPD_CLK | DWMCI_CMD_START);
do {
status = dwmci_readl(host, DWMCI_CMD);
if (timeout-- < 0) {
debug("%s: Timeout!\n", __func__);
return -ETIMEDOUT;
}
} while (status & DWMCI_CMD_START);
dwmci_writel(host, DWMCI_CLKENA, DWMCI_CLKEN_ENABLE |
DWMCI_CLKEN_LOW_PWR);
dwmci_writel(host, DWMCI_CMD, DWMCI_CMD_PRV_DAT_WAIT |
DWMCI_CMD_UPD_CLK | DWMCI_CMD_START);
timeout = 10000;
do {
status = dwmci_readl(host, DWMCI_CMD);
if (timeout-- < 0) {
debug("%s: Timeout!\n", __func__);
return -ETIMEDOUT;
}
} while (status & DWMCI_CMD_START);
host->clock = freq;
return 0;
}
#ifdef CONFIG_DM_MMC
static bool dwmci_card_busy(struct udevice *dev)
{
struct mmc *mmc = mmc_get_mmc_dev(dev);
#else
static bool dwmci_card_busy(struct mmc *mmc)
{
#endif
u32 status;
struct dwmci_host *host = (struct dwmci_host *)mmc->priv;
/*
* Check the busy bit which is low when DAT[3:0]
* (the data lines) are 0000
*/
status = dwmci_readl(host, DWMCI_STATUS);
return !!(status & DWMCI_BUSY);
}
#ifdef CONFIG_DM_MMC
static int dwmci_execute_tuning(struct udevice *dev, u32 opcode)
{
struct mmc *mmc = mmc_get_mmc_dev(dev);
#else
static int dwmci_execute_tuning(struct mmc *mmc, u32 opcode)
{
#endif
struct dwmci_host *host = (struct dwmci_host *)mmc->priv;
if (!host->execute_tuning)
return -EIO;
return host->execute_tuning(host, opcode);
}
#ifdef CONFIG_DM_MMC
static int dwmci_set_ios(struct udevice *dev)
{
struct mmc *mmc = mmc_get_mmc_dev(dev);
#else
static int dwmci_set_ios(struct mmc *mmc)
{
#endif
struct dwmci_host *host = (struct dwmci_host *)mmc->priv;
u32 ctype, regs;
debug("Buswidth = %d, clock: %d\n", mmc->bus_width, mmc->clock);
dwmci_setup_bus(host, mmc->clock);
switch (mmc->bus_width) {
case 8:
ctype = DWMCI_CTYPE_8BIT;
break;
case 4:
ctype = DWMCI_CTYPE_4BIT;
break;
default:
ctype = DWMCI_CTYPE_1BIT;
break;
}
dwmci_writel(host, DWMCI_CTYPE, ctype);
regs = dwmci_readl(host, DWMCI_UHS_REG);
if (mmc_card_ddr(mmc))
regs |= DWMCI_DDR_MODE;
else
regs &= ~DWMCI_DDR_MODE;
dwmci_writel(host, DWMCI_UHS_REG, regs);
if (host->clksel)
host->clksel(host);
return 0;
}
static int dwmci_init(struct mmc *mmc)
{
struct dwmci_host *host = mmc->priv;
uint32_t use_dma;
if (host->board_init)
host->board_init(host);
dwmci_writel(host, DWMCI_PWREN, 1);
if (!dwmci_wait_reset(host, DWMCI_RESET_ALL)) {
debug("%s[%d] Fail-reset!!\n", __func__, __LINE__);
return -EIO;
}
use_dma = SDMMC_GET_TRANS_MODE(dwmci_readl(host, DWMCI_HCON));
if (use_dma == DMA_INTERFACE_IDMA) {
host->fifo_mode = 0;
} else {
host->fifo_mode = 1;
}
/* Enumerate at 400KHz */
dwmci_setup_bus(host, mmc->cfg->f_min);
dwmci_writel(host, DWMCI_RINTSTS, 0xFFFFFFFF);
dwmci_writel(host, DWMCI_INTMASK, 0);
dwmci_writel(host, DWMCI_TMOUT, 0xFFFFFFFF);
dwmci_writel(host, DWMCI_IDINTEN, 0);
dwmci_writel(host, DWMCI_BMOD, 1);
if (!host->fifoth_val) {
uint32_t fifo_size;
fifo_size = dwmci_readl(host, DWMCI_FIFOTH);
fifo_size = ((fifo_size & RX_WMARK_MASK) >> RX_WMARK_SHIFT) + 1;
host->fifoth_val = MSIZE(0x2) | RX_WMARK(fifo_size / 2 - 1) |
TX_WMARK(fifo_size / 2);
}
dwmci_writel(host, DWMCI_FIFOTH, host->fifoth_val);
dwmci_writel(host, DWMCI_CLKENA, 0);
dwmci_writel(host, DWMCI_CLKSRC, 0);
return 0;
}
#ifdef CONFIG_DM_MMC
int dwmci_probe(struct udevice *dev)
{
struct mmc *mmc = mmc_get_mmc_dev(dev);
return dwmci_init(mmc);
}
const struct dm_mmc_ops dm_dwmci_ops = {
.card_busy = dwmci_card_busy,
.send_cmd = dwmci_send_cmd,
.set_ios = dwmci_set_ios,
.execute_tuning = dwmci_execute_tuning,
};
#else
static const struct mmc_ops dwmci_ops = {
.card_busy = dwmci_card_busy,
.send_cmd = dwmci_send_cmd,
.set_ios = dwmci_set_ios,
.init = dwmci_init,
.execute_tuning = dwmci_execute_tuning,
};
#endif
void dwmci_setup_cfg(struct mmc_config *cfg, struct dwmci_host *host,
u32 max_clk, u32 min_clk)
{
cfg->name = host->name;
#ifndef CONFIG_DM_MMC
cfg->ops = &dwmci_ops;
#endif
cfg->f_min = min_clk;
cfg->f_max = max_clk;
cfg->voltages = MMC_VDD_32_33 | MMC_VDD_33_34 | MMC_VDD_165_195;
cfg->host_caps = host->caps;
if (host->buswidth == 8) {
cfg->host_caps |= MMC_MODE_8BIT | MMC_MODE_4BIT;
} else {
cfg->host_caps |= MMC_MODE_4BIT;
cfg->host_caps &= ~MMC_MODE_8BIT;
}
cfg->host_caps |= MMC_MODE_HS | MMC_MODE_HS_52MHz;
cfg->b_max = CONFIG_SYS_MMC_MAX_BLK_COUNT;
}
#ifdef CONFIG_BLK
int dwmci_bind(struct udevice *dev, struct mmc *mmc, struct mmc_config *cfg)
{
return mmc_bind(dev, mmc, cfg);
}
#else
int add_dwmci(struct dwmci_host *host, u32 max_clk, u32 min_clk)
{
dwmci_setup_cfg(&host->cfg, host, max_clk, min_clk);
host->mmc = mmc_create(&host->cfg, host);
if (host->mmc == NULL)
return -1;
return 0;
}
#endif