From: Aaron Williams <awilliams@marvell.com>
Date: Thu, 7 Apr 2022 07:11:39 +0000 (+0200)
Subject: mips: octeon: Add cvmx-pko3-compat.c
X-Git-Tag: v2025.01-rc5-pxa1908~1420^2~34
X-Git-Url: http://git.dujemihanovic.xyz/img/static/html/%7B%7B?a=commitdiff_plain;h=53a422cc5056878f2fb10f030017b38515aab3ef;p=u-boot.git

mips: octeon: Add cvmx-pko3-compat.c

Import cvmx-pko3-compat.c from 2013 U-Boot. It will be used by the later
added drivers to support networking on the MIPS Octeon II / III
platforms.

Signed-off-by: Aaron Williams <awilliams@marvell.com>
Signed-off-by: Stefan Roese <sr@denx.de>
---

diff --git a/arch/mips/mach-octeon/cvmx-pko3-compat.c b/arch/mips/mach-octeon/cvmx-pko3-compat.c
new file mode 100644
index 0000000000..3e142322db
--- /dev/null
+++ b/arch/mips/mach-octeon/cvmx-pko3-compat.c
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018-2022 Marvell International Ltd.
+ *
+ */
+
+#include <errno.h>
+#include <log.h>
+#include <time.h>
+#include <linux/delay.h>
+
+#include <mach/cvmx-regs.h>
+#include <mach/cvmx-csr.h>
+#include <mach/cvmx-bootmem.h>
+#include <mach/octeon-model.h>
+#include <mach/cvmx-fuse.h>
+#include <mach/octeon-feature.h>
+#include <mach/cvmx-qlm.h>
+#include <mach/octeon_qlm.h>
+#include <mach/cvmx-pcie.h>
+#include <mach/cvmx-coremask.h>
+
+#include <mach/cvmx-agl-defs.h>
+#include <mach/cvmx-bgxx-defs.h>
+#include <mach/cvmx-ciu-defs.h>
+#include <mach/cvmx-gmxx-defs.h>
+#include <mach/cvmx-gserx-defs.h>
+#include <mach/cvmx-ilk-defs.h>
+#include <mach/cvmx-ipd-defs.h>
+#include <mach/cvmx-pcsx-defs.h>
+#include <mach/cvmx-pcsxx-defs.h>
+#include <mach/cvmx-pki-defs.h>
+#include <mach/cvmx-pko-defs.h>
+#include <mach/cvmx-xcv-defs.h>
+
+#include <mach/cvmx-scratch.h>
+#include <mach/cvmx-hwfau.h>
+#include <mach/cvmx-fau.h>
+
+#include <mach/cvmx-hwpko.h>
+#include <mach/cvmx-ilk.h>
+#include <mach/cvmx-pki.h>
+#include <mach/cvmx-pko3.h>
+#include <mach/cvmx-pko3-queue.h>
+#include <mach/cvmx-pko3-resources.h>
+
+#include <mach/cvmx-helper.h>
+#include <mach/cvmx-helper-board.h>
+#include <mach/cvmx-helper-cfg.h>
+
+#include <mach/cvmx-helper-bgx.h>
+#include <mach/cvmx-helper-cfg.h>
+#include <mach/cvmx-helper-util.h>
+#include <mach/cvmx-helper-pki.h>
+
+/* #undef CVMX_ENABLE_PARAMETER_CHECKING */
+/* #define CVMX_ENABLE_PARAMETER_CHECKING 1 */
+/* #define	__PKO3_NATIVE_PTR */
+
+static inline u64 cvmx_pko3_legacy_paddr(unsigned int node, u64 addr)
+{
+	u64 paddr;
+
+	paddr = node;
+	paddr = (addr & ((1ull << 40) - 1)) | (paddr << 40);
+	return paddr;
+}
+
+#if CVMX_ENABLE_PARAMETER_CHECKING
+/**
+ * @INTERNAL
+ *
+ * Verify the integrity of a legacy buffer link pointer,
+ *
+ * Note that the IPD/PIP/PKO hardware would sometimes
+ * round-up the buf_ptr->size field of the last buffer in a chain  to the next
+ * cache line size, so the sum of buf_ptr->size
+ * fields for a packet may exceed total_bytes by up to 127 bytes.
+ *
+ * @returns 0 on success, a negative number on error.
+ */
+static int cvmx_pko3_legacy_bufptr_validate(cvmx_buf_ptr_t buf_ptr,
+					    unsigned int gather,
+					    unsigned int buffers,
+					    unsigned int total_bytes)
+{
+	unsigned int node = cvmx_get_node_num();
+	unsigned int segs = 0, bytes = 0;
+	unsigned int phys_addr;
+	cvmx_buf_ptr_t ptr;
+	int delta;
+
+	if (buffers == 0) {
+		return -1;
+	} else if (buffers == 1) {
+		delta = buf_ptr.s.size - total_bytes;
+		if (delta < 0 || delta > 127)
+			return -2;
+	} else if (gather) {
+		cvmx_buf_ptr_t *vptr;
+		/* Validate gather list */
+		if (buf_ptr.s.size < buffers)
+			return -3;
+		phys_addr = cvmx_pko3_legacy_paddr(node, buf_ptr.s.addr);
+		vptr = cvmx_phys_to_ptr(phys_addr);
+		for (segs = 0; segs < buffers; segs++)
+			bytes += vptr[segs].s.size;
+		delta = bytes - total_bytes;
+		if (delta < 0 || delta > 127)
+			return -4;
+	} else {
+		void *vptr;
+		/* Validate linked buffers */
+		ptr = buf_ptr;
+		for (segs = 0; segs < buffers; segs++) {
+			bytes += ptr.s.size;
+			phys_addr = cvmx_pko3_legacy_paddr(node, ptr.s.addr);
+			vptr = cvmx_phys_to_ptr(phys_addr);
+			memcpy(&ptr, vptr - sizeof(u64), sizeof(u64));
+		}
+		delta = bytes - total_bytes;
+		if (delta < 0 || delta > 127)
+			return -5;
+	}
+	return 0;
+}
+#endif /* CVMX_ENABLE_PARAMETER_CHECKING */
+
+/*
+ * @INTERNAL
+ *
+ * Implementation note:
+ * When the packet is sure to not need a jump_buf,
+ * it will be written directly into cvmseg.
+ * When the packet might not fit into cvmseg with all
+ * of its descriptors, a jump_buf is allocated a priori,
+ * and only header is first placed into cvmseg, all other
+ * descriptors are placed into jump_buf, and finally
+ * the PKO_SEND_JUMP_S is written to cvmseg.
+ * This is because if there are no EXT or TSO descriptors,
+ * then HDR must be first, and JMP second and that is all
+ * that should go into cvmseg.
+ */
+struct __cvmx_pko3_legacy_desc {
+	u64 *cmd_words;
+	u64 *jump_buf_base_ptr;
+	unsigned short word_count;
+	short last_pool;
+	u8 port_node;
+	u8 aura_node;
+	u8 jump_buf_size;
+};
+
+/**
+ * @INTERNAL
+ *
+ * Add a subdescriptor into a command buffer,
+ * and handle command-buffer overflow by allocating a JUMP_s buffer
+ * from PKO3 internal AURA.
+ */
+static int __cvmx_pko3_cmd_subdc_add(struct __cvmx_pko3_legacy_desc *desc,
+				     u64 subdc)
+{
+	/* SEND_JUMP_S missing on Pass1.X */
+	if (desc->word_count >= 15) {
+		printf("%s: ERROR: too many segments\n", __func__);
+		return -EBADF;
+	}
+
+	/* Handle small commands simply */
+	if (cvmx_likely(!desc->jump_buf_base_ptr)) {
+		desc->cmd_words[desc->word_count] = subdc;
+		(desc->word_count)++;
+		return desc->word_count;
+	}
+
+	if (cvmx_unlikely(desc->jump_buf_size >= 255))
+		return -ENOMEM;
+
+	desc->jump_buf_base_ptr[desc->jump_buf_size++] = subdc;
+
+	return desc->word_count + desc->jump_buf_size;
+}
+
+/**
+ * @INTERNAL
+ *
+ * Finalize command buffer
+ *
+ * @returns: number of command words in command buffer and jump buffer
+ * or negative number on error.
+ */
+
+static int __cvmx_pko3_cmd_done(struct __cvmx_pko3_legacy_desc *desc)
+{
+	short pko_aura;
+	cvmx_pko_buf_ptr_t jump_s;
+	cvmx_pko_send_aura_t aura_s;
+
+	/* no jump buffer, nothing to do */
+	if (!desc->jump_buf_base_ptr)
+		return desc->word_count;
+
+	desc->word_count++;
+
+	/* Verify number of words is 15 */
+	if (desc->word_count != 2) {
+		printf("ERROR: %s: internal error, word_count=%d\n", __func__,
+		       desc->word_count);
+		return -EINVAL;
+	}
+
+	/* Add SEND_AURA_S at the end of jump_buf */
+	pko_aura = __cvmx_pko3_aura_get(desc->port_node);
+
+	aura_s.u64 = 0;
+	aura_s.s.aura = pko_aura;
+	aura_s.s.offset = 0;
+	aura_s.s.alg = AURAALG_NOP;
+	aura_s.s.subdc4 = CVMX_PKO_SENDSUBDC_AURA;
+
+	desc->jump_buf_base_ptr[desc->jump_buf_size++] = aura_s.u64;
+
+	/* Add SEND_JUMPS to point to jump_buf */
+	jump_s.u64 = 0;
+	jump_s.s.subdc3 = CVMX_PKO_SENDSUBDC_JUMP;
+	jump_s.s.addr = cvmx_ptr_to_phys(desc->jump_buf_base_ptr);
+	jump_s.s.i = 1; /* F=1: Free this buffer when done */
+	jump_s.s.size = desc->jump_buf_size;
+	desc->cmd_words[1] = jump_s.u64;
+
+	return desc->word_count + desc->jump_buf_size;
+}
+
+/**
+ * @INTERNAL
+ *
+ * Handle buffer pools for PKO legacy transmit operation
+ */
+static inline int cvmx_pko3_legacy_pool(struct __cvmx_pko3_legacy_desc *desc,
+					int pool)
+{
+	cvmx_pko_send_aura_t aura_s;
+	unsigned int aura;
+
+	if (cvmx_unlikely(desc->last_pool == pool))
+		return 0;
+
+	aura = desc->aura_node << 10; /* LAURA=AURA[0..9] */
+	aura |= pool;
+
+	if (cvmx_likely(desc->last_pool < 0)) {
+		cvmx_pko_send_hdr_t *hdr_s;
+
+		hdr_s = (void *)&desc->cmd_words[0];
+		/* Create AURA from legacy pool (assume LAURA==POOL */
+		hdr_s->s.aura = aura;
+		desc->last_pool = pool;
+		return 0;
+	}
+
+	aura_s.u64 = 0;
+	aura_s.s.subdc4 = CVMX_PKO_SENDSUBDC_AURA;
+	aura_s.s.offset = 0;
+	aura_s.s.alg = AURAALG_NOP;
+	aura |= pool;
+	aura_s.s.aura = aura;
+	desc->last_pool = pool;
+	return __cvmx_pko3_cmd_subdc_add(desc, aura_s.u64);
+}
+
+/**
+ * @INTERNAL
+ *
+ * Backward compatibility for packet transmission using legacy PKO command.
+ *
+ * NOTE: Only supports output on node-local ports.
+ *
+ * TBD: Could embed destination node in extended DQ number.
+ */
+cvmx_pko_return_value_t
+cvmx_pko3_legacy_xmit(unsigned int dq, cvmx_pko_command_word0_t pko_command,
+		      cvmx_buf_ptr_t packet, u64 addr, bool tag_sw)
+{
+	cvmx_pko_query_rtn_t pko_status;
+	cvmx_pko_send_hdr_t *hdr_s;
+	struct __cvmx_pko3_legacy_desc desc;
+	u8 *data_ptr;
+	unsigned int node, seg_cnt;
+	int res;
+	cvmx_buf_ptr_pki_t bptr;
+
+	seg_cnt = pko_command.s.segs;
+	desc.cmd_words = cvmx_pko3_cvmseg_addr();
+
+	/* Allocate from local aura, assume all old-pools are local */
+	node = cvmx_get_node_num();
+	desc.aura_node = node;
+
+	/* Derive destination node from dq */
+	desc.port_node = dq >> 10;
+	dq &= (1 << 10) - 1;
+
+	desc.word_count = 1;
+	desc.last_pool = -1;
+
+	/* For small packets, write descriptors directly to CVMSEG
+	 * but for longer packets use jump_buf
+	 */
+	if (seg_cnt < 7 || OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X)) {
+		desc.jump_buf_size = 0;
+		desc.jump_buf_base_ptr = NULL;
+	} else {
+		unsigned int pko_aura = __cvmx_pko3_aura_get(desc.port_node);
+
+		cvmx_fpa3_gaura_t aura =
+			__cvmx_fpa3_gaura(pko_aura >> 10, pko_aura & 0x3ff);
+
+		/* Allocate from internal AURA, size is 4KiB */
+		desc.jump_buf_base_ptr = cvmx_fpa3_alloc(aura);
+
+		if (!desc.jump_buf_base_ptr)
+			return -ENOMEM;
+		desc.jump_buf_size = 0;
+	}
+
+	/* Native buffer-pointer for error checiing */
+	bptr.u64 = packet.u64;
+
+#if CVMX_ENABLE_PARAMETER_CHECKING
+	if (seg_cnt == 1 && bptr.size == pko_command.s.total_bytes) {
+		/*
+		 * Special case for native buffer pointer:
+		 * This is the only case where the native pointer-style can be
+		 * automatically identified, that is when an entire packet
+		 * fits into a single buffer by the PKI.
+		 * The use of the native buffers with this function
+		 * should be avoided.
+		 */
+		debug("%s: WARNING: Native buffer-pointer\n", __func__);
+	} else {
+		/* The buffer ptr is assume to be received in legacy format */
+		res = cvmx_pko3_legacy_bufptr_validate(
+			packet, pko_command.s.gather, pko_command.s.segs,
+			pko_command.s.total_bytes);
+		if (res < 0) {
+			debug("%s: ERROR: Not a valid packet pointer <%d>\n",
+			      __func__, res);
+			return CVMX_PKO_CMD_QUEUE_INIT_ERROR;
+		}
+	}
+#endif /* CVMX_ENABLE_PARAMETER_CHECKING */
+
+	/* Squash warnings */
+	(void)bptr;
+
+	/*** Translate legacy PKO fields into PKO3 PKO_SEND_HDR_S ***/
+
+	/* PKO_SEND_HDR_S is alwasy the first word in the command */
+	hdr_s = (void *)&desc.cmd_words[0];
+	hdr_s->u64 = 0;
+
+	/* Copy total packet size */
+	hdr_s->s.total = pko_command.s.total_bytes;
+
+	/* Endianness */
+	hdr_s->s.le = pko_command.s.le;
+
+	/* N2 is the same meaning */
+	if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X))
+		hdr_s->s.n2 = 0; /* L2 allocate everything */
+	else
+		hdr_s->s.n2 = pko_command.s.n2;
+
+	/* DF bit has the same meaning */
+	hdr_s->s.df = pko_command.s.dontfree;
+
+	/* II bit has the same meaning */
+	hdr_s->s.ii = pko_command.s.ignore_i;
+
+	/* non-zero IP header offset requires L3/L4 checksum calculation */
+	if (cvmx_unlikely(pko_command.s.ipoffp1 > 0)) {
+		u8 ipoff, ip0, l4_proto = 0;
+
+		/* Get data pointer for header inspection below */
+		if (pko_command.s.gather) {
+			cvmx_buf_ptr_t *p_ptr;
+			cvmx_buf_ptr_t blk;
+
+			p_ptr = cvmx_phys_to_ptr(
+				cvmx_pko3_legacy_paddr(node, packet.s.addr));
+			blk = p_ptr[0];
+			data_ptr = cvmx_phys_to_ptr(
+				cvmx_pko3_legacy_paddr(node, blk.s.addr));
+		} else {
+			data_ptr = cvmx_phys_to_ptr(
+				cvmx_pko3_legacy_paddr(node, packet.s.addr));
+		}
+
+		/* Get IP header offset */
+		ipoff = pko_command.s.ipoffp1 - 1;
+
+		/* Parse IP header, version, L4 protocol */
+		hdr_s->s.l3ptr = ipoff;
+		ip0 = data_ptr[ipoff];
+
+		/* IPv4 header length, checksum offload */
+		if ((ip0 >> 4) == 4) {
+			hdr_s->s.l4ptr = hdr_s->s.l3ptr + ((ip0 & 0xf) << 2);
+			l4_proto = data_ptr[ipoff + 9];
+			hdr_s->s.ckl3 = 1; /* Only valid for IPv4 */
+		}
+		/* IPv6 header length is fixed, no checksum */
+		if ((ip0 >> 4) == 6) {
+			hdr_s->s.l4ptr = hdr_s->s.l3ptr + 40;
+			l4_proto = data_ptr[ipoff + 6];
+		}
+		/* Set L4 checksum algo based on L4 protocol */
+		if (l4_proto == 6)
+			hdr_s->s.ckl4 = /* TCP */ 2;
+		else if (l4_proto == 17)
+			hdr_s->s.ckl4 = /* UDP */ 1;
+		else if (l4_proto == 132)
+			hdr_s->s.ckl4 = /* SCTP */ 3;
+		else
+			hdr_s->s.ckl4 = /* Unknown */ 0;
+	}
+
+	if (pko_command.s.gather) {
+		/* Process legacy gather list */
+		cvmx_pko_buf_ptr_t gather_s;
+		cvmx_buf_ptr_t *p_ptr;
+		cvmx_buf_ptr_t blk;
+		unsigned int i;
+
+		/* Get gather list pointer */
+		p_ptr = cvmx_phys_to_ptr(
+			cvmx_pko3_legacy_paddr(node, packet.s.addr));
+		blk = p_ptr[0];
+		/* setup data_ptr */
+		data_ptr = cvmx_phys_to_ptr(
+			cvmx_pko3_legacy_paddr(node, blk.s.addr));
+
+		for (i = 0; i < seg_cnt; i++) {
+			if (cvmx_unlikely(cvmx_pko3_legacy_pool(
+						  &desc, blk.s.pool) < 0))
+				return CVMX_PKO_NO_MEMORY;
+
+			/* Insert PKO_SEND_GATHER_S for the current buffer */
+			gather_s.u64 = 0;
+			gather_s.s.subdc3 = CVMX_PKO_SENDSUBDC_GATHER;
+			gather_s.s.size = blk.s.size;
+			gather_s.s.i = blk.s.i;
+			gather_s.s.addr =
+				cvmx_pko3_legacy_paddr(node, blk.s.addr);
+
+			res = __cvmx_pko3_cmd_subdc_add(&desc, gather_s.u64);
+			if (res < 0)
+				return CVMX_PKO_NO_MEMORY;
+
+			/* get next bufptr */
+			blk = p_ptr[i + 1];
+		} /* for i */
+
+		/* Free original gather-list buffer */
+		if ((pko_command.s.ignore_i && !pko_command.s.dontfree) ||
+		    packet.s.i == pko_command.s.dontfree)
+			cvmx_fpa_free_nosync(p_ptr, packet.s.pool,
+					     (i - 1) / 16 + 1);
+	} else {
+		/* Process legacy linked buffer list */
+		cvmx_pko_buf_ptr_t gather_s;
+		cvmx_buf_ptr_t blk;
+		void *vptr;
+
+		data_ptr = cvmx_phys_to_ptr(
+			cvmx_pko3_legacy_paddr(node, packet.s.addr));
+		blk = packet;
+
+		/*
+		 * Legacy linked-buffers converted into flat gather list
+		 * so that the AURA can optionally be changed to reflect
+		 * the POOL number in the legacy pointers
+		 */
+		do {
+			/* Insert PKO_SEND_AURA_S if pool changes */
+			if (cvmx_unlikely(cvmx_pko3_legacy_pool(
+						  &desc, blk.s.pool) < 0))
+				return CVMX_PKO_NO_MEMORY;
+
+			/* Insert PKO_SEND_GATHER_S for the current buffer */
+			gather_s.u64 = 0;
+			gather_s.s.subdc3 = CVMX_PKO_SENDSUBDC_GATHER;
+			gather_s.s.size = blk.s.size;
+			gather_s.s.i = blk.s.i;
+			gather_s.s.addr =
+				cvmx_pko3_legacy_paddr(node, blk.s.addr);
+
+			res = __cvmx_pko3_cmd_subdc_add(&desc, gather_s.u64);
+			if (res < 0)
+				return CVMX_PKO_NO_MEMORY;
+
+			/* Get the next buffer pointer */
+			vptr = cvmx_phys_to_ptr(
+				cvmx_pko3_legacy_paddr(node, blk.s.addr));
+			memcpy(&blk, vptr - sizeof(blk), sizeof(blk));
+
+			/* Decrement segment count */
+			seg_cnt--;
+
+		} while (seg_cnt > 0);
+	}
+
+	/* This field indicates the presence of 3rd legacy command word */
+	/* NOTE: legacy 3rd word may contain CN78XX native phys addr already */
+	if (cvmx_unlikely(pko_command.s.rsp)) {
+		/* PTP bit in word3 is not supported -
+		 * can not be distibguished from larger phys_addr[42..41]
+		 */
+		if (pko_command.s.wqp) {
+			/* <addr> is an SSO WQE */
+			cvmx_wqe_word1_t *wqe_p;
+			cvmx_pko_send_work_t work_s;
+
+			work_s.u64 = 0;
+			work_s.s.subdc4 = CVMX_PKO_SENDSUBDC_WORK;
+			work_s.s.addr = addr;
+			/* Assume WQE is legacy format too */
+			wqe_p = cvmx_phys_to_ptr(addr + sizeof(u64));
+			work_s.s.grp = wqe_p->cn38xx.grp;
+			work_s.s.tt = wqe_p->tag_type;
+
+			res = __cvmx_pko3_cmd_subdc_add(&desc, work_s.u64);
+		} else {
+			cvmx_pko_send_mem_t mem_s;
+			/* MEMALG_SET broken on Pass1 */
+			if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_0)) {
+				debug("%s: ERROR: PKO byte-clear not supported\n",
+				      __func__);
+			}
+			/* <addr> is a physical address of byte clear */
+			mem_s.u64 = 0;
+			mem_s.s.subdc4 = CVMX_PKO_SENDSUBDC_MEM;
+			mem_s.s.addr = addr;
+			mem_s.s.dsz = MEMDSZ_B8;
+			mem_s.s.alg = MEMALG_SET;
+			mem_s.s.offset = 0;
+
+			res = __cvmx_pko3_cmd_subdc_add(&desc, mem_s.u64);
+		}
+		if (res < 0)
+			return CVMX_PKO_NO_MEMORY;
+	}
+
+	/* FAU counter binding reg0 */
+	if (pko_command.s.reg0) {
+		cvmx_pko_send_mem_t mem_s;
+
+		debug("%s: Legacy FAU commands: reg0=%#x sz0=%#x\n", __func__,
+		      pko_command.s.reg0, pko_command.s.size0);
+		mem_s.u64 = 0;
+		mem_s.s.subdc4 = CVMX_PKO_SENDSUBDC_MEM;
+		mem_s.s.addr = cvmx_ptr_to_phys(
+			CASTPTR(void, __cvmx_fau_sw_addr(pko_command.s.reg0)));
+		if (cvmx_likely(pko_command.s.size0 == CVMX_FAU_OP_SIZE_64))
+			mem_s.s.dsz = MEMDSZ_B64;
+		else if (pko_command.s.size0 == CVMX_FAU_OP_SIZE_32)
+			mem_s.s.dsz = MEMDSZ_B32;
+		else if (pko_command.s.size0 == CVMX_FAU_OP_SIZE_16)
+			mem_s.s.dsz = MEMDSZ_B16;
+		else
+			mem_s.s.dsz = MEMDSZ_B8;
+
+		if (mem_s.s.dsz == MEMDSZ_B16 || mem_s.s.dsz == MEMDSZ_B8)
+			debug("%s: ERROR: 8/16 bit decrement unsupported",
+			      __func__);
+
+		mem_s.s.offset = pko_command.s.subone0;
+		if (mem_s.s.offset)
+			mem_s.s.alg = MEMALG_SUB;
+		else
+			mem_s.s.alg = MEMALG_SUBLEN;
+
+		res = __cvmx_pko3_cmd_subdc_add(&desc, mem_s.u64);
+		if (res < 0)
+			return CVMX_PKO_NO_MEMORY;
+	}
+
+	/* FAU counter binding reg1 */
+	if (cvmx_unlikely(pko_command.s.reg1)) {
+		cvmx_pko_send_mem_t mem_s;
+
+		debug("%s: Legacy FAU commands: reg1=%#x sz1=%#x\n", __func__,
+		      pko_command.s.reg1, pko_command.s.size1);
+		mem_s.u64 = 0;
+		mem_s.s.subdc4 = CVMX_PKO_SENDSUBDC_MEM;
+		mem_s.s.addr = cvmx_ptr_to_phys(
+			CASTPTR(void, __cvmx_fau_sw_addr(pko_command.s.reg1)));
+		if (cvmx_likely(pko_command.s.size1 == CVMX_FAU_OP_SIZE_64))
+			mem_s.s.dsz = MEMDSZ_B64;
+		else if (pko_command.s.size1 == CVMX_FAU_OP_SIZE_32)
+			mem_s.s.dsz = MEMDSZ_B32;
+		else if (pko_command.s.size1 == CVMX_FAU_OP_SIZE_16)
+			mem_s.s.dsz = MEMDSZ_B16;
+		else
+			mem_s.s.dsz = MEMDSZ_B8;
+
+		if (mem_s.s.dsz == MEMDSZ_B16 || mem_s.s.dsz == MEMDSZ_B8)
+			printf("%s: ERROR: 8/16 bit decrement unsupported",
+			       __func__);
+
+		mem_s.s.offset = pko_command.s.subone1;
+		if (mem_s.s.offset)
+			mem_s.s.alg = MEMALG_SUB;
+		else
+			mem_s.s.alg = MEMALG_SUBLEN;
+
+		res = __cvmx_pko3_cmd_subdc_add(&desc, mem_s.u64);
+		if (res < 0)
+			return CVMX_PKO_NO_MEMORY;
+	}
+
+	/* These PKO_HDR_S fields are not used: */
+	/* hdr_s->s.ds does not have legacy equivalent, remains 0 */
+	/* hdr_s->s.format has no legacy equivalent, remains 0 */
+
+	/*** Finalize command buffer ***/
+	res = __cvmx_pko3_cmd_done(&desc);
+	if (res < 0)
+		return CVMX_PKO_NO_MEMORY;
+
+	/*** Send the PKO3 command into the Descriptor Queue ***/
+	pko_status =
+		__cvmx_pko3_lmtdma(desc.port_node, dq, desc.word_count, tag_sw);
+
+	/*** Map PKO3 result codes to legacy return values ***/
+	if (cvmx_likely(pko_status.s.dqstatus == PKO_DQSTATUS_PASS))
+		return CVMX_PKO_SUCCESS;
+
+	debug("%s: ERROR: failed to enqueue: %s\n", __func__,
+	      pko_dqstatus_error(pko_status.s.dqstatus));
+
+	if (pko_status.s.dqstatus == PKO_DQSTATUS_ALREADY)
+		return CVMX_PKO_PORT_ALREADY_SETUP;
+	if (pko_status.s.dqstatus == PKO_DQSTATUS_NOFPABUF ||
+	    pko_status.s.dqstatus == PKO_DQSTATUS_NOPKOBUF)
+		return CVMX_PKO_NO_MEMORY;
+	if (pko_status.s.dqstatus == PKO_DQSTATUS_NOTCREATED)
+		return CVMX_PKO_INVALID_QUEUE;
+	if (pko_status.s.dqstatus == PKO_DQSTATUS_BADSTATE)
+		return CVMX_PKO_CMD_QUEUE_INIT_ERROR;
+	if (pko_status.s.dqstatus == PKO_DQSTATUS_SENDPKTDROP)
+		return CVMX_PKO_INVALID_PORT;
+
+	return CVMX_PKO_INVALID_PORT;
+}