Add support for multi-byte NOP sequences
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 5ae708d..da2eabd 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -2390,6 +2390,36 @@
 #ifdef XBYAK_UNDEF_JNL
 	#undef jnl
 #endif
+
+	// Emit `size` bytes of NOP padding via db(); a size <= 0 emits nothing.
+	// AMD and Intel seem to agree on the same sequences for up to 9 bytes:
+	// https://support.amd.com/TechDocs/55723_SOG_Fam_17h_Processors_3.00.pdf
+	// https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+	void nop(int size = 1)
+	{
+		static const uint8_t nopSeq[9][9] = {
+			{0x90},
+			{0x66, 0x90},
+			{0x0F, 0x1F, 0x00},
+			{0x0F, 0x1F, 0x40, 0x00},
+			{0x0F, 0x1F, 0x44, 0x00, 0x00},
+			{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
+			{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
+			{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+			{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+		};
+		static const int numSeq = sizeof(nopSeq) / sizeof(nopSeq[0]);
+		// Row k-1 holds the k-byte sequence; "> 0" keeps a negative size from indexing before row 0.
+		while (size > 0) {
+			int len = size > numSeq ? numSeq : size; // longest sequence that still fits
+			const uint8_t *seq = nopSeq[len - 1];
+			for (int i = 0; i < len; i++) {
+				db(seq[i]);
+			}
+			size -= len;
+		}
+	}
+
 #ifndef XBYAK_DONT_READ_LIST
 #include "xbyak_mnemonic.h"
 	void align(int x = 16)
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 95014a0..ea7c6a3 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -497,7 +497,6 @@
 void mulx(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F2 | T_0F38, 0xf6, true); }
 void mwait() { db(0x0F); db(0x01); db(0xC9); }
 void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); }
-void nop() { db(0x90); }
 void not_(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); }
 void or_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); }
 void or_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); }