add lds/lss/les/lfs/lgs
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 9e58498..b414fd9 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -231,6 +231,18 @@
 	}
 }
 
+void putMemOp(const char *name, uint8 prefix, uint8 ext, uint8 code1, int code2, int bit = 32) // prints one generated member "void name(const Address& addr)" whose body forwards to opModM
+{
+	printf("void %s(const Address& addr) { ", name); // signature and opening brace of the generated function
+	if (prefix) printf("db(0x%02X); ", prefix); // a zero prefix means no prefix byte: the db() call is left out of the generated body
+	printf("opModM(addr, Reg%d(%d), 0x%02X, 0x%02X); }\n", bit, ext, code1, code2); // bit selects the RegNN wrapper (32 by default, 64 for the put64 callers) around ext; code1/code2 are printed as hex literals
+}
+
+void putLoadSeg(const char *name, uint8 code1, int code2 = NONE) // prints one generated member "void name(const Reg& reg, const Address& addr)" whose body forwards to opLoadSeg
+{
+	printf("void %s(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x%02X, 0x%02X); }\n", name, code1, code2); // code2 is always printed, even when it is NONE; that is why the generated lds/les carry a literal 0x100 as their last argument
+}
+
 void put()
 {
 	const int NO = CodeGenerator::NONE;
@@ -759,6 +771,9 @@
 		putGeneric(tbl, NUM_OF_ARRAY(tbl));
 		puts("void enter(uint16 x, uint8 y) { db(0xC8); dw(x); db(y); }");
 		puts("void int_(uint8 x) { db(0xCD); db(x); }");
+		putLoadSeg("lss", 0x0F, 0xB2);
+		putLoadSeg("lfs", 0x0F, 0xB4);
+		putLoadSeg("lgs", 0x0F, 0xB5);
 	}
 	{
 		const struct Tbl {
@@ -966,9 +981,7 @@
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl *p = &tbl[i];
-			printf("void %s(const Address& addr) { ", p->name);
-			if (p->prefix) printf("db(0x%02X); ", p->prefix);
-			printf("opModM(addr, Reg32(%d), 0x%02X, 0x%02X); }\n", p->ext, p->code1, p->code2);
+			putMemOp(p->name, p->prefix, p->ext, p->code1, p->code2);
 		}
 		puts("void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0x9B); db(0xDF); db(0xE0); }");
 		puts("void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0xDF); db(0xE0); }");
@@ -1734,6 +1747,8 @@
 		{ "popa", 0x61 },
 	};
 	putGeneric(tbl, NUM_OF_ARRAY(tbl));
+	putLoadSeg("lds", 0xC5, NONE);
+	putLoadSeg("les", 0xC4, NONE);
 }
 
 void put64()
@@ -1756,8 +1771,8 @@
 	};
 	putGeneric(tbl, NUM_OF_ARRAY(tbl));
 
-	puts("void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }");
-	puts("void fxrstor64(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xAE); }");
+	putMemOp("cmpxchg16b", 0, 1, 0x0F, 0xC7, 64);
+	putMemOp("fxrstor64", 0, 1, 0x0F, 0xAE, 64);
 	puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }");
 	puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }");
 	puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }");
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 9fb7dfa..69b6bab 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -1173,6 +1173,30 @@
 			put("mov", REG64, tbl[i].a, tbl[i].b);
 		}
 	}
+	void putLoadSeg() const // emits test cases for the lds/les/lss/lfs/lgs mnemonics through put()
+	{
+		const struct Tbl {
+			const char *name; // mnemonic passed to put()
+			bool support64Bit; // when true, an extra REG64 case is emitted in XBYAK64 builds
+		} tbl[] = {
+#ifdef XBYAK32 // lds/les are listed only when XBYAK32 is defined
+			{ "lds", false },
+			{ "les", false },
+#endif
+			{ "lss", true },
+			{ "lfs", true },
+			{ "lgs", true },
+		};
+		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+			const Tbl *p = &tbl[i];
+			put(p->name, REG16|REG32, MEM); // every listed mnemonic gets the REG16|REG32 destination with a MEM source
+#ifdef XBYAK64 // the REG64 case exists only in the 64-bit build
+			if (p->support64Bit) {
+				put(p->name, REG64, MEM);
+			}
+#endif
+		}
+	}
 	// only nasm
 	void putMovImm64() const
 	{
@@ -2476,6 +2500,7 @@
 		putPushPop();
 		putTest();
 		separateFunc();
+		putLoadSeg();
 		putEtc();
 		putShift();
 		putShxd();
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index b9d7fe8..0efe4e1 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -1714,6 +1714,14 @@
 		db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
 		opAddr(addr, reg.getIdx(), immSize);
 	}
+	void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE) // shared encoder for the generated lds/les/lss/lfs/lgs: calls rex(), writes code0 and the optional code1, then opAddr()
+	{
+		if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
+		if (!reg.isBit(16 | i32e)) throw Error(ERR_BAD_SIZE_OF_REGISTER); // same size whitelist lea() uses, instead of only excluding 8-bit registers
+		rex(addr, reg);
+		db(code0); if (code1 != NONE) db(code1); // code1 == NONE means a one-byte opcode: nothing is written for it
+		opAddr(addr, reg.getIdx());
+	}
 	void opMIB(const Address& addr, const Reg& reg, int code0, int code1)
 	{
 		if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index d1753c4..b7db103 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -456,6 +456,8 @@
 void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModM(addr, reg, 0x8D); }
 void leave() { db(0xC9); }
 void lfence() { db(0x0F); db(0xAE); db(0xE8); }
+void lfs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB4); }
+void lgs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB5); }
 void lock() { db(0xF0); }
 void lodsb() { db(0xAC); }
 void lodsd() { db(0xAD); }
@@ -469,6 +471,7 @@
 void loopne(const Label& label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
 void loopne(const char *label) { loopne(std::string(label)); }
 void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
+void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB2); }
 void lzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); }
 void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66);  opModR(reg1, reg2, 0x0F, 0xF7); }
 void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); opModR(reg1, reg2, 0x0F, 0xF7); }
@@ -1639,6 +1642,8 @@
 void pushad() { db(0x60); }
 void pushfd() { db(0x9C); }
 void popa() { db(0x61); }
+void lds(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC5, 0x100); }
+void les(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC4, 0x100); }
 #endif
 #ifndef XBYAK_NO_OP_NAMES
 void and(const Operand& op1, const Operand& op2) { and_(op1, op2); }