diff --git a/gcc/common.opt b/gcc/common.opt
index 67048db..e6f8fd3 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1281,7 +1281,7 @@ ffast-math
 Common
 
 ffat-lto-objects
-Common Var(flag_fat_lto_objects)
+Common Var(flag_fat_lto_objects) Init(1)
 Output lto objects containing both the intermediate language and binary output.
 
 ffinite-math-only
diff --git a/gcc/common/config/nds32/nds32-common.c b/gcc/common/config/nds32/nds32-common.c
index fb75956..66ea95c 100644
--- a/gcc/common/config/nds32/nds32-common.c
+++ b/gcc/common/config/nds32/nds32-common.c
@@ -53,6 +53,16 @@ nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
 
       return true;
 
+    case OPT_misr_secure_:
+      /* Check the valid security level: 0 1 2 3.  */
+      if (value < 0 || value > 3)
+	{
+	  error_at (loc, "for the option -misr-secure=X, the valid X "
+			 "must be: 0, 1, 2, or 3");
+	  return false;
+	}
+      return true;
+
     case OPT_mcache_block_size_:
       /* Check valid value: 4 8 16 32 64 128 256 512.  */
       if (exact_log2 (value) < 2 || exact_log2 (value) > 9)
@@ -74,15 +84,69 @@ nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
 /* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
 static const struct default_options nds32_option_optimization_table[] =
 {
-  /* Enable -fomit-frame-pointer by default at -O1 or higher.  */
-  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+#ifdef TARGET_DEFAULT_NO_MATH_ERRNO
+  /* Under some configurations, we would like to use -fno-math-errno by default
+     at all optimization levels for performance and code size considerations.
+     Please check gcc/config.gcc for more implementation details.  */
+  { OPT_LEVELS_ALL,               OPT_fmath_errno,         NULL, 0 },
+#endif
+#if TARGET_LINUX_ABI == 0
+  /* Disable -fdelete-null-pointer-checks by default in ELF toolchain.  */
+  { OPT_LEVELS_ALL,               OPT_fdelete_null_pointer_checks,
+							   NULL, 0 },
+#endif
+  /* Enable -fsched-pressure by default at -O1 and above.  */
+  { OPT_LEVELS_1_PLUS,            OPT_fsched_pressure,     NULL, 1 },
+  /* Enable -fomit-frame-pointer by default at all optimization levels.  */
+  { OPT_LEVELS_ALL,               OPT_fomit_frame_pointer, NULL, 1 },
+  /* Enable -mrelax-hint by default at all optimization levels.  */
+  { OPT_LEVELS_ALL,               OPT_mrelax_hint,         NULL, 1 },
+  /* Enable -mabi-compatible by default at all optimization levels.  */
+  { OPT_LEVELS_ALL,               OPT_mabi_compatible,     NULL, 1 },
+  /* Enable -malways-align by default at -O1 and above, but not -Os or -Og.  */
+  { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_malways_align,       NULL, 1 },
   /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
-  { OPT_LEVELS_SIZE,   OPT_mv3push,             NULL, 1 },
-
-  { OPT_LEVELS_NONE,   0,                       NULL, 0 }
+  { OPT_LEVELS_SIZE,              OPT_mv3push,             NULL, 1 },
+  /* Enable -mload-store-opt by default at -Os.  */
+  { OPT_LEVELS_SIZE,              OPT_mload_store_opt,     NULL, 1 },
+  /* Enable -mregrename by default at -O1 and above.  */
+  { OPT_LEVELS_1_PLUS,            OPT_mregrename,          NULL, 1 },
+  /* Enable -mgcse by default at -O1 and above.  */
+  { OPT_LEVELS_1_PLUS,            OPT_mgcse,               NULL, 1 },
+  /* Enable -msign-conversion by default at -O1 and above.  */
+  { OPT_LEVELS_1_PLUS,            OPT_msign_conversion,    NULL, 1 },
+  /* Enable -mscalbn-transform by default at -O1 and above.  */
+  { OPT_LEVELS_1_PLUS,            OPT_mscalbn_transform,   NULL, 1 },
+  /* Enable -mconst-remater by default at -O1 and above.  */
+  { OPT_LEVELS_1_PLUS,            OPT_mconst_remater,      NULL, 1 },
+  /* Enable -mcprop-acc by default at -O1 and above.  */
+  { OPT_LEVELS_1_PLUS,            OPT_mcprop_acc,          NULL, 1 },
+#ifdef TARGET_OS_DEFAULT_IFC
+  /* Enable -mifc by default at -Os, but it is useless under V2/V3M ISA.  */
+  { OPT_LEVELS_SIZE,              OPT_mifc,                NULL, 1 },
+#endif
+#ifdef TARGET_OS_DEFAULT_EX9
+  /* Enable -mex9 by default at -Os, but it is useless under V2/V3M ISA.  */
+  { OPT_LEVELS_SIZE,              OPT_mex9,                NULL, 1 },
+#endif
+
+  { OPT_LEVELS_NONE,              0,                       NULL, 0 }
 };
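+/* Each entry above is a standard default_options record
+   { levels, option, argument, value }: VALUE 1 enables and 0 disables
+   OPTION by default at the given optimization LEVELS.  */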
 
 /* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_EXCEPT_UNWIND_INFO.  */
+static enum unwind_info_type
+nds32_except_unwind_info (struct gcc_options *opts ATTRIBUTE_UNUSED)
+{
+  if (TARGET_LINUX_ABI)
+    return UI_DWARF2;
+
+  return UI_SJLJ;
+}
+
+/* ------------------------------------------------------------------------ */
+
 
 /* Run-time Target Specification.  */
 
@@ -95,14 +159,22 @@ static const struct default_options nds32_option_optimization_table[] =
 
    Other MASK_XXX flags are set individually.
    By default we enable
-     TARGET_16_BIT   : Generate 16/32 bit mixed length instruction.
-     TARGET_PERF_EXT : Generate performance extention instrcution.
-     TARGET_CMOV     : Generate conditional move instruction.  */
+     TARGET_16_BIT     : Generate 16/32 bit mixed length instruction.
+     TARGET_EXT_PERF   : Generate performance extension instruction.
+     TARGET_EXT_PERF2  : Generate performance extension version 2 instruction.
+     TARGET_EXT_STRING : Generate string extension instruction.
+     TARGET_HW_ABS     : Generate hardware abs instruction.
+     TARGET_CMOV       : Generate conditional move instruction.  */
 #undef TARGET_DEFAULT_TARGET_FLAGS
 #define TARGET_DEFAULT_TARGET_FLAGS		\
   (TARGET_CPU_DEFAULT				\
+   | TARGET_DEFAULT_FPU_ISA			\
+   | TARGET_DEFAULT_FPU_FMA			\
    | MASK_16_BIT				\
-   | MASK_PERF_EXT				\
+   | MASK_EXT_PERF				\
+   | MASK_EXT_PERF2				\
+   | MASK_EXT_STRING				\
+   | MASK_HW_ABS				\
    | MASK_CMOV)
 
 #undef TARGET_HANDLE_OPTION
@@ -115,7 +187,7 @@ static const struct default_options nds32_option_optimization_table[] =
 /* Defining the Output Assembler Language.  */
 
 #undef TARGET_EXCEPT_UNWIND_INFO
-#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+#define TARGET_EXCEPT_UNWIND_INFO nds32_except_unwind_info
 
 /* ------------------------------------------------------------------------ */
 
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 1d5b23f..367a821 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -433,8 +433,28 @@ mips*-*-*)
 	;;
 nds32*)
 	cpu_type=nds32
-	extra_headers="nds32_intrinsic.h"
-	extra_objs="nds32-cost.o nds32-intrinsic.o nds32-isr.o nds32-md-auxiliary.o nds32-pipelines-auxiliary.o nds32-predicates.o nds32-memory-manipulation.o nds32-fp-as-gp.o"
+	extra_headers="nds32_intrinsic.h nds32_isr.h nds32_init.inc"
+	case ${target} in
+	  nds32*-*-linux*)
+	    extra_options="${extra_options} nds32/nds32-linux.opt"
+	    ;;
+	  nds32*-*-elf*)
+	    extra_options="${extra_options} nds32/nds32-elf.opt"
+	    ;;
+	  *)
+	    ;;
+	esac
+	extra_options="${extra_options} g.opt"
+	extra_objs="nds32-cost.o nds32-intrinsic.o nds32-md-auxiliary.o \
+		    nds32-pipelines-auxiliary.o nds32-predicates.o \
+		    nds32-memory-manipulation.o nds32-fp-as-gp.o \
+		    nds32-load-store-opt.o nds32-soft-fp-comm.o nds32-isr.o \
+		    nds32-regrename.o nds32-gcse.o nds32-relax-opt.o \
+		    nds32-sign-conversion.o \
+		    nds32-scalbn-transform.o nds32-lmwsmw.o \
+		    nds32-reg-utils.o nds32-const-remater.o \
+		    nds32-utils.o nds32-abi-compatible.o \
+		    nds32-cprop-acc.o"
 	;;
 nios2-*-*)
 	cpu_type=nios2
@@ -2265,17 +2285,67 @@ msp430*-*-*)
 	tmake_file="${tmake_file} msp430/t-msp430"
 	extra_gcc_objs="driver-msp430.o"
 	;;
-nds32le-*-*)
+nds32*-*-*)
 	target_cpu_default="0"
 	tm_defines="${tm_defines}"
-	tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}"
-	tmake_file="nds32/t-nds32 nds32/t-mlibs"
-	;;
-nds32be-*-*)
-	target_cpu_default="0|MASK_BIG_ENDIAN"
-	tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
-	tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}"
-	tmake_file="nds32/t-nds32 nds32/t-mlibs"
+	case ${target} in
+	  nds32le*-*-*)
+	    ;;
+	  nds32be-*-*)
+	    target_cpu_default="${target_cpu_default}|MASK_BIG_ENDIAN"
+	    tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
+	    ;;
+	esac
+	case ${target} in
+	  nds32*-*-elf*)
+	    tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/elf.h nds32/nds32_intrinsic.h"
+	    tmake_file="nds32/t-nds32 nds32/t-elf"
+	    ;;
+	  nds32*-*-linux*)
+	    tm_file="dbxelf.h elfos.h ${tm_file} gnu-user.h linux.h glibc-stdint.h nds32/linux.h nds32/nds32_intrinsic.h"
+	    tmake_file="${tmake_file} nds32/t-nds32 nds32/t-linux"
+	    ;;
+	esac
+	nds32_multilibs="${with_multilib_list}"
+	if test "$nds32_multilibs" = "default"; then
+	  nds32_multilibs=""
+	fi
+	nds32_multilibs=`echo $nds32_multilibs | sed -e 's/,/ /g'`
+	for nds32_multilib in ${nds32_multilibs}; do
+		case ${nds32_multilib} in
+		dsp | zol | v3m+ | graywolf )
+			TM_MULTILIB_CONFIG="${TM_MULTILIB_CONFIG} ${nds32_multilib}"
+			;;
+		*)
+			echo "--with-multilib-list=${nds32_multilib} not supported."
+			exit 1
+		esac
+	done
+
+	# Handle --enable-default-relax setting.
+	if test x${enable_default_relax} = xyes; then
+		tm_defines="${tm_defines} TARGET_DEFAULT_RELAX=1"
+	fi
+	# Handle --enable-Os-default-ifc setting.
+	if test x${enable_Os_default_ifc} = xyes; then
+		tm_defines="${tm_defines} TARGET_OS_DEFAULT_IFC=1"
+	fi
+	# Handle --enable-Os-default-ex9 setting.
+	if test x${enable_Os_default_ex9} = xyes; then
+		tm_defines="${tm_defines} TARGET_OS_DEFAULT_EX9=1"
+	fi
+	# Handle --with-ext-dsp
+	if test x${with_ext_dsp} = xyes; then
+		tm_defines="${tm_defines} TARGET_DEFAULT_EXT_DSP=1"
+	fi
+	if test x${with_ext_zol} = xyes; then
+		tm_defines="${tm_defines} TARGET_DEFAULT_HWLOOP=1"
+	fi
+	# Handle --with-16bit-ext, and default is on
+	if test x${with_ext_16bit} != xno; then
+		tm_defines="${tm_defines} TARGET_DEFAULT_16BIT=1"
+	fi
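+
+	# Illustrative note (summary of the settings above): configuring with,
+	# e.g., --enable-default-relax and --enable-Os-default-ifc defines
+	# TARGET_DEFAULT_RELAX and TARGET_OS_DEFAULT_IFC, which the nds32
+	# back end uses to pass --relax to the linker by default and to
+	# enable -mifc at -Os.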
+
 	;;
 nios2-*-*)
 	tm_file="elfos.h ${tm_file}"
@@ -4097,15 +4167,51 @@ case "${target}" in
 		;;
 
 	nds32*-*-*)
-		supported_defaults="arch nds32_lib"
+		supported_defaults="arch cpu nds32_lib float fpu_config memory_model"
 
 		# process --with-arch
 		case "${with_arch}" in
-		"" | v2 | v3 | v3m)
+		"" | v3 | v3j)
+			# OK
+			tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=0"
+			tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=4"
+			;;
+		v2 | v2j | v3m)
+			# OK
+			tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=0"
+			tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=16"
+			;;
+		v3f)
+			tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=1"
+			tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=4"
+			;;
+		v3s)
+			tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=2"
+			tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=4"
+			;;
+		*)
+			echo "Cannot accept --with-arch=$with_arch, available values are: v2 v2j v3 v3j v3m v3f v3s" 1>&2
+			exit 1
+			;;
+		esac
+
+		# process --with-memory-model
+		case "${with_memory_model}" in
+		"" | fast | slow)
+			;;
+		*)
+			echo "Cannot accept --with-memory-model=$with_memory_model, available values are: fast slow" 1>&2
+			exit 1
+			;;
+		esac
+
+		# process --with-cpu
+		case "${with_cpu}" in
+		"" | n7 | n8 | e8 | s8 | n9 | n10 | d10 | graywolf | n12 | n13 | panther)
 			# OK
 			;;
 		*)
-			echo "Cannot accept --with-arch=$with_arch, available values are: v2 v3 v3m" 1>&2
+			echo "Cannot accept --with-cpu=$with_cpu, available values are: n7 n8 e8 s8 n9 n10 d10 graywolf n12 n13 panther" 1>&2
 			exit 1
 			;;
 		esac
@@ -4115,31 +4221,56 @@ case "${target}" in
 		"")
 			# the default library is newlib
 			with_nds32_lib=newlib
+			tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1"
 			;;
 		newlib)
 			# OK
+			tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1"
 			;;
 		mculib)
 			# OK
+			# for the arch=v3f or arch=v3s under mculib toolchain,
+			# we would like to set -fno-math-errno as default
+			case "${with_arch}" in
+			v3f | v3s)
+				tm_defines="${tm_defines} TARGET_DEFAULT_NO_MATH_ERRNO=1"
+				;;
+			esac
+			;;
+		glibc)
+			# OK
+			tm_defines="${tm_defines} TARGET_DEFAULT_TLSDESC_TRAMPOLINE=1"
+			;;
+		uclibc)
 			;;
 		*)
-			echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib" 1>&2
+			echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib glibc uclibc" 1>&2
+			exit 1
+			;;
+		esac
+
+		# process --with-float
+		case "${with_float}" in
+		"" | soft | hard)
+			# OK
+			;;
+		*)
+			echo "Cannot accept --with-float=$with_float, available values are: soft hard" 1>&2
+			exit 1
+			;;
+		esac
+
+		# process --with-config-fpu
+		case "${with_config_fpu}" in
+		"" | 0 | 1 | 2 | 3)
+			# OK
+			;;
+		*)
+			echo "Cannot accept --with-config-fpu=$with_config_fpu, available values are: 0 1 2 3" 1>&2
 			exit 1
 			;;
 		esac
-		;;
 
-	nios2*-*-*)
-		supported_defaults="arch"
-			case "$with_arch" in
-			"" | r1 | r2)
-				# OK
-				;;
-			*)
-				echo "Unknown arch used in --with-arch=$with_arch" 1>&2
-				exit 1
-				;;
-			esac
 		;;
 
 	powerpc*-*-* | rs6000-*-*)
@@ -4527,7 +4658,7 @@ case ${target} in
 esac
 
 t=
-all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt synci tls"
+all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt synci tls memory_model"
 for option in $all_defaults
 do
 	eval "val=\$with_"`echo $option | sed s/-/_/g`
diff --git a/gcc/config/nds32/constants.md b/gcc/config/nds32/constants.md
index bea42ee..6c92412 100644
--- a/gcc/config/nds32/constants.md
+++ b/gcc/config/nds32/constants.md
@@ -23,25 +23,176 @@
 (define_constants
   [(R8_REGNUM  8)
    (TA_REGNUM 15)
+   (TP_REGNUM 25)
    (FP_REGNUM 28)
    (GP_REGNUM 29)
    (LP_REGNUM 30)
    (SP_REGNUM 31)
+   (LB_REGNUM 98)
+   (LE_REGNUM 99)
+   (LC_REGNUM 100)
   ])
 
 
+;; The unspec operation index.
+(define_c_enum "unspec_element" [
+  UNSPEC_COPYSIGN
+  UNSPEC_FCPYNSD
+  UNSPEC_FCPYNSS
+  UNSPEC_FCPYSD
+  UNSPEC_FCPYSS
+  UNSPEC_CLIP
+  UNSPEC_CLIPS
+  UNSPEC_CLO
+  UNSPEC_PBSAD
+  UNSPEC_PBSADA
+  UNSPEC_BSE
+  UNSPEC_BSE_2
+  UNSPEC_BSP
+  UNSPEC_BSP_2
+  UNSPEC_FFB
+  UNSPEC_FFMISM
+  UNSPEC_FLMISM
+  UNSPEC_KDMBB
+  UNSPEC_KDMBT
+  UNSPEC_KDMTB
+  UNSPEC_KDMTT
+  UNSPEC_KHMBB
+  UNSPEC_KHMBT
+  UNSPEC_KHMTB
+  UNSPEC_KHMTT
+  UNSPEC_KSLRAW
+  UNSPEC_KSLRAWU
+  UNSPEC_SVA
+  UNSPEC_SVS
+  UNSPEC_WSBH
+  UNSPEC_LWUP
+  UNSPEC_LBUP
+  UNSPEC_SWUP
+  UNSPEC_SBUP
+  UNSPEC_LMWZB
+  UNSPEC_SMWZB
+  UNSPEC_UALOAD_HW
+  UNSPEC_UALOAD_W
+  UNSPEC_UALOAD_DW
+  UNSPEC_UASTORE_HW
+  UNSPEC_UASTORE_W
+  UNSPEC_UASTORE_DW
+  UNSPEC_GOTINIT
+  UNSPEC_GOT
+  UNSPEC_GOTOFF
+  UNSPEC_PLT
+  UNSPEC_TLSGD
+  UNSPEC_TLSLD
+  UNSPEC_TLSIE
+  UNSPEC_TLSLE
+  UNSPEC_ROUND
+  UNSPEC_VEC_COMPARE
+  UNSPEC_KHM
+  UNSPEC_KHMX
+  UNSPEC_CLIP_OV
+  UNSPEC_CLIPS_OV
+  UNSPEC_BITREV
+  UNSPEC_KABS
+  UNSPEC_LOOP_END
+  UNSPEC_TLS_DESC
+  UNSPEC_TLS_IE
+  UNSPEC_ADD32
+  UNSPEC_ICT
+])
+
+
 ;; The unspec_volatile operation index.
 (define_c_enum "unspec_volatile_element" [
-  UNSPEC_VOLATILE_FUNC_RETURN
+  UNSPEC_VOLATILE_EH_RETURN
   UNSPEC_VOLATILE_ISYNC
   UNSPEC_VOLATILE_ISB
+  UNSPEC_VOLATILE_DSB
+  UNSPEC_VOLATILE_MSYNC
+  UNSPEC_VOLATILE_MSYNC_ALL
+  UNSPEC_VOLATILE_MSYNC_STORE
   UNSPEC_VOLATILE_MFSR
   UNSPEC_VOLATILE_MFUSR
   UNSPEC_VOLATILE_MTSR
   UNSPEC_VOLATILE_MTUSR
   UNSPEC_VOLATILE_SETGIE_EN
   UNSPEC_VOLATILE_SETGIE_DIS
+  UNSPEC_VOLATILE_FMFCSR
+  UNSPEC_VOLATILE_FMTCSR
+  UNSPEC_VOLATILE_FMFCFG
+  UNSPEC_VOLATILE_JR_ITOFF
+  UNSPEC_VOLATILE_JR_TOFF
+  UNSPEC_VOLATILE_JRAL_ITON
+  UNSPEC_VOLATILE_JRAL_TON
+  UNSPEC_VOLATILE_RET_ITOFF
+  UNSPEC_VOLATILE_RET_TOFF
+  UNSPEC_VOLATILE_STANDBY_NO_WAKE_GRANT
+  UNSPEC_VOLATILE_STANDBY_WAKE_GRANT
+  UNSPEC_VOLATILE_STANDBY_WAKE_DONE
+  UNSPEC_VOLATILE_TEQZ
+  UNSPEC_VOLATILE_TNEZ
+  UNSPEC_VOLATILE_TRAP
+  UNSPEC_VOLATILE_SETEND_BIG
+  UNSPEC_VOLATILE_SETEND_LITTLE
+  UNSPEC_VOLATILE_BREAK
+  UNSPEC_VOLATILE_SYSCALL
+  UNSPEC_VOLATILE_NOP
+  UNSPEC_VOLATILE_RES_DEP
+  UNSPEC_VOLATILE_DATA_DEP
+  UNSPEC_VOLATILE_LLW
+  UNSPEC_VOLATILE_SCW
+  UNSPEC_VOLATILE_CCTL_L1D_INVALALL
+  UNSPEC_VOLATILE_CCTL_L1D_WBALL_ALVL
+  UNSPEC_VOLATILE_CCTL_L1D_WBALL_ONE_LVL
+  UNSPEC_VOLATILE_CCTL_IDX_WRITE
+  UNSPEC_VOLATILE_CCTL_IDX_READ
+  UNSPEC_VOLATILE_CCTL_VA_WBINVAL_L1
+  UNSPEC_VOLATILE_CCTL_VA_WBINVAL_LA
+  UNSPEC_VOLATILE_CCTL_IDX_WBINVAL
+  UNSPEC_VOLATILE_CCTL_VA_LCK
+  UNSPEC_VOLATILE_DPREF_QW
+  UNSPEC_VOLATILE_DPREF_HW
+  UNSPEC_VOLATILE_DPREF_W
+  UNSPEC_VOLATILE_DPREF_DW
+  UNSPEC_VOLATILE_TLBOP_TRD
+  UNSPEC_VOLATILE_TLBOP_TWR
+  UNSPEC_VOLATILE_TLBOP_RWR
+  UNSPEC_VOLATILE_TLBOP_RWLK
+  UNSPEC_VOLATILE_TLBOP_UNLK
+  UNSPEC_VOLATILE_TLBOP_PB
+  UNSPEC_VOLATILE_TLBOP_INV
+  UNSPEC_VOLATILE_TLBOP_FLUA
+  UNSPEC_VOLATILE_ENABLE_INT
+  UNSPEC_VOLATILE_DISABLE_INT
+  UNSPEC_VOLATILE_SET_PENDING_SWINT
+  UNSPEC_VOLATILE_CLR_PENDING_SWINT
+  UNSPEC_VOLATILE_CLR_PENDING_HWINT
+  UNSPEC_VOLATILE_GET_ALL_PENDING_INT
+  UNSPEC_VOLATILE_GET_PENDING_INT
+  UNSPEC_VOLATILE_SET_INT_PRIORITY
+  UNSPEC_VOLATILE_GET_INT_PRIORITY
+  UNSPEC_VOLATILE_SET_TRIG_LEVEL
+  UNSPEC_VOLATILE_SET_TRIG_EDGE
+  UNSPEC_VOLATILE_GET_TRIG_TYPE
+  UNSPEC_VOLATILE_RELAX_GROUP
+  UNSPEC_VOLATILE_INNERMOST_LOOP_BEGIN
+  UNSPEC_VOLATILE_INNERMOST_LOOP_END
+  UNSPEC_VOLATILE_OMIT_FP_BEGIN
+  UNSPEC_VOLATILE_OMIT_FP_END
   UNSPEC_VOLATILE_POP25_RETURN
+  UNSPEC_VOLATILE_SIGNATURE_BEGIN
+  UNSPEC_VOLATILE_SIGNATURE_END
+  UNSPEC_VOLATILE_NO_HWLOOP
+  UNSPEC_VOLATILE_NO_IFC_BEGIN
+  UNSPEC_VOLATILE_NO_IFC_END
+  UNSPEC_VOLATILE_NO_EX9_BEGIN
+  UNSPEC_VOLATILE_NO_EX9_END
+  UNSPEC_VOLATILE_UNALIGNED_FEATURE
+  UNSPEC_VOLATILE_ENABLE_UNALIGNED
+  UNSPEC_VOLATILE_DISABLE_UNALIGNED
+  UNSPEC_VOLATILE_RDOV
+  UNSPEC_VOLATILE_CLROV
+  UNSPEC_VOLATILE_HWLOOP_LAST_INSN
 ])
 
 ;; ------------------------------------------------------------------------
diff --git a/gcc/config/nds32/constraints.md b/gcc/config/nds32/constraints.md
index 1f44a1a..8163f46 100644
--- a/gcc/config/nds32/constraints.md
+++ b/gcc/config/nds32/constraints.md
@@ -25,9 +25,6 @@
 ;; Machine-dependent floating: G H
 
 
-(define_register_constraint "w" "(TARGET_ISA_V3 || TARGET_ISA_V3M) ? LOW_REGS : NO_REGS"
-  "LOW register class $r0 ~ $r7 constraint for V3/V3M ISA")
-
 (define_register_constraint "l" "LOW_REGS"
   "LOW register class $r0 ~ $r7")
 
@@ -41,9 +38,59 @@
 (define_register_constraint "t" "R15_TA_REG"
   "Temporary Assist register $ta (i.e. $r15)")
 
+(define_register_constraint "e" "R8_REG"
+  "Function Entry register $r8)")
+
 (define_register_constraint "k" "STACK_REG"
   "Stack register $sp")
 
+(define_register_constraint "v" "R5_REG"
+  "Register $r5")
+
+(define_register_constraint "x" "FRAME_POINTER_REG"
+  "Frame pointer register $fp")
+
+(define_register_constraint "f"
+  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ? FP_REGS : NO_REGS"
+ "The Floating point registers $fs0 ~ $fs31")
+
+(define_register_constraint "A" "LOOP_REGS"
+  "Loop register class")
+
+(define_constraint "Iv00"
+  "Constant value 0"
+  (and (match_code "const_int")
+       (match_test "ival == 0")))
+
+(define_constraint "Iv01"
+  "Constant value 1"
+  (and (match_code "const_int")
+       (match_test "ival == 1")))
+
+(define_constraint "Iv02"
+  "Constant value 2"
+  (and (match_code "const_int")
+       (match_test "ival == 2")))
+
+(define_constraint "Iv04"
+  "Constant value 4"
+  (and (match_code "const_int")
+       (match_test "ival == 4")))
+
+(define_constraint "Iv08"
+  "Constant value 8"
+  (and (match_code "const_int")
+       (match_test "ival == 8")))
+
+(define_constraint "Iu01"
+  "Unsigned immediate 1-bit value"
+  (and (match_code "const_int")
+       (match_test "ival == 1 || ival == 0")))
+
+(define_constraint "Iu02"
+  "Unsigned immediate 2-bit value"
+  (and (match_code "const_int")
+       (match_test "ival < (1 << 2) && ival >= 0")))
 
 (define_constraint "Iu03"
   "Unsigned immediate 3-bit value"
@@ -65,6 +112,11 @@
   (and (match_code "const_int")
        (match_test "ival < (1 << 4) && ival >= -(1 << 4)")))
 
+(define_constraint "Cs05"
+  "Signed immediate 5-bit value"
+  (and (match_code "const_double")
+       (match_test "nds32_const_double_range_ok_p (op, SFmode, -(1 << 4), (1 << 4))")))
+
 (define_constraint "Iu05"
   "Unsigned immediate 5-bit value"
   (and (match_code "const_int")
@@ -75,6 +127,11 @@
   (and (match_code "const_int")
        (match_test "IN_RANGE (ival, -31, 0)")))
 
+(define_constraint "Iu06"
+  "Unsigned immediate 6-bit value"
+  (and (match_code "const_int")
+       (match_test "ival < (1 << 6) && ival >= 0")))
+
 ;; Ip05 is special and dedicated for v3 movpi45 instruction.
 ;; movpi45 has imm5u field but the range is 16 ~ 47.
 (define_constraint "Ip05"
@@ -84,10 +141,10 @@
 		    && ival >= (0 + 16)
 		    && (TARGET_ISA_V3 || TARGET_ISA_V3M)")))
 
-(define_constraint "Iu06"
+(define_constraint "IU06"
   "Unsigned immediate 6-bit value constraint for addri36.sp instruction"
   (and (match_code "const_int")
-       (match_test "ival < (1 << 6)
+       (match_test "ival < (1 << 8)
 		    && ival >= 0
 		    && (ival % 4 == 0)
 		    && (TARGET_ISA_V3 || TARGET_ISA_V3M)")))
@@ -103,6 +160,11 @@
        (match_test "ival < (1 << 9) && ival >= 0")))
 
 
+(define_constraint "Is08"
+  "Signed immediate 8-bit value"
+  (and (match_code "const_int")
+       (match_test "ival < (1 << 7) && ival >= -(1 << 7)")))
+
 (define_constraint "Is10"
   "Signed immediate 10-bit value"
   (and (match_code "const_int")
@@ -113,6 +175,10 @@
   (and (match_code "const_int")
        (match_test "ival < (1 << 10) && ival >= -(1 << 10)")))
 
+(define_constraint "Is14"
+  "Signed immediate 14-bit value"
+  (and (match_code "const_int")
+       (match_test "ival < (1 << 13) && ival >= -(1 << 13)")))
 
 (define_constraint "Is15"
   "Signed immediate 15-bit value"
@@ -194,12 +260,21 @@
   (and (match_code "const_int")
        (match_test "ival < (1 << 19) && ival >= -(1 << 19)")))
 
+(define_constraint "Cs20"
+  "Signed immediate 20-bit value"
+  (and (match_code "const_double")
+       (match_test "nds32_const_double_range_ok_p (op, SFmode, -(1 << 19), (1 << 19))")))
 
 (define_constraint "Ihig"
   "The immediate value that can be simply set high 20-bit"
   (and (match_code "const_int")
        (match_test "(ival != 0) && ((ival & 0xfff) == 0)")))
 
+(define_constraint "Chig"
+  "The immediate value that can be simply set high 20-bit"
+  (and (match_code "high")
+       (match_test "GET_CODE (XEXP (op, 0)) == CONST_DOUBLE")))
+
 (define_constraint "Izeb"
   "The immediate value 0xff"
   (and (match_code "const_int")
@@ -213,12 +288,12 @@
 (define_constraint "Ixls"
   "The immediate value 0x01"
   (and (match_code "const_int")
-       (match_test "TARGET_PERF_EXT && (ival == 0x1)")))
+       (match_test "TARGET_EXT_PERF && (ival == 0x1)")))
 
 (define_constraint "Ix11"
   "The immediate value 0x7ff"
   (and (match_code "const_int")
-       (match_test "TARGET_PERF_EXT && (ival == 0x7ff)")))
+       (match_test "TARGET_EXT_PERF && (ival == 0x7ff)")))
 
 (define_constraint "Ibms"
   "The immediate value with power of 2"
@@ -232,23 +307,70 @@
        (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M)
 		    && (IN_RANGE (exact_log2 (ival + 1), 1, 8))")))
 
+(define_constraint "CVp5"
+  "Unsigned immediate 5-bit value for movpi45 instruction with range 16-47"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVp5_p (op)")))
+
+(define_constraint "CVs5"
+  "Signed immediate 5-bit value"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVs5_p (op)")))
+
+(define_constraint "CVs2"
+  "Signed immediate 20-bit value"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVs2_p (op)")))
+
+(define_constraint "CVhi"
+  "The immediate value that can be simply set high 20-bit"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVhi_p (op)")))
 
 (define_memory_constraint "U33"
   "Memory constraint for 333 format"
   (and (match_code "mem")
-       (match_test "nds32_mem_format (op) == ADDRESS_LO_REG_IMM3U")))
+       (match_test "nds32_mem_format (op) == ADDRESS_POST_INC_LO_REG_IMM3U
+		    || nds32_mem_format (op) == ADDRESS_POST_MODIFY_LO_REG_IMM3U
+		    || nds32_mem_format (op) == ADDRESS_LO_REG_IMM3U")))
 
 (define_memory_constraint "U45"
   "Memory constraint for 45 format"
   (and (match_code "mem")
        (match_test "(nds32_mem_format (op) == ADDRESS_REG)
-		    && (GET_MODE (op) == SImode)")))
+		    && ((GET_MODE (op) == SImode)
+		       || (GET_MODE (op) == SFmode))")))
+
+(define_memory_constraint "Ufe"
+  "Memory constraint for fe format"
+  (and (match_code "mem")
+       (match_test "nds32_mem_format (op) == ADDRESS_R8_IMM7U
+		    && (GET_MODE (op) == SImode
+			|| GET_MODE (op) == SFmode)")))
 
 (define_memory_constraint "U37"
   "Memory constraint for 37 format"
   (and (match_code "mem")
        (match_test "(nds32_mem_format (op) == ADDRESS_SP_IMM7U
 		    || nds32_mem_format (op) == ADDRESS_FP_IMM7U)
-		    && (GET_MODE (op) == SImode)")))
+		    && (GET_MODE (op) == SImode
+			|| GET_MODE (op) == SFmode)")))
+
+(define_memory_constraint "Umw"
+  "Memory constraint for lwm/smw"
+  (and (match_code "mem")
+       (match_test "nds32_valid_smw_lwm_base_p (op)")))
+
+(define_memory_constraint "Da"
+  "Memory constraint for non-offset loads/stores"
+  (and (match_code "mem")
+       (match_test "REG_P (XEXP (op, 0))
+		    || (GET_CODE (XEXP (op, 0)) == POST_INC)")))
+
+(define_memory_constraint "Q"
+  "Memory constraint for no symbol_ref and const"
+  (and (match_code "mem")
+       (match_test "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
+		     && nds32_float_mem_operand_p (op)")))
 
 ;; ------------------------------------------------------------------------
diff --git a/gcc/config/nds32/elf.h b/gcc/config/nds32/elf.h
new file mode 100644
index 0000000..315dcd8
--- /dev/null
+++ b/gcc/config/nds32/elf.h
@@ -0,0 +1,83 @@
+/* Definitions of target machine of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+
+/* ------------------------------------------------------------------------ */
+
+#define TARGET_LINUX_ABI 0
+
+/* In the configure stage we may use options --enable-default-relax,
+   --enable-Os-default-ifc and --enable-Os-default-ex9.  They affect
+   the default spec of passing --relax, --mifc, and --mex9 to linker.
+   We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC
+   so that we can customize them conveniently.  */
+#define LINK_SPEC \
+  " %{G*}" \
+  " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \
+  " %{shared:-shared}" \
+  NDS32_RELAX_SPEC \
+  NDS32_IFC_SPEC \
+  NDS32_EX9_SPEC
+
+#define LIB_SPEC \
+  " -lc -lgloss"
+
+#define LIBGCC_SPEC \
+  " -lgcc"
+
+/* The option -mno-ctor-dtor can disable constructor/destructor feature
+   by applying different crt stuff.  In the convention, crt0.o is the
+   startup file without constructor/destructor;
+   crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the
+   startup files with constructor/destructor.
+   Note that crt0.o, crt1.o, crti.o, and crtn.o are provided
+   by newlib/mculib/glibc/uclibc, while crtbegin.o and crtend.o are
+   currently provided by GCC for nds32 target.
+
+   For nds32 target so far:
+   If -mno-ctor-dtor, we are going to link
+   "crt0.o [user objects]".
+   If -mctor-dtor, we are going to link
+   "crt1.o crtbegin1.o [user objects] crtend1.o".
+
+   Note that the TARGET_DEFAULT_CTOR_DTOR would affect the
+   default behavior.  Check gcc/config.gcc for more information.  */
+#ifdef TARGET_DEFAULT_CTOR_DTOR
+  #define STARTFILE_SPEC \
+    " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
+    " %{!mno-ctor-dtor:crtbegin1.o%s}" \
+    " %{mcrt-arg:crtarg.o%s}"
+  #define ENDFILE_SPEC \
+    " %{!mno-ctor-dtor:crtend1.o%s}"
+#else
+  #define STARTFILE_SPEC \
+    " %{mctor-dtor|coverage:crt1.o%s;:crt0.o%s}" \
+    " %{mctor-dtor|coverage:crtbegin1.o%s}" \
+    " %{mcrt-arg:crtarg.o%s}"
+  #define ENDFILE_SPEC \
+    " %{mctor-dtor|coverage:crtend1.o%s}"
+#endif
+
+#define STARTFILE_CXX_SPEC \
+  " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
+  " %{!mno-ctor-dtor:crtbegin1.o%s}" \
+  " %{mcrt-arg:crtarg.o%s}"
+#define ENDFILE_CXX_SPEC \
+  " %{!mno-ctor-dtor:crtend1.o%s}"
diff --git a/gcc/config/nds32/iterators.md b/gcc/config/nds32/iterators.md
index ab0f103..6023b9c 100644
--- a/gcc/config/nds32/iterators.md
+++ b/gcc/config/nds32/iterators.md
@@ -26,30 +26,99 @@
 ;; A list of integer modes that are up to one word long.
 (define_mode_iterator QIHISI [QI HI SI])
 
+;; A list of integer modes for one word and double word.
+(define_mode_iterator SIDI [SI DI])
+
 ;; A list of integer modes that are up to one half-word long.
 (define_mode_iterator QIHI [QI HI])
 
 ;; A list of the modes that are up to double-word long.
 (define_mode_iterator DIDF [DI DF])
 
+;; A list of the modes that are up to one word long vector.
+(define_mode_iterator VQIHI [V4QI V2HI])
+
+;; A list of the modes that are up to one word long vector and scalar.
+(define_mode_iterator VSQIHI [V4QI V2HI QI HI])
+
+(define_mode_iterator VSQIHIDI [V4QI V2HI QI HI DI])
+
+(define_mode_iterator VQIHIDI [V4QI V2HI DI])
+
+;; A list of the modes that are up to one word long vector
+;; and scalar for HImode.
+(define_mode_iterator VSHI [V2HI HI])
+
+;; A list of the modes that are up to double-word long.
+(define_mode_iterator ANYF [(SF "TARGET_FPU_SINGLE")
+			    (DF "TARGET_FPU_DOUBLE")])
 
 ;;----------------------------------------------------------------------------
 ;; Mode attributes.
 ;;----------------------------------------------------------------------------
 
-(define_mode_attr size [(QI "b") (HI "h") (SI "w")])
+(define_mode_attr size [(QI "b") (HI "h") (SI "w") (SF "s") (DF "d")])
 
-(define_mode_attr byte [(QI "1") (HI "2") (SI "4")])
+(define_mode_attr byte [(QI "1") (HI "2") (SI "4") (V4QI "4") (V2HI "4")])
 
+(define_mode_attr bits [(V4QI "8") (QI "8") (V2HI "16") (HI "16") (DI "64")])
+
+(define_mode_attr VELT [(V4QI "QI") (V2HI "HI")])
 
 ;;----------------------------------------------------------------------------
 ;; Code iterators.
 ;;----------------------------------------------------------------------------
 
+;; shifts
+(define_code_iterator shift_rotate [ashift ashiftrt lshiftrt rotatert])
+
+(define_code_iterator shifts [ashift ashiftrt lshiftrt])
+
+(define_code_iterator shiftrt [ashiftrt lshiftrt])
+
+(define_code_iterator sat_plus [ss_plus us_plus])
+
+(define_code_iterator all_plus [plus ss_plus us_plus])
+
+(define_code_iterator sat_minus [ss_minus us_minus])
+
+(define_code_iterator all_minus [minus ss_minus us_minus])
+
+(define_code_iterator plus_minus [plus minus])
+
+(define_code_iterator extend [sign_extend zero_extend])
+
+(define_code_iterator sumax [smax umax])
+
+(define_code_iterator sumin [smin umin])
+
+(define_code_iterator sumin_max [smax umax smin umin])
 
 ;;----------------------------------------------------------------------------
 ;; Code attributes.
 ;;----------------------------------------------------------------------------
 
+;; shifts
+(define_code_attr shift
+  [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr") (rotatert "rotr")])
+
+(define_code_attr su
+  [(ashiftrt "") (lshiftrt "u") (sign_extend "s") (zero_extend "u")])
+
+(define_code_attr zs
+  [(sign_extend "s") (zero_extend "z")])
+
+(define_code_attr uk
+  [(plus "") (ss_plus "k") (us_plus "uk")
+   (minus "") (ss_minus "k") (us_minus "uk")])
+
+(define_code_attr opcode
+  [(plus "add") (minus "sub") (smax "smax") (umax "umax") (smin "smin") (umin "umin")])
+
+(define_code_attr add_rsub
+  [(plus "a") (minus "rs")])
+
+(define_code_attr add_sub
+  [(plus "a") (minus "s")])
 
 ;;----------------------------------------------------------------------------
diff --git a/gcc/config/nds32/linux.h b/gcc/config/nds32/linux.h
new file mode 100644
index 0000000..36ddf2f
--- /dev/null
+++ b/gcc/config/nds32/linux.h
@@ -0,0 +1,78 @@
+/* Definitions of target machine of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+
+/* ------------------------------------------------------------------------ */
+
+#define TARGET_LINUX_ABI 1
+
+#undef  SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef  PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#ifdef TARGET_DEFAULT_TLSDESC_TRAMPOLINE
+  #define NDS32_TLSDESC_TRAMPOLINE_SPEC \
+    " %{!mno-tlsdesc-trampoline:--mtlsdesc-trampoline}"
+#else
+  #define NDS32_TLSDESC_TRAMPOLINE_SPEC ""
+#endif
+
+#define TARGET_OS_CPP_BUILTINS()                \
+  do                                            \
+    {                                           \
+      GNU_USER_TARGET_OS_CPP_BUILTINS();           \
+    }                                           \
+  while (0)
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+
+/* In the configure stage we may use options --enable-default-relax,
+   --enable-Os-default-ifc and --enable-Os-default-ex9.  They affect
+   the default spec of passing --relax, --mifc, and --mex9 to linker.
+   We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC
+   so that we can customize them conveniently.  */
+#define LINK_SPEC \
+ " %{G*}" \
+ " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \
+ " %{shared:-shared} \
+  %{!shared: \
+    %{!static: \
+      %{rdynamic:-export-dynamic} \
+      -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \
+    %{static:-static}}" \
+  NDS32_RELAX_SPEC \
+  NDS32_IFC_SPEC \
+  NDS32_EX9_SPEC \
+  NDS32_TLSDESC_TRAMPOLINE_SPEC
+
+#define LINK_PIE_SPEC "%{pie:%{!fno-pie:%{!fno-PIE:%{!static:-pie}}}} "
+
+
+/* The SYNC operations are implemented as library functions, not
+   INSN patterns.  As a result, the HAVE defines for the patterns are
+   not defined.  We need to define them to generate the corresponding
+   __GCC_HAVE_SYNC_COMPARE_AND_SWAP_* and __GCC_ATOMIC_*_LOCK_FREE
+   defines.
+   Ref: https://sourceware.org/ml/libc-alpha/2014-09/msg00322.html  */
+#define HAVE_sync_compare_and_swapqi 1
+#define HAVE_sync_compare_and_swaphi 1
+#define HAVE_sync_compare_and_swapsi 1
diff --git a/gcc/config/nds32/nds32-abi-compatible.c b/gcc/config/nds32/nds32-abi-compatible.c
new file mode 100644
index 0000000..f2ed006
--- /dev/null
+++ b/gcc/config/nds32/nds32-abi-compatible.c
@@ -0,0 +1,315 @@
+/* A Gimple-level pass of Andes NDS32 cpu for GNU compiler.
+   This pass collects the usage of float-point.
+
+   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "hash-set.h"
+#include "machmode.h"
+#include "vec.h"
+#include "double-int.h"
+#include "input.h"
+#include "alias.h"
+#include "symtab.h"
+#include "wide-int.h"
+#include "inchash.h"
+#include "tree.h"
+#include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"   /* Required by recog.h.  */
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"     /* For DFA state_t.  */
+#include "insn-codes.h"    /* For CODE_FOR_xxx.  */
+#include "reload.h"     /* For push_reload ().  */
+#include "flags.h"
+#include "input.h"
+#include "function.h"
+#include "expr.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "cfgbuild.h"
+#include "cfgcleanup.h"
+#include "predict.h"
+#include "basic-block.h"
+#include "bitmap.h"
+#include "df.h"
+#include "tm_p.h"
+#include "tm-constrs.h"
+#include "optabs.h"		/* For GEN_FCN.  */
+#include "target.h"
+#include "langhooks.h"     /* For add_builtin_function ().  */
+#include "ggc.h"
+#include "tree-pass.h"
+#include "tree-ssa-alias.h"
+#include "fold-const.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "gimple-ssa.h"
+#include "ipa-ref.h"
+#include "lto-streamer.h"
+#include "cgraph.h"
+#include "tree-cfg.h"
+#include "tree-phinodes.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
+#include "tree-pass.h"
+#include "gimple-pretty-print.h"
+#include "gimple-walk.h"
+
+/* Indicate whether the translation unit includes any floating-point
+   arithmetic.  */
+bool nds32_include_fp_arith = false;
+
+/* Return true if the return type and argument types of current function
+   pass the inspection.  Furthermore, the global value NDS32_INCLUDE_FP_ARITH
+   is modified.  */
+
+static bool
+nds32_acd_func_rtn_args_check (tree fn_decl)
+{
+  tree fn_type = TREE_TYPE (fn_decl);
+  function_args_iterator iter;
+  tree arg_type = NULL_TREE;
+  tree rtn_type = NULL_TREE;
+  unsigned argno = 1;
+
+  gcc_assert (fn_type);
+
+  rtn_type = TREE_TYPE (fn_type);
+  if (dump_file)
+    {
+      fprintf (dump_file,
+	       " Check the return & arguments for function %s\n"
+	       "  Prototype:",
+	       fndecl_name (fn_decl));
+      print_generic_decl (dump_file, fn_decl, 0);
+      fprintf (dump_file, "\n");
+    }
+
+  /* Check the return type.  */
+  if (FLOAT_TYPE_P (rtn_type)
+      || RECORD_OR_UNION_TYPE_P (rtn_type))
+    {
+      if (dump_file)
+	fprintf (dump_file, "  ! Return type is FP or record/union type\n");
+      nds32_include_fp_arith = true;
+
+      return false;
+    }
+
+  /* Check if the function has a variable argument list.  */
+  if (stdarg_p (fn_type))
+    {
+      if (dump_file)
+	fprintf (dump_file, "  ! Has variable argument list (i.e. ,...)\n");
+      nds32_include_fp_arith = true;
+
+      return false;
+    }
+
+  /* Check the arguments.  */
+  FOREACH_FUNCTION_ARGS (fn_type, arg_type, iter)
+    {
+      if (arg_type == void_type_node)
+	break;
+
+      if (FLOAT_TYPE_P (arg_type)
+	  || RECORD_OR_UNION_TYPE_P (arg_type))
+	{
+	  if (dump_file)
+	    fprintf (dump_file,
+		     "  ! No.%d argument is FP or record/union type\n",
+		     argno);
+	  nds32_include_fp_arith = true;
+
+	  return false;
+	}
+      argno++;
+    }
+
+  if (dump_file)
+    fprintf (dump_file,
+	     "  >> Pass the inspection of return & arguments type\n");
+
+  return true;
+}
+
+/* Helper for nds32_abi_compatible.  Return *TP if it is a
+   floating-point-related operand.  */
+
+static tree
+nds32_acd_walk_op_fn (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
+{
+  tree t = *tp;
+
+  if (t && TREE_TYPE (t)
+      && (FLOAT_TYPE_P (TREE_TYPE (t))
+	  || TREE_CODE (t) == REAL_CST
+	  || TREE_CODE (t) == COMPLEX_CST
+	  || TREE_CODE (t) == FLOAT_EXPR
+	  || TREE_CODE (t) == REALPART_EXPR))
+    {
+      *walk_subtrees = 0;
+      return t;
+    }
+
+  return NULL_TREE;
+}
+
+/* Helper for nds32_abi_compatible.  Return a non-NULL tree and set
+   *HANDLED_OPS_P to true if *GSI_P is an ASM stmt or a call that does
+   not pass the return/argument type inspection.  */
+
+static tree
+nds32_acd_walk_stmt_fn (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
+		       	struct walk_stmt_info *wi ATTRIBUTE_UNUSED)
+{
+  gimple *stmt = gsi_stmt (*gsi_p);
+
+  switch (gimple_code (stmt))
+    {
+    case GIMPLE_DEBUG:
+      *handled_ops_p = true;
+      break;
+
+    case GIMPLE_ASM:
+      *handled_ops_p = true;
+      return (tree) -1;
+      break;
+
+    case GIMPLE_CALL:
+	{
+	  tree call_decl = gimple_call_fndecl (stmt);
+	  if (!call_decl
+	      || !nds32_acd_func_rtn_args_check (call_decl))
+	    {
+	      *handled_ops_p = true;
+	      return call_decl;
+	    }
+	}
+      break;
+
+    default:
+      break;
+    }
+
+  return NULL_TREE;
+}
+
+/* This function is the entry of ABI compatible detection pass.  */
+
+static int
+nds32_abi_compatible (void)
+{
+  basic_block bb;
+  struct walk_stmt_info wi;
+
+  memset (&wi, 0, sizeof (wi));
+
+  if (!nds32_acd_func_rtn_args_check (current_function_decl))
+    return 0;
+
+  if (dump_file)
+    fprintf (dump_file, "Check function body %s\n",
+	     function_name (cfun));
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      gimple *ret;
+      gimple_seq seq = bb_seq (bb);
+
+      ret = walk_gimple_seq (seq,
+			     nds32_acd_walk_stmt_fn,
+			     nds32_acd_walk_op_fn,
+			     &wi);
+      if (ret != NULL)
+	{
+	  if (dump_file)
+	    {
+	      fprintf (dump_file, " ! NO PASS: ");
+	      print_gimple_stmt (dump_file, ret, 0, TDF_SLIM|TDF_RAW);
+	    }
+	  nds32_include_fp_arith = true;
+	  break;
+	}
+    }
+
+  if (dump_file)
+    if (!nds32_include_fp_arith)
+      fprintf (dump_file,
+	       " >> Pass the inspection of FP operand for function body\n");
+
+  return 0;
+}
+
+static bool
+gate_nds32_abi_compatible (void)
+{
+  return flag_nds32_abi_compatible
+    && !nds32_include_fp_arith;
+}
+
+const pass_data pass_data_nds32_abi_compatible =
+{
+  GIMPLE_PASS,				/* type */
+  "abi_compatible",			/* name */
+  OPTGROUP_NONE,			/* optinfo_flags */
+  TV_MACH_DEP,				/* tv_id */
+  ( PROP_cfg | PROP_ssa ),		/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  0,					/* todo_flags_finish */
+};
+
+class pass_nds32_abi_compatible : public gimple_opt_pass
+{
+public:
+  pass_nds32_abi_compatible (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_nds32_abi_compatible, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate (function *) { return gate_nds32_abi_compatible (); }
+  unsigned int execute (function *) { return nds32_abi_compatible (); }
+};
+
+gimple_opt_pass *
+make_pass_nds32_abi_compatible (gcc::context *ctxt)
+{
+  return new pass_nds32_abi_compatible (ctxt);
+}
diff --git a/gcc/config/nds32/nds32-const-remater.c b/gcc/config/nds32/nds32-const-remater.c
new file mode 100644
index 0000000..760e567
--- /dev/null
+++ b/gcc/config/nds32/nds32-const-remater.c
@@ -0,0 +1,461 @@
+/* Global CSE pass of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* ------------------------------------------------------------------------ */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "rtl.h"
+#include "df.h"
+#include "alias.h"
+#include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
+#include "regs.h"
+#include "insn-config.h"	/* Required by recog.h.  */
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"		/* For DFA state_t.  */
+#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
+#include "reload.h"		/* For push_reload().  */
+#include "flags.h"
+#include "insn-config.h"
+#include "expmed.h"
+#include "dojump.h"
+#include "explow.h"
+#include "emit-rtl.h"
+#include "stmt.h"
+#include "expr.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "cfgbuild.h"
+#include "cfgcleanup.h"
+#include "tm_p.h"
+#include "tm-constrs.h"
+#include "optabs.h"		/* For GEN_FCN.  */
+#include "target.h"
+#include "langhooks.h"		/* For add_builtin_function().  */
+#include "builtins.h"
+#include "cpplib.h"
+#include "params.h"
+#include "tree-pass.h"
+#include "dbgcnt.h"
+#include "df.h"
+#include "tm-constrs.h"
+
+/* ------------------------------------------------------------------------ */
+
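+/* A register-value record kept by this pass: INSN is the instruction that
+   defines the value, UINT is the constant it sets, and REGNO is the number
+   of the destination register.  */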
+typedef struct reg_avail_info
+{
+  rtx insn;
+  unsigned int uint;
+  unsigned int regno;
+} reg_avail_info_t;
+
+
+static void find_common_const (void);
+static bool try_rematerialize (rtx_insn *, unsigned int,
+			       auto_vec<reg_avail_info_t> *);
+static void clean_reg_avail_info (rtx, const_rtx, void *);
+static rtx get_const (rtx);
+static bool addsi3_format_p (rtx);
+
+/* Search the register records in REG_AVAIL_INFOS for a register that
+   already holds a constant related to UINT_R; if one is found, emit a
+   cheaper shift/or/and/add from that register before INSN.  */
+static bool
+try_rematerialize (rtx_insn *insn, unsigned int uint_r,
+		   auto_vec<reg_avail_info_t> *reg_avail_infos)
+{
+  unsigned int i, uint_i, cl_i, cl_r, ct_i, ct_r;
+  rtx pat, src, dest, new_insn;
+  bool done = FALSE;
+  df_ref df_rec;
+  df_link *link;
+
+  cl_r = __builtin_clz (uint_r);
+  ct_r = __builtin_ctz (uint_r);
+  for (i = 0; i < reg_avail_infos->length (); ++i)
+    {
+      if ((*reg_avail_infos)[i].uint != uint_r)
+	{
+	  uint_i = (*reg_avail_infos)[i].uint;
+	  if (dump_file)
+	    fprintf (dump_file, "Try rematerialize %08x with const %08x\n",
+		     uint_r, uint_i);
+	  cl_i = __builtin_clz (uint_i);
+	  ct_i = __builtin_ctz (uint_i);
+	  src = SET_DEST (PATTERN ((*reg_avail_infos)[i].insn));
+	  dest = SET_DEST (PATTERN (insn));
+
+	  if (cl_r > cl_i
+	      && (uint_i >> (cl_r - cl_i)) == uint_r)
+	    {
+	      /* Right shift logical.  */
+	      pat = gen_rtx_LSHIFTRT (SImode, src, GEN_INT (cl_r - cl_i));
+	      done = TRUE;
+	      if (dump_file)
+		fprintf (dump_file,
+			 "Rematerialize %08x with const %08x by l>> %d\n",
+			 uint_r, uint_i, (cl_r - cl_i));
+	    }
+	  else if (ct_i >= ct_r
+		   && ((int) uint_i >> (ct_i - ct_r)) == (int) uint_r)
+	    {
+	      /* Right shift arithmetic.  */
+	      pat = gen_rtx_ASHIFTRT (SImode, src, GEN_INT (ct_i - ct_r));
+	      done = TRUE;
+	      if (dump_file)
+		fprintf (dump_file,
+			 "Rematerialize %08x with const %08x by a>> %d\n",
+			 uint_r, uint_i, (ct_i - ct_r));
+	    }
+	  else if (ct_r > ct_i
+		   && (uint_i << (ct_r - ct_i)) == uint_r)
+	    {
+	      /* Left shift.  */
+	      pat = gen_rtx_ASHIFT (SImode, src, GEN_INT (ct_r - ct_i));
+	      done = TRUE;
+	      if (dump_file)
+		fprintf (dump_file,
+			 "Rematerialize %08x with const %08x by << %d\n",
+			uint_r, uint_i, (ct_r - ct_i));
+	    }
+	  else if (TARGET_EXT_PERF && __builtin_popcount (uint_r ^ uint_i) == 1)
+	    {
+	      unsigned int val = uint_r ^ uint_i;
+	      if ((uint_r & (uint_r ^ uint_i)) != 0)
+		{
+		  if (val > (1 << 5))
+		    {
+		      /* Bit set.  */
+		      pat = gen_rtx_IOR (SImode, src, GEN_INT (val));
+		      done = TRUE;
+		      if (dump_file)
+			fprintf (dump_file,
+				 "Rematerialize %08x with const %08x by | %08x\n",
+				 uint_r, uint_i, uint_r ^ uint_i);
+		    }
+		  else
+		    {
+		      /* Transform to plus if immediate can fit addi45.  */
+		      pat = gen_rtx_PLUS (SImode, src, GEN_INT (val));
+		      done = TRUE;
+		      if (dump_file)
+			fprintf (dump_file,
+				 "Rematerialize %08x with const %08x by + %08x\n",
+				 uint_r, uint_i, uint_r ^ uint_i);
+		    }
+		}
+	      else
+		{
+		  if (val > (1 << 5))
+		    {
+		      /* Bit clear.  */
+		      pat = gen_rtx_AND (SImode, src, GEN_INT (~(uint_r ^ uint_i)));
+		      done = TRUE;
+		      if (dump_file)
+			fprintf (dump_file,
+				 "Rematerialize %08x with const %08x by & %08x\n",
+				 uint_r, uint_i, ~(uint_r ^ uint_i));
+		    }
+		  else
+		    {
+		      /* Transform to plus if immediate can fit subi45.  */
+		      pat = gen_rtx_PLUS (SImode, src, GEN_INT ((int) -val));
+		      done = TRUE;
+		      if (dump_file)
+			fprintf (dump_file,
+				 "Rematerialize %08x with const %08x by - %08x\n",
+				 uint_r, uint_i, uint_r ^ uint_i);
+		    }
+		}
+	    }
+	  else if  ((uint_r > uint_i ? uint_r - uint_i
+		     : uint_i - uint_r) < 0x4000)
+	    {
+	      /* Check insn_info existence because the instruction
+		 may have been deleted.  */
+	      if (DF_INSN_INFO_GET ((*reg_avail_infos)[i].insn))
+		{
+		  df_rec = DF_INSN_DEFS ((*reg_avail_infos)[i].insn);
+		  link = DF_REF_CHAIN (df_rec);
+
+		  /* Do not use the dead instruction. */
+		  /* Do not use the original matched sethi.  */
+		  if (!link)
+		    continue;
+		  for (link = DF_REF_CHAIN (df_rec); link; link = link->next)
+		    {
+		      if (DF_REF_REGNO (link->ref) == 0
+			  || !DF_REF_INSN_INFO (link->ref)
+			  || DF_REF_INSN (link->ref) == insn)
+			break;
+		    }
+		  if (link)
+		    continue;
+		}
+
+	      /* Add.  */
+	      if (uint_r > uint_i)
+		{
+		  pat = gen_rtx_PLUS (SImode, src, GEN_INT (uint_r - uint_i));
+		  done = TRUE;
+		}
+	      else
+		{
+		  pat = gen_rtx_PLUS (SImode, src, GEN_INT ((HOST_WIDE_INT)
+							    uint_r - uint_i));
+		  done = TRUE;
+		}
+	    }
+
+	  if (done)
+	    {
+	      /* Emit the new instruction.  */
+	      new_insn = gen_move_insn (dest, pat);
+	      emit_insn_before (new_insn, insn);
+	      set_dst_reg_note (new_insn, REG_EQUAL, GEN_INT (uint_r), dest);
+	      return TRUE;
+	    }
+	}
+    }
+  return FALSE;
+}
+
+/* Clean the reg_avail_info value.  */
+static void
+clean_reg_avail_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
+		      void *data)
+{
+  unsigned int i;
+  auto_vec<reg_avail_info_t> *reg_avail_infos =
+    (auto_vec<reg_avail_info_t> *) data;
+
+  if (GET_CODE (dest) == SUBREG)
+    dest = SUBREG_REG (dest);
+
+  if (REG_P (dest))
+    for (i = 0; i < reg_avail_infos->length (); ++i)
+      if ((*reg_avail_infos)[i].regno == REGNO (dest)
+	  || (GET_MODE_SIZE (GET_MODE (dest)) == 8
+	      && (*reg_avail_infos)[i].regno == REGNO (dest) + 1))
+	reg_avail_infos->unordered_remove (i--);
+}
+
+/* Return the constant if INSN sets an SImode register to a constant
+   integer, either directly or via a REG_EQUAL/REG_EQUIV note.  */
+static rtx
+get_const (rtx insn)
+{
+  rtx note;
+
+  if (GET_CODE (PATTERN (insn)) != SET
+      || !REG_P (SET_DEST (PATTERN (insn)))
+      || GET_MODE (SET_DEST (PATTERN (insn))) != SImode)
+    return NULL_RTX;
+
+  /* Constant move instruction.  */
+  if (CONST_INT_P (XEXP (PATTERN (insn), 1)))
+    return XEXP (PATTERN (insn), 1);
+
+  note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
+  if (!note)
+    note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
+
+  if (note && CONST_INT_P (XEXP (note, 0)))
+    return XEXP (note, 0);
+
+  return NULL_RTX;
+}
+
+/* Return true if the instruction is addi format.  */
+static bool
+addsi3_format_p (rtx insn)
+{
+  if (GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
+      && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT)
+    return TRUE;
+
+  return FALSE;
+}
+
+/* Return true if the instruction is sethi format.  */
+static bool
+sethi_format_p (rtx insn)
+{
+  if (GET_CODE (PATTERN (insn)) == SET
+      && GET_CODE (XEXP (PATTERN (insn), 1)) == CONST_INT
+      && satisfies_constraint_Ihig (XEXP (PATTERN (insn), 1)))
+    return TRUE;
+  return FALSE;
+}
+
+/* Return true if the register used by INSN is defined by a sethi
+   instruction whose definition is used only by INSN.  */
+static bool
+use_only_p (rtx insn)
+{
+  rtx def_insn;
+  df_ref rec;
+  df_link *link;
+  rec = DF_INSN_USES (insn);
+  link = DF_REF_CHAIN (rec);
+
+  if (!link
+      || DF_REF_REGNO (link->ref) == 0
+      || !DF_REF_INSN_INFO (link->ref))
+    return FALSE;
+
+  def_insn = DF_REF_INSN (link->ref);
+
+  if (!sethi_format_p (def_insn))
+    return FALSE;
+
+  rec = DF_INSN_DEFS (def_insn);
+  link = DF_REF_CHAIN (rec);
+
+  if (!link
+      || link->next
+      || DF_REF_REGNO (link->ref) == 0
+      || !DF_REF_INSN_INFO (link->ref))
+    return FALSE;
+
+  return TRUE;
+}
+
+/* Traverse the instructions in each basic block and record the values
+   set by constant-setting instructions.  */
+static void
+find_common_const (void)
+{
+  basic_block bb;
+  unsigned int i;
+
+  /* Save register constant value.  */
+  auto_vec<reg_avail_info_t> reg_avail_infos;
+  reg_avail_info_t reg_avail_info;
+
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      rtx_insn *insn;
+      rtx dest, cst;
+
+      /* Clear the vector.  */
+      while (!reg_avail_infos.is_empty ())
+	reg_avail_infos.pop ();
+
+      FOR_BB_INSNS (bb, insn)
+	{
+	  if (!NONDEBUG_INSN_P (insn))
+	    continue;
+
+	  if (CALL_P (insn))
+	    {
+	      /* Clean hard register.  */
+	      for (i = 0; i < reg_avail_infos.length ();)
+		{
+		  if (HARD_REGISTER_NUM_P (reg_avail_infos[i].regno)
+		      && call_used_regs[reg_avail_infos[i].regno])
+		    reg_avail_infos.unordered_remove (i);
+		  else
+		    ++i;
+		}
+	    }
+
+	  cst = get_const (insn);
+	  if (cst == NULL_RTX)
+	    {
+	      note_stores (PATTERN (insn), clean_reg_avail_info,
+			   &reg_avail_infos);
+	      continue;
+	    }
+
+	  dest = SET_DEST (PATTERN (insn));
+
+	  if (addsi3_format_p (insn)
+	      && use_only_p (insn)
+	      && try_rematerialize (insn, XUINT (cst, 0), &reg_avail_infos))
+	    {
+	      delete_insn (insn);
+	      df_insn_rescan_all ();
+	    }
+
+	  note_stores (PATTERN (insn), clean_reg_avail_info, &reg_avail_infos);
+	  reg_avail_info.insn = insn;
+	  reg_avail_info.uint = XUINT (cst, 0);
+	  reg_avail_info.regno = REGNO (dest);
+	  if (dump_file)
+	    fprintf (dump_file, "Find const %08x on %u\n",
+		     reg_avail_info.uint, reg_avail_info.regno);
+	  reg_avail_infos.safe_push (reg_avail_info);
+	}
+    }
+}
+
+static unsigned int
+nds32_const_remater_opt (void)
+{
+  df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
+  df_note_add_problem ();
+  df_insn_rescan_all ();
+  df_analyze ();
+
+  find_common_const ();
+
+  df_insn_rescan_all ();
+  return 0;
+}
+
+const pass_data pass_data_nds32_const_remater_opt =
+{
+  RTL_PASS,				/* type */
+  "const_remater_opt",				/* name */
+  OPTGROUP_NONE,			/* optinfo_flags */
+  TV_MACH_DEP,				/* tv_id */
+  0,					/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  TODO_df_finish,	/* todo_flags_finish */
+};
+
+class pass_nds32_const_remater_opt : public rtl_opt_pass
+{
+public:
+  pass_nds32_const_remater_opt (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_nds32_const_remater_opt, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate (function *) { return flag_nds32_const_remater_opt; }
+  unsigned int execute (function *) { return nds32_const_remater_opt (); }
+};
+
+rtl_opt_pass *
+make_pass_nds32_const_remater_opt (gcc::context *ctxt)
+{
+  return new pass_nds32_const_remater_opt (ctxt);
+}
+
+/* ------------------------------------------------------------------------ */
diff --git a/gcc/config/nds32/nds32-cost.c b/gcc/config/nds32/nds32-cost.c
index e6a29fc..881d086 100644
--- a/gcc/config/nds32/nds32-cost.c
+++ b/gcc/config/nds32/nds32-cost.c
@@ -24,73 +24,447 @@
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
-#include "target.h"
-#include "rtl.h"
 #include "tree.h"
-#include "tm_p.h"
-#include "optabs.h"		/* For GEN_FCN.  */
+#include "rtl.h"
+#include "df.h"
+#include "alias.h"
+#include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
+#include "regs.h"
+#include "insn-config.h"	/* Required by recog.h.  */
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"		/* For DFA state_t.  */
+#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
+#include "reload.h"		/* For push_reload().  */
+#include "flags.h"
+#include "insn-config.h"
+#include "expmed.h"
+#include "dojump.h"
+#include "explow.h"
+#include "emit-rtl.h"
+#include "stmt.h"
+#include "expr.h"
 #include "recog.h"
+#include "diagnostic-core.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "cfgbuild.h"
+#include "cfgcleanup.h"
+#include "tm_p.h"
 #include "tm-constrs.h"
+#include "optabs.h"		/* For GEN_FCN.  */
+#include "target.h"
+#include "langhooks.h"		/* For add_builtin_function().  */
+#include "builtins.h"
+#include "tree-pass.h"
 
 /* ------------------------------------------------------------------------ */
 
-bool
-nds32_rtx_costs_impl (rtx x,
-		      machine_mode mode ATTRIBUTE_UNUSED,
-		      int outer_code,
-		      int opno ATTRIBUTE_UNUSED,
-		      int *total,
-		      bool speed)
-{
-  int code = GET_CODE (x);
+typedef bool (*rtx_cost_func) (rtx, int, int, int, int*);
 
-  /* According to 'speed', goto suitable cost model section.  */
-  if (speed)
-    goto performance_cost;
-  else
-    goto size_cost;
+struct rtx_cost_model_t {
+  rtx_cost_func speed_prefer;
+  rtx_cost_func size_prefer;
+};
 
+static rtx_cost_model_t rtx_cost_model;
 
-performance_cost:
-  /* This is section for performance cost model.  */
+static int insn_size_16bit; /* Initialized in nds32_init_rtx_costs.  */
+static const int insn_size_32bit = 4;
+
+static bool
+nds32_rtx_costs_speed_prefer (rtx x ATTRIBUTE_UNUSED,
+			      int code,
+			      int outer_code ATTRIBUTE_UNUSED,
+			      int opno ATTRIBUTE_UNUSED,
+			      int *total)
+{
+  rtx op0;
+  rtx op1;
+  enum machine_mode mode = GET_MODE (x);
+  /* Scale cost by mode size.  */
+  int cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
 
-  /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4.
-     We treat it as 4-cycle cost for each instruction
-     under performance consideration.  */
   switch (code)
     {
-    case SET:
-      /* For 'SET' rtx, we need to return false
-         so that it can recursively calculate costs.  */
-      return false;
-
     case USE:
       /* Used in combine.c as a marker.  */
       *total = 0;
-      break;
+      return true;
+
+    case CONST_INT:
+      /* When not optimizing for size, we care more about the cost
+	 of hot code, and hot code is often in a loop.  If a constant
+	 operand needs to be forced into a register, we will often be
+	 able to hoist the constant load out of the loop, so the load
+	 should not contribute to the cost.  */
+      if (outer_code == SET || outer_code == PLUS)
+	*total = satisfies_constraint_Is20 (x) ? 0 : 4;
+      else if (outer_code == AND || outer_code == IOR || outer_code == XOR
+	       || outer_code == MINUS)
+	*total = satisfies_constraint_Iu15 (x) ? 0 : 4;
+      else if (outer_code == ASHIFT || outer_code == ASHIFTRT
+	       || outer_code == LSHIFTRT)
+	*total = satisfies_constraint_Iu05 (x) ? 0 : 4;
+      else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE
+	       || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE)
+	*total = satisfies_constraint_Is16 (x) ? 0 : 4;
+      else
+	*total = COSTS_N_INSNS (1);
+      return true;
+
+    case CONST:
+    case LO_SUM:
+    case HIGH:
+    case SYMBOL_REF:
+      *total = COSTS_N_INSNS (1);
+      return true;
+
+    case MEM:
+      *total = COSTS_N_INSNS (1);
+      return true;
+
+    case SET:
+      op0 = SET_DEST (x);
+      op1 = SET_SRC (x);
+      mode = GET_MODE (op0);
+      /* Scale cost by mode size.  */
+      cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
+
+      switch (GET_CODE (op1))
+	{
+	case REG:
+	case SUBREG:
+	  /* Register move and Store instructions.  */
+	  if ((REG_P (op0) || MEM_P (op0))
+	      && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode))
+	    *total = COSTS_N_INSNS (1);
+	  else
+	    *total = cost;
+	  return true;
+
+	case MEM:
+	  /* Load instructions.  */
+	  if (REG_P (op0) && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode))
+	    *total = COSTS_N_INSNS (1);
+	  else
+	    *total = cost;
+	  return true;
+
+	case CONST_INT:
+	  /* movi instruction.  */
+	  if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode))
+	    {
+	      if (satisfies_constraint_Is20 (op1))
+		*total = COSTS_N_INSNS (1) - 1;
+	      else
+		*total = COSTS_N_INSNS (2);
+	    }
+	  else
+	    *total = cost;
+	  return true;
+
+	case CONST:
+	case SYMBOL_REF:
+	case LABEL_REF:
+	  /* la instruction.  */
+	  if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode))
+	    *total = COSTS_N_INSNS (1) - 1;
+	  else
+	    *total = cost;
+	  return true;
+	case VEC_SELECT:
+	  *total = cost;
+	  return true;
+
+	default:
+	  *total = cost;
+	  return true;
+	}
+
+    case PLUS:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+
+      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT
+	       || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT)
+	{
+	  /* ALU_SHIFT */
+	  if (TARGET_PIPELINE_PANTHER)
+	    *total = COSTS_N_INSNS (1);
+	  else
+	    *total = COSTS_N_INSNS (2);
+	}
+      else if ((GET_CODE (op1) == CONST_INT
+		&& satisfies_constraint_Is15 (op1))
+		|| REG_P (op1))
+	/* ADD instructions */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* ADD instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+      return true;
+
+    case MINUS:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+
+      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT
+	       || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT)
+	{
+	  /* ALU_SHIFT */
+	  if (TARGET_PIPELINE_PANTHER)
+	    *total = COSTS_N_INSNS (1);
+	  else
+	    *total = COSTS_N_INSNS (2);
+	}
+      else if ((GET_CODE (op0) == CONST_INT
+		&& satisfies_constraint_Is15 (op0))
+		|| REG_P (op0))
+	/* SUB instructions */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* SUB instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+      return true;
+
+    case TRUNCATE:
      /* TRUNCATE and AND have the same behavior.  */
+      *total = COSTS_N_INSNS (1);
+      return true;
+
+    case AND:
+    case IOR:
+    case XOR:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+
+      if (NDS32_EXT_DSP_P ())
+	{
	  /* We prefer (and (ior) (ior)) over (ior (and) (and)) in order to
	     synthesize the pk** and insb instructions.  */
+	  if (code == AND && GET_CODE (op0) == IOR && GET_CODE (op1) == IOR)
+	    return COSTS_N_INSNS (1);
+
+	  if (code == IOR && GET_CODE (op0) == AND && GET_CODE (op1) == AND)
+	    return COSTS_N_INSNS (10);
+	}
+
+      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if (GET_CODE (op0) == ASHIFT || GET_CODE (op0) == LSHIFTRT)
+	{
+	  /* ALU_SHIFT */
+	  if (TARGET_PIPELINE_PANTHER)
+	    *total = COSTS_N_INSNS (1);
+	  else
+	    *total = COSTS_N_INSNS (2);
+	}
+      else if ((GET_CODE (op1) == CONST_INT
+	       && satisfies_constraint_Iu15 (op1))
+	       || REG_P (op1))
+	/* AND, OR, XOR instructions */
+	*total = COSTS_N_INSNS (1);
+      else if (code == AND || GET_CODE (op0) == NOT)
+	/* BITC instruction */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* AND, OR, XOR instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+      return true;
 
     case MULT:
+      if (GET_MODE (x) == DImode
+	  || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
+	  || GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
+	/* MUL instructions */
+	*total = COSTS_N_INSNS (1);
+      else if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if (outer_code == PLUS || outer_code == MINUS)
+	{
+	  /* ALU_SHIFT */
+	  if (TARGET_PIPELINE_PANTHER)
+	    *total = COSTS_N_INSNS (1);
+	  else
+	    *total = COSTS_N_INSNS (2);
+	}
+      else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
+	       && satisfies_constraint_Iu05 (XEXP (x, 1)))
+	       || REG_P (XEXP (x, 1)))
+	/* MUL instructions */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* MUL instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+
+      if (TARGET_MUL_SLOW)
+	*total += COSTS_N_INSNS (4);
+
+      return true;
+
+    case LSHIFTRT:
+      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if (outer_code == PLUS || outer_code == MINUS
+	       || outer_code == AND || outer_code == IOR
+	       || outer_code == XOR)
+	{
+	  /* ALU_SHIFT */
+	  if (TARGET_PIPELINE_PANTHER)
+	    *total = COSTS_N_INSNS (1);
+	  else
+	    *total = COSTS_N_INSNS (2);
+	}
+      else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
+	       && satisfies_constraint_Iu05 (XEXP (x, 1)))
+	       || REG_P (XEXP (x, 1)))
+	/* SRL instructions */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* SRL instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+      return true;
+
+    case ASHIFT:
+      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if (outer_code == AND || outer_code == IOR
+	       || outer_code == XOR)
+	{
+	  /* ALU_SHIFT */
+	  if (TARGET_PIPELINE_PANTHER)
+	    *total = COSTS_N_INSNS (1);
+	  else
+	    *total = COSTS_N_INSNS (2);
+	}
+      else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
+	       && satisfies_constraint_Iu05 (XEXP (x, 1)))
+	       || REG_P (XEXP (x, 1)))
+	/* SLL instructions */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* SLL instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+      return true;
+
+    case ASHIFTRT:
+    case ROTATERT:
+      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
+	       && satisfies_constraint_Iu05 (XEXP (x, 1)))
+	       || REG_P (XEXP (x, 1)))
+	/* ROTR, SLL instructions */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* ROTR, SLL instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+      return true;
+
+    case LT:
+    case LTU:
+      if (outer_code == SET)
+	{
+	  if ((GET_CODE (XEXP (x, 1)) == CONST_INT
+	      && satisfies_constraint_Iu15 (XEXP (x, 1)))
+	      || REG_P (XEXP (x, 1)))
+	    /* SLT, SLTI instructions */
+	    *total = COSTS_N_INSNS (1);
+	  else
	    /* SLT, SLTI instructions: IMM out of range.  */
+	    *total = COSTS_N_INSNS (2);
+	}
+      else
+	/* branch */
+	*total = COSTS_N_INSNS (2);
+      return true;
+
+    case EQ:
+    case NE:
+    case GE:
+    case LE:
+    case GT:
+      /* branch */
+      *total = COSTS_N_INSNS (2);
+      return true;
+
+    case IF_THEN_ELSE:
+      if (GET_CODE (XEXP (x, 1)) == LABEL_REF)
+	/* branch */
+	*total = COSTS_N_INSNS (2);
+      else
+	/* cmovz, cmovn instructions */
+	*total = COSTS_N_INSNS (1);
+      return true;
+
+    case LABEL_REF:
+      if (outer_code == IF_THEN_ELSE)
+	/* branch */
+	*total = COSTS_N_INSNS (2);
+      else
+	*total = COSTS_N_INSNS (1);
+      return true;
+
+    case ZERO_EXTEND:
+    case SIGN_EXTEND:
+      if (MEM_P (XEXP (x, 0)))
+	/* Using memory access. */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* Zero extend and sign extend instructions.  */
+	*total = COSTS_N_INSNS (1);
+      return true;
+
+    case NEG:
+    case NOT:
       *total = COSTS_N_INSNS (1);
-      break;
+      return true;
 
     case DIV:
     case UDIV:
     case MOD:
     case UMOD:
-      *total = COSTS_N_INSNS (7);
-      break;
+      *total = COSTS_N_INSNS (20);
+      return true;
 
-    default:
+    case CALL:
+      *total = COSTS_N_INSNS (2);
+      return true;
+
+    case CLZ:
+    case SMIN:
+    case SMAX:
+    case ZERO_EXTRACT:
+      if (TARGET_EXT_PERF)
+	*total = COSTS_N_INSNS (1);
+      else
+	*total = COSTS_N_INSNS (3);
+      return true;
+    case VEC_SELECT:
       *total = COSTS_N_INSNS (1);
-      break;
-    }
-
-  return true;
-
+      return true;
 
-size_cost:
-  /* This is section for size cost model.  */
+    default:
+      *total = COSTS_N_INSNS (3);
+      return true;
+    }
+}
 
+static bool
+nds32_rtx_costs_size_prefer (rtx x,
+			     int code,
+			     int outer_code,
+			     int opno ATTRIBUTE_UNUSED,
+			     int *total)
+{
   /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4.
      We treat it as 4-byte cost for each instruction
      under code size consideration.  */
@@ -98,7 +472,7 @@ size_cost:
     {
     case SET:
       /* For 'SET' rtx, we need to return false
-         so that it can recursively calculate costs.  */
+	 so that it can recursively calculate costs.  */
       return false;
 
     case USE:
@@ -108,92 +482,169 @@ size_cost:
 
     case CONST_INT:
       /* All instructions involving constant operation
-         need to be considered for cost evaluation.  */
+	 need to be considered for cost evaluation.  */
       if (outer_code == SET)
 	{
 	  /* (set X imm5s), use movi55, 2-byte cost.
 	     (set X imm20s), use movi, 4-byte cost.
 	     (set X BIG_INT), use sethi/ori, 8-byte cost.  */
 	  if (satisfies_constraint_Is05 (x))
-	    *total = COSTS_N_INSNS (1) - 2;
+	    *total = insn_size_16bit;
 	  else if (satisfies_constraint_Is20 (x))
-	    *total = COSTS_N_INSNS (1);
+	    *total = insn_size_32bit;
 	  else
-	    *total = COSTS_N_INSNS (2);
+	    *total = insn_size_32bit * 2;
 	}
       else if (outer_code == PLUS || outer_code == MINUS)
 	{
 	  /* Possible addi333/subi333 or subi45/addi45, 2-byte cost.
 	     General case, cost 1 instruction with 4-byte.  */
 	  if (satisfies_constraint_Iu05 (x))
-	    *total = COSTS_N_INSNS (1) - 2;
+	    *total = insn_size_16bit;
 	  else
-	    *total = COSTS_N_INSNS (1);
+	    *total = insn_size_32bit;
 	}
       else if (outer_code == ASHIFT)
 	{
 	  /* Possible slli333, 2-byte cost.
 	     General case, cost 1 instruction with 4-byte.  */
 	  if (satisfies_constraint_Iu03 (x))
-	    *total = COSTS_N_INSNS (1) - 2;
+	    *total = insn_size_16bit;
 	  else
-	    *total = COSTS_N_INSNS (1);
+	    *total = insn_size_32bit;
 	}
       else if (outer_code == ASHIFTRT || outer_code == LSHIFTRT)
 	{
 	  /* Possible srai45 or srli45, 2-byte cost.
 	     General case, cost 1 instruction with 4-byte.  */
 	  if (satisfies_constraint_Iu05 (x))
-	    *total = COSTS_N_INSNS (1) - 2;
+	    *total = insn_size_16bit;
 	  else
-	    *total = COSTS_N_INSNS (1);
+	    *total = insn_size_32bit;
 	}
       else
 	{
 	  /* For other cases, simply set it 4-byte cost.  */
-	  *total = COSTS_N_INSNS (1);
+	  *total = insn_size_32bit;
 	}
       break;
 
     case CONST_DOUBLE:
       /* It requires high part and low part processing, set it 8-byte cost.  */
-      *total = COSTS_N_INSNS (2);
+      *total = insn_size_32bit * 2;
+      break;
+
+    case CONST:
+    case SYMBOL_REF:
+      *total = insn_size_32bit * 2;
       break;
 
     default:
       /* For other cases, generally we set it 4-byte cost
-         and stop resurively traversing.  */
-      *total = COSTS_N_INSNS (1);
	 and stop recursively traversing.  */
+      *total = insn_size_32bit;
       break;
     }
 
   return true;
 }
 
-int
-nds32_address_cost_impl (rtx address,
-			 machine_mode mode ATTRIBUTE_UNUSED,
-			 addr_space_t as ATTRIBUTE_UNUSED,
-			 bool speed)
+void
+nds32_init_rtx_costs (void)
+{
+  rtx_cost_model.speed_prefer = nds32_rtx_costs_speed_prefer;
+  rtx_cost_model.size_prefer  = nds32_rtx_costs_size_prefer;
+
+  if (TARGET_16_BIT)
+    insn_size_16bit = 2;
+  else
+    insn_size_16bit = 4;
+}
+
+/* This target hook describes the relative costs of RTL expressions.
+   Return 'true' when all subexpressions of x have been processed.
+   Return 'false' to sum the costs of sub-rtx, plus cost of this operation.
+   Refer to gcc/rtlanal.c for more information.  */
+bool
+nds32_rtx_costs_impl (rtx x,
+		      machine_mode mode ATTRIBUTE_UNUSED,
+		      int outer_code,
+		      int opno,
+		      int *total,
+		      bool speed)
+{
+  int code = GET_CODE (x);
+
+  /* According to 'speed', use suitable cost model section.  */
+  if (speed)
+    return rtx_cost_model.speed_prefer(x, code, outer_code, opno, total);
+  else
+    return rtx_cost_model.size_prefer(x, code, outer_code, opno, total);
+}
+
+
+int
+nds32_address_cost_speed_prefer (rtx address)
 {
   rtx plus0, plus1;
   enum rtx_code code;
 
   code = GET_CODE (address);
 
-  /* According to 'speed', goto suitable cost model section.  */
-  if (speed)
-    goto performance_cost;
-  else
-    goto size_cost;
+  switch (code)
+    {
+    case POST_MODIFY:
+    case POST_INC:
+    case POST_DEC:
+      /* We encourage that rtx contains
+	 POST_MODIFY/POST_INC/POST_DEC behavior.  */
+      return COSTS_N_INSNS (1) - 2;
+
+    case SYMBOL_REF:
+      /* We can have gp-relative load/store for symbol_ref.
+	Have it 4-byte cost.  */
+      return COSTS_N_INSNS (2);
+
+    case CONST:
+      /* It is supposed to be the pattern (const (plus symbol_ref const_int)).
+	 Have it 4-byte cost.  */
+      return COSTS_N_INSNS (2);
+
+    case REG:
+      /* Simply return 4-byte costs.  */
+      return COSTS_N_INSNS (1) - 2;
+
+    case PLUS:
+      /* We do not need to check if the address is a legitimate address,
+	 because this hook is never called with an invalid address.
+	 But we better check the range of
+	 const_int value for cost, if it exists.  */
+      plus0 = XEXP (address, 0);
+      plus1 = XEXP (address, 1);
+
+      if (REG_P (plus0) && CONST_INT_P (plus1))
+	return COSTS_N_INSNS (1) - 2;
+      else if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1))
+	return COSTS_N_INSNS (1) - 1;
+      else if (REG_P (plus0) && REG_P (plus1))
+	return COSTS_N_INSNS (1);
+
+      /* For other 'plus' situation, make it cost 4-byte.  */
+      return COSTS_N_INSNS (1);
 
-performance_cost:
-  /* This is section for performance cost model.  */
+    default:
+      break;
+    }
 
-  /* FALLTHRU, currently we use same cost model as size_cost.  */
+  return COSTS_N_INSNS (4);
 
-size_cost:
-  /* This is section for size cost model.  */
+}
+
+int
+nds32_address_cost_speed_fwprop (rtx address)
+{
+  rtx plus0, plus1;
+  enum rtx_code code;
+
+  code = GET_CODE (address);
 
   switch (code)
     {
@@ -201,18 +652,18 @@ size_cost:
     case POST_INC:
     case POST_DEC:
       /* We encourage that rtx contains
-         POST_MODIFY/POST_INC/POST_DEC behavior.  */
+	 POST_MODIFY/POST_INC/POST_DEC behavior.  */
       return 0;
 
     case SYMBOL_REF:
       /* We can have gp-relative load/store for symbol_ref.
-         Have it 4-byte cost.  */
-      return COSTS_N_INSNS (1);
+	 Have it 4-byte cost.  */
+      return COSTS_N_INSNS (2);
 
     case CONST:
       /* It is supposed to be the pattern (const (plus symbol_ref const_int)).
-         Have it 4-byte cost.  */
-      return COSTS_N_INSNS (1);
+	 Have it 4-byte cost.  */
+      return COSTS_N_INSNS (2);
 
     case REG:
       /* Simply return 4-byte costs.  */
@@ -220,21 +671,25 @@ size_cost:
 
     case PLUS:
       /* We do not need to check if the address is a legitimate address,
-         because this hook is never called with an invalid address.
-         But we better check the range of
-         const_int value for cost, if it exists.  */
+	 because this hook is never called with an invalid address.
+	 But we better check the range of
+	 const_int value for cost, if it exists.  */
       plus0 = XEXP (address, 0);
       plus1 = XEXP (address, 1);
 
       if (REG_P (plus0) && CONST_INT_P (plus1))
-        {
+	{
 	  /* If it is possible to be lwi333/swi333 form,
 	     make it 2-byte cost.  */
-	  if (satisfies_constraint_Iu05 (plus1))
+	  if (satisfies_constraint_Iu03 (plus1))
 	    return (COSTS_N_INSNS (1) - 2);
 	  else
 	    return COSTS_N_INSNS (1);
 	}
+      if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1))
+	return COSTS_N_INSNS (1) - 2;
+      else if (REG_P (plus0) && REG_P (plus1))
+	return COSTS_N_INSNS (1);
 
       /* For other 'plus' situation, make it cost 4-byte.  */
       return COSTS_N_INSNS (1);
@@ -246,4 +701,84 @@ size_cost:
   return COSTS_N_INSNS (4);
 }
 
+
+int
+nds32_address_cost_size_prefer (rtx address)
+{
+  rtx plus0, plus1;
+  enum rtx_code code;
+
+  code = GET_CODE (address);
+
+  switch (code)
+    {
+    case POST_MODIFY:
+    case POST_INC:
+    case POST_DEC:
+      /* We encourage that rtx contains
+	 POST_MODIFY/POST_INC/POST_DEC behavior.  */
+      return 0;
+
+    case SYMBOL_REF:
+      /* We can have gp-relative load/store for symbol_ref.
+	 Have it 4-byte cost.  */
+      return COSTS_N_INSNS (2);
+
+    case CONST:
+      /* It is supposed to be the pattern (const (plus symbol_ref const_int)).
+	 Have it 4-byte cost.  */
+      return COSTS_N_INSNS (2);
+
+    case REG:
+      /* Simply return 4-byte costs.  */
+      return COSTS_N_INSNS (1) - 1;
+
+    case PLUS:
+      /* We do not need to check if the address is a legitimate address,
+	 because this hook is never called with an invalid address.
+	 But we better check the range of
+	 const_int value for cost, if it exists.  */
+      plus0 = XEXP (address, 0);
+      plus1 = XEXP (address, 1);
+
+      if (REG_P (plus0) && CONST_INT_P (plus1))
+	{
+	  /* If it is possible to be lwi333/swi333 form,
+	     make it 2-byte cost.  */
+	  if (satisfies_constraint_Iu03 (plus1))
+	    return (COSTS_N_INSNS (1) - 2);
+	  else
+	    return COSTS_N_INSNS (1) - 1;
+	}
+
+      /* (plus (reg) (mult (reg) (const))) */
+      if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1))
+	return (COSTS_N_INSNS (1) - 1);
+
+      /* For other 'plus' situation, make it cost 4-byte.  */
+      return COSTS_N_INSNS (1);
+
+    default:
+      break;
+    }
+
+  return COSTS_N_INSNS (4);
+
+}
+
+int
+nds32_address_cost_impl (rtx address,
+			 enum machine_mode mode ATTRIBUTE_UNUSED,
+			 addr_space_t as ATTRIBUTE_UNUSED,
+			 bool speed_p)
+{
+  if (speed_p)
+    {
+      if (current_pass->tv_id == TV_FWPROP)
+	return nds32_address_cost_speed_fwprop (address);
+      else
+	return nds32_address_cost_speed_prefer (address);
+    }
+  else
+    return nds32_address_cost_size_prefer (address);
+}
+
 /* ------------------------------------------------------------------------ */
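As a side note, the rewritten cost hooks are driven by a small function-pointer table: nds32_init_rtx_costs fills rtx_cost_model with the speed- and size-preferring evaluators, and the address-cost hook additionally switches to a fwprop-specific model while that pass is running.  A hedged, self-contained sketch of that dispatch shape (the types and cost values here are illustrative, not the real hooks):

#include <cstdio>

typedef int (*addr_cost_fn) (int addr_kind);

/* Illustrative evaluators; the real ones inspect RTL, not an int tag.  */
static int cost_speed (int addr_kind)  { return addr_kind == 0 ? 1 : 4; }
static int cost_fwprop (int addr_kind) { return addr_kind == 0 ? 0 : 4; }
static int cost_size (int addr_kind)   { return addr_kind == 0 ? 2 : 8; }

enum pass_id { PASS_OTHER, PASS_FWPROP };

/* Mirrors the selection in nds32_address_cost_impl: size model when not
   optimizing for speed, otherwise a fwprop-specific model during fwprop.  */
static int address_cost (int addr_kind, bool speed_p, pass_id current_pass)
{
  if (!speed_p)
    return cost_size (addr_kind);
  return (current_pass == PASS_FWPROP ? cost_fwprop : cost_speed) (addr_kind);
}

int main ()
{
  printf ("speed/other:  %d\n", address_cost (0, true, PASS_OTHER));
  printf ("speed/fwprop: %d\n", address_cost (0, true, PASS_FWPROP));
  printf ("size:         %d\n", address_cost (0, false, PASS_OTHER));
  return 0;
}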
diff --git a/gcc/config/nds32/nds32-cprop-acc.c b/gcc/config/nds32/nds32-cprop-acc.c
new file mode 100644
index 0000000..0852095
--- /dev/null
+++ b/gcc/config/nds32/nds32-cprop-acc.c
@@ -0,0 +1,845 @@
+/* Copy propagation on hard registers for accumulate style instruction.
+   Copyright (C) 2000-2014 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "insn-config.h"
+#include "regs.h"
+#include "addresses.h"
+#include "predict.h"
+#include "basic-block.h"
+#include "reload.h"
+#include "hash-set.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "function.h"
+#include "recog.h"
+#include "cfgrtl.h"
+#include "flags.h"
+#include "diagnostic-core.h"
+#include "obstack.h"
+#include "tree-pass.h"
+#include "bitmap.h"
+#include "df.h"
+#include "output.h"
+#include "emit-rtl.h"
+#include <vector>
+
+/* For each move instruction, we have a two-dimensional vector that records
+   which insns need their operands replaced when the move instruction is
+   propagated.  */
+
+typedef std::vector<rtx_insn *> insn_list;
+
+/* Function called by note_uses to replace used subexpressions.  */
+
+struct replace_src_operands_data
+{
+  rtx dst_reg;
+  rtx src_reg;
+  unsigned int old_regno;
+  unsigned int new_regno;
+  rtx_insn *insn;
+};
+
+/* Return true if a mode change from ORIG to NEW is allowed for REGNO.
+   Adapted from mode_change_ok in regcprop.  */
+
+static bool
+nds32_mode_change_ok (enum machine_mode orig_mode, enum machine_mode new_mode,
+		      unsigned int regno ATTRIBUTE_UNUSED)
+{
+  if (GET_MODE_SIZE (orig_mode) < GET_MODE_SIZE (new_mode))
+    return false;
+
+#ifdef CANNOT_CHANGE_MODE_CLASS
+  return !REG_CANNOT_CHANGE_MODE_P (regno, orig_mode, new_mode);
+#endif
+
+  return true;
+}
+
+/* Register REGNO was originally set in ORIG_MODE.  It - or a copy of it -
+   was copied in COPY_MODE to COPY_REGNO, and then COPY_REGNO was accessed
+   in NEW_MODE.
+   Return a NEW_MODE rtx for REGNO if that's OK, otherwise return NULL_RTX.
+   Adapted from maybe_mode_change in regcprop.  */
+
+static rtx
+nds32_mode_change_reg (enum machine_mode orig_mode, enum machine_mode copy_mode,
+		       enum machine_mode new_mode, unsigned int regno,
+		       unsigned int copy_regno ATTRIBUTE_UNUSED)
+{
+  if (GET_MODE_SIZE (copy_mode) < GET_MODE_SIZE (orig_mode)
+      && GET_MODE_SIZE (copy_mode) < GET_MODE_SIZE (new_mode))
+    return NULL_RTX;
+
+  if (orig_mode == new_mode)
+    return gen_raw_REG (new_mode, regno);
+  else if (nds32_mode_change_ok (orig_mode, new_mode, regno))
+    {
+      int copy_nregs = hard_regno_nregs[copy_regno][copy_mode];
+      int use_nregs = hard_regno_nregs[copy_regno][new_mode];
+      int copy_offset
+	= GET_MODE_SIZE (copy_mode) / copy_nregs * (copy_nregs - use_nregs);
+      int offset
+	= GET_MODE_SIZE (orig_mode) - GET_MODE_SIZE (new_mode) - copy_offset;
+      int byteoffset = offset % UNITS_PER_WORD;
+      int wordoffset = offset - byteoffset;
+
+      offset = ((WORDS_BIG_ENDIAN ? wordoffset : 0)
+		+ (BYTES_BIG_ENDIAN ? byteoffset : 0));
+      regno += subreg_regno_offset (regno, orig_mode, offset, new_mode);
+      if (HARD_REGNO_MODE_OK (regno, new_mode))
+	return gen_raw_REG (new_mode, regno);
+    }
+  return NULL_RTX;
+}
+
+/* Return true if INSN is a register-based move instruction, false
+   otherwise.  */
+
+static bool
+nds32_is_reg_mov_p (rtx_insn *insn)
+{
+  rtx pat = PATTERN (insn);
+
+  if (GET_CODE (pat) != SET)
+    return false;
+
+  rtx src_reg = SET_SRC (pat);
+  rtx dst_reg = SET_DEST (pat);
+
+  if (REG_P (dst_reg) && REG_P (src_reg) && can_copy_p (GET_MODE (dst_reg)))
+    return true;
+  else
+    return false;
+}
+
+
+/* Return accumulated register if INSN is an accumulate style instruction,
+   otherwise return NULL_RTX.  */
+
+static rtx
+nds32_is_acc_insn_p (rtx_insn *insn)
+{
+  int i;
+  const operand_alternative *op_alt;
+  rtx pat;
+
+  if (get_attr_length (insn) != 4)
+    return NULL_RTX;
+
+  pat = PATTERN (insn);
+  if (GET_CODE (pat) != SET)
+    return NULL_RTX;
+
+  /* Try to get the insn data from recog_data.  */
+  recog_memoized (insn);
+  extract_constrain_insn (insn);
+  /* Transform the constraint strings into a more usable form,
+     recog_op_alt.  */
+  preprocess_constraints (insn);
+  op_alt = which_op_alt ();
+
+  /* Check all operands to see whether an output operand is identical to
+     another input operand.  */
+  for (i = 0; i < recog_data.n_operands; ++i)
+    {
+      int matches = op_alt[i].matches;
+      int matched = op_alt[i].matched;
+      if ((matches >= 0
+	   && (recog_data.operand_type[i] != OP_IN
+	       || recog_data.operand_type[matches] != OP_IN))
+	  || (matched >= 0
+	      && (recog_data.operand_type[i] != OP_IN
+		  || recog_data.operand_type[matched] != OP_IN)))
+	return recog_data.operand[i];
+    }
+
+  return NULL_RTX;
+}
+
+/* Find the reference corresponding to the definition of the register whose
+   register number is REGNO in INSN.
+   Adapted from df_find_def in df-core.  */
+
+static df_ref
+nds32_df_find_regno_def (rtx_insn *insn, unsigned int regno)
+{
+  df_ref def;
+
+  FOR_EACH_INSN_DEF (def, insn)
+    if (DF_REF_REGNO (def) == regno)
+      return def;
+
+  return NULL;
+}
+
+/* Return true if the REG in INSN is only defined by one insn whose uid
+   is DEF_UID, otherwise return false.  */
+
+static bool
+nds32_is_single_def_p (rtx_insn *insn, rtx reg, unsigned int def_uid)
+{
+  df_ref use;
+
+  FOR_EACH_INSN_USE (use, insn)
+    {
+      df_link *link;
+      unsigned int uid;
+
+      if (DF_REF_REGNO (use) >= REGNO (reg)
+	  && DF_REF_REGNO (use) < END_REGNO (reg))
+	{
+	  link = DF_REF_CHAIN (use);
+	  if (link->next
+	      || DF_REF_IS_ARTIFICIAL (link->ref))
+	    return false;
+
+	  uid = DF_REF_INSN_UID (link->ref);
+	  if (uid != def_uid)
+	    return false;
+	}
+    }
+
+  return true;
+}
+
+/* Return true if there is no definition of REG on any path from the insn
+   whose uid is FROM_UID (called FROM) to insn TO, otherwise return false.
+   This function collects the reaching-definitions bitmap at insn TO and
+   checks that every definition of REG used by insn FROM still reaches
+   insn TO.  */
+
+static bool
+nds32_no_define_reg_p (rtx to, rtx reg, unsigned int from_uid)
+{
+  basic_block bb = BLOCK_FOR_INSN (to);
+  struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (bb);
+  bitmap_head rd_local;
+  bool result = true;
+  rtx_insn *insn;
+  df_ref use;
+  df_insn_info *insn_info;
+
+  bitmap_initialize (&rd_local, &bitmap_default_obstack);
+  bitmap_copy (&rd_local, &bb_info->in);
+  df_rd_simulate_artificial_defs_at_top (bb, &rd_local);
+
+  for (insn = BB_HEAD (bb); insn != to; insn = NEXT_INSN (insn))
+    if (INSN_P (insn))
+      df_rd_simulate_one_insn (bb, insn, &rd_local);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "scan reach define:");
+      print_rtl_single (dump_file, to);
+
+      fprintf (dump_file, "bb rd in:\n");
+      dump_bitmap (dump_file, &bb_info->in);
+
+      fprintf (dump_file, "reach def:\n");
+      dump_bitmap (dump_file, &rd_local);
+    }
+
+  insn_info = DF_INSN_UID_GET (from_uid);
+  FOR_EACH_INSN_INFO_USE (use, insn_info)
+    {
+      df_link *link;
+
+      if (DF_REF_REGNO (use) >= REGNO (reg)
+	  && DF_REF_REGNO (use) < END_REGNO (reg))
+	for (link = DF_REF_CHAIN (use); link; link = link->next)
+	  {
+	    if (dump_file)
+	      {
+		fprintf (dump_file, "use ID %d\n", DF_REF_ID (link->ref));
+		if (DF_REF_IS_ARTIFICIAL (link->ref))
+		  fprintf (dump_file, "use ref is artificial\n");
+		else
+		  {
+		    fprintf (dump_file, "use from insn:");
+		    print_rtl_single (dump_file, DF_REF_INSN (link->ref));
+		  }
+	      }
+	    result &=
+	      (bitmap_bit_p (&rd_local, DF_REF_ID (link->ref)))
+	      ? true
+	      : false;
+	  }
+    }
+
+  bitmap_clear (&rd_local);
+  return result;
+}
+
+/* Return true if the value held by REG is no longer needed before INSN
+   (i.e. REG is dead before INSN), otherwise return false.  */
+
+static bool
+nds32_is_dead_reg_p (rtx_insn *insn, rtx reg)
+{
+  basic_block bb = BLOCK_FOR_INSN (insn);
+  bitmap live = BITMAP_ALLOC (&reg_obstack);
+  bool result = true;
+  rtx_insn *i;
+  unsigned int rn;
+
+  bitmap_copy (live, DF_LR_IN (bb));
+  df_simulate_initialize_forwards (bb, live);
+
+  for (i = BB_HEAD (bb); i != insn; i = NEXT_INSN (i))
+    df_simulate_one_insn_forwards (bb, i, live);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "scan live regs:");
+      print_rtl_single (dump_file, insn);
+
+      fprintf (dump_file, "bb lr in:\n");
+      dump_bitmap (dump_file, DF_LR_IN (bb));
+
+      fprintf (dump_file, "live:\n");
+      dump_bitmap (dump_file, live);
+    }
+
+  for (rn = REGNO (reg); rn < END_REGNO (reg); ++rn)
+    result &= (bitmap_bit_p (live, rn)) ? false : true;
+
+  BITMAP_FREE (live);
+  return result;
+}
+
+/* Return true if START can do propagation.  Note that START may be a move
+   instruction or an accumulate style instruction.
+   MOV_UID is the uid of the beginning move instruction; it is only used by
+   function nds32_no_define_reg_p.
+   DST_REG & SRC_REG are the SET_DEST and SET_SRC of a move instruction,
+   which may be a real or an unreal (synthesized) one.
+   INDEX selects which of the consecutive hard registers is currently being
+   considered; it is also the row index into INSN_LISTS.  */
+
+static bool
+nds32_can_cprop_acc_1 (rtx_insn *start, unsigned int mov_uid,
+		       rtx dst_reg, rtx src_reg,
+		       unsigned int index,
		       std::vector<insn_list> &insn_lists)
+{
+  unsigned int lead_regno = REGNO (dst_reg) + index;
+  unsigned int new_regno = REGNO (src_reg) + index;
+  df_ref def_rec;
+  df_link *link;
+
+  def_rec = nds32_df_find_regno_def (start, lead_regno);
+  gcc_assert (def_rec);
+
+  for (link = DF_REF_CHAIN (def_rec); link; link = link->next)
+    {
+      rtx *use_loc;
+      unsigned int use_regno;
+      enum machine_mode use_mode;
+      rtx_insn *use_insn;
+      rtx acc_reg, new_src;
+
+      if (DF_REF_IS_ARTIFICIAL (link->ref))
+	return false;
+
+      use_loc = DF_REF_LOC (link->ref);
+      gcc_assert (use_loc && REG_P (*use_loc));
+
+      use_regno = REGNO (*use_loc);
+      /* Do not propagate when any insn uses a register whose regno is
+	 smaller than that of DST_REG.  */
+      if (use_regno < REGNO (dst_reg))
+	return false;
+
+      /* This case has already been handled by a previous call.  */
+      if (use_regno < lead_regno)
+	continue;
+
+      /* Do not propagate because not all of the pieces of the copy came
+	 from DST_REG.  */
+      if (END_REGNO (*use_loc) > END_REGNO (dst_reg))
+	return false;
+
+      use_insn = DF_REF_INSN (link->ref);
+      /* Do not propagate since call-used registers can't be replaced.  */
+      if (CALL_P (use_insn))
+	return false;
+
+      /* Do not replace in asms intentionally referencing hard registers.  */
+      if (asm_noperands (PATTERN (use_insn)) >= 0
+	  && use_regno == ORIGINAL_REGNO (*use_loc))
+	return false;
+
+      /* Do not propagate when the register is defined by more than one
+	 instruction.  */
+      if (!nds32_is_single_def_p (use_insn, *use_loc, INSN_UID (start)))
+	return false;
+
+      use_mode = GET_MODE (*use_loc);
+      new_src = nds32_mode_change_reg (GET_MODE (src_reg),
+				       GET_MODE (dst_reg),
+				       use_mode,
+				       new_regno,
+				       use_regno);
+      /* Do not propagate if we can't generate a new register with new mode.  */
+      if (!new_src)
+	return false;
+
+      /* Cannot replace DST_REG with SRC_REG when SRC_REG is redefined
+	 between START and the use insn of START.  */
+      if (!nds32_no_define_reg_p (use_insn, new_src, mov_uid))
+	return false;
+
+      acc_reg = nds32_is_acc_insn_p (use_insn);
+      /* Handle the accumulate style instruction whose accumulate register
+	 may be replaced.
+	 Also handle the AUTO_INC register, which is another form of
+	 accumulated register.  */
+      if ((acc_reg && rtx_equal_p (acc_reg, *use_loc))
+	  || FIND_REG_INC_NOTE (use_insn, *use_loc))
+	{
+	  unsigned int i, use_nregs;
+
	  /* ACC_REG can't be replaced if SRC_REG is still live here,
	     since the replacement would overwrite SRC_REG.  */
+	  if (!nds32_is_dead_reg_p (use_insn, new_src))
+	    return false;
+
+	  /* Once we confirm that ACC_REG can be replaced, the unreal move
+	     instruction is generated. For example:
+	     mov   r0, r1	   mov   r0, r1
+	     cmovn r0, r2, r3  ->  cmovn r1, r2, r3
+				   mov   r0, r1
+	     If the unreal move instruction can do propagation, the ACC_REG
+	     can be replaced. We check it in a recursive way.  */
+	  use_nregs = hard_regno_nregs [use_regno][(int) use_mode];
+	  for (i = 0; i < use_nregs; ++i)
+	    if (!nds32_can_cprop_acc_1 (use_insn, mov_uid,
+					*use_loc, new_src,
+					i, insn_lists))
+	      return false;
+	}
+      insn_lists[index].push_back (use_insn);
+    }
+
+  return true;
+}
+
+/* Return true if MOV can do propagation, otherwise return false.
+   INSN_LISTS is used to record what insns need to replace the operands.  */
+
+static bool
+nds32_can_cprop_acc (rtx_insn *mov, std::vector<insn_list> &insn_lists)
+{
+  rtx dst_reg = SET_DEST (PATTERN (mov));
+  rtx src_reg = SET_SRC (PATTERN (mov));
+  unsigned int dst_regno = REGNO (dst_reg);
+  enum machine_mode dst_mode = GET_MODE (dst_reg);
+  unsigned int dst_nregs = hard_regno_nregs[dst_regno][(int) dst_mode];
+  unsigned int index;
+
+  insn_lists.resize (dst_nregs);
+  for (index = 0; index < dst_nregs; ++index)
+    if (!nds32_can_cprop_acc_1 (mov, INSN_UID (mov),
+				dst_reg, src_reg,
+				index, insn_lists))
+      return false;
+
+  return true;
+}
+
+/* Replace every occurrence of OLD_REGNO in LOC with NEW_REGNO.  LOC may be
+   a part of INSN.
+   DST_REG & SRC_REG are used by function nds32_mode_change_reg.
+   Mark each change with validate_change passing INSN.  */
+
+static void
+nds32_replace_partial_operands (rtx *loc, rtx dst_reg, rtx src_reg,
+				unsigned int old_regno, unsigned int new_regno,
+				rtx_insn *insn)
+{
+  int i, j;
+  rtx x = *loc;
+  enum rtx_code code;
+  const char *fmt;
+
+  if (!x)
+    return;
+
+  code = GET_CODE (x);
+  fmt = GET_RTX_FORMAT (code);
+
+  if (REG_P (x) && REGNO (x) == old_regno)
+    {
+      rtx new_reg = nds32_mode_change_reg (GET_MODE (src_reg),
+					   GET_MODE (dst_reg),
+					   GET_MODE (x),
+					   new_regno,
+					   old_regno);
+
+      gcc_assert (new_reg);
+
+      ORIGINAL_REGNO (new_reg) = ORIGINAL_REGNO (x);
+      REG_ATTRS (new_reg) = REG_ATTRS (x);
+      REG_POINTER (new_reg) = REG_POINTER (x);
+
+      /* ??? unshare or not?  */
+      validate_change (insn, loc, new_reg, 1);
+      return;
+    }
+
+  /* Call ourself recursively to perform the replacements.  */
+  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+    {
+      if (fmt[i] == 'e')
+	nds32_replace_partial_operands (&XEXP (x, i), dst_reg, src_reg,
+					old_regno, new_regno, insn);
+      else if (fmt[i] == 'E') /* ??? how about V?  */
+	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+	  nds32_replace_partial_operands (&XVECEXP (x, i, j), dst_reg, src_reg,
+					  old_regno, new_regno, insn);
+    }
+}
+
+/* Try replacing every occurrence of OLD_REGNO in INSN with NEW_REGNO.  */
+
+static void
+nds32_replace_all_operands (rtx dst_reg, rtx src_reg,
+			    unsigned int old_regno, unsigned int new_regno,
+			    rtx_insn *insn)
+{
+  nds32_replace_partial_operands (&PATTERN (insn), dst_reg, src_reg,
+				  old_regno, new_regno, insn);
+}
+
+/* Called via note_uses from function nds32_replace_src_operands; perform
+   the replacement on every used rtx.  */
+
+static void
+nds32_replace_src_operands_1 (rtx *loc, void *data)
+{
+  struct replace_src_operands_data *d
+    = (struct replace_src_operands_data *) data;
+
+  nds32_replace_partial_operands (loc, d->dst_reg, d->src_reg,
+				  d->old_regno, d->new_regno, d->insn);
+}
+
+/* Try replacing every occurrence of OLD_REGNO in INSN with NEW_REGNO,
+   avoiding SET_DESTs.  */
+
+static void
+nds32_replace_src_operands (rtx dst_reg, rtx src_reg,
+			    unsigned int old_regno, unsigned int new_regno,
+			    rtx_insn *insn)
+{
+  struct replace_src_operands_data d
+    = {dst_reg, src_reg, old_regno, new_regno, insn};
+
+  note_uses (&PATTERN (insn), nds32_replace_src_operands_1, &d);
+}
+
+/* Try replacing every occurrence of DST_REG (including its consecutive hard
+   registers) in each insn of INSN_LISTS with SRC_REG.  */
+
+static bool
+nds32_try_replace_operands (rtx dst_reg, rtx src_reg,
			    std::vector<insn_list> &insn_lists)
+{
+  unsigned int i;
+  std::vector<rtx_insn *>::iterator ritr;
+  unsigned int old_regno, new_regno;
+
+  old_regno = REGNO (dst_reg);
+  new_regno = REGNO (src_reg);
+
+  for (i = 0; i < insn_lists.size (); ++i, ++old_regno, ++new_regno)
+    for (ritr = insn_lists[i].begin (); ritr != insn_lists[i].end (); ++ritr)
+      {
+	rtx_insn *insn = *ritr;
+	rtx acc_reg;
+
+	acc_reg = nds32_is_acc_insn_p (insn);
+	if (acc_reg && REGNO (acc_reg) == old_regno)
+	  {
+	    /* Replace OP_OUT & OP_INOUT  */
+	    nds32_replace_all_operands (dst_reg, src_reg,
+					old_regno, new_regno, insn);
+
+	  }
+	else
+	  {
+	    /* Replace OP_IN  */
+	    nds32_replace_src_operands (dst_reg, src_reg,
+					old_regno, new_regno, insn);
+	  }
+      }
+
+  if (!apply_change_group ())
+    return false;
+  else
+    {
+      df_analyze ();
+      return true;
+    }
+}
+
+/* Check if each move instruction in WORK_LIST can do propagation, and
+   then try to replace operands if necessary. */
+
+static int
+nds32_do_cprop_acc (auto_vec<rtx_insn *> &work_list)
+{
+  int n_replace = 0;
+  int i;
+  rtx_insn *mov;
+  std::vector<insn_list> insn_lists;
+
+  FOR_EACH_VEC_ELT (work_list, i, mov)
+    {
+      if (nds32_can_cprop_acc (mov, insn_lists))
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "\n [CPROP_ACC] insn %d will be cprop. \n",
+		     INSN_UID (mov));
+
+	  if (nds32_try_replace_operands (SET_DEST (PATTERN (mov)),
+					  SET_SRC (PATTERN (mov)),
+					  insn_lists))
+	    n_replace++;
+	}
+      insn_lists.clear ();
+    }
+
+  return n_replace;
+}
+
+/* Return true if MOV meets the conditions for propagating a move
+   instruction, otherwise return false.  */
+
+static bool
+nds32_is_target_mov_p (rtx mov)
+{
+  rtx dst = SET_DEST (PATTERN (mov));
+  rtx src = SET_SRC (PATTERN (mov));
+  unsigned int dst_regno, src_regno;
+  unsigned int dst_nregs, src_nregs;
+  bool dst_is_general, src_is_general;
+
+  gcc_assert (REG_P (dst) && REG_P (src));
+
+  dst_regno = REGNO (dst);
+  src_regno = REGNO (src);
+  dst_nregs = hard_regno_nregs[dst_regno][GET_MODE (dst)];
+  src_nregs = hard_regno_nregs[src_regno][GET_MODE (src)];
+
+  /* Do not propagate to the stack pointer, as that can leave memory accesses
+     with no scheduling dependency on the stack update.
+     Adapted from regcprop.  */
+  if (dst_regno == STACK_POINTER_REGNUM)
+    return false;
+
+  /* Likewise with the frame pointer, if we're using one.
+     Adapted from regcprop.  */
+  if (frame_pointer_needed && dst_regno == HARD_FRAME_POINTER_REGNUM)
+    return false;
+
+  /* Do not propagate to fixed or global registers; patterns may rely on
+     seeing a particular fixed register, and users may expect the chosen
+     global register in asm.
+     Adapted from regcprop.  */
+  if (fixed_regs[dst_regno] || global_regs[dst_regno])
+    return false;
+
+  /* Make sure that all consecutive registers of SET_DEST are defined only
+     by SET_SRC.  */
+  if (dst_nregs > src_nregs)
+    return false;
+
+  /* Narrowing on big endian would result in an invalid transformation.  */
+  if (dst_nregs < src_nregs
+      && (GET_MODE_SIZE (GET_MODE (src)) > UNITS_PER_WORD
+	  ? WORDS_BIG_ENDIAN : BYTES_BIG_ENDIAN))
+    return false;
+
+  dst_is_general = in_hard_reg_set_p (reg_class_contents[GENERAL_REGS],
+				      GET_MODE (dst), REGNO (dst));
+  src_is_general = in_hard_reg_set_p (reg_class_contents[GENERAL_REGS],
+				      GET_MODE (src), REGNO (src));
+  /* Make sure the register class of SET_DEST & SET_SRC are the same.  */
+  if (dst_is_general ^ src_is_general)
+    return false;
+
+  return true;
+}
+
+/* Collect into WORK_LIST the move instructions whose destinations are used
+   as accumulated registers.  */
+
+static void
+nds32_cprop_acc_find_target_mov (auto_vec<rtx_insn *> &work_list)
+{
+  basic_block bb;
+  rtx_insn *insn;
+  rtx acc_reg;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    FOR_BB_INSNS (bb, insn)
+      if (INSN_P (insn))
+	{
+	  acc_reg = nds32_is_acc_insn_p (insn);
+	  if (acc_reg)
+	    {
+	      unsigned int acc_regno;
+	      enum machine_mode acc_mode;
+	      df_ref use;
+	      df_link *link;
+	      rtx_insn *def_insn;
+
+	      if (!single_set (insn) || !REG_P (acc_reg))
+		continue;
+
+	      acc_regno = REGNO (acc_reg);
+	      /* Don't replace in asms intentionally referencing hard regs.  */
+	      if (asm_noperands (PATTERN (insn)) >= 0
+		  && acc_regno == ORIGINAL_REGNO (acc_reg))
+		continue;
+
+	      if (dump_file)
+		fprintf (dump_file,
+			 "\n [CPROP_ACC] "
+			 "RTL_UID %d is an exchangeable ACC insn. \n",
+			 INSN_UID (insn));
+
+	      use = df_find_use (insn, acc_reg);
+	      gcc_assert (use);
+	      link = DF_REF_CHAIN (use);
+
+	      if (link->next
+		  || DF_REF_IS_ARTIFICIAL (link->ref))
+		continue;
+
+	      acc_mode = GET_MODE (acc_reg);
+	      def_insn = DF_REF_INSN (link->ref);
+	      if (nds32_is_reg_mov_p (def_insn))
+		{
+		  rtx *loc = DF_REF_LOC (link->ref);
+		  enum machine_mode loc_mode = GET_MODE (*loc);
+
		  /* If the move instruction can't define the whole
		     accumulated register, the replacement is invalid.  */
+		  if (loc_mode != acc_mode)
+		    if (hard_regno_nregs[acc_regno][acc_mode]
+			> hard_regno_nregs[acc_regno][loc_mode])
+		      continue;
+
+		  if (nds32_is_target_mov_p (def_insn))
+		    work_list.safe_push (def_insn);
+		}
+	    }
+	}
+}
+
+/* Main entry point for the forward copy propagation optimization for
+   accumulate style instruction.  */
+
+static int
+nds32_cprop_acc_opt (void)
+{
+  df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
+  df_note_add_problem ();
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+  df_insn_rescan_all ();
+  df_analyze ();
+
+  auto_vec<rtx_insn *> work_list;
+
+  nds32_cprop_acc_find_target_mov (work_list);
+  if (work_list.is_empty())
+    {
+      if (dump_file)
+	fprintf (dump_file, "\n [CPROP_ACC] The work_list is empty. \n");
+      return 0;
+    }
+
+  if (dump_file)
+    {
+      int i;
+      rtx_insn *mov;
+
+      fprintf (dump_file, "\n [CPROP_ACC] The content of work_list:");
+      FOR_EACH_VEC_ELT (work_list, i, mov)
+	fprintf (dump_file, " %d", INSN_UID (mov));
+      fprintf (dump_file, "\n");
+    }
+
+  compute_bb_for_insn ();
+
+  int n_replace = nds32_do_cprop_acc (work_list);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "\n [CPROP_ACC] Result: ");
+      if (n_replace == 0)
+	fprintf (dump_file, "No move can do cprop. \n");
+      else
+	fprintf (dump_file, "Do cprop for %d move. \n", n_replace);
+    }
+
+  work_list.release ();
+  return 1;
+}
+
+const pass_data pass_data_nds32_cprop_acc_opt =
+{
+  RTL_PASS,                                     /* type */
+  "cprop_acc",                                  /* name */
+  OPTGROUP_NONE,                                /* optinfo_flags */
+  TV_MACH_DEP,                                  /* tv_id */
+  0,                                            /* properties_required */
+  0,                                            /* properties_provided */
+  0,                                            /* properties_destroyed */
+  0,                                            /* todo_flags_start */
+  TODO_df_finish,				/* todo_flags_finish */
+};
+
+class pass_nds32_cprop_acc_opt : public rtl_opt_pass
+{
+public:
+  pass_nds32_cprop_acc_opt (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_nds32_cprop_acc_opt, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate (function *) { return optimize > 0 && flag_nds32_cprop_acc; }
+  unsigned int execute (function *) { return nds32_cprop_acc_opt (); }
+};
+
+rtl_opt_pass *
+make_pass_nds32_cprop_acc_opt (gcc::context *ctxt)
+{
+  return new pass_nds32_cprop_acc_opt (ctxt);
+}
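One structural detail worth calling out: insn_lists is a two-dimensional worklist, one row per hard register covered by the move destination, and nds32_try_replace_operands walks it row by row while advancing old_regno and new_regno in step.  A minimal sketch of that traversal shape (standalone, with plain ints standing in for insns):

#include <cstdio>
#include <vector>

typedef std::vector<int> insn_list;          /* row = uses of one hard reg */

int main ()
{
  unsigned old_regno = 0, new_regno = 4;     /* e.g. mov r0..r1 <- r4..r5  */
  std::vector<insn_list> insn_lists (2);     /* a DImode dest covers 2 regs */
  insn_lists[0] = { 10, 12 };                /* insn uids that use r0       */
  insn_lists[1] = { 12 };                    /* insn uids that use r1       */

  /* Same shape as nds32_try_replace_operands: bump both register numbers
     together with the row index.  */
  for (size_t i = 0; i < insn_lists.size (); ++i, ++old_regno, ++new_regno)
    for (size_t j = 0; j < insn_lists[i].size (); ++j)
      printf ("insn %d: replace r%u with r%u\n",
	      insn_lists[i][j], old_regno, new_regno);
  return 0;
}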
diff --git a/gcc/config/nds32/nds32-doubleword.md b/gcc/config/nds32/nds32-doubleword.md
index 23a9f25..7c9dfb9 100644
--- a/gcc/config/nds32/nds32-doubleword.md
+++ b/gcc/config/nds32/nds32-doubleword.md
@@ -23,7 +23,8 @@
 ;; Move DImode/DFmode instructions.
 ;; -------------------------------------------------------------
 
-
+;; Do *NOT* try to split DI/DFmode before reload since LRA still seems
+;; buggy for such behavior, at least as of gcc 4.8.2...
 (define_expand "movdi"
   [(set (match_operand:DI 0 "general_operand" "")
 	(match_operand:DI 1 "general_operand" ""))]
@@ -46,149 +47,100 @@
 
 
 (define_insn "move_"
-  [(set (match_operand:DIDF 0 "nonimmediate_operand" "=r, r, r, m")
-	(match_operand:DIDF 1 "general_operand"      " r, i, m, r"))]
-  ""
+  [(set (match_operand:DIDF 0 "nonimmediate_operand" "=r, r,  r, r, Da, m, f, Q, f, *r, *f")
+	(match_operand:DIDF 1 "general_operand"      " r, i, Da, m,  r, r, Q, f, f, *f, *r"))]
+  "register_operand(operands[0], mode)
+   || register_operand(operands[1], mode)"
 {
-  rtx addr;
-  rtx otherops[5];
-
   switch (which_alternative)
     {
     case 0:
       return "movd44\t%0, %1";
-
     case 1:
       /* reg <- const_int, we ask gcc to split instruction.  */
       return "#";
-
     case 2:
-      /* Refer to nds32_legitimate_address_p() in nds32.c,
-         we only allow "reg", "symbol_ref", "const", and "reg + const_int"
-         as address rtx for DImode/DFmode memory access.  */
-      addr = XEXP (operands[1], 0);
-
-      otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
-      otherops[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
-      otherops[2] = addr;
-
-      if (REG_P (addr))
-	{
-	  /* (reg) <- (mem (reg)) */
-	  output_asm_insn ("lmw.bi\t%0, [%2], %1, 0", otherops);
-	}
-      else if (GET_CODE (addr) == PLUS)
-	{
-	  /* (reg) <- (mem (plus (reg) (const_int))) */
-	  rtx op0 = XEXP (addr, 0);
-	  rtx op1 = XEXP (addr, 1);
-
-	  if (REG_P (op0))
-	    {
-	      otherops[2] = op0;
-	      otherops[3] = op1;
-	      otherops[4] = gen_int_mode (INTVAL (op1) + 4, SImode);
-	    }
-	  else
-	    {
-	      otherops[2] = op1;
-	      otherops[3] = op0;
-	      otherops[4] = gen_int_mode (INTVAL (op0) + 4, SImode);
-	    }
-
-	  /* To avoid base overwrite when REGNO(%0) == REGNO(%2).  */
-	  if (REGNO (otherops[0]) != REGNO (otherops[2]))
-	    {
-	      output_asm_insn ("lwi\t%0, [%2 + (%3)]", otherops);
-	      output_asm_insn ("lwi\t%1, [%2 + (%4)]", otherops);
-	    }
-	  else
-	    {
-	      output_asm_insn ("lwi\t%1, [%2 + (%4)]", otherops);
-	      output_asm_insn ("lwi\t%0,[ %2 + (%3)]", otherops);
-	    }
-	}
-      else
-	{
-	  /* (reg) <- (mem (symbol_ref ...))
-	     (reg) <- (mem (const ...)) */
-	  output_asm_insn ("lwi.gp\t%0, [ + %2]", otherops);
-	  output_asm_insn ("lwi.gp\t%1, [ + %2 + 4]", otherops);
-	}
-
-      /* We have already used output_asm_insn() by ourself,
-         so return an empty string.  */
-      return "";
-
+      /* The memory format is (mem (reg)),
	 so we can generate an 'lmw.bi' instruction.  */
+      return nds32_output_double (operands, true);
     case 3:
-      /* Refer to nds32_legitimate_address_p() in nds32.c,
-         we only allow "reg", "symbol_ref", "const", and "reg + const_int"
-         as address rtx for DImode/DFmode memory access.  */
-      addr = XEXP (operands[0], 0);
-
-      otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1]));
-      otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
-      otherops[2] = addr;
-
-      if (REG_P (addr))
-	{
-	  /* (mem (reg)) <- (reg) */
-	  output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops);
-	}
-      else if (GET_CODE (addr) == PLUS)
-	{
-	  /* (mem (plus (reg) (const_int))) <- (reg) */
-	  rtx op0 = XEXP (addr, 0);
-	  rtx op1 = XEXP (addr, 1);
-
-	  if (REG_P (op0))
-	    {
-	      otherops[2] = op0;
-	      otherops[3] = op1;
-	      otherops[4] = gen_int_mode (INTVAL (op1) + 4, SImode);
-	    }
-	  else
-	    {
-	      otherops[2] = op1;
-	      otherops[3] = op0;
-	      otherops[4] = gen_int_mode (INTVAL (op0) + 4, SImode);
-	    }
-
-	  /* To avoid base overwrite when REGNO(%0) == REGNO(%2).  */
-	  if (REGNO (otherops[0]) != REGNO (otherops[2]))
-	    {
-	      output_asm_insn ("swi\t%0, [%2 + (%3)]", otherops);
-	      output_asm_insn ("swi\t%1, [%2 + (%4)]", otherops);
-	    }
-	  else
-	    {
-	      output_asm_insn ("swi\t%1, [%2 + (%4)]", otherops);
-	      output_asm_insn ("swi\t%0, [%2 + (%3)]", otherops);
-	    }
-	}
-      else
-	{
-	  /* (mem (symbol_ref ...)) <- (reg)
-	     (mem (const ...))      <- (reg) */
-	  output_asm_insn ("swi.gp\t%0, [ + %2]", otherops);
-	  output_asm_insn ("swi.gp\t%1, [ + %2 + 4]", otherops);
-	}
-
-      /* We have already used output_asm_insn() by ourself,
-         so return an empty string.  */
-      return "";
-
+      /* We don't have a 64-bit load instruction,
+	 so we split this pattern into two SImode patterns.  */
+      return "#";
+    case 4:
+      /* The memory format is (mem (reg)),
	 so we can generate an 'smw.bi' instruction.  */
+      return nds32_output_double (operands, false);
+    case 5:
+      /* We don't have a 64-bit store instruction,
+	 so we split this pattern into two SImode patterns.  */
+      return "#";
+    case 6:
+      return nds32_output_float_load (operands);
+    case 7:
+      return nds32_output_float_store (operands);
+    case 8:
+      return "fcpysd\t%0, %1, %1";
+    case 9:
+      return "fmfdr\t%0, %1";
+    case 10:
+      return "fmtdr\t%1, %0";
     default:
       gcc_unreachable ();
     }
 }
-  [(set_attr "type"   "move,move,move,move")
-   (set_attr "length" "   4,  16,   8,   8")])
+  [(set_attr "type"    "alu,alu,load,load,store,store,fload,fstore,fcpy,fmfdr,fmtdr")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "!TARGET_16_BIT")
+		     (const_int 4)
+		     (const_int 2))
+       ;; Alternative 1
+       (const_int 16)
+       ;; Alternative 2
+       (const_int 4)
+       ;; Alternative 3
+       (const_int 8)
+       ;; Alternative 4
+       (const_int 4)
+       ;; Alternative 5
+       (const_int 8)
+       ;; Alternative 6
+       (const_int 4)
+       ;; Alternative 7
+       (const_int 4)
+       ;; Alternative 8
+       (const_int 4)
+       ;; Alternative 9
+       (const_int 4)
+       ;; Alternative 10
+       (const_int 4)
+     ])
+   (set_attr "feature" " v1, v1,  v1,  v1,   v1,   v1,    fpu,    fpu,    fpu,    fpu,    fpu")])
+
+;; Split move_di pattern when the hard register is odd.
+(define_split
+  [(set (match_operand:DIDF 0 "register_operand" "")
+	(match_operand:DIDF 1 "register_operand" ""))]
+  "(NDS32_IS_GPR_REGNUM (REGNO (operands[0]))
+    && ((REGNO (operands[0]) & 0x1) == 1))
+   || (NDS32_IS_GPR_REGNUM (REGNO (operands[1]))
+       && ((REGNO (operands[1]) & 0x1) == 1))"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))]
+  {
+     operands[2] = gen_lowpart (SImode, operands[0]);
+     operands[4] = gen_highpart (SImode, operands[0]);
+     operands[3] = gen_lowpart (SImode, operands[1]);
+     operands[5] = gen_highpart (SImode, operands[1]);
+  }
+)
 
 (define_split
   [(set (match_operand:DIDF 0 "register_operand"     "")
 	(match_operand:DIDF 1 "const_double_operand" ""))]
-  "reload_completed"
+  "flag_pic || reload_completed"
   [(set (match_dup 2) (match_dup 3))
    (set (match_dup 4) (match_dup 5))]
 {
@@ -207,7 +159,12 @@
   /* Actually we would like to create move behavior by ourself.
      So that movsi expander could have chance to split large constant.  */
   emit_move_insn (operands[2], operands[3]);
-  emit_move_insn (operands[4], operands[5]);
+
+  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode);
+  if ((UINTVAL (operands[3]) & mask) == (UINTVAL (operands[5]) & mask))
+    emit_move_insn (operands[4], operands[2]);
+  else
+    emit_move_insn (operands[4], operands[5]);
   DONE;
 })
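The const_double split above now emits only one immediate load when both 32-bit halves of the constant are identical, copying the already-loaded low part into the high part instead.  A standalone sketch of that decision on numeric low/high halves (the real split works on gen_lowpart/gen_highpart operands, and large immediates would actually need a sethi/ori pair rather than a single movi):

#include <cstdint>
#include <cstdio>

/* Split a 64-bit constant into the two SImode moves the pattern emits,
   reusing the low-part register when both halves are equal.  */
static void split_di_const (uint64_t val)
{
  uint32_t lo = (uint32_t) (val & 0xffffffffu);
  uint32_t hi = (uint32_t) (val >> 32);

  printf ("movi  lo_reg, 0x%08x\n", (unsigned) lo);
  if (hi == lo)
    printf ("mov   hi_reg, lo_reg   ; halves identical, reuse low part\n");
  else
    printf ("movi  hi_reg, 0x%08x\n", (unsigned) hi);
}

int main ()
{
  split_di_const (0x1234567812345678ull);   /* halves equal: one movi      */
  split_di_const (0x0000000012345678ull);   /* halves differ: two moves    */
  return 0;
}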
 
@@ -217,7 +174,9 @@
   [(set (match_operand:DIDF 0 "register_operand" "")
 	(match_operand:DIDF 1 "register_operand" ""))]
   "reload_completed
-   && (TARGET_ISA_V2 || !TARGET_16_BIT)"
+   && (TARGET_ISA_V2 || !TARGET_16_BIT)
+   && NDS32_IS_GPR_REGNUM (REGNO (operands[0]))
+   && NDS32_IS_GPR_REGNUM (REGNO (operands[1]))"
   [(set (match_dup 0) (match_dup 1))
    (set (match_dup 2) (match_dup 3))]
 {
@@ -239,6 +198,28 @@
     }
 })
 
+(define_split
+  [(set (match_operand:DIDF 0 "nds32_general_register_operand" "")
+	(match_operand:DIDF 1 "memory_operand" ""))]
+  "reload_completed
+   && nds32_split_double_word_load_store_p (operands, true)"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))]
+{
+  nds32_spilt_doubleword (operands, true);
+})
+
+(define_split
+  [(set (match_operand:DIDF 0  "memory_operand" "")
+	(match_operand:DIDF 1  "nds32_general_register_operand" ""))]
+  "reload_completed
+   && nds32_split_double_word_load_store_p (operands, false)"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))]
+{
+  nds32_spilt_doubleword (operands, false);
+})
+
 ;; -------------------------------------------------------------
 ;; Boolean DImode instructions.
 ;; -------------------------------------------------------------
diff --git a/gcc/config/nds32/nds32-dspext.md b/gcc/config/nds32/nds32-dspext.md
new file mode 100644
index 0000000..6ec2137
--- /dev/null
+++ b/gcc/config/nds32/nds32-dspext.md
@@ -0,0 +1,5280 @@
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_expand "mov"
+  [(set (match_operand:VQIHI 0 "general_operand" "")
+	(match_operand:VQIHI 1 "general_operand" ""))]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+
+  /* If operands[1] is a large constant that cannot be loaded by
+     a single instruction, we need to split it.  */
+  if (GET_CODE (operands[1]) == CONST_VECTOR
+      && !satisfies_constraint_CVs2 (operands[1])
+      && !satisfies_constraint_CVhi (operands[1]))
+    {
+      HOST_WIDE_INT ival = const_vector_to_hwint (operands[1]);
+      rtx tmp_rtx;
+
+      tmp_rtx = can_create_pseudo_p ()
+		? gen_reg_rtx (SImode)
+		: simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0);
+
+      emit_move_insn (tmp_rtx, gen_int_mode (ival, SImode));
+      convert_move (operands[0], tmp_rtx, false);
+      DONE;
+    }
+
+  if (REG_P (operands[0]) && SYMBOLIC_CONST_P (operands[1]))
+    {
+      if (nds32_tls_referenced_p (operands [1]))
+	{
+	  nds32_expand_tls_move (operands);
+	  DONE;
+	}
+      else if (flag_pic)
+	{
+	  nds32_expand_pic_move (operands);
+	  DONE;
+	}
+    }
+})
+
+(define_insn "*mov"
+  [(set (match_operand:VQIHI 0 "nonimmediate_operand" "=r, r,$U45,$U33,$U37,$U45, m,$  l,$  l,$  l,$  d,  d, r,$   d,    r,    r,    r, *f, *f,  r, *f,  Q, A")
+	(match_operand:VQIHI 1 "nds32_vmove_operand"  " r, r,   l,   l,   l,   d, r, U45, U33, U37, U45,Ufe, m, CVp5, CVs5, CVs2, CVhi, *f,  r, *f,  Q, *f, r"))]
+  "NDS32_EXT_DSP_P ()
+   && (register_operand(operands[0], mode)
+       || register_operand(operands[1], mode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mov55\t%0, %1";
+    case 1:
+      return "ori\t%0, %1, 0";
+    case 2:
+    case 3:
+    case 4:
+    case 5:
+      return nds32_output_16bit_store (operands, <byte>);
+    case 6:
+      return nds32_output_32bit_store (operands, <byte>);
+    case 7:
+    case 8:
+    case 9:
+    case 10:
+    case 11:
+      return nds32_output_16bit_load (operands, <byte>);
+    case 12:
+      return nds32_output_32bit_load (operands, <byte>);
+    case 13:
+      return "movpi45\t%0, %1";
+    case 14:
+      return "movi55\t%0, %1";
+    case 15:
+      return "movi\t%0, %1";
+    case 16:
+      return "sethi\t%0, hi20(%1)";
+    case 17:
+      if (TARGET_FPU_SINGLE)
+	return "fcpyss\t%0, %1, %1";
+      else
+	return "#";
+    case 18:
+      return "fmtsr\t%1, %0";
+    case 19:
+      return "fmfsr\t%0, %1";
+    case 20:
+      return nds32_output_float_load (operands);
+    case 21:
+      return nds32_output_float_store (operands);
+    case 22:
+      return "mtusr\t%1, %0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,fcpy,fmtsr,fmfsr,fload,fstore,alu")
+   (set_attr "length"  "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   2,   4,  2,  2,  4,  4,   4,    4,    4,    4,     4,  4")
+   (set_attr "feature" " v1, v1,   v1,   v1,   v1,   v1,   v1,  v1,  v1,  v1,  v1, v3m,  v1, v1, v1, v1, v1, fpu,  fpu,  fpu,  fpu,   fpu, v1")])
+
+(define_expand "movv2si"
+  [(set (match_operand:V2SI 0 "general_operand" "")
+	(match_operand:V2SI 1 "general_operand" ""))]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (V2SImode, operands[1]);
+})
+
+(define_insn "*movv2si"
+  [(set (match_operand:V2SI 0 "nonimmediate_operand" "=r, r,  r, r, Da, m, f, Q, f, r, f")
+	(match_operand:V2SI 1 "general_operand"      " r, i, Da, m,  r, r, Q, f, f, f, r"))]
+  "NDS32_EXT_DSP_P ()
+   && (register_operand(operands[0], V2SImode)
+       || register_operand(operands[1], V2SImode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "movd44\t%0, %1";
+    case 1:
+      /* reg <- const_int, we ask gcc to split instruction.  */
+      return "#";
+    case 2:
+      /* The memory format is (mem (reg)),
+	 we can generate 'lmw.bi' instruction.  */
+      return nds32_output_double (operands, true);
+    case 3:
+      /* There is no 64-bit load instruction,
+	 so we split this pattern into two SImode patterns.  */
+      return "#";
+    case 4:
+      /* The memory format is (mem (reg)),
+	 we can generate 'smw.bi' instruction.  */
+      return nds32_output_double (operands, false);
+    case 5:
+      /* There is no 64-bit store instruction,
+	 so we split this pattern into two SImode patterns.  */
+      return "#";
+    case 6:
+      return nds32_output_float_load (operands);
+    case 7:
+      return nds32_output_float_store (operands);
+    case 8:
+      return "fcpysd\t%0, %1, %1";
+    case 9:
+      return "fmfdr\t%0, %1";
+    case 10:
+      return "fmtdr\t%1, %0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,load,load,store,store,unknown,unknown,unknown,unknown,unknown")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "!TARGET_16_BIT")
+		     (const_int 4)
+		     (const_int 2))
+       ;; Alternative 1
+       (const_int 16)
+       ;; Alternative 2
+       (const_int 4)
+       ;; Alternative 3
+       (const_int 8)
+       ;; Alternative 4
+       (const_int 4)
+       ;; Alternative 5
+       (const_int 8)
+       ;; Alternative 6
+       (const_int 4)
+       ;; Alternative 7
+       (const_int 4)
+       ;; Alternative 8
+       (const_int 4)
+       ;; Alternative 9
+       (const_int 4)
+       ;; Alternative 10
+       (const_int 4)
+     ])
+   (set_attr "feature" " v1, v1,  v1,  v1,   v1,   v1,    fpu,    fpu,    fpu,    fpu,    fpu")])
+
+(define_expand "movmisalign"
+  [(set (match_operand:VQIHI 0 "general_operand" "")
+	(match_operand:VQIHI 1 "general_operand" ""))]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx addr;
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+
+  if (MEM_P (operands[0]))
+    {
+      addr = force_reg (Pmode, XEXP (operands[0], 0));
+      emit_insn (gen_unaligned_store<mode> (addr, operands[1]));
+    }
+  else
+    {
+      addr = force_reg (Pmode, XEXP (operands[1], 0));
+      emit_insn (gen_unaligned_load<mode> (operands[0], addr));
+    }
+  DONE;
+})
+
+(define_expand "unaligned_load"
+  [(set (match_operand:VQIHI 0 "register_operand" "=r")
+	(unspec:VQIHI [(mem:VQIHI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_ISA_V3M)
+    nds32_expand_unaligned_load (operands, <MODE>mode);
+  else
+    emit_insn (gen_unaligned_load_w<mode> (operands[0], gen_rtx_MEM (<MODE>mode, operands[1])));
+  DONE;
+})
+
+(define_insn "unaligned_load_w"
+  [(set (match_operand:VQIHI 0 "register_operand"                          "=  r")
+	(unspec:VQIHI [(match_operand:VQIHI 1 "nds32_lmw_smw_base_operand" " Umw")] UNSPEC_UALOAD_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  return nds32_output_lmw_single_word (operands);
+}
+  [(set_attr "type"   "load")
+   (set_attr "length"    "4")]
+)
+
+(define_expand "unaligned_store"
+  [(set (mem:VQIHI (match_operand:SI 0 "register_operand" "r"))
+	(unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" "r")] UNSPEC_UASTORE_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_ISA_V3M)
+    nds32_expand_unaligned_store (operands, <MODE>mode);
+  else
+    emit_insn (gen_unaligned_store_w<mode> (gen_rtx_MEM (<MODE>mode, operands[0]), operands[1]));
+  DONE;
+})
+
+(define_insn "unaligned_store_w"
+  [(set (match_operand:VQIHI 0 "nds32_lmw_smw_base_operand"      "=Umw")
+	(unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" "   r")] UNSPEC_UASTORE_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  return nds32_output_smw_single_word (operands);
+}
+  [(set_attr "type"   "store")
+   (set_attr "length"     "4")]
+)
+
+(define_insn "add3"
+  [(set (match_operand:VQIHI 0 "register_operand"                 "=r")
+	(all_plus:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+			(match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "add %0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "adddi3"
+  [(set (match_operand:DI 0 "register_operand"              "=r")
+	(all_plus:DI (match_operand:DI 1 "register_operand" " r")
+		     (match_operand:DI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "add64 %0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
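+;; Halving adds: each element is widened, added, and then shifted right
+;; by one (arithmetically for the signed forms, logically for the
+;; unsigned forms) before being truncated back to the element width.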
+(define_insn "raddv4qi3"
+  [(set (match_operand:V4QI 0 "register_operand"                  "=r")
+	(truncate:V4QI
+	  (ashiftrt:V4HI
+	    (plus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
+		       (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "radd8\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+
+(define_insn "uraddv4qi3"
+  [(set (match_operand:V4QI 0 "register_operand"                  "=r")
+	(truncate:V4QI
+	  (lshiftrt:V4HI
+	    (plus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
+		       (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "uradd8\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "raddv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                                  "=r")
+	(truncate:V2HI
+	  (ashiftrt:V2SI
+	    (plus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
+		       (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "radd16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "uraddv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                                  "=r")
+	(truncate:V2HI
+	  (lshiftrt:V2SI
+	    (plus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
+		       (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "uradd16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "radddi3"
+  [(set (match_operand:DI 0 "register_operand"            "=r")
+	(truncate:DI
+	  (ashiftrt:TI
+	    (plus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r"))
+		     (sign_extend:TI (match_operand:DI 2 "register_operand" " r")))
+	  (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "radd64\t%0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+
+(define_insn "uradddi3"
+  [(set (match_operand:DI 0 "register_operand"            "=r")
+	(truncate:DI
+	  (lshiftrt:TI
+	    (plus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r"))
+		     (zero_extend:TI (match_operand:DI 2 "register_operand" " r")))
+	  (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "uradd64\t%0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "sub3"
+  [(set (match_operand:VQIHI 0 "register_operand"                  "=r")
+	(all_minus:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+			 (match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "sub %0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "subdi3"
+  [(set (match_operand:DI 0 "register_operand"               "=r")
+	(all_minus:DI (match_operand:DI 1 "register_operand" " r")
+		      (match_operand:DI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "sub64 %0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
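+;; Halving subtracts: same scheme as the halving adds above, but with a
+;; widened subtraction before the shift-right-by-one and truncation.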
+(define_insn "rsubv4qi3"
+  [(set (match_operand:V4QI 0 "register_operand"                                   "=r")
+	(truncate:V4QI
+	  (ashiftrt:V4HI
+	    (minus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
+			(sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "rsub8\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "ursubv4qi3"
+  [(set (match_operand:V4QI 0 "register_operand"                                   "=r")
+	(truncate:V4QI
+	  (lshiftrt:V4HI
+	    (minus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
+			(zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "ursub8\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "rsubv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                                   "=r")
+	(truncate:V2HI
+	  (ashiftrt:V2SI
+	    (minus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
+			(sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "rsub16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "ursubv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                                   "=r")
+	(truncate:V2HI
+	  (lshiftrt:V2SI
+	    (minus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
+			(zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "ursub16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "rsubdi3"
+  [(set (match_operand:DI 0 "register_operand"                   "=r")
+	(truncate:DI
+	  (ashiftrt:TI
+	    (minus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r"))
+		      (sign_extend:TI (match_operand:DI 2 "register_operand" " r")))
+	  (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "rsub64\t%0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")])
+
+
+(define_insn "ursubdi3"
+  [(set (match_operand:DI 0 "register_operand"                   "=r")
+	(truncate:DI
+	  (lshiftrt:TI
+	    (minus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r"))
+		      (zero_extend:TI (match_operand:DI 2 "register_operand" " r")))
+	  (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "ursub64\t%0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")])
+
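+;; Cross add/subtract.  For little endian, cras16 computes
+;; result[0] = op1[0] - op2[1] and result[1] = op1[1] + op2[0];
+;; the big-endian variants swap the element indices accordingly.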
+(define_expand "cras16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_cras16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_cras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "cras16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (plus:HI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 1)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "cras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "cras16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (plus:HI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "cras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "kcras16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kcras16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_kcras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "kcras16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (ss_minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (ss_plus:HI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 1)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "kcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "kcras16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (ss_minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (ss_plus:HI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "kcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "ukcras16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_ukcras16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_ukcras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "ukcras16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (us_minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (us_plus:HI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 1)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "ukcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "ukcras16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (us_minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (us_plus:HI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "ukcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
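+;; Cross subtract/add.  For little endian, crsa16 computes
+;; result[0] = op1[0] + op2[1] and result[1] = op1[1] - op2[0];
+;; again the big-endian variants swap the element indices.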
+(define_expand "crsa16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_crsa16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_crsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "crsa16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (plus:HI
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "crsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "crsa16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (plus:HI
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "crsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "kcrsa16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kcrsa16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_kcrsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "kcrsa16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (ss_minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (ss_plus:HI
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "kcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "kcrsa16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (ss_minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (ss_plus:HI
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "kcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "ukcrsa16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_ukcrsa16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_ukcrsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "ukcrsa16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (us_minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (us_plus:HI
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "ukcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "ukcrsa16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (us_minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (us_plus:HI
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "ukcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
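+;; Halving variants of the cross add/subtract patterns: the intermediate
+;; sums and differences are computed in SImode and shifted right by one
+;; before truncation back to HImode.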
+(define_expand "rcras16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_rcras16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_rcras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "rcras16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+		(minus:SI
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 0)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+		(plus:SI
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 0)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "rcras16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+		(minus:SI
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 1)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+		(plus:SI
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 1)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "urcras16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_urcras16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_urcras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "urcras16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+		(minus:SI
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 0)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+		(plus:SI
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 0)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "urcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "urcras16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+		(minus:SI
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 1)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+		(plus:SI
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 1)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "urcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "rcrsa16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_rcrsa16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_rcrsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "rcrsa16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+	        (minus:SI
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 1)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+		(plus:SI
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 0)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "rcrsa16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+	        (minus:SI
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 0)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+		(plus:SI
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 1)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "urcrsa16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_urcrsa16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_urcrsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "urcrsa16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+	        (minus:SI
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 1)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+		(plus:SI
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 0)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "urcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "urcrsa16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+	        (minus:SI
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 0)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+		(plus:SI
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 1)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "urcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "v2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                  "")
+	(shifts:V2HI (match_operand:V2HI 1 "register_operand"     "")
+		     (match_operand:SI   2 "nds32_rimm4u_operand" "")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (operands[2] == const0_rtx)
+    {
+      emit_move_insn (operands[0], operands[1]);
+      DONE;
+    }
+})
+
+(define_insn "*ashlv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                "=   r, r")
+	(ashift:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+		     (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   slli16\t%0, %1, %2
+   sll16\t%0, %1, %2"
+  [(set_attr "type"   "dalu,dalu")
+   (set_attr "length" "   4,   4")])
+
+(define_insn "kslli16"
+  [(set (match_operand:V2HI 0 "register_operand"                   "=   r, r")
+	(ss_ashift:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+			(match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   kslli16\t%0, %1, %2
+   ksll16\t%0, %1, %2"
+  [(set_attr "type"   "dalu,dalu")
+   (set_attr "length" "   4,   4")])
+
+(define_insn "*ashrv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                  "=   r, r")
+	(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+		       (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srai16\t%0, %1, %2
+   sra16\t%0, %1, %2"
+  [(set_attr "type"   "dalu,dalu")
+   (set_attr "length" "   4,   4")])
+
+(define_insn "sra16_round"
+  [(set (match_operand:V2HI 0 "register_operand"                                "=   r, r")
+	(unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+				     (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))]
+		     UNSPEC_ROUND))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srai16.u\t%0, %1, %2
+   sra16.u\t%0, %1, %2"
+  [(set_attr "type"   "daluround,daluround")
+   (set_attr "length" "         4,       4")])
+
+(define_insn "*lshrv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                  "=   r, r")
+	(lshiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+		       (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srli16\t%0, %1, %2
+   srl16\t%0, %1, %2"
+  [(set_attr "type"   "dalu,dalu")
+   (set_attr "length" "   4,   4")])
+
+(define_insn "srl16_round"
+  [(set (match_operand:V2HI 0 "register_operand"                                "=   r, r")
+	(unspec:V2HI [(lshiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+				     (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))]
+		     UNSPEC_ROUND))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srli16.u\t%0, %1, %2
+   srl16.u\t%0, %1, %2"
+  [(set_attr "type"   "daluround,daluround")
+   (set_attr "length" "        4,        4")])
+
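+;; kslra16 treats the shift amount as signed: a negative amount shifts
+;; right arithmetically by its absolute value, otherwise the operand is
+;; shifted left.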
+(define_insn "kslra16"
+  [(set (match_operand:V2HI 0 "register_operand"                  "=r")
+	(if_then_else:V2HI
+	  (lt:SI (match_operand:SI 2 "register_operand"           " r")
+		 (const_int 0))
+	  (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r")
+			 (neg:SI (match_dup 2)))
+	  (ashift:V2HI (match_dup 1)
+		       (match_dup 2))))]
+  "NDS32_EXT_DSP_P ()"
+  "kslra16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "kslra16_round"
+  [(set (match_operand:V2HI 0 "register_operand"                  "=r")
+	(if_then_else:V2HI
+	  (lt:SI (match_operand:SI 2 "register_operand"           " r")
+		 (const_int 0))
+	  (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r")
+				       (neg:SI (match_dup 2)))]
+		       UNSPEC_ROUND)
+	  (ashift:V2HI (match_dup 1)
+		       (match_dup 2))))]
+  "NDS32_EXT_DSP_P ()"
+  "kslra16.u\t%0, %1, %2"
+  [(set_attr "type"    "daluround")
+   (set_attr "length"  "4")])
+
+(define_insn "cmpeq"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(unspec:SI [(eq:SI (match_operand:VQIHI 1 "register_operand" " r")
+			   (match_operand:VQIHI 2 "register_operand" " r"))]
+		   UNSPEC_VEC_COMPARE))]
+  "NDS32_EXT_DSP_P ()"
+  "cmpeq\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "scmplt"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(unspec:SI [(lt:SI (match_operand:VQIHI 1 "register_operand" " r")
+			   (match_operand:VQIHI 2 "register_operand" " r"))]
+		   UNSPEC_VEC_COMPARE))]
+  "NDS32_EXT_DSP_P ()"
+  "scmplt\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "scmple"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(unspec:SI [(le:SI (match_operand:VQIHI 1 "register_operand" " r")
+			   (match_operand:VQIHI 2 "register_operand" " r"))]
+		   UNSPEC_VEC_COMPARE))]
+  "NDS32_EXT_DSP_P ()"
+  "scmple\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "ucmplt"
+  [(set (match_operand:SI 0 "register_operand"                        "=r")
+	(unspec:SI [(ltu:SI (match_operand:VQIHI 1 "register_operand" " r")
+			    (match_operand:VQIHI 2 "register_operand" " r"))]
+		   UNSPEC_VEC_COMPARE))]
+  "NDS32_EXT_DSP_P ()"
+  "ucmplt\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "ucmple"
+  [(set (match_operand:SI 0 "register_operand"                        "=r")
+	(unspec:SI [(leu:SI (match_operand:VQIHI 1 "register_operand" " r")
+			    (match_operand:VQIHI 2 "register_operand" " r"))]
+		   UNSPEC_VEC_COMPARE))]
+  "NDS32_EXT_DSP_P ()"
+  "ucmple\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "sclip16"
+  [(set (match_operand:V2HI 0 "register_operand"                "=   r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand"  "    r")
+		      (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")]
+		     UNSPEC_CLIPS))]
+  "NDS32_EXT_DSP_P ()"
+  "sclip16\t%0, %1, %2"
+  [(set_attr "type"    "dclip")
+   (set_attr "length"  "4")])
+
+(define_insn "uclip16"
+  [(set (match_operand:V2HI 0 "register_operand"                "=   r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand"  "    r")
+		      (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")]
+		     UNSPEC_CLIP))]
+  "NDS32_EXT_DSP_P ()"
+  "uclip16\t%0, %1, %2"
+  [(set_attr "type"    "dclip")
+   (set_attr "length"  "4")])
+
+(define_insn "khm16"
+  [(set (match_operand:V2HI 0 "register_operand"                "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand"  " r")
+		      (match_operand:V2HI 2 "register_operand" "  r")]
+		     UNSPEC_KHM))]
+  "NDS32_EXT_DSP_P ()"
+  "khm16\t%0, %1, %2"
+  [(set_attr "type"    "dmul")
+   (set_attr "length"  "4")])
+
+(define_insn "khmx16"
+  [(set (match_operand:V2HI 0 "register_operand"                "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand"  " r")
+		      (match_operand:V2HI 2 "register_operand" "  r")]
+		     UNSPEC_KHMX))]
+  "NDS32_EXT_DSP_P ()"
+  "khmx16\t%0, %1, %2"
+  [(set_attr "type"    "dmul")
+   (set_attr "length"  "4")])
+
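+;; The vec_set expanders convert the element index into the one-hot
+;; selector (1 << index) expected by the vec_merge based internal
+;; patterns.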
+(define_expand "vec_setv4qi"
+  [(match_operand:V4QI 0 "register_operand" "")
+   (match_operand:QI 1 "register_operand" "")
+   (match_operand:SI 2 "immediate_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  HOST_WIDE_INT pos = INTVAL (operands[2]);
+  if (pos > 4)
+    gcc_unreachable ();
+  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos;
+  emit_insn (gen_vec_setv4qi_internal (operands[0], operands[1],
+				       operands[0], GEN_INT (elem)));
+  DONE;
+})
+
+(define_expand "insb"
+  [(match_operand:V4QI 0 "register_operand" "")
+   (match_operand:V4QI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:SI 3 "const_int_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (INTVAL (operands[3]) > 3 || INTVAL (operands[3]) < 0)
+    gcc_unreachable ();
+
+  rtx src = gen_reg_rtx (QImode);
+
+  convert_move (src, operands[2], false);
+
+  HOST_WIDE_INT selector_index;
+  /* Big endian needs a reversed index.  */
+  if (TARGET_BIG_ENDIAN)
+    selector_index = 4 - INTVAL (operands[3]) - 1;
+  else
+    selector_index = INTVAL (operands[3]);
+  rtx selector = gen_int_mode (1 << selector_index, SImode);
+  emit_insn (gen_vec_setv4qi_internal (operands[0], src,
+				       operands[1], selector));
+  DONE;
+})
+
+(define_expand "insvsi"
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "")
+			 (match_operand:SI 1 "const_int_operand" "")
+			 (match_operand:SI 2 "nds32_insv_operand" ""))
+	(match_operand:SI 3 "register_operand" ""))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (INTVAL (operands[1]) != 8)
+    FAIL;
+}
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+
+(define_insn "insvsi_internal"
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand"   "+r")
+			 (const_int 8)
+			 (match_operand:SI 1 "nds32_insv_operand"  "i"))
+	(match_operand:SI 2                  "register_operand"    "r"))]
+  "NDS32_EXT_DSP_P ()"
+  "insb\t%0, %2, %v1"
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+(define_insn "insvsiqi_internal"
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand"   "+r")
+			 (const_int 8)
+			 (match_operand:SI 1 "nds32_insv_operand"  "i"))
+	(zero_extend:SI (match_operand:QI 2 "register_operand"    "r")))]
+  "NDS32_EXT_DSP_P ()"
+  "insb\t%0, %2, %v1"
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+;; Intermediate pattern for synthesizing insvsiqi_internal.
+;; v0 = ((v1 & 0xff) << 8)
+(define_insn_and_split "and0xff_s8"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+			   (const_int 8))
+		(const_int 65280)))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (SImode);
+  emit_insn (gen_ashlsi3 (tmp, operands[1], gen_int_mode (8, SImode)));
+  emit_insn (gen_andsi3 (operands[0], tmp, gen_int_mode (0xffff, SImode)));
+  DONE;
+})
+
+;; v0 = (v1 & 0xff00ffff) | ((v2 << 16) | 0xff0000)
+(define_insn_and_split "insbsi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "0")
+			(const_int -16711681))
+		(and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r")
+				   (const_int 16))
+			(const_int 16711680))))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (SImode);
+  emit_move_insn (tmp, operands[1]);
+  emit_insn (gen_insvsi_internal (tmp, gen_int_mode(16, SImode), operands[2]));
+  emit_move_insn (operands[0], tmp);
+  DONE;
+})
+
+;; v0 = (v1 & 0xff00ffff) | v2
+(define_insn_and_split "ior_and0xff00ffff_reg"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+			(const_int -16711681))
+		(match_operand:SI 2 "register_operand" "r")))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (SImode);
+  emit_insn (gen_andsi3 (tmp, operands[1], gen_int_mode (0xff00ffff, SImode)));
+  emit_insn (gen_iorsi3 (operands[0], tmp, operands[2]));
+  DONE;
+})
+
+(define_insn "vec_setv4qi_internal"
+  [(set (match_operand:V4QI 0 "register_operand"          "=   r,    r,    r,    r")
+	(vec_merge:V4QI
+	  (vec_duplicate:V4QI
+	    (match_operand:QI 1 "register_operand"        "    r,    r,    r,    r"))
+	  (match_operand:V4QI 2 "register_operand"        "    0,    0,    0,    0")
+	  (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+       const char *pats[] = { "insb\t%0, %1, 3",
+			      "insb\t%0, %1, 2",
+			      "insb\t%0, %1, 1",
+			      "insb\t%0, %1, 0" };
+      return pats[which_alternative];
+    }
+  else
+    {
+       const char *pats[] = { "insb\t%0, %1, 0",
+			      "insb\t%0, %1, 1",
+			      "insb\t%0, %1, 2",
+			      "insb\t%0, %1, 3" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_setv4qi_internal_vec"
+  [(set (match_operand:V4QI 0 "register_operand"          "=   r,    r,    r,    r")
+	(vec_merge:V4QI
+	  (vec_duplicate:V4QI
+	    (vec_select:QI
+	      (match_operand:V4QI 1 "register_operand"    "    r,    r,    r,    r")
+	      (parallel [(const_int 0)])))
+	  (match_operand:V4QI 2 "register_operand"        "    0,    0,    0,    0")
+	  (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   insb\t%0, %1, 0
+   insb\t%0, %1, 1
+   insb\t%0, %1, 2
+   insb\t%0, %1, 3"
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergev4qi_and_cv0_1"
+  [(set (match_operand:V4QI 0 "register_operand"       "=$l,r")
+	(vec_merge:V4QI
+	  (vec_duplicate:V4QI
+	    (vec_select:QI
+	      (match_operand:V4QI 1 "register_operand" "  l,r")
+	      (parallel [(const_int 0)])))
+	  (const_vector:V4QI [
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)])
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeb33\t%0, %1
+   zeb\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergev4qi_and_cv0_2"
+  [(set (match_operand:V4QI 0 "register_operand"       "=$l,r")
+	(vec_merge:V4QI
+	  (const_vector:V4QI [
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)])
+	  (vec_duplicate:V4QI
+	    (vec_select:QI
+	      (match_operand:V4QI 1 "register_operand" "  l,r")
+	      (parallel [(const_int 0)])))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeb33\t%0, %1
+   zeb\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergeqi_and_cv0_1"
+  [(set (match_operand:V4QI 0 "register_operand"                     "=$l,r")
+	(vec_merge:V4QI
+	  (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" "  l,r"))
+	  (const_vector:V4QI [
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)])
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeb33\t%0, %1
+   zeb\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergeqi_and_cv0_2"
+  [(set (match_operand:V4QI 0 "register_operand"                     "=$l,r")
+	(vec_merge:V4QI
+	  (const_vector:V4QI [
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)])
+	  (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" "  l,r"))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeb33\t%0, %1
+   zeb\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_expand "vec_setv2hi"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:HI 1 "register_operand" "")
+   (match_operand:SI 2 "immediate_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  HOST_WIDE_INT pos = INTVAL (operands[2]);
+  if (pos > 2)
+    gcc_unreachable ();
+  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos;
+  emit_insn (gen_vec_setv2hi_internal (operands[0], operands[1],
+				       operands[0], GEN_INT (elem)));
+  DONE;
+})
+
+(define_insn "vec_setv2hi_internal"
+  [(set (match_operand:V2HI 0 "register_operand"      "=   r,    r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (match_operand:HI 1 "register_operand"    "    r,    r"))
+	  (match_operand:V2HI 2 "register_operand"    "    r,    r")
+	  (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "pkbb16\t%0, %1, %2",
+			     "pktb16\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "pktb16\t%0, %2, %1",
+			     "pkbb16\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergev2hi_and_cv0_1"
+  [(set (match_operand:V2HI 0 "register_operand"       "=$l,r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (vec_select:HI
+	      (match_operand:V2HI 1 "register_operand" "  l,r")
+	      (parallel [(const_int 0)])))
+	  (const_vector:V2HI [
+	    (const_int 0)
+	    (const_int 0)])
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeh33\t%0, %1
+   zeh\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergev2hi_and_cv0_2"
+  [(set (match_operand:V2HI 0 "register_operand"       "=$l,r")
+	(vec_merge:V2HI
+	  (const_vector:V2HI [
+	    (const_int 0)
+	    (const_int 0)])
+	  (vec_duplicate:V2HI
+	    (vec_select:HI
+	      (match_operand:V2HI 1 "register_operand" "  l,r")
+	      (parallel [(const_int 0)])))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeh33\t%0, %1
+   zeh\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergehi_and_cv0_1"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=$l,r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" "  l,r"))
+	  (const_vector:V2HI [
+	    (const_int 0)
+	    (const_int 0)])
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeh33\t%0, %1
+   zeh\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergehi_and_cv0_2"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=$l,r")
+	(vec_merge:V2HI
+	  (const_vector:V2HI [
+	    (const_int 0)
+	    (const_int 0)])
+	  (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" "  l,r"))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeh33\t%0, %1
+   zeh\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_expand "pkbb"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (1), GEN_INT (1), GEN_INT (1)));
+    }
+  else
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (2), GEN_INT (0), GEN_INT (0)));
+    }
+  DONE;
+})
+
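+;; SImode forms of the pack instructions.  The patterns below match
+;; (op2 << 16) | (op1 & 0xffff) and emit pkbb16, which packs the bottom
+;; halfwords of its two source registers.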
+(define_insn "pkbbsi_1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+			(const_int 65535))
+		(ashift:SI (match_operand:SI 2 "register_operand" "r")
+			   (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+  "pkbb16\t%0, %2, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pkbbsi_2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI	(ashift:SI (match_operand:SI 2 "register_operand" "r")
+			   (const_int 16))
+		(and:SI (match_operand:SI 1 "register_operand" "r")
+			(const_int 65535))))]
+  "NDS32_EXT_DSP_P ()"
+  "pkbb16\t%0, %2, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pkbbsi_3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI (zero_extend:SI	(match_operand:HI 1 "register_operand" "r"))
+		(ashift:SI (match_operand:SI 2 "register_operand" "r")
+			   (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+  "pkbb16\t%0, %2, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pkbbsi_4"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI	(ashift:SI (match_operand:SI 2 "register_operand" "r")
+			   (const_int 16))
+		(zero_extend:SI (match_operand:HI 1 "register_operand" "r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "pkbb16\t%0, %2, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+;; v0 = (v1 & 0xffff0000) | (v2 & 0xffff)
+(define_insn "pktbsi_1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+			(const_int -65536))
+		(zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "pktb16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pktbsi_2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+			(const_int -65536))
+		(and:SI (match_operand:SI 2 "register_operand" "r")
+			(const_int 65535))))]
+  "NDS32_EXT_DSP_P ()"
+  "pktb16\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "pktbsi_3"
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+			 (const_int 16 )
+			 (const_int 0))
+	(match_operand:SI 1 "register_operand"                  " r"))]
+  "NDS32_EXT_DSP_P ()"
+  "pktb16\t%0, %0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pktbsi_4"
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+			 (const_int 16 )
+			 (const_int 0))
+	(zero_extend:SI (match_operand:HI 1 "register_operand"  " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "pktb16\t%0, %0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pkttsi"
+  [(set (match_operand:SI 0 "register_operand"                      "=r")
+	(ior:SI (and:SI (match_operand:SI 1 "register_operand"      " r")
+			(const_int -65536))
+		(lshiftrt:SI (match_operand:SI 2 "register_operand" " r")
+			     (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+  "pktt16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "pkbt"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (1), GEN_INT (1), GEN_INT (0)));
+    }
+  else
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (2), GEN_INT (0), GEN_INT (1)));
+    }
+  DONE;
+})
+
+(define_expand "pktt"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (1), GEN_INT (0), GEN_INT (0)));
+    }
+  else
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (2), GEN_INT (1), GEN_INT (1)));
+    }
+  DONE;
+})
+
+(define_expand "pktb"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (1), GEN_INT (0), GEN_INT (1)));
+    }
+  else
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (2), GEN_INT (1), GEN_INT (0)));
+    }
+  DONE;
+})
+
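+;; vec_merge variants: pack halfwords from two sources with the
+;; pkbb16/pkbt16/pktb16/pktt16 instructions.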
+(define_insn "vec_mergerr"
+  [(set (match_operand:V2HI 0 "register_operand"      "=   r,    r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (match_operand:HI 1 "register_operand"    "    r,    r"))
+	  (vec_duplicate:V2HI
+	    (match_operand:HI 2 "register_operand"    "    r,    r"))
+	  (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   pkbb16\t%0, %2, %1
+   pkbb16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+
+(define_insn "vec_merge"
+  [(set (match_operand:V2HI 0 "register_operand"      "=   r,    r")
+	(vec_merge:V2HI
+	  (match_operand:V2HI 1 "register_operand"    "    r,    r")
+	  (match_operand:V2HI 2 "register_operand"    "    r,    r")
+	  (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "pktb16\t%0, %1, %2",
+			     "pktb16\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "pktb16\t%0, %2, %1",
+			     "pktb16\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergerv"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=   r,    r,    r,    r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (match_operand:HI 1 "register_operand"                   "    r,    r,    r,    r"))
+	  (vec_duplicate:V2HI
+	    (vec_select:HI
+	      (match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r")
+	      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")])))
+	  (match_operand:SI 3 "nds32_imm_1_2_operand"                " Iv01, Iv01, Iv02, Iv02")))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   pkbb16\t%0, %2, %1
+   pktb16\t%0, %2, %1
+   pkbb16\t%0, %1, %2
+   pkbt16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergevr"
+  [(set (match_operand:V2HI 0 "register_operand"                      "=   r,    r,    r,    r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (vec_select:HI
+	      (match_operand:V2HI 1 "register_operand"                "    r,    r,    r,    r")
+	       (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")])))
+	  (vec_duplicate:V2HI
+	    (match_operand:HI 2 "register_operand"                    "    r,    r,    r,    r"))
+	  (match_operand:SI 3 "nds32_imm_1_2_operand"                 " Iv01, Iv01, Iv02, Iv02")))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   pkbb16\t%0, %2, %1
+   pkbt16\t%0, %2, %1
+   pkbb16\t%0, %1, %2
+   pktb16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergevv"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=   r,    r,    r,    r,    r,    r,    r,    r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (vec_select:HI
+	      (match_operand:V2HI 1 "register_operand"               "    r,    r,    r,    r,    r,    r,    r,    r")
+	      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01")])))
+	  (vec_duplicate:V2HI
+	    (vec_select:HI
+	      (match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r,    r,    r,    r,    r")
+	      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01, Iv00")])))
+	  (match_operand:SI 3 "nds32_imm_1_2_operand"                " Iv01, Iv01, Iv01, Iv01, Iv02, Iv02, Iv02, Iv02")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "pktt16\t%0, %1, %2",
+			     "pktb16\t%0, %1, %2",
+			     "pkbb16\t%0, %1, %2",
+			     "pkbt16\t%0, %1, %2",
+			     "pktt16\t%0, %2, %1",
+			     "pkbt16\t%0, %2, %1",
+			     "pkbb16\t%0, %2, %1",
+			     "pktb16\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "pkbb16\t%0, %2, %1",
+			     "pktb16\t%0, %2, %1",
+			     "pktt16\t%0, %2, %1",
+			     "pkbt16\t%0, %2, %1",
+			     "pkbb16\t%0, %1, %2",
+			     "pkbt16\t%0, %1, %2",
+			     "pktt16\t%0, %1, %2",
+			     "pktb16\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
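+;; Extract a single byte from a V4QI vector.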
+(define_expand "vec_extractv4qi"
+  [(set (match_operand:QI 0 "register_operand" "")
+	(vec_select:QI
+	  (match_operand:V4QI 1          "nonimmediate_operand" "")
+	  (parallel [(match_operand:SI 2 "const_int_operand" "")])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  if (INTVAL (operands[2]) != 0
+      && INTVAL (operands[2]) != 1
+      && INTVAL (operands[2]) != 2
+      && INTVAL (operands[2]) != 3)
+    gcc_unreachable ();
+
+  if (INTVAL (operands[2]) != 0 && MEM_P (operands[0]))
+    FAIL;
+})
+
+(define_insn "vec_extractv4qi0"
+  [(set (match_operand:QI 0 "register_operand"         "=l,r,r")
+	(vec_select:QI
+	  (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m")
+	  (parallel [(const_int 0)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "zeb33\t%0, %1";
+    case 1:
+      return "zeb\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load (operands, 1);
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_extractv4qi0_ze"
+  [(set (match_operand:SI 0 "register_operand"         "=l,r,r")
+	(zero_extend:SI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m")
+	    (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "zeb33\t%0, %1";
+    case 1:
+      return "zeb\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load (operands, 1);
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_extractv4qi0_se"
+  [(set (match_operand:SI 0 "register_operand"         "=l,r,r")
+	(sign_extend:SI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m")
+	    (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "seb33\t%0, %1";
+    case 1:
+      return "seb\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_se (operands, 1);
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qi1"
+  [(set (match_operand:QI 0 "register_operand" "=r")
+	(vec_select:QI
+	  (match_operand:V4QI 1 "register_operand" " r")
+	  (parallel [(const_int 1)])))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode);
+  emit_insn (gen_rotrv4qi_1 (tmp, operands[1]));
+  emit_insn (gen_vec_extractv4qi0 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qi2"
+  [(set (match_operand:QI 0 "register_operand" "=r")
+	(vec_select:QI
+	  (match_operand:V4QI 1 "register_operand" " r")
+	  (parallel [(const_int 2)])))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode);
+  emit_insn (gen_rotrv4qi_2 (tmp, operands[1]));
+  emit_insn (gen_vec_extractv4qi0 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qi3"
+  [(set (match_operand:QI 0 "register_operand" "=r")
+	(vec_select:QI
+	  (match_operand:V4QI 1 "register_operand" " r")
+	  (parallel [(const_int 3)])))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode);
+  emit_insn (gen_rotrv4qi_3 (tmp, operands[1]));
+  emit_insn (gen_vec_extractv4qi0 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_extractv4qi3_se"
+  [(set (match_operand:SI 0 "register_operand"       "=$d,r")
+	(sign_extend:SI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "register_operand" "  0,r")
+	    (parallel [(const_int 3)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srai45\t%0, 24
+   srai\t%0, %1, 24"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv4qi3_ze"
+  [(set (match_operand:SI 0 "register_operand"       "=$d,r")
+	(zero_extend:SI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "register_operand" "  0,r")
+	    (parallel [(const_int 3)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srli45\t%0, 24
+   srli\t%0, %1, 24"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn_and_split "vec_extractv4qihi0"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(sign_extend:HI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "register_operand" " r")
+	    (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (QImode);
+  emit_insn (gen_vec_extractv4qi0 (tmp, operands[1]));
+  emit_insn (gen_extendqihi2 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qihi1"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(sign_extend:HI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "register_operand" " r")
+	    (parallel [(const_int 1)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (QImode);
+  emit_insn (gen_vec_extractv4qi1 (tmp, operands[1]));
+  emit_insn (gen_extendqihi2 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qihi2"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(sign_extend:HI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "register_operand" " r")
+	    (parallel [(const_int 2)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (QImode);
+  emit_insn (gen_vec_extractv4qi2 (tmp, operands[1]));
+  emit_insn (gen_extendqihi2 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qihi3"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(sign_extend:HI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "register_operand" " r")
+	    (parallel [(const_int 3)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (QImode);
+  emit_insn (gen_vec_extractv4qi3 (tmp, operands[1]));
+  emit_insn (gen_extendqihi2 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
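+;; Extract a single halfword from a V2HI vector.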
+(define_expand "vec_extractv2hi"
+  [(set (match_operand:HI 0 "register_operand" "")
+	(vec_select:HI
+	  (match_operand:V2HI 1          "nonimmediate_operand" "")
+	  (parallel [(match_operand:SI 2 "const_int_operand" "")])))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (INTVAL (operands[2]) != 0
+      && INTVAL (operands[2]) != 1)
+    gcc_unreachable ();
+
+  if (INTVAL (operands[2]) != 0 && MEM_P (operands[0]))
+    FAIL;
+})
+
+(define_insn "vec_extractv2hi0"
+  [(set (match_operand:HI 0 "register_operand"         "=$l,r,r")
+	(vec_select:HI
+	  (match_operand:V2HI 1 "nonimmediate_operand" "  l,r,m")
+	  (parallel [(const_int 0)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "seh33\t%0, %1";
+    case 1:
+      return "seh\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_se (operands, 2);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,load")
+   (set_attr "length"  "  2,  4,   4")])
+
+(define_insn "vec_extractv2hi0_ze"
+  [(set (match_operand:SI 0 "register_operand"         "=$l, r,$  l, *r")
+        (zero_extend:SI
+	  (vec_select:HI
+	    (match_operand:V2HI 1 "nonimmediate_operand" "  l, r, U33,  m")
+	    (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "zeh33\t%0, %1";
+    case 1:
+      return "zeh\t%0, %1";
+    case 2:
+      return nds32_output_16bit_load (operands, 2);
+    case 3:
+      return nds32_output_32bit_load (operands, 2);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load,load")
+   (set_attr "length" "  2,  4,   2,   4")])
+
+(define_insn "vec_extractv2hi0_se"
+  [(set (match_operand:SI 0 "register_operand"         "=$l, r, r")
+        (sign_extend:SI
+	  (vec_select:HI
+	    (match_operand:V2HI 1 "nonimmediate_operand" "  l,r,m")
+	    (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "seh33\t%0, %1";
+    case 1:
+      return "seh\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_se (operands, 2);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load")
+   (set_attr "length" "  2,  4,   4")])
+
+(define_insn "vec_extractv2hi0_be"
+  [(set (match_operand:HI 0 "register_operand"     "=$d,r")
+	(vec_select:HI
+	  (match_operand:V2HI 1 "register_operand" "  0,r")
+	  (parallel [(const_int 0)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "@
+   srai45\t%0, 16
+   srai\t%0, %1, 16"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv2hi1"
+  [(set (match_operand:HI 0 "register_operand"     "=$d,r")
+	(vec_select:HI
+	  (match_operand:V2HI 1 "register_operand" "  0,r")
+	  (parallel [(const_int 1)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srai45\t%0, 16
+   srai\t%0, %1, 16"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv2hi1_se"
+  [(set (match_operand:SI 0 "register_operand"     "=$d,r")
+	(sign_extend:SI
+	  (vec_select:HI
+	    (match_operand:V2HI 1 "register_operand" "  0,r")
+	    (parallel [(const_int 1)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srai45\t%0, 16
+   srai\t%0, %1, 16"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv2hi1_ze"
+  [(set (match_operand:SI 0 "register_operand"     "=$d,r")
+	(zero_extend:SI
+	  (vec_select:HI
+	    (match_operand:V2HI 1 "register_operand" "  0,r")
+	    (parallel [(const_int 1)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srli45\t%0, 16
+   srli\t%0, %1, 16"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv2hi1_be"
+  [(set (match_operand:HI 0 "register_operand"         "=$l,r,r")
+	(vec_select:HI
+	  (match_operand:V2HI 1 "nonimmediate_operand" "  l,r,m")
+	  (parallel [(const_int 1)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "seh33\t%0, %1";
+    case 1:
+      return "seh\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_se (operands, 2);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,load")
+   (set_attr "length"  "  2,  4,   4")])
+
+(define_insn "mul16"
+  [(set (match_operand:V2SI 0 "register_operand"                         "=r")
+	(mult:V2SI (extend:V2SI (match_operand:V2HI 1 "register_operand" "%r"))
+		   (extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "mul16\t%0, %1, %2"
+  [(set_attr "type"   "dmul")
+   (set_attr "length"   "4")])
+
+(define_insn "mulx16"
+  [(set (match_operand:V2SI 0 "register_operand"         "=r")
+	(vec_merge:V2SI
+	  (vec_duplicate:V2SI
+	    (mult:SI
+	      (extend:SI
+		(vec_select:HI
+		  (match_operand:V2HI 1 "register_operand" " r")
+		  (parallel [(const_int 0)])))
+	      (extend:SI
+		(vec_select:HI
+		  (match_operand:V2HI 2 "register_operand" " r")
+		  (parallel [(const_int 1)])))))
+	  (vec_duplicate:V2SI
+	    (mult:SI
+	      (extend:SI
+		(vec_select:HI
+		  (match_dup 1)
+		  (parallel [(const_int 1)])))
+	      (extend:SI
+		(vec_select:HI
+		  (match_dup 2)
+		  (parallel [(const_int 0)])))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P ()"
+  "mulx16\t%0, %1, %2"
+  [(set_attr "type"    "dmul")
+   (set_attr "length"   "4")])
+
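+;; Rotate the packed elements using rotri.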
+(define_insn "rotrv2hi_1"
+  [(set (match_operand:V2HI 0 "register_operand"    "=r")
+	(vec_select:V2HI
+	   (match_operand:V2HI 1 "register_operand" " r")
+	   (parallel [(const_int 1) (const_int 0)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 16"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv2hi_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"    "=r")
+	(vec_select:V2HI
+	   (match_operand:V2HI 1 "register_operand" " r")
+	   (parallel [(const_int 0) (const_int 1)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 16"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_1"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 0)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 8"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_1_be"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 2) (const_int 1) (const_int 0) (const_int 3)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 8"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_2"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 2) (const_int 3) (const_int 0) (const_int 1)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 16"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_2_be"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 16"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_3"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 3) (const_int 0) (const_int 1) (const_int 2)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 24"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_3_be"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 0) (const_int 3) (const_int 2) (const_int 1)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 24"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "v4qi_dup_10"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 0) (const_int 1) (const_int 0) (const_int 1)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "pkbb\t%0, %1, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "v4qi_dup_32"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 2) (const_int 3) (const_int 2) (const_int 3)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "pktt\t%0, %1, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "vec_unpacks_lo_v4qi"
+  [(match_operand:V2HI 0 "register_operand" "=r")
+   (match_operand:V4QI 1 "register_operand" " r")]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  emit_insn (gen_sunpkd810 (operands[0], operands[1]));
+  DONE;
+})
+
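+;; sunpkd8xy/zunpkd8xy: unpack bytes x and y of the source into two
+;; sign- or zero-extended halfwords.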
+(define_expand "sunpkd810"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_sunpkd810_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_sunpkd810_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "unpkd810_imp"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "unpkd810\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "unpkd810_imp_inv"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 1)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "unpkd810\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "unpkd810_imp_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 2)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 3)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "unpkd810\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "unpkd810_imp_inv_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 3)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 2)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "unpkd810\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "sunpkd820"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_sunpkd820_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_sunpkd820_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "unpkd820_imp"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 2)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "unpkd820\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "unpkd820_imp_inv"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 2)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "unpkd820\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "unpkd820_imp_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 3)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "unpkd820\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "unpkd820_imp_inv_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 3)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 1)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "unpkd820\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "sunpkd830"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_sunpkd830_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_sunpkd830_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "unpkd830_imp"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 3)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "unpkd830\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "unpkd830_imp_inv"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 3)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "unpkd830\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "unpkd830_imp_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 3)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "unpkd830\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "unpkd830_imp_inv_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 3)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "unpkd830\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "sunpkd831"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_sunpkd831_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_sunpkd831_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "unpkd831_imp"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 3)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 1)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "unpkd831\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "unpkd831_imp_inv"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 3)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "unpkd831\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "unpkd831_imp_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 2)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "unpkd831\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "unpkd831_imp_inv_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 2)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "unpkd831\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "zunpkd810"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_zunpkd810_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_zunpkd810_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "zunpkd820"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_zunpkd820_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_zunpkd820_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "zunpkd830"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_zunpkd830_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_zunpkd830_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "zunpkd831"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_zunpkd831_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_zunpkd831_imp (operands[0], operands[1]));
+  DONE;
+})
+
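+;; smbb/smbt/smtt: signed multiply of one halfword selected from each operand.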
+(define_expand "smbb"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
+			      GEN_INT (1), GEN_INT (1)));
+  else
+    emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
+			      GEN_INT (0), GEN_INT (0)));
+  DONE;
+})
+
+(define_expand "smbt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
+			      GEN_INT (1), GEN_INT (0)));
+  else
+    emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
+			      GEN_INT (0), GEN_INT (1)));
+  DONE;
+})
+
+(define_expand "smtt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
+			      GEN_INT (0), GEN_INT (0)));
+  else
+    emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
+			      GEN_INT (1), GEN_INT (1)));
+  DONE;
+})
+
+(define_insn "mulhisi3v"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r,    r,    r,    r")
+	(mult:SI
+	  (sign_extend:SI
+	     (vec_select:HI
+	       (match_operand:V2HI 1 "register_operand"                "    r,    r,    r,    r")
+	       (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand"  " Iv00, Iv00, Iv01, Iv01")])))
+	  (sign_extend:SI (vec_select:HI
+	       (match_operand:V2HI 2 "register_operand"                "    r,    r,    r,    r")
+	       (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand"  " Iv00, Iv01, Iv01, Iv00")])))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smtt\t%0, %1, %2",
+			     "smbt\t%0, %2, %1",
+			     "smbb\t%0, %1, %2",
+			     "smbt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smbb\t%0, %1, %2",
+			     "smbt\t%0, %1, %2",
+			     "smtt\t%0, %1, %2",
+			     "smbt\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
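+;; kmabb/kmabt/kmatt: saturating add of a signed halfword product to the
+;; accumulator.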
+(define_expand "kmabb"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+				 GEN_INT (1), GEN_INT (1),
+				 operands[1]));
+  else
+    emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+				 GEN_INT (0), GEN_INT (0),
+				 operands[1]));
+  DONE;
+})
+
+(define_expand "kmabt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+				 GEN_INT (1), GEN_INT (0),
+				 operands[1]));
+  else
+    emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+				 GEN_INT (0), GEN_INT (1),
+				 operands[1]));
+  DONE;
+})
+
+(define_expand "kmatt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+				 GEN_INT (0), GEN_INT (0),
+				 operands[1]));
+  else
+    emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+				 GEN_INT (1), GEN_INT (1),
+				 operands[1]));
+  DONE;
+})
+
+(define_insn "kma_internal"
+  [(set (match_operand:SI 0 "register_operand"                          "=    r,    r,    r,    r")
+	(ss_plus:SI
+	  (mult:SI
+	    (sign_extend:SI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand"                "    r,    r,    r,    r")
+	        (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand"  " Iv00, Iv00, Iv01, Iv01")])))
+	    (sign_extend:SI
+	      (vec_select:HI
+	        (match_operand:V2HI 2 "register_operand"                "    r,    r,    r,    r")
+	        (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand"  " Iv00, Iv01, Iv01, Iv00")]))))
+	  (match_operand:SI 5 "register_operand"                        "     0,    0,    0,    0")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "kmatt\t%0, %1, %2",
+			     "kmabt\t%0, %2, %1",
+			     "kmabb\t%0, %1, %2",
+			     "kmabt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "kmabb\t%0, %1, %2",
+			     "kmabt\t%0, %1, %2",
+			     "kmatt\t%0, %1, %2",
+			     "kmabt\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"    "dmac")
+   (set_attr "length"   "4")])
+
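+;; smds/smdrs/smxdsv: difference of two signed halfword products; smdrs
+;; reverses the subtraction and smxdsv crosses the halfword selection.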
+(define_expand "smds"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smds_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_smds_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_expand "smds_le"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" " r")
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" " r")
+			      (parallel [(const_int 1)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+(define_expand "smds_be"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" " r")
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" " r")
+			      (parallel [(const_int 0)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+(define_expand "smdrs"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smdrs_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_smdrs_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_expand "smdrs_le"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" " r")
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" " r")
+			      (parallel [(const_int 0)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+(define_expand "smdrs_be"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" " r")
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" " r")
+			      (parallel [(const_int 1)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+(define_expand "smxdsv"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smxdsv_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_smxdsv_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+
+(define_expand "smxdsv_le"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" " r")
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" " r")
+			      (parallel [(const_int 0)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+(define_expand "smxdsv_be"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" " r")
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" " r")
+			      (parallel [(const_int 1)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
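+;; smal: add the signed product of the two halfwords of operand 2 to the
+;; 64-bit operand 1.  The variants below cover the equivalent RTL forms.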
+(define_insn "smal1"
+  [(set (match_operand:DI 0 "register_operand"             "=r")
+	(plus:DI (match_operand:DI 1 "register_operand"    " r")
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_operand:V2HI 2 "register_operand" " r")
+		  (parallel [(const_int 0)])))
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_dup 2)
+		  (parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal2"
+  [(set (match_operand:DI 0 "register_operand"           "=r")
+	(plus:DI (match_operand:DI 1 "register_operand"  " r")
+	  (mult:DI
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)])))
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal3"
+  [(set (match_operand:DI 0 "register_operand"             "=r")
+	(plus:DI (match_operand:DI 1 "register_operand"    " r")
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_operand:V2HI 2 "register_operand" " r")
+		  (parallel [(const_int 1)])))
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_dup 2)
+		  (parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal4"
+  [(set (match_operand:DI 0 "register_operand"           "=r")
+	(plus:DI (match_operand:DI 1 "register_operand"  " r")
+	  (mult:DI
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)])))
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal5"
+  [(set (match_operand:DI 0 "register_operand"             "=r")
+	(plus:DI
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_operand:V2HI 2 "register_operand" " r")
+		  (parallel [(const_int 0)])))
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_dup 2)
+		  (parallel [(const_int 1)])))))
+	  (match_operand:DI 1 "register_operand"           " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal6"
+  [(set (match_operand:DI 0 "register_operand"           "=r")
+	(plus:DI
+	  (mult:DI
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)])))
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))))
+	  (match_operand:DI 1 "register_operand"         " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal7"
+  [(set (match_operand:DI 0 "register_operand"             "=r")
+	(plus:DI
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_operand:V2HI 2 "register_operand" " r")
+		  (parallel [(const_int 1)])))
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_dup 2)
+		  (parallel [(const_int 0)])))))
+	  (match_operand:DI 1 "register_operand"           " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"    "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal8"
+  [(set (match_operand:DI 0 "register_operand"           "=r")
+	(plus:DI
+	  (mult:DI
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)])))
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))))
+	  (match_operand:DI 1 "register_operand"         " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; We need this dummy pattern for smal.
+(define_insn_and_split "extendsidi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(sign_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))]
+  "NDS32_EXT_DSP_P ()"
+  "#"
+  "NDS32_EXT_DSP_P ()"
+  [(const_int 0)]
+{
+  rtx high_part_dst, low_part_dst;
+
+  low_part_dst = nds32_di_low_part_subreg (operands[0]);
+  high_part_dst = nds32_di_high_part_subreg (operands[0]);
+
+  emit_move_insn (low_part_dst, operands[1]);
+  emit_insn (gen_ashrsi3 (high_part_dst, low_part_dst, GEN_INT (31)));
+  DONE;
+}
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; We need this dummy pattern for usmar64/usmsr64.
+(define_insn_and_split "zero_extendsidi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))]
+  "NDS32_EXT_DSP_P ()"
+  "#"
+  "NDS32_EXT_DSP_P ()"
+  [(const_int 0)]
+{
+  rtx high_part_dst, low_part_dst;
+
+  low_part_dst = nds32_di_low_part_subreg (operands[0]);
+  high_part_dst = nds32_di_high_part_subreg (operands[0]);
+
+  emit_move_insn (low_part_dst, operands[1]);
+  emit_move_insn (high_part_dst, const0_rtx);
+  DONE;
+}
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn_and_split "extendhidi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "")))]
+  "NDS32_EXT_DSP_P ()"
+  "#"
+  "NDS32_EXT_DSP_P ()"
+  [(const_int 0)]
+{
+  rtx high_part_dst, low_part_dst;
+
+  low_part_dst = nds32_di_low_part_subreg (operands[0]);
+  high_part_dst = nds32_di_high_part_subreg (operands[0]);
+
+  emit_insn (gen_extendhisi2 (low_part_dst, operands[1]));
+  emit_insn (gen_ashrsi3 (high_part_dst, low_part_dst, GEN_INT (31)));
+  DONE;
+}
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "extendqihi2"
+  [(set (match_operand:HI 0 "register_operand"                 "=r")
+	(sign_extend:HI (match_operand:QI 1 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "sunpkd820\t%0, %1"
+  [(set_attr "type"       "dpack")
+   (set_attr "length"     "4")])
+
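+;; smmul/smmul.u: most significant word of a 32x32 signed multiply,
+;; without or with rounding.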
+(define_insn "smulsi3_highpart"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI
+	      (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+	      (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))
+	    (const_int 32))))]
+  "NDS32_EXT_DSP_P ()"
+  "smmul\t%0, %1, %2"
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+(define_insn "smmul_round"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (unspec:DI [(mult:DI
+		  	  (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+			  (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))]
+		       UNSPEC_ROUND)
+	    (const_int 32))))]
+  "NDS32_EXT_DSP_P ()"
+  "smmul.u\t%0, %1, %2"
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
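+;; kmmac/kmmsb: saturating accumulate or subtract of the most significant
+;; word of a 32x32 signed multiply.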
+(define_insn "kmmac"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(ss_plus:SI (match_operand:SI 1 "register_operand"             " 0")
+	  (truncate:SI
+	    (lshiftrt:DI
+	      (mult:DI
+		(sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
+		(sign_extend:DI (match_operand:SI 3 "register_operand" " r")))
+	      (const_int 32)))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmmac\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmmac_round"
+  [(set (match_operand:SI 0 "register_operand"                                     "=r")
+	(ss_plus:SI (match_operand:SI 1 "register_operand"                         " 0")
+	  (truncate:SI
+	    (lshiftrt:DI
+	      (unspec:DI [(mult:DI
+			    (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
+			    (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))]
+			 UNSPEC_ROUND)
+	      (const_int 32)))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmmac.u\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmmsb"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(ss_minus:SI (match_operand:SI 1 "register_operand"            " 0")
+	  (truncate:SI
+	    (lshiftrt:DI
+	      (mult:DI
+		(sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
+		(sign_extend:DI (match_operand:SI 3 "register_operand" " r")))
+	      (const_int 32)))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmmsb\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmmsb_round"
+  [(set (match_operand:SI 0 "register_operand"                                     "=r")
+	(ss_minus:SI (match_operand:SI 1 "register_operand"                        " 0")
+	  (truncate:SI
+	    (lshiftrt:DI
+	      (unspec:DI [(mult:DI
+			    (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
+			    (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))]
+			 UNSPEC_ROUND)
+	      (const_int 32)))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmmsb.u\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kwmmul"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (ss_mult:DI
+	      (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2))
+	      (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2)))
+	    (const_int 32))))]
+  "NDS32_EXT_DSP_P ()"
+  "kwmmul\t%0, %1, %2"
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+(define_insn "kwmmul_round"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (unspec:DI [
+	      (ss_mult:DI
+		(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2))
+		(mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2)))]
+	      UNSPEC_ROUND)
+	    (const_int 32))))]
+  "NDS32_EXT_DSP_P ()"
+  "kwmmul.u\t%0, %1, %2"
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
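+;; smmwb/smmwt: multiply a 32-bit operand by a selected signed halfword and
+;; keep the product shifted right by 16 bits.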
+(define_expand "smmwb"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1)));
+  else
+    emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (0)));
+  DONE;
+})
+
+(define_expand "smmwt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (0)));
+  else
+    emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1)));
+  DONE;
+})
+
+(define_insn "smulhisi3_highpart_1"
+  [(set (match_operand:SI 0 "register_operand"                           "=   r,    r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI
+	      (sign_extend:DI (match_operand:SI 1 "register_operand"     "    r,    r"))
+	      (sign_extend:DI
+	        (vec_select:HI
+		  (match_operand:V2HI 2 "register_operand"               "    r,    r")
+		  (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))
+	    (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smmwt\t%0, %1, %2",
+			     "smmwb\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smmwb\t%0, %1, %2",
+			     "smmwt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+(define_insn "smulhisi3_highpart_2"
+  [(set (match_operand:SI 0 "register_operand"                           "=   r,    r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI
+	      (sign_extend:DI
+	        (vec_select:HI
+		  (match_operand:V2HI 1 "register_operand"               "    r,    r")
+		  (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")])))
+	      (sign_extend:DI (match_operand:SI 2 "register_operand"     "    r,    r")))
+	    (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smmwt\t%0, %1, %2",
+			     "smmwb\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smmwb\t%0, %1, %2",
+			     "smmwt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+(define_expand "smmwb_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1)));
+  else
+    emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0)));
+  DONE;
+})
+
+(define_expand "smmwt_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0)));
+  else
+    emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1)));
+  DONE;
+})
+
+(define_insn "smmw_round_internal"
+  [(set (match_operand:SI 0 "register_operand"                           "=   r,    r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (unspec:DI
+	      [(mult:DI
+		 (sign_extend:DI (match_operand:SI 1 "register_operand"     "    r,    r"))
+		 (sign_extend:DI
+		   (vec_select:HI
+		     (match_operand:V2HI 2 "register_operand"               "    r,    r")
+		     (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))]
+	      UNSPEC_ROUND)
+	    (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smmwt.u\t%0, %1, %2",
+			     "smmwb.u\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smmwb.u\t%0, %1, %2",
+			     "smmwt.u\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
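+;; kmmawb/kmmawt: saturating accumulate of the high part of a 32-bit by
+;; bottom/top 16-bit product.  The lane index passed to the internal pattern
+;; is endian-adjusted in the same way as for smmwb/smmwt.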
+(define_expand "kmmawb"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1]));
+  else
+    emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1]));
+  DONE;
+})
+
+(define_expand "kmmawt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1]));
+  else
+    emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1]));
+  DONE;
+})
+
+(define_insn "kmmaw_internal"
+  [(set (match_operand:SI 0 "register_operand"                               "=   r,    r")
+	(ss_plus:SI
+	  (match_operand:SI 4 "register_operand"                             "    0,    0")
+	  (truncate:SI
+	    (lshiftrt:DI
+	      (mult:DI
+		(sign_extend:DI (match_operand:SI 1 "register_operand"       "    r,    r"))
+		  (sign_extend:DI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand"               "    r,    r")
+		      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))
+	      (const_int 16)))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "kmmawt\t%0, %1, %2",
+			     "kmmawb\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "kmmawb\t%0, %1, %2",
+			     "kmmawt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_expand "kmmawb_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1]));
+  else
+    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1]));
+  DONE;
+})
+
+(define_expand "kmmawt_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1]));
+  else
+    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1]));
+  DONE;
+})
+
+
+(define_insn "kmmaw_round_internal"
+  [(set (match_operand:SI 0 "register_operand"                                "=   r,    r")
+	(ss_plus:SI
+	  (match_operand:SI 4 "register_operand"                              "    0,    0")
+	  (truncate:SI
+	    (lshiftrt:DI
+	      (unspec:DI
+		[(mult:DI
+		   (sign_extend:DI (match_operand:SI 1 "register_operand"     "    r,    r"))
+		   (sign_extend:DI
+		     (vec_select:HI
+		       (match_operand:V2HI 2 "register_operand"               "    r,    r")
+		       (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))]
+		UNSPEC_ROUND)
+	      (const_int 16)))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "kmmawt.u\t%0, %1, %2",
+			     "kmmawb.u\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "kmmawb.u\t%0, %1, %2",
+			     "kmmawt.u\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
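+;; smalbb/smalbt/smaltt accumulate one signed 16x16 product into a 64-bit
+;; accumulator.  All three expand to smaddhidi with the proper lane
+;; constants, swapped again for big-endian.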
+(define_expand "smalbb"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smaddhidi (operands[0], operands[2],
+			      operands[3], operands[1],
+			      GEN_INT (1), GEN_INT (1)));
+  else
+    emit_insn (gen_smaddhidi (operands[0], operands[2],
+			      operands[3], operands[1],
+			      GEN_INT (0), GEN_INT (0)));
+  DONE;
+})
+
+(define_expand "smalbt"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smaddhidi (operands[0], operands[2],
+			      operands[3], operands[1],
+			      GEN_INT (1), GEN_INT (0)));
+  else
+    emit_insn (gen_smaddhidi (operands[0], operands[2],
+			      operands[3], operands[1],
+			      GEN_INT (0), GEN_INT (1)));
+  DONE;
+})
+
+(define_expand "smaltt"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smaddhidi (operands[0], operands[2],
+			      operands[3], operands[1],
+			      GEN_INT (0), GEN_INT (0)));
+  else
+    emit_insn (gen_smaddhidi (operands[0], operands[2],
+			      operands[3], operands[1],
+			      GEN_INT (1), GEN_INT (1)));
+  DONE;
+})
+
+(define_insn "smaddhidi"
+  [(set (match_operand:DI 0 "register_operand"                         "=   r,    r,    r,    r")
+	(plus:DI
+	  (match_operand:DI 3 "register_operand"                       "    0,    0,    0,    0")
+	  (mult:DI
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand"               "    r,    r,    r,    r")
+		(parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")])))
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r")
+		(parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smaltt\t%0, %1, %2",
+			     "smalbt\t%0, %2, %1",
+			     "smalbb\t%0, %1, %2",
+			     "smalbt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smalbb\t%0, %1, %2",
+			     "smalbt\t%0, %1, %2",
+			     "smaltt\t%0, %1, %2",
+			     "smalbt\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smaddhidi2"
+  [(set (match_operand:DI 0 "register_operand"                         "=   r,    r,    r,    r")
+	(plus:DI
+	  (mult:DI
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand"               "    r,    r,    r,    r")
+		(parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")])))
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r")
+		(parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")]))))
+	  (match_operand:DI 3 "register_operand"                       "    0,    0,    0,    0")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smaltt\t%0, %1, %2",
+			     "smalbt\t%0, %2, %1",
+			     "smalbb\t%0, %1, %2",
+			     "smalbt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smalbb\t%0, %1, %2",
+			     "smalbt\t%0, %1, %2",
+			     "smaltt\t%0, %1, %2",
+			     "smalbt\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
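+;; smalda/smalds: 64-bit accumulate of the sum/difference of both 16x16
+;; products.  Separate _le/_be insns are needed because the vec_select lane
+;; numbering depends on the endianness.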
+(define_expand "smalda1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smalda1_be (operands[0], operands[1], operands[2], operands[3]));
+  else
+    emit_insn (gen_smalda1_le (operands[0], operands[1], operands[2], operands[3]));
+  DONE;
+})
+
+(define_expand "smalds1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smalds1_be (operands[0], operands[1], operands[2], operands[3]));
+  else
+    emit_insn (gen_smalds1_le (operands[0], operands[1], operands[2], operands[3]));
+  DONE;
+})
+
+(define_insn "smalda1_le"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (plus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 1)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "smalda\t%0, %2, %3"
+  [(set_attr "type"    "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smalds1_le"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 1)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "smalds\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smalda1_be"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (plus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 0)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 1)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smalda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smalds1_be"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 0)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 1)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smalds\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
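+;; smaldrs: 64-bit accumulate of (bottom product - top product).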
+(define_expand "smaldrs3"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smaldrs3_be (operands[0], operands[1], operands[2], operands[3]));
+  else
+    emit_insn (gen_smaldrs3_le (operands[0], operands[1], operands[2], operands[3]));
+  DONE;
+})
+
+(define_insn "smaldrs3_le"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 0)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 1)]))))))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "smaldrs\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smaldrs3_be"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 1)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smaldrs\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_expand "smalxda1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smalxda1_be (operands[0], operands[1], operands[2], operands[3]));
+  else
+    emit_insn (gen_smalxda1_le (operands[0], operands[1], operands[2], operands[3]));
+  DONE;
+})
+
+(define_expand "smalxds1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smalxds1_be (operands[0], operands[1], operands[2], operands[3]));
+  else
+    emit_insn (gen_smalxds1_le (operands[0], operands[1], operands[2], operands[3]));
+  DONE;
+})
+
+(define_insn "smalxd1_le"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (plus_minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 0)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 1)]))))))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "smalxd\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+
+(define_insn "smalxd1_be"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (plus_minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 1)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smalxd\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
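+;; smslda/smslxda subtract both 16x16 products (straight or crossed lane
+;; pairing) from the 64-bit accumulator.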
+(define_insn "smslda1"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(minus:DI
+	  (minus:DI
+	    (match_operand:DI 1 "register_operand"                           " 0")
+	    (sign_extend:DI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 1)]))))))
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smslda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smslxda1"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(minus:DI
+	  (minus:DI
+	    (match_operand:DI 1 "register_operand"                           " 0")
+	      (sign_extend:DI
+		(mult:SI
+		  (sign_extend:SI (vec_select:HI
+				    (match_operand:V2HI 2 "register_operand" " r")
+				    (parallel [(const_int 1)])))
+		  (sign_extend:SI (vec_select:HI
+				    (match_operand:V2HI 3 "register_operand" " r")
+				    (parallel [(const_int 0)]))))))
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smslxda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; mada: helper patterns used to synthesize smalda; each one is split into
+;; two half-word multiplies followed by an add.
+(define_insn_and_split "mada1"
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+	(plus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" "r")
+			      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" "r")
+			      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx result0 = gen_reg_rtx (SImode);
+  rtx result1 = gen_reg_rtx (SImode);
+  emit_insn (gen_mulhisi3v (result0, operands[1], operands[2],
+			    operands[3], operands[4]));
+  emit_insn (gen_mulhisi3v (result1, operands[1], operands[2],
+			    operands[5], operands[6]));
+  emit_insn (gen_addsi3 (operands[0], result0, result1));
+  DONE;
+})
+
+(define_insn_and_split "mada2"
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+	(plus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" "r")
+			      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" "r")
+			      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx result0 = gen_reg_rtx (SImode);
+  rtx result1 = gen_reg_rtx (SImode);
+  emit_insn (gen_mulhisi3v (result0, operands[1], operands[2],
+			    operands[3], operands[4]));
+  emit_insn (gen_mulhisi3v (result1, operands[1], operands[2],
+			    operands[6], operands[5]));
+  emit_insn (gen_addsi3 (operands[0], result0, result1));
+  DONE;
+})
+
+;; sms: helper patterns used to synthesize smalds.  The insn form is kept
+;; when a single instruction can be emitted (nds32_output_sms); otherwise
+;; nds32_split_sms rewrites it as two half-word multiplies and a subtraction.
+(define_insn_and_split "sms1"
+  [(set (match_operand:SI 0 "register_operand"                                       "=   r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand"               "    r")
+			      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand"               "    r")
+			      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
+  "NDS32_EXT_DSP_P ()
+   && (!reload_completed
+       || !nds32_need_split_sms_p (operands[3], operands[4],
+				   operands[5], operands[6]))"
+
+{
+  return nds32_output_sms (operands[3], operands[4],
+			   operands[5], operands[6]);
+}
+  "NDS32_EXT_DSP_P ()
+   && !reload_completed
+   && nds32_need_split_sms_p (operands[3], operands[4],
+			      operands[5], operands[6])"
+  [(const_int 1)]
+{
+  nds32_split_sms (operands[0], operands[1], operands[2],
+		   operands[3], operands[4],
+		   operands[5], operands[6]);
+  DONE;
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn_and_split "sms2"
+  [(set (match_operand:SI 0 "register_operand"                                       "=   r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand"               "    r")
+			      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand"               "    r")
+			      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
+  "NDS32_EXT_DSP_P ()
+   && (!reload_completed
+       || !nds32_need_split_sms_p (operands[3], operands[4],
+				   operands[6], operands[5]))"
+{
+  return nds32_output_sms (operands[3], operands[4],
+			   operands[6], operands[5]);
+}
+  "NDS32_EXT_DSP_P ()
+   && !reload_completed
+   && nds32_need_split_sms_p (operands[3], operands[4],
+			      operands[6], operands[5])"
+  [(const_int 1)]
+{
+  nds32_split_sms (operands[0], operands[1], operands[2],
+		   operands[3], operands[4],
+		   operands[6], operands[5]);
+  DONE;
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
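+;; kmda/kmxda: saturating addition of the two 16x16 products; kmxda uses the
+;; crossed lane pairing.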
+(define_insn "kmda"
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+	(ss_plus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" "r")
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" "r")
+			      (parallel [(const_int 1)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmda\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmxda"
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+	(ss_plus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" "r")
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" "r")
+			      (parallel [(const_int 0)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmxda\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
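+;; kmada/kmaxda/kmads/kmadrs/kmaxds/kmsda/kmsxda: saturating accumulation of
+;; the sum or difference of two 16x16 products into the register tied to the
+;; destination.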
+(define_insn "kmada"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_plus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 1)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmada\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmada2"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_plus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 0)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmada\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmaxda"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_plus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 0)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmaxda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmads"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_minus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 1)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmads\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmadrs"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_minus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 0)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmadrs\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmaxds"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_minus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 0)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmaxds\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmsda"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_minus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_minus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 1)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmsda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmsxda"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_minus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_minus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 0)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmsxda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; smax[8|16] and umax[8|16]
+(define_insn "3"
+  [(set (match_operand:VQIHI 0 "register_operand"               "=r")
+	(sumax:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+		     (match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "\t%0, %1, %2"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+;; smin[8|16] and umin[8|16]
+(define_insn "3"
+  [(set (match_operand:VQIHI 0 "register_operand"              "=r")
+	(sumin:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+		     (match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "\t%0, %1, %2"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn "3_bb"
+  [(set (match_operand: 0 "register_operand"                    "=r")
+	(sumin_max: (vec_select:
+			    (match_operand:VQIHI 1 "register_operand" " r")
+			    (parallel [(const_int 0)]))
+			  (vec_select:
+			    (match_operand:VQIHI 2 "register_operand" " r")
+			    (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "\t%0, %1, %2"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "3_tt"
+  [(set (match_operand: 0 "register_operand"                    "=r")
+	(sumin_max: (vec_select:
+			    (match_operand:VQIHI 1 "register_operand" " r")
+			    (parallel [(const_int 1)]))
+			  (vec_select:
+			    (match_operand:VQIHI 2 "register_operand" " r")
+			    (parallel [(const_int 1)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (mode);
+  emit_insn (gen_3 (tmp, operands[1], operands[2]));
+  emit_insn (gen_rotr_1 (tmp, tmp));
+  emit_move_insn (operands[0], simplify_gen_subreg (mode, tmp, mode, 0));
+  DONE;
+}
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "v4qi3_22"
+  [(set (match_operand:QI 0 "register_operand"                   "=r")
+	(sumin_max:QI (vec_select:QI
+			(match_operand:V4QI 1 "register_operand" " r")
+			(parallel [(const_int 2)]))
+		      (vec_select:QI
+			(match_operand:V4QI 2 "register_operand" " r")
+			(parallel [(const_int 2)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode);
+  emit_insn (gen_v4qi3 (tmp, operands[1], operands[2]));
+  emit_insn (gen_rotrv4qi_2 (tmp, tmp));
+  emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0));
+  DONE;
+}
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "v4qi3_33"
+  [(set (match_operand:QI 0 "register_operand"                   "=r")
+	(sumin_max:QI (vec_select:QI
+			(match_operand:V4QI 1 "register_operand" " r")
+			(parallel [(const_int 3)]))
+		      (vec_select:QI
+			(match_operand:V4QI 2 "register_operand" " r")
+			(parallel [(const_int 3)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode);
+  emit_insn (gen_v4qi3 (tmp, operands[1], operands[2]));
+  emit_insn (gen_rotrv4qi_3 (tmp, tmp));
+  emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0));
+  DONE;
+}
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "v2hi3_bbtt"
+  [(set (match_operand:V2HI 0 "register_operand"                         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (sumin_max:HI (vec_select:HI
+			    (match_operand:V2HI 1 "register_operand" " r")
+			    (parallel [(const_int 1)]))
+			  (vec_select:HI
+			    (match_operand:V2HI 2 "register_operand" " r")
+			    (parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (sumin_max:HI (vec_select:HI
+			    (match_dup 1)
+			    (parallel [(const_int 0)]))
+			  (vec_select:HI
+			    (match_dup 2)
+			    (parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P ()"
+  [(const_int 0)]
+{
+  emit_insn (gen_v2hi3 (operands[0], operands[1], operands[2]));
+  DONE;
+}
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_expand "abs2"
+  [(set (match_operand:VQIHI 0 "register_operand"                "=r")
+	(ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P () && TARGET_HW_ABS && !flag_wrapv"
+{
+})
+
+(define_insn "kabs2"
+  [(set (match_operand:VQIHI 0 "register_operand"                "=r")
+	(ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "kabs\t%0, %1"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
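+;; mar64/msr64: 64-bit multiply-accumulate and multiply-subtract.  The
+;; numbered variants match the canonical RTL orderings combine may produce:
+;; accumulator as the first or second addend, and the extension applied
+;; either to the operands or to the SImode product.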
+(define_insn "mar64_1"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"     " 0")
+	  (mult:DI
+	    (extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "mar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "mar64_2"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(plus:DI
+	  (mult:DI
+	    (extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))
+	  (match_operand:DI 1 "register_operand"     " 0")))]
+  "NDS32_EXT_DSP_P ()"
+  "mar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "mar64_3"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"     " 0")
+	  (extend:DI
+	    (mult:SI
+	      (match_operand:SI 2 "register_operand" " r")
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "mar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "mar64_4"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(plus:DI
+	  (extend:DI
+	  (mult:SI
+	      (match_operand:SI 2 "register_operand" " r")
+	      (match_operand:SI 3 "register_operand" " r")))
+	  (match_operand:DI 1 "register_operand"     " 0")))]
+  "NDS32_EXT_DSP_P ()"
+  "mar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "msr64"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(minus:DI
+	  (match_operand:DI 1 "register_operand"     " 0")
+	  (mult:DI
+	    (extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "msr64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "msr64_2"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(minus:DI
+	  (match_operand:DI 1 "register_operand"     " 0")
+	  (extend:DI
+	    (mult:SI
+	      (match_operand:SI 2 "register_operand" " r")
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "msr64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; kmar64, kmsr64, ukmar64 and ukmsr64
+(define_insn "kmar64_1"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(ss_plus:DI
+	  (match_operand:DI 1 "register_operand"     " 0")
+	  (mult:DI
+	    (sign_extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (sign_extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmar64_2"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(ss_plus:DI
+	  (mult:DI
+	    (sign_extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (sign_extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))
+	  (match_operand:DI 1 "register_operand"     " 0")))]
+  "NDS32_EXT_DSP_P ()"
+  "kmar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmsr64"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(ss_minus:DI
+	  (match_operand:DI 1 "register_operand"     " 0")
+	  (mult:DI
+	    (sign_extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (sign_extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmsr64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "ukmar64_1"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(us_plus:DI
+	  (match_operand:DI 1 "register_operand"     " 0")
+	  (mult:DI
+	    (zero_extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (zero_extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "ukmar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "ukmar64_2"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(us_plus:DI
+	  (mult:DI
+	    (zero_extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (zero_extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))
+	  (match_operand:DI 1 "register_operand"     " 0")))]
+  "NDS32_EXT_DSP_P ()"
+  "ukmar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "ukmsr64"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(us_minus:DI
+	  (match_operand:DI 1 "register_operand"     " 0")
+	  (mult:DI
+	    (zero_extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (zero_extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "ukmsr64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
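+;; bpick: bit-wise select, i.e. dest = (a & mask) | (b & ~mask).  The eight
+;; patterns below match the commutative orderings of the ior/and/not
+;; combination so the instruction is recognized however the expression is
+;; written.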
+(define_insn "bpick1"
+  [(set (match_operand:SI 0 "register_operand"       "=r")
+	  (ior:SI
+	    (and:SI
+	      (match_operand:SI 1 "register_operand" " r")
+	      (match_operand:SI 3 "register_operand" " r"))
+	    (and:SI
+	      (match_operand:SI 2 "register_operand" " r")
+	      (not:SI (match_dup 3)))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %1, %2, %3"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick2"
+  [(set (match_operand:SI 0 "register_operand"       "=r")
+	  (ior:SI
+	    (and:SI
+	      (match_operand:SI 1 "register_operand" " r")
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (and:SI
+	      (not:SI (match_dup 2))
+	      (match_operand:SI 3 "register_operand" " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %1, %3, %2"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick3"
+  [(set (match_operand:SI 0 "register_operand"       "=r")
+	  (ior:SI
+	    (and:SI
+	      (match_operand:SI 1 "register_operand" " r")
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (and:SI
+	      (match_operand:SI 3 "register_operand" " r")
+	      (not:SI (match_dup 1)))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %2, %3, %1"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick4"
+  [(set (match_operand:SI 0 "register_operand"       "=r")
+	  (ior:SI
+	    (and:SI
+	      (match_operand:SI 1 "register_operand" " r")
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (and:SI
+	      (not:SI (match_dup 1))
+	      (match_operand:SI 3 "register_operand" " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %2, %3, %1"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick5"
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+	  (ior:SI
+	    (and:SI
+	      (match_operand:SI 1 "register_operand"         " r")
+	      (not:SI (match_operand:SI 2 "register_operand" " r")))
+	    (and:SI
+	      (match_operand:SI 3 "register_operand"         " r")
+	      (match_dup 2))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %1, %2"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick6"
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+	  (ior:SI
+	    (and:SI
+	      (not:SI (match_operand:SI 1 "register_operand" " r"))
+	      (match_operand:SI 2 "register_operand"         " r"))
+	    (and:SI
+	      (match_operand:SI 3 "register_operand" " r")
+	      (match_dup 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %2, %1"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick7"
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+	  (ior:SI
+	    (and:SI
+	      (match_operand:SI 1 "register_operand"         " r")
+	      (not:SI (match_operand:SI 2 "register_operand" " r")))
+	    (and:SI
+	      (match_dup 2)
+	      (match_operand:SI 3 "register_operand"         " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %1, %2"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick8"
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+	  (ior:SI
+	    (and:SI
+	      (not:SI (match_operand:SI 1 "register_operand" " r"))
+	      (match_operand:SI 2 "register_operand"         " r"))
+	    (and:SI
+	      (match_dup 1)
+	      (match_operand:SI 3 "register_operand"         " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %2, %1"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
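+;; srai.u/sra.u: arithmetic shift right with rounding (UNSPEC_ROUND), taking
+;; an immediate or a register shift amount.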
+(define_insn "sraiu"
+  [(set (match_operand:SI 0 "register_operand"                              "=   r, r")
+	(unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand"     "    r, r")
+				 (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r"))]
+		    UNSPEC_ROUND))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srai.u\t%0, %1, %2
+   sra.u\t%0, %1, %2"
+  [(set_attr "type"   "daluround")
+   (set_attr "length" "4")])
+
+(define_insn "kssl"
+  [(set (match_operand:SI 0 "register_operand"                   "=   r, r")
+	(ss_ashift:SI (match_operand:SI 1 "register_operand"     "    r, r")
+		      (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   kslli\t%0, %1, %2
+   ksll\t%0, %1, %2"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
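+;; kslraw.u: saturating shift left; a negative shift amount instead performs
+;; a rounded shift right by the absolute value.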
+(define_insn "kslraw_round"
+  [(set (match_operand:SI 0 "register_operand"                  "=r")
+	(if_then_else:SI
+	  (lt:SI (match_operand:SI 2 "register_operand"        " r")
+		 (const_int 0))
+	  (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r")
+				   (neg:SI (match_dup 2)))]
+		     UNSPEC_ROUND)
+	  (ss_ashift:SI (match_dup 1)
+			(match_dup 2))))]
+  "NDS32_EXT_DSP_P ()"
+  "kslraw.u\t%0, %1, %2"
+  [(set_attr "type"    "daluround")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "di3"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(shift_rotate:DI (match_operand:DI 1 "register_operand" "")
+			 (match_operand:SI 2 "nds32_rimm6u_operand" "")))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 0)]
+{
+  if (REGNO (operands[0]) == REGNO (operands[1]))
+    {
+      rtx tmp = gen_reg_rtx (DImode);
+      nds32_split_di3 (tmp, operands[1], operands[2]);
+      emit_move_insn (operands[0], tmp);
+    }
+  else
+    nds32_split_di3 (operands[0], operands[1], operands[2]);
+  DONE;
+})
+
+(define_insn "sclip32"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIPS_OV))]
+  "NDS32_EXT_DSP_P ()"
+  "sclip32\t%0, %1, %2"
+  [(set_attr "type"   "dclip")
+   (set_attr "length" "4")]
+)
+
+(define_insn "uclip32"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIP_OV))]
+  "NDS32_EXT_DSP_P ()"
+  "uclip32\t%0, %1, %2"
+  [(set_attr "type"   "dclip")
+   (set_attr "length" "4")]
+)
+
+(define_insn "bitrev"
+  [(set (match_operand:SI 0 "register_operand"                 "=r,    r")
+	(unspec:SI [(match_operand:SI 1 "register_operand"     " r,    r")
+		    (match_operand:SI 2 "nds32_rimm5u_operand" " r, Iu05")]
+		   UNSPEC_BITREV))]
+  ""
+  "@
+   bitrev\t%0, %1, %2
+   bitrevi\t%0, %1, %2"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")]
+)
+
+;; wext, wexti
+(define_insn "wext"
+  [(set (match_operand:SI 0 "register_operand"                "=r,   r")
+	(truncate:SI
+	  (shiftrt:DI
+	    (match_operand:DI 1 "register_operand"            " r,   r")
+	    (match_operand:SI 2 "nds32_rimm5u_operand"        " r,Iu05"))))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   wext\t%0, %1, %2
+   wexti\t%0, %1, %2"
+  [(set_attr "type"     "dwext")
+   (set_attr "length"   "4")])
+
+;; 32-bit add/sub instruction: raddw and rsubw.
+(define_insn "rsi3"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(truncate:SI
+	  (ashiftrt:DI
+	    (plus_minus:DI
+	      (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+	      (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "rw\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+;; 32-bit add/sub instruction: uraddw and ursubw.
+(define_insn "ursi3"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (plus_minus:DI
+	      (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+	      (zero_extend:DI (match_operand:SI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "urw\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
diff --git a/gcc/config/nds32/nds32-e8.md b/gcc/config/nds32/nds32-e8.md
new file mode 100644
index 0000000..1f24b5c
--- /dev/null
+++ b/gcc/config/nds32/nds32-e8.md
@@ -0,0 +1,329 @@
+;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; ------------------------------------------------------------------------
+;; Define E8 pipeline settings.
+;; ------------------------------------------------------------------------
+
+(define_automaton "nds32_e8_machine")
+
+;; ------------------------------------------------------------------------
+;; Pipeline Stages
+;; ------------------------------------------------------------------------
+;; IF - Instruction Fetch
+;; II - Instruction Issue / Address Generation
+;; EX - Instruction Execution
+;; EXD - Pseudo Stage / Load Data Completion
+
+(define_cpu_unit "e8_ii" "nds32_e8_machine")
+(define_cpu_unit "e8_ex" "nds32_e8_machine")
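+
+;; Only the II and EX stages are modelled as cpu units; IF and the EXD
+;; pseudo stage are not represented explicitly.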
+
+(define_insn_reservation "nds_e8_unknown" 1
+  (and (eq_attr "type" "unknown")
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, e8_ex")
+
+(define_insn_reservation "nds_e8_misc" 1
+  (and (eq_attr "type" "misc")
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, e8_ex")
+
+(define_insn_reservation "nds_e8_alu" 1
+  (and (eq_attr "type" "alu")
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, e8_ex")
+
+(define_insn_reservation "nds_e8_load" 1
+  (and (match_test "nds32::load_single_p (insn)")
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, e8_ex")
+
+(define_insn_reservation "nds_e8_store" 1
+  (and (match_test "nds32::store_single_p (insn)")
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, e8_ex")
+
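+;; An N-word load/store-multiple keeps II and EX busy together for N-1
+;; cycles, so a combo-N reservation is "e8_ii, (e8_ii+e8_ex)*(N-1), e8_ex".
+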
+(define_insn_reservation "nds_e8_load_multiple_1" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "1"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, e8_ex")
+
+(define_insn_reservation "nds_e8_load_multiple_2" 1
+  (and (ior (and (eq_attr "type" "load_multiple")
+		 (eq_attr "combo" "2"))
+	    (match_test "nds32::load_double_p (insn)"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, e8_ii+e8_ex, e8_ex")
+
+(define_insn_reservation "nds_e8_load_multiple_3" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "3"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*2, e8_ex")
+
+(define_insn_reservation "nds_e8_load_multiple_4" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "4"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*3, e8_ex")
+
+(define_insn_reservation "nds_e8_load_multiple_5" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "5"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*4, e8_ex")
+
+(define_insn_reservation "nds_e8_load_multiple_6" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "6"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*5, e8_ex")
+
+(define_insn_reservation "nds_e8_load_multiple_7" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "7"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*6, e8_ex")
+
+(define_insn_reservation "nds_e8_load_multiple_8" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "8"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*7, e8_ex")
+
+(define_insn_reservation "nds_e8_load_multiple_12" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "12"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*11, e8_ex")
+
+(define_insn_reservation "nds_e8_store_multiple_1" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "1"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, e8_ex")
+
+(define_insn_reservation "nds_e8_store_multiple_2" 1
+  (and (ior (and (eq_attr "type" "store_multiple")
+		 (eq_attr "combo" "2"))
+	    (match_test "nds32::store_double_p (insn)"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, e8_ii+e8_ex, e8_ex")
+
+(define_insn_reservation "nds_e8_store_multiple_3" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "3"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*2, e8_ex")
+
+(define_insn_reservation "nds_e8_store_multiple_4" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "4"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*3, e8_ex")
+
+(define_insn_reservation "nds_e8_store_multiple_5" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "5"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*4, e8_ex")
+
+(define_insn_reservation "nds_e8_store_multiple_6" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "6"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*5, e8_ex")
+
+(define_insn_reservation "nds_e8_store_multiple_7" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "7"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*6, e8_ex")
+
+(define_insn_reservation "nds_e8_store_multiple_8" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "8"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*7, e8_ex")
+
+(define_insn_reservation "nds_e8_store_multiple_12" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "12"))
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*11, e8_ex")
+
+(define_insn_reservation "nds_e8_mul_fast" 1
+  (and (match_test "nds32_mul_config != MUL_TYPE_SLOW")
+       (and (eq_attr "type" "mul")
+	    (eq_attr "pipeline_model" "e8")))
+  "e8_ii, e8_ex")
+
+(define_insn_reservation "nds_e8_mul_slow" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
+       (and (eq_attr "type" "mul")
+	    (eq_attr "pipeline_model" "e8")))
+  "e8_ii, e8_ex*16")
+
+(define_insn_reservation "nds_e8_mac_fast" 1
+  (and (match_test "nds32_mul_config != MUL_TYPE_SLOW")
+       (and (eq_attr "type" "mac")
+	    (eq_attr "pipeline_model" "e8")))
+  "e8_ii, e8_ii+e8_ex, e8_ex")
+
+(define_insn_reservation "nds_e8_mac_slow" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
+       (and (eq_attr "type" "mac")
+	    (eq_attr "pipeline_model" "e8")))
+  "e8_ii, (e8_ii+e8_ex)*16, e8_ex")
+
+(define_insn_reservation "nds_e8_div" 1
+  (and (eq_attr "type" "div")
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, (e8_ii+e8_ex)*36, e8_ex")
+
+(define_insn_reservation "nds_e8_branch" 1
+  (and (eq_attr "type" "branch")
+       (eq_attr "pipeline_model" "e8"))
+  "e8_ii, e8_ex")
+
+;; ------------------------------------------------------------------------
+;; Comment Notations and Bypass Rules
+;; ------------------------------------------------------------------------
+;; Producers (LHS)
+;;   LD
+;;     Load data from the memory and produce the loaded data. The result is
+;;     ready at EXD.
+;;   LMW(N, M)
+;;     There are N micro-operations within an instruction that loads multiple
+;;     words. The result produced by the M-th micro-operation is sent to
+;;     consumers. The result is ready at EXD.
+;;   ADDR_OUT
+;;     Most load/store instructions can produce an address output if updating
+;;     the base register is required. The result is ready at EX, which is
+;;     produced by ALU.
+;;   ALU, MOVD44, MUL, MAC
+;;     The result is ready at EX.
+;;   DIV_Rs
+;;     A division instruction saves the quotient result to Rt and saves the
+;;     remainder result to Rs. The instruction is separated into two micro-
+;;     operations. The first micro-operation writes to Rt, and the second
+;;     one writes to Rs. Each of the results is ready at EX.
+;;
+;; Consumers (RHS)
+;;   ALU, MUL, DIV
+;;     Require operands at EX.
+;;   ADDR_IN_MOP(N)
+;;      N denotes the address input is required by the N-th micro-operation.
+;;      Such operand is required at II.
+;;   ST
+;;     A store instruction requires its data at EX.
+;;   SMW(N, M)
+;;     There are N micro-operations within an instruction that stores multiple
+;;     words. Each M-th micro-operation requires its data at EX.
+;;   BR_COND
+;;     If a branch instruction is conditional, its input data is required at EX.
+
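+;; Each bypass below sets the producer-to-consumer latency to 2 cycles; the
+;; guard function named as the last argument decides whether the rule applies
+;; to a particular producer/consumer insn pair.
+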
+;; LD -> ADDR_IN_MOP(1)
+(define_bypass 2
+  "nds_e8_load"
+  "nds_e8_branch,\
+   nds_e8_load, nds_e8_store,\
+   nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\
+   nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\
+   nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\
+   nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\
+   nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\
+   nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12"
+  "nds32_e8_load_to_ii_p"
+)
+
+;; LD -> ALU, MUL, MAC, DIV, BR_COND, ST, SMW(N, 1)
+(define_bypass 2
+  "nds_e8_load"
+  "nds_e8_alu,
+   nds_e8_mul_fast, nds_e8_mul_slow,\
+   nds_e8_mac_fast, nds_e8_mac_slow,\
+   nds_e8_div,\
+   nds_e8_branch,\
+   nds_e8_store,\
+   nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\
+   nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\
+   nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12"
+  "nds32_e8_load_to_ex_p"
+)
+
+;; ALU, MOVD44, MUL, MAC, DIV_Rs, LD_bi, ADDR_OUT -> ADDR_IN_MOP(1)
+(define_bypass 2
+  "nds_e8_alu,
+   nds_e8_mul_fast, nds_e8_mul_slow,\
+   nds_e8_mac_fast, nds_e8_mac_slow,\
+   nds_e8_div,\
+   nds_e8_load, nds_e8_store,\
+   nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\
+   nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\
+   nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\
+   nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\
+   nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\
+   nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12"
+  "nds_e8_branch,\
+   nds_e8_load, nds_e8_store,\
+   nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\
+   nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\
+   nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\
+   nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\
+   nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\
+   nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12"
+  "nds32_e8_ex_to_ii_p"
+)
+
+;; LMW(N, N) -> ADDR_IN_MOP(1)
+(define_bypass 2
+  "nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\
+   nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\
+   nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12"
+  "nds_e8_branch,\
+   nds_e8_load, nds_e8_store,\
+   nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\
+   nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\
+   nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\
+   nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\
+   nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\
+   nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12"
+  "nds32_e8_last_load_to_ii_p"
+)
+
+;; LMW(N, N) -> ALU, MUL, MAC, DIV, BR_COND, ST, SMW(N, 1)
+(define_bypass 2
+  "nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\
+   nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\
+   nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12"
+  "nds_e8_alu,
+   nds_e8_mul_fast, nds_e8_mul_slow,\
+   nds_e8_mac_fast, nds_e8_mac_slow,\
+   nds_e8_div,\
+   nds_e8_branch,\
+   nds_e8_store,\
+   nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\
+   nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\
+   nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12"
+  "nds32_e8_last_load_to_ex_p"
+)
diff --git a/gcc/config/nds32/nds32-elf.opt b/gcc/config/nds32/nds32-elf.opt
new file mode 100644
index 0000000..afe6aad
--- /dev/null
+++ b/gcc/config/nds32/nds32-elf.opt
@@ -0,0 +1,16 @@
+mcmodel=
+Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_MEDIUM)
+Specify the address generation strategy for code model.
+
+Enum
+Name(nds32_cmodel_type) Type(enum nds32_cmodel_type)
+Known cmodel types (for use with the -mcmodel= option):
+
+EnumValue
+Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL)
+
+EnumValue
+Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM)
+
+EnumValue
+Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE)
diff --git a/gcc/config/nds32/nds32-fp-as-gp.c b/gcc/config/nds32/nds32-fp-as-gp.c
index f8b2738..6525915 100644
--- a/gcc/config/nds32/nds32-fp-as-gp.c
+++ b/gcc/config/nds32/nds32-fp-as-gp.c
@@ -1,4 +1,4 @@
-/* The fp-as-gp pass of Andes NDS32 cpu for GNU compiler
+/* fp-as-gp pass of Andes NDS32 cpu for GNU compiler
    Copyright (C) 2012-2016 Free Software Foundation, Inc.
    Contributed by Andes Technology Corporation.
 
@@ -24,19 +24,280 @@
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
+#include "tree.h"
+#include "rtl.h"
+#include "df.h"
+#include "alias.h"
+#include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
+#include "regs.h"
+#include "insn-config.h"	/* Required by recog.h.  */
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"		/* For DFA state_t.  */
+#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
+#include "reload.h"		/* For push_reload().  */
+#include "flags.h"
+#include "insn-config.h"
+#include "expmed.h"
+#include "dojump.h"
+#include "explow.h"
+#include "emit-rtl.h"
+#include "stmt.h"
+#include "expr.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "cfgbuild.h"
+#include "cfgcleanup.h"
+#include "tm_p.h"
+#include "tm-constrs.h"
+#include "optabs.h"		/* For GEN_FCN.  */
+#include "target.h"
+#include "langhooks.h"		/* For add_builtin_function().  */
+#include "builtins.h"
+#include "ira.h"
+#include "ira-int.h"
+#include "tree-pass.h"
 
 /* ------------------------------------------------------------------------ */
 
+/* A helper function to check if this function should contain prologue.  */
+static bool
+nds32_have_prologue_p (void)
+{
+  int i;
+
+  for (i = 0; i < 28; i++)
+    if (NDS32_REQUIRED_CALLEE_SAVED_P (i))
+      return true;
+
+  return (flag_pic
+	  || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM)
+	  || NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM));
+}
+
+static int
+nds32_get_symbol_count (void)
+{
+  int symbol_count = 0;
+  rtx_insn *insn;
+  basic_block bb;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      FOR_BB_INSNS (bb, insn)
+	{
+	  /* Count the insns whose addressing mode uses a symbol.  */
+	  if (single_set (insn) && nds32_symbol_load_store_p (insn))
+	    {
+	      rtx pattern = PATTERN (insn);
+	      rtx mem;
+	      gcc_assert (GET_CODE (pattern) == SET);
+	      if (GET_CODE (SET_SRC (pattern)) == REG)
+		mem = SET_DEST (pattern);
+	      else
+		mem = SET_SRC (pattern);
+
+	      /* We only have lwi37 and swi37 for the fp-as-gp optimization,
+		 so do not count anything other than SImode.
+		 MEMs for QImode and HImode will be wrapped by ZERO_EXTEND
+		 or SIGN_EXTEND.  */
+	      if (GET_CODE (mem) == MEM)
+		symbol_count++;
+	    }
+	}
+    }
+
+  return symbol_count;
+}
+
 /* Function to determine whether it is worth to do fp_as_gp optimization.
-   Return 0: It is NOT worth to do fp_as_gp optimization.
-   Return 1: It is APPROXIMATELY worth to do fp_as_gp optimization.
+   Return false: It is NOT worth to do fp_as_gp optimization.
+   Return true: It is APPROXIMATELY worth to do fp_as_gp optimization.
    Note that if it is worth to do fp_as_gp optimization,
    we MUST set FP_REGNUM ever live in this function.  */
-int
+static bool
 nds32_fp_as_gp_check_available (void)
 {
-  /* By default we return 0.  */
-  return 0;
+  basic_block bb;
+  basic_block exit_bb;
+  edge_iterator ei;
+  edge e;
+  bool first_exit_blocks_p;
+
+  /* If ANY of the following conditions holds,
+     we DO NOT perform fp_as_gp optimization:
+       1. TARGET_FORBID_FP_AS_GP is set
+	  regardless of the TARGET_FORCE_FP_AS_GP.
+       2. The user explicitly uses the 'naked'/'no_prologue' attribute.
+	  We use nds32_naked_function_p() to help such checking.
+       3. Not optimizing for size.
+       4. A frame pointer is needed.
+       5. If $fp is already required to be saved,
+	  it means $fp has already been chosen by the register allocator.
+	  Thus we had better not use it for fp_as_gp optimization.
+       6. This function is a vararg function.
+	  DO NOT apply fp_as_gp optimization on this function
+	  because it may change and break the stack frame.
+       7. The epilogue is empty.
+	  This happens when the function uses exit()
+	  or its attribute is no_return.
+	  In that case, the compiler will not expand the epilogue,
+	  so we have no chance to output the .omit_fp_end directive.  */
+  if (TARGET_FORBID_FP_AS_GP
+      || nds32_naked_function_p (current_function_decl)
+      || !optimize_size
+      || frame_pointer_needed
+      || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM)
+      || (cfun->stdarg == 1)
+      || (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL))
+    return false;
+
+  /* Disable fp_as_gp if there is any infinite loop, since $fp may be
+     reused inside infinite loops by register renaming.
+     To check for infinite loops, we make sure that exit_bb post-dominates
+     all other basic blocks when there is no infinite loop.  */
+  first_exit_blocks_p = true;
+  exit_bb = NULL;
+
+  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
+    {
+      /* If there is more than one exit block, do not perform fp_as_gp
+	 optimization either.  */
+      if (!first_exit_blocks_p)
+	return false;
+
+      exit_bb = e->src;
+      first_exit_blocks_p = false;
+    }
+
+  /* No exit_bb found?  Just abort fp_as_gp.  */
+  if (!exit_bb)
+    return false;
+
+  /* Each bb should be post-dominated by exit_bb if there is no infinite loop.  */
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      if (!dominated_by_p (CDI_POST_DOMINATORS,
+			   bb,
+			   exit_bb))
+	return false;
+    }
+
+  /* Now we can check the possibility of using fp_as_gp optimization.  */
+  if (TARGET_FORCE_FP_AS_GP)
+    {
+      /* User explicitly issues -mforce-fp-as-gp option.  */
+      return true;
+    }
+  else
+    {
+      /* In the following we are going to evaluate whether
+	 it is worth to do fp_as_gp optimization.  */
+      bool good_gain = false;
+      int symbol_count;
+
+      int threshold;
+
+      /* We check whether a prologue is already required.
+	 Note that $gp will be saved in the prologue for PIC code generation.
+	 After that, we can set the threshold according to the existence of
+	 the prologue.  Each fp-implied instruction gains 2 bytes of code size
+	 over the gp-aware instruction, so we have the following heuristics.  */
+      if (flag_pic
+	  || nds32_have_prologue_p ())
+	{
+	  /* Have-prologue:
+	       The compiler already intends to generate prologue content,
+	       so the fp_as_gp optimization will only insert the
+	       'la $fp,_FP_BASE_' instruction, which will be
+	       converted into a 4-byte instruction at link time.
+	       The threshold is "3" symbol accesses, 2 + 2 + 2 > 4.  */
+	  threshold = 3;
+	}
+      else
+	{
+	  /* None-prologue:
+	       The compiler originally does not generate prologue content,
+	       so the fp_as_gp optimization will NOT ONLY insert the
+	       'la $fp,_FP_BASE_' instruction, but ALSO cause
+	       push/pop instructions.
+	       If we are using v3push (push25/pop25),
+	       the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2;
+	       if we are using normal push (smw/lmw),
+	       the threshold is "5+2" symbol accesses, 7*2 > 4 + 4 + 4.  */
+	  threshold = 5 + (TARGET_V3PUSH ? 0 : 2);
+	}
+
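+      /* Each of the symbol_count accesses saves 2 bytes once $fp can be
+	 used as the base register, so the total gain is 2 * symbol_count
+	 bytes; requiring symbol_count >= threshold makes that gain exceed
+	 the setup cost estimated above.  */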
+      symbol_count = nds32_get_symbol_count ();
+
+      if (symbol_count >= threshold)
+	good_gain = true;
+
+      /* Enable fp_as_gp optimization when potential gain is good enough.  */
+      return good_gain;
+    }
+}
+
+static unsigned int
+nds32_fp_as_gp (void)
+{
+  bool fp_as_gp_p;
+  calculate_dominance_info (CDI_POST_DOMINATORS);
+  fp_as_gp_p = nds32_fp_as_gp_check_available ();
+
+  /* Here is a hack to IRA to enable/disable a hard register per function.
+     We *MUST* review this approach after migrating to gcc 4.9!  */
+  if (fp_as_gp_p)
+    {
+      SET_HARD_REG_BIT (this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM);
+      df_set_regs_ever_live (FP_REGNUM, 1);
+    }
+  else
+    CLEAR_HARD_REG_BIT (this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM);
+
+  cfun->machine->fp_as_gp_p = fp_as_gp_p;
+
+  free_dominance_info (CDI_POST_DOMINATORS);
+  return 1;
+}
+
+const pass_data pass_data_nds32_fp_as_gp =
+{
+  RTL_PASS,				/* type */
+  "fp_as_gp",				/* name */
+  OPTGROUP_NONE,			/* optinfo_flags */
+  TV_MACH_DEP,				/* tv_id */
+  0,					/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  0					/* todo_flags_finish */
+};
+
+class pass_nds32_fp_as_gp : public rtl_opt_pass
+{
+public:
+  pass_nds32_fp_as_gp (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_nds32_fp_as_gp, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate (function *)
+  {
+    return !TARGET_LINUX_ABI
+	   && TARGET_16_BIT
+	   && optimize_size;
+  }
+  unsigned int execute (function *) { return nds32_fp_as_gp (); }
+};
+
+rtl_opt_pass *
+make_pass_nds32_fp_as_gp (gcc::context *ctxt)
+{
+  return new pass_nds32_fp_as_gp (ctxt);
 }
 
 /* ------------------------------------------------------------------------ */
diff --git a/gcc/config/nds32/nds32-fpu.md b/gcc/config/nds32/nds32-fpu.md
new file mode 100644
index 0000000..11eabd5
--- /dev/null
+++ b/gcc/config/nds32/nds32-fpu.md
@@ -0,0 +1,503 @@
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;SFmode moves
+
+(define_expand "movsf"
+  [(set (match_operand:SF 0 "general_operand" "")
+	(match_operand:SF 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (SFmode, operands[1]);
+  if (CONST_DOUBLE_P (operands[1])
+      && !satisfies_constraint_Cs20 (operands[1]))
+    {
+      const REAL_VALUE_TYPE *r;
+      unsigned long l;
+
+      r = CONST_DOUBLE_REAL_VALUE (operands[1]);
+      REAL_VALUE_TO_TARGET_SINGLE (*r, l);
+
+      emit_move_insn (operands[0], gen_rtx_HIGH (SFmode, operands[1]));
+
+      if ((l & 0xFFF) != 0)
+	emit_insn (gen_movsf_lo (operands[0], operands[0], operands[1]));
+      DONE;
+    }
+})
+
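+;; movsf_lo pairs with the sethi emitted by the expander above: a
+;; single-precision constant that does not satisfy the Cs20 constraint is
+;; materialized as a 20-bit high part (sethi) plus a 12-bit low part (ori).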
+(define_insn "movsf_lo"
+  [(set (match_operand:SF 0 "register_operand" "=r")
+	(lo_sum:SF (match_operand:SF 1 "register_operand" "r")
+		   (match_operand:SF 2 "immediate_operand" "i")))]
+  ""
+  "ori\t%0, %1, lo12(%2)"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")]
+)
+
+(define_insn "*movsf"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m,   l,   l,   l,   d, r, f, *f, *r, f, Q,   r,   r,    r")
+	(match_operand:SF 1 "general_operand"      " r, r,   l,   l,   l,   d, r, U45, U33, U37, U45, m, f, *r, *f, Q, f,Cs05,Cs20, Chig"))]
+  "(register_operand(operands[0], SFmode)
+    || register_operand(operands[1], SFmode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mov55\t%0, %1";
+    case 1:
+      return "ori\t%0, %1, 0";
+    case 2:
+    case 3:
+    case 4:
+    case 5:
+      return nds32_output_16bit_store (operands, 4);
+    case 6:
+      return nds32_output_32bit_store (operands, 4);
+    case 7:
+    case 8:
+    case 9:
+    case 10:
+      return nds32_output_16bit_load (operands, 4);
+    case 11:
+      return nds32_output_32bit_load (operands, 4);
+    case 12:
+      if (TARGET_FPU_SINGLE)
+	return "fcpyss\t%0, %1, %1";
+      else
+	return "#";
+    case 13:
+      return "fmtsr\t%1, %0";
+    case 14:
+      return "fmfsr\t%0, %1";
+    case 15:
+      return nds32_output_float_load (operands);
+    case 16:
+      return nds32_output_float_store (operands);
+    case 17:
+      return "movi55\t%0, %1";
+    case 18:
+      return "movi\t%0, %1";
+    case 19:
+      return "sethi\t%0, %1";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,store,store,store,store,store,load,load,load,load,load,fcpy,fmtsr,fmfsr,fload,fstore,alu,alu,alu")
+   (set_attr "length"  "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   4,   4,    4,    4,    4,     4,  2,  4,  4")
+   (set_attr "feature" " v1, v1,   v1,   v1,   v1,   v1,   v1,  v1,  v1,  v1,  v1,  v1, fpu,  fpu,  fpu,  fpu,   fpu, v1, v1, v1")])
+
+;; Conditional Move Instructions
+
+(define_expand "mov<mode>cc"
+  [(set (match_operand:ANYF 0 "register_operand" "")
+	(if_then_else:ANYF (match_operand 1 "nds32_float_comparison_operator" "")
+			   (match_operand:ANYF 2 "register_operand" "")
+			   (match_operand:ANYF 3 "register_operand" "")))]
+  ""
+{
+  if (nds32_cond_move_p (operands[1]))
+    {
+      /* The condition code of operands[1] is UNORDERED or ORDERED, and
+	 the mode of its sub-operands isn't SFmode or DFmode; return FAIL
+	 to gcc, because we do not use the slt compare instruction
+	 to generate the UNORDERED and ORDERED conditions.  */
+      FAIL;
+    }
+  else
+    nds32_expand_float_movcc (operands);
+})
+
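+;; The two patterns below implement the conditional move: operand 1 holds a
+;; previous FPU comparison result, and fcmovz/fcmovn select operand 2 or
+;; operand 3 depending on whether that result is zero.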
+(define_insn "fcmov<mode>_eq"
+  [(set (match_operand:ANYF 0 "register_operand" "=f, f")
+	(if_then_else:ANYF (eq (match_operand:SI 1 "register_operand" "f, f")
+			       (const_int 0))
+			   (match_operand:ANYF 2 "register_operand" "f, 0")
+			   (match_operand:ANYF 3 "register_operand" "0, f")))]
+  ""
+  "@
+   fcmovz<size>\t%0,%2,%1
+   fcmovn<size>\t%0,%3,%1"
+  [(set_attr "type"  "fcmov")
+   (set_attr "length" "4")]
+)
+
+(define_insn "fcmov<mode>_ne"
+  [(set (match_operand:ANYF 0 "register_operand" "=f, f")
+	(if_then_else:ANYF (ne (match_operand:SI 1 "register_operand" "f, f")
+			       (const_int 0))
+			   (match_operand:ANYF 2 "register_operand" "f, 0")
+			   (match_operand:ANYF 3 "register_operand" "0, f")))]
+  ""
+  "@
+   fcmovn<size>\t%0,%2,%1
+   fcmovz<size>\t%0,%3,%1"
+  [(set_attr "type"  "fcmov")
+   (set_attr "length" "4")]
+)
+
+;; Arithmetic instructions.
+
+(define_insn "add<mode>3"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(plus:ANYF (match_operand:ANYF 1 "register_operand" "f")
+		   (match_operand:ANYF 2 "register_operand" "f")))]
+  ""
+  "fadd<size>\t %0, %1, %2"
+  [(set_attr "type"   "falu")
+   (set_attr "length" "4")]
+)
+
+(define_insn "sub<mode>3"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(minus:ANYF (match_operand:ANYF 1 "register_operand" "f")
+		    (match_operand:ANYF 2 "register_operand" "f")))]
+  ""
+  "fsub<size>\t %0, %1, %2"
+  [(set_attr "type"   "falu")
+   (set_attr "length" "4")]
+)
+
+;; Multiplication insns.
+
+(define_insn "mul<mode>3"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(mult:ANYF (match_operand:ANYF 1 "register_operand" "f")
+		   (match_operand:ANYF 2 "register_operand" "f")))]
+  ""
+  "fmul<size>\t %0, %1, %2"
+  [(set_attr "type"   "fmul")
+   (set_attr "length" "4")]
+)
+
+(define_insn "fma<mode>4"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(fma:ANYF (match_operand:ANYF 1 "register_operand" "f")
+		  (match_operand:ANYF 2 "register_operand" "f")
+		  (match_operand:ANYF 3 "register_operand" "0")))]
+  "TARGET_EXT_FPU_FMA"
+  "fmadd<size>\t%0, %1, %2"
+  [(set_attr "type"   "fmac")
+   (set_attr "length" "4")]
+)
+
+(define_insn "fnma<mode>4"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f"))
+		  (match_operand:ANYF 2 "register_operand" "f")
+		  (match_operand:ANYF 3 "register_operand" "0")))]
+  "TARGET_EXT_FPU_FMA"
+  "fmsub<size>\t%0, %1, %2"
+  [(set_attr "type"   "fmac")
+   (set_attr "length" "4")]
+)
+
+(define_insn "fms<mode>4"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(fma:ANYF (match_operand:ANYF 1 "register_operand" "f")
+		  (match_operand:ANYF 2 "register_operand" "f")
+		  (neg:ANYF (match_operand:ANYF 3 "register_operand" "0"))))]
+  "TARGET_EXT_FPU_FMA"
+  "fnmsub<size>\t%0, %1, %2"
+  [(set_attr "type"   "fmac")
+   (set_attr "length" "4")]
+)
+
+(define_insn "fnms<mode>4"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f"))
+		  (match_operand:ANYF 2 "register_operand" "f")
+		  (neg:ANYF (match_operand:ANYF 3 "register_operand" "0"))))]
+  "TARGET_EXT_FPU_FMA"
+  "fnmadd<size>\t%0, %1, %2"
+  [(set_attr "type"   "fmac")
+   (set_attr "length" "4")]
+)
+
+;; Div Instructions.
+
+(define_insn "div<mode>3"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(div:ANYF (match_operand:ANYF 1 "register_operand" "f")
+		  (match_operand:ANYF 2 "register_operand" "f")))]
+  ""
+  "fdiv<size>\t %0, %1, %2"
+  [(set_attr "type"   "fdiv")
+   (set_attr "length" "4")]
+)
+
+(define_insn "sqrt<mode>2"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))]
+  ""
+  "fsqrt<size>\t %0, %1"
+  [(set_attr "type"   "fsqrt")
+   (set_attr "length" "4")]
+)
+
+;; Conditional Branch patterns
+
+(define_expand "cstore<mode>4"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operator:SI 1 "nds32_float_comparison_operator"
+	 [(match_operand:ANYF 2 "register_operand" "")
+	  (match_operand:ANYF 3 "register_operand" "")]))]
+  ""
+{
+  nds32_expand_float_cstore (operands);
+  DONE;
+})
+
+(define_expand "cbranch<mode>4"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_float_comparison_operator"
+		       [(match_operand:ANYF 1 "register_operand" "")
+			(match_operand:ANYF 2 "register_operand" "")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  ""
+{
+  nds32_expand_float_cbranch (operands);
+  DONE;
+})
+
+;; Copysign Instructions.
+
+(define_insn "copysignsf3"
+  [(set (match_operand:SF 0 "register_operand" "=f")
+	(unspec:SF [(match_operand:SF 1 "register_operand" "f")
+		    (match_operand:SF 2 "register_operand" "f")]
+		     UNSPEC_COPYSIGN))]
+  "TARGET_FPU_SINGLE"
+  "fcpyss\t%0,%1,%2"
+  [(set_attr "type"   "fcpy")
+   (set_attr "length" "4")]
+)
+
+(define_insn "copysigndf3"
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(unspec:DF [(match_operand:DF 1 "register_operand" "f")
+		    (match_operand:DF 2 "register_operand" "f")]
+		     UNSPEC_COPYSIGN))]
+  "TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE"
+  "fcpysd\t%0,%1,%2"
+  [(set_attr "type"   "fcpy")
+   (set_attr "length" "4")]
+)
+
+(define_insn "*ncopysign<mode>3"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(neg:ANYF (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
+				(match_operand:ANYF 2 "register_operand" "f")]
+				UNSPEC_COPYSIGN)))]
+  ""
+  "fcpyns<size>\t%0,%1,%2"
+  [(set_attr "type"   "fcpy")
+   (set_attr "length" "4")]
+)
+
+;; Absolute Instructions
+
+(define_insn "abssf2"
+  [(set (match_operand:SF 0 "register_operand" "=f, r")
+	(abs:SF (match_operand:SF 1 "register_operand" "f, r")))]
+  "TARGET_FPU_SINGLE || TARGET_EXT_PERF"
+  "@
+   fabss\t%0, %1
+   bclr\t%0, %1, 31"
+  [(set_attr "type"    "fabs,alu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "fpu,pe1")]
+)
+
+(define_insn "absdf2"
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(abs:DF (match_operand:DF 1 "register_operand" "f")))]
+  "TARGET_FPU_DOUBLE"
+  "fabsd\t%0, %1"
+  [(set_attr "type"   "fabs")
+   (set_attr "length" "4")]
+)
+
+;; Negation Instructions
+
+(define_insn "*negsf2"
+  [(set (match_operand:SF 0 "register_operand" "=f, r")
+	(neg:SF (match_operand:SF 1 "register_operand" "f, r")))]
+  "TARGET_FPU_SINGLE || TARGET_EXT_PERF"
+  "@
+   fcpynss\t%0, %1, %1
+   btgl\t%0, %1, 31"
+  [(set_attr "type"    "fcpy,alu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "fpu,pe1")]
+)
+
+(define_insn "*negdf2"
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(neg:DF (match_operand:DF 1 "register_operand" "f")))]
+  "TARGET_FPU_DOUBLE"
+  "fcpynsd\t%0, %1, %1"
+  [(set_attr "type"   "fcpy")
+   (set_attr "length" "4")]
+)
+
+;; Data Format Conversion Instructions
+
+(define_insn "floatunssi<mode>2"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(unsigned_float:ANYF (match_operand:SI 1 "register_operand" "f")))]
+  ""
+  "fui2<size>\t %0, %1"
+  [(set_attr "type"   "falu")
+   (set_attr "length" "4")]
+)
+
+(define_insn "floatsi<mode>2"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(float:ANYF (match_operand:SI 1 "register_operand" "f")))]
+  ""
+  "fsi2<size>\t %0, %1"
+  [(set_attr "type"   "falu")
+   (set_attr "length" "4")]
+)
+
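+;; The .z forms of the conversion instructions round toward zero, which is
+;; exactly the semantics the fix_trunc patterns below require.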
+(define_insn "fixuns_trunc<mode>si2"
+  [(set (match_operand:SI 0 "register_operand" "=f")
+	(unsigned_fix:SI (fix:ANYF (match_operand:ANYF 1 "register_operand" "f"))))]
+  ""
+  "f<size>2ui.z\t %0, %1"
+  [(set_attr "type"   "falu")
+   (set_attr "length" "4")]
+)
+
+(define_insn "fix_trunc<mode>si2"
+  [(set (match_operand:SI 0 "register_operand" "=f")
+	(fix:SI (fix:ANYF (match_operand:ANYF 1 "register_operand" "f"))))]
+  ""
+  "f<size>2si.z\t %0, %1"
+  [(set_attr "type"   "falu")
+   (set_attr "length" "4")]
+)
+
+(define_insn "extendsfdf2"
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(float_extend:DF (match_operand:SF 1 "register_operand" "f")))]
+  "TARGET_FPU_SINGLE && TARGET_FPU_DOUBLE"
+  "fs2d\t%0, %1"
+  [(set_attr "type"   "falu")
+   (set_attr "length" "4")]
+)
+
+(define_insn "truncdfsf2"
+  [(set (match_operand:SF 0 "register_operand" "=f")
+	(float_truncate:SF (match_operand:DF 1 "register_operand" "f")))]
+  "TARGET_FPU_SINGLE && TARGET_FPU_DOUBLE"
+  "fd2s\t%0, %1"
+  [(set_attr "type"   "falu")
+   (set_attr "length" "4")]
+)
+
+;; Compare Instructions
+
+(define_insn "cmp<mode>_eq"
+  [(set (match_operand:SI 0 "register_operand" "=f")
+	(eq:SI (match_operand:ANYF 1 "register_operand" "f")
+	       (match_operand:ANYF 2 "register_operand" "f")))]
+  ""
+  {
+    if (NDS32_EXT_FPU_DOT_E)
+      return "fcmpeq<size>.e %0, %1, %2";
+    else
+      return "fcmpeq<size>\t%0, %1, %2";
+  }
+  [(set_attr "type"   "fcmp")
+   (set_attr "length" "4")]
+)
+
+(define_insn "cmp<mode>_lt"
+  [(set (match_operand:SI 0 "register_operand" "=f")
+	(lt:SI (match_operand:ANYF 1 "register_operand" "f")
+	       (match_operand:ANYF 2 "register_operand" "f")))]
+  ""
+{
+  if (NDS32_EXT_FPU_DOT_E)
+    return "fcmplt<size>.e %0, %1, %2";
+  else
+    return "fcmplt<size>\t%0, %1, %2";
+}
+  [(set_attr "type"   "fcmp")
+   (set_attr "length" "4")]
+)
+
+(define_insn "cmp<mode>_le"
+  [(set (match_operand:SI 0 "register_operand" "=f")
+	(le:SI (match_operand:ANYF 1 "register_operand" "f")
+	       (match_operand:ANYF 2 "register_operand" "f")))]
+  ""
+{
+  if (NDS32_EXT_FPU_DOT_E)
+    return "fcmple<size>.e %0, %1, %2";
+  else
+    return "fcmple<size>\t%0, %1, %2";
+}
+  [(set_attr "type"   "fcmp")
+   (set_attr "length" "4")]
+)
+
+(define_insn "cmp<mode>_un"
+  [(set (match_operand:SI 0 "register_operand" "=f")
+	(unordered:SI (match_operand:ANYF 1 "register_operand" "f")
+		      (match_operand:ANYF 2 "register_operand" "f")))]
+  ""
+{
+  if (NDS32_EXT_FPU_DOT_E)
+    return "fcmpun<size>.e %0, %1, %2";
+  else
+    return "fcmpun<size>\t%0, %1, %2";
+}
+  [(set_attr "type"   "fcmp")
+   (set_attr "length" "4")]
+)
+
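+;; Without a single-precision FPU unit an FPR-to-FPR SFmode copy cannot be
+;; done directly, so the split below routes the value through the temporary
+;; register TA_REGNUM ($ta).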
+(define_split
+  [(set (match_operand:SF 0 "register_operand" "")
+	(match_operand:SF 1 "register_operand" ""))]
+  "!TARGET_FPU_SINGLE
+   && NDS32_IS_FPR_REGNUM (REGNO (operands[0]))
+   && NDS32_IS_FPR_REGNUM (REGNO (operands[1]))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+{
+  operands[2] = gen_rtx_REG (SFmode, TA_REGNUM);
+})
+
+(define_split
+  [(set (match_operand:SF 0 "register_operand" "")
+	(match_operand:SF 1 "const_double_operand" ""))]
+  "!satisfies_constraint_Cs20 (operands[1])
+   && !satisfies_constraint_Chig (operands[1])"
+  [(set (match_dup 0) (high:SF (match_dup 1)))
+   (set (match_dup 0) (lo_sum:SF (match_dup 0) (match_dup 1)))])
+;; ----------------------------------------------------------------------------
diff --git a/gcc/config/nds32/nds32-gcse.c b/gcc/config/nds32/nds32-gcse.c
new file mode 100644
index 0000000..301981d
--- /dev/null
+++ b/gcc/config/nds32/nds32-gcse.c
@@ -0,0 +1,670 @@
+/* Global CSE pass of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* ------------------------------------------------------------------------ */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "rtl.h"
+#include "df.h"
+#include "alias.h"
+#include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
+#include "regs.h"
+#include "insn-config.h"	/* Required by recog.h.  */
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"		/* For DFA state_t.  */
+#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
+#include "reload.h"		/* For push_reload().  */
+#include "flags.h"
+#include "insn-config.h"
+#include "expmed.h"
+#include "dojump.h"
+#include "explow.h"
+#include "emit-rtl.h"
+#include "stmt.h"
+#include "expr.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "cfgbuild.h"
+#include "cfgcleanup.h"
+#include "tm_p.h"
+#include "tm-constrs.h"
+#include "optabs.h"		/* For GEN_FCN.  */
+#include "target.h"
+#include "langhooks.h"		/* For add_builtin_function().  */
+#include "builtins.h"
+#include "cpplib.h"
+#include "params.h"
+#include "tree-pass.h"
+#include "dbgcnt.h"
+#include "df.h"
+#include "reload.h"
+
+/* ------------------------------------------------------------------------ */
+
+struct expr
+{
+  /* The expression.  */
+  rtx expr;
+
+  /* The same hash for this entry.  */
+  hashval_t hash;
+
+  struct occr *antic_occr;
+  /* The number of antic_occr.  */
+  unsigned int count;
+};
+
+struct occr
+{
+  /* Next occurrence of this expression.  */
+  struct occr *next;
+  /* The insn that computes the expression.  */
+  rtx_insn *insn;
+  /* Nonzero if this [anticipatable] occurrence has been deleted.  */
+  char deleted_p;
+};
+
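+/* For each hard register we track, within a single basic block, the LUID of
+   its first set and of its first use; an instruction is considered locally
+   anticipatable only when it is the first set of its destination and no
+   earlier use of that register appears in the block.  */
+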
+struct reg_avail_info
+{
+  basic_block last_bb;
+  int first_set;
+  int first_use;
+};
+
+/* Hashtable helpers.  */
+
+struct expr_hasher : nofree_ptr_hash <expr>
+{
+  static inline hashval_t hash (const expr *);
+  static inline bool equal (const expr *, const expr *);
+};
+
+/* Callback for hashtab.
+   Return the hash value for expression EXP.  We don't actually hash
+   here, we just return the cached hash value.  */
+
+inline hashval_t
+expr_hasher::hash (const expr *exp)
+{
+  return exp->hash;
+}
+
+/* Callback for hashtab.
+   Return nonzero if exp1 is equivalent to exp2.  */
+
+inline bool
+expr_hasher::equal (const expr *exp1, const expr *exp2)
+{
+  int equiv_p = exp_equiv_p (exp1->expr, exp2->expr, 0, true);
+
+  gcc_assert (!equiv_p || exp1->hash == exp2->hash);
+  return equiv_p;
+}
+
+static hashval_t
+hash_expr (rtx x, int *do_not_record_p)
+{
+  *do_not_record_p = 0;
+  return hash_rtx (x, GET_MODE (x), do_not_record_p,
+		   NULL,  /*have_reg_qty=*/false);
+}
+
+
+/* Helpers for memory allocation/freeing.  */
+static void alloc_mem (void);
+static void free_mem (void);
+static void compute_hash_table (void);
+/* Scan the pattern of INSN and add an entry to the hash TABLE.
+   After reload we are interested in loads/stores only.  */
+static void hash_scan_set (rtx_insn *);
+static void insert_expr_in_table (rtx, rtx_insn *);
+static void dump_hash_table (FILE *);
+
+static struct obstack expr_obstack;
+/* The table itself.  */
+static hash_table <expr_hasher> *expr_table;
+static struct reg_avail_info *reg_avail_info;
+static sbitmap *hoist_vbein;
+static sbitmap *hoist_vbeout;
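+/* hoist_vbein[bb] records that the expression currently being processed
+   still has an occurrence in BB; hoist_vbeout[bb] is the intersection of
+   hoist_vbein over BB's successors, so the expression can only be hoisted
+   into BB when every successor contains it.  */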
+
+/* Allocate memory for the CUID mapping array and register/memory
+   tracking tables.  */
+
+static void
+alloc_mem (void)
+{
+  /* Allocate the available expressions hash table.  We don't want to
+     make the hash table too small, but unnecessarily making it too large
+     also doesn't help.  The i/4 is a gcse.c relic, and seems like a
+     reasonable choice.  */
+  expr_table = new hash_table <expr_hasher> (MAX (get_max_insn_count () / 4,
+						   13));
+
+  /* We allocate everything on obstacks because we often can roll back
+     the whole obstack to some point.  Freeing obstacks is very fast.  */
+  gcc_obstack_init (&expr_obstack);
+}
+
+/* Free memory allocated by alloc_mem.  */
+
+static void
+free_mem (void)
+{
+  delete expr_table;
+  expr_table = NULL;
+
+  obstack_free (&expr_obstack, NULL);
+}
+
+
+/* Dump all expressions and occurrences that are currently in the
+   expression hash table to FILE.  */
+
+/* This helper is called via htab_traverse.  */
+int
+nds32_dump_expr_hash_table_entry (expr **slot, FILE *file)
+{
+  struct expr *exprs = *slot;
+  struct occr *occr;
+
+  fprintf (file, "expr: ");
+  print_rtl (file, exprs->expr);
+  fprintf (file,"\nhashcode: %u\n", exprs->hash);
+  fprintf (file,"list of occurrences:\n");
+  occr = exprs->antic_occr;
+  while (occr)
+    {
+      rtx_insn *insn = occr->insn;
+      print_rtl_single (file, insn);
+      fprintf (file, "\n");
+      occr = occr->next;
+    }
+  fprintf (file, "\n");
+  return 1;
+}
+
+static void
+dump_hash_table (FILE *file)
+{
+  fprintf (file, "\n\nexpression hash table\n");
+  fprintf (file, "size %ld, %ld elements, %f collision/search ratio\n",
+	   (long) expr_table->size (),
+	   (long) expr_table->elements (),
+	   expr_table->collisions ());
+  if (expr_table->elements () > 0)
+    {
+      fprintf (file, "\n\ntable entries:\n");
+      expr_table->traverse <FILE *, nds32_dump_expr_hash_table_entry> (file);
+    }
+  fprintf (file, "\n");
+}
+
+/* Insert expression X in INSN in the hash TABLE.
+   If it is already present, record it as the last occurrence in INSN's
+   basic block.  */
+
+static void
+insert_expr_in_table (rtx x, rtx_insn *insn)
+{
+  int do_not_record_p;
+  hashval_t hash;
+  struct expr *cur_expr, **slot;
+  struct occr *antic_occr, *last_occr = NULL;
+
+  hash = hash_expr (x, &do_not_record_p);
+
+  /* Do not insert expression in the table if it contains volatile operands,
+     or if hash_expr determines the expression is something we don't want
+     to or can't handle.  */
+  if (do_not_record_p)
+    return;
+
+  /* We anticipate that redundant expressions are rare, so for convenience
+     allocate a new hash table element here already and set its fields.
+     If we don't do this, we need a hack with a static struct expr.  Anyway,
+     obstack_free is really fast and one more obstack_alloc doesn't hurt if
+     we're going to see more expressions later on.  */
+  cur_expr = (struct expr *) obstack_alloc (&expr_obstack,
+					    sizeof (struct expr));
+  cur_expr->expr = x;
+  cur_expr->hash = hash;
+  cur_expr->antic_occr = NULL;
+
+  slot = expr_table->find_slot_with_hash (cur_expr, hash, INSERT);
+
+  if (! (*slot))
+    /* The expression isn't found, so insert it.  */
+    *slot = cur_expr;
+  else
+    {
+      /* The expression is already in the table, so roll back the
+	 obstack and use the existing table entry.  */
+      obstack_free (&expr_obstack, cur_expr);
+      cur_expr = *slot;
+    }
+
+  /* Search for another occurrence in the same basic block.  */
+  antic_occr = cur_expr->antic_occr;
+  cur_expr->count++;
+  while (antic_occr
+	 && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
+    {
+      /* If an occurrence isn't found, save a pointer to the end of
+	 the list.  */
+      last_occr = antic_occr;
+      antic_occr = antic_occr->next;
+    }
+
+  if (antic_occr)
+    /* Found another instance of the expression in the same basic block.
+       Prefer this occurrence to the currently recorded one.  We want
+       the last one in the block and the block is scanned from start
+       to end.  */
+    antic_occr->insn = insn;
+  else
+    {
+      /* First occurrence of this expression in this basic block.  */
+      antic_occr = (struct occr *) obstack_alloc (&expr_obstack,
+						  sizeof (struct occr));
+
+      /* First occurrence of this expression in any block?  */
+      if (cur_expr->antic_occr == NULL)
+	cur_expr->antic_occr = antic_occr;
+      else
+	last_occr->next = antic_occr;
+
+      antic_occr->insn = insn;
+      antic_occr->next = NULL;
+      antic_occr->deleted_p = 0;
+    }
+}
+
+/* Check whether this instruction is supported format.  */
+
+static void
+hash_scan_set (rtx_insn *insn)
+{
+  rtx pat = PATTERN (insn);
+  rtx src = SET_SRC (pat);
+  rtx dest = SET_DEST (pat);
+  int regno;
+  struct reg_avail_info *info;
+
+  /* Don't mess with jumps and nops.  */
+  if (JUMP_P (insn) || set_noop_p (pat))
+    return;
+
+  /* TODO: support more formats.  */
+
+  /* Currently we only consider locally anticipatable instructions.  */
+  if (REG_P (dest) && REGNO (dest) <= SP_REGNUM)
+    {
+      regno = REGNO (dest);
+      info = &reg_avail_info[regno];
+
+      if (BLOCK_FOR_INSN (insn) == info->last_bb
+	  && info->first_set == DF_INSN_LUID (insn)
+	  && info->first_use >= info->first_set)
+	{
+	  /* Currently we only support immediate inputs, because
+	     this is the bugzilla case.  */
+	  if (CONST_INT_P (src) || CONST_DOUBLE_P (src))
+	    insert_expr_in_table (PATTERN (insn), insn);
+	}
+    }
+}
+
+/* Record register first use information for REGNO in INSN.
+
+   first_use records the first place in the block where the register
+   is used and is used to compute "anticipatability".
+
+   last_bb records the block for which first_use is valid,
+   as a quick test to invalidate them.  */
+
+static void
+record_first_reg_use_info (rtx_insn *insn, int regno)
+{
+  struct reg_avail_info *info = &reg_avail_info[regno];
+  int luid = DF_INSN_LUID (insn);
+
+  if (info->last_bb != BLOCK_FOR_INSN (insn))
+    {
+      info->last_bb = BLOCK_FOR_INSN (insn);
+      info->first_use = luid;
+      /* Set the value to record that the use comes before the set.  */
+      info->first_set = luid + 1;
+    }
+}
+
+/* Called from compute_hash_table via note_stores to handle one
+   SET or CLOBBER in an insn.  DATA is really the instruction in which
+   the SET is taking place.  */
+
+static void
+record_first_use_info (rtx *dest, void *data)
+{
+  rtx_insn *last_set_insn = static_cast <rtx_insn *> (data);
+  int i, j;
+  enum rtx_code code;
+  const char *fmt;
+  rtx x = *dest;
+
+  if (x == 0)
+    return;
+
+  code = GET_CODE (x);
+  if (REG_P (x) && REGNO (x) <= SP_REGNUM)
+    {
+      record_first_reg_use_info (last_set_insn, REGNO (x));
+      /* DF and DI mode may use two registers.  */
+      if (GET_MODE_SIZE (GET_MODE (x)) == 8)
+	record_first_reg_use_info (last_set_insn, REGNO (x) + 1);
+    }
+
+  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
+    {
+      if (fmt[i] == 'e')
+	record_first_use_info (&XEXP (x, i), data);
+      else if (fmt[i] == 'E')
+	for (j = 0; j < XVECLEN (x, i); j++)
+	  record_first_use_info (&XVECEXP (x, i, j), data);
+    }
+}
+
+/* Record register first/block set information for REGNO in INSN.
+
+   first_set records the first place in the block where the register
+   is set and is used to compute "anticipatability".
+
+   last_bb records the block for which first_set is valid,
+   as a quick test to invalidate them.  */
+
+static void
+record_first_reg_set_info (rtx_insn *insn, int regno)
+{
+  struct reg_avail_info *info = &reg_avail_info[regno];
+  int luid = DF_INSN_LUID (insn);
+
+  if (info->last_bb != BLOCK_FOR_INSN (insn))
+    {
+      info->last_bb = BLOCK_FOR_INSN (insn);
+      info->first_set = luid;
+      /* Set the value to record that the use comes after the set.  */
+      info->first_use = luid + 1;
+    }
+}
+
+/* Called from compute_hash_table via note_stores to handle one
+   SET or CLOBBER in an insn.  DATA is really the instruction in which
+   the SET is taking place.  */
+
+static void
+record_first_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
+{
+  rtx_insn *last_set_insn = static_cast <rtx_insn *> (data);
+
+  if (GET_CODE (dest) == SUBREG)
+    dest = SUBREG_REG (dest);
+
+  if (REG_P (dest) && REGNO (dest) <= SP_REGNUM)
+    {
+      record_first_reg_set_info (last_set_insn, REGNO (dest));
+      if (GET_MODE_SIZE (GET_MODE (dest)) == 8)
+	record_first_reg_set_info (last_set_insn, REGNO (dest) + 1);
+    }
+}
+
+/* Build hash table for supported format instructions.
+   Only consider if the instruction is anticipatable in the basic block here.
+   We postpone the def-use check until hoisting.  */
+
+static void
+compute_hash_table (void)
+{
+  basic_block bb;
+  int i;
+
+  /* We only take care of hard registers.  */
+  reg_avail_info =
+    (struct reg_avail_info *) xmalloc (sizeof (struct reg_avail_info) *
+				       (SP_REGNUM + 1));
+
+  for (i = 0; i < 32; i++)
+    reg_avail_info[i].last_bb = NULL;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      rtx_insn *insn;
+
+      /* Do not hoist instructions from a block which has more
+	 than one predecessor.  */
+      if (EDGE_COUNT (bb->preds) > 1)
+	continue;
+
+      FOR_BB_INSNS (bb, insn)
+	{
+	  if (!NONDEBUG_INSN_P (insn))
+	    continue;
+
+	  /* Construct a caller save register barrier.  We cannot hoist the
+	     instruction over a function call which sets caller save
+	     registers.  */
+	  if (CALL_P (insn))
+	    {
+	      for (i = 0; i <= SP_REGNUM; i++)
+		if (call_used_regs[i])
+		  record_first_reg_use_info (insn, i);
+	    }
+
+	  note_uses (&PATTERN (insn), record_first_use_info, insn);
+	  note_stores (PATTERN (insn), record_first_set_info, insn);
+	}
+
+      /* Build the hash table.  */
+      FOR_BB_INSNS (bb, insn)
+	if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET)
+	  hash_scan_set (insn);
+    }
+}
+
+/* Hoist instructions in this slot if possible.  */
+int
+nds32_find_gcse_expr_table (expr **slot, void *data ATTRIBUTE_UNUSED)
+{
+  struct expr *exprs = *slot;
+  struct occr *occr;
+  rtx_insn *insn = NULL;
+  rtx_insn *last_insn;
+  basic_block bb;
+  edge e;
+  unsigned ix;
+  unsigned emit_done;
+  unsigned cover, regno;
+  df_ref use;
+  enum machine_mode mode;
+
+  if (exprs->count < 2)
+    return 1;
+
+  bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun));
+  bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun));
+
+  /* Set the bit for this slot.  */
+  occr = exprs->antic_occr;
+  while (occr)
+    {
+      insn = occr->insn;
+      bb = BLOCK_FOR_INSN (insn);
+      if (!occr->deleted_p)
+	bitmap_set_bit (hoist_vbein[bb->index], 0);
+      occr = occr->next;
+    }
+
+  /* Try to hoist code for each basic block.  */
+  FOR_EACH_BB_REVERSE_FN (bb, cfun)
+    {
+      if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
+	bitmap_intersection_of_succs (hoist_vbeout[bb->index], hoist_vbein, bb);
+
+      if (bitmap_bit_p (hoist_vbeout[bb->index], 0)
+	  && EDGE_COUNT (bb->succs) > 1)
+	{
+	  emit_done = 0;
+	  cover = FALSE;
+	  for (e = NULL, ix = 0; ix < EDGE_COUNT (bb->succs); ix++)
+	    {
+	      e = EDGE_SUCC (bb, ix);
+	      if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
+		continue;
+	      occr = exprs->antic_occr;
+	      while (occr)
+		{
+		  insn = occr->insn;
+		  if (!occr->deleted_p && e->dest == BLOCK_FOR_INSN (insn))
+		    break;
+		  occr = occr->next;
+		}
+
+	      gcc_assert (insn != NULL);
+
+	      if (!emit_done)
+		{
+		  last_insn = BB_END (bb);
+		  /* Check that the defined register is not used by the last
+		     instruction of the previous block.  */
+		  regno = REGNO (SET_DEST (PATTERN (insn)));
+		  mode = GET_MODE (SET_DEST (PATTERN (insn)));
+		  FOR_EACH_INSN_USE (use, last_insn)
+		    {
+		      if (DF_REF_REGNO (use) == regno
+			  || regno_clobbered_p (regno, last_insn, mode, 2))
+			{
+			  cover = TRUE;
+			  break;
+			}
+		    }
+
+		  /* TODO: support more formats.  */
+		  if (cover)
+		    break;
+		  else if (JUMP_P (last_insn))
+		    {
+		      emit_insn_before_noloc (PATTERN (insn), last_insn, bb);
+		      emit_done = TRUE;
+		    }
+		  else
+		    break;
+		}
+
+	      if (emit_done)
+		{
+		  delete_insn (insn);
+		  occr->deleted_p = TRUE;
+		}
+	    }
+	}
+    }
+  return 1;
+}
+
+static int
+hoist_code (void)
+{
+  hoist_vbein = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), 1);
+  hoist_vbeout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), 1);
+
+  expr_table->traverse <void *, nds32_find_gcse_expr_table> (NULL);
+
+  sbitmap_vector_free (hoist_vbein);
+  sbitmap_vector_free (hoist_vbeout);
+
+  return 0;
+}
+
+
+static unsigned int
+nds32_gcse_opt (void)
+{
+
+  if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
+    return 0;
+  /* Allocate memory for this pass.
+     Also computes and initializes the insns' CUIDs.  */
+  alloc_mem ();
+
+  df_chain_add_problem (DF_DU_CHAIN);
+  df_insn_rescan_all ();
+  df_analyze ();
+
+  compute_hash_table ();
+
+  if (dump_file)
+    dump_hash_table (dump_file);
+
+  hoist_code ();
+
+  df_insn_rescan_all ();
+  free_mem ();
+  return 0;
+}
+
+const pass_data pass_data_nds32_gcse_opt =
+{
+  RTL_PASS,				/* type */
+  "gcse_opt",				/* name */
+  OPTGROUP_NONE,			/* optinfo_flags */
+  TV_MACH_DEP,				/* tv_id */
+  0,					/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  0,					/* todo_flags_finish */
+};
+
+class pass_nds32_gcse_opt : public rtl_opt_pass
+{
+public:
+  pass_nds32_gcse_opt (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_nds32_gcse_opt, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate (function *) { return TARGET_GCSE_OPT; }
+  unsigned int execute (function *) { return nds32_gcse_opt (); }
+};
+
+rtl_opt_pass *
+make_pass_nds32_gcse_opt (gcc::context *ctxt)
+{
+  return new pass_nds32_gcse_opt (ctxt);
+}
+
+/* ------------------------------------------------------------------------ */
diff --git a/gcc/config/nds32/nds32-graywolf.md b/gcc/config/nds32/nds32-graywolf.md
new file mode 100644
index 0000000..f9ddbd8
--- /dev/null
+++ b/gcc/config/nds32/nds32-graywolf.md
@@ -0,0 +1,471 @@
+;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; ------------------------------------------------------------------------
+;; Define Graywolf pipeline settings.
+;; ------------------------------------------------------------------------
+
+(define_automaton "nds32_graywolf_machine")
+
+(define_cpu_unit "gw_ii_0" "nds32_graywolf_machine")
+(define_cpu_unit "gw_ii_1" "nds32_graywolf_machine")
+(define_cpu_unit "gw_ex_p0" "nds32_graywolf_machine")
+(define_cpu_unit "gw_mm_p0" "nds32_graywolf_machine")
+(define_cpu_unit "gw_wb_p0" "nds32_graywolf_machine")
+(define_cpu_unit "gw_ex_p1" "nds32_graywolf_machine")
+(define_cpu_unit "gw_mm_p1" "nds32_graywolf_machine")
+(define_cpu_unit "gw_wb_p1" "nds32_graywolf_machine")
+(define_cpu_unit "gw_iq_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_rf_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_e1_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_e2_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_e3_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_e4_p2" "nds32_graywolf_machine")
+
+(define_reservation "gw_ii" "gw_ii_0 | gw_ii_1")
+(define_reservation "gw_ex" "gw_ex_p0 | gw_ex_p1")
+(define_reservation "gw_mm" "gw_mm_p0 | gw_mm_p1")
+(define_reservation "gw_wb" "gw_wb_p0 | gw_wb_p1")
+
+(define_reservation "gw_ii_all" "gw_ii_0 + gw_ii_1")
+
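+;; gw_ii_all reserves both issue slots at once, so reservations that use it
+;; (for example the post-update loads and 3-register stores below) cannot be
+;; dual-issued with another instruction.
+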
+(define_insn_reservation "nds_gw_unknown" 1
+  (and (eq_attr "type" "unknown")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_misc" 1
+  (and (eq_attr "type" "misc")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_mmu" 1
+  (and (eq_attr "type" "mmu")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_alu" 1
+  (and (and (eq_attr "type" "alu")
+            (match_test "!nds32::movd44_insn_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_movd44" 1
+  (and (and (eq_attr "type" "alu")
+            (match_test "nds32::movd44_insn_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_alu_shift" 1
+  (and (eq_attr "type" "alu_shift")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex*2, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_pbsad" 1
+  (and (eq_attr "type" "pbsad")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex*3, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_pbsada" 1
+  (and (eq_attr "type" "pbsada")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex*3, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_load" 1
+  (and (and (eq_attr "type" "load")
+            (match_test "!nds32::post_update_insn_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_2w" 1
+  (and (and (eq_attr "type" "load")
+            (match_test "nds32::post_update_insn_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_all, gw_ex_p1, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store" 1
+  (and (and (eq_attr "type" "store")
+            (match_test "!nds32::store_offset_reg_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_3r" 1
+  (and (and (eq_attr "type" "store")
+            (match_test "nds32::store_offset_reg_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_all, gw_ex_p1, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_1" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "1"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_2" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "2"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*2, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_3" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "3"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*3, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_4" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "4"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_5" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "5"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_6" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "6"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_7" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "7"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_8" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "8"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_12" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "12"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_1" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "1"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_2" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "2"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*2, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_3" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "3"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*3, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_4" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "4"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_5" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "5"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_6" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "6"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_7" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "7"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_8" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "8"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_12" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "12"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_mul_fast1" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_FAST_1")
+       (and (eq_attr "type" "mul")
+       (eq_attr "pipeline_model" "graywolf")))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_mul_fast2" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_FAST_2")
+       (and (eq_attr "type" "mul")
+       (eq_attr "pipeline_model" "graywolf")))
+  "gw_ii_0, gw_ex_p0*2, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_mul_slow" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
+       (and (eq_attr "type" "mul")
+       (eq_attr "pipeline_model" "graywolf")))
+  "gw_ii_0, gw_ex_p0*4, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_mac_fast1" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_FAST_1")
+       (and (eq_attr "type" "mac")
+       (eq_attr "pipeline_model" "graywolf")))
+  "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_mac_fast2" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_FAST_2")
+       (and (eq_attr "type" "mac")
+       (eq_attr "pipeline_model" "graywolf")))
+  "gw_ii_all, gw_ex_p0*2, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_mac_slow" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
+       (and (eq_attr "type" "mac")
+       (eq_attr "pipeline_model" "graywolf")))
+  "gw_ii_all, gw_ex_p0*4, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_div" 1
+  (and (and (eq_attr "type" "div")
+            (match_test "!nds32::divmod_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0*4, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_div_2w" 1
+  (and (and (eq_attr "type" "div")
+            (match_test "nds32::divmod_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_all, gw_ex_p0*4, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_branch" 1
+  (and (eq_attr "type" "branch")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_alu" 1
+  (and (eq_attr "type" "dalu")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_dsp_alu64" 1
+  (and (eq_attr "type" "dalu64")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_alu_round" 1
+  (and (eq_attr "type" "daluround")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_cmp" 1
+  (and (eq_attr "type" "dcmp")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_clip" 1
+  (and (eq_attr "type" "dclip")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_mul" 1
+  (and (eq_attr "type" "dmul")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_mac" 1
+  (and (eq_attr "type" "dmac")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_insb" 1
+  (and (eq_attr "type" "dinsb")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_pack" 1
+  (and (eq_attr "type" "dpack")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_bpick" 1
+  (and (eq_attr "type" "dbpick")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_wext" 1
+  (and (eq_attr "type" "dwext")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_fpu_alu" 4
+  (and (eq_attr "type" "falu")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_muls" 4
+  (and (eq_attr "type" "fmuls")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_muld" 4
+  (and (eq_attr "type" "fmuld")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_macs" 4
+  (and (eq_attr "type" "fmacs")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*3, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_macd" 4
+  (and (eq_attr "type" "fmacd")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*4, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_divs" 4
+  (and (ior (eq_attr "type" "fdivs")
+	    (eq_attr "type" "fsqrts"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*14, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_divd" 4
+  (and (ior (eq_attr "type" "fdivd")
+	    (eq_attr "type" "fsqrtd"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*28, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_fast_alu" 2
+  (and (ior (eq_attr "type" "fcmp")
+	    (ior (eq_attr "type" "fabs")
+		 (ior (eq_attr "type" "fcpy")
+		      (eq_attr "type" "fcmov"))))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_fmtsr" 1
+  (and (eq_attr "type" "fmtsr")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_fmtdr" 1
+  (and (eq_attr "type" "fmtdr")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ii+gw_iq_p2, gw_iq_p2+gw_rf_p2, gw_rf_p2+gw_e1_p2, gw_e1_p2+gw_e2_p2, gw_e2_p2+gw_e3_p2, gw_e3_p2+gw_e4_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_fmfsr" 1
+  (and (eq_attr "type" "fmfsr")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_fmfdr" 1
+  (and (eq_attr "type" "fmfdr")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ii+gw_iq_p2, gw_iq_p2+gw_rf_p2, gw_rf_p2+gw_e1_p2, gw_e1_p2+gw_e2_p2, gw_e2_p2+gw_e3_p2, gw_e3_p2+gw_e4_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_load" 3
+  (and (eq_attr "type" "fload")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_store" 1
+  (and (eq_attr "type" "fstore")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+;; FPU_ADDR_OUT -> FPU_ADDR_IN
+;; The main pipeline rules do not need this bypass because their default
+;; latency is already 1.
+(define_bypass 1
+  "nds_gw_fpu_load, nds_gw_fpu_store"
+  "nds_gw_fpu_load, nds_gw_fpu_store"
+  "nds32_gw_ex_to_ex_p"
+)
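+;; (A define_bypass overrides the producer reservation's default latency with
+;; the value given here for the listed consumer reservations; the optional
+;; guard function, nds32_gw_ex_to_ex_p above, restricts the bypass to the
+;; producer/consumer insn pairs for which it returns true.)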
+
+;; LD, MUL, MAC, DIV, DALU64, DMUL, DMAC, DALUROUND, DBPICK, DWEXT
+;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU,
+;;      DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb
+(define_bypass 2
+  "nds_gw_load, nds_gw_load_2w,\
+   nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\
+   nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\
+   nds_gw_div, nds_gw_div_2w,\
+   nds_gw_dsp_alu64, nds_gw_dsp_mul, nds_gw_dsp_mac,\
+   nds_gw_dsp_alu_round, nds_gw_dsp_bpick, nds_gw_dsp_wext"
+  "nds_gw_alu, nds_gw_movd44, nds_gw_alu_shift,\
+   nds_gw_pbsad, nds_gw_pbsada,\
+   nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\
+   nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\
+   nds_gw_branch,\
+   nds_gw_div, nds_gw_div_2w,\
+   nds_gw_load, nds_gw_load_2w, nds_gw_store, nds_gw_store_3r,\
+   nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\
+   nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\
+   nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12,\
+   nds_gw_store_multiple_1,nds_gw_store_multiple_2, nds_gw_store_multiple_3,\
+   nds_gw_store_multiple_4,nds_gw_store_multiple_5, nds_gw_store_multiple_6,\
+   nds_gw_store_multiple_7,nds_gw_store_multiple_8, nds_gw_store_multiple_12,\
+   nds_gw_mmu,\
+   nds_gw_dsp_alu, nds_gw_dsp_alu_round,\
+   nds_gw_dsp_mul, nds_gw_dsp_mac, nds_gw_dsp_pack,\
+   nds_gw_dsp_insb, nds_gw_dsp_cmp, nds_gw_dsp_clip,\
+   nds_gw_dsp_wext, nds_gw_dsp_bpick"
+  "nds32_gw_mm_to_ex_p"
+)
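+;; (The bypass latency of 2 models results that only become available after
+;; the MM stage: a consumer that needs the value in its EX stage on the very
+;; next cycle therefore stalls for one cycle.)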
+
+;; LMW(N, N)
+;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU
+;;      DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb
+(define_bypass 2
+  "nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\
+   nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\
+   nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12"
+  "nds_gw_alu, nds_gw_movd44, nds_gw_alu_shift,\
+   nds_gw_pbsad, nds_gw_pbsada,\
+   nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\
+   nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\
+   nds_gw_branch,\
+   nds_gw_div, nds_gw_div_2w,\
+   nds_gw_load, nds_gw_load_2w, nds_gw_store, nds_gw_store_3r,\
+   nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\
+   nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\
+   nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12,\
+   nds_gw_store_multiple_1,nds_gw_store_multiple_2, nds_gw_store_multiple_3,\
+   nds_gw_store_multiple_4,nds_gw_store_multiple_5, nds_gw_store_multiple_6,\
+   nds_gw_store_multiple_7,nds_gw_store_multiple_8, nds_gw_store_multiple_12,\
+   nds_gw_mmu,\
+   nds_gw_dsp_alu, nds_gw_dsp_alu_round,\
+   nds_gw_dsp_mul, nds_gw_dsp_mac, nds_gw_dsp_pack,\
+   nds_gw_dsp_insb, nds_gw_dsp_cmp, nds_gw_dsp_clip,\
+   nds_gw_dsp_wext, nds_gw_dsp_bpick"
+  "nds32_gw_last_load_to_ex_p"
+)
diff --git a/gcc/config/nds32/nds32-intrinsic.c b/gcc/config/nds32/nds32-intrinsic.c
index fabf262..7547fb1 100644
--- a/gcc/config/nds32/nds32-intrinsic.c
+++ b/gcc/config/nds32/nds32-intrinsic.c
@@ -24,210 +24,1867 @@
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
-#include "target.h"
-#include "rtl.h"
 #include "tree.h"
-#include "optabs.h"		/* For GEN_FCN.  */
-#include "diagnostic-core.h"
+#include "rtl.h"
+#include "df.h"
+#include "alias.h"
 #include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
+#include "regs.h"
+#include "insn-config.h"	/* Required by recog.h.  */
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"		/* For DFA state_t.  */
+#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
+#include "reload.h"		/* For push_reload().  */
+#include "flags.h"
+#include "insn-config.h"
+#include "expmed.h"
+#include "dojump.h"
+#include "explow.h"
+#include "emit-rtl.h"
+#include "stmt.h"
 #include "expr.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "cfgbuild.h"
+#include "cfgcleanup.h"
+#include "tm_p.h"
+#include "tm-constrs.h"
+#include "optabs.h"		/* For GEN_FCN.  */
+#include "target.h"
 #include "langhooks.h"		/* For add_builtin_function().  */
+#include "builtins.h"
 
 /* ------------------------------------------------------------------------ */
 
-/* Function to expand builtin function for
-   '[(unspec_volatile [(reg)])]'.  */
+/* Read the requested argument from the EXP given by INDEX.
+   Return the value as an rtx.  */
+static rtx
+nds32_read_argument (tree exp, unsigned int index)
+{
+  return expand_normal (CALL_EXPR_ARG (exp, index));
+}
+
+/* Return a legitimate rtx for instruction ICODE's return value.  Use TARGET
+   if it's not null, has the right mode, and satisfies operand 0's
+   predicate.  */
+static rtx
+nds32_legitimize_target (enum insn_code icode, rtx target)
+{
+  enum machine_mode mode = insn_data[icode].operand[0].mode;
+
+  if (! target
+      || GET_MODE (target) != mode
+      || ! (*insn_data[icode].operand[0].predicate) (target, mode))
+    return gen_reg_rtx (mode);
+  else
+    return target;
+}
+
+/* Given that ARG is being passed as operand OPNUM to instruction ICODE,
+   check whether ARG satisfies the operand's constraints.  If it doesn't,
+   copy ARG to a temporary register and return that.  Otherwise return ARG
+   itself.  */
 static rtx
-nds32_expand_builtin_null_ftype_reg (enum insn_code icode,
-				     tree exp, rtx target)
+nds32_legitimize_argument (enum insn_code icode, int opnum, rtx arg)
+{
+  enum machine_mode mode = insn_data[icode].operand[opnum].mode;
+
+  if ((*insn_data[icode].operand[opnum].predicate) (arg, mode))
+    return arg;
+  else if (VECTOR_MODE_P (mode) && CONST_INT_P (arg))
+    {
+      /* Handle conversion of a CONST_INT into a CONST_VECTOR.  */
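+      /* For example, on a little-endian target with V4QImode, the constant
+	 0x04030201 is split into the QImode lanes {0x01, 0x02, 0x03, 0x04}.  */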
+      int nunits = GET_MODE_NUNITS (mode);
+      int i, shift = 0;
+      rtvec v = rtvec_alloc (nunits);
+      int val = INTVAL (arg);
+      enum machine_mode val_mode = (mode == V4QImode) ? QImode : HImode;
+      int shift_acc = (val_mode == QImode) ? 8 : 16;
+      int mask = (val_mode == QImode) ? 0xff : 0xffff;
+      int tmp_val = val;
+
+      if (TARGET_BIG_ENDIAN)
+	for (i = 0; i < nunits; i++)
+	  {
+	    tmp_val = (val >> shift) & mask;
+	    RTVEC_ELT (v, nunits - i - 1) = gen_int_mode (tmp_val, val_mode);
+	    shift += shift_acc;
+	  }
+      else
+	for (i = 0; i < nunits; i++)
+	  {
+	    tmp_val = (val >> shift) & mask;
+	    RTVEC_ELT (v, i) = gen_int_mode (tmp_val, val_mode);
+	    shift += shift_acc;
+	  }
+
+      return copy_to_mode_reg (mode, gen_rtx_CONST_VECTOR (mode, v));
+    }
+  else
+    {
+      rtx tmp_rtx = gen_reg_rtx (mode);
+      convert_move (tmp_rtx, arg, false);
+      return tmp_rtx;
+    }
+}
+
+/* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
+   The instruction should require a constant operand of some sort.  The
+   function prints an error if OPVAL is not valid.  */
+static int
+nds32_check_constant_argument (enum insn_code icode, int opnum, rtx opval,
+			       const char *name)
 {
-  /* Mapping:
-       ops[0] <--> value0 <--> arg0 */
-  struct expand_operand ops[1];
-  tree arg0;
-  rtx value0;
+  if (GET_CODE (opval) != CONST_INT)
+    {
+      error ("invalid argument to built-in function %s", name);
+      return false;
+    }
+  if (! (*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
+    {
+      error ("constant argument out of range for %s", name);
+
+      return false;
+    }
+  return true;
+}
 
-  /* Grab the incoming arguments and extract its rtx.  */
-  arg0 = CALL_EXPR_ARG (exp, 0);
-  value0 = expand_normal (arg0);
+/* Expand builtins that return target.  */
+static rtx
+nds32_expand_noarg_builtin (enum insn_code icode, rtx target)
+{
+  rtx pat;
 
-  /* Create operands.  */
-  create_input_operand (&ops[0], value0, TYPE_MODE (TREE_TYPE (arg0)));
+  target = nds32_legitimize_target (icode, target);
 
-  /* Emit new instruction.  */
-  if (!maybe_expand_insn (icode, 1, ops))
-    error ("invalid argument to built-in function");
+  /* Emit and return the new instruction.  */
+  pat = GEN_FCN (icode) (target);
+  if (! pat)
+    return NULL_RTX;
 
+  emit_insn (pat);
   return target;
 }
 
-/* Function to expand builtin function for
-   '[(set (reg) (unspec_volatile [(imm)]))]'.  */
+/* Expand builtins that take one operand.  */
 static rtx
-nds32_expand_builtin_reg_ftype_imm (enum insn_code icode,
-				    tree exp, rtx target)
+nds32_expand_unop_builtin (enum insn_code icode, tree exp, rtx target,
+			   bool return_p)
 {
-  /* Mapping:
-       ops[0] <--> target <--> exp
-       ops[1] <--> value0 <--> arg0 */
-  struct expand_operand ops[2];
-  tree arg0;
-  rtx value0;
+  rtx pat;
+  rtx op0 = nds32_read_argument (exp, 0);
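+  /* When the builtin returns a value, operand 0 of the insn pattern is the
+     result, so the incoming argument maps to operand 1 instead of 0.  */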
+  int op0_num = return_p ? 1 : 0;
+
+  if (return_p)
+    target = nds32_legitimize_target (icode, target);
 
-  /* Grab the incoming arguments and extract its rtx.  */
-  arg0 = CALL_EXPR_ARG (exp, 0);
-  value0 = expand_normal (arg0);
+  op0 = nds32_legitimize_argument (icode, op0_num, op0);
 
-  /* Create operands.  */
-  create_output_operand (&ops[0], target, TYPE_MODE (TREE_TYPE (exp)));
-  create_input_operand (&ops[1], value0, TYPE_MODE (TREE_TYPE (arg0)));
+  /* Emit and return the new instruction.  */
+  if (return_p)
+    pat = GEN_FCN (icode) (target, op0);
+  else
+    pat = GEN_FCN (icode) (op0);
 
-  /* Emit new instruction.  */
-  if (!maybe_expand_insn (icode, 2, ops))
-    error ("invalid argument to built-in function");
+  if (! pat)
+    return NULL_RTX;
 
+  emit_insn (pat);
   return target;
 }
 
-/* Function to expand builtin function for
-   '[(unspec_volatile [(reg) (imm)])]' pattern.  */
+/* Expand builtins that take one operand, and the operand is immediate.  */
 static rtx
-nds32_expand_builtin_null_ftype_reg_imm (enum insn_code icode,
-					 tree exp, rtx target)
-{
-  /* Mapping:
-       ops[0] <--> value0 <--> arg0
-       ops[1] <--> value1 <--> arg1 */
-  struct expand_operand ops[2];
-  tree arg0, arg1;
-  rtx value0, value1;
-
-  /* Grab the incoming arguments and extract its rtx.  */
-  arg0 = CALL_EXPR_ARG (exp, 0);
-  arg1 = CALL_EXPR_ARG (exp, 1);
-  value0 = expand_normal (arg0);
-  value1 = expand_normal (arg1);
-
-  /* Create operands.  */
-  create_input_operand (&ops[0], value0, TYPE_MODE (TREE_TYPE (arg0)));
-  create_input_operand (&ops[1], value1, TYPE_MODE (TREE_TYPE (arg1)));
-
-  /* Emit new instruction.  */
-  if (!maybe_expand_insn (icode, 2, ops))
-    error ("invalid argument to built-in function");
+nds32_expand_unopimm_builtin (enum insn_code icode, tree exp, rtx target,
+			      bool return_p, const char *name)
+{
+  rtx pat;
+  rtx op0 = nds32_read_argument (exp, 0);
+  int op0_num = return_p ? 1 : 0;
+
+  if (return_p)
+    target = nds32_legitimize_target (icode, target);
+
+  if (!nds32_check_constant_argument (icode, op0_num, op0, name))
+    return NULL_RTX;
+
+  op0 = nds32_legitimize_argument (icode, op0_num, op0);
 
+  /* Emit and return the new instruction.  */
+  if (return_p)
+    pat = GEN_FCN (icode) (target, op0);
+  else
+    pat = GEN_FCN (icode) (op0);
+
+  if (! pat)
+    return NULL_RTX;
+
+  emit_insn (pat);
   return target;
 }
 
-/* ------------------------------------------------------------------------ */
+/* Expand builtins that take two operands.  */
+static rtx
+nds32_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
+			    bool return_p)
+{
+  rtx pat;
+  rtx op0 = nds32_read_argument (exp, 0);
+  rtx op1 = nds32_read_argument (exp, 1);
+  int op0_num = return_p ? 1 : 0;
+  int op1_num = return_p ? 2 : 1;
 
-void
-nds32_init_builtins_impl (void)
+  if (return_p)
+    target = nds32_legitimize_target (icode, target);
+
+  op0 = nds32_legitimize_argument (icode, op0_num, op0);
+  op1 = nds32_legitimize_argument (icode, op1_num, op1);
+
+  /* Emit and return the new instruction.  */
+  if (return_p)
+    pat = GEN_FCN (icode) (target, op0, op1);
+  else
+    pat = GEN_FCN (icode) (op0, op1);
+
+  if (! pat)
+    return NULL_RTX;
+
+  emit_insn (pat);
+  return target;
+}
+
+/* Expand builtins that take two operands and the second is immediate.  */
+static rtx
+nds32_expand_binopimm_builtin (enum insn_code icode, tree exp, rtx target,
+			       bool return_p, const char *name)
 {
-  tree pointer_type_node  = build_pointer_type (integer_type_node);
+  rtx pat;
+  rtx op0 = nds32_read_argument (exp, 0);
+  rtx op1 = nds32_read_argument (exp, 1);
+  int op0_num = return_p ? 1 : 0;
+  int op1_num = return_p ? 2 : 1;
 
-  tree void_ftype_void    = build_function_type (void_type_node,
-						 void_list_node);
+  if (return_p)
+    target = nds32_legitimize_target (icode, target);
 
-  tree void_ftype_pint    = build_function_type_list (void_type_node,
-						      pointer_type_node,
-						      NULL_TREE);
+  if (!nds32_check_constant_argument (icode, op1_num, op1, name))
+    return NULL_RTX;
 
-  tree int_ftype_int      = build_function_type_list (integer_type_node,
-						      integer_type_node,
-						      NULL_TREE);
+  op0 = nds32_legitimize_argument (icode, op0_num, op0);
+  op1 = nds32_legitimize_argument (icode, op1_num, op1);
 
-  tree void_ftype_int_int = build_function_type_list (void_type_node,
-						      integer_type_node,
-						      integer_type_node,
-						      NULL_TREE);
+  /* Emit and return the new instruction.  */
+  if (return_p)
+    pat = GEN_FCN (icode) (target, op0, op1);
+  else
+    pat = GEN_FCN (icode) (op0, op1);
 
-  /* Cache.  */
-  add_builtin_function ("__builtin_nds32_isync",  void_ftype_pint,
-			NDS32_BUILTIN_ISYNC,
-			BUILT_IN_MD, NULL, NULL_TREE);
-  add_builtin_function ("__builtin_nds32_isb",  void_ftype_void,
-			NDS32_BUILTIN_ISB,
-			BUILT_IN_MD, NULL, NULL_TREE);
+  if (! pat)
+    return NULL_RTX;
 
-  /* Register Transfer.  */
-  add_builtin_function ("__builtin_nds32_mfsr",  int_ftype_int,
-			NDS32_BUILTIN_MFSR,
-			BUILT_IN_MD, NULL, NULL_TREE);
-  add_builtin_function ("__builtin_nds32_mfusr", int_ftype_int,
-			NDS32_BUILTIN_MFUSR,
-			BUILT_IN_MD, NULL, NULL_TREE);
-  add_builtin_function ("__builtin_nds32_mtsr",  void_ftype_int_int,
-			NDS32_BUILTIN_MTSR,
-			BUILT_IN_MD, NULL, NULL_TREE);
-  add_builtin_function ("__builtin_nds32_mtusr", void_ftype_int_int,
-			NDS32_BUILTIN_MTUSR,
-			BUILT_IN_MD, NULL, NULL_TREE);
+  emit_insn (pat);
+  return target;
+}
 
-  /* Interrupt.  */
-  add_builtin_function ("__builtin_nds32_setgie_en",  void_ftype_void,
-			NDS32_BUILTIN_SETGIE_EN,
-			BUILT_IN_MD, NULL, NULL_TREE);
-  add_builtin_function ("__builtin_nds32_setgie_dis", void_ftype_void,
-			NDS32_BUILTIN_SETGIE_DIS,
-			BUILT_IN_MD, NULL, NULL_TREE);
+/* Expand builtins that take three operands.  */
+static rtx
+nds32_expand_triop_builtin (enum insn_code icode, tree exp, rtx target,
+			    bool return_p)
+{
+  rtx pat;
+  rtx op0 = nds32_read_argument (exp, 0);
+  rtx op1 = nds32_read_argument (exp, 1);
+  rtx op2 = nds32_read_argument (exp, 2);
+  int op0_num = return_p ? 1 : 0;
+  int op1_num = return_p ? 2 : 1;
+  int op2_num = return_p ? 3 : 2;
+
+  if (return_p)
+    target = nds32_legitimize_target (icode, target);
+
+  op0 = nds32_legitimize_argument (icode, op0_num, op0);
+  op1 = nds32_legitimize_argument (icode, op1_num, op1);
+  op2 = nds32_legitimize_argument (icode, op2_num, op2);
+
+  /* Emit and return the new instruction.  */
+  if (return_p)
+    pat = GEN_FCN (icode) (target, op0, op1, op2);
+  else
+    pat = GEN_FCN (icode) (op0, op1, op2);
+
+  if (! pat)
+    return NULL_RTX;
+
+  emit_insn (pat);
+  return target;
+}
+
+/* Expand builtins that take three operands and the third is immediate.  */
+static rtx
+nds32_expand_triopimm_builtin (enum insn_code icode, tree exp, rtx target,
+			       bool return_p, const char *name)
+{
+  rtx pat;
+  rtx op0 = nds32_read_argument (exp, 0);
+  rtx op1 = nds32_read_argument (exp, 1);
+  rtx op2 = nds32_read_argument (exp, 2);
+  int op0_num = return_p ? 1 : 0;
+  int op1_num = return_p ? 2 : 1;
+  int op2_num = return_p ? 3 : 2;
+
+  if (return_p)
+    target = nds32_legitimize_target (icode, target);
+
+  if (!nds32_check_constant_argument (icode, op2_num, op2, name))
+    return NULL_RTX;
+
+  op0 = nds32_legitimize_argument (icode, op0_num, op0);
+  op1 = nds32_legitimize_argument (icode, op1_num, op1);
+  op2 = nds32_legitimize_argument (icode, op2_num, op2);
+
+  /* Emit and return the new instruction.  */
+  if (return_p)
+    pat = GEN_FCN (icode) (target, op0, op1, op2);
+  else
+    pat = GEN_FCN (icode) (op0, op1, op2);
+
+  if (! pat)
+    return NULL_RTX;
+
+  emit_insn (pat);
+  return target;
+}
+
+/* Expand builtins for load.  */
+static rtx
+nds32_expand_builtin_load (enum insn_code icode, tree exp, rtx target)
+{
+  /* The load address format is [$ra + $rb], but the builtin does not supply
+     enough arguments, so we need an extra temporary register to act as $rb.
+     The generated assembly code is:
+       movi $temp, 0
+       llw  $rt, [$ra + $temp]  */
+  rtx pat;
+  rtx op0 = nds32_read_argument (exp, 0);
+  rtx addr_helper = gen_reg_rtx (insn_data[icode].operand[1].mode);
+
+  target = nds32_legitimize_target (icode, target);
+  op0 = nds32_legitimize_argument (icode, 1, op0);
+
+  /* Emit and return the new instruction.  */
+  pat = GEN_FCN (icode) (target, op0, addr_helper);
+  if (!pat)
+    return NULL_RTX;
+
+  emit_move_insn (addr_helper, GEN_INT (0));
+  emit_insn (pat);
+  return target;
+}
+
+/* Expand builtins for store.  */
+static rtx
+nds32_expand_builtin_store (enum insn_code icode, tree exp, rtx target)
+{
+  /* The store address format is [$ra + $rb], but the builtin does not supply
+     enough arguments, so we need an extra temporary register to act as $rb.
+     The generated assembly code is:
+       movi   $temp, 0
+       store  $rt, [$ra + $temp]  */
+  rtx pat;
+  rtx op0 = nds32_read_argument (exp, 0);
+  rtx op1 = nds32_read_argument (exp, 1);
+  rtx addr_helper = gen_reg_rtx (insn_data[icode].operand[1].mode);
+
+  op0 = nds32_legitimize_argument (icode, 0, op0);
+  op1 = nds32_legitimize_argument (icode, 2, op1);
+
+  /* Emit and return the new instruction.  */
+  pat = GEN_FCN (icode) (op0, addr_helper, op1);
+  if (! pat)
+    return NULL_RTX;
+
+  emit_move_insn (addr_helper, GEN_INT (0));
+  emit_insn (pat);
+  return target;
+}
+
+/* Expand cctl builtins.  */
+static rtx
+nds32_expand_cctl_builtin (enum insn_code icode, tree exp, rtx target,
+			   bool return_p, const char *name)
+{
+  rtx pat;
+  rtx op0 = nds32_read_argument (exp, 0);
+  rtx op1 = nds32_read_argument (exp, 1);
+  int op0_num = return_p ? 1 : 0;
+  int op1_num = return_p ? 2 : 1;
+
+  if (return_p)
+    target = nds32_legitimize_target (icode, target);
+
+  if (!nds32_check_constant_argument (icode, op0_num, op0, name))
+    return NULL_RTX;
+
+  op0 = nds32_legitimize_argument (icode, op0_num, op0);
+  op1 = nds32_legitimize_argument (icode, op1_num, op1);
+
+  /* Emit and return the new instruction.  */
+  if (icode == CODE_FOR_cctl_idx_write)
+    {
+      /* cctl_idx_write takes three arguments, so create operand 2 for
+	 the cctl_idx_write pattern.  */
+      rtx op2 = nds32_read_argument (exp, 2);
+      op2 = nds32_legitimize_argument (icode, 2, op2);
+      pat = GEN_FCN (icode) (op0, op1, op2);
+    }
+  else if (return_p)
+    pat = GEN_FCN (icode) (target, op0, op1);
+  else
+    pat = GEN_FCN (icode) (op0, op1);
+
+  if (! pat)
+    return NULL_RTX;
+
+  emit_insn (pat);
+  return target;
+}
+
+/* Expand scw builtins.  */
+static rtx
+nds32_expand_scw_builtin (enum insn_code icode, tree exp, rtx target)
+{
+  /* The SCW address format is [$ra + $rb], but the builtin does not supply
+     enough arguments, so we need an extra temporary register to act as $rb.
+     The generated assembly code is:
+	movi $temp, 0
+	scw  $rt, [$ra + $temp]  */
+  rtx pat;
+  rtx op0 = nds32_read_argument (exp, 0);
+  rtx op1 = nds32_read_argument (exp, 1);
+  rtx addr_helper = gen_reg_rtx (insn_data[icode].operand[1].mode);
+
+  target = nds32_legitimize_target (icode, target);
+  op0 = nds32_legitimize_argument (icode, 1, op0);
+  op1 = nds32_legitimize_argument (icode, 2, op1);
+
+  /* Emit and return the new instruction.  */
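+  /* The nds32 scw instruction reports success or failure of the conditional
+     store in the same register that holds the value to be stored, so TARGET
+     is used both as the value operand and as the result operand here.  */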
+  pat = GEN_FCN (icode) (target, op0, addr_helper, target);
+
+  if (!pat)
+    return NULL_RTX;
+
+  emit_move_insn (addr_helper, GEN_INT (0));
+  emit_move_insn (target, op1);
+  emit_insn (pat);
+  return target;
 }
 
+/* Expand the set_int_priority builtin.  */
+static rtx
+nds32_expand_priority_builtin (enum insn_code icode, tree exp, rtx target,
+			       const char *name)
+{
+  rtx pat;
+  rtx op0 = nds32_read_argument (exp, 0);
+  rtx op1 = nds32_read_argument (exp, 1);
+
+  /* Both arguments of the set_int_priority intrinsic must be immediate
+     values, so check that each argument is a constant.  */
+  if (!nds32_check_constant_argument (icode, 0, op0, name))
+    return NULL_RTX;
+
+  if (!nds32_check_constant_argument (icode, 1, op1, name))
+    return NULL_RTX;
+
+  op0 = nds32_legitimize_argument (icode, 0, op0);
+  op1 = nds32_legitimize_argument (icode, 1, op1);
+
+  /* Emit and return the new instruction.  */
+  pat = GEN_FCN (icode) (op0, op1);
+
+  if (! pat)
+    return NULL_RTX;
+
+  emit_insn (pat);
+  return target;
+}
+
+struct builtin_description
+{
+  const enum insn_code icode;
+  const char *name;
+  enum nds32_builtins code;
+  bool return_p;
+};
+
+#define NDS32_BUILTIN(code, string, builtin) \
+  { CODE_FOR_##code, "__nds32__" string, \
+    NDS32_BUILTIN_##builtin, true },
+
+#define NDS32_NO_TARGET_BUILTIN(code, string, builtin) \
+  { CODE_FOR_##code, "__nds32__" string, \
+    NDS32_BUILTIN_##builtin, false },
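+
+/* For example, NDS32_BUILTIN(clzsi2, "clz", CLZ) expands to
+   { CODE_FOR_clzsi2, "__nds32__clz", NDS32_BUILTIN_CLZ, true },
+   i.e. a __nds32__clz builtin whose result is returned through operand 0.  */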
+
+/* Intrinsics that take no arguments and return a value.  */
+static struct builtin_description bdesc_noarg[] =
+{
+  NDS32_BUILTIN(unspec_fmfcfg, "fmfcfg", FMFCFG)
+  NDS32_BUILTIN(unspec_fmfcsr, "fmfcsr", FMFCSR)
+  NDS32_BUILTIN(unspec_volatile_rdov, "rdov", RDOV)
+  NDS32_BUILTIN(unspec_get_current_sp, "get_current_sp", GET_CURRENT_SP)
+  NDS32_BUILTIN(unspec_return_address, "return_address", RETURN_ADDRESS)
+  NDS32_BUILTIN(unspec_get_all_pending_int, "get_all_pending_int",
+		GET_ALL_PENDING_INT)
+  NDS32_BUILTIN(unspec_unaligned_feature, "unaligned_feature",
+		UNALIGNED_FEATURE)
+  NDS32_NO_TARGET_BUILTIN(unspec_enable_unaligned, "enable_unaligned",
+			  ENABLE_UNALIGNED)
+  NDS32_NO_TARGET_BUILTIN(unspec_disable_unaligned, "disable_unaligned",
+			  DISABLE_UNALIGNED)
+};
+
+/* Intrinsics that take just one argument.  */
+static struct builtin_description bdesc_1arg[] =
+{
+  NDS32_BUILTIN(unspec_ssabssi2, "abs", ABS)
+  NDS32_BUILTIN(clzsi2, "clz", CLZ)
+  NDS32_BUILTIN(unspec_clo, "clo", CLO)
+  NDS32_BUILTIN(unspec_wsbh, "wsbh", WSBH)
+  NDS32_BUILTIN(unspec_tlbop_pb, "tlbop_pb", TLBOP_PB)
+  NDS32_BUILTIN(unaligned_load_hw, "unaligned_load_hw", UALOAD_HW)
+  NDS32_BUILTIN(unaligned_loadsi, "unaligned_load_w", UALOAD_W)
+  NDS32_BUILTIN(unaligned_loaddi, "unaligned_load_dw", UALOAD_DW)
+  NDS32_NO_TARGET_BUILTIN(unspec_volatile_isync, "isync", ISYNC)
+  NDS32_NO_TARGET_BUILTIN(unspec_fmtcsr, "fmtcsr", FMTCSR)
+  NDS32_NO_TARGET_BUILTIN(unspec_jr_itoff, "jr_itoff", JR_ITOFF)
+  NDS32_NO_TARGET_BUILTIN(unspec_jr_toff, "jr_toff", JR_TOFF)
+  NDS32_NO_TARGET_BUILTIN(unspec_jral_ton, "jral_ton", JRAL_TON)
+  NDS32_NO_TARGET_BUILTIN(unspec_ret_toff, "ret_toff", RET_TOFF)
+  NDS32_NO_TARGET_BUILTIN(unspec_jral_iton, "jral_iton", JRAL_ITON)
+  NDS32_NO_TARGET_BUILTIN(unspec_tlbop_trd, "tlbop_trd", TLBOP_TRD)
+  NDS32_NO_TARGET_BUILTIN(unspec_tlbop_twr, "tlbop_twr", TLBOP_TWR)
+  NDS32_NO_TARGET_BUILTIN(unspec_tlbop_rwr, "tlbop_rwr", TLBOP_RWR)
+  NDS32_NO_TARGET_BUILTIN(unspec_tlbop_rwlk, "tlbop_rwlk", TLBOP_RWLK)
+  NDS32_NO_TARGET_BUILTIN(unspec_tlbop_unlk, "tlbop_unlk", TLBOP_UNLK)
+  NDS32_NO_TARGET_BUILTIN(unspec_tlbop_inv, "tlbop_inv", TLBOP_INV)
+  NDS32_NO_TARGET_BUILTIN(unspec_ret_itoff, "ret_itoff", RET_ITOFF)
+  NDS32_NO_TARGET_BUILTIN(unspec_set_current_sp,
+			  "set_current_sp", SET_CURRENT_SP)
+  NDS32_BUILTIN(kabsv2hi2, "kabs16", KABS16)
+  NDS32_BUILTIN(kabsv2hi2, "v_kabs16", V_KABS16)
+  NDS32_BUILTIN(kabsv4qi2, "kabs8", KABS8)
+  NDS32_BUILTIN(kabsv4qi2, "v_kabs8", V_KABS8)
+  NDS32_BUILTIN(sunpkd810, "sunpkd810", SUNPKD810)
+  NDS32_BUILTIN(sunpkd810, "v_sunpkd810", V_SUNPKD810)
+  NDS32_BUILTIN(sunpkd820, "sunpkd820", SUNPKD820)
+  NDS32_BUILTIN(sunpkd820, "v_sunpkd820", V_SUNPKD820)
+  NDS32_BUILTIN(sunpkd830, "sunpkd830", SUNPKD830)
+  NDS32_BUILTIN(sunpkd830, "v_sunpkd830", V_SUNPKD830)
+  NDS32_BUILTIN(sunpkd831, "sunpkd831", SUNPKD831)
+  NDS32_BUILTIN(sunpkd831, "v_sunpkd831", V_SUNPKD831)
+  NDS32_BUILTIN(zunpkd810, "zunpkd810", ZUNPKD810)
+  NDS32_BUILTIN(zunpkd810, "v_zunpkd810", V_ZUNPKD810)
+  NDS32_BUILTIN(zunpkd820, "zunpkd820", ZUNPKD820)
+  NDS32_BUILTIN(zunpkd820, "v_zunpkd820", V_ZUNPKD820)
+  NDS32_BUILTIN(zunpkd830, "zunpkd830", ZUNPKD830)
+  NDS32_BUILTIN(zunpkd830, "v_zunpkd830", V_ZUNPKD830)
+  NDS32_BUILTIN(zunpkd831, "zunpkd831", ZUNPKD831)
+  NDS32_BUILTIN(zunpkd831, "v_zunpkd831", V_ZUNPKD831)
+  NDS32_BUILTIN(unspec_kabs, "kabs", KABS)
+  NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_u16x2", UALOAD_U16)
+  NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_s16x2", UALOAD_S16)
+  NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_u8x4", UALOAD_U8)
+  NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_s8x4", UALOAD_S8)
+};
+
+/* Intrinsics that take just one argument, and the argument is an
+   immediate.  */
+static struct builtin_description bdesc_1argimm[] =
+{
+  NDS32_BUILTIN(unspec_volatile_mfsr, "mfsr", MFSR)
+  NDS32_BUILTIN(unspec_volatile_mfusr, "mfusr", MFUSR)
+  NDS32_BUILTIN(unspec_get_pending_int, "get_pending_int", GET_PENDING_INT)
+  NDS32_BUILTIN(unspec_get_int_priority, "get_int_priority", GET_INT_PRIORITY)
+  NDS32_NO_TARGET_BUILTIN(unspec_trap, "trap", TRAP)
+  NDS32_NO_TARGET_BUILTIN(unspec_break, "break", BREAK)
+  NDS32_NO_TARGET_BUILTIN(unspec_syscall, "syscall", SYSCALL)
+  NDS32_NO_TARGET_BUILTIN(unspec_enable_int, "enable_int", ENABLE_INT)
+  NDS32_NO_TARGET_BUILTIN(unspec_disable_int, "disable_int", DISABLE_INT)
+  NDS32_NO_TARGET_BUILTIN(unspec_clr_pending_hwint, "clr_pending_hwint",
+			  CLR_PENDING_HWINT)
+  NDS32_NO_TARGET_BUILTIN(unspec_set_trig_level, "set_trig_level",
+			  SET_TRIG_LEVEL)
+  NDS32_NO_TARGET_BUILTIN(unspec_set_trig_edge, "set_trig_edge",
+			  SET_TRIG_EDGE)
+  NDS32_BUILTIN(unspec_get_trig_type, "get_trig_type", GET_TRIG_TYPE)
+};
+
+/* Intrinsics that take two arguments.  */
+static struct builtin_description bdesc_2arg[] =
+{
+  NDS32_BUILTIN(unspec_fcpynss, "fcpynss", FCPYNSS)
+  NDS32_BUILTIN(unspec_fcpyss, "fcpyss", FCPYSS)
+  NDS32_BUILTIN(unspec_fcpynsd, "fcpynsd", FCPYNSD)
+  NDS32_BUILTIN(unspec_fcpysd, "fcpysd", FCPYSD)
+  NDS32_BUILTIN(unspec_ave, "ave", AVE)
+  NDS32_BUILTIN(unspec_pbsad, "pbsad", PBSAD)
+  NDS32_BUILTIN(unspec_ffb, "ffb", FFB)
+  NDS32_BUILTIN(unspec_ffmism, "ffmism", FFMISM)
+  NDS32_BUILTIN(unspec_flmism, "flmism", FLMISM)
+  NDS32_BUILTIN(unspec_kaddw, "kaddw", KADDW)
+  NDS32_BUILTIN(unspec_kaddh, "kaddh", KADDH)
+  NDS32_BUILTIN(unspec_ksubw, "ksubw", KSUBW)
+  NDS32_BUILTIN(unspec_ksubh, "ksubh", KSUBH)
+  NDS32_BUILTIN(unspec_kdmbb, "kdmbb", KDMBB)
+  NDS32_BUILTIN(unspec_kdmbb, "v_kdmbb", V_KDMBB)
+  NDS32_BUILTIN(unspec_kdmbt, "kdmbt", KDMBT)
+  NDS32_BUILTIN(unspec_kdmbt, "v_kdmbt", V_KDMBT)
+  NDS32_BUILTIN(unspec_kdmtb, "kdmtb", KDMTB)
+  NDS32_BUILTIN(unspec_kdmtb, "v_kdmtb", V_KDMTB)
+  NDS32_BUILTIN(unspec_kdmtt, "kdmtt", KDMTT)
+  NDS32_BUILTIN(unspec_kdmtt, "v_kdmtt", V_KDMTT)
+  NDS32_BUILTIN(unspec_khmbb, "khmbb", KHMBB)
+  NDS32_BUILTIN(unspec_khmbb, "v_khmbb", V_KHMBB)
+  NDS32_BUILTIN(unspec_khmbt, "khmbt", KHMBT)
+  NDS32_BUILTIN(unspec_khmbt, "v_khmbt", V_KHMBT)
+  NDS32_BUILTIN(unspec_khmtb, "khmtb", KHMTB)
+  NDS32_BUILTIN(unspec_khmtb, "v_khmtb", V_KHMTB)
+  NDS32_BUILTIN(unspec_khmtt, "khmtt", KHMTT)
+  NDS32_BUILTIN(unspec_khmtt, "v_khmtt", V_KHMTT)
+  NDS32_BUILTIN(unspec_kslraw, "kslraw", KSLRAW)
+  NDS32_BUILTIN(unspec_kslrawu, "kslraw_u", KSLRAW_U)
+  NDS32_BUILTIN(rotrsi3, "rotr", ROTR)
+  NDS32_BUILTIN(unspec_sva, "sva", SVA)
+  NDS32_BUILTIN(unspec_svs, "svs", SVS)
+  NDS32_NO_TARGET_BUILTIN(mtsr_isb, "mtsr_isb", MTSR_ISB)
+  NDS32_NO_TARGET_BUILTIN(mtsr_dsb, "mtsr_dsb", MTSR_DSB)
+  NDS32_NO_TARGET_BUILTIN(unspec_volatile_mtsr, "mtsr", MTSR)
+  NDS32_NO_TARGET_BUILTIN(unspec_volatile_mtusr, "mtusr", MTUSR)
+  NDS32_NO_TARGET_BUILTIN(unaligned_store_hw, "unaligned_store_hw", UASTORE_HW)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storesi, "unaligned_store_w", UASTORE_W)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storedi, "unaligned_store_dw", UASTORE_DW)
+  NDS32_BUILTIN(addv2hi3, "add16", ADD16)
+  NDS32_BUILTIN(addv2hi3, "v_uadd16", V_UADD16)
+  NDS32_BUILTIN(addv2hi3, "v_sadd16", V_SADD16)
+  NDS32_BUILTIN(raddv2hi3, "radd16", RADD16)
+  NDS32_BUILTIN(raddv2hi3, "v_radd16", V_RADD16)
+  NDS32_BUILTIN(uraddv2hi3, "uradd16", URADD16)
+  NDS32_BUILTIN(uraddv2hi3, "v_uradd16", V_URADD16)
+  NDS32_BUILTIN(kaddv2hi3, "kadd16", KADD16)
+  NDS32_BUILTIN(kaddv2hi3, "v_kadd16", V_KADD16)
+  NDS32_BUILTIN(ukaddv2hi3, "ukadd16", UKADD16)
+  NDS32_BUILTIN(ukaddv2hi3, "v_ukadd16", V_UKADD16)
+  NDS32_BUILTIN(subv2hi3, "sub16", SUB16)
+  NDS32_BUILTIN(subv2hi3, "v_usub16", V_USUB16)
+  NDS32_BUILTIN(subv2hi3, "v_ssub16", V_SSUB16)
+  NDS32_BUILTIN(rsubv2hi3, "rsub16", RSUB16)
+  NDS32_BUILTIN(rsubv2hi3, "v_rsub16", V_RSUB16)
+  NDS32_BUILTIN(ursubv2hi3, "ursub16", URSUB16)
+  NDS32_BUILTIN(ursubv2hi3, "v_ursub16", V_URSUB16)
+  NDS32_BUILTIN(ksubv2hi3, "ksub16", KSUB16)
+  NDS32_BUILTIN(ksubv2hi3, "v_ksub16", V_KSUB16)
+  NDS32_BUILTIN(uksubv2hi3, "uksub16", UKSUB16)
+  NDS32_BUILTIN(uksubv2hi3, "v_uksub16", V_UKSUB16)
+  NDS32_BUILTIN(cras16_1, "cras16", CRAS16)
+  NDS32_BUILTIN(cras16_1, "v_ucras16", V_UCRAS16)
+  NDS32_BUILTIN(cras16_1, "v_scras16", V_SCRAS16)
+  NDS32_BUILTIN(rcras16_1, "rcras16", RCRAS16)
+  NDS32_BUILTIN(rcras16_1, "v_rcras16", V_RCRAS16)
+  NDS32_BUILTIN(urcras16_1, "urcras16", URCRAS16)
+  NDS32_BUILTIN(urcras16_1, "v_urcras16", V_URCRAS16)
+  NDS32_BUILTIN(kcras16_1, "kcras16", KCRAS16)
+  NDS32_BUILTIN(kcras16_1, "v_kcras16", V_KCRAS16)
+  NDS32_BUILTIN(ukcras16_1, "ukcras16", UKCRAS16)
+  NDS32_BUILTIN(ukcras16_1, "v_ukcras16", V_UKCRAS16)
+  NDS32_BUILTIN(crsa16_1, "crsa16", CRSA16)
+  NDS32_BUILTIN(crsa16_1, "v_ucrsa16", V_UCRSA16)
+  NDS32_BUILTIN(crsa16_1, "v_scrsa16", V_SCRSA16)
+  NDS32_BUILTIN(rcrsa16_1, "rcrsa16", RCRSA16)
+  NDS32_BUILTIN(rcrsa16_1, "v_rcrsa16", V_RCRSA16)
+  NDS32_BUILTIN(urcrsa16_1, "urcrsa16", URCRSA16)
+  NDS32_BUILTIN(urcrsa16_1, "v_urcrsa16", V_URCRSA16)
+  NDS32_BUILTIN(kcrsa16_1, "kcrsa16", KCRSA16)
+  NDS32_BUILTIN(kcrsa16_1, "v_kcrsa16", V_KCRSA16)
+  NDS32_BUILTIN(ukcrsa16_1, "ukcrsa16", UKCRSA16)
+  NDS32_BUILTIN(ukcrsa16_1, "v_ukcrsa16", V_UKCRSA16)
+  NDS32_BUILTIN(addv4qi3, "add8", ADD8)
+  NDS32_BUILTIN(addv4qi3, "v_uadd8", V_UADD8)
+  NDS32_BUILTIN(addv4qi3, "v_sadd8", V_SADD8)
+  NDS32_BUILTIN(raddv4qi3, "radd8", RADD8)
+  NDS32_BUILTIN(raddv4qi3, "v_radd8", V_RADD8)
+  NDS32_BUILTIN(uraddv4qi3, "uradd8", URADD8)
+  NDS32_BUILTIN(uraddv4qi3, "v_uradd8", V_URADD8)
+  NDS32_BUILTIN(kaddv4qi3, "kadd8", KADD8)
+  NDS32_BUILTIN(kaddv4qi3, "v_kadd8", V_KADD8)
+  NDS32_BUILTIN(ukaddv4qi3, "ukadd8", UKADD8)
+  NDS32_BUILTIN(ukaddv4qi3, "v_ukadd8", V_UKADD8)
+  NDS32_BUILTIN(subv4qi3, "sub8", SUB8)
+  NDS32_BUILTIN(subv4qi3, "v_usub8", V_USUB8)
+  NDS32_BUILTIN(subv4qi3, "v_ssub8", V_SSUB8)
+  NDS32_BUILTIN(rsubv4qi3, "rsub8", RSUB8)
+  NDS32_BUILTIN(rsubv4qi3, "v_rsub8", V_RSUB8)
+  NDS32_BUILTIN(ursubv4qi3, "ursub8", URSUB8)
+  NDS32_BUILTIN(ursubv4qi3, "v_ursub8", V_URSUB8)
+  NDS32_BUILTIN(ksubv4qi3, "ksub8", KSUB8)
+  NDS32_BUILTIN(ksubv4qi3, "v_ksub8", V_KSUB8)
+  NDS32_BUILTIN(uksubv4qi3, "uksub8", UKSUB8)
+  NDS32_BUILTIN(uksubv4qi3, "v_uksub8", V_UKSUB8)
+  NDS32_BUILTIN(ashrv2hi3, "sra16", SRA16)
+  NDS32_BUILTIN(ashrv2hi3, "v_sra16", V_SRA16)
+  NDS32_BUILTIN(sra16_round, "sra16_u", SRA16_U)
+  NDS32_BUILTIN(sra16_round, "v_sra16_u", V_SRA16_U)
+  NDS32_BUILTIN(lshrv2hi3, "srl16", SRL16)
+  NDS32_BUILTIN(lshrv2hi3, "v_srl16", V_SRL16)
+  NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U)
+  NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U)
+  NDS32_BUILTIN(ashlv2hi3, "sll16", SLL16)
+  NDS32_BUILTIN(ashlv2hi3, "v_sll16", V_SLL16)
+  NDS32_BUILTIN(kslli16, "ksll16", KSLL16)
+  NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16)
+  NDS32_BUILTIN(kslra16, "kslra16", KSLRA16)
+  NDS32_BUILTIN(kslra16, "v_kslra16", V_KSLRA16)
+  NDS32_BUILTIN(kslra16_round, "kslra16_u", KSLRA16_U)
+  NDS32_BUILTIN(kslra16_round, "v_kslra16_u", V_KSLRA16_U)
+  NDS32_BUILTIN(cmpeq16, "cmpeq16", CMPEQ16)
+  NDS32_BUILTIN(cmpeq16, "v_scmpeq16", V_SCMPEQ16)
+  NDS32_BUILTIN(cmpeq16, "v_ucmpeq16", V_UCMPEQ16)
+  NDS32_BUILTIN(scmplt16, "scmplt16", SCMPLT16)
+  NDS32_BUILTIN(scmplt16, "v_scmplt16", V_SCMPLT16)
+  NDS32_BUILTIN(scmple16, "scmple16", SCMPLE16)
+  NDS32_BUILTIN(scmple16, "v_scmple16", V_SCMPLE16)
+  NDS32_BUILTIN(ucmplt16, "ucmplt16", UCMPLT16)
+  NDS32_BUILTIN(ucmplt16, "v_ucmplt16", V_UCMPLT16)
+  NDS32_BUILTIN(ucmplt16, "ucmple16", UCMPLE16)
+  NDS32_BUILTIN(ucmplt16, "v_ucmple16", V_UCMPLE16)
+  NDS32_BUILTIN(cmpeq8, "cmpeq8", CMPEQ8)
+  NDS32_BUILTIN(cmpeq8, "v_scmpeq8", V_SCMPEQ8)
+  NDS32_BUILTIN(cmpeq8, "v_ucmpeq8", V_UCMPEQ8)
+  NDS32_BUILTIN(scmplt8, "scmplt8", SCMPLT8)
+  NDS32_BUILTIN(scmplt8, "v_scmplt8", V_SCMPLT8)
+  NDS32_BUILTIN(scmple8, "scmple8", SCMPLE8)
+  NDS32_BUILTIN(scmple8, "v_scmple8", V_SCMPLE8)
+  NDS32_BUILTIN(ucmplt8, "ucmplt8", UCMPLT8)
+  NDS32_BUILTIN(ucmplt8, "v_ucmplt8", V_UCMPLT8)
+  NDS32_BUILTIN(ucmplt8, "ucmple8", UCMPLE8)
+  NDS32_BUILTIN(ucmplt8, "v_ucmple8", V_UCMPLE8)
+  NDS32_BUILTIN(sminv2hi3, "smin16", SMIN16)
+  NDS32_BUILTIN(sminv2hi3, "v_smin16", V_SMIN16)
+  NDS32_BUILTIN(uminv2hi3, "umin16", UMIN16)
+  NDS32_BUILTIN(uminv2hi3, "v_umin16", V_UMIN16)
+  NDS32_BUILTIN(smaxv2hi3, "smax16", SMAX16)
+  NDS32_BUILTIN(smaxv2hi3, "v_smax16", V_SMAX16)
+  NDS32_BUILTIN(umaxv2hi3, "umax16", UMAX16)
+  NDS32_BUILTIN(umaxv2hi3, "v_umax16", V_UMAX16)
+  NDS32_BUILTIN(khm16, "khm16", KHM16)
+  NDS32_BUILTIN(khm16, "v_khm16", V_KHM16)
+  NDS32_BUILTIN(khmx16, "khmx16", KHMX16)
+  NDS32_BUILTIN(khmx16, "v_khmx16", V_KHMX16)
+  NDS32_BUILTIN(sminv4qi3, "smin8", SMIN8)
+  NDS32_BUILTIN(sminv4qi3, "v_smin8", V_SMIN8)
+  NDS32_BUILTIN(uminv4qi3, "umin8", UMIN8)
+  NDS32_BUILTIN(uminv4qi3, "v_umin8", V_UMIN8)
+  NDS32_BUILTIN(smaxv4qi3, "smax8", SMAX8)
+  NDS32_BUILTIN(smaxv4qi3, "v_smax8", V_SMAX8)
+  NDS32_BUILTIN(umaxv4qi3, "umax8", UMAX8)
+  NDS32_BUILTIN(umaxv4qi3, "v_umax8", V_UMAX8)
+  NDS32_BUILTIN(raddsi3, "raddw", RADDW)
+  NDS32_BUILTIN(uraddsi3, "uraddw", URADDW)
+  NDS32_BUILTIN(rsubsi3, "rsubw", RSUBW)
+  NDS32_BUILTIN(ursubsi3, "ursubw", URSUBW)
+  NDS32_BUILTIN(sraiu, "sra_u", SRA_U)
+  NDS32_BUILTIN(kssl, "ksll", KSLL)
+  NDS32_BUILTIN(pkbb, "pkbb16", PKBB16)
+  NDS32_BUILTIN(pkbb, "v_pkbb16", V_PKBB16)
+  NDS32_BUILTIN(pkbt, "pkbt16", PKBT16)
+  NDS32_BUILTIN(pkbt, "v_pkbt16", V_PKBT16)
+  NDS32_BUILTIN(pktb, "pktb16", PKTB16)
+  NDS32_BUILTIN(pktb, "v_pktb16", V_PKTB16)
+  NDS32_BUILTIN(pktt, "pktt16", PKTT16)
+  NDS32_BUILTIN(pktt, "v_pktt16", V_PKTT16)
+  NDS32_BUILTIN(smulsi3_highpart, "smmul", SMMUL)
+  NDS32_BUILTIN(smmul_round, "smmul_u", SMMUL_U)
+  NDS32_BUILTIN(smmwb, "smmwb", SMMWB)
+  NDS32_BUILTIN(smmwb, "v_smmwb", V_SMMWB)
+  NDS32_BUILTIN(smmwb_round, "smmwb_u", SMMWB_U)
+  NDS32_BUILTIN(smmwb_round, "v_smmwb_u", V_SMMWB_U)
+  NDS32_BUILTIN(smmwt, "smmwt", SMMWT)
+  NDS32_BUILTIN(smmwt, "v_smmwt", V_SMMWT)
+  NDS32_BUILTIN(smmwt_round, "smmwt_u", SMMWT_U)
+  NDS32_BUILTIN(smmwt_round, "v_smmwt_u", V_SMMWT_U)
+  NDS32_BUILTIN(smbb, "smbb", SMBB)
+  NDS32_BUILTIN(smbb, "v_smbb", V_SMBB)
+  NDS32_BUILTIN(smbt, "smbt", SMBT)
+  NDS32_BUILTIN(smbt, "v_smbt", V_SMBT)
+  NDS32_BUILTIN(smtt, "smtt", SMTT)
+  NDS32_BUILTIN(smtt, "v_smtt", V_SMTT)
+  NDS32_BUILTIN(kmda, "kmda", KMDA)
+  NDS32_BUILTIN(kmda, "v_kmda", V_KMDA)
+  NDS32_BUILTIN(kmxda, "kmxda", KMXDA)
+  NDS32_BUILTIN(kmxda, "v_kmxda", V_KMXDA)
+  NDS32_BUILTIN(smds, "smds", SMDS)
+  NDS32_BUILTIN(smds, "v_smds", V_SMDS)
+  NDS32_BUILTIN(smdrs, "smdrs", SMDRS)
+  NDS32_BUILTIN(smdrs, "v_smdrs", V_SMDRS)
+  NDS32_BUILTIN(smxdsv, "smxds", SMXDS)
+  NDS32_BUILTIN(smxdsv, "v_smxds", V_SMXDS)
+  NDS32_BUILTIN(smal1, "smal", SMAL)
+  NDS32_BUILTIN(smal1, "v_smal", V_SMAL)
+  NDS32_BUILTIN(bitrev, "bitrev", BITREV)
+  NDS32_BUILTIN(wext, "wext", WEXT)
+  NDS32_BUILTIN(adddi3, "sadd64", SADD64)
+  NDS32_BUILTIN(adddi3, "uadd64", UADD64)
+  NDS32_BUILTIN(radddi3, "radd64", RADD64)
+  NDS32_BUILTIN(uradddi3, "uradd64", URADD64)
+  NDS32_BUILTIN(kadddi3, "kadd64", KADD64)
+  NDS32_BUILTIN(ukadddi3, "ukadd64", UKADD64)
+  NDS32_BUILTIN(subdi3, "ssub64", SSUB64)
+  NDS32_BUILTIN(subdi3, "usub64", USUB64)
+  NDS32_BUILTIN(rsubdi3, "rsub64", RSUB64)
+  NDS32_BUILTIN(ursubdi3, "ursub64", URSUB64)
+  NDS32_BUILTIN(ksubdi3, "ksub64", KSUB64)
+  NDS32_BUILTIN(uksubdi3, "uksub64", UKSUB64)
+  NDS32_BUILTIN(smul16, "smul16", SMUL16)
+  NDS32_BUILTIN(smul16, "v_smul16", V_SMUL16)
+  NDS32_BUILTIN(smulx16, "smulx16", SMULX16)
+  NDS32_BUILTIN(smulx16, "v_smulx16", V_SMULX16)
+  NDS32_BUILTIN(umul16, "umul16", UMUL16)
+  NDS32_BUILTIN(umul16, "v_umul16", V_UMUL16)
+  NDS32_BUILTIN(umulx16, "umulx16", UMULX16)
+  NDS32_BUILTIN(umulx16, "v_umulx16", V_UMULX16)
+  NDS32_BUILTIN(kwmmul, "kwmmul", KWMMUL)
+  NDS32_BUILTIN(kwmmul_round, "kwmmul_u", KWMMUL_U)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi,
+			  "put_unaligned_u16x2", UASTORE_U16)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi,
+			  "put_unaligned_s16x2", UASTORE_S16)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_u8x4", UASTORE_U8)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_s8x4", UASTORE_S8)
+};
+
+/* Two-argument intrinsics with an immediate second argument.  */
+static struct builtin_description bdesc_2argimm[] =
+{
+  NDS32_BUILTIN(unspec_bclr, "bclr", BCLR)
+  NDS32_BUILTIN(unspec_bset, "bset", BSET)
+  NDS32_BUILTIN(unspec_btgl, "btgl", BTGL)
+  NDS32_BUILTIN(unspec_btst, "btst", BTST)
+  NDS32_BUILTIN(unspec_clip, "clip", CLIP)
+  NDS32_BUILTIN(unspec_clips, "clips", CLIPS)
+  NDS32_NO_TARGET_BUILTIN(unspec_teqz, "teqz", TEQZ)
+  NDS32_NO_TARGET_BUILTIN(unspec_tnez, "tnez", TNEZ)
+  NDS32_BUILTIN(ashrv2hi3, "srl16", SRL16)
+  NDS32_BUILTIN(ashrv2hi3, "v_srl16", V_SRL16)
+  NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U)
+  NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U)
+  NDS32_BUILTIN(kslli16, "ksll16", KSLL16)
+  NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16)
+  NDS32_BUILTIN(sclip16, "sclip16", SCLIP16)
+  NDS32_BUILTIN(sclip16, "v_sclip16", V_SCLIP16)
+  NDS32_BUILTIN(uclip16, "uclip16", UCLIP16)
+  NDS32_BUILTIN(uclip16, "v_uclip16", V_UCLIP16)
+  NDS32_BUILTIN(sraiu, "sra_u", SRA_U)
+  NDS32_BUILTIN(kssl, "ksll", KSLL)
+  NDS32_BUILTIN(bitrev, "bitrev", BITREV)
+  NDS32_BUILTIN(wext, "wext", WEXT)
+  NDS32_BUILTIN(uclip32, "uclip32", UCLIP32)
+  NDS32_BUILTIN(sclip32, "sclip32", SCLIP32)
+};
+
+/* Intrinsics that take three arguments.  */
+static struct builtin_description bdesc_3arg[] =
+{
+  NDS32_BUILTIN(unspec_pbsada, "pbsada", PBSADA)
+  NDS32_NO_TARGET_BUILTIN(bse, "bse", BSE)
+  NDS32_NO_TARGET_BUILTIN(bsp, "bsp", BSP)
+  NDS32_BUILTIN(kmabb, "kmabb", KMABB)
+  NDS32_BUILTIN(kmabb, "v_kmabb", V_KMABB)
+  NDS32_BUILTIN(kmabt, "kmabt", KMABT)
+  NDS32_BUILTIN(kmabt, "v_kmabt", V_KMABT)
+  NDS32_BUILTIN(kmatt, "kmatt", KMATT)
+  NDS32_BUILTIN(kmatt, "v_kmatt", V_KMATT)
+  NDS32_BUILTIN(kmada, "kmada", KMADA)
+  NDS32_BUILTIN(kmada, "v_kmada", V_KMADA)
+  NDS32_BUILTIN(kmaxda, "kmaxda", KMAXDA)
+  NDS32_BUILTIN(kmaxda, "v_kmaxda", V_KMAXDA)
+  NDS32_BUILTIN(kmads, "kmads", KMADS)
+  NDS32_BUILTIN(kmads, "v_kmads", V_KMADS)
+  NDS32_BUILTIN(kmadrs, "kmadrs", KMADRS)
+  NDS32_BUILTIN(kmadrs, "v_kmadrs", V_KMADRS)
+  NDS32_BUILTIN(kmaxds, "kmaxds", KMAXDS)
+  NDS32_BUILTIN(kmaxds, "v_kmaxds", V_KMAXDS)
+  NDS32_BUILTIN(kmsda, "kmsda", KMSDA)
+  NDS32_BUILTIN(kmsda, "v_kmsda", V_KMSDA)
+  NDS32_BUILTIN(kmsxda, "kmsxda", KMSXDA)
+  NDS32_BUILTIN(kmsxda, "v_kmsxda", V_KMSXDA)
+  NDS32_BUILTIN(bpick1, "bpick", BPICK)
+  NDS32_BUILTIN(smar64_1, "smar64", SMAR64)
+  NDS32_BUILTIN(smsr64, "smsr64", SMSR64)
+  NDS32_BUILTIN(umar64_1, "umar64", UMAR64)
+  NDS32_BUILTIN(umsr64, "umsr64", UMSR64)
+  NDS32_BUILTIN(kmar64_1, "kmar64", KMAR64)
+  NDS32_BUILTIN(kmsr64, "kmsr64", KMSR64)
+  NDS32_BUILTIN(ukmar64_1, "ukmar64", UKMAR64)
+  NDS32_BUILTIN(ukmsr64, "ukmsr64", UKMSR64)
+  NDS32_BUILTIN(smalbb, "smalbb", SMALBB)
+  NDS32_BUILTIN(smalbb, "v_smalbb", V_SMALBB)
+  NDS32_BUILTIN(smalbt, "smalbt", SMALBT)
+  NDS32_BUILTIN(smalbt, "v_smalbt", V_SMALBT)
+  NDS32_BUILTIN(smaltt, "smaltt", SMALTT)
+  NDS32_BUILTIN(smaltt, "v_smaltt", V_SMALTT)
+  NDS32_BUILTIN(smalda1, "smalda", SMALDA)
+  NDS32_BUILTIN(smalda1, "v_smalda", V_SMALDA)
+  NDS32_BUILTIN(smalxda1, "smalxda", SMALXDA)
+  NDS32_BUILTIN(smalxda1, "v_smalxda", V_SMALXDA)
+  NDS32_BUILTIN(smalds1, "smalds", SMALDS)
+  NDS32_BUILTIN(smalds1, "v_smalds", V_SMALDS)
+  NDS32_BUILTIN(smaldrs3, "smaldrs", SMALDRS)
+  NDS32_BUILTIN(smaldrs3, "v_smaldrs", V_SMALDRS)
+  NDS32_BUILTIN(smalxds1, "smalxds", SMALXDS)
+  NDS32_BUILTIN(smalxds1, "v_smalxds", V_SMALXDS)
+  NDS32_BUILTIN(smslda1, "smslda", SMSLDA)
+  NDS32_BUILTIN(smslda1, "v_smslda", V_SMSLDA)
+  NDS32_BUILTIN(smslxda1, "smslxda", SMSLXDA)
+  NDS32_BUILTIN(smslxda1, "v_smslxda", V_SMSLXDA)
+  NDS32_BUILTIN(kmmawb, "kmmawb", KMMAWB)
+  NDS32_BUILTIN(kmmawb, "v_kmmawb", V_KMMAWB)
+  NDS32_BUILTIN(kmmawb_round, "kmmawb_u", KMMAWB_U)
+  NDS32_BUILTIN(kmmawb_round, "v_kmmawb_u", V_KMMAWB_U)
+  NDS32_BUILTIN(kmmawt, "kmmawt", KMMAWT)
+  NDS32_BUILTIN(kmmawt, "v_kmmawt", V_KMMAWT)
+  NDS32_BUILTIN(kmmawt_round, "kmmawt_u", KMMAWT_U)
+  NDS32_BUILTIN(kmmawt_round, "v_kmmawt_u", V_KMMAWT_U)
+  NDS32_BUILTIN(kmmac, "kmmac", KMMAC)
+  NDS32_BUILTIN(kmmac_round, "kmmac_u", KMMAC_U)
+  NDS32_BUILTIN(kmmsb, "kmmsb", KMMSB)
+  NDS32_BUILTIN(kmmsb_round, "kmmsb_u", KMMSB_U)
+};
+
+/* Three-argument intrinsics with an immediate third argument.  */
+static struct builtin_description bdesc_3argimm[] =
+{
+  NDS32_NO_TARGET_BUILTIN(prefetch_qw, "prefetch_qw", DPREF_QW)
+  NDS32_NO_TARGET_BUILTIN(prefetch_hw, "prefetch_hw", DPREF_HW)
+  NDS32_NO_TARGET_BUILTIN(prefetch_w, "prefetch_w", DPREF_W)
+  NDS32_NO_TARGET_BUILTIN(prefetch_dw, "prefetch_dw", DPREF_DW)
+  NDS32_BUILTIN(insb, "insb", INSB)
+};
+
+/* Intrinsics that load a value.  */
+static struct builtin_description bdesc_load[] =
+{
+  NDS32_BUILTIN(unspec_volatile_llw, "llw", LLW)
+  NDS32_BUILTIN(unspec_lwup, "lwup", LWUP)
+  NDS32_BUILTIN(unspec_lbup, "lbup", LBUP)
+};
+
+/* Intrinsics that store a value.  */
+static struct builtin_description bdesc_store[] =
+{
+  NDS32_BUILTIN(unspec_swup, "swup", SWUP)
+  NDS32_BUILTIN(unspec_sbup, "sbup", SBUP)
+};
+
+static struct builtin_description bdesc_cctl[] =
+{
+  NDS32_BUILTIN(cctl_idx_read, "cctl_idx_read", CCTL_IDX_READ)
+  NDS32_NO_TARGET_BUILTIN(cctl_idx_write, "cctl_idx_write", CCTL_IDX_WRITE)
+  NDS32_NO_TARGET_BUILTIN(cctl_va_lck, "cctl_va_lck", CCTL_VA_LCK)
+  NDS32_NO_TARGET_BUILTIN(cctl_idx_wbinval,
+			  "cctl_idx_wbinval", CCTL_IDX_WBINVAL)
+  NDS32_NO_TARGET_BUILTIN(cctl_va_wbinval_l1,
+			  "cctl_va_wbinval_l1", CCTL_VA_WBINVAL_L1)
+  NDS32_NO_TARGET_BUILTIN(cctl_va_wbinval_la,
+			  "cctl_va_wbinval_la", CCTL_VA_WBINVAL_LA)
+};
 
 rtx
 nds32_expand_builtin_impl (tree exp,
 			   rtx target,
 			   rtx subtarget ATTRIBUTE_UNUSED,
-			   machine_mode mode ATTRIBUTE_UNUSED,
+			   enum machine_mode mode ATTRIBUTE_UNUSED,
 			   int ignore ATTRIBUTE_UNUSED)
 {
   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+  unsigned i;
+  struct builtin_description *d;
+
+  if (!NDS32_EXT_DSP_P ()
+      && fcode > NDS32_BUILTIN_DSP_BEGIN
+      && fcode < NDS32_BUILTIN_DSP_END)
+    error ("don't support DSP extension instructions");
+
+  switch (fcode)
+    {
+    /* FPU Register Transfer.  */
+    case NDS32_BUILTIN_FMFCFG:
+    case NDS32_BUILTIN_FMFCSR:
+    case NDS32_BUILTIN_FMTCSR:
+    case NDS32_BUILTIN_FCPYNSS:
+    case NDS32_BUILTIN_FCPYSS:
+      /* Both v3s and v3f toolchains define TARGET_FPU_SINGLE.  */
+      if (!TARGET_FPU_SINGLE)
+	{
+	  error ("this builtin function is only available "
+		 "on the v3s or v3f toolchain");
+	  return NULL_RTX;
+	}
+      break;
+
+    /* FPU Register Transfer.  */
+    case NDS32_BUILTIN_FCPYNSD:
+    case NDS32_BUILTIN_FCPYSD:
+      /* Only v3f toolchain defines TARGET_FPU_DOUBLE.  */
+      if (!TARGET_FPU_DOUBLE)
+	{
+	  error ("this builtin function is only available "
+		 "on the v3f toolchain");
+	  return NULL_RTX;
+	}
+      break;
+
+    /* Load and Store  */
+    case NDS32_BUILTIN_LLW:
+    case NDS32_BUILTIN_LWUP:
+    case NDS32_BUILTIN_LBUP:
+    case NDS32_BUILTIN_SCW:
+    case NDS32_BUILTIN_SWUP:
+    case NDS32_BUILTIN_SBUP:
+      if (TARGET_ISA_V3M)
+	{
+	  error ("this builtin function is not supported "
+		 "on the v3m toolchain");
+	  return NULL_RTX;
+	}
+      break;
+
+    /* Performance Extension  */
+    case NDS32_BUILTIN_ABS:
+    case NDS32_BUILTIN_AVE:
+    case NDS32_BUILTIN_BCLR:
+    case NDS32_BUILTIN_BSET:
+    case NDS32_BUILTIN_BTGL:
+    case NDS32_BUILTIN_BTST:
+    case NDS32_BUILTIN_CLIP:
+    case NDS32_BUILTIN_CLIPS:
+    case NDS32_BUILTIN_CLZ:
+    case NDS32_BUILTIN_CLO:
+      if (!TARGET_EXT_PERF)
+	{
+	  error ("don't support performance extension instructions");
+	  return NULL_RTX;
+	}
+      break;
+
+    /* Performance Extension 2  */
+    case NDS32_BUILTIN_PBSAD:
+    case NDS32_BUILTIN_PBSADA:
+    case NDS32_BUILTIN_BSE:
+    case NDS32_BUILTIN_BSP:
+      if (!TARGET_EXT_PERF2)
+	{
+	  error ("don't support performance extension "
+		 "version 2 instructions");
+	  return NULL_RTX;
+	}
+      break;
 
-  int fcode = DECL_FUNCTION_CODE (fndecl);
+    /* String Extension  */
+    case NDS32_BUILTIN_FFB:
+    case NDS32_BUILTIN_FFMISM:
+    case NDS32_BUILTIN_FLMISM:
+      if (!TARGET_EXT_STRING)
+	{
+	  error ("don't support string extension instructions");
+	  return NULL_RTX;
+	}
+      break;
 
+    default:
+      break;
+    }
+
+  /* Since these builtins have no result and no operands, we can simply
+     emit the corresponding rtx directly.  */
   switch (fcode)
     {
-    /* Cache.  */
-    case NDS32_BUILTIN_ISYNC:
-      return nds32_expand_builtin_null_ftype_reg
-	     (CODE_FOR_unspec_volatile_isync, exp, target);
     case NDS32_BUILTIN_ISB:
-      /* Since there are no result and operands for isb instruciton,
-         we can simply emit this rtx.  */
       emit_insn (gen_unspec_volatile_isb ());
       return target;
-
-    /* Register Transfer.  */
-    case NDS32_BUILTIN_MFSR:
-      return nds32_expand_builtin_reg_ftype_imm
-	     (CODE_FOR_unspec_volatile_mfsr, exp, target);
-    case NDS32_BUILTIN_MFUSR:
-      return nds32_expand_builtin_reg_ftype_imm
-	     (CODE_FOR_unspec_volatile_mfusr, exp, target);
-    case NDS32_BUILTIN_MTSR:
-      return nds32_expand_builtin_null_ftype_reg_imm
-	     (CODE_FOR_unspec_volatile_mtsr, exp, target);
-    case NDS32_BUILTIN_MTUSR:
-      return nds32_expand_builtin_null_ftype_reg_imm
-	     (CODE_FOR_unspec_volatile_mtusr, exp, target);
-
-    /* Interrupt.  */
+    case NDS32_BUILTIN_DSB:
+      emit_insn (gen_unspec_dsb ());
+      return target;
+    case NDS32_BUILTIN_MSYNC_ALL:
+      emit_insn (gen_unspec_msync_all ());
+      return target;
+    case NDS32_BUILTIN_MSYNC_STORE:
+      emit_insn (gen_unspec_msync_store ());
+      return target;
     case NDS32_BUILTIN_SETGIE_EN:
-      /* Since there are no result and operands for setgie.e instruciton,
-         we can simply emit this rtx.  */
       emit_insn (gen_unspec_volatile_setgie_en ());
+      emit_insn (gen_unspec_dsb ());
       return target;
     case NDS32_BUILTIN_SETGIE_DIS:
-      /* Since there are no result and operands for setgie.d instruciton,
-         we can simply emit this rtx.  */
       emit_insn (gen_unspec_volatile_setgie_dis ());
+      emit_insn (gen_unspec_dsb ());
+      return target;
+    case NDS32_BUILTIN_GIE_DIS:
+      emit_insn (gen_unspec_volatile_setgie_dis ());
+      emit_insn (gen_unspec_dsb ());
+      return target;
+    case NDS32_BUILTIN_GIE_EN:
+      emit_insn (gen_unspec_volatile_setgie_en ());
+      emit_insn (gen_unspec_dsb ());
+      return target;
+    case NDS32_BUILTIN_SET_PENDING_SWINT:
+      emit_insn (gen_unspec_set_pending_swint ());
+      return target;
+    case NDS32_BUILTIN_CLR_PENDING_SWINT:
+      emit_insn (gen_unspec_clr_pending_swint ());
+      return target;
+    case NDS32_BUILTIN_CCTL_L1D_INVALALL:
+      emit_insn (gen_cctl_l1d_invalall ());
+      return target;
+    case NDS32_BUILTIN_CCTL_L1D_WBALL_ALVL:
+      emit_insn (gen_cctl_l1d_wball_alvl ());
+      return target;
+    case NDS32_BUILTIN_CCTL_L1D_WBALL_ONE_LVL:
+      emit_insn (gen_cctl_l1d_wball_one_lvl ());
+      return target;
+    case NDS32_BUILTIN_CLROV:
+      emit_insn (gen_unspec_volatile_clrov ());
+      return target;
+    case NDS32_BUILTIN_STANDBY_NO_WAKE_GRANT:
+      emit_insn (gen_unspec_standby_no_wake_grant ());
+      return target;
+    case NDS32_BUILTIN_STANDBY_WAKE_GRANT:
+      emit_insn (gen_unspec_standby_wake_grant ());
+      return target;
+    case NDS32_BUILTIN_STANDBY_WAKE_DONE:
+      emit_insn (gen_unspec_standby_wait_done ());
+      return target;
+    case NDS32_BUILTIN_SETEND_BIG:
+      emit_insn (gen_unspec_setend_big ());
+      return target;
+    case NDS32_BUILTIN_SETEND_LITTLE:
+      emit_insn (gen_unspec_setend_little ());
+      return target;
+    case NDS32_BUILTIN_NOP:
+      emit_insn (gen_unspec_nop ());
+      return target;
+    case NDS32_BUILTIN_SCHE_BARRIER:
+      emit_insn (gen_blockage ());
+      return target;
+    case NDS32_BUILTIN_TLBOP_FLUA:
+      emit_insn (gen_unspec_tlbop_flua ());
+      return target;
+    /* Instruction sequence protection  */
+    case NDS32_BUILTIN_SIGNATURE_BEGIN:
+      emit_insn (gen_unspec_signature_begin ());
+      return target;
+    case NDS32_BUILTIN_SIGNATURE_END:
+      emit_insn (gen_unspec_signature_end ());
+      return target;
+    case NDS32_BUILTIN_SCW:
+      return nds32_expand_scw_builtin (CODE_FOR_unspec_volatile_scw,
+				       exp, target);
+    case NDS32_BUILTIN_SET_INT_PRIORITY:
+      return nds32_expand_priority_builtin (CODE_FOR_unspec_set_int_priority,
+					    exp, target,
+					    "__nds32__set_int_priority");
+    case NDS32_BUILTIN_NO_HWLOOP:
+      emit_insn (gen_no_hwloop ());
       return target;
-
     default:
-      gcc_unreachable ();
+      break;
     }
 
+  /* Expand the remaining builtins by looking them up in the
+     description tables, grouped by operand count.  */
+  for (i = 0, d = bdesc_noarg; i < ARRAY_SIZE (bdesc_noarg); i++, d++)
+    if (d->code == fcode)
+      return nds32_expand_noarg_builtin (d->icode, target);
+
+  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+    if (d->code == fcode)
+      return nds32_expand_unop_builtin (d->icode, exp, target, d->return_p);
+
+  for (i = 0, d = bdesc_1argimm; i < ARRAY_SIZE (bdesc_1argimm); i++, d++)
+    if (d->code == fcode)
+      return nds32_expand_unopimm_builtin (d->icode, exp, target,
+					   d->return_p, d->name);
+
+  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+    if (d->code == fcode)
+      return nds32_expand_binop_builtin (d->icode, exp, target, d->return_p);
+
+  for (i = 0, d = bdesc_2argimm; i < ARRAY_SIZE (bdesc_2argimm); i++, d++)
+    if (d->code == fcode)
+      return nds32_expand_binopimm_builtin (d->icode, exp, target,
+					    d->return_p, d->name);
+
+  for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+    if (d->code == fcode)
+      return nds32_expand_triop_builtin (d->icode, exp, target, d->return_p);
+
+  for (i = 0, d = bdesc_3argimm; i < ARRAY_SIZE (bdesc_3argimm); i++, d++)
+    if (d->code == fcode)
+      return nds32_expand_triopimm_builtin (d->icode, exp, target,
+					    d->return_p, d->name);
+
+  for (i = 0, d = bdesc_load; i < ARRAY_SIZE (bdesc_load); i++, d++)
+    if (d->code == fcode)
+      return nds32_expand_builtin_load (d->icode, exp, target);
+
+  for (i = 0, d = bdesc_store; i < ARRAY_SIZE (bdesc_store); i++, d++)
+    if (d->code == fcode)
+      return nds32_expand_builtin_store (d->icode, exp, target);
+
+  for (i = 0, d = bdesc_cctl; i < ARRAY_SIZE (bdesc_cctl); i++, d++)
+    if (d->code == fcode)
+      return nds32_expand_cctl_builtin (d->icode, exp, target,
+					d->return_p, d->name);
+
   return NULL_RTX;
 }
 
+static GTY(()) tree nds32_builtin_decls[NDS32_BUILTIN_COUNT];
+
+/* Return the NDS32 builtin for CODE.  */
+tree
+nds32_builtin_decl_impl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+  if (code >= NDS32_BUILTIN_COUNT)
+    return error_mark_node;
+
+  return nds32_builtin_decls[code];
+}
+
+void
+nds32_init_builtins_impl (void)
+{
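+  /* Helper macros for registering the "__builtin_nds32_" NAME builtins
+     with zero to three arguments.  Each type argument is pasted with
+     "_type_node", so e.g. "unsigned" expands to unsigned_type_node.
+     For example, ADD_NDS32_BUILTIN2 ("mtsr", void, unsigned, integer, MTSR)
+     registers void __builtin_nds32_mtsr (unsigned int, int).  */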
+#define ADD_NDS32_BUILTIN0(NAME, RET_TYPE, CODE)		\
+  nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] =			\
+  add_builtin_function ("__builtin_nds32_" NAME,		\
+			build_function_type_list (RET_TYPE##_type_node, \
+						  NULL_TREE),		\
+			NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE)
+
+#define ADD_NDS32_BUILTIN1(NAME, RET_TYPE, ARG_TYPE, CODE)	\
+  nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] =			\
+  add_builtin_function ("__builtin_nds32_" NAME,		\
+			build_function_type_list (RET_TYPE##_type_node, \
+						  ARG_TYPE##_type_node, \
+						  NULL_TREE),		\
+			NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE)
+
+#define ADD_NDS32_BUILTIN2(NAME, RET_TYPE, ARG_TYPE1, ARG_TYPE2, CODE)	\
+  nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] =				\
+  add_builtin_function ("__builtin_nds32_" NAME,			\
+			build_function_type_list (RET_TYPE##_type_node, \
+						  ARG_TYPE1##_type_node,\
+						  ARG_TYPE2##_type_node,\
+						  NULL_TREE),		\
+			NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE)
+
+#define ADD_NDS32_BUILTIN3(NAME, RET_TYPE, ARG_TYPE1, ARG_TYPE2, ARG_TYPE3, CODE) \
+  nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] =				\
+  add_builtin_function ("__builtin_nds32_" NAME,			\
+			build_function_type_list (RET_TYPE##_type_node,	\
+						  ARG_TYPE1##_type_node,\
+						  ARG_TYPE2##_type_node,\
+						  ARG_TYPE3##_type_node,\
+						  NULL_TREE),		\
+			NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE)
+
+  /* The type nodes used below for return and argument types are
+     declared in tree.h.  */
+  tree ptr_char_type_node = build_pointer_type (char_type_node);
+  tree ptr_uchar_type_node = build_pointer_type (unsigned_char_type_node);
+  tree ptr_ushort_type_node = build_pointer_type (short_unsigned_type_node);
+  tree ptr_short_type_node = build_pointer_type (short_integer_type_node);
+  tree ptr_uint_type_node = build_pointer_type (unsigned_type_node);
+  tree ptr_ulong_type_node = build_pointer_type (long_long_unsigned_type_node);
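+  /* Vector type nodes used by the SIMD builtins registered below.  */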
+  tree v4qi_type_node = build_vector_type (intQI_type_node, 4);
+  tree u_v4qi_type_node = build_vector_type (unsigned_intQI_type_node, 4);
+  tree v2hi_type_node = build_vector_type (intHI_type_node, 2);
+  tree u_v2hi_type_node = build_vector_type (unsigned_intHI_type_node, 2);
+  tree v2si_type_node = build_vector_type (intSI_type_node, 2);
+  tree u_v2si_type_node = build_vector_type (unsigned_intSI_type_node, 2);
+
+  /* Cache.  */
+  ADD_NDS32_BUILTIN1 ("isync", void, ptr_uint, ISYNC);
+  ADD_NDS32_BUILTIN0 ("isb", void, ISB);
+  ADD_NDS32_BUILTIN0 ("dsb", void, DSB);
+  ADD_NDS32_BUILTIN0 ("msync_all", void, MSYNC_ALL);
+  ADD_NDS32_BUILTIN0 ("msync_store", void, MSYNC_STORE);
+
+  /* Register Transfer.  */
+  ADD_NDS32_BUILTIN1 ("mfsr", unsigned, integer, MFSR);
+  ADD_NDS32_BUILTIN1 ("mfusr", unsigned, integer, MFUSR);
+  ADD_NDS32_BUILTIN2 ("mtsr", void, unsigned, integer, MTSR);
+  ADD_NDS32_BUILTIN2 ("mtsr_isb", void, unsigned, integer, MTSR_ISB);
+  ADD_NDS32_BUILTIN2 ("mtsr_dsb", void, unsigned, integer, MTSR_DSB);
+  ADD_NDS32_BUILTIN2 ("mtusr", void, unsigned, integer, MTUSR);
+
+  /* FPU Register Transfer.  */
+  ADD_NDS32_BUILTIN0 ("fmfcsr", unsigned, FMFCSR);
+  ADD_NDS32_BUILTIN1 ("fmtcsr", void, unsigned, FMTCSR);
+  ADD_NDS32_BUILTIN0 ("fmfcfg", unsigned, FMFCFG);
+  ADD_NDS32_BUILTIN2 ("fcpyss", float, float, float, FCPYSS);
+  ADD_NDS32_BUILTIN2 ("fcpynss", float, float, float, FCPYNSS);
+  ADD_NDS32_BUILTIN2 ("fcpysd", double, double, double, FCPYSD);
+  ADD_NDS32_BUILTIN2 ("fcpynsd", double, double, double, FCPYNSD);
+
+  /* Interrupt.  */
+  ADD_NDS32_BUILTIN0 ("setgie_en", void, SETGIE_EN);
+  ADD_NDS32_BUILTIN0 ("setgie_dis", void, SETGIE_DIS);
+  ADD_NDS32_BUILTIN0 ("gie_en", void, GIE_EN);
+  ADD_NDS32_BUILTIN0 ("gie_dis", void, GIE_DIS);
+  ADD_NDS32_BUILTIN1 ("enable_int", void, integer, ENABLE_INT);
+  ADD_NDS32_BUILTIN1 ("disable_int", void, integer, DISABLE_INT);
+  ADD_NDS32_BUILTIN0 ("set_pending_swint", void, SET_PENDING_SWINT);
+  ADD_NDS32_BUILTIN0 ("clr_pending_swint", void, CLR_PENDING_SWINT);
+  ADD_NDS32_BUILTIN0 ("get_all_pending_int", unsigned, GET_ALL_PENDING_INT);
+  ADD_NDS32_BUILTIN1 ("get_pending_int", unsigned, integer, GET_PENDING_INT);
+  ADD_NDS32_BUILTIN1 ("get_int_priority", unsigned, integer, GET_INT_PRIORITY);
+  ADD_NDS32_BUILTIN2 ("set_int_priority", void, integer, integer,
+		      SET_INT_PRIORITY);
+  ADD_NDS32_BUILTIN1 ("clr_pending_hwint", void, integer, CLR_PENDING_HWINT);
+  ADD_NDS32_BUILTIN1 ("set_trig_level", void, integer, SET_TRIG_LEVEL);
+  ADD_NDS32_BUILTIN1 ("set_trig_edge", void, integer, SET_TRIG_EDGE);
+  ADD_NDS32_BUILTIN1 ("get_trig_type", unsigned, integer, GET_TRIG_TYPE);
+
+  /* Load and Store  */
+  ADD_NDS32_BUILTIN1 ("llw", unsigned, ptr_uint, LLW);
+  ADD_NDS32_BUILTIN1 ("lwup", unsigned, ptr_uint, LWUP);
+  ADD_NDS32_BUILTIN1 ("lbup", char, ptr_uchar, LBUP);
+  ADD_NDS32_BUILTIN2 ("scw", unsigned, ptr_uint, unsigned, SCW);
+  ADD_NDS32_BUILTIN2 ("swup", void, ptr_uint, unsigned, SWUP);
+  ADD_NDS32_BUILTIN2 ("sbup", void, ptr_uchar, char, SBUP);
+
+  /* CCTL  */
+  ADD_NDS32_BUILTIN0 ("cctl_l1d_invalall", void, CCTL_L1D_INVALALL);
+  ADD_NDS32_BUILTIN0 ("cctl_l1d_wball_alvl", void, CCTL_L1D_WBALL_ALVL);
+  ADD_NDS32_BUILTIN0 ("cctl_l1d_wball_one_lvl", void, CCTL_L1D_WBALL_ONE_LVL);
+  ADD_NDS32_BUILTIN2 ("cctl_va_lck", void, integer, ptr_uint, CCTL_VA_LCK);
+  ADD_NDS32_BUILTIN2 ("cctl_idx_wbinval", void, integer, unsigned,
+		      CCTL_IDX_WBINVAL);
+  ADD_NDS32_BUILTIN2 ("cctl_va_wbinval_l1", void, integer, ptr_uint,
+		      CCTL_VA_WBINVAL_L1);
+  ADD_NDS32_BUILTIN2 ("cctl_va_wbinval_la", void, integer, ptr_uint,
+		      CCTL_VA_WBINVAL_LA);
+  ADD_NDS32_BUILTIN2 ("cctl_idx_read", unsigned, integer, unsigned,
+		      CCTL_IDX_READ);
+  ADD_NDS32_BUILTIN3 ("cctl_idx_write", void, integer, unsigned, unsigned,
+		      CCTL_IDX_WRITE);
+
+  /* PREFETCH  */
+  ADD_NDS32_BUILTIN3 ("dpref_qw", void, ptr_uchar, unsigned, integer, DPREF_QW);
+  ADD_NDS32_BUILTIN3 ("dpref_hw", void, ptr_ushort, unsigned, integer,
+		      DPREF_HW);
+  ADD_NDS32_BUILTIN3 ("dpref_w", void, ptr_uint, unsigned, integer, DPREF_W);
+  ADD_NDS32_BUILTIN3 ("dpref_dw", void, ptr_ulong, unsigned, integer, DPREF_DW);
+
+  /* Performance Extension  */
+  ADD_NDS32_BUILTIN1 ("pe_abs", integer, integer, ABS);
+  ADD_NDS32_BUILTIN2 ("pe_ave", integer, integer, integer, AVE);
+  ADD_NDS32_BUILTIN2 ("pe_bclr", unsigned, unsigned, unsigned, BCLR);
+  ADD_NDS32_BUILTIN2 ("pe_bset", unsigned, unsigned, unsigned, BSET);
+  ADD_NDS32_BUILTIN2 ("pe_btgl", unsigned, unsigned, unsigned, BTGL);
+  ADD_NDS32_BUILTIN2 ("pe_btst", unsigned, unsigned, unsigned, BTST);
+  ADD_NDS32_BUILTIN2 ("pe_clip", unsigned, integer, unsigned, CLIP);
+  ADD_NDS32_BUILTIN2 ("pe_clips", integer, integer, unsigned, CLIPS);
+  ADD_NDS32_BUILTIN1 ("pe_clz", unsigned, unsigned, CLZ);
+  ADD_NDS32_BUILTIN1 ("pe_clo", unsigned, unsigned, CLO);
+
+  /* Performance Extension 2  */
+  ADD_NDS32_BUILTIN3 ("pe2_bse", void, ptr_uint, unsigned, ptr_uint, BSE);
+  ADD_NDS32_BUILTIN3 ("pe2_bsp", void, ptr_uint, unsigned, ptr_uint, BSP);
+  ADD_NDS32_BUILTIN2 ("pe2_pbsad", unsigned, unsigned, unsigned, PBSAD);
+  ADD_NDS32_BUILTIN3 ("pe2_pbsada", unsigned, unsigned, unsigned, unsigned,
+		      PBSADA);
+
+  /* String Extension  */
+  ADD_NDS32_BUILTIN2 ("se_ffb", integer, unsigned, unsigned, FFB);
+  ADD_NDS32_BUILTIN2 ("se_ffmism", integer, unsigned, unsigned, FFMISM);
+  ADD_NDS32_BUILTIN2 ("se_flmism", integer, unsigned, unsigned, FLMISM);
+
+  /* SATURATION  */
+  ADD_NDS32_BUILTIN2 ("kaddw", integer, integer, integer, KADDW);
+  ADD_NDS32_BUILTIN2 ("ksubw", integer, integer, integer, KSUBW);
+  ADD_NDS32_BUILTIN2 ("kaddh", integer, integer, integer, KADDH);
+  ADD_NDS32_BUILTIN2 ("ksubh", integer, integer, integer, KSUBH);
+  ADD_NDS32_BUILTIN2 ("kdmbb", integer, unsigned, unsigned, KDMBB);
+  ADD_NDS32_BUILTIN2 ("v_kdmbb", integer, v2hi, v2hi, V_KDMBB);
+  ADD_NDS32_BUILTIN2 ("kdmbt", integer, unsigned, unsigned, KDMBT);
+  ADD_NDS32_BUILTIN2 ("v_kdmbt", integer, v2hi, v2hi, V_KDMBT);
+  ADD_NDS32_BUILTIN2 ("kdmtb", integer, unsigned, unsigned, KDMTB);
+  ADD_NDS32_BUILTIN2 ("v_kdmtb", integer, v2hi, v2hi, V_KDMTB);
+  ADD_NDS32_BUILTIN2 ("kdmtt", integer, unsigned, unsigned, KDMTT);
+  ADD_NDS32_BUILTIN2 ("v_kdmtt", integer, v2hi, v2hi, V_KDMTT);
+  ADD_NDS32_BUILTIN2 ("khmbb", integer, unsigned, unsigned, KHMBB);
+  ADD_NDS32_BUILTIN2 ("v_khmbb", integer, v2hi, v2hi, V_KHMBB);
+  ADD_NDS32_BUILTIN2 ("khmbt", integer, unsigned, unsigned, KHMBT);
+  ADD_NDS32_BUILTIN2 ("v_khmbt", integer, v2hi, v2hi, V_KHMBT);
+  ADD_NDS32_BUILTIN2 ("khmtb", integer, unsigned, unsigned, KHMTB);
+  ADD_NDS32_BUILTIN2 ("v_khmtb", integer, v2hi, v2hi, V_KHMTB);
+  ADD_NDS32_BUILTIN2 ("khmtt", integer, unsigned, unsigned, KHMTT);
+  ADD_NDS32_BUILTIN2 ("v_khmtt", integer, v2hi, v2hi, V_KHMTT);
+  ADD_NDS32_BUILTIN2 ("kslraw", integer, integer, integer, KSLRAW);
+  ADD_NDS32_BUILTIN2 ("kslraw_u", integer, integer, integer, KSLRAW_U);
+  ADD_NDS32_BUILTIN0 ("rdov", unsigned, RDOV);
+  ADD_NDS32_BUILTIN0 ("clrov", void, CLROV);
+
+  /* ROTR  */
+  ADD_NDS32_BUILTIN2 ("rotr", unsigned, unsigned, unsigned, ROTR);
+
+  /* Swap  */
+  ADD_NDS32_BUILTIN1 ("wsbh", unsigned, unsigned, WSBH);
+
+  /* System  */
+  ADD_NDS32_BUILTIN2 ("svs", unsigned, integer, integer, SVS);
+  ADD_NDS32_BUILTIN2 ("sva", unsigned, integer, integer, SVA);
+  ADD_NDS32_BUILTIN1 ("jr_itoff", void, unsigned, JR_ITOFF);
+  ADD_NDS32_BUILTIN1 ("jr_toff", void, unsigned, JR_TOFF);
+  ADD_NDS32_BUILTIN1 ("jral_iton", void, unsigned, JRAL_ITON);
+  ADD_NDS32_BUILTIN1 ("jral_ton", void, unsigned, JRAL_TON);
+  ADD_NDS32_BUILTIN1 ("ret_itoff", void, unsigned, RET_ITOFF);
+  ADD_NDS32_BUILTIN1 ("ret_toff", void, unsigned, RET_TOFF);
+  ADD_NDS32_BUILTIN0 ("standby_no_wake_grant", void, STANDBY_NO_WAKE_GRANT);
+  ADD_NDS32_BUILTIN0 ("standby_wake_grant", void, STANDBY_WAKE_GRANT);
+  ADD_NDS32_BUILTIN0 ("standby_wait_done", void, STANDBY_WAKE_DONE);
+  ADD_NDS32_BUILTIN1 ("break", void, unsigned, BREAK);
+  ADD_NDS32_BUILTIN1 ("syscall", void, unsigned, SYSCALL);
+  ADD_NDS32_BUILTIN0 ("nop", void, NOP);
+  ADD_NDS32_BUILTIN0 ("get_current_sp", unsigned, GET_CURRENT_SP);
+  ADD_NDS32_BUILTIN1 ("set_current_sp", void, unsigned, SET_CURRENT_SP);
+  ADD_NDS32_BUILTIN2 ("teqz", void, unsigned, unsigned, TEQZ);
+  ADD_NDS32_BUILTIN2 ("tnez", void, unsigned, unsigned, TNEZ);
+  ADD_NDS32_BUILTIN1 ("trap", void, unsigned, TRAP);
+  ADD_NDS32_BUILTIN0 ("return_address", unsigned, RETURN_ADDRESS);
+  ADD_NDS32_BUILTIN0 ("setend_big", void, SETEND_BIG);
+  ADD_NDS32_BUILTIN0 ("setend_little", void, SETEND_LITTLE);
+
+  /* Schedule Barrier */
+  ADD_NDS32_BUILTIN0 ("schedule_barrier", void, SCHE_BARRIER);
+
+  /* TLBOP  */
+  ADD_NDS32_BUILTIN1 ("tlbop_trd", void, unsigned, TLBOP_TRD);
+  ADD_NDS32_BUILTIN1 ("tlbop_twr", void, unsigned, TLBOP_TWR);
+  ADD_NDS32_BUILTIN1 ("tlbop_rwr", void, unsigned, TLBOP_RWR);
+  ADD_NDS32_BUILTIN1 ("tlbop_rwlk", void, unsigned, TLBOP_RWLK);
+  ADD_NDS32_BUILTIN1 ("tlbop_unlk", void, unsigned, TLBOP_UNLK);
+  ADD_NDS32_BUILTIN1 ("tlbop_pb", unsigned, unsigned, TLBOP_PB);
+  ADD_NDS32_BUILTIN1 ("tlbop_inv", void, unsigned, TLBOP_INV);
+  ADD_NDS32_BUILTIN0 ("tlbop_flua", void, TLBOP_FLUA);
+
+  /* Unaligned Load/Store  */
+  ADD_NDS32_BUILTIN1 ("unaligned_load_hw", short_unsigned, ptr_ushort,
+		      UALOAD_HW);
+  ADD_NDS32_BUILTIN1 ("unaligned_load_w", unsigned, ptr_uint, UALOAD_W);
+  ADD_NDS32_BUILTIN1 ("unaligned_load_dw", long_long_unsigned, ptr_ulong,
+		      UALOAD_DW);
+  ADD_NDS32_BUILTIN2 ("unaligned_store_hw", void, ptr_ushort, short_unsigned,
+		      UASTORE_HW);
+  ADD_NDS32_BUILTIN2 ("unaligned_store_w", void, ptr_uint, unsigned, UASTORE_W);
+  ADD_NDS32_BUILTIN2 ("unaligned_store_dw", void, ptr_ulong, long_long_unsigned,
+		      UASTORE_DW);
+  ADD_NDS32_BUILTIN0 ("unaligned_feature", unsigned, UNALIGNED_FEATURE);
+  ADD_NDS32_BUILTIN0 ("enable_unaligned", void, ENABLE_UNALIGNED);
+  ADD_NDS32_BUILTIN0 ("disable_unaligned", void, DISABLE_UNALIGNED);
+
+  /* Instruction sequence protection  */
+  ADD_NDS32_BUILTIN0 ("signature_begin", void, SIGNATURE_BEGIN);
+  ADD_NDS32_BUILTIN0 ("signature_end", void, SIGNATURE_END);
+
+  /* DSP Extension: SIMD 16bit Add and Subtract.  */
+  ADD_NDS32_BUILTIN2 ("add16", unsigned, unsigned, unsigned, ADD16);
+  ADD_NDS32_BUILTIN2 ("v_uadd16", u_v2hi, u_v2hi, u_v2hi, V_UADD16);
+  ADD_NDS32_BUILTIN2 ("v_sadd16", v2hi, v2hi, v2hi, V_SADD16);
+  ADD_NDS32_BUILTIN2 ("radd16", unsigned, unsigned, unsigned, RADD16);
+  ADD_NDS32_BUILTIN2 ("v_radd16", v2hi, v2hi, v2hi, V_RADD16);
+  ADD_NDS32_BUILTIN2 ("uradd16", unsigned, unsigned, unsigned, URADD16);
+  ADD_NDS32_BUILTIN2 ("v_uradd16", u_v2hi, u_v2hi, u_v2hi, V_URADD16);
+  ADD_NDS32_BUILTIN2 ("kadd16", unsigned, unsigned, unsigned, KADD16);
+  ADD_NDS32_BUILTIN2 ("v_kadd16", v2hi, v2hi, v2hi, V_KADD16);
+  ADD_NDS32_BUILTIN2 ("ukadd16", unsigned, unsigned, unsigned, UKADD16);
+  ADD_NDS32_BUILTIN2 ("v_ukadd16", u_v2hi, u_v2hi, u_v2hi, V_UKADD16);
+  ADD_NDS32_BUILTIN2 ("sub16", unsigned, unsigned, unsigned, SUB16);
+  ADD_NDS32_BUILTIN2 ("v_usub16", u_v2hi, u_v2hi, u_v2hi, V_USUB16);
+  ADD_NDS32_BUILTIN2 ("v_ssub16", v2hi, v2hi, v2hi, V_SSUB16);
+  ADD_NDS32_BUILTIN2 ("rsub16", unsigned, unsigned, unsigned, RSUB16);
+  ADD_NDS32_BUILTIN2 ("v_rsub16", v2hi, v2hi, v2hi, V_RSUB16);
+  ADD_NDS32_BUILTIN2 ("ursub16", unsigned, unsigned, unsigned, URSUB16);
+  ADD_NDS32_BUILTIN2 ("v_ursub16", u_v2hi, u_v2hi, u_v2hi, V_URSUB16);
+  ADD_NDS32_BUILTIN2 ("ksub16", unsigned, unsigned, unsigned, KSUB16);
+  ADD_NDS32_BUILTIN2 ("v_ksub16", v2hi, v2hi, v2hi, V_KSUB16);
+  ADD_NDS32_BUILTIN2 ("uksub16", unsigned, unsigned, unsigned, UKSUB16);
+  ADD_NDS32_BUILTIN2 ("v_uksub16", u_v2hi, u_v2hi, u_v2hi, V_UKSUB16);
+  ADD_NDS32_BUILTIN2 ("cras16", unsigned, unsigned, unsigned, CRAS16);
+  ADD_NDS32_BUILTIN2 ("v_ucras16", u_v2hi, u_v2hi, u_v2hi, V_UCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_scras16", v2hi, v2hi, v2hi, V_SCRAS16);
+  ADD_NDS32_BUILTIN2 ("rcras16", unsigned, unsigned, unsigned, RCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_rcras16", v2hi, v2hi, v2hi, V_RCRAS16);
+  ADD_NDS32_BUILTIN2 ("urcras16", unsigned, unsigned, unsigned, URCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_urcras16", u_v2hi, u_v2hi, u_v2hi, V_URCRAS16);
+  ADD_NDS32_BUILTIN2 ("kcras16", unsigned, unsigned, unsigned, KCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_kcras16", v2hi, v2hi, v2hi, V_KCRAS16);
+  ADD_NDS32_BUILTIN2 ("ukcras16", unsigned, unsigned, unsigned, UKCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_ukcras16", u_v2hi, u_v2hi, u_v2hi, V_UKCRAS16);
+  ADD_NDS32_BUILTIN2 ("crsa16", unsigned, unsigned, unsigned, CRSA16);
+  ADD_NDS32_BUILTIN2 ("v_ucrsa16", u_v2hi, u_v2hi, u_v2hi, V_UCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_scrsa16", v2hi, v2hi, v2hi, V_SCRSA16);
+  ADD_NDS32_BUILTIN2 ("rcrsa16", unsigned, unsigned, unsigned, RCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_rcrsa16", v2hi, v2hi, v2hi, V_RCRSA16);
+  ADD_NDS32_BUILTIN2 ("urcrsa16", unsigned, unsigned, unsigned, URCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_urcrsa16", u_v2hi, u_v2hi, u_v2hi, V_URCRSA16);
+  ADD_NDS32_BUILTIN2 ("kcrsa16", unsigned, unsigned, unsigned, KCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_kcrsa16", v2hi, v2hi, v2hi, V_KCRSA16);
+  ADD_NDS32_BUILTIN2 ("ukcrsa16", unsigned, unsigned, unsigned, UKCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_ukcrsa16", u_v2hi, u_v2hi, u_v2hi, V_UKCRSA16);
+
+  /* DSP Extension: SIMD 8bit Add and Subtract.  */
+  ADD_NDS32_BUILTIN2 ("add8", integer, integer, integer, ADD8);
+  ADD_NDS32_BUILTIN2 ("v_uadd8", u_v4qi, u_v4qi, u_v4qi, V_UADD8);
+  ADD_NDS32_BUILTIN2 ("v_sadd8", v4qi, v4qi, v4qi, V_SADD8);
+  ADD_NDS32_BUILTIN2 ("radd8", unsigned, unsigned, unsigned, RADD8);
+  ADD_NDS32_BUILTIN2 ("v_radd8", v4qi, v4qi, v4qi, V_RADD8);
+  ADD_NDS32_BUILTIN2 ("uradd8", unsigned, unsigned, unsigned, URADD8);
+  ADD_NDS32_BUILTIN2 ("v_uradd8", u_v4qi, u_v4qi, u_v4qi, V_URADD8);
+  ADD_NDS32_BUILTIN2 ("kadd8", unsigned, unsigned, unsigned, KADD8);
+  ADD_NDS32_BUILTIN2 ("v_kadd8", v4qi, v4qi, v4qi, V_KADD8);
+  ADD_NDS32_BUILTIN2 ("ukadd8", unsigned, unsigned, unsigned, UKADD8);
+  ADD_NDS32_BUILTIN2 ("v_ukadd8", u_v4qi, u_v4qi, u_v4qi, V_UKADD8);
+  ADD_NDS32_BUILTIN2 ("sub8", integer, integer, integer, SUB8);
+  ADD_NDS32_BUILTIN2 ("v_usub8", u_v4qi, u_v4qi, u_v4qi, V_USUB8);
+  ADD_NDS32_BUILTIN2 ("v_ssub8", v4qi, v4qi, v4qi, V_SSUB8);
+  ADD_NDS32_BUILTIN2 ("rsub8", unsigned, unsigned, unsigned, RSUB8);
+  ADD_NDS32_BUILTIN2 ("v_rsub8", v4qi, v4qi, v4qi, V_RSUB8);
+  ADD_NDS32_BUILTIN2 ("ursub8", unsigned, unsigned, unsigned, URSUB8);
+  ADD_NDS32_BUILTIN2 ("v_ursub8", u_v4qi, u_v4qi, u_v4qi, V_URSUB8);
+  ADD_NDS32_BUILTIN2 ("ksub8", unsigned, unsigned, unsigned, KSUB8);
+  ADD_NDS32_BUILTIN2 ("v_ksub8", v4qi, v4qi, v4qi, V_KSUB8);
+  ADD_NDS32_BUILTIN2 ("uksub8", unsigned, unsigned, unsigned, UKSUB8);
+  ADD_NDS32_BUILTIN2 ("v_uksub8", u_v4qi, u_v4qi, u_v4qi, V_UKSUB8);
+
+  /* DSP Extension: SIMD 16bit Shift.  */
+  ADD_NDS32_BUILTIN2 ("sra16", unsigned, unsigned, unsigned, SRA16);
+  ADD_NDS32_BUILTIN2 ("v_sra16", v2hi, v2hi, unsigned, V_SRA16);
+  ADD_NDS32_BUILTIN2 ("sra16_u", unsigned, unsigned, unsigned, SRA16_U);
+  ADD_NDS32_BUILTIN2 ("v_sra16_u", v2hi, v2hi, unsigned, V_SRA16_U);
+  ADD_NDS32_BUILTIN2 ("srl16", unsigned, unsigned, unsigned, SRL16);
+  ADD_NDS32_BUILTIN2 ("v_srl16", u_v2hi, u_v2hi, unsigned, V_SRL16);
+  ADD_NDS32_BUILTIN2 ("srl16_u", unsigned, unsigned, unsigned, SRL16_U);
+  ADD_NDS32_BUILTIN2 ("v_srl16_u", u_v2hi, u_v2hi, unsigned, V_SRL16_U);
+  ADD_NDS32_BUILTIN2 ("sll16", unsigned, unsigned, unsigned, SLL16);
+  ADD_NDS32_BUILTIN2 ("v_sll16", u_v2hi, u_v2hi, unsigned, V_SLL16);
+  ADD_NDS32_BUILTIN2 ("ksll16", unsigned, unsigned, unsigned, KSLL16);
+  ADD_NDS32_BUILTIN2 ("v_ksll16", v2hi, v2hi, unsigned, V_KSLL16);
+  ADD_NDS32_BUILTIN2 ("kslra16", unsigned, unsigned, unsigned, KSLRA16);
+  ADD_NDS32_BUILTIN2 ("v_kslra16", v2hi, v2hi, unsigned, V_KSLRA16);
+  ADD_NDS32_BUILTIN2 ("kslra16_u", unsigned, unsigned, unsigned, KSLRA16_U);
+  ADD_NDS32_BUILTIN2 ("v_kslra16_u", v2hi, v2hi, unsigned, V_KSLRA16_U);
+
+  /* DSP Extension: 16bit Compare.  */
+  ADD_NDS32_BUILTIN2 ("cmpeq16", unsigned, unsigned, unsigned, CMPEQ16);
+  ADD_NDS32_BUILTIN2 ("v_scmpeq16", u_v2hi, v2hi, v2hi, V_SCMPEQ16);
+  ADD_NDS32_BUILTIN2 ("v_ucmpeq16", u_v2hi, u_v2hi, u_v2hi, V_UCMPEQ16);
+  ADD_NDS32_BUILTIN2 ("scmplt16", unsigned, unsigned, unsigned, SCMPLT16);
+  ADD_NDS32_BUILTIN2 ("v_scmplt16", u_v2hi, v2hi, v2hi, V_SCMPLT16);
+  ADD_NDS32_BUILTIN2 ("scmple16", unsigned, unsigned, unsigned, SCMPLE16);
+  ADD_NDS32_BUILTIN2 ("v_scmple16", u_v2hi, v2hi, v2hi, V_SCMPLE16);
+  ADD_NDS32_BUILTIN2 ("ucmplt16", unsigned, unsigned, unsigned, UCMPLT16);
+  ADD_NDS32_BUILTIN2 ("v_ucmplt16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLT16);
+  ADD_NDS32_BUILTIN2 ("ucmple16", unsigned, unsigned, unsigned, UCMPLE16);
+  ADD_NDS32_BUILTIN2 ("v_ucmple16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLE16);
+
+  /* DSP Extension: 8bit Compare.  */
+  ADD_NDS32_BUILTIN2 ("cmpeq8", unsigned, unsigned, unsigned, CMPEQ8);
+  ADD_NDS32_BUILTIN2 ("v_scmpeq8", u_v4qi, v4qi, v4qi, V_SCMPEQ8);
+  ADD_NDS32_BUILTIN2 ("v_ucmpeq8", u_v4qi, u_v4qi, u_v4qi, V_UCMPEQ8);
+  ADD_NDS32_BUILTIN2 ("scmplt8", unsigned, unsigned, unsigned, SCMPLT8);
+  ADD_NDS32_BUILTIN2 ("v_scmplt8", u_v4qi, v4qi, v4qi, V_SCMPLT8);
+  ADD_NDS32_BUILTIN2 ("scmple8", unsigned, unsigned, unsigned, SCMPLE8);
+  ADD_NDS32_BUILTIN2 ("v_scmple8", u_v4qi, v4qi, v4qi, V_SCMPLE8);
+  ADD_NDS32_BUILTIN2 ("ucmplt8", unsigned, unsigned, unsigned, UCMPLT8);
+  ADD_NDS32_BUILTIN2 ("v_ucmplt8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLT8);
+  ADD_NDS32_BUILTIN2 ("ucmple8", unsigned, unsigned, unsigned, UCMPLE8);
+  ADD_NDS32_BUILTIN2 ("v_ucmple8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLE8);
+
+  /* DSP Extension: SIMD 16bit MISC.  */
+  ADD_NDS32_BUILTIN2 ("smin16", unsigned, unsigned, unsigned, SMIN16);
+  ADD_NDS32_BUILTIN2 ("v_smin16", v2hi, v2hi, v2hi, V_SMIN16);
+  ADD_NDS32_BUILTIN2 ("umin16", unsigned, unsigned, unsigned, UMIN16);
+  ADD_NDS32_BUILTIN2 ("v_umin16", u_v2hi, u_v2hi, u_v2hi, V_UMIN16);
+  ADD_NDS32_BUILTIN2 ("smax16", unsigned, unsigned, unsigned, SMAX16);
+  ADD_NDS32_BUILTIN2 ("v_smax16", v2hi, v2hi, v2hi, V_SMAX16);
+  ADD_NDS32_BUILTIN2 ("umax16", unsigned, unsigned, unsigned, UMAX16);
+  ADD_NDS32_BUILTIN2 ("v_umax16", u_v2hi, u_v2hi, u_v2hi, V_UMAX16);
+  ADD_NDS32_BUILTIN2 ("sclip16", unsigned, unsigned, unsigned, SCLIP16);
+  ADD_NDS32_BUILTIN2 ("v_sclip16", v2hi, v2hi, unsigned, V_SCLIP16);
+  ADD_NDS32_BUILTIN2 ("uclip16", unsigned, unsigned, unsigned, UCLIP16);
+  ADD_NDS32_BUILTIN2 ("v_uclip16", v2hi, v2hi, unsigned, V_UCLIP16);
+  ADD_NDS32_BUILTIN2 ("khm16", unsigned, unsigned, unsigned, KHM16);
+  ADD_NDS32_BUILTIN2 ("v_khm16", v2hi, v2hi, v2hi, V_KHM16);
+  ADD_NDS32_BUILTIN2 ("khmx16", unsigned, unsigned, unsigned, KHMX16);
+  ADD_NDS32_BUILTIN2 ("v_khmx16", v2hi, v2hi, v2hi, V_KHMX16);
+  ADD_NDS32_BUILTIN1 ("kabs16", unsigned, unsigned, KABS16);
+  ADD_NDS32_BUILTIN1 ("v_kabs16", v2hi, v2hi, V_KABS16);
+  ADD_NDS32_BUILTIN2 ("smul16", long_long_unsigned, unsigned, unsigned, SMUL16);
+  ADD_NDS32_BUILTIN2 ("v_smul16", v2si, v2hi, v2hi, V_SMUL16);
+  ADD_NDS32_BUILTIN2 ("smulx16",
+		      long_long_unsigned, unsigned, unsigned, SMULX16);
+  ADD_NDS32_BUILTIN2 ("v_smulx16", v2si, v2hi, v2hi, V_SMULX16);
+  ADD_NDS32_BUILTIN2 ("umul16", long_long_unsigned, unsigned, unsigned, UMUL16);
+  ADD_NDS32_BUILTIN2 ("v_umul16", u_v2si, u_v2hi, u_v2hi, V_UMUL16);
+  ADD_NDS32_BUILTIN2 ("umulx16",
+		      long_long_unsigned, unsigned, unsigned, UMULX16);
+  ADD_NDS32_BUILTIN2 ("v_umulx16", u_v2si, u_v2hi, u_v2hi, V_UMULX16);
+
+  /* DSP Extension: SIMD 8bit MISC.  */
+  ADD_NDS32_BUILTIN2 ("smin8", unsigned, unsigned, unsigned, SMIN8);
+  ADD_NDS32_BUILTIN2 ("v_smin8", v4qi, v4qi, v4qi, V_SMIN8);
+  ADD_NDS32_BUILTIN2 ("umin8", unsigned, unsigned, unsigned, UMIN8);
+  ADD_NDS32_BUILTIN2 ("v_umin8", u_v4qi, u_v4qi, u_v4qi, V_UMIN8);
+  ADD_NDS32_BUILTIN2 ("smax8", unsigned, unsigned, unsigned, SMAX8);
+  ADD_NDS32_BUILTIN2 ("v_smax8", v4qi, v4qi, v4qi, V_SMAX8);
+  ADD_NDS32_BUILTIN2 ("umax8", unsigned, unsigned, unsigned, UMAX8);
+  ADD_NDS32_BUILTIN2 ("v_umax8", u_v4qi, u_v4qi, u_v4qi, V_UMAX8);
+  ADD_NDS32_BUILTIN1 ("kabs8", unsigned, unsigned, KABS8);
+  ADD_NDS32_BUILTIN1 ("v_kabs8", v4qi, v4qi, V_KABS8);
+
+  /* DSP Extension: 8bit Unpacking.  */
+  ADD_NDS32_BUILTIN1 ("sunpkd810", unsigned, unsigned, SUNPKD810);
+  ADD_NDS32_BUILTIN1 ("v_sunpkd810", v2hi, v4qi, V_SUNPKD810);
+  ADD_NDS32_BUILTIN1 ("sunpkd820", unsigned, unsigned, SUNPKD820);
+  ADD_NDS32_BUILTIN1 ("v_sunpkd820", v2hi, v4qi, V_SUNPKD820);
+  ADD_NDS32_BUILTIN1 ("sunpkd830", unsigned, unsigned, SUNPKD830);
+  ADD_NDS32_BUILTIN1 ("v_sunpkd830", v2hi, v4qi, V_SUNPKD830);
+  ADD_NDS32_BUILTIN1 ("sunpkd831", unsigned, unsigned, SUNPKD831);
+  ADD_NDS32_BUILTIN1 ("v_sunpkd831", v2hi, v4qi, V_SUNPKD831);
+  ADD_NDS32_BUILTIN1 ("zunpkd810", unsigned, unsigned, ZUNPKD810);
+  ADD_NDS32_BUILTIN1 ("v_zunpkd810", u_v2hi, u_v4qi, V_ZUNPKD810);
+  ADD_NDS32_BUILTIN1 ("zunpkd820", unsigned, unsigned, ZUNPKD820);
+  ADD_NDS32_BUILTIN1 ("v_zunpkd820", u_v2hi, u_v4qi, V_ZUNPKD820);
+  ADD_NDS32_BUILTIN1 ("zunpkd830", unsigned, unsigned, ZUNPKD830);
+  ADD_NDS32_BUILTIN1 ("v_zunpkd830", u_v2hi, u_v4qi, V_ZUNPKD830);
+  ADD_NDS32_BUILTIN1 ("zunpkd831", unsigned, unsigned, ZUNPKD831);
+  ADD_NDS32_BUILTIN1 ("v_zunpkd831", u_v2hi, u_v4qi, V_ZUNPKD831);
+
+  /* DSP Extension: 32bit Add and Subtract.  */
+  ADD_NDS32_BUILTIN2 ("raddw", integer, integer, integer, RADDW);
+  ADD_NDS32_BUILTIN2 ("uraddw", unsigned, unsigned, unsigned, URADDW);
+  ADD_NDS32_BUILTIN2 ("rsubw", integer, integer, integer, RSUBW);
+  ADD_NDS32_BUILTIN2 ("ursubw", unsigned, unsigned, unsigned, URSUBW);
+
+  /* DSP Extension: 32bit Shift.  */
+  ADD_NDS32_BUILTIN2 ("sra_u", integer, integer, unsigned, SRA_U);
+  ADD_NDS32_BUILTIN2 ("ksll", integer, integer, unsigned, KSLL);
+
+  /* DSP Extension: 16bit Packing.  */
+  ADD_NDS32_BUILTIN2 ("pkbb16", unsigned, unsigned, unsigned, PKBB16);
+  ADD_NDS32_BUILTIN2 ("v_pkbb16", u_v2hi, u_v2hi, u_v2hi, V_PKBB16);
+  ADD_NDS32_BUILTIN2 ("pkbt16", unsigned, unsigned, unsigned, PKBT16);
+  ADD_NDS32_BUILTIN2 ("v_pkbt16", u_v2hi, u_v2hi, u_v2hi, V_PKBT16);
+  ADD_NDS32_BUILTIN2 ("pktb16", unsigned, unsigned, unsigned, PKTB16);
+  ADD_NDS32_BUILTIN2 ("v_pktb16", u_v2hi, u_v2hi, u_v2hi, V_PKTB16);
+  ADD_NDS32_BUILTIN2 ("pktt16", unsigned, unsigned, unsigned, PKTT16);
+  ADD_NDS32_BUILTIN2 ("v_pktt16", u_v2hi, u_v2hi, u_v2hi, V_PKTT16);
+
+  /* DSP Extension: Signed MSW 32x32 Multiply and ADD.  */
+  ADD_NDS32_BUILTIN2 ("smmul", integer, integer, integer, SMMUL);
+  ADD_NDS32_BUILTIN2 ("smmul_u", integer, integer, integer, SMMUL_U);
+  ADD_NDS32_BUILTIN3 ("kmmac", integer, integer, integer, integer, KMMAC);
+  ADD_NDS32_BUILTIN3 ("kmmac_u", integer, integer, integer, integer, KMMAC_U);
+  ADD_NDS32_BUILTIN3 ("kmmsb", integer, integer, integer, integer, KMMSB);
+  ADD_NDS32_BUILTIN3 ("kmmsb_u", integer, integer, integer, integer, KMMSB_U);
+  ADD_NDS32_BUILTIN2 ("kwmmul", integer, integer, integer, KWMMUL);
+  ADD_NDS32_BUILTIN2 ("kwmmul_u", integer, integer, integer, KWMMUL_U);
+
+  /* DSP Extension: Most Significant Word 32x16 Multiply and ADD.  */
+  ADD_NDS32_BUILTIN2 ("smmwb", integer, integer, unsigned, SMMWB);
+  ADD_NDS32_BUILTIN2 ("v_smmwb", integer, integer, v2hi, V_SMMWB);
+  ADD_NDS32_BUILTIN2 ("smmwb_u", integer, integer, unsigned, SMMWB_U);
+  ADD_NDS32_BUILTIN2 ("v_smmwb_u", integer, integer, v2hi, V_SMMWB_U);
+  ADD_NDS32_BUILTIN2 ("smmwt", integer, integer, unsigned, SMMWT);
+  ADD_NDS32_BUILTIN2 ("v_smmwt", integer, integer, v2hi, V_SMMWT);
+  ADD_NDS32_BUILTIN2 ("smmwt_u", integer, integer, unsigned, SMMWT_U);
+  ADD_NDS32_BUILTIN2 ("v_smmwt_u", integer, integer, v2hi, V_SMMWT_U);
+  ADD_NDS32_BUILTIN3 ("kmmawb", integer, integer, integer, unsigned, KMMAWB);
+  ADD_NDS32_BUILTIN3 ("v_kmmawb", integer, integer, integer, v2hi, V_KMMAWB);
+  ADD_NDS32_BUILTIN3 ("kmmawb_u",
+		      integer, integer, integer, unsigned, KMMAWB_U);
+  ADD_NDS32_BUILTIN3 ("v_kmmawb_u",
+		      integer, integer, integer, v2hi, V_KMMAWB_U);
+  ADD_NDS32_BUILTIN3 ("kmmawt", integer, integer, integer, unsigned, KMMAWT);
+  ADD_NDS32_BUILTIN3 ("v_kmmawt", integer, integer, integer, v2hi, V_KMMAWT);
+  ADD_NDS32_BUILTIN3 ("kmmawt_u",
+		      integer, integer, integer, unsigned, KMMAWT_U);
+  ADD_NDS32_BUILTIN3 ("v_kmmawt_u",
+		      integer, integer, integer, v2hi, V_KMMAWT_U);
+
+  /* DSP Extension: Signed 16bit Multiply with ADD/Subtract.  */
+  ADD_NDS32_BUILTIN2 ("smbb", integer, unsigned, unsigned, SMBB);
+  ADD_NDS32_BUILTIN2 ("v_smbb", integer, v2hi, v2hi, V_SMBB);
+  ADD_NDS32_BUILTIN2 ("smbt", integer, unsigned, unsigned, SMBT);
+  ADD_NDS32_BUILTIN2 ("v_smbt", integer, v2hi, v2hi, V_SMBT);
+  ADD_NDS32_BUILTIN2 ("smtt", integer, unsigned, unsigned, SMTT);
+  ADD_NDS32_BUILTIN2 ("v_smtt", integer, v2hi, v2hi, V_SMTT);
+  ADD_NDS32_BUILTIN2 ("kmda", integer, unsigned, unsigned, KMDA);
+  ADD_NDS32_BUILTIN2 ("v_kmda", integer, v2hi, v2hi, V_KMDA);
+  ADD_NDS32_BUILTIN2 ("kmxda", integer, unsigned, unsigned, KMXDA);
+  ADD_NDS32_BUILTIN2 ("v_kmxda", integer, v2hi, v2hi, V_KMXDA);
+  ADD_NDS32_BUILTIN2 ("smds", integer, unsigned, unsigned, SMDS);
+  ADD_NDS32_BUILTIN2 ("v_smds", integer, v2hi, v2hi, V_SMDS);
+  ADD_NDS32_BUILTIN2 ("smdrs", integer, unsigned, unsigned, SMDRS);
+  ADD_NDS32_BUILTIN2 ("v_smdrs", integer, v2hi, v2hi, V_SMDRS);
+  ADD_NDS32_BUILTIN2 ("smxds", integer, unsigned, unsigned, SMXDS);
+  ADD_NDS32_BUILTIN2 ("v_smxds", integer, v2hi, v2hi, V_SMXDS);
+  ADD_NDS32_BUILTIN3 ("kmabb", integer, integer, unsigned, unsigned, KMABB);
+  ADD_NDS32_BUILTIN3 ("v_kmabb", integer, integer, v2hi, v2hi, V_KMABB);
+  ADD_NDS32_BUILTIN3 ("kmabt", integer, integer, unsigned, unsigned, KMABT);
+  ADD_NDS32_BUILTIN3 ("v_kmabt", integer, integer, v2hi, v2hi, V_KMABT);
+  ADD_NDS32_BUILTIN3 ("kmatt", integer, integer, unsigned, unsigned, KMATT);
+  ADD_NDS32_BUILTIN3 ("v_kmatt", integer, integer, v2hi, v2hi, V_KMATT);
+  ADD_NDS32_BUILTIN3 ("kmada", integer, integer, unsigned, unsigned, KMADA);
+  ADD_NDS32_BUILTIN3 ("v_kmada", integer, integer, v2hi, v2hi, V_KMADA);
+  ADD_NDS32_BUILTIN3 ("kmaxda", integer, integer, unsigned, unsigned, KMAXDA);
+  ADD_NDS32_BUILTIN3 ("v_kmaxda", integer, integer, v2hi, v2hi, V_KMAXDA);
+  ADD_NDS32_BUILTIN3 ("kmads", integer, integer, unsigned, unsigned, KMADS);
+  ADD_NDS32_BUILTIN3 ("v_kmads", integer, integer, v2hi, v2hi, V_KMADS);
+  ADD_NDS32_BUILTIN3 ("kmadrs", integer, integer, unsigned, unsigned, KMADRS);
+  ADD_NDS32_BUILTIN3 ("v_kmadrs", integer, integer, v2hi, v2hi, V_KMADRS);
+  ADD_NDS32_BUILTIN3 ("kmaxds", integer, integer, unsigned, unsigned, KMAXDS);
+  ADD_NDS32_BUILTIN3 ("v_kmaxds", integer, integer, v2hi, v2hi, V_KMAXDS);
+  ADD_NDS32_BUILTIN3 ("kmsda", integer, integer, unsigned, unsigned, KMSDA);
+  ADD_NDS32_BUILTIN3 ("v_kmsda", integer, integer, v2hi, v2hi, V_KMSDA);
+  ADD_NDS32_BUILTIN3 ("kmsxda", integer, integer, unsigned, unsigned, KMSXDA);
+  ADD_NDS32_BUILTIN3 ("v_kmsxda", integer, integer, v2hi, v2hi, V_KMSXDA);
+
+  /* DSP Extension: Signed 16bit Multiply with 64bit ADD/Subtract.  */
+  ADD_NDS32_BUILTIN2 ("smal", long_long_integer,
+		      long_long_integer, unsigned, SMAL);
+  ADD_NDS32_BUILTIN2 ("v_smal", long_long_integer,
+		      long_long_integer, v2hi, V_SMAL);
+
+  /* DSP Extension: 32bit MISC.  */
+  ADD_NDS32_BUILTIN2 ("bitrev", unsigned, unsigned, unsigned, BITREV);
+  ADD_NDS32_BUILTIN2 ("wext", unsigned, long_long_integer, unsigned, WEXT);
+  ADD_NDS32_BUILTIN3 ("bpick", unsigned, unsigned, unsigned, unsigned, BPICK);
+  ADD_NDS32_BUILTIN3 ("insb", unsigned, unsigned, unsigned, unsigned, INSB);
+
+  /* DSP Extension: 64bit Add and Subtract.  */
+  ADD_NDS32_BUILTIN2 ("sadd64", long_long_integer,
+		      long_long_integer, long_long_integer, SADD64);
+  ADD_NDS32_BUILTIN2 ("uadd64", long_long_unsigned,
+		      long_long_unsigned, long_long_unsigned, UADD64);
+  ADD_NDS32_BUILTIN2 ("radd64", long_long_integer,
+		      long_long_integer, long_long_integer, RADD64);
+  ADD_NDS32_BUILTIN2 ("uradd64", long_long_unsigned,
+		      long_long_unsigned, long_long_unsigned, URADD64);
+  ADD_NDS32_BUILTIN2 ("kadd64", long_long_integer,
+		      long_long_integer, long_long_integer, KADD64);
+  ADD_NDS32_BUILTIN2 ("ukadd64", long_long_unsigned,
+		      long_long_unsigned, long_long_unsigned, UKADD64);
+  ADD_NDS32_BUILTIN2 ("ssub64", long_long_integer,
+		      long_long_integer, long_long_integer, SSUB64);
+  ADD_NDS32_BUILTIN2 ("usub64", long_long_unsigned,
+		      long_long_unsigned, long_long_unsigned, USUB64);
+  ADD_NDS32_BUILTIN2 ("rsub64", long_long_integer,
+		      long_long_integer, long_long_integer, RSUB64);
+  ADD_NDS32_BUILTIN2 ("ursub64", long_long_unsigned,
+		      long_long_unsigned, long_long_unsigned, URSUB64);
+  ADD_NDS32_BUILTIN2 ("ksub64", long_long_integer,
+		      long_long_integer, long_long_integer, KSUB64);
+  ADD_NDS32_BUILTIN2 ("uksub64", long_long_unsigned,
+		      long_long_unsigned, long_long_unsigned, UKSUB64);
+
+  /* DSP Extension: 32bit Multiply with 64bit Add/Subtract.  */
+  ADD_NDS32_BUILTIN3 ("smar64", long_long_integer,
+		      long_long_integer, integer, integer, SMAR64);
+  ADD_NDS32_BUILTIN3 ("smsr64", long_long_integer,
+		      long_long_integer, integer, integer, SMSR64);
+  ADD_NDS32_BUILTIN3 ("umar64", long_long_unsigned,
+		      long_long_unsigned, unsigned, unsigned, UMAR64);
+  ADD_NDS32_BUILTIN3 ("umsr64", long_long_unsigned,
+		      long_long_unsigned, unsigned, unsigned, UMSR64);
+  ADD_NDS32_BUILTIN3 ("kmar64", long_long_integer,
+		      long_long_integer, integer, integer, KMAR64);
+  ADD_NDS32_BUILTIN3 ("kmsr64", long_long_integer,
+		      long_long_integer, integer, integer, KMSR64);
+  ADD_NDS32_BUILTIN3 ("ukmar64", long_long_unsigned,
+		      long_long_unsigned, unsigned, unsigned, UKMAR64);
+  ADD_NDS32_BUILTIN3 ("ukmsr64", long_long_unsigned,
+		      long_long_unsigned, unsigned, unsigned, UKMSR64);
+
+  /* DSP Extension: Signed 16bit Multiply with 64bit Add/Subtract.  */
+  ADD_NDS32_BUILTIN3 ("smalbb", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALBB);
+  ADD_NDS32_BUILTIN3 ("v_smalbb", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALBB);
+  ADD_NDS32_BUILTIN3 ("smalbt", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALBT);
+  ADD_NDS32_BUILTIN3 ("v_smalbt", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALBT);
+  ADD_NDS32_BUILTIN3 ("smaltt", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALTT);
+  ADD_NDS32_BUILTIN3 ("v_smaltt", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALTT);
+  ADD_NDS32_BUILTIN3 ("smalda", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALDA);
+  ADD_NDS32_BUILTIN3 ("v_smalda", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALDA);
+  ADD_NDS32_BUILTIN3 ("smalxda", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALXDA);
+  ADD_NDS32_BUILTIN3 ("v_smalxda", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALXDA);
+  ADD_NDS32_BUILTIN3 ("smalds", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALDS);
+  ADD_NDS32_BUILTIN3 ("v_smalds", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALDS);
+  ADD_NDS32_BUILTIN3 ("smaldrs", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALDRS);
+  ADD_NDS32_BUILTIN3 ("v_smaldrs", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALDRS);
+  ADD_NDS32_BUILTIN3 ("smalxds", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALXDS);
+  ADD_NDS32_BUILTIN3 ("v_smalxds", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALXDS);
+  ADD_NDS32_BUILTIN3 ("smslda", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMSLDA);
+  ADD_NDS32_BUILTIN3 ("v_smslda", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMSLDA);
+  ADD_NDS32_BUILTIN3 ("smslxda", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMSLXDA);
+  ADD_NDS32_BUILTIN3 ("v_smslxda", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMSLXDA);
+
+  /* DSP Extension: augmented baseline.  */
+  ADD_NDS32_BUILTIN2 ("uclip32", unsigned, integer, unsigned, UCLIP32);
+  ADD_NDS32_BUILTIN2 ("sclip32", integer, integer, unsigned, SCLIP32);
+  ADD_NDS32_BUILTIN1 ("kabs", integer, integer, KABS);
+
+  /* Builtin to turn off the hardware loop (hwloop) optimization.  */
+  ADD_NDS32_BUILTIN0 ("no_ext_zol", void, NO_HWLOOP);
+
+  /* DSP Extension: vector type unaligned Load/Store  */
+  ADD_NDS32_BUILTIN1 ("get_unaligned_u16x2", u_v2hi, ptr_ushort, UALOAD_U16);
+  ADD_NDS32_BUILTIN1 ("get_unaligned_s16x2", v2hi, ptr_short, UALOAD_S16);
+  ADD_NDS32_BUILTIN1 ("get_unaligned_u8x4", u_v4qi, ptr_uchar, UALOAD_U8);
+  ADD_NDS32_BUILTIN1 ("get_unaligned_s8x4", v4qi, ptr_char, UALOAD_S8);
+  ADD_NDS32_BUILTIN2 ("put_unaligned_u16x2", void, ptr_ushort,
+		      u_v2hi, UASTORE_U16);
+  ADD_NDS32_BUILTIN2 ("put_unaligned_s16x2", void, ptr_short,
+		      v2hi, UASTORE_S16);
+  ADD_NDS32_BUILTIN2 ("put_unaligned_u8x4", void, ptr_uchar,
+		      u_v4qi, UASTORE_U8);
+  ADD_NDS32_BUILTIN2 ("put_unaligned_s8x4", void, ptr_char,
+		      v4qi, UASTORE_S8);
+}
 /* ------------------------------------------------------------------------ */
diff --git a/gcc/config/nds32/nds32-intrinsic.md b/gcc/config/nds32/nds32-intrinsic.md
index 53876c5..6f8b3eb 100644
--- a/gcc/config/nds32/nds32-intrinsic.md
+++ b/gcc/config/nds32/nds32-intrinsic.md
@@ -40,6 +40,26 @@
    (set_attr "length"    "4")]
 )
 
+(define_expand "mtsr_isb"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operand:SI 1 "immediate_operand" ""))]
+  ""
+{
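+  /* Write the system register, then emit an isb barrier so the update
+     takes effect before subsequent instructions.  */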
+  emit_insn (gen_unspec_volatile_mtsr (operands[0], operands[1]));
+  emit_insn (gen_unspec_volatile_isb ());
+  DONE;
+})
+
+(define_expand "mtsr_dsb"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operand:SI 1 "immediate_operand" ""))]
+  ""
+{
+  emit_insn (gen_unspec_volatile_mtsr (operands[0], operands[1]));
+  emit_insn (gen_unspec_dsb ());
+  DONE;
+})
+
 (define_insn "unspec_volatile_mtsr"
   [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
 			(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_MTSR)]
@@ -58,6 +78,74 @@
    (set_attr "length"    "4")]
 )
 
+;; FPU Register Transfer.
+
+(define_insn "unspec_fcpynsd"
+   [(set (match_operand:DF 0 "register_operand" "=f")
+	 (unspec:DF [(match_operand:DF 1 "register_operand" "f")
+		     (match_operand:DF 2 "register_operand" "f")] UNSPEC_FCPYNSD))]
+  ""
+  "fcpynsd\t%0, %1, %2"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_fcpynss"
+   [(set (match_operand:SF 0 "register_operand" "=f")
+	 (unspec:SF [(match_operand:SF 1 "register_operand" "f")
+		     (match_operand:SF 2 "register_operand" "f")] UNSPEC_FCPYNSS))]
+  ""
+  "fcpynss\t%0, %1, %2"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_fcpysd"
+   [(set (match_operand:DF 0 "register_operand" "=f")
+	 (unspec:DF [(match_operand:DF 1 "register_operand" "f")
+		     (match_operand:DF 2 "register_operand" "f")] UNSPEC_FCPYSD))]
+  ""
+  "fcpysd\t%0, %1, %2"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_fcpyss"
+   [(set (match_operand:SF 0 "register_operand" "=f")
+	 (unspec:SF [(match_operand:SF 1 "register_operand" "f")
+		     (match_operand:SF 2 "register_operand" "f")] UNSPEC_FCPYSS))]
+  ""
+  "fcpyss\t%0, %1, %2"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_fmfcsr"
+   [(set (match_operand:SI 0 "register_operand" "=r")
+	 (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_FMFCSR))]
+  ""
+  "fmfcsr\t%0"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_fmtcsr"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_FMTCSR)]
+  ""
+  "fmtcsr\t%0"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_fmfcfg"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_FMFCFG))]
+  ""
+  "fmfcfg\t%0"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")]
+)
+
 ;; ------------------------------------------------------------------------
 
 ;; Interrupt Instructions.
@@ -76,6 +164,445 @@
   [(set_attr "type" "misc")]
 )
 
+(define_expand "unspec_enable_int"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_ENABLE_INT)]
+  ""
+{
+  rtx system_reg;
+  rtx temp_reg = gen_reg_rtx (SImode);
+
+  /* Select the system register from nds32_intrinsic_register_names[].  */
+  if ((INTVAL (operands[0]) >= NDS32_INT_H16)
+      && (INTVAL (operands[0]) <= NDS32_INT_H31))
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_MASK2__);
+      operands[0] = GEN_INT (1 << (INTVAL (operands[0])));
+    }
+  else if ((INTVAL (operands[0]) >= NDS32_INT_H32)
+	   && (INTVAL (operands[0]) <= NDS32_INT_H63))
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_MASK3__);
+      operands[0] = GEN_INT (1 << (INTVAL (operands[0]) - 32));
+    }
+  else
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_MASK__);
+
+      if (INTVAL (operands[0]) == NDS32_INT_SWI)
+        operands[0] = GEN_INT (1 << 16);
+      else if ((INTVAL (operands[0]) >= NDS32_INT_ALZ)
+	       && (INTVAL (operands[0]) <= NDS32_INT_DSSIM))
+	operands[0] = GEN_INT (1 << (INTVAL (operands[0]) - 4));
+      else
+	operands[0] = GEN_INT (1 << (INTVAL (operands[0])));
+    }
+
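+  /* Read the interrupt mask register, set the selected bit, write the
+     value back, and emit a dsb barrier.  */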
+  emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
+  emit_insn (gen_iorsi3 (temp_reg, temp_reg, operands[0]));
+  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
+  emit_insn (gen_unspec_dsb ());
+  DONE;
+})
+
+(define_expand "unspec_disable_int"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_DISABLE_INT)]
+  ""
+{
+  rtx system_reg;
+  rtx temp_reg = gen_reg_rtx (SImode);
+
+  /* Select the system register from nds32_intrinsic_register_names[].  */
+  if ((INTVAL (operands[0]) >= NDS32_INT_H16)
+      && (INTVAL (operands[0]) <= NDS32_INT_H31))
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_MASK2__);
+      operands[0] = GEN_INT (~(1 << INTVAL (operands[0])));
+    }
+  else if ((INTVAL (operands[0]) >= NDS32_INT_H32)
+	   && (INTVAL (operands[0]) <= NDS32_INT_H63))
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_MASK3__);
+      operands[0] = GEN_INT (~(1 << (INTVAL (operands[0]) - 32)));
+    }
+  else
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_MASK__);
+
+      if (INTVAL (operands[0]) == NDS32_INT_SWI)
+        operands[0] = GEN_INT (~(1 << 16));
+      else if ((INTVAL (operands[0]) >= NDS32_INT_ALZ)
+	       && (INTVAL (operands[0]) <= NDS32_INT_DSSIM))
+	operands[0] = GEN_INT (~(1 << (INTVAL (operands[0]) - 4)));
+      else
+	operands[0] = GEN_INT (~(1 << INTVAL (operands[0])));
+    }
+
+  emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
+  emit_insn (gen_andsi3 (temp_reg, temp_reg, operands[0]));
+  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
+  emit_insn (gen_unspec_dsb ());
+  DONE;
+})
+
+(define_expand "unspec_set_pending_swint"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SET_PENDING_SWINT)]
+  ""
+{
+  /* Get $INT_PEND system register from nds32_intrinsic_register_names[].  */
+  rtx system_reg =  GEN_INT (__NDS32_REG_INT_PEND__);
+  rtx temp_reg = gen_reg_rtx (SImode);
+
+  emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
+  emit_insn (gen_iorsi3 (temp_reg, temp_reg, GEN_INT (1 << 16)));
+  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
+  emit_insn (gen_unspec_dsb ());
+  DONE;
+})
+
+(define_expand "unspec_clr_pending_swint"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CLR_PENDING_SWINT)]
+  ""
+{
+  /* Get $INT_PEND system register from nds32_intrinsic_register_names[].  */
+  rtx system_reg =  GEN_INT (__NDS32_REG_INT_PEND__);
+  rtx temp_reg = gen_reg_rtx (SImode);
+
+  emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
+  emit_insn (gen_andsi3 (temp_reg, temp_reg, GEN_INT (~(1 << 16))));
+  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
+  emit_insn (gen_unspec_dsb ());
+  DONE;
+})
+
+(define_expand "unspec_clr_pending_hwint"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_CLR_PENDING_HWINT)]
+  ""
+{
+  rtx system_reg = NULL_RTX;
+  rtx temp_reg = gen_reg_rtx (SImode);
+  rtx clr_hwint;
+  unsigned offset = 0;
+
+  /* Select the system register from nds32_intrinsic_register_names[].  */
+  if ((INTVAL (operands[0]) >= NDS32_INT_H0)
+      && (INTVAL (operands[0]) <= NDS32_INT_H15))
+    {
+      system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
+    }
+  else if ((INTVAL (operands[0]) >= NDS32_INT_H16)
+	   && (INTVAL (operands[0]) <= NDS32_INT_H31))
+    {
+      system_reg = GEN_INT (__NDS32_REG_INT_PEND2__);
+    }
+  else if ((INTVAL (operands[0]) >= NDS32_INT_H32)
+	   && (INTVAL (operands[0]) <= NDS32_INT_H63))
+    {
+      system_reg = GEN_INT (__NDS32_REG_INT_PEND3__);
+      offset = 32;
+    }
+  else
+    error ("__nds32__clr_pending_hwint not support NDS32_INT_SWI,"
+	   " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
+
+  /* The $INT_PEND register is write-one-to-clear.  */
+  clr_hwint = GEN_INT (1 << (INTVAL (operands[0]) - offset));
+
+  if (system_reg != NULL_RTX)
+    {
+      emit_move_insn (temp_reg, clr_hwint);
+      emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
+      emit_insn (gen_unspec_dsb ());
+    }
+  DONE;
+})
+
+(define_expand "unspec_get_all_pending_int"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_GET_ALL_PENDING_INT))]
+  ""
+{
+  rtx system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
+  emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
+  emit_insn (gen_unspec_dsb ());
+  DONE;
+})
+
+(define_expand "unspec_get_pending_int"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_GET_PENDING_INT))]
+  ""
+{
+  rtx system_reg = NULL_RTX;
+
+  /* Select the system register from nds32_intrinsic_register_names[].  */
+  if ((INTVAL (operands[1]) >= NDS32_INT_H0)
+      && (INTVAL (operands[1]) <= NDS32_INT_H15))
+    {
+      system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
+      operands[2] = GEN_INT (31 - INTVAL (operands[1]));
+    }
+  else if (INTVAL (operands[1]) == NDS32_INT_SWI)
+    {
+      system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
+      operands[2] = GEN_INT (15);
+    }
+  else if ((INTVAL (operands[1]) >= NDS32_INT_H16)
+	   && (INTVAL (operands[1]) <= NDS32_INT_H31))
+    {
+      system_reg = GEN_INT (__NDS32_REG_INT_PEND2__);
+      operands[2] = GEN_INT (31 - INTVAL (operands[1]));
+    }
+  else if ((INTVAL (operands[1]) >= NDS32_INT_H32)
+	   && (INTVAL (operands[1]) <= NDS32_INT_H63))
+    {
+      system_reg = GEN_INT (__NDS32_REG_INT_PEND3__);
+      operands[2] = GEN_INT (31 - (INTVAL (operands[1]) - 32));
+    }
+  else
+    error ("get_pending_int not support NDS32_INT_ALZ,"
+	   " NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
+
+  /* mfsr op0, system_reg  */
+  if (system_reg != NULL_RTX)
+    {
+      emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
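+      /* Shift the selected pending bit up to bit 31, then shift right
+	 logically by 31 so the result is either 0 or 1.  */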
+      emit_insn (gen_ashlsi3 (operands[0], operands[0], operands[2]));
+      emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31)));
+      emit_insn (gen_unspec_dsb ());
+    }
+  DONE;
+})
+
+(define_expand "unspec_set_int_priority"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")
+			(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_SET_INT_PRIORITY)]
+  ""
+{
+  rtx system_reg = NULL_RTX;
+  rtx priority = NULL_RTX;
+  rtx mask = NULL_RTX;
+  rtx temp_reg = gen_reg_rtx (SImode);
+  rtx mask_reg = gen_reg_rtx (SImode);
+  rtx set_reg = gen_reg_rtx (SImode);
+  unsigned offset = 0;
+
+  /* Select the system register from nds32_intrinsic_register_names[].  */
+  if (INTVAL (operands[0]) <= NDS32_INT_H15)
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_PRI__);
+      offset = 0;
+    }
+  else if (INTVAL (operands[0]) >= NDS32_INT_H16
+	   && INTVAL (operands[0]) <= NDS32_INT_H31)
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_PRI2__);
+      /* The first bit of $INT_PRI2 corresponds to H16, so we need to
+	 subtract 16.  */
+      offset = 16;
+    }
+  else if (INTVAL (operands[0]) >= NDS32_INT_H32
+	   && INTVAL (operands[0]) <= NDS32_INT_H47)
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_PRI3__);
+      /* The first bit of $INT_PRI3 corresponds to H32, so we need to
+	 subtract 32.  */
+      offset = 32;
+    }
+  else if (INTVAL (operands[0]) >= NDS32_INT_H48
+	   && INTVAL (operands[0]) <= NDS32_INT_H63)
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_PRI4__);
+      /* The first bit of $INT_PRI4 corresponds to H48, so we need to
+	 subtract 48.  */
+      offset = 48;
+    }
+  else
+    error ("set_int_priority not support NDS32_INT_SWI,"
+	   " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
+
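+  /* Each interrupt has a 2-bit priority field in the selected $INT_PRI
+     register; clear the old field and OR in the new priority value.  */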
+  mask = GEN_INT (~(3 << 2 * (INTVAL (operands[0]) - offset)));
+  priority = GEN_INT ((int) (INTVAL (operands[1])
+			     << ((INTVAL (operands[0]) - offset) * 2)));
+
+  if (system_reg != NULL_RTX)
+    {
+      emit_move_insn (mask_reg, mask);
+      emit_move_insn (set_reg, priority);
+      emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
+      emit_insn (gen_andsi3 (temp_reg, temp_reg, mask_reg));
+      emit_insn (gen_iorsi3 (temp_reg, temp_reg, set_reg));
+      emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
+      emit_insn (gen_unspec_dsb ());
+    }
+  DONE;
+})
+
+(define_expand "unspec_get_int_priority"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_GET_INT_PRIORITY))]
+  ""
+{
+  rtx system_reg = NULL_RTX;
+  rtx priority = NULL_RTX;
+  unsigned offset = 0;
+
+  /* Select the system register from nds32_intrinsic_register_names[].  */
+  if (INTVAL (operands[1]) <= NDS32_INT_H15)
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_PRI__);
+      offset = 0;
+    }
+  else if (INTVAL (operands[1]) >= NDS32_INT_H16
+	   && INTVAL (operands[1]) <= NDS32_INT_H31)
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_PRI2__);
+      /* The first bit of $INT_PRI2 corresponds to H16, so we need to
+	 subtract 16.  */
+      offset = 16;
+    }
+  else if (INTVAL (operands[1]) >= NDS32_INT_H32
+	   && INTVAL (operands[1]) <= NDS32_INT_H47)
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_PRI3__);
+      /* The first bit of $INT_PRI3 corresponds to H32, so we need to
+	 subtract 32.  */
+      offset = 32;
+    }
+  else if (INTVAL (operands[1]) >= NDS32_INT_H48
+	   && INTVAL (operands[1]) <= NDS32_INT_H63)
+    {
+      system_reg =  GEN_INT (__NDS32_REG_INT_PRI4__);
+      /* The first bit of $INT_PRI4 corresponds to H48, so we need to
+	 subtract 48.  */
+      offset = 48;
+    }
+  else
+    error ("set_int_priority not support NDS32_INT_SWI,"
+	   " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
+
+  priority = GEN_INT (31 - 2 * (INTVAL (operands[1]) - offset));
+
+  if (system_reg != NULL_RTX)
+    {
+      emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
+      emit_insn (gen_ashlsi3 (operands[0], operands[0], priority));
+      emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (30)));
+      emit_insn (gen_unspec_dsb ());
+    }
+  DONE;
+})
+
+(define_expand "unspec_set_trig_level"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_SET_TRIG_LEVEL)]
+  ""
+{
+  rtx system_reg = NULL_RTX;
+  rtx temp_reg = gen_reg_rtx (SImode);
+  rtx set_level;
+  unsigned offset = 0;
+
+  if (INTVAL (operands[0]) >= NDS32_INT_H0
+      && INTVAL (operands[0]) <= NDS32_INT_H31)
+    {
+      system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER__);
+      offset = 0;
+    }
+  else if (INTVAL (operands[0]) >= NDS32_INT_H32
+	   && INTVAL (operands[0]) <= NDS32_INT_H63)
+    {
+      system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER2__);
+      offset = 32;
+    }
+  else
+    error ("__nds32__set_trig_type_level not support NDS32_INT_SWI,"
+	   " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
+
+  if (system_reg != NULL_RTX)
+    {
+      /* TRIGGER register: 0 means level triggered, 1 means edge triggered.  */
+      set_level = GEN_INT (~(1 << (INTVAL (operands[0]) - offset)));
+
+      emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
+      emit_insn (gen_andsi3 (temp_reg, temp_reg, set_level));
+      emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
+    }
+  DONE;
+})
+
+(define_expand "unspec_set_trig_edge"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_SET_TRIG_EDGE)]
+  ""
+{
+  rtx system_reg = NULL_RTX;
+  rtx temp_reg = gen_reg_rtx (SImode);
+  rtx set_level;
+  unsigned offset = 0;
+
+  if (INTVAL (operands[0]) >= NDS32_INT_H0
+      && INTVAL (operands[0]) <= NDS32_INT_H31)
+    {
+      system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER__);
+      offset = 0;
+    }
+  else if (INTVAL (operands[0]) >= NDS32_INT_H32
+	   && INTVAL (operands[0]) <= NDS32_INT_H63)
+    {
+      system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER2__);
+      offset = 32;
+    }
+  else
+    error ("__nds32__set_trig_type_edge not support NDS32_INT_SWI,"
+	   " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
+
+  if (system_reg != NULL_RTX)
+    {
+      /* TRIGGER register: 0 means level triggered, 1 means edge triggered.  */
+      set_level = GEN_INT ((1 << (INTVAL (operands[0]) - offset)));
+
+      emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
+      emit_insn (gen_iorsi3 (temp_reg, temp_reg, set_level));
+      emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
+    }
+  DONE;
+})
+
+(define_expand "unspec_get_trig_type"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_GET_TRIG_TYPE))]
+  ""
+{
+  rtx system_reg = NULL_RTX;
+  rtx trig_type;
+  unsigned offset = 0;
+
+  if (INTVAL (operands[1]) >= NDS32_INT_H0
+      && INTVAL (operands[1]) <= NDS32_INT_H31)
+    {
+      system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER__);
+      offset = 0;
+    }
+  else if (INTVAL (operands[1]) >= NDS32_INT_H32
+	   && INTVAL (operands[1]) <= NDS32_INT_H63)
+    {
+      system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER2__);
+      offset = 32;
+    }
+  else
+    error ("__nds32__get_trig_type not support NDS32_INT_SWI,"
+	   " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
+
+  if (system_reg != NULL_RTX)
+    {
+      trig_type = GEN_INT (31 - (INTVAL (operands[1]) - offset));
+
+      emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
+      emit_insn (gen_ashlsi3 (operands[0], operands[0], trig_type));
+      emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31)));
+      emit_insn (gen_unspec_dsb ());
+    }
+  DONE;
+})
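+
+;; Editorial note: the three trigger patterns above all operate on a single
+;; bit of $INT_TRIGGER (or $INT_TRIGGER2 for H32..H63).  For example, for
+;; H5 the bit position is 5: set_trig_level clears it (level triggered),
+;; set_trig_edge sets it (edge triggered), and get_trig_type extracts it by
+;; shifting left by 31 - 5 = 26 and then logically right by 31.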
+
 ;; ------------------------------------------------------------------------
 
 ;; Cache Synchronization Instructions
@@ -84,7 +611,7 @@
   [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_ISYNC)]
   ""
   "isync\t%0"
-  [(set_attr "type" "misc")]
+  [(set_attr "type" "mmu")]
 )
 
 (define_insn "unspec_volatile_isb"
@@ -94,4 +621,1077 @@
   [(set_attr "type" "misc")]
 )
 
+(define_insn "unspec_dsb"
+  [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_DSB)]
+  ""
+  "dsb"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_msync"
+  [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_MSYNC)]
+  ""
+  "msync\t%0"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_msync_all"
+  [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_MSYNC_ALL)]
+  ""
+  "msync\tall"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_msync_store"
+  [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_MSYNC_STORE)]
+  ""
+  "msync\tstore"
+  [(set_attr "type" "misc")]
+)
+
+;; Load and Store
+
+(define_insn "unspec_volatile_llw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
+					      (match_operand:SI 2 "register_operand" "r")))] UNSPEC_VOLATILE_LLW))]
+  ""
+  "llw\t%0, [%1 + %2]"
+  [(set_attr "length"    "4")]
+)
+
+(define_insn "unspec_lwup"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
+					      (match_operand:SI 2 "register_operand" "r")))] UNSPEC_LWUP))]
+  ""
+  "lwup\t%0, [%1 + %2]"
+  [(set_attr "length"    "4")]
+)
+
+(define_insn "unspec_lbup"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
+					      (match_operand:SI 2 "register_operand" "r")))] UNSPEC_LBUP))]
+  ""
+  "lbup\t%0, [%1 + %2]"
+  [(set_attr "length"    "4")]
+)
+
+(define_insn "unspec_volatile_scw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
+					      (match_operand:SI 2 "register_operand" "r")))
+			     (match_operand:SI 3 "register_operand" "0")] UNSPEC_VOLATILE_SCW))]
+  ""
+  "scw\t%0, [%1 + %2]"
+  [(set_attr "length"     "4")]
+)
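+
+;; Editorial note: llw/scw are intended as a load-linked/store-conditional
+;; pair.  The scw pattern ties the data operand (operand 3, constraint "0")
+;; to the output register because the instruction appears to return its
+;; success/failure status in the same register that supplied the store data.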
+
+(define_insn "unspec_swup"
+  [(set (mem:SI (plus:SI (match_operand:SI 0 "register_operand" "r")
+			 (match_operand:SI 1 "register_operand" "r")))
+	(unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_SWUP))]
+  ""
+  "swup\t%2, [%0 + %1]"
+  [(set_attr "length"     "4")]
+)
+
+(define_insn "unspec_sbup"
+  [(set (mem:SI (plus:SI (match_operand:SI 0 "register_operand" "r")
+			 (match_operand:SI 1 "register_operand" "r")))
+	(unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_SBUP))]
+  ""
+  "sbup\t%2, [%0 + %1]"
+  [(set_attr "length"     "4")]
+)
+
+;; CCTL
+
+(define_insn "cctl_l1d_invalall"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CCTL_L1D_INVALALL)]
+  ""
+  "cctl\tL1D_INVALALL"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "cctl_l1d_wball_alvl"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CCTL_L1D_WBALL_ALVL)]
+  ""
+  "cctl\tL1D_WBALL, alevel"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "cctl_l1d_wball_one_lvl"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CCTL_L1D_WBALL_ONE_LVL)]
+  ""
+  "cctl\tL1D_WBALL, 1level"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "cctl_idx_read"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "i")
+			     (match_operand:SI 2 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_IDX_READ))]
+  ""
+  "cctl\t%0, %2, %X1"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "cctl_idx_write"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")
+			(match_operand:SI 1 "register_operand" "r")
+			(match_operand:SI 2 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_IDX_WRITE)]
+  ""
+  "cctl\t%1, %2, %W0"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "cctl_va_wbinval_l1"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")
+			(match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_VA_WBINVAL_L1)]
+  ""
+  "cctl\t%1, %U0, 1level"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "cctl_va_wbinval_la"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")
+			(match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_VA_WBINVAL_LA)]
+  ""
+  "cctl\t%1, %U0, alevel"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "cctl_idx_wbinval"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")
+			(match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_IDX_WBINVAL)]
+  ""
+  "cctl\t%1, %T0"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "cctl_va_lck"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")
+			(match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_VA_LCK)]
+  ""
+  "cctl\t%1, %R0"
+  [(set_attr "type" "mmu")]
+)
+
+;; PREFETCH
+
+(define_insn "prefetch_qw"
+  [(unspec_volatile:QI [(match_operand:SI 0 "register_operand" "r")
+			(match_operand:SI 1 "nonmemory_operand" "r")
+			(match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_DPREF_QW)]
+  ""
+  "dpref\t%Z2, [%0 + %1]"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "prefetch_hw"
+  [(unspec_volatile:HI [(match_operand:SI 0 "register_operand" "r")
+			(match_operand:SI 1 "nonmemory_operand" "r")
+			(match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_DPREF_HW)]
+  ""
+  "dpref\t%Z2, [%0 + (%1<<1)]"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "prefetch_w"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "    r, r")
+			(match_operand:SI 1 "nonmemory_operand" "Is15, r")
+			(match_operand:SI 2 "immediate_operand" "   i, i")] UNSPEC_VOLATILE_DPREF_W)]
+  ""
+  "@
+  dprefi.w\t%Z2, [%0 + %1]
+  dpref\t%Z2, [%0 + (%1<<2)]"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "prefetch_dw"
+  [(unspec_volatile:DI [(match_operand:SI 0 "register_operand"  "   r, r")
+			(match_operand:SI 1 "nonmemory_operand" "Is15, r")
+			(match_operand:SI 2 "immediate_operand" "   i, i")] UNSPEC_VOLATILE_DPREF_DW)]
+  ""
+  "@
+  dprefi.d\t%Z2, [%0 + %1]
+  dpref\t%Z2, [%0 + (%1<<3)]"
+  [(set_attr "type" "misc")]
+)
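+
+;; Editorial note: for the word and double-word prefetches, the first
+;; alternative emits dprefi.w/dprefi.d when the offset fits an Is15
+;; immediate; the register-offset alternative instead scales the index by
+;; the element size in the address ((%1<<2) for words, (%1<<3) for
+;; double words).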
+
+;; Performance Extension
+
+(define_expand "unspec_ave"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")]
+  ""
+{
+  emit_insn (gen_ave (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_expand "unspec_bclr"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "immediate_operand" "")]
+  ""
+{
+  unsigned HOST_WIDE_INT val = ~(1u << UINTVAL (operands[2]));
+  emit_insn (gen_andsi3 (operands[0], operands[1], gen_int_mode (val, SImode)));
+  DONE;
+})
+
+(define_expand "unspec_bset"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "immediate_operand" "")]
+  ""
+{
+  unsigned HOST_WIDE_INT val = 1u << UINTVAL (operands[2]);
+  emit_insn (gen_iorsi3 (operands[0], operands[1], gen_int_mode (val, SImode)));
+  DONE;
+})
+
+(define_expand "unspec_btgl"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "immediate_operand" "")]
+  ""
+{
+  unsigned HOST_WIDE_INT val = 1u << UINTVAL (operands[2]);
+  emit_insn (gen_xorsi3 (operands[0], operands[1], gen_int_mode (val, SImode)));
+  DONE;
+})
+
+(define_expand "unspec_btst"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "immediate_operand" "")]
+  ""
+{
+  emit_insn (gen_btst (operands[0], operands[1], operands[2]));
+  DONE;
+})
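+
+;; Editorial note: the bclr/bset/btgl expanders above lower the intrinsics
+;; to plain and/ior/xor with a one-bit immediate mask; for example a bset
+;; on bit position 3 simply becomes an ior with 0x8.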
+
+(define_insn "unspec_clip"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIP))]
+  ""
+  "clip\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+(define_insn "unspec_clips"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIPS))]
+  ""
+  "clips\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+(define_insn "unspec_clo"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_CLO))]
+  ""
+  "clo\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+(define_insn "unspec_ssabssi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ss_abs:SI (match_operand:SI 1 "register_operand" "r")))]
+  ""
+  "abs\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+;; Performance extension 2
+
+(define_insn "unspec_pbsad"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_PBSAD))]
+  ""
+  "pbsad\t%0, %1, %2"
+  [(set_attr "type" "pbsad")
+   (set_attr "length"   "4")]
+)
+
+(define_insn "unspec_pbsada"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "0")
+		    (match_operand:SI 2 "register_operand" "r")
+		    (match_operand:SI 3 "register_operand" "r")] UNSPEC_PBSADA))]
+  ""
+  "pbsada\t%0, %2, %3"
+  [(set_attr "type" "pbsada")
+   (set_attr "length"    "4")]
+)
+
+(define_expand "bse"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")]
+  ""
+  {
+    rtx temp0 = gen_reg_rtx (SImode);
+    rtx temp2 = gen_reg_rtx (SImode);
+
+    emit_move_insn (temp0, gen_rtx_MEM (Pmode, operands[0]));
+    emit_move_insn (temp2, gen_rtx_MEM (Pmode, operands[2]));
+    emit_insn (gen_unspec_bse (temp0, operands[1], temp2, temp0, temp2));
+    emit_move_insn (gen_rtx_MEM (Pmode, operands[0]), temp0);
+    emit_move_insn (gen_rtx_MEM (Pmode, operands[2]), temp2);
+    DONE;
+  }
+)
+
+(define_insn "unspec_bse"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")
+		    (match_operand:SI 3 "register_operand" "0")] UNSPEC_BSE))
+   (set (match_operand:SI 4 "register_operand" "=2")
+	(unspec:SI [(match_dup 1)
+		    (match_dup 2)
+		    (match_dup 0)] UNSPEC_BSE_2))]
+  ""
+  "bse\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
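+
+;; Editorial note: the bse/bsp expanders treat operands 0 and 2 as pointers;
+;; they load both words, run the instruction, and store the results back.
+;; The insn pattern models the fact that bse updates both its destination
+;; and its second source by pairing two sets whose outputs are tied to
+;; operands 3 and 2 via the "0" and "=2" matching constraints.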
+
+(define_expand "bsp"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")]
+  ""
+  {
+    rtx temp0 = gen_reg_rtx (SImode);
+    rtx temp2 = gen_reg_rtx (SImode);
+
+    emit_move_insn (temp0, gen_rtx_MEM (Pmode, operands[0]));
+    emit_move_insn (temp2, gen_rtx_MEM (Pmode, operands[2]));
+    emit_insn (gen_unspec_bsp (temp0, operands[1], temp2, temp0, temp2));
+    emit_move_insn (gen_rtx_MEM (Pmode, operands[0]), temp0);
+    emit_move_insn (gen_rtx_MEM (Pmode, operands[2]), temp2);
+    DONE;
+  }
+)
+
+(define_insn "unspec_bsp"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")
+		    (match_operand:SI 3 "register_operand" "0")] UNSPEC_BSP))
+   (set (match_operand:SI 4 "register_operand" "=2")
+	(unspec:SI [(match_dup 1)
+		    (match_dup 2)
+		    (match_dup 0)] UNSPEC_BSP_2))]
+  ""
+  "bsp\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+;; String Extension
+
+(define_insn "unspec_ffb"
+  [(set (match_operand:SI 0 "register_operand" "=r, r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r, r")
+		    (match_operand:SI 2 "nonmemory_operand" "Iu08, r")] UNSPEC_FFB))]
+  ""
+  "@
+  ffbi\t%0, %1, %2
+  ffb\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+(define_insn "unspec_ffmism"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_FFMISM))]
+  ""
+  "ffmism\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+(define_insn "unspec_flmism"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_FLMISM))]
+  ""
+  "flmism\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+;; SATURATION
+
+(define_insn "unspec_kaddw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ss_plus:SI (match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")))]
+  ""
+  "kaddw\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_ksubw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ss_minus:SI (match_operand:SI 1 "register_operand" "r")
+		     (match_operand:SI 2 "register_operand" "r")))]
+  ""
+  "ksubw\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kaddh"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(plus:SI (match_operand:SI 1 "register_operand" "r")
+			     (match_operand:SI 2 "register_operand" "r"))
+		    (const_int 15)] UNSPEC_CLIPS))]
+  ""
+  "kaddh\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_ksubh"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(minus:SI (match_operand:SI 1 "register_operand" "r")
+			      (match_operand:SI 2 "register_operand" "r"))
+		    (const_int 15)] UNSPEC_CLIPS))]
+  ""
+  "ksubh\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kdmbb"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBB))]
+  ""
+  "kdmbb\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kdmbt"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBT))]
+  ""
+  "kdmbt\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kdmtb"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTB))]
+  ""
+  "kdmtb\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kdmtt"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTT))]
+  ""
+  "kdmtt\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_khmbb"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBB))]
+  ""
+  "khmbb\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_khmbt"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBT))]
+  ""
+  "khmbt\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_khmtb"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTB))]
+  ""
+  "khmtb\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_khmtt"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTT))]
+  ""
+  "khmtt\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kslraw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAW))]
+  ""
+  "kslraw\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kslrawu"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAWU))]
+  ""
+  "kslraw.u\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_volatile_rdov"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_RDOV))]
+  ""
+  "rdov\t%0"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_volatile_clrov"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CLROV)]
+  ""
+  "clrov"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")]
+)
+
+;; System
+
+(define_insn "unspec_sva"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_SVA))]
+  ""
+  "sva\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_svs"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_SVS))]
+  ""
+  "svs\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_jr_itoff"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JR_ITOFF)]
+  ""
+  "jr.itoff\t%0"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_jr_toff"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JR_TOFF)]
+  ""
+  "jr.toff\t%0"
+  [(set_attr "type" "branch")]
+)
+
+(define_insn "unspec_jral_iton"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JRAL_ITON)]
+  ""
+  "jral.iton\t%0"
+  [(set_attr "type" "branch")]
+)
+
+(define_insn "unspec_jral_ton"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JRAL_TON)]
+  ""
+  "jral.ton\t%0"
+  [(set_attr "type" "branch")]
+)
+
+(define_insn "unspec_ret_itoff"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_RET_ITOFF)]
+  ""
+  "ret.itoff\t%0"
+  [(set_attr "type" "branch")]
+)
+
+(define_insn "unspec_ret_toff"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_RET_TOFF)]
+  ""
+  "ret.toff\t%0"
+  [(set_attr "type" "branch")]
+)
+
+(define_insn "unspec_standby_no_wake_grant"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_STANDBY_NO_WAKE_GRANT)]
+  ""
+  "standby\tno_wake_grant"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_standby_wake_grant"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_STANDBY_WAKE_GRANT)]
+  ""
+  "standby\twake_grant"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_standby_wait_done"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_STANDBY_WAKE_DONE)]
+  ""
+  "standby\twait_done"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_teqz"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+			(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_TEQZ)]
+  ""
+  "teqz\t%0, %1"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_tnez"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+			(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_TNEZ)]
+  ""
+  "tnez\t%0, %1"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_trap"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_TRAP)]
+  ""
+  "trap\t%0"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_setend_big"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SETEND_BIG)]
+  ""
+  "setend.b"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_setend_little"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SETEND_LITTLE)]
+  ""
+  "setend.l"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_break"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_BREAK)]
+  ""
+  "break\t%0"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_syscall"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_SYSCALL)]
+  ""
+  "syscall\t%0"
+  [(set_attr "type" "misc")]
+)
+
+(define_insn "unspec_nop"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NOP)]
+  ""
+  "nop"
+  [(set_attr "type" "misc")]
+)
+
+(define_expand "unspec_get_current_sp"
+  [(match_operand:SI 0 "register_operand" "")]
+  ""
+{
+  emit_move_insn (operands[0], gen_rtx_REG (SImode, SP_REGNUM));
+  DONE;
+})
+
+(define_expand "unspec_set_current_sp"
+  [(match_operand:SI 0 "register_operand" "")]
+  ""
+{
+  emit_move_insn (gen_rtx_REG (SImode, SP_REGNUM), operands[0]);
+  DONE;
+})
+
+(define_expand "unspec_return_address"
+  [(match_operand:SI 0 "register_operand" "")]
+  ""
+{
+  emit_move_insn (operands[0], gen_rtx_REG (SImode, LP_REGNUM));
+  DONE;
+})
+
+(define_insn "unspec_signature_begin"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SIGNATURE_BEGIN)]
+  ""
+  "isps"
+  [(set_attr "length" "4")]
+)
+
+(define_insn "unspec_signature_end"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SIGNATURE_END)]
+  ""
+  "! -----\;.signature_end\;j8 2\;! -----"
+  [(set_attr "length" "2")]
+)
+
+;; Swap
+
+(define_insn "unspec_wsbh"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_WSBH))]
+  ""
+  "wsbh\t%0, %1"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+;; TLBOP Intrinsic
+
+(define_insn "unspec_tlbop_trd"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_TRD)]
+  ""
+  "tlbop\t%0, TRD"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "unspec_tlbop_twr"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_TWR)]
+  ""
+  "tlbop\t%0, TWR"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "unspec_tlbop_rwr"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_RWR)]
+  ""
+  "tlbop\t%0, RWR"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "unspec_tlbop_rwlk"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_RWLK)]
+  ""
+  "tlbop\t%0, RWLK"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "unspec_tlbop_unlk"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_UNLK)]
+  ""
+  "tlbop\t%0, UNLK"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "unspec_tlbop_pb"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_PB))]
+  ""
+  "tlbop\t%0, %1, PB"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "unspec_tlbop_inv"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_INV)]
+  ""
+  "tlbop\t%0, INV"
+  [(set_attr "type" "mmu")]
+)
+
+(define_insn "unspec_tlbop_flua"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_TLBOP_FLUA)]
+  ""
+  "tlbop\tFLUA"
+  [(set_attr "type" "mmu")]
+)
+
+;; Unaligned Load/Store
+
+(define_expand "unaligned_load_hw"
+  [(set (match_operand:HI 0 "register_operand" "")
+	(unspec:HI [(mem:HI (match_operand:SI 1 "register_operand" ""))] UNSPEC_UALOAD_HW))]
+  ""
+{
+  operands[0] = simplify_gen_subreg (SImode, operands[0],
+				     GET_MODE (operands[0]), 0);
+  if (TARGET_ISA_V3M)
+    {
+      nds32_expand_unaligned_load (operands, HImode);
+    }
+  else
+    {
+      emit_insn (gen_unaligned_load_w (operands[0],
+				       gen_rtx_MEM (SImode, operands[1])));
+
+      if (WORDS_BIG_ENDIAN)
+	emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (16)));
+      else
+	emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (0xffff)));
+    }
+
+  DONE;
+})
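+
+;; Editorial note: on non-V3M targets the halfword is fetched by loading a
+;; whole word through unaligned_load_w and then isolating the halfword:
+;; a logical right shift by 16 on big-endian targets, or an and with
+;; 0xffff on little-endian targets.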
+
+(define_expand "unaligned_loadsi"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_W))]
+  ""
+{
+  if (flag_unaligned_access)
+    {
+      rtx mem = gen_rtx_MEM (SImode, operands[1]);
+      emit_move_insn (operands[0], mem);
+    }
+  else
+    {
+      if (TARGET_ISA_V3M)
+	nds32_expand_unaligned_load (operands, SImode);
+      else
+	emit_insn (gen_unaligned_load_w (operands[0],
+					 gen_rtx_MEM (SImode, (operands[1]))));
+    }
+  DONE;
+})
+
+(define_insn "unaligned_load_w"
+  [(set (match_operand:SI 0 "register_operand"                       "=  r")
+	(unspec:SI [(match_operand:SI 1 "nds32_lmw_smw_base_operand" " Umw")] UNSPEC_UALOAD_W))]
+  ""
+{
+  return nds32_output_lmw_single_word (operands);
+}
+  [(set_attr "type"   "load")
+   (set_attr "length"    "4")]
+)
+
+(define_expand "unaligned_loaddi"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(mem:DI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_DW))]
+  ""
+{
+  if (TARGET_ISA_V3M)
+    {
+      nds32_expand_unaligned_load (operands, DImode);
+    }
+  else
+    emit_insn (gen_unaligned_load_dw (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "unaligned_load_dw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(mem:DI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_DW))]
+  ""
+{
+  rtx otherops[3];
+  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
+  otherops[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+  otherops[2] = operands[1];
+
+  output_asm_insn ("lmw.bi\t%0, [%2], %1, 0", otherops);
+  return "";
+}
+  [(set_attr "type"   "load")
+   (set_attr "length"    "4")]
+)
+
+(define_expand "unaligned_store_hw"
+  [(set (mem:SI (match_operand:SI 0 "register_operand" ""))
+	(unspec:HI [(match_operand:HI 1 "register_operand" "")] UNSPEC_UASTORE_HW))]
+  ""
+{
+  operands[1] = simplify_gen_subreg (SImode, operands[1],
+				     GET_MODE (operands[1]), 0);
+  nds32_expand_unaligned_store (operands, HImode);
+  DONE;
+})
+
+(define_expand "unaligned_storesi"
+  [(set (mem:SI (match_operand:SI 0 "register_operand" "r"))
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_UASTORE_W))]
+  ""
+{
+  if (flag_unaligned_access)
+    {
+      rtx mem = gen_rtx_MEM (SImode, operands[0]);
+      emit_move_insn (mem, operands[1]);
+    }
+  else
+    {
+      if (TARGET_ISA_V3M)
+	nds32_expand_unaligned_store (operands, SImode);
+      else
+	emit_insn (gen_unaligned_store_w (gen_rtx_MEM (SImode, operands[0]),
+					  operands[1]));
+    }
+  DONE;
+})
+
+(define_insn "unaligned_store_w"
+  [(set (match_operand:SI 0 "nds32_lmw_smw_base_operand"   "=Umw")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "   r")] UNSPEC_UASTORE_W))]
+  ""
+{
+  return nds32_output_smw_single_word (operands);
+}
+  [(set_attr "type"   "store")
+   (set_attr "length"     "4")]
+)
+
+(define_expand "unaligned_storedi"
+  [(set (mem:DI (match_operand:SI 0 "register_operand" "r"))
+	(unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_UASTORE_DW))]
+  ""
+{
+  if (TARGET_ISA_V3M)
+    nds32_expand_unaligned_store (operands, DImode);
+  else
+    emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[0]),
+				       operands[1]));
+  DONE;
+})
+
+(define_insn "unaligned_store_dw"
+  [(set (match_operand:DI 0 "nds32_lmw_smw_base_operand"   "=Umw")
+	(unspec:DI [(match_operand:DI 1 "register_operand" "   r")] UNSPEC_UASTORE_DW))]
+  ""
+{
+  return nds32_output_smw_double_word (operands);
+}
+  [(set_attr "type"   "store")
+   (set_attr "length"     "4")]
+)
+
+(define_expand "unspec_unaligned_feature"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_UNALIGNED_FEATURE))]
+  ""
+{
+  /* Get $MMU_CTL system register from nds32_intrinsic_register_names[].  */
+  rtx system_reg =  GEN_INT (__NDS32_REG_MMU_CTL__);
+  rtx temp_reg = gen_reg_rtx (SImode);
+  rtx temp2_reg = gen_reg_rtx (SImode);
+
+  emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
+  emit_move_insn (temp_reg, operands[0]);
+  emit_move_insn (temp2_reg, GEN_INT (0x800 << 12));
+  emit_insn (gen_iorsi3 (operands[0], operands[0], temp2_reg));
+  emit_insn (gen_unspec_volatile_mtsr (operands[0], system_reg));
+  emit_insn (gen_unspec_dsb ());
+
+  emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
+  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
+  emit_insn (gen_unspec_dsb ());
+
+  emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (8)));
+  emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31)));
+  DONE;
+})
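+
+;; Editorial note: the sequence above probes what appears to be the
+;; unaligned-access control bit of $MMU_CTL (bit 23, i.e. 0x800 << 12):
+;; it sets the bit, reads the register back, restores the original value,
+;; and finally moves the probed bit down to bit 0 via shifts (left by 8,
+;; then right by 31), so the intrinsic yields 1 exactly when that bit
+;; could be written.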
+
+(define_expand "unspec_enable_unaligned"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_UNALIGNED_FEATURE)]
+  ""
+{
+  /* Get $MMU_CTL system register from nds32_intrinsic_register_names[].  */
+  rtx system_reg =  GEN_INT (__NDS32_REG_MMU_CTL__);
+  rtx temp_reg = gen_reg_rtx (SImode);
+  rtx temp2_reg = gen_reg_rtx (SImode);
+  emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
+  emit_move_insn (temp2_reg, GEN_INT (0x800 << 12));
+  emit_insn (gen_iorsi3 (temp_reg, temp_reg, temp2_reg));
+  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
+  emit_insn (gen_unspec_dsb ());
+  DONE;
+})
+
+(define_expand "unspec_disable_unaligned"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_UNALIGNED_FEATURE)]
+  ""
+{
+  /* Get $MMU_CTL system register from nds32_intrinsic_register_names[].  */
+  rtx system_reg =  GEN_INT (__NDS32_REG_MMU_CTL__);
+  rtx temp_reg = gen_reg_rtx (SImode);
+  rtx temp2_reg = gen_reg_rtx (SImode);
+  emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
+  emit_move_insn (temp2_reg, GEN_INT (0x800 << 12));
+  emit_insn (gen_one_cmplsi2 (temp2_reg, temp2_reg));
+  emit_insn (gen_andsi3 (temp_reg, temp_reg, temp2_reg));
+  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
+  emit_insn (gen_unspec_dsb ());
+  DONE;
+})
+
+;; abs alias kabs
+
+(define_insn "unspec_kabs"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_KABS))]
+  ""
+  "kabs\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+(define_expand "no_hwloop"
+  [(const_int 0)]
+  ""
+{
+  if (NDS32_HW_LOOP_P ())
+    emit_insn (gen_unspec_no_hwloop ());
+  else
+    emit_insn (gen_nop ());
+
+  DONE;
+})
+
+(define_insn "unspec_no_hwloop"
+  [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_NO_HWLOOP)]
+  ""
+  ""
+  [(set_attr "type" "misc")]
+)
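+
+;; Editorial note: unspec_no_hwloop intentionally emits no assembly; the
+;; volatile unspec only serves as a marker in the RTL stream, presumably so
+;; that hardware-loop generation (see NDS32_HW_LOOP_P) will not cover the
+;; region containing it.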
 ;; ------------------------------------------------------------------------
diff --git a/gcc/config/nds32/nds32-isr.c b/gcc/config/nds32/nds32-isr.c
index 79be27e..be82609 100644
--- a/gcc/config/nds32/nds32-isr.c
+++ b/gcc/config/nds32/nds32-isr.c
@@ -24,11 +24,41 @@
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
-#include "target.h"
-#include "rtl.h"
 #include "tree.h"
-#include "diagnostic-core.h"
+#include "rtl.h"
+#include "df.h"
+#include "alias.h"
+#include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
+#include "regs.h"
+#include "insn-config.h"	/* Required by recog.h.  */
+#include "conditions.h"
 #include "output.h"
+#include "insn-attr.h"		/* For DFA state_t.  */
+#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
+#include "reload.h"		/* For push_reload().  */
+#include "flags.h"
+#include "insn-config.h"
+#include "expmed.h"
+#include "dojump.h"
+#include "explow.h"
+#include "emit-rtl.h"
+#include "stmt.h"
+#include "expr.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "cfgbuild.h"
+#include "cfgcleanup.h"
+#include "tm_p.h"
+#include "tm-constrs.h"
+#include "optabs.h"		/* For GEN_FCN.  */
+#include "target.h"
+#include "langhooks.h"		/* For add_builtin_function().  */
+#include "builtins.h"
 
 /* ------------------------------------------------------------------------ */
 
@@ -39,7 +69,260 @@
    We use an array to record essential information for each vector.  */
 static struct nds32_isr_info nds32_isr_vectors[NDS32_N_ISR_VECTORS];
 
-/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------- */
+/* FIXME:
+   FOR BACKWARD COMPATIBILITY, we need to support the following patterns:
+
+       __attribute__((interrupt("XXX;YYY;id=ZZZ")))
+       __attribute__((exception("XXX;YYY;id=ZZZ")))
+       __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ")))
+
+   We provide several functions to parse the strings.  */
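+
+/* Editorial example (identifiers hypothetical): with the legacy string
+   syntax handled below,
+
+       void __attribute__ ((interrupt ("save_all_regs;nested;id=0,1")))
+       foo (void);
+
+   marks vectors 9 and 10 (id + 9), requests NDS32_SAVE_ALL and NDS32_NESTED,
+   and records "foo" as the handler for both vectors.  */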
+
+static void
+nds32_interrupt_attribute_parse_string (const char *original_str,
+					const char *func_name,
+					unsigned int s_level)
+{
+  char target_str[100];
+  enum nds32_isr_save_reg save_reg;
+  enum nds32_isr_nested_type nested_type;
+
+  char *save_all_regs_str, *save_caller_regs_str;
+  char *nested_str, *not_nested_str, *ready_nested_str, *critical_str;
+  char *id_str, *value_str;
+
+  /* Copy original string into a character array so that
+     the string APIs can handle it.  */
+  strcpy (target_str, original_str);
+
+  /* 1. Detect 'save_all_regs'    : NDS32_SAVE_ALL
+	       'save_caller_regs' : NDS32_PARTIAL_SAVE */
+  save_all_regs_str    = strstr (target_str, "save_all_regs");
+  save_caller_regs_str = strstr (target_str, "save_caller_regs");
+
+  /* Note that if no argument is found,
+     use NDS32_PARTIAL_SAVE by default.  */
+  if (save_all_regs_str)
+    save_reg = NDS32_SAVE_ALL;
+  else if (save_caller_regs_str)
+    save_reg = NDS32_PARTIAL_SAVE;
+  else
+    save_reg = NDS32_PARTIAL_SAVE;
+
+  /* 2. Detect 'nested'       : NDS32_NESTED
+	       'not_nested'   : NDS32_NOT_NESTED
+	       'ready_nested' : NDS32_NESTED_READY
+	       'critical'     : NDS32_CRITICAL */
+  nested_str       = strstr (target_str, "nested");
+  not_nested_str   = strstr (target_str, "not_nested");
+  ready_nested_str = strstr (target_str, "ready_nested");
+  critical_str     = strstr (target_str, "critical");
+
+  /* Note that if no argument is found,
+     use NDS32_NOT_NESTED by default.
+     Also, since 'not_nested' and 'ready_nested' both contain the
+     'nested' string, we check 'nested' with the lowest priority.  */
+  if (not_nested_str)
+    nested_type = NDS32_NOT_NESTED;
+  else if (ready_nested_str)
+    nested_type = NDS32_NESTED_READY;
+  else if (nested_str)
+    nested_type = NDS32_NESTED;
+  else if (critical_str)
+    nested_type = NDS32_CRITICAL;
+  else
+    nested_type = NDS32_NOT_NESTED;
+
+  /* 3. Traverse each id value and set corresponding information.  */
+  id_str = strstr (target_str, "id=");
+
+  /* If the user forgets to assign 'id', issue an error message.  */
+  if (id_str == NULL)
+    error ("require id argument in the string");
+  /* Extract the value_str first.  */
+  id_str    = strtok (id_str, "=");
+  value_str = strtok (NULL, ";");
+
+  /* Pick up the first id value token.  */
+  value_str = strtok (value_str, ",");
+  while (value_str != NULL)
+    {
+      int i;
+      i = atoi (value_str);
+
+      /* For interrupt(0..63), the actual vector number is (9..72).  */
+      i = i + 9;
+      if (i < 9 || i > 72)
+	error ("invalid id value for interrupt attribute");
+
+      /* Setup nds32_isr_vectors[] array.  */
+      nds32_isr_vectors[i].category = NDS32_ISR_INTERRUPT;
+      strcpy (nds32_isr_vectors[i].func_name, func_name);
+      nds32_isr_vectors[i].save_reg = save_reg;
+      nds32_isr_vectors[i].nested_type = nested_type;
+      nds32_isr_vectors[i].security_level = s_level;
+
+      /* Fetch next token.  */
+      value_str = strtok (NULL, ",");
+    }
+
+  return;
+}
+
+static void
+nds32_exception_attribute_parse_string (const char *original_str,
+					const char *func_name,
+					unsigned int s_level)
+{
+  char target_str[100];
+  enum nds32_isr_save_reg save_reg;
+  enum nds32_isr_nested_type nested_type;
+
+  char *save_all_regs_str, *save_caller_regs_str;
+  char *nested_str, *not_nested_str, *ready_nested_str, *critical_str;
+  char *id_str, *value_str;
+
+  /* Copy original string into a character array so that
+     the string APIs can handle it.  */
+  strcpy (target_str, original_str);
+
+  /* 1. Detect 'save_all_regs'    : NDS32_SAVE_ALL
+	       'save_caller_regs' : NDS32_PARTIAL_SAVE */
+  save_all_regs_str    = strstr (target_str, "save_all_regs");
+  save_caller_regs_str = strstr (target_str, "save_caller_regs");
+
+  /* Note that if no argument is found,
+     use NDS32_PARTIAL_SAVE by default.  */
+  if (save_all_regs_str)
+    save_reg = NDS32_SAVE_ALL;
+  else if (save_caller_regs_str)
+    save_reg = NDS32_PARTIAL_SAVE;
+  else
+    save_reg = NDS32_PARTIAL_SAVE;
+
+  /* 2. Detect 'nested'       : NDS32_NESTED
+	       'not_nested'   : NDS32_NOT_NESTED
+	       'ready_nested' : NDS32_NESTED_READY
+	       'critical'     : NDS32_CRITICAL */
+  nested_str       = strstr (target_str, "nested");
+  not_nested_str   = strstr (target_str, "not_nested");
+  ready_nested_str = strstr (target_str, "ready_nested");
+  critical_str     = strstr (target_str, "critical");
+
+  /* Note that if no argument is found,
+     use NDS32_NOT_NESTED by default.
+     Also, since 'not_nested' and 'ready_nested' both contain the
+     'nested' string, we check 'nested' with the lowest priority.  */
+  if (not_nested_str)
+    nested_type = NDS32_NOT_NESTED;
+  else if (ready_nested_str)
+    nested_type = NDS32_NESTED_READY;
+  else if (nested_str)
+    nested_type = NDS32_NESTED;
+  else if (critical_str)
+    nested_type = NDS32_CRITICAL;
+  else
+    nested_type = NDS32_NOT_NESTED;
+
+  /* 3. Traverse each id value and set corresponding information.  */
+  id_str = strstr (target_str, "id=");
+
+  /* If the user forgets to assign 'id', issue an error message.  */
+  if (id_str == NULL)
+    error ("require id argument in the string");
+  /* Extract the value_str first.  */
+  id_str    = strtok (id_str, "=");
+  value_str = strtok (NULL, ";");
+
+  /* Pick up the first id value token.  */
+  value_str = strtok (value_str, ",");
+  while (value_str != NULL)
+    {
+      int i;
+      i = atoi (value_str);
+
+      /* For exception(1..8), the actual vector number is (1..8).  */
+      if (i < 1 || i > 8)
+	error ("invalid id value for exception attribute");
+
+      /* Setup nds32_isr_vectors[] array.  */
+      nds32_isr_vectors[i].category = NDS32_ISR_EXCEPTION;
+      strcpy (nds32_isr_vectors[i].func_name, func_name);
+      nds32_isr_vectors[i].save_reg = save_reg;
+      nds32_isr_vectors[i].nested_type = nested_type;
+      nds32_isr_vectors[i].security_level = s_level;
+
+      /* Fetch next token.  */
+      value_str = strtok (NULL, ",");
+    }
+
+  return;
+}
+
+static void
+nds32_reset_attribute_parse_string (const char *original_str,
+				    const char *func_name)
+{
+  char target_str[100];
+  char *vectors_str, *nmi_str, *warm_str, *value_str;
+
+  /* Deal with reset attribute.  Its vector number is always 0.  */
+  nds32_isr_vectors[0].category = NDS32_ISR_RESET;
+
+
+  /* 1. Parse 'vectors=XXXX'.  */
+
+  /* Copy original string into a character array so that
+     the string APIs can handle it.  */
+  strcpy (target_str, original_str);
+  vectors_str = strstr (target_str, "vectors=");
+  /* The total number of vectors = interrupts + exceptions + reset.
+     There are 8 exceptions and 1 reset in the nds32 architecture.
+     If the user forgets to assign 'vectors', default to 16 interrupts.  */
+  if (vectors_str != NULL)
+    {
+      /* Extract the value_str.  */
+      vectors_str = strtok (vectors_str, "=");
+      value_str  = strtok (NULL, ";");
+      nds32_isr_vectors[0].total_n_vectors = atoi (value_str) + 8 + 1;
+    }
+  else
+    nds32_isr_vectors[0].total_n_vectors = 16 + 8 + 1;
+  strcpy (nds32_isr_vectors[0].func_name, func_name);
+
+
+  /* 2. Parse 'nmi_func=YYYY'.  */
+
+  /* Copy original string into a character array so that
+     the string APIs can handle it.  */
+  strcpy (target_str, original_str);
+  nmi_str = strstr (target_str, "nmi_func=");
+  if (nmi_str != NULL)
+    {
+      /* Extract the value_str.  */
+      nmi_str = strtok (nmi_str, "=");
+      value_str  = strtok (NULL, ";");
+      strcpy (nds32_isr_vectors[0].nmi_name, value_str);
+    }
+
+  /* 3. Parse 'warm_func=ZZZZ'.  */
+
+  /* Copy original string into a character array so that
+     the string APIs can handle it.  */
+  strcpy (target_str, original_str);
+  warm_str = strstr (target_str, "warm_func=");
+  if (warm_str != NULL)
+    {
+      /* Extract the value_str.  */
+      warm_str = strtok (warm_str, "=");
+      value_str  = strtok (NULL, ";");
+      strcpy (nds32_isr_vectors[0].warm_name, value_str);
+    }
+
+  return;
+}
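+
+/* Editorial example (identifiers hypothetical): a legacy
+   reset ("vectors=16;nmi_func=my_nmi;warm_func=my_warm") attribute parsed
+   by the function above yields total_n_vectors = 16 + 8 + 1 = 25 and
+   records my_nmi and my_warm as the NMI and warm-boot handler names.  */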
+/* ------------------------------------------------------------- */
 
 /* A helper function to emit section head template.  */
 static void
@@ -75,6 +358,15 @@ nds32_emit_isr_jmptbl_section (int vector_id)
   char section_name[100];
   char symbol_name[100];
 
+  /* A critical isr does not need a jump table section because
+     it is not dispatched through the two-level handler.  */
+  if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL)
+    {
+      fprintf (asm_out_file, "\t! The vector %02d is a critical isr !\n",
+			     vector_id);
+      return;
+    }
+
   /* Prepare jmptbl section and symbol name.  */
   snprintf (section_name, sizeof (section_name),
 	    ".nds32_jmptbl.%02d", vector_id);
@@ -95,7 +387,6 @@ nds32_emit_isr_vector_section (int vector_id)
   const char *c_str = "CATEGORY";
   const char *sr_str = "SR";
   const char *nt_str = "NT";
-  const char *vs_str = "VS";
   char first_level_handler_name[100];
   char section_name[100];
   char symbol_name[100];
@@ -143,46 +434,63 @@ nds32_emit_isr_vector_section (int vector_id)
     case NDS32_NESTED_READY:
       nt_str = "nr";
       break;
+    case NDS32_CRITICAL:
+      /* A critical isr is not dispatched through the two-level handler.  */
+      nt_str = "";
+      break;
     }
 
-  /* Currently we have 4-byte or 16-byte size for each vector.
-     If it is 4-byte, the first level handler name has suffix string "_4b".  */
-  vs_str = (nds32_isr_vector_size == 4) ? "_4b" : "";
-
   /* Now we can create first level handler name.  */
-  snprintf (first_level_handler_name, sizeof (first_level_handler_name),
-	    "_nds32_%s_%s_%s%s", c_str, sr_str, nt_str, vs_str);
+  if (nds32_isr_vectors[vector_id].security_level == 0)
+    {
+      /* For security level 0, use normal first level handler name.  */
+      snprintf (first_level_handler_name, sizeof (first_level_handler_name),
+		"_nds32_%s_%s_%s", c_str, sr_str, nt_str);
+    }
+  else
+    {
+      /* For security level 1-3, use corresponding spl_1, spl_2, or spl_3.  */
+      snprintf (first_level_handler_name, sizeof (first_level_handler_name),
+		"_nds32_spl_%d", nds32_isr_vectors[vector_id].security_level);
+    }
 
   /* Prepare vector section and symbol name.  */
   snprintf (section_name, sizeof (section_name),
 	    ".nds32_vector.%02d", vector_id);
   snprintf (symbol_name, sizeof (symbol_name),
-	    "_nds32_vector_%02d%s", vector_id, vs_str);
+	    "_nds32_vector_%02d", vector_id);
 
 
   /* Everything is ready.  We can start emit vector section content.  */
   nds32_emit_section_head_template (section_name, symbol_name,
 				    floor_log2 (nds32_isr_vector_size), false);
 
-  /* According to the vector size, the instructions in the
-     vector section may be different.  */
-  if (nds32_isr_vector_size == 4)
+  /* First we check if it is a critical isr.
+     If so, jump to user handler directly; otherwise, the instructions
+     in the vector section may be different according to the vector size.  */
+  if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL)
+    {
+      /* This block is for critical isr.  Jump to user handler directly.  */
+      fprintf (asm_out_file, "\tj\t%s ! jump to user handler directly\n",
+			     nds32_isr_vectors[vector_id].func_name);
+    }
+  else if (nds32_isr_vector_size == 4)
     {
       /* This block is for 4-byte vector size.
-         Hardware $VID support is necessary and only one instruction
-         is needed in vector section.  */
+	 Hardware $VID support is necessary and only one instruction
+	 is needed in vector section.  */
       fprintf (asm_out_file, "\tj\t%s ! jump to first level handler\n",
 			     first_level_handler_name);
     }
   else
     {
       /* This block is for 16-byte vector size.
-         There is NO hardware $VID so that we need several instructions
-         such as pushing GPRs and preparing software vid at vector section.
-         For pushing GPRs, there are four variations for
-         16-byte vector content and we have to handle each combination.
-         For preparing software vid, note that the vid need to
-         be substracted vector_number_offset.  */
+	 There is NO hardware $VID so that we need several instructions
+	 such as pushing GPRs and preparing software vid at vector section.
+	 For pushing GPRs, there are four variations for
+	 16-byte vector content and we have to handle each combination.
+	 For preparing the software vid, note that vector_number_offset
+	 needs to be subtracted from the vid.  */
       if (TARGET_REDUCED_REGS)
 	{
 	  if (nds32_isr_vectors[vector_id].save_reg == NDS32_SAVE_ALL)
@@ -235,13 +543,11 @@ nds32_emit_isr_reset_content (void)
 {
   unsigned int i;
   unsigned int total_n_vectors;
-  const char *vs_str;
   char reset_handler_name[100];
   char section_name[100];
   char symbol_name[100];
 
   total_n_vectors = nds32_isr_vectors[0].total_n_vectors;
-  vs_str = (nds32_isr_vector_size == 4) ? "_4b" : "";
 
   fprintf (asm_out_file, "\t! RESET HANDLER CONTENT - BEGIN !\n");
 
@@ -257,7 +563,7 @@ nds32_emit_isr_reset_content (void)
   /* Emit vector references.  */
   fprintf (asm_out_file, "\t ! references to vector section entries\n");
   for (i = 0; i < total_n_vectors; i++)
-    fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d%s\n", i, vs_str);
+    fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d\n", i);
 
   /* Emit jmptbl_00 section.  */
   snprintf (section_name, sizeof (section_name), ".nds32_jmptbl.00");
@@ -271,9 +577,9 @@ nds32_emit_isr_reset_content (void)
 
   /* Emit vector_00 section.  */
   snprintf (section_name, sizeof (section_name), ".nds32_vector.00");
-  snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00%s", vs_str);
+  snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00");
   snprintf (reset_handler_name, sizeof (reset_handler_name),
-	    "_nds32_reset%s", vs_str);
+	    "_nds32_reset");
 
   fprintf (asm_out_file, "\t! ....................................\n");
   nds32_emit_section_head_template (section_name, symbol_name,
@@ -319,12 +625,12 @@ void
 nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs)
 {
   int save_all_p, partial_save_p;
-  int nested_p, not_nested_p, nested_ready_p;
+  int nested_p, not_nested_p, nested_ready_p, critical_p;
   int intr_p, excp_p, reset_p;
 
   /* Initialize variables.  */
   save_all_p = partial_save_p = 0;
-  nested_p = not_nested_p = nested_ready_p = 0;
+  nested_p = not_nested_p = nested_ready_p = critical_p = 0;
   intr_p = excp_p = reset_p = 0;
 
   /* We must check at MOST one attribute to set save-reg.  */
@@ -343,8 +649,10 @@ nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs)
     not_nested_p = 1;
   if (lookup_attribute ("nested_ready", func_attrs))
     nested_ready_p = 1;
+  if (lookup_attribute ("critical", func_attrs))
+    critical_p = 1;
 
-  if ((nested_p + not_nested_p + nested_ready_p) > 1)
+  if ((nested_p + not_nested_p + nested_ready_p + critical_p) > 1)
     error ("multiple nested types attributes to function %qD", func_decl);
 
   /* We must check at MOST one attribute to
@@ -358,6 +666,17 @@ nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs)
 
   if ((intr_p + excp_p + reset_p) > 1)
     error ("multiple interrupt attributes to function %qD", func_decl);
+
+  /* Do not allow isr attributes under linux toolchain.  */
+  if (TARGET_LINUX_ABI && intr_p)
+      error ("cannot use interrupt attributes to function %qD "
+	     "under linux toolchain", func_decl);
+  if (TARGET_LINUX_ABI && excp_p)
+      error ("cannot use exception attributes to function %qD "
+	     "under linux toolchain", func_decl);
+  if (TARGET_LINUX_ABI && reset_p)
+      error ("cannot use reset attributes to function %qD "
+	     "under linux toolchain", func_decl);
 }
 
 /* Function to construct isr vectors information array.
@@ -369,15 +688,21 @@ nds32_construct_isr_vectors_information (tree func_attrs,
 					 const char *func_name)
 {
   tree save_all, partial_save;
-  tree nested, not_nested, nested_ready;
+  tree nested, not_nested, nested_ready, critical;
   tree intr, excp, reset;
 
+  tree secure;
+  tree security_level_list;
+  tree security_level;
+  unsigned int s_level;
+
   save_all     = lookup_attribute ("save_all", func_attrs);
   partial_save = lookup_attribute ("partial_save", func_attrs);
 
   nested       = lookup_attribute ("nested", func_attrs);
   not_nested   = lookup_attribute ("not_nested", func_attrs);
   nested_ready = lookup_attribute ("nested_ready", func_attrs);
+  critical     = lookup_attribute ("critical", func_attrs);
 
   intr  = lookup_attribute ("interrupt", func_attrs);
   excp  = lookup_attribute ("exception", func_attrs);
@@ -387,6 +712,63 @@ nds32_construct_isr_vectors_information (tree func_attrs,
   if (!intr && !excp && !reset)
     return;
 
+  /* First, we need to retrieve the security level.  */
+  secure = lookup_attribute ("secure", func_attrs);
+  if (secure != NULL)
+    {
+      security_level_list = TREE_VALUE (secure);
+      security_level = TREE_VALUE (security_level_list);
+      s_level = TREE_INT_CST_LOW (security_level);
+    }
+  else
+    {
+      /* If there is no secure attribute, the security level is set by
+	 nds32_isr_secure_level, which is controlled by -misr-secure=X option.
+	 By default nds32_isr_secure_level should be 0.  */
+      s_level = nds32_isr_secure_level;
+    }
+
+  /* ------------------------------------------------------------- */
+  /* FIXME:
+     FOR BACKWARD COMPATIBILITY, we need to support the following patterns:
+
+	 __attribute__((interrupt("XXX;YYY;id=ZZZ")))
+	 __attribute__((exception("XXX;YYY;id=ZZZ")))
+	 __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ")))
+
+     If interrupt/exception/reset appears and its argument is a
+     STRING_CST, we will parse the string with auxiliary functions that
+     set the necessary isr information in the nds32_isr_vectors[] array.
+     After that, we can return immediately to avoid new-syntax isr
+     information construction.  */
+  if (intr != NULL_TREE
+      && TREE_CODE (TREE_VALUE (TREE_VALUE (intr))) == STRING_CST)
+    {
+      tree string_arg = TREE_VALUE (TREE_VALUE (intr));
+      nds32_interrupt_attribute_parse_string (TREE_STRING_POINTER (string_arg),
+					      func_name,
+					      s_level);
+      return;
+    }
+  if (excp != NULL_TREE
+      && TREE_CODE (TREE_VALUE (TREE_VALUE (excp))) == STRING_CST)
+    {
+      tree string_arg = TREE_VALUE (TREE_VALUE (excp));
+      nds32_exception_attribute_parse_string (TREE_STRING_POINTER (string_arg),
+					      func_name,
+					      s_level);
+      return;
+    }
+  if (reset != NULL_TREE
+      && TREE_CODE (TREE_VALUE (TREE_VALUE (reset))) == STRING_CST)
+    {
+      tree string_arg = TREE_VALUE (TREE_VALUE (reset));
+      nds32_reset_attribute_parse_string (TREE_STRING_POINTER (string_arg),
+					  func_name);
+      return;
+    }
+  /* ------------------------------------------------------------- */
+
   /* If we are here, either we have interrupt/exception,
      or reset attribute.  */
   if (intr || excp)
@@ -413,6 +795,9 @@ nds32_construct_isr_vectors_information (tree func_attrs,
 	  /* Add vector_number_offset to get actual vector number.  */
 	  vector_id = TREE_INT_CST_LOW (id) + vector_number_offset;
 
+	  /* Set security level.  */
+	  nds32_isr_vectors[vector_id].security_level = s_level;
+
 	  /* Enable corresponding vector and set function name.  */
 	  nds32_isr_vectors[vector_id].category = (intr)
 						  ? (NDS32_ISR_INTERRUPT)
@@ -432,6 +817,8 @@ nds32_construct_isr_vectors_information (tree func_attrs,
 	    nds32_isr_vectors[vector_id].nested_type = NDS32_NOT_NESTED;
 	  else if (nested_ready)
 	    nds32_isr_vectors[vector_id].nested_type = NDS32_NESTED_READY;
+	  else if (critical)
+	    nds32_isr_vectors[vector_id].nested_type = NDS32_CRITICAL;
 
 	  /* Advance to next id.  */
 	  id_list = TREE_CHAIN (id_list);
@@ -447,12 +834,12 @@ nds32_construct_isr_vectors_information (tree func_attrs,
       nds32_isr_vectors[0].category = NDS32_ISR_RESET;
 
       /* Prepare id_list and identify id value so that
-         we can set total number of vectors.  */
+	 we can set total number of vectors.  */
       id_list = TREE_VALUE (reset);
       id = TREE_VALUE (id_list);
 
       /* The total vectors = interrupt + exception numbers + reset.
-         There are 8 exception and 1 reset in nds32 architecture.  */
+	 There are 8 exception and 1 reset in nds32 architecture.  */
       nds32_isr_vectors[0].total_n_vectors = TREE_INT_CST_LOW (id) + 8 + 1;
       strcpy (nds32_isr_vectors[0].func_name, func_name);
 
@@ -488,7 +875,6 @@ nds32_construct_isr_vectors_information (tree func_attrs,
     }
 }
 
-/* A helper function to handle isr stuff at the beginning of asm file.  */
 void
 nds32_asm_file_start_for_isr (void)
 {
@@ -501,15 +887,14 @@ nds32_asm_file_start_for_isr (void)
       strcpy (nds32_isr_vectors[i].func_name, "");
       nds32_isr_vectors[i].save_reg = NDS32_PARTIAL_SAVE;
       nds32_isr_vectors[i].nested_type = NDS32_NOT_NESTED;
+      nds32_isr_vectors[i].security_level = 0;
       nds32_isr_vectors[i].total_n_vectors = 0;
       strcpy (nds32_isr_vectors[i].nmi_name, "");
       strcpy (nds32_isr_vectors[i].warm_name, "");
     }
 }
 
-/* A helper function to handle isr stuff at the end of asm file.  */
-void
-nds32_asm_file_end_for_isr (void)
+void
+nds32_asm_file_end_for_isr (void)
 {
   int i;
 
@@ -543,6 +928,8 @@ nds32_asm_file_end_for_isr (void)
 	  /* Found one vector which is interupt or exception.
 	     Output its jmptbl and vector section content.  */
 	  fprintf (asm_out_file, "\t! interrupt/exception vector %02d\n", i);
+	  fprintf (asm_out_file, "\t! security level: %d\n",
+		   nds32_isr_vectors[i].security_level);
 	  fprintf (asm_out_file, "\t! ------------------------------------\n");
 	  nds32_emit_isr_jmptbl_section (i);
 	  fprintf (asm_out_file, "\t! ....................................\n");
@@ -576,4 +963,65 @@ nds32_isr_function_p (tree func)
 	  || (t_reset != NULL_TREE));
 }
 
-/* ------------------------------------------------------------------------ */
+/* Return true if FUNC is an isr function with the critical attribute.  */
+bool
+nds32_isr_function_critical_p (tree func)
+{
+  tree t_intr;
+  tree t_excp;
+  tree t_critical;
+
+  tree attrs;
+
+  if (TREE_CODE (func) != FUNCTION_DECL)
+    abort ();
+
+  attrs = DECL_ATTRIBUTES (func);
+
+  t_intr  = lookup_attribute ("interrupt", attrs);
+  t_excp  = lookup_attribute ("exception", attrs);
+
+  t_critical = lookup_attribute ("critical", attrs);
+
+  /* If neither the interrupt nor the exception attribute appears,
+     we can return false immediately.  */
+  if ((t_intr == NULL_TREE) && (t_excp == NULL_TREE))
+    return false;
+
+  /* Here we can guarantee that either the interrupt or the exception
+     attribute exists, so further check the critical attribute.
+     If it also appears, we can return true.  */
+  if (t_critical != NULL_TREE)
+    return true;
+
+  /* ------------------------------------------------------------- */
+  /* FIXME:
+     FOR BACKWARD COMPATIBILITY, we need to handle string type.
+     If the string 'critical' appears in the interrupt/exception
+     string argument, we can return true.  */
+  if (t_intr != NULL_TREE || t_excp != NULL_TREE)
+    {
+      char target_str[100];
+      char *critical_str;
+      tree t_check;
+      tree string_arg;
+
+      t_check = t_intr ? t_intr : t_excp;
+      if (TREE_CODE (TREE_VALUE (TREE_VALUE (t_check))) == STRING_CST)
+	{
+	  string_arg = TREE_VALUE (TREE_VALUE (t_check));
+	  strcpy (target_str, TREE_STRING_POINTER (string_arg));
+	  critical_str = strstr (target_str, "critical");
+
+	  /* Found 'critical' string, so return true.  */
+	  if (critical_str)
+	    return true;
+	}
+    }
+  /* ------------------------------------------------------------- */
+
+  /* Other cases, this isr function is not critical type.  */
+  return false;
+}
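+
+/* Illustrative (hypothetical) usages recognized as critical by
+   nds32_isr_function_critical_p above: the new-syntax form
+     __attribute__((interrupt(3), critical))
+   or a legacy string argument to interrupt/exception that contains
+   the word "critical".  */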
+
+/* ------------------------------------------------------------- */
diff --git a/gcc/config/nds32/nds32-linux.opt b/gcc/config/nds32/nds32-linux.opt
new file mode 100644
index 0000000..75ccd76
--- /dev/null
+++ b/gcc/config/nds32/nds32-linux.opt
@@ -0,0 +1,16 @@
+mcmodel=
+Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_LARGE)
+Specify the address generation strategy for code model.
+
+Enum
+Name(nds32_cmodel_type) Type(enum nds32_cmodel_type)
+Known cmodel types (for use with the -mcmodel= option):
+
+EnumValue
+Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL)
+
+EnumValue
+Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM)
+
+EnumValue
+Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE)
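+
+; For illustration: "-mcmodel=medium" selects CMODEL_MEDIUM; when the
+; option is not given, Init(CMODEL_LARGE) above makes CMODEL_LARGE the
+; default.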
diff --git a/gcc/config/nds32/nds32-lmwsmw.c b/gcc/config/nds32/nds32-lmwsmw.c
new file mode 100644
index 0000000..e3b66bf
--- /dev/null
+++ b/gcc/config/nds32/nds32-lmwsmw.c
@@ -0,0 +1,1998 @@
+
+/* lmwsmw pass of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* ------------------------------------------------------------------------ */
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "hash-set.h"
+#include "machmode.h"
+#include "vec.h"
+#include "double-int.h"
+#include "input.h"
+#include "alias.h"
+#include "symtab.h"
+#include "wide-int.h"
+#include "inchash.h"
+#include "tree.h"
+#include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"	/* Required by recog.h.  */
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"		/* For DFA state_t.  */
+#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
+#include "reload.h"		/* For push_reload().  */
+#include "flags.h"
+#include "input.h"
+#include "function.h"
+#include "expr.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "cfgbuild.h"
+#include "cfgcleanup.h"
+#include "predict.h"
+#include "basic-block.h"
+#include "bitmap.h"
+#include "df.h"
+#include "tm_p.h"
+#include "tm-constrs.h"
+#include "optabs.h"		/* For GEN_FCN.  */
+#include "target.h"
+#include "langhooks.h"		/* For add_builtin_function().  */
+#include "ggc.h"
+#include "tree-pass.h"
+#include "target-globals.h"
+#include "ira.h"
+#include "ira-int.h"
+#include "regrename.h"
+#include "nds32-load-store-opt.h"
+#include "nds32-reg-utils.h"
+#include <vector>
+#include <algorithm>
+#include 
+
+#define NDS32_GPR_NUM 32
+
+static int
+compare_order (const void *a, const void *b)
+{
+  const load_store_info_t *fp1 = (const load_store_info_t *) a;
+  const load_store_info_t *fp2 = (const load_store_info_t *) b;
+  const load_store_info_t f1 = *fp1;
+  const load_store_info_t f2 = *fp2;
+
+  return f1.order < f2.order ? -1 : 1;
+}
+
+static int
+compare_offset (const void *a, const void *b)
+{
+  const load_store_info_t *fp1 = (const load_store_info_t *) a;
+  const load_store_info_t *fp2 = (const load_store_info_t *) b;
+  const load_store_info_t f1 = *fp1;
+  const load_store_info_t f2 = *fp2;
+
+  return f1.offset < f2.offset ? -1 : 1;
+}
+
+static bool
+compare_amount(available_reg_info_t a, available_reg_info_t b)
+{
+    return a.amount > b.amount;
+}
+
+static bool
+nds32_load_store_reg_plus_offset (rtx_insn *insn, load_store_info_t *load_store_info)
+{
+  rtx pattern, mem, reg, base_reg, addr;
+  HOST_WIDE_INT offset;
+  bool load_p;
+  enum nds32_memory_post_type post_type = NDS32_NONE;
+
+  pattern = PATTERN (insn);
+  mem = NULL_RTX;
+  reg = NULL_RTX;
+  base_reg = NULL_RTX;
+  offset = 0;
+  load_p = false;
+
+  if (GET_CODE (pattern) != SET)
+    return false;
+
+  if (MEM_P (SET_SRC (pattern)))
+    {
+      mem = SET_SRC (pattern);
+      reg = SET_DEST (pattern);
+      load_p = true;
+    }
+
+  if (MEM_P (SET_DEST (pattern)))
+    {
+      mem = SET_DEST (pattern);
+      reg = SET_SRC (pattern);
+      load_p = false;
+    }
+
+  if (mem == NULL_RTX || reg == NULL_RTX || !REG_P (reg))
+    return false;
+
+  /* There is no load/store-multiple instruction for FPU registers.  */
+  if (!NDS32_IS_GPR_REGNUM (REGNO (reg)))
+    return false;
+
+  if (MEM_VOLATILE_P (mem))
+    return false;
+
+  if (GET_MODE (reg) != SImode)
+    return false;
+
+  gcc_assert (REG_P (reg));
+
+  addr = XEXP (mem, 0);
+
+  /* We only care about [reg] and [reg+const].  */
+  if (REG_P (addr))
+    {
+      base_reg = addr;
+      offset = 0;
+    }
+  else if (GET_CODE (addr) == PLUS
+	   && CONST_INT_P (XEXP (addr, 1)))
+    {
+      base_reg = XEXP (addr, 0);
+      offset = INTVAL (XEXP (addr, 1));
+      if (!REG_P (base_reg))
+	return false;
+    }
+  else if (GET_CODE (addr) == POST_INC)
+    {
+      base_reg = XEXP (addr, 0);
+      offset = 0;
+      post_type = NDS32_POST_INC;
+    }
+  else if (GET_CODE (addr) == POST_DEC)
+    {
+      base_reg = XEXP (addr, 0);
+      offset = 0;
+      post_type = NDS32_POST_DEC;
+    }
+  else
+    return false;
+
+  if ((REGNO (base_reg) > NDS32_LAST_GPR_REGNUM)
+      && (REGNO (base_reg) < FIRST_PSEUDO_REGISTER))
+    return false;
+
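+  /* At this point INSN matched one of the forms we handle.  For
+     illustration only (mnemonics shown as a sketch):
+       lwi $r0, [$r3 + 8]    ->  load_p = true,  base_reg = $r3, offset = 8
+       swi $r1, [$r3]        ->  load_p = false, base_reg = $r3, offset = 0
+       lwi.bi $r2, [$r3], 4  ->  post_type = NDS32_POST_INC  */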
+  if (load_store_info)
+    {
+      load_store_info->load_p   = load_p;
+      load_store_info->offset   = offset;
+      load_store_info->reg      = reg;
+      load_store_info->base_reg = base_reg;
+      load_store_info->insn     = insn;
+      load_store_info->mem      = mem;
+      load_store_info->post_type = post_type;
+    }
+
+  return true;
+}
+
+static bool
+nds32_insn_alias_p (rtx memref, rtx x)
+{
+  rtx mem;
+
+  if (GET_CODE (x) == PARALLEL)
+    {
+      int i, j;
+
+      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+	{
+	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+	    if (nds32_insn_alias_p (memref, XVECEXP (x, i, j)))
+	      return true;
+	}
+
+      return false;
+    }
+
+  if (GET_CODE (x) != SET)
+    return true;
+
+  if (MEM_P (SET_SRC (x)))
+    mem = SET_SRC (x);
+  else if (MEM_P (SET_DEST (x)))
+    mem = SET_DEST (x);
+  else
+    return false;
+
+  if (may_alias_p (memref, mem))
+    return true;
+  else
+    return false;
+}
+
+static void
+nds32_emit_multiple_insn (load_store_infos_t *multiple_insn,
+			  rtx base_reg, rtx place, bool update_p)
+{
+  unsigned int i;
+  unsigned int num_use_regs = multiple_insn->length ();
+  int par_index = 0;
+  int offset = 0;
+  bool load_p = (*multiple_insn)[0].load_p;
+
+  rtx reg;
+  rtx mem;
+  rtx push_rtx;
+  rtx update_offset;
+  rtx parallel_insn;
+
+  /* In addition to the transferred registers, we need one more slot
+     for the (set base (plus base X)) update rtx.  */
+  if (update_p)
+    num_use_regs++;
+
+  parallel_insn = gen_rtx_PARALLEL (VOIDmode,
+				    rtvec_alloc (num_use_regs));
+
+  /* Set update insn.  */
+  if (update_p)
+    {
+      update_offset = GEN_INT (multiple_insn->length () * 4);
+      push_rtx = gen_addsi3 (base_reg, base_reg, update_offset);
+      XVECEXP (parallel_insn, 0, par_index) = push_rtx;
+      par_index++;
+    }
+
+  /* Create (set mem regX) from start_reg to end_reg.  */
+  for (i = 0; i < multiple_insn->length (); ++i)
+    {
+      reg = (*multiple_insn)[i].reg;
+      mem = gen_frame_mem (SImode, plus_constant (Pmode,
+						  base_reg,
+						  offset));
+      MEM_COPY_ATTRIBUTES (mem, (*multiple_insn)[i].mem);
+
+      if (load_p)
+	push_rtx = gen_rtx_SET (reg, mem);
+      else
+	push_rtx = gen_rtx_SET (mem, reg);
+
+      XVECEXP (parallel_insn, 0, par_index) = push_rtx;
+      offset = offset + 4;
+      par_index++;
+    }
+
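+  /* A rough sketch of the PARALLEL built above, for a two-register load
+     with base update (illustrative only):
+       (parallel [(set (reg base) (plus (reg base) (const_int 8)))
+                  (set (reg r0) (mem (reg base)))
+                  (set (reg r1) (mem (plus (reg base) (const_int 4))))])  */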
+  emit_insn_before (parallel_insn, place);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "lmw/smw instruction:\n");
+      print_rtl_single (dump_file, parallel_insn);
+    }
+}
+
+static void
+nds32_emit_add_insn (load_store_info_t insn, rtx base_reg,
+		     rtx place, bool add_p)
+{
+  rtx add_insn;
+  HOST_WIDE_INT offset = insn.offset;
+  if (!add_p)
+    offset = -offset;
+
+  add_insn = gen_addsi3 (base_reg, insn.base_reg, GEN_INT (offset));
+  emit_insn_before (add_insn, place);
+}
+
+/* Fetch the instructions that have the same group ID.  */
+static void
+nds32_fetch_group_insn (load_store_infos_t *src,
+			load_store_infos_t *dst, int id)
+{
+  unsigned int i = 0;
+
+  while (i < src->length ())
+    {
+      if (id == (*src)[i].group)
+	{
+	  dst->safe_push ((*src)[i]);
+	  src->ordered_remove (i);
+	  i = 0;
+	}
+      else
+	i++;
+    }
+}
+
+/* Find a place where the combined lmw/smw insn can be inserted, i.e.
+   where none of the collected registers is used or defined in between.  */
+static rtx
+nds32_lmwsmw_insert_place (load_store_infos_t *insn_set)
+{
+  unsigned int i, position;
+  bool combine_p;
+  rtx_insn *insn;
+  auto_vec<load_store_info_t> temp_set;
+
+  for (i = 0; i < insn_set->length (); i++)
+    temp_set.safe_push ((*insn_set)[i]);
+
+  /* Check that the registers are not used or defined between the
+     first and the last instruction, and find a place where the
+     lmw/smw instruction can be inserted.
+       example:
+	 lwi $r0, [$r2 + 4]
+	 lwi $r1, [$r2 + 8]
+
+     Check that $r0 and $r1 are not used or defined.  */
+  temp_set.qsort (compare_order);
+
+  for (position = 0; position < temp_set.length (); ++position)
+    {
+      combine_p = true;
+
+      /* Check the instructions from the first instruction to POSITION.  */
+      for (i = 0; i < position; i++)
+	{
+	  for (insn = NEXT_INSN (temp_set[i].insn);
+	       insn != temp_set[position].insn;
+	       insn = NEXT_INSN (insn))
+	    {
+	      if (!NONDEBUG_INSN_P (insn))
+		continue;
+	      if (df_reg_used (insn, temp_set[i].reg)
+		  || df_reg_defined (insn, temp_set[i].reg))
+		{
+		  if (dump_file)
+		    {
+		      fprintf (dump_file, "Fail:register has modify\n");
+		      fprintf (dump_file, "insn uid:%d, reg: r%d,\n",
+			       INSN_UID (temp_set[position].insn),
+			       REGNO (temp_set[position].reg));
+		      fprintf (dump_file, "Modify instruction:\n");
+		      print_rtl_single (dump_file, insn);
+		    }
+		  combine_p = false;
+		  break;
+		}
+	    }
+	}
+
+      /* Check the instructions from POSITION to the last instruction.  */
+      for (i = position + 1; i < temp_set.length (); i++)
+	{
+	  for (insn = temp_set[position].insn;
+	       insn != temp_set[i].insn;
+	       insn = NEXT_INSN (insn))
+	    {
+	      if (!NONDEBUG_INSN_P (insn))
+		continue;
+	      if (df_reg_used (insn, temp_set[i].reg)
+		  || df_reg_defined (insn, temp_set[i].reg))
+		{
+		  if (dump_file)
+		    {
+		      fprintf (dump_file, "Fail:register has modify\n");
+		      fprintf (dump_file, "insn uid:%d, reg: r%d,\n",
+			       INSN_UID (temp_set[position].insn),
+			       REGNO (temp_set[position].reg));
+		      fprintf (dump_file, "Modify instruction:\n");
+		      print_rtl_single (dump_file, insn);
+		    }
+		  combine_p = false;
+		  break;
+		}
+	    }
+	}
+
+      if (combine_p)
+	return temp_set[position].insn;
+    }
+
+  return NULL_RTX;
+}
+
+/* Check that the base register and aliasing memory accesses do not
+   interfere between the first insn and the last insn.  */
+static bool
+nds32_base_reg_safe_p (load_store_infos_t *insn_set)
+{
+  unsigned int i;
+  rtx_insn *insn;
+  auto_vec<load_store_info_t> temp_set;
+
+  /* We would otherwise change the element order of 'insn_set', so use
+     'temp_set' to avoid reordering it.  */
+  for (i = 0; i < insn_set->length (); i++)
+    temp_set.safe_push ((*insn_set)[i]);
+
+  /* To combine load and store instructions, we need to check that the
+     base register is not used or defined
+     between the first insn and the last insn.
+     example:
+       lwi $r0, [$r3 + 4]
+	    ...		  <- check here
+       lwi $r1, [$r3 + 8]
+	    ...		  <- check here
+       lwi $r2, [$r3 + 12]
+
+     Check that $r3 is not used or defined
+     between the first insn and the last insn.  */
+
+  /* Scan instruction from top to bottom,
+     so need to sort by order.  */
+  temp_set.qsort (compare_order);
+
+  for (i = 0; i < temp_set.length () - 1; ++i)
+    {
+      for (insn = NEXT_INSN (temp_set[i].insn);
+	   insn != temp_set[i + 1].insn;
+	   insn = NEXT_INSN (insn))
+	{
+	  if (!NONDEBUG_INSN_P (insn))
+	    continue;
+
+	  if (nds32_insn_alias_p (temp_set[0].mem, PATTERN (insn)))
+	    {
+	      if (dump_file)
+		{
+		  fprintf (dump_file, "Memory alias:\n");
+		  print_rtl_single (dump_file, insn);
+		}
+	      return false;
+	    }
+
+	  if (temp_set[0].load_p)
+ 	    {
+	      if (df_reg_defined (insn, temp_set[0].base_reg))
+ 		{
+		  if (dump_file)
+		    {
+		      fprintf (dump_file, "Fail: base register has modify\n");
+		      fprintf (dump_file, "insn uid:%d, base reg: r%d,\n",
+			       INSN_UID (temp_set[i].insn),
+			       REGNO (temp_set[i].reg));
+		      fprintf (dump_file, "Modify instruction:\n");
+		      print_rtl_single (dump_file, insn);
+		    }
+		  return false;
+		}
+	    }
+	  else
+	    {
+	      if (df_reg_used (insn, temp_set[0].base_reg))
+		{
+		  if (dump_file)
+		    {
+		      fprintf (dump_file, "Fail: base register has modify\n");
+		      fprintf (dump_file, "insn uid:%d, base reg: r%d,\n",
+			       INSN_UID (temp_set[i].insn),
+			       REGNO (temp_set[i].reg));
+		      fprintf (dump_file, "Modify instruction:\n");
+		      print_rtl_single (dump_file, insn);
+		    }
+		  return false;
+ 		}
+ 	    }
+	}
+    }
+  return true;
+}
+
+static bool
+nds32_gain_size_p (load_store_infos_t *insn, bool new_base_p)
+{
+  unsigned int i, new_cost = 4, old_cost = 0;
+  rtx reg;
+  rtx base_reg = (*insn)[0].base_reg;
+  HOST_WIDE_INT offset;
+
+  for (i = 0; i < insn->length (); ++i)
+    {
+      reg = (*insn)[i].reg;
+      offset = (*insn)[i].offset;
+
+      if (in_reg_class_p (reg, LOW_REGS))
+	{
+	  /* lwi37.sp/swi37.sp/lwi37/swi37 */
+	  if ((REGNO (base_reg) == SP_REGNUM
+	      || REGNO (base_reg) == FP_REGNUM)
+	      && (offset >= 0 && offset < 512 && (offset % 4 == 0)))
+	    old_cost += 2;
+	  /* lwi333/swi333 */
+	  else if (in_reg_class_p (base_reg, LOW_REGS)
+		   && (offset >= 0 && offset < 32 && (offset % 4 == 0)))
+	    old_cost += 2;
+	  else
+	    old_cost += 4;
+        }
+      else
+	{
+	  /* lwi450/swi450 */
+	  if (in_reg_class_p (reg, MIDDLE_REGS)
+	      && offset == 0)
+	    old_cost += 2;
+	  else
+	    old_cost += 4;
+	}
+    }
+
+  offset = (*insn)[0].offset;
+  if (offset != 0)
+    {
+      /* addi333 */
+      if (in_reg_class_p (base_reg, LOW_REGS)
+	  && satisfies_constraint_Iu05 (GEN_INT (offset)))
+	new_cost += 2;
+      /* addi45 */
+      else if (in_reg_class_p (base_reg, MIDDLE_REGS)
+	       && satisfies_constraint_Iu05 (GEN_INT (offset)))
+	new_cost += 2;
+      else
+	new_cost += 4;
+
+      /* subri */
+      if (!new_base_p)
+	new_cost += 4;
+    }
+
+  if (dump_file)
+    fprintf (dump_file, "Code size compare: old code size is %d,"
+			" new code size is %d\n", old_cost, new_cost);
+
+  return new_cost < old_cost;
+}
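+
+/* A worked example for nds32_gain_size_p above (illustrative, assuming a
+   low-register base and a spare base register so no subri is needed):
+   three 2-byte lwi333 insns at offsets 4/8/12 cost 6 bytes, while
+   addi333 (2) + lmw.bi (4) also cost 6, so there is no size gain; with
+   four or more such insns the lmw sequence becomes smaller.  */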
+
+static bool
+nds32_gain_speed_p (load_store_infos_t *insn, bool new_base_p)
+{
+  unsigned int new_cost = 0, old_cost = insn->length ();
+
+  if (TARGET_PIPELINE_GRAYWOLF)
+    {
+      new_cost = insn->length () / 2 + insn->length () % 2;
+
+      if ((*insn)[0].offset != 0)
+	{
+	  /* Need addi instruction. */
+	  new_cost += 1;
+
+	  /* Need subri instruction. */
+	  if (!new_base_p)
+	    new_cost += 1;
+	}
+    }
+  else
+    {
+      if ((*insn)[0].offset != 0)
+	return false;
+    }
+
+  return new_cost < old_cost;
+}
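+
+/* Likewise for nds32_gain_speed_p above (illustrative): on the Graywolf
+   pipeline, four combinable insns are modelled as 4/2 + 0 = 2 for the
+   lmw/smw (plus 1 for an addi when the start offset is nonzero),
+   against 4 for the separate insns, so combining wins.  */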
+
+/* Check whether the instructions can be combined into a multiple
+   load/store instruction.  */
+static bool
+nds32_combine_multiple_p (load_store_infos_t *insn_set, bool new_base_p)
+{
+  unsigned int i;
+  auto_vec<load_store_info_t> temp_set;
+
+  /* We would otherwise change the element order of 'insn_set', so use
+     'temp_set' to avoid reordering it.  */
+  for (i = 0; i < insn_set->length (); i++)
+    temp_set.safe_push ((*insn_set)[i]);
+
+  /* To check the start offset, we need to sort by offset first.  */
+  temp_set.qsort (compare_offset);
+
+  /* The lmw/smw pattern needs two or more instructions.  */
+  if (temp_set.length () < 2)
+    return false;
+
+  /* The lmw/smw pattern only allows combining up to 25 instructions.  */
+  if (temp_set.length () > 25)
+    return false;
+
+  if (TARGET_LMWSMW_OPT_SIZE
+      || (TARGET_LMWSMW_OPT_AUTO && optimize_size))
+    {
+      /* Compare the original instructions with the multiple load/store
+         instruction; only combine when the multiple instruction is
+         smaller than the original instructions.  */
+      if (!nds32_gain_size_p (&temp_set, new_base_p))
+	return false;
+    }
+  else if (TARGET_LMWSMW_OPT_SPEED
+	   || (TARGET_LMWSMW_OPT_AUTO && !optimize_size))
+    {
+      /* When the start offset is not zero, we need an extra instruction
+         to handle the offset, which is not worthwhile at -O2/-O3.  */
+      if (!nds32_gain_speed_p (&temp_set, new_base_p))
+	return false;
+    }
+
+  /* When the offset is not zero, the base register must not be one of
+     the transferred registers.  */
+  if (temp_set[0].offset != 0)
+    for (i = 0; i < temp_set.length (); ++i)
+      {
+	if (REGNO (temp_set[i].reg)
+	    == REGNO (temp_set[0].base_reg))
+	  return false;
+      }
+
+  /* Don't combine when the start offset does not fit in Is15,
+     because an extra register would be needed.  */
+  if (!satisfies_constraint_Is15 (GEN_INT (temp_set[0].offset)))
+    return false;
+
+  return true;
+}
+
+static bool
+nds32_use_bim_p (load_store_infos_t *insn_set,
+		 load_store_infos_t *ref_set)
+{
+  rtx_insn *insn;
+  bool combine_p = true;
+
+  /* To generate the .bim form, the offsets need to be contiguous.  */
+  if (insn_set->last ().offset != ((*ref_set)[0].offset - 4))
+    return false;
+
+  /* Reject the case where the 'insn_set' instructions come after
+     the 'ref_set' instructions.  */
+  if ((*insn_set)[0].group > (*ref_set)[0].group)
+    return false;
+
+  /* Scan instruction from top to bottom,
+     so need to sort by order.  */
+  insn_set->qsort (compare_order);
+  ref_set->qsort (compare_order);
+
+  /* To combine into the .bim form, we need to check that the base
+     register is not used or defined between one multiple-insn and
+     the next multiple-insn.
+     example:
+      lmw.bim $r0, [$r2], $r1
+		...		       <- check here
+      lmw.bi  $r3, [$r2], $r4
+
+    To use the .bim form, check that $r2 is not used or defined
+    between the lmw.bim and the lmw.bi.  */
+    for (insn = NEXT_INSN (insn_set->last ().insn);
+	 insn != (*ref_set)[0].insn;
+	 insn = NEXT_INSN (insn))
+      {
+	if (!NONDEBUG_INSN_P (insn))
+	  continue;
+
+	if (nds32_insn_alias_p ((*insn_set)[0].mem, PATTERN (insn)))
+	  {
+	    if (dump_file)
+	      {
+		fprintf (dump_file, "Have memory instruction:\n");
+		print_rtl_single (dump_file, insn);
+	      }
+	    combine_p = false;
+	    break;
+	  }
+
+	if (df_reg_used (insn, (*insn_set)[0].base_reg)
+	    || df_reg_defined (insn, (*insn_set)[0].base_reg))
+	  {
+	    if (dump_file)
+	      {
+		fprintf (dump_file, "Use .bi form: Base reg is"
+			 " used or defined between multiple-insn"
+			 " and next multiple-insn\n");
+		fprintf (dump_file, "Base register: r%d,\n",
+			 REGNO ((*insn_set)[0].base_reg));
+		fprintf (dump_file, "use or def instruction:\n");
+		print_rtl_single (dump_file, insn);
+	      }
+	    combine_p = false;
+	    break;
+	  }
+      }
+
+  /* Restore element order.  */
+  insn_set->qsort (compare_offset);
+  ref_set->qsort (compare_offset);
+
+  if (combine_p)
+    return true;
+  else
+    return false;
+}
+
+static void
+nds32_merge_overlapping_regs (HARD_REG_SET *pset, struct du_head *head)
+{
+  bitmap_iterator bi;
+  unsigned i;
+  IOR_HARD_REG_SET (*pset, head->hard_conflicts);
+  EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi)
+    {
+      du_head_p other = regrename_chain_from_id (i);
+      unsigned j = other->nregs;
+      gcc_assert (other != head);
+      while (j-- > 0)
+	SET_HARD_REG_BIT (*pset, other->regno + j);
+    }
+}
+
+/* Check if NEW_REG can be the candidate register to rename for
+   REG in THIS_HEAD chain.  THIS_UNAVAILABLE is a set of unavailable hard
+   registers.  */
+static bool
+nds32_check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg,
+		       struct du_head *this_head, HARD_REG_SET this_unavailable)
+{
+  enum machine_mode mode = GET_MODE (*this_head->first->loc);
+  int nregs = hard_regno_nregs[new_reg][mode];
+  int i;
+  struct du_chain *tmp;
+
+  for (i = nregs - 1; i >= 0; --i)
+    if (TEST_HARD_REG_BIT (this_unavailable, new_reg + i)
+	|| fixed_regs[new_reg + i]
+	|| global_regs[new_reg + i]
+	/* Can't use regs which aren't saved by the prologue.  */
+	|| (! df_regs_ever_live_p (new_reg + i)
+	    && ! call_used_regs[new_reg + i])
+#ifdef LEAF_REGISTERS
+	/* We can't use a non-leaf register if we're in a
+	   leaf function.  */
+	|| (crtl->is_leaf
+	    && !LEAF_REGISTERS[new_reg + i])
+#endif
+#ifdef HARD_REGNO_RENAME_OK
+	|| ! HARD_REGNO_RENAME_OK (reg + i, new_reg + i)
+#endif
+	)
+      return false;
+
+  /* See whether it accepts all modes that occur in
+     definition and uses.  */
+  for (tmp = this_head->first; tmp; tmp = tmp->next_use)
+    if ((! HARD_REGNO_MODE_OK (new_reg, GET_MODE (*tmp->loc))
+	 && ! DEBUG_INSN_P (tmp->insn))
+	|| (this_head->need_caller_save_reg
+	    && ! (HARD_REGNO_CALL_PART_CLOBBERED
+		  (reg, GET_MODE (*tmp->loc)))
+	    && (HARD_REGNO_CALL_PART_CLOBBERED
+		(new_reg, GET_MODE (*tmp->loc)))))
+      return false;
+
+  return true;
+}
+
+static int
+nds32_find_best_rename_reg (du_head_p this_head, int new_reg, int old_reg)
+{
+  HARD_REG_SET unavailable;
+  int best_new_reg = old_reg;
+
+  COMPL_HARD_REG_SET (unavailable, reg_class_contents[GENERAL_REGS]);
+  CLEAR_HARD_REG_BIT (unavailable, this_head->regno);
+
+  /* Further narrow the set of registers we can use for renaming.
+     If the chain needs a call-saved register, mark the call-used
+     registers as unavailable.  */
+  if (this_head->need_caller_save_reg)
+    IOR_HARD_REG_SET (unavailable, call_used_reg_set);
+
+  /* Mark registers that overlap this chain's lifetime as unavailable.  */
+  nds32_merge_overlapping_regs (&unavailable, this_head);
+
+  if (nds32_check_new_reg_p (old_reg, new_reg, this_head, unavailable))
+    best_new_reg = new_reg;
+
+  return best_new_reg;
+}
+
+static bool
+nds32_try_rename_reg (rtx_insn *insn, unsigned op_pos, unsigned best_reg)
+{
+  insn_rr_info *info;
+  du_head_p op_chain;
+  unsigned oldreg, newreg;
+
+  info = &insn_rr[INSN_UID (insn)];
+
+  if (info->op_info == NULL)
+    return false;
+
+  if (info->op_info[op_pos].n_chains == 0)
+    return false;
+
+  op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id);
+
+  if (op_chain->cannot_rename)
+    return false;
+
+  oldreg = op_chain->regno;
+  newreg = nds32_find_best_rename_reg (op_chain, best_reg, oldreg);
+
+  if (newreg == oldreg)
+    return false;
+
+  return true;
+}
+
+/* Grouping consecutive registers.  */
+static void
+nds32_group_available_reg (HARD_REG_SET *available_regset, enum reg_class clazz,
+                           std::vector<available_reg_info_t> *available_group)
+{
+  hard_reg_set_iterator hrsi;
+  unsigned regno, pre_regno = 0;
+  unsigned count = 0;
+  available_reg_info_t reg_info;
+  std::vector<available_reg_info_t>::iterator it;
+
+  if (!available_group->empty ())
+    available_group->clear ();
+
+  /* Find the available registers from $r16 to $r31.  */
+  EXECUTE_IF_SET_IN_HARD_REG_SET (reg_class_contents[clazz], 2, regno, hrsi)
+    {
+      /* A caller-saved register, or a callee-saved register that is
+         already live somewhere in this function.  */
+      if (TEST_HARD_REG_BIT (*available_regset, regno)
+	  && (call_used_regs[regno] || df_regs_ever_live_p (regno)))
+	{
+	  if (pre_regno == 0
+	      || (pre_regno + 1) == regno)
+	    count++;
+	}
+      else
+	{
+	  if (count >= 2)
+	    {
+	      reg_info.amount = count;
+	      reg_info.end = pre_regno;
+	      reg_info.start = pre_regno - count + 1;
+	      available_group->push_back (reg_info);
+	    }
+	  count = 0;
+	}
+      pre_regno = regno;
+    }
+
+  sort (available_group->begin(), available_group->end(), compare_amount);
+
+  if (dump_file)
+    {
+      for (it = available_group->begin();
+	   it != available_group->end(); ++it)
+	fprintf (dump_file,
+		 "available amount = %d start = %d "
+		 "end = %d \n", it->amount, it->start,
+		 it->end);
+    }
+}
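+
+/* An illustrative grouping for nds32_group_available_reg above
+   (hypothetical register state): if $r16-$r19 and $r22-$r23 are
+   available, two entries are recorded, {start = 16, end = 19,
+   amount = 4} and {start = 22, end = 23, amount = 2}, sorted so that
+   the larger group comes first.  */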
+
+/* Try to rename the insns' registers into consecutive order.  */
+static void
+nds32_find_reg (load_store_infos_t *insn, load_store_infos_t *rename_insn,
+		HARD_REG_SET *available_regset)
+{
+  int can_rename_number;
+  unsigned i, regno, amount;
+  unsigned op_pos = (*insn)[0].load_p ? 0 : 1;
+  auto_vec<load_store_info_t> temp_set;
+  std::vector<available_reg_info_t> available_group;
+  std::vector<available_reg_info_t>::iterator it;
+  auto_vec<load_store_info_t> down_set, up_set;
+  unsigned int down_num = 0, up_num = 0;
+  long offset;
+  int m;
+
+  /* We would otherwise change the element order of 'insn', so use
+     'temp_set' to avoid reordering it.  */
+  for (i = 0; i < insn->length (); i++)
+    temp_set.safe_push ((*insn)[i]);
+
+  if (temp_set[0].post_type == NDS32_NONE)
+    temp_set.qsort (compare_offset);
+
+  nds32_group_available_reg (available_regset, GENERAL_REGS, &available_group);
+
+  /* Check renaming registers from the top insn to the bottom insn,
+     and avoid using the fp, sp, lp, gp registers.  */
+  regno = REGNO (temp_set[0].reg);
+  can_rename_number = regno + temp_set.length () - 1;
+  offset = temp_set[0].offset;
+
+  if (can_rename_number < FP_REGNUM)
+    for (i = 1; i < temp_set.length (); ++i)
+      {
+	/* Find this case:
+	     lwi $r0, [$r2 + 4]
+	     lwi $r3, [$r2 + 8]
+
+	   Rename $r3 to $r1.  */
+	down_num++;
+	if ((regno + i) != REGNO (temp_set[i].reg))
+	  {
+	    if (nds32_try_rename_reg (temp_set[i].insn, op_pos, regno + i))
+	      {
+                /* Store in the temporary set.  */
+		down_set.safe_push (temp_set[i]);
+		down_set.last ().new_reg = regno + i;
+	      }
+	    else
+	      /* Stop when the register sequence is broken.  */
+	      break;
+	  }
+      }
+
+  /* Check renaming registers from the bottom insn to the top insn,
+     and avoid using the fp, sp, lp, gp registers.  */
+  regno = REGNO (temp_set.last ().reg);
+  can_rename_number = regno - temp_set.length () + 1;
+
+  if (can_rename_number > 0 && regno < FP_REGNUM)
+    for (i = temp_set.length () - 1; i > 0; --i)
+      {
+	/* Find this case:
+	     lwi $r1, [$r2 + 4]
+	     lwi $r4, [$r2 + 8]
+
+	   Rename $r1 to $r3.  */
+	up_num++;
+	if ((regno - i) != REGNO (temp_set[i - 1].reg))
+	  {
+	    if (nds32_try_rename_reg (temp_set[i - 1].insn, op_pos, regno - i))
+	      {
+		/* Store in rename_insn.  */
+		up_set.safe_push (temp_set[i - 1]);
+		up_set.last ().new_reg = regno - i;
+	      }
+	    else
+	      /* Stop when the register sequence is broken.  */
+	      break;
+	  }
+      }
+
+  /* Rename for the longest sequence.  */
+  /* The overhead of zero offset instruction is lowest, so try it first.  */
+  if ((offset == 0 || down_num >= up_num) && !down_set.is_empty ())
+    {
+      for (m = down_set.length () - 1; m >= 0; --m)
+	{
+	  regno = REGNO (down_set[m].reg);
+	  CLEAR_HARD_REG_BIT (*available_regset, regno);
+	  rename_insn->safe_push (down_set[m]);
+	}
+      nds32_group_available_reg (available_regset, GENERAL_REGS,
+				 &available_group);
+      return;
+    }
+  else if (up_num >= down_num && !up_set.is_empty ())
+    {
+      for (m = up_set.length () - 1; m >= 0; --m)
+	{
+	  regno = REGNO (up_set[m].reg);
+	  CLEAR_HARD_REG_BIT (*available_regset, regno);
+	  rename_insn->safe_push (up_set[m]);
+	}
+      nds32_group_available_reg (available_regset, GENERAL_REGS,
+				 &available_group);
+      return;
+    }
+  /* If the available-register table is empty, nothing can be renamed.  */
+  else if (available_group.empty ())
+    return;
+
+  amount = available_group.begin ()->amount;
+  /* Use the smaller of the two as the number of registers to rename.  */
+  if (amount > temp_set.length ())
+    amount = temp_set.length ();
+
+  /* Rename using the largest group of available registers.  */
+  regno = available_group.begin ()->start;
+  for (i = 0; i < amount; ++i)
+    {
+      if (nds32_try_rename_reg (temp_set[i].insn, op_pos, regno))
+	{
+	  rename_insn->safe_push (temp_set[i]);
+	  rename_insn->last ().new_reg = regno;
+	  CLEAR_HARD_REG_BIT (*available_regset, regno);
+	  regno++;
+	}
+      else
+	/* Stop when the register sequence is broken.  */
+	break;
+    }
+
+  /* Check length here because the whole sequence entries
+     have to be renamed.  */
+  if (rename_insn->length () > 1)
+    {
+      /* Update available table.  */
+      nds32_group_available_reg (available_regset, GENERAL_REGS,
+				 &available_group);
+      return;
+    }
+
+  /* Try every available register group to rename the insns pairwise.  */
+  for (i = 0; i < (temp_set.length () - 1); i += 2)
+    {
+      for (it = available_group.begin();
+	   it != available_group.end(); ++it)
+	{
+	  bool change_p = false;
+	  unsigned int j;
+	  regno = it->start;
+
+          /* Replace two instructions at a time.  */
+	  for (j = regno; j < (it->end + 1); j += 2)
+	    {
+	      if (nds32_try_rename_reg (temp_set[i].insn, op_pos, regno)
+		  && nds32_try_rename_reg (temp_set[i + 1].insn,
+					   op_pos, regno + 1))
+		{
+		  rename_insn->safe_push (temp_set[i]);
+		  rename_insn->last ().new_reg = regno;
+		  CLEAR_HARD_REG_BIT (*available_regset, regno);
+
+		  rename_insn->safe_push (temp_set[i + 1]);
+		  rename_insn->last ().new_reg = regno + 1;
+		  CLEAR_HARD_REG_BIT (*available_regset, regno + 1);
+		  change_p = true;
+		  break;
+		}
+	    }
+
+	  if (change_p)
+	    {
+	      nds32_group_available_reg (available_regset, GENERAL_REGS,
+					 &available_group);
+	      break;
+	    }
+	}
+    }
+}
+
+static void
+nds32_rename_reg (rtx_insn *insn, unsigned op_pos, unsigned newreg)
+{
+  insn_rr_info *info;
+  du_head_p op_chain;
+
+  info = &insn_rr[INSN_UID (insn)];
+  op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "Try to rename operand %d to %d:\n",
+	       op_pos, newreg);
+      print_rtl_single (dump_file, insn);
+    }
+
+  regrename_do_replace (op_chain, newreg);
+
+  if (dump_file)
+    {
+      print_rtl_single (dump_file, insn);
+    }
+}
+
+/* Combine multiple load/store insns into one lmw/smw insn.  */
+static void
+nds32_combine_bi_insn (load_store_infos_t *load_store_info)
+{
+  auto_vec<load_store_info_t> candidate_set, bi_set;
+  unsigned int i, j, regno;
+
+  bool load_insn_p;
+  enum nds32_memory_post_type post_type;
+
+  for (i = 0; i < load_store_info->length (); ++i)
+    {
+      /* Record the instruction priority order and the initial place.  */
+      (*load_store_info)[i].order = i;
+      (*load_store_info)[i].place = false;
+      candidate_set.safe_push ((*load_store_info)[i]);
+    }
+
+  for (i = 0; i < candidate_set.length (); ++i)
+    {
+      load_insn_p = candidate_set[i].load_p;
+      post_type = candidate_set[i].post_type;
+      regno = REGNO (candidate_set[i].reg);
+
+      for (j = i + 1; j < candidate_set.length (); ++j)
+	{
+	  if ((post_type == candidate_set[j].post_type)
+	      && (load_insn_p == candidate_set[j].load_p)
+	      && ((regno + 1) == REGNO (candidate_set[j].reg)))
+	    {
+	      bi_set.safe_push (candidate_set[i]);
+	      bi_set.safe_push (candidate_set[j]);
+
+	      if (nds32_combine_multiple_p (&bi_set, false)
+		  && nds32_base_reg_safe_p (&bi_set)
+		  && nds32_lmwsmw_insert_place (&bi_set) != NULL_RTX)
+		{
+		  rtx place = nds32_lmwsmw_insert_place (&bi_set);
+		  rtx base_reg = bi_set[0].base_reg;
+
+		  nds32_emit_multiple_insn (&bi_set, base_reg, place, true);
+                  delete_insn (bi_set[0].insn);
+                  delete_insn (bi_set[1].insn);
+		  candidate_set.ordered_remove (j);
+		  bi_set.block_remove (0, bi_set.length ());
+		  break;
+		}
+
+	      bi_set.block_remove (0, bi_set.length ());
+	    }
+	}
+    }
+}
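+
+/* A sketch of the transformation done by nds32_combine_bi_insn above
+   (hypothetical insns):
+     lwi.bi $r0, [$r3], 4
+     lwi.bi $r1, [$r3], 4
+   may become a single
+     lmw.bim $r0, [$r3], $r1
+   when the pair passes the safety and profitability checks.  */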
+
+/* Combine multiple load/store insns into one lmw/smw insn.  */
+static void
+nds32_combine_load_store_insn (load_store_infos_t *load_store_info,
+			       HARD_REG_SET *available_regset)
+{
+  auto_vec<load_store_info_t> candidate_set, main_set, temp_set;
+  auto_vec<load_store_info_t> first_set, second_set;
+  HOST_WIDE_INT current_offset, last_offset = 0, add_offset = 0;
+  unsigned int i, j, regno;
+  int group_num = 0, group_id;
+  bool load_insn_p;
+  bool new_base_p = false;
+  bool prev_bim_p = false;
+  bool inc_p = true, dec_p = true;
+  rtx new_base_reg = NULL_RTX;
+  rtx base_reg = (*load_store_info)[0].base_reg;
+  rtx place;
+  unsigned new_base_regnum;
+
+  /* Get available register to add offset for first instruction.  */
+  new_base_regnum = find_available_reg (available_regset, GENERAL_REGS);
+  if (new_base_regnum != INVALID_REGNUM)
+    {
+      CLEAR_HARD_REG_BIT (*available_regset, new_base_regnum);
+      new_base_reg = gen_rtx_REG (Pmode, new_base_regnum);
+      /* Copy attributes from the base register to the new base register.  */
+      ORIGINAL_REGNO (new_base_reg) =
+	ORIGINAL_REGNO ((*load_store_info)[0].base_reg);
+      REG_ATTRS (new_base_reg) = REG_ATTRS ((*load_store_info)[0].base_reg);
+      new_base_p = true;
+
+      if (dump_file)
+	fprintf (dump_file, "Have new base register: %d\n", new_base_regnum);
+    }
+
+  /* Record the instruction priority order and the initial place.  */
+  for (i = 0; i < load_store_info->length (); ++i)
+    {
+      (*load_store_info)[i].order = i;
+      (*load_store_info)[i].place = false;
+    }
+
+  /* Fetch the first instruction's information from 'load_store_info';
+     we will use it as the base to search for the next instruction.  */
+  candidate_set.safe_push ((*load_store_info)[0]);
+  /* Set offset, regno, load_p state from candidate_set.  */
+  current_offset = candidate_set[0].offset;
+  regno = REGNO (candidate_set[0].reg);
+  load_insn_p = candidate_set[0].load_p;
+  /* Set first instruction group ID,
+     the group ID mark instruction for the same group.  */
+  candidate_set[0].group = group_num;
+
+  /* Search instructions can be combined to a lmw/smw instruction.  */
+  for (i = 1; i < load_store_info->length (); ++i)
+    {
+      /* Collect the case where the register number and offset increase,
+	 for example:
+
+	   lwi $r0, [$r22 + 4]  <- base instruction
+	   lwi $r1, [$r22 + 8]  <- collect object
+
+         The collected insn uses (regno + 1) and (offset + 4)
+         relative to the base instruction.  */
+      if ((current_offset == (*load_store_info)[i].offset - 4)
+	  && ((regno + 1) == REGNO ((*load_store_info)[i].reg))
+	  && (load_insn_p == (*load_store_info)[i].load_p)
+	  && inc_p)
+	{
+	  /* Give instruction group ID.  */
+	  (*load_store_info)[i].group = group_num;
+	  /* Save instruction.  */
+	  candidate_set.safe_push ((*load_store_info)[i]);
+	  /* Update state, next register number and offset.  */
+	  regno = REGNO ((*load_store_info)[i].reg);
+	  current_offset += 4;
+	  /* Close decrease type, search increase type.  */
+	  dec_p = false;
+	}
+      /* Collect the case where the register number and offset decrease,
+	 for example:
+
+	   lwi $r2, [$r22 + 8]  <- base instruction
+	   lwi $r1, [$r22 + 4]  <- collect object
+
+         The collected insn uses (regno - 1) and (offset - 4)
+         relative to the base instruction.  */
+      else if ((current_offset == (*load_store_info)[i].offset + 4)
+	       && ((regno - 1) == REGNO ((*load_store_info)[i].reg))
+	       && (load_insn_p == (*load_store_info)[i].load_p)
+	       && dec_p)
+	{
+	  /* Give instruction group ID.  */
+	  (*load_store_info)[i].group = group_num;
+	  /* Save instruction.  */
+	  candidate_set.safe_push ((*load_store_info)[i]);
+
+	  /* Update state, next register number and offset.  */
+	  regno = REGNO ((*load_store_info)[i].reg);
+	  current_offset -= 4;
+	  /* Close increase type, search decrease type.  */
+	  inc_p = false;
+	}
+      else
+	{
+	  inc_p = true;
+	  dec_p = true;
+	}
+
+      /* The instruction collection is complete.  */
+      if ((inc_p && dec_p)
+          || (i + 1) == load_store_info->length ())
+	{
+	  /* Filter candidate instructions.  */
+	  if (nds32_combine_multiple_p (&candidate_set, new_base_p)
+	      && nds32_base_reg_safe_p (&candidate_set)
+	      && nds32_lmwsmw_insert_place (&candidate_set) != NULL_RTX)
+	    {
+	      /* Store candidate instructions to 'main_set'.  */
+	      for (j = 0; j < candidate_set.length (); j++)
+		main_set.safe_push (candidate_set[j]);
+	    }
+
+	  /* Scan to the last instruction, it is complete.  */
+	  if ((i + 1) == load_store_info->length ())
+	    break;
+
+	  /* Clean candidate_set sequence.  */
+	  candidate_set.block_remove (0, candidate_set.length ());
+          /* Reinitialize the first instruction information
+             to search for the next instruction.  */
+	  candidate_set.safe_push ((*load_store_info)[i]);
+	  /* Update group number for next sequence.  */
+	  group_num ++;
+	  /* Set offset, regno, load_p state from candidate_set.  */
+	  current_offset = candidate_set.last ().offset;
+	  regno = REGNO (candidate_set.last ().reg);
+	  load_insn_p = candidate_set.last ().load_p;
+	  candidate_set.last ().group = group_num;
+	}
+      else if (!nds32_base_reg_safe_p (&candidate_set)
+	       || nds32_lmwsmw_insert_place (&candidate_set) == NULL_RTX)
+	{
+          /* The collection is broken here: keep the first (n - 1)
+             instructions in this group, and make the last instruction
+             the first instruction of the next group.  */
+	  for (j = 0; j < (candidate_set.length () - 1); j++)
+	    temp_set.safe_push (candidate_set[j]);
+
+	  /* Store candidate instructions to 'main_set'.  */
+	  if (nds32_combine_multiple_p (&temp_set, new_base_p))
+	    {
+	      for (j = 0; j < (temp_set.length ()); j++)
+		main_set.safe_push (temp_set[j]);
+	    }
+
+	  /* Clean temp_set sequence.  */
+	  temp_set.block_remove (0, temp_set.length ());
+	  /* Clean candidate_set sequence.  */
+	  candidate_set.block_remove (0, (candidate_set.length () - 1));
+	  /* Update group number for next sequence.  */
+	  group_num ++;
+	  /* Set offset, regno, load_p state from candidate_set.  */
+	  current_offset = candidate_set.last ().offset;
+	  regno = REGNO (candidate_set.last ().reg);
+	  load_insn_p = candidate_set.last ().load_p;
+	  candidate_set.last ().group = group_num;
+	  /* Reset it for search increase and decrease type.  */
+	  inc_p = true;
+	  dec_p = true;
+	}
+    }
+
+  if (dump_file)
+    {
+      if (!main_set.is_empty ())
+	fprintf (dump_file,"Do lmwsmw instructions:\n");
+      for (i = 0; i < main_set.length (); ++i)
+	{
+	  fprintf (dump_file,
+		   "regno = %d base_regno = %d "
+		   "offset = " HOST_WIDE_INT_PRINT_DEC " "
+		   "load_p = %d UID = %u group = %d,"
+		   " order = %d, place = %d\n",
+		   REGNO (main_set[i].reg),
+		   REGNO (main_set[i].base_reg),
+		   main_set[i].offset,
+		   main_set[i].load_p,
+		   INSN_UID (main_set[i].insn),
+		   main_set[i].group,
+		   main_set[i].order,
+		   main_set[i].place);
+	}
+    }
+
+  /* Fetch first group instruction from main_set.  */
+  if (!main_set.is_empty ())
+    {
+      /* Sort main_set by offset.  */
+      main_set.qsort (compare_offset);
+
+      group_id = main_set[0].group;
+      nds32_fetch_group_insn (&main_set, &first_set, group_id);
+      last_offset = first_set.last ().offset;
+    }
+
+  /* Main loop for emit lmw/smw instrucion.  */
+  while (!main_set.is_empty ())
+    {
+      /* Get second group ID.  */
+      group_id = main_set[0].group;
+      for (i = 0; i < main_set.length (); ++i)
+	{
+          /* Prefer the group whose offsets continue consecutively
+             from the first group.  */
+	  if ((last_offset + 4) == main_set[i].offset)
+	    {
+	      group_id = main_set[i].group;
+	      break;
+	    }
+	}
+
+      /* Fetch the second instruction group.  */
+      nds32_fetch_group_insn (&main_set, &second_set, group_id);
+      /* Get lmw/smw insert place.  */
+      place = nds32_lmwsmw_insert_place (&first_set);
+
+      /* Adjust the address offset, because the lmw/smw instructions
+         only allow a zero offset.
+	   example:
+	    lwi $r0, [$r3 + 4]
+	    lwi $r1, [$r3 + 8]
+	    lwi $r2, [$r3 + 12]
+
+	    combine into
+
+	    addi $r3, $r3, 4
+	    lwm.bi(m) $r0, [$r3], $r2
+
+         An addi instruction is needed to handle the offset.  */
+      if (first_set[0].offset != 0 && !prev_bim_p)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "Use addi insn handle offset: "
+		     "" HOST_WIDE_INT_PRINT_DEC "\n",
+		     first_set[0].offset);
+          /* Use the available register to handle the offset,
+             so the base register value need not be recovered.  */
+	  if (new_base_p)
+	    {
+	      base_reg = new_base_reg;
+	      add_offset = 0;
+	      CLEAR_HARD_REG_BIT (*available_regset, new_base_regnum);
+	    }
+	  else
+	    add_offset = first_set[0].offset;
+
+	  nds32_emit_add_insn (first_set[0], base_reg, place, true);
+	}
+
+      if (nds32_use_bim_p (&first_set, &second_set))
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "Generate BIM form.\n");
+
+	  nds32_emit_multiple_insn (&first_set, base_reg, place, true);
+
+          /* Update the status for the next instruction sequence.
+             The add_offset needs an extra 4 because the instruction
+             post-increments the base.  */
+	  add_offset = first_set.last ().offset + 4;
+	  prev_bim_p = true;
+	}
+      else
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "Generate BI form.\n");
+
+	  nds32_emit_multiple_insn (&first_set, base_reg, place, false);
+
+	  if (add_offset != 0)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "Use addi insn handle -offset: "
+			 "" HOST_WIDE_INT_PRINT_DEC "\n",
+			 add_offset);
+
+	      nds32_emit_add_insn (first_set[0], base_reg, place, false);
+	      add_offset = 0;
+	    }
+	  prev_bim_p = false;
+
+          /* Recover the base register for the next instruction sequence.  */
+	  if (REGNO (base_reg) != REGNO (first_set[0].base_reg))
+	    base_reg = first_set[0].base_reg;
+	}
+
+      /* Delete insn, replace by lmw/smw instruction.  */
+      for (i = 0; i < first_set.length (); ++i)
+	delete_insn (first_set[i].insn);
+
+      /* Clean first_set for store next instruction group.  */
+      first_set.block_remove (0, first_set.length ());
+      /* Store next instruction group.  */
+      for (i = 0; i < second_set.length (); ++i)
+	first_set.safe_insert (i, second_set[i]);
+
+      /* Clean second_set.  */
+      second_set.block_remove (0, second_set.length ());
+
+      /* Update last_offset for search next group.  */
+      last_offset = first_set.last ().offset;
+    }
+
+  /* Processing the last instruction group.  */
+  if (!first_set.is_empty ())
+    {
+      /* Get lmw/smw insert place.  */
+      place = nds32_lmwsmw_insert_place (&first_set);
+
+      if (first_set[0].offset != 0 && !prev_bim_p)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "Use addi insn handle offset: "
+		     "" HOST_WIDE_INT_PRINT_DEC "\n",
+		     first_set[0].offset);
+
+	  if (new_base_p)
+	    {
+	      base_reg = new_base_reg;
+	      add_offset = 0;
+	    }
+	  else
+	    add_offset = first_set[0].offset;
+
+	  nds32_emit_add_insn (first_set[0], base_reg, place, true);
+	}
+
+      if (dump_file)
+	fprintf (dump_file, "Generate BI form.\n");
+
+      nds32_emit_multiple_insn (&first_set, base_reg, place, false);
+
+      if (add_offset != 0)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "Use addi insn handle -offset: "
+		     "" HOST_WIDE_INT_PRINT_DEC "\n",
+		     -add_offset);
+
+	  nds32_emit_add_insn (first_set[0], base_reg, place, false);
+	}
+
+      /* Delete insn, replace by lmw/smw instruction.  */
+      for (i = 0; i < first_set.length (); ++i)
+	delete_insn (first_set[i].insn);
+    }
+}
+
+/* Rename registers of post-increment load/store insns so that they
+   can later be combined into a lmw/smw insn.  */
+static void
+nds32_rename_bi_insn (load_store_infos_t *load_store_info,
+		       HARD_REG_SET *available_regset)
+{
+  auto_vec<load_store_info_t> candidate_set, bi_set, replace_set;
+  unsigned int i, j;
+
+  bool load_insn_p;
+  enum nds32_memory_post_type post_type;
+
+  for (i = 0; i < load_store_info->length (); ++i)
+    {
+      /* Record the instruction priority order and the initial place.  */
+      (*load_store_info)[i].order = i;
+      (*load_store_info)[i].place = false;
+      candidate_set.safe_push ((*load_store_info)[i]);
+    }
+
+  for (i = 0; i < candidate_set.length (); ++i)
+    {
+      load_insn_p = candidate_set[i].load_p;
+      post_type = candidate_set[i].post_type;
+
+      for (j = i + 1; j < candidate_set.length (); ++j)
+	{
+	  if ((post_type == candidate_set[j].post_type)
+	      && (load_insn_p == candidate_set[j].load_p))
+	    {
+	      bi_set.safe_push (candidate_set[i]);
+	      bi_set.safe_push (candidate_set[j]);
+
+	      if (nds32_combine_multiple_p (&bi_set, false)
+		  && nds32_base_reg_safe_p (&bi_set)
+		  && nds32_lmwsmw_insert_place (&bi_set) != NULL_RTX)
+		{
+		  nds32_find_reg (&bi_set, &replace_set, available_regset);
+
+		  if (!replace_set.is_empty ())
+		    {
+		      unsigned k;
+		      unsigned op_pos = replace_set[0].load_p ? 0 : 1;
+
+		      /* Do rename register.  */
+		      for (k = 0; k < replace_set.length (); ++k)
+			nds32_rename_reg (replace_set[k].insn, op_pos,
+					  replace_set[k].new_reg);
+
+		      replace_set.block_remove (0, replace_set.length ());
+		    }
+
+		  candidate_set.ordered_remove (j);
+		  bi_set.block_remove (0, bi_set.length ());
+		  break;
+		}
+
+	      bi_set.block_remove (0, bi_set.length ());
+	    }
+	}
+    }
+}
+
+/* Rename registers so that multiple load/store insns can be combined.  */
+static void
+nds32_rename_load_store_reg (load_store_infos_t *load_store_info,
+			     HARD_REG_SET *available_regset)
+{
+  auto_vec<load_store_info_t> rename_set, temp_set, replace_set;
+  HOST_WIDE_INT current_offset;
+  unsigned int i, j;
+  bool load_insn_p;
+  bool inc_p = true, dec_p = true;
+
+  /* Record the instruction priority order and the initial place.  */
+  for (i = 0; i < load_store_info->length (); ++i)
+    {
+      (*load_store_info)[i].order = i;
+      (*load_store_info)[i].place = false;
+    }
+
+  /* Fetch the first instruction's information from 'load_store_info';
+     we will use it as the base to search for the next instruction.  */
+  rename_set.safe_push ((*load_store_info)[0]);
+  /* Set offset, load_p state from rename_set.  */
+  current_offset = rename_set[0].offset;
+  load_insn_p = rename_set[0].load_p;
+
+  /* Search instructions can be combined to a lmw/smw instruction.  */
+  for (i = 1; i < load_store_info->length (); ++i)
+    {
+      /* Collect the case where the offset increases, for example:
+
+	   lwi pseudo_reg, [$r22 + 4]  <- base instruction
+	   lwi pseudo_reg, [$r22 + 8]  <- collect object
+
+         The collected insn uses (offset + 4) from the base instruction.  */
+      if ((current_offset == (*load_store_info)[i].offset - 4)
+	  && (load_insn_p == (*load_store_info)[i].load_p)
+	  && inc_p)
+	{
+	  /* Save instruction.  */
+	  rename_set.safe_push ((*load_store_info)[i]);
+	  /* Update offset.  */
+	  current_offset += 4;
+	  /* Close decrease type, search increase type.  */
+	  dec_p = false;
+	}
+      /* Collect the case where the offset decreases, for example:
+
+	   lwi pseudo_reg, [$r22 + 8]  <- base instruction
+	   lwi pseudo_reg, [$r22 + 4]  <- collect object
+
+         The collected insn uses (offset - 4) from the base instruction.  */
+      else if ((current_offset == (*load_store_info)[i].offset + 4)
+	       && (load_insn_p == (*load_store_info)[i].load_p)
+	       && dec_p)
+	{
+	  /* Save instruction.  */
+	  rename_set.safe_push ((*load_store_info)[i]);
+
+	  /* Update offset.  */
+	  current_offset -= 4;
+	  /* Close increase type, search decrease type.  */
+	  inc_p = false;
+	}
+      else
+	{
+	  inc_p = true;
+	  dec_p = true;
+	}
+
+      /* The instruction collection is complete.  */
+      if ((inc_p && dec_p)
+	  || (i + 1) == load_store_info->length ())
+	{
+          /* Check whether the registers can be renamed.  */
+	  if (nds32_combine_multiple_p (&rename_set, false)
+	      && nds32_base_reg_safe_p (&rename_set)
+	      && nds32_lmwsmw_insert_place (&rename_set) != NULL_RTX)
+	    {
+              /* Find renameable instructions and store them in 'replace_set'.  */
+	      nds32_find_reg (&rename_set, &replace_set, available_regset);
+
+	      if (!replace_set.is_empty ())
+		{
+		  unsigned op_pos = replace_set[0].load_p ? 0 : 1;
+
+		  /* Do rename register.  */
+		  for (j = 0; j < replace_set.length (); ++j)
+		    nds32_rename_reg (replace_set[j].insn, op_pos,
+				      replace_set[j].new_reg);
+
+		  replace_set.block_remove (0, replace_set.length ());
+		}
+	    }
+
+	  /* Scan to the last instruction, it is complete.  */
+	  if ((i + 1) == load_store_info->length ())
+	    break;
+
+	  /* Clean rename_set sequence.  */
+	  rename_set.block_remove (0, rename_set.length ());
+          /* Reinitialize the first instruction information
+             to search for the next instruction.  */
+	  rename_set.safe_push ((*load_store_info)[i]);
+	  /* Set offset, load_p state from rename_set.  */
+	  current_offset = rename_set.last ().offset;
+	  load_insn_p = rename_set.last ().load_p;
+	}
+      else if (!nds32_base_reg_safe_p (&rename_set)
+	       || nds32_lmwsmw_insert_place (&rename_set) == NULL_RTX)
+	{
+          /* The collection is broken here: keep the first (n - 1)
+             instructions in this group, and use the last instruction
+             as the first instruction of the next group.  */
+	  for (j = 0; j < (rename_set.length () - 1); j++)
+	    temp_set.safe_push (rename_set[j]);
+
+	  if (nds32_combine_multiple_p (&temp_set, false))
+	    {
+              /* Find renameable instructions and store them in 'replace_set'.  */
+	      nds32_find_reg (&temp_set, &replace_set, available_regset);
+
+	      if (!replace_set.is_empty ())
+		{
+		  unsigned op_pos = replace_set[0].load_p ? 0 : 1;
+
+		  /* Do rename register.  */
+		  for (j = 0; j < replace_set.length (); ++j)
+		    nds32_rename_reg (replace_set[j].insn, op_pos,
+				      replace_set[j].new_reg);
+
+		  replace_set.block_remove (0, replace_set.length ());
+		}
+	    }
+
+	  /* Clean temp_set sequence.  */
+	  temp_set.block_remove (0, temp_set.length ());
+	  /* Clean rename_set sequence.  */
+	  rename_set.block_remove (0, (rename_set.length () - 1));
+	  /* Set offset, regno, load_p state from rename_set.  */
+	  current_offset = rename_set.last ().offset;
+	  load_insn_p = rename_set.last ().load_p;
+	  /* Reset it for search increase and decrease type.  */
+	  inc_p = true;
+	  dec_p = true;
+	}
+    }
+}
+
+static void
+nds32_do_lmwsmw_opt (basic_block bb, bool rename_p)
+{
+  rtx_insn *insn;
+  HARD_REG_SET available_regset;
+  load_store_info_t load_store_info;
+  auto_vec<load_store_info_t> load_store_infos[NDS32_GPR_NUM];
+  auto_vec<load_store_info_t> plus_infos[NDS32_GPR_NUM];
+  auto_vec<load_store_info_t> post_infos[NDS32_GPR_NUM];
+  int i;
+  unsigned j;
+  unsigned regno;
+  unsigned polluting;
+  df_ref def;
+  /* Dirty means a register is defined again after the
+     first load/store instruction.
+     For example:
+
+     lwi $r2, [$r3 + #0x100]
+     mov $r3, $r4            ! $r3 is dirty after this instruction.
+     lwi $r1, [$r3 + #0x120] ! so this load can't chain with prev load.
+   */
+  bool dirty[NDS32_GPR_NUM];
+
+  if (dump_file)
+    fprintf (dump_file, "scan bb %d\n", bb->index);
+
+  for (i = 0; i < NDS32_GPR_NUM; ++i)
+    dirty[i] = false;
+
+  FOR_BB_INSNS (bb, insn)
+    {
+      if (!INSN_P (insn))
+	continue;
+
+      polluting = INVALID_REGNUM;
+
+      /* Mark the register as dirty if its chain is not empty.  */
+      FOR_EACH_INSN_USE (def, insn)
+	{
+	  regno = DF_REF_REGNO (def);
+
+	  if (!NDS32_IS_GPR_REGNUM (regno))
+	    continue;
+
+	  if (!load_store_infos[regno].is_empty ())
+	    {
+              /* Set polluting here because the source register
+		 may be the same one.  */
+	      if (dirty[regno] == false)
+		polluting = regno;
+
+	      dirty[regno] = true;
+	    }
+	}
+
+      /* Mark all caller-saved registers as dirty if their chains are
+         not empty.  */
+      if (CALL_P (insn))
+	{
+	  for (i = 0; i < NDS32_GPR_NUM; ++i)
+	    {
+	      if (call_used_regs[i] && !load_store_infos[i].is_empty ())
+		dirty[i] = true;
+	    }
+	}
+
+      if (nds32_load_store_reg_plus_offset (insn, &load_store_info))
+	{
+	  regno = REGNO (load_store_info.base_reg);
+	  gcc_assert (NDS32_IS_GPR_REGNUM (regno));
+
+	  /* Don't add to chain if this reg is dirty.  */
+	  if (dirty[regno] && polluting != regno)
+	    break;
+
+          /* If the register is used for the first time and is polluted
+             right away, we don't push it.  */
+	  if (regno == REGNO (load_store_info.reg) && load_store_info.load_p
+	      && dirty[regno] == false)
+	    continue;
+
+	  load_store_infos[regno].safe_push (load_store_info);
+	}
+    }
+
+   for (i = 0; i < NDS32_GPR_NUM; ++i)
+    {
+      for (j = 0; j < load_store_infos[i].length (); ++j)
+	{
+	  if (load_store_infos[i][j].post_type == NDS32_NONE)
+	    plus_infos[i].safe_push (load_store_infos[i][j]);
+	  else
+	    post_infos[i].safe_push (load_store_infos[i][j]);
+	}
+    }
+
+  for (i = 0; i < NDS32_GPR_NUM; ++i)
+    {
+      if (load_store_infos[i].length () <= 1)
+	{
+	  if (dump_file && load_store_infos[i].length () == 1)
+	    fprintf (dump_file,
+		     "Skip Chain for $r%d since chain size only 1\n",
+		     i);
+	  continue;
+	}
+
+      if (dump_file)
+	{
+	  fprintf (dump_file,
+		   "Chain for $r%d: (size = %u)\n",
+		   i, load_store_infos[i].length ());
+
+	  for (j = 0; j < load_store_infos[i].length (); ++j)
+	    {
+	      fprintf (dump_file,
+		       "regno = %d base_regno = %d "
+		       "offset = " HOST_WIDE_INT_PRINT_DEC " "
+		       "load_p = %d UID = %u place = %d\n",
+		       REGNO (load_store_infos[i][j].reg),
+		       REGNO (load_store_infos[i][j].base_reg),
+		       load_store_infos[i][j].offset,
+		       load_store_infos[i][j].load_p,
+		       INSN_UID (load_store_infos[i][j].insn),
+		       load_store_infos[i][j].place);
+	    }
+	}
+
+      nds32_get_available_reg_set (bb,
+				   load_store_infos[i][0].insn,
+				   load_store_infos[i].last ().insn,
+				   &available_regset);
+      if (dump_file)
+	print_hard_reg_set (dump_file, "", available_regset);
+
+      /* If rename_p is true, rename the registers of the load/store
+	 instructions.  Otherwise, combine them into multiple load/store
+	 (lmw/smw) instructions.  */
+      if (rename_p)
+	{
+          if (plus_infos[i].length () > 1)
+	    nds32_rename_load_store_reg (&plus_infos[i], &available_regset);
+          if (post_infos[i].length () > 1)
+	    nds32_rename_bi_insn (&post_infos[i], &available_regset);
+	}
+      else
+	{
+          if (plus_infos[i].length () > 1)
+	    nds32_combine_load_store_insn (&plus_infos[i], &available_regset);
+          if (post_infos[i].length () > 1)
+	    nds32_combine_bi_insn (&post_infos[i]);
+	}
+    }
+}
+
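+/* Run the lmw/smw optimization over every basic block of the current
+   function.  RENAME_P selects between the renaming phase and the
+   combining phase.  */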
+static void
+nds32_lmwsmw_opt (bool rename_p)
+{
+  basic_block bb;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    nds32_do_lmwsmw_opt (bb, rename_p);
+}
+
+/* Implement register renaming for load and store instructions.  */
+static unsigned int
+rest_of_handle_rename_lmwsmw_opt (void)
+{
+  init_alias_analysis ();
+
+  df_set_flags (DF_LR_RUN_DCE);
+  df_note_add_problem ();
+  df_analyze ();
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+
+  regrename_init (true);
+  regrename_analyze (NULL);
+
+  nds32_lmwsmw_opt (true);
+
+  regrename_finish ();
+
+  /* We are finished with alias.  */
+  end_alias_analysis ();
+  return 1;
+}
+
+/* Implement generation of lmw and smw instructions.  */
+static unsigned int
+rest_of_handle_gen_lmwsmw_opt (void)
+{
+  init_alias_analysis ();
+
+  df_note_add_problem ();
+  df_analyze ();
+  nds32_lmwsmw_opt (false);
+
+  /* We are finished with alias.  */
+  end_alias_analysis ();
+  return 1;
+}
+
+
+const pass_data pass_data_nds32_rename_lmwsmw_opt =
+{
+  RTL_PASS,				/* type */
+  "rename_lmwsmw_opt",			/* name */
+  OPTGROUP_NONE,			/* optinfo_flags */
+  TV_MACH_DEP,				/* tv_id */
+  0,					/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  TODO_df_finish,			/* todo_flags_finish */
+};
+
+class pass_nds32_rename_lmwsmw_opt : public rtl_opt_pass
+{
+public:
+  pass_nds32_rename_lmwsmw_opt (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_nds32_rename_lmwsmw_opt, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate (function *) { return flag_nds32_lmwsmw_opt; }
+  unsigned int execute (function *) { return rest_of_handle_rename_lmwsmw_opt (); }
+};
+
+rtl_opt_pass *
+make_pass_nds32_rename_lmwsmw_opt (gcc::context *ctxt)
+{
+  return new pass_nds32_rename_lmwsmw_opt (ctxt);
+}
+
+const pass_data pass_data_nds32_gen_lmwsmw_opt =
+{
+  RTL_PASS,				/* type */
+  "gen_lmwsmw_opt",			/* name */
+  OPTGROUP_NONE,			/* optinfo_flags */
+  TV_MACH_DEP,				/* tv_id */
+  0,					/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  TODO_df_finish,			/* todo_flags_finish */
+};
+
+class pass_nds32_gen_lmwsmw_opt : public rtl_opt_pass
+{
+public:
+  pass_nds32_gen_lmwsmw_opt (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_nds32_gen_lmwsmw_opt, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate (function *) { return flag_nds32_lmwsmw_opt; }
+  unsigned int execute (function *) { return rest_of_handle_gen_lmwsmw_opt (); }
+};
+
+rtl_opt_pass *
+make_pass_nds32_gen_lmwsmw_opt (gcc::context *ctxt)
+{
+  return new pass_nds32_gen_lmwsmw_opt (ctxt);
+}
diff --git a/gcc/config/nds32/nds32-load-store-opt.c b/gcc/config/nds32/nds32-load-store-opt.c
new file mode 100644
index 0000000..9e5161e
--- /dev/null
+++ b/gcc/config/nds32/nds32-load-store-opt.c
@@ -0,0 +1,721 @@
+/* load-store-opt pass of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "rtl.h"
+#include "df.h"
+#include "alias.h"
+#include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
+#include "regs.h"
+#include "insn-config.h"	/* Required by recog.h.  */
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"		/* For DFA state_t.  */
+#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
+#include "reload.h"		/* For push_reload().  */
+#include "flags.h"
+#include "insn-config.h"
+#include "expmed.h"
+#include "dojump.h"
+#include "explow.h"
+#include "emit-rtl.h"
+#include "stmt.h"
+#include "expr.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "cfgbuild.h"
+#include "cfgcleanup.h"
+#include "tm_p.h"
+#include "tm-constrs.h"
+#include "optabs.h"		/* For GEN_FCN.  */
+#include "target.h"
+#include "langhooks.h"		/* For add_builtin_function().  */
+#include "builtins.h"
+#include "cpplib.h"
+#include "params.h"
+#include "tree-pass.h"
+#include "target-globals.h"
+#include "nds32-load-store-opt.h"
+#include "nds32-reg-utils.h"
+#include <set>
+
+#define NDS32_GPR_NUM 32
+
+static new_base_reg_info_t gen_new_base (rtx,
+					 offset_info_t,
+					 unsigned,
+					 HOST_WIDE_INT,
+					 HOST_WIDE_INT);
+
+static const load_store_optimize_pass *load_store_optimizes[] =
+{
+  /*    allow_regclass, new_base_regclass,
+	offset_lower_bound, offset_upper_bound,
+	load_only_p, name */
+  new load_store_optimize_pass (
+	LOW_REGS, LOW_REGS,
+	0, (32-4),
+	false, "lswi333"),
+  new load_store_optimize_pass (
+	LOW_REGS, FRAME_POINTER_REG,
+	0, (512-4),
+	false, "lswi37"),
+  new load_store_optimize_pass (
+	MIDDLE_REGS, GENERAL_REGS,
+	0, 0,
+	false, "lswi450"),
+  new load_store_optimize_pass (
+	MIDDLE_REGS, R8_REG,
+	-128, -4,
+	true, "lwi45fe")
+};
+
+static const int N_LOAD_STORE_OPT_TYPE = sizeof (load_store_optimizes)
+					 / sizeof (load_store_optimize_pass*);
+
+load_store_optimize_pass
+::load_store_optimize_pass (enum reg_class allow_regclass,
+			    enum reg_class new_base_regclass,
+			    HOST_WIDE_INT offset_lower_bound,
+			    HOST_WIDE_INT offset_upper_bound,
+			    bool load_only_p,
+			    const char *name)
+  : m_allow_regclass (allow_regclass),
+    m_new_base_regclass (new_base_regclass),
+    m_offset_lower_bound (offset_lower_bound),
+    m_offset_upper_bound (offset_upper_bound),
+    m_load_only_p (load_only_p),
+    m_name (name)
+{
+  gcc_assert (offset_lower_bound <= offset_upper_bound);
+}
+
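+/* Estimate the byte gain of rewriting the load/store chain in
+   LOAD_STORE_INFO to address through a new base register taken from
+   m_new_base_regclass: every chained access whose data register is in
+   m_allow_regclass can then use a 16-bit encoding and saves 2 bytes,
+   while setting up the new base costs 2 bytes (mov55), 4 bytes (add) or
+   6 bytes (add45 + movi) depending on the offset adjustment needed.
+   For example, a chain of 4 eligible accesses whose offsets already fit
+   the allowed range gains 4 * 2 - 2 = 6 bytes.  */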
+int
+load_store_optimize_pass::calc_gain (HARD_REG_SET *available_regset,
+				     offset_info_t offset_info,
+				     load_store_infos_t *load_store_info) const
+{
+  int extra_cost = 0;
+  int gain = 0;
+  unsigned i;
+  unsigned chain_size;
+  unsigned new_base_regnum;
+  HOST_WIDE_INT allow_range = m_offset_upper_bound - m_offset_lower_bound;
+  new_base_regnum  = find_available_reg (available_regset, m_new_base_regclass);
+  chain_size = load_store_info->length ();
+
+  if (new_base_regnum == INVALID_REGNUM)
+    {
+      if (dump_file)
+	fprintf (dump_file,
+		 "%s has no available register, so give up trying %s\n",
+		 reg_class_names[m_new_base_regclass],
+		 m_name);
+      return 0;
+    }
+  else if (dump_file)
+    fprintf (dump_file,
+	     "%s is available, get %s, try %s, chain size = %u\n",
+	     reg_class_names[m_new_base_regclass],
+	     reg_names[new_base_regnum],
+	     m_name,
+	     chain_size);
+
+  HOST_WIDE_INT range = offset_info.max_offset - offset_info.min_offset;
+
+  if (range > allow_range)
+    {
+      /* TODO: We could perform the load/store opt on only part of the chain.  */
+      if (dump_file)
+	fprintf (dump_file,
+		 "range is too large for %s"
+		 " (range = " HOST_WIDE_INT_PRINT_DEC ", "
+		 "allow_range = " HOST_WIDE_INT_PRINT_DEC ")\n",
+		 m_name, range, allow_range);
+      return 0;
+    }
+
+  if (offset_info.min_offset >= m_offset_lower_bound
+      && offset_info.max_offset <= m_offset_upper_bound)
+    {
+      /* mov55.  */
+      extra_cost = 2;
+    }
+  else
+    {
+      if (satisfies_constraint_Is15 (GEN_INT (offset_info.min_offset
+						   - m_offset_lower_bound)))
+	{
+	  /* add.  */
+	  extra_cost = 4;
+	}
+      else
+	{
+	  /* TODO: Try m_offset_upper_bound instead of m_offset_lower_bound
+		   again.  */
+	  /* add45 + movi.  */
+	  if (satisfies_constraint_Is20 (GEN_INT (offset_info.min_offset
+						  - m_offset_lower_bound)))
+	    extra_cost = 6;
+	  else
+	    return -1; /* Give up if this constant is too large.  */
+	}
+    }
+
+  for (i = 0; i < chain_size; ++i)
+    {
+      if (m_load_only_p && !(*load_store_info)[i].load_p)
+	continue;
+
+      if (in_reg_class_p ((*load_store_info)[i].reg, m_allow_regclass))
+	gain += 2;
+    }
+
+  if (dump_file)
+    fprintf (dump_file,
+	     "%s: gain = %d extra_cost = %d\n",
+	     m_name, gain, extra_cost);
+
+  return gain - extra_cost;
+}
+
+
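+/* Perform the rewrite that calc_gain costed: materialize the new base
+   register before the first instruction of the chain and replace every
+   eligible load/store with an equivalent one that addresses through the
+   new base register.  */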
+void
+load_store_optimize_pass::do_optimize (
+  HARD_REG_SET *available_regset,
+  offset_info_t offset_info,
+  load_store_infos_t *load_store_info) const
+{
+  new_base_reg_info_t new_base_reg_info;
+  rtx load_store_insn;
+  unsigned new_base_regnum;
+
+  new_base_regnum  = find_available_reg (available_regset, m_new_base_regclass);
+  gcc_assert (new_base_regnum != INVALID_REGNUM);
+
+  new_base_reg_info =
+    gen_new_base ((*load_store_info)[0].base_reg,
+		  offset_info,
+		  new_base_regnum,
+		  m_offset_lower_bound, m_offset_upper_bound);
+  unsigned i;
+  rtx insn;
+  insn = emit_insn_before (new_base_reg_info.set_insns[0],
+			   (*load_store_info)[0].insn);
+  if (new_base_reg_info.n_set_insns > 1)
+    {
+      gcc_assert (new_base_reg_info.n_set_insns == 2);
+      emit_insn_before (new_base_reg_info.set_insns[1], insn);
+    }
+
+  for (i = 0; i < load_store_info->length (); ++i)
+    {
+      if (m_load_only_p && !(*load_store_info)[i].load_p)
+	continue;
+
+      if (!in_reg_class_p ((*load_store_info)[i].reg, m_allow_regclass))
+	continue;
+
+      HOST_WIDE_INT offset = (*load_store_info)[i].offset;
+
+      if (new_base_reg_info.need_adjust_offset_p)
+	offset = offset + new_base_reg_info.adjust_offset;
+
+      load_store_insn =
+	gen_reg_plus_imm_load_store ((*load_store_info)[i].reg,
+				     new_base_reg_info.reg,
+				     offset,
+				     (*load_store_info)[i].load_p,
+				     (*load_store_info)[i].mem);
+
+      emit_insn_before (load_store_insn, (*load_store_info)[i].insn);
+
+      delete_insn ((*load_store_info)[i].insn);
+    }
+
+  /* Recompute the CFG to update the BB_END () instruction.  */
+  compute_bb_for_insn ();
+}
+
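+/* Build the instruction(s) that initialize the new base register
+   NEW_BASE_REGNO from ORIGINAL_BASE_REG, adding a constant adjustment
+   when the offsets in OFFSET_INFO do not already fit the
+   [OFFSET_LOWER, OFFSET_UPPER] range.  */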
+static new_base_reg_info_t
+gen_new_base (rtx original_base_reg,
+	      offset_info_t offset_info,
+	      unsigned new_base_regno,
+	      HOST_WIDE_INT offset_lower,
+	      HOST_WIDE_INT offset_upper)
+{
+  new_base_reg_info_t new_base_reg_info;
+
+  /* Use gen_raw_REG instead of gen_rtx_REG to avoid breaking the reg
+     info of the global one.
+     For example, gen_rtx_REG will return frame_pointer_rtx immediately
+     instead of creating a new rtx as gen_raw_REG (Pmode, FP_REGNUM) does.  */
+  new_base_reg_info.reg = gen_raw_REG (Pmode, new_base_regno);
+
+  /* Setup register info.  */
+  ORIGINAL_REGNO (new_base_reg_info.reg) = ORIGINAL_REGNO (original_base_reg);
+  REG_ATTRS (new_base_reg_info.reg) = REG_ATTRS (original_base_reg);
+
+  if (offset_info.max_offset <= offset_upper
+      && offset_info.min_offset >= offset_lower)
+    {
+      new_base_reg_info.set_insns[0] = gen_movsi (new_base_reg_info.reg,
+						  original_base_reg);
+      new_base_reg_info.n_set_insns = 1;
+      new_base_reg_info.need_adjust_offset_p = false;
+      new_base_reg_info.adjust_offset = 0;
+    }
+  else
+    {
+      /* For example:
+	 lwi45.fe allows the -4 ~ -128 offset range:
+	 offset_lower = #-4
+	 offset_upper = #-128
+
+	 lwi $r2, [$r12 + #10]
+	 ->
+	 addi $r8, $r12, #14      ! $r8 = $r12 + #10 - offset_lower
+				  ! = $r12 + #10 - #-4
+				  ! = $r12 + #14
+	 lwi45.fe $r2, [$r8 - #4] ! [$r8 - #4]
+				  ! = [$r12 + #14 - #4]
+				  ! = [$r12 + #10]
+      */
+      new_base_reg_info.adjust_offset =
+	-(offset_info.min_offset - offset_lower);
+
+      rtx offset = GEN_INT (-new_base_reg_info.adjust_offset);
+
+
+      if (satisfies_constraint_Is15 (offset))
+	{
+	  new_base_reg_info.set_insns[0] =
+	    gen_addsi3 (new_base_reg_info.reg,
+		       original_base_reg,
+		       offset);
+
+	  new_base_reg_info.n_set_insns = 1;
+	}
+      else
+	{
+	  if (!satisfies_constraint_Is20 (offset))
+	    gcc_unreachable ();
+
+	  new_base_reg_info.set_insns[1] =
+	    gen_rtx_SET (new_base_reg_info.reg,
+			 GEN_INT (-new_base_reg_info.adjust_offset));
+
+	  new_base_reg_info.set_insns[0] =
+	    gen_addsi3 (new_base_reg_info.reg,
+			new_base_reg_info.reg,
+			original_base_reg);
+
+	  new_base_reg_info.n_set_insns = 2;
+	}
+
+      new_base_reg_info.need_adjust_offset_p = true;
+    }
+
+  return new_base_reg_info;
+}
+
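+/* Check whether INSN is a 4-byte (SImode) load or store from/to [reg] or
+   [reg + const] that cannot already use a 16-bit encoding
+   (lwi450/swi450, lwi333/swi333, lwi37/swi37).  If so, record its
+   operands in *LOAD_STORE_INFO and return true.  */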
+static bool
+nds32_4byte_load_store_reg_plus_offset (
+  rtx_insn *insn,
+  load_store_info_t *load_store_info)
+{
+  if (!INSN_P (insn))
+    return false;
+
+  rtx pattern = PATTERN (insn);
+  rtx mem = NULL_RTX;
+  rtx reg = NULL_RTX;
+  rtx base_reg = NULL_RTX;
+  rtx addr;
+  HOST_WIDE_INT offset = 0;
+  bool load_p = false;
+
+  if (GET_CODE (pattern) != SET)
+    return false;
+
+  if (MEM_P (SET_SRC (pattern)))
+    {
+      mem = SET_SRC (pattern);
+      reg = SET_DEST (pattern);
+      load_p = true;
+    }
+
+  if (MEM_P (SET_DEST (pattern)))
+    {
+      mem = SET_DEST (pattern);
+      reg = SET_SRC (pattern);
+      load_p = false;
+    }
+
+  if (mem == NULL_RTX || reg == NULL_RTX || !REG_P (reg))
+    return false;
+
+  gcc_assert (REG_P (reg));
+
+  addr = XEXP (mem, 0);
+
+  /* We only care about [reg] and [reg+const].  */
+  if (REG_P (addr))
+    {
+      base_reg = addr;
+      offset = 0;
+    }
+  else if (GET_CODE (addr) == PLUS
+	   && CONST_INT_P (XEXP (addr, 1)))
+    {
+      base_reg = XEXP (addr, 0);
+      offset = INTVAL (XEXP (addr, 1));
+      if (!REG_P (base_reg))
+	return false;
+    }
+  else
+    return false;
+
+  /* At least need MIDDLE_REGS.  */
+  if (!in_reg_class_p (reg, MIDDLE_REGS))
+    return false;
+
+  /* lwi450/swi450 */
+  if (offset == 0)
+    return false;
+
+  if (in_reg_class_p (reg, LOW_REGS))
+    {
+      /* lwi37.sp/swi37.sp/lwi37/swi37 */
+      if ((REGNO (base_reg) == SP_REGNUM
+	   || REGNO (base_reg) == FP_REGNUM)
+	  && (offset >= 0 && offset < 512 && (offset % 4 == 0)))
+	return false;
+
+      /* lwi333/swi333 */
+      if (in_reg_class_p (base_reg, LOW_REGS)
+	  && (offset >= 0 && offset < 32 && (offset % 4 == 0)))
+	return false;
+    }
+
+  if (load_store_info)
+    {
+      load_store_info->load_p   = load_p;
+      load_store_info->offset   = offset;
+      load_store_info->reg      = reg;
+      load_store_info->base_reg = base_reg;
+      load_store_info->insn     = insn;
+      load_store_info->mem      = mem;
+    }
+
+  if (GET_MODE (reg) != SImode)
+    return false;
+
+  return true;
+}
+
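+/* Predicate-only wrapper of the function above.  */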
+static bool
+nds32_4byte_load_store_reg_plus_offset_p (rtx_insn *insn)
+{
+  return nds32_4byte_load_store_reg_plus_offset (insn, NULL);
+}
+
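+/* Return true if BB contains at least two candidate load/store
+   instructions, the minimum needed to possibly form a profitable chain.  */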
+static bool
+nds32_load_store_opt_profitable_p (basic_block bb)
+{
+  int candidate = 0;
+  int threshold = 2;
+  rtx_insn *insn;
+
+  if (dump_file)
+    fprintf (dump_file, "scan bb %d\n", bb->index);
+
+  FOR_BB_INSNS (bb, insn)
+    {
+      if (nds32_4byte_load_store_reg_plus_offset_p (insn))
+	candidate++;
+    }
+
+  if (dump_file)
+    fprintf (dump_file, " candidate = %d\n", candidate);
+
+  return candidate >= threshold;
+}
+
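+/* Collect the minimum offset, maximum offset and the number of distinct
+   offsets used by the load/store chain in LOAD_STORE_INFO.  */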
+static offset_info_t
+nds32_get_offset_info (auto_vec<load_store_info_t> *load_store_info)
+{
+  unsigned i;
+  std::set<HOST_WIDE_INT> offsets;
+  offset_info_t offset_info;
+  offset_info.max_offset = 0;
+  offset_info.min_offset = 0;
+  offset_info.num_offset = 0;
+
+  if (load_store_info->length () == 0)
+    return offset_info;
+
+  offset_info.max_offset = (*load_store_info)[0].offset;
+  offset_info.min_offset = (*load_store_info)[0].offset;
+  offsets.insert ((*load_store_info)[0].offset);
+
+  for (i = 1; i < load_store_info->length (); i++)
+    {
+      HOST_WIDE_INT offset = (*load_store_info)[i].offset;
+      offset_info.max_offset = MAX (offset_info.max_offset, offset);
+      offset_info.min_offset = MIN (offset_info.min_offset, offset);
+      offsets.insert (offset);
+    }
+
+  offset_info.num_offset = offsets.size ();
+
+  return offset_info;
+}
+
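+/* Optimize the load/store instructions of basic block BB: build a chain
+   per base register, pick the load_store_optimize_pass variant with the
+   best estimated gain, and apply it.  */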
+static void
+nds32_do_load_store_opt (basic_block bb)
+{
+  rtx_insn *insn;
+  load_store_info_t load_store_info;
+  auto_vec<load_store_info_t> load_store_infos[NDS32_GPR_NUM];
+  HARD_REG_SET available_regset;
+  int i;
+  unsigned j;
+  unsigned regno;
+  unsigned polluting;
+  df_ref def;
+  /* Dirty means a register is defined again after the
+     first load/store instruction.
+     For example:
+
+     lwi $r2, [$r3 + #0x100]
+     mov $r3, $r4            ! $r3 is dirty after this instruction.
+     lwi $r1, [$r3 + #0x120] ! so this load can't chain with prev load.
+   */
+  bool dirty[NDS32_GPR_NUM];
+
+  if (dump_file)
+    fprintf (dump_file, "try load store opt for bb %d\n", bb->index);
+
+  for (i = 0; i < NDS32_GPR_NUM; ++i)
+    dirty[i] = false;
+
+  FOR_BB_INSNS (bb, insn)
+    {
+      if (!INSN_P (insn))
+	continue;
+
+      polluting = INVALID_REGNUM;
+
+      /* Mark the defined register dirty if its chain is not empty.  */
+      FOR_EACH_INSN_DEF (def, insn)
+	{
+	  regno = DF_REF_REGNO (def);
+
+	  if (!NDS32_IS_GPR_REGNUM (regno))
+	    continue;
+
+	  if (!load_store_infos[regno].is_empty ())
+	    {
+	      /* Set polluting here because the source register
+		 may be the same one.  */
+	      if (dirty[regno] == false)
+		polluting = regno;
+
+	      dirty[regno] = true;
+	    }
+	}
+
+      /* Mark all caller-saved registers dirty if their chain is not empty.  */
+      if (CALL_P (insn))
+	{
+	  for (i = 0; i < NDS32_GPR_NUM; ++i)
+	    {
+	      if (call_used_regs[i] && !load_store_infos[i].is_empty ())
+		dirty[i] = true;
+	    }
+	}
+
+      if (nds32_4byte_load_store_reg_plus_offset (insn, &load_store_info))
+	{
+	  regno = REGNO (load_store_info.base_reg);
+	  gcc_assert (NDS32_IS_GPR_REGNUM (regno));
+
+	  /* Don't add to chain if this reg is dirty.  */
+	  if (dirty[regno] && polluting != regno)
+	    break;
+
+	  /* If this is the first time the register is used and it is
+	     polluted right away, we don't push it.  */
+	  if (regno == REGNO (load_store_info.reg) && load_store_info.load_p
+	      && dirty[regno] == false)
+	    continue;
+
+	  load_store_infos[regno].safe_push (load_store_info);
+	}
+    }
+  for (i = 0; i < NDS32_GPR_NUM; ++i)
+    {
+      if (load_store_infos[i].length () <= 1)
+	{
+	  if (dump_file && load_store_infos[i].length () == 1)
+	    fprintf (dump_file,
+		     "Skip Chain for $r%d since chain size only 1\n",
+		     i);
+	  continue;
+	}
+
+      if (dump_file)
+	{
+	  fprintf (dump_file,
+		   "Chain for $r%d: (size = %u)\n",
+		   i, load_store_infos[i].length ());
+
+	  for (j = 0; j < load_store_infos[i].length (); ++j)
+	    {
+	      fprintf (dump_file,
+		       "regno = %d base_regno = %d "
+		       "offset = " HOST_WIDE_INT_PRINT_DEC " "
+		       "load_p = %d UID = %u\n",
+		       REGNO (load_store_infos[i][j].reg),
+		       REGNO (load_store_infos[i][j].base_reg),
+		       load_store_infos[i][j].offset,
+		       load_store_infos[i][j].load_p,
+		       INSN_UID (load_store_infos[i][j].insn));
+	    }
+	}
+
+      nds32_get_available_reg_set (bb,
+				   load_store_infos[i][0].insn,
+				   load_store_infos[i].last ().insn,
+				   &available_regset);
+
+      if (dump_file)
+	{
+	  print_hard_reg_set (dump_file, "", available_regset);
+	}
+
+      offset_info_t offset_info = nds32_get_offset_info (&load_store_infos[i]);
+      if (dump_file)
+	{
+	  fprintf (dump_file,
+		   "max offset = " HOST_WIDE_INT_PRINT_DEC "\n"
+		   "min offset = " HOST_WIDE_INT_PRINT_DEC "\n"
+		   "num offset = %d\n",
+		   offset_info.max_offset,
+		   offset_info.min_offset,
+		   offset_info.num_offset);
+	}
+
+      int gain;
+      int best_gain = 0;
+      const load_store_optimize_pass *best_load_store_optimize_pass = NULL;
+
+      for (j = 0; j < N_LOAD_STORE_OPT_TYPE; ++j)
+	{
+	  gain = load_store_optimizes[j]->calc_gain (&available_regset,
+						     offset_info,
+						     &load_store_infos[i]);
+
+	  if (dump_file)
+	    fprintf (dump_file, "%s gain = %d\n",
+		     load_store_optimizes[j]->name (), gain);
+
+	  if (gain > best_gain)
+	    {
+	      best_gain = gain;
+	      best_load_store_optimize_pass = load_store_optimizes[j];
+	    }
+	}
+
+      if (best_load_store_optimize_pass)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "%s is most profit, optimize it!\n",
+		     best_load_store_optimize_pass->name ());
+
+	  best_load_store_optimize_pass->do_optimize (&available_regset,
+						      offset_info,
+						      &load_store_infos[i]);
+
+	  df_insn_rescan_all ();
+	}
+
+    }
+}
+
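+/* Entry point of the load/store optimization pass: set up the dataflow
+   information and run the per-basic-block optimization on every block
+   that looks profitable.  */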
+static unsigned int
+nds32_load_store_opt (void)
+{
+  basic_block bb;
+
+  df_set_flags (DF_LR_RUN_DCE);
+  df_note_add_problem ();
+  df_analyze ();
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      if (nds32_load_store_opt_profitable_p (bb))
+	nds32_do_load_store_opt (bb);
+    }
+
+  return 1;
+}
+
+const pass_data pass_data_nds32_load_store_opt =
+{
+  RTL_PASS,				/* type */
+  "load_store_opt",			/* name */
+  OPTGROUP_NONE,			/* optinfo_flags */
+  TV_MACH_DEP,				/* tv_id */
+  0,					/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  TODO_df_finish,			/* todo_flags_finish */
+};
+
+class pass_nds32_load_store_opt : public rtl_opt_pass
+{
+public:
+  pass_nds32_load_store_opt (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_nds32_load_store_opt, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate (function *) { return TARGET_16_BIT && TARGET_LOAD_STORE_OPT; }
+  unsigned int execute (function *) { return nds32_load_store_opt (); }
+};
+
+rtl_opt_pass *
+make_pass_nds32_load_store_opt (gcc::context *ctxt)
+{
+  return new pass_nds32_load_store_opt (ctxt);
+}
diff --git a/gcc/config/nds32/nds32-load-store-opt.h b/gcc/config/nds32/nds32-load-store-opt.h
new file mode 100644
index 0000000..f94b56a
--- /dev/null
+++ b/gcc/config/nds32/nds32-load-store-opt.h
@@ -0,0 +1,117 @@
+/* Prototypes for load-store-opt of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef NDS32_LOAD_STORE_OPT_H
+#define NDS32_LOAD_STORE_OPT_H
+
+/* Define the type of a set of hard registers.  */
+
+enum nds32_memory_post_type
+{
+  NDS32_NONE,
+  NDS32_POST_INC,
+  NDS32_POST_DEC
+};
+
+typedef struct {
+  rtx reg;
+  rtx base_reg;
+  rtx offset;
+  HOST_WIDE_INT shift;
+  bool load_p;
+  rtx insn;
+} rr_load_store_info_t;
+
+typedef struct {
+  rtx reg;
+  rtx base_reg;
+  HOST_WIDE_INT offset;
+  bool load_p;
+  rtx_insn *insn;
+  rtx mem;
+  int new_reg;
+  int order;
+  int group;
+  bool place;
+  enum nds32_memory_post_type post_type;
+} load_store_info_t;
+
+typedef struct {
+  HOST_WIDE_INT max_offset;
+  HOST_WIDE_INT min_offset;
+  /* How many different offsets there are.  */
+  int num_offset;
+} offset_info_t;
+
+typedef struct {
+  rtx set_insns[2];
+  int n_set_insns;
+  rtx reg;
+  bool need_adjust_offset_p;
+  HOST_WIDE_INT adjust_offset;
+} new_base_reg_info_t;
+
+typedef struct {
+  unsigned int amount;
+  unsigned int start;
+  unsigned int end;
+} available_reg_info_t;
+
+typedef auto_vec<load_store_info_t> load_store_infos_t;
+
+class load_store_optimize_pass
+{
+public:
+  load_store_optimize_pass (enum reg_class,
+			    enum reg_class,
+			    HOST_WIDE_INT,
+			    HOST_WIDE_INT,
+			    bool,
+			    const char *);
+  const char *name () const { return m_name; };
+  int calc_gain (HARD_REG_SET *,
+		 offset_info_t,
+		 load_store_infos_t *) const;
+  void do_optimize (HARD_REG_SET *,
+		    offset_info_t,
+		    load_store_infos_t *) const;
+private:
+  enum reg_class m_allow_regclass;
+  enum reg_class m_new_base_regclass;
+  HOST_WIDE_INT m_offset_lower_bound;
+  HOST_WIDE_INT m_offset_upper_bound;
+  bool m_load_only_p;
+  const char *m_name;
+};
+
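+/* Build a SImode move between REG and the memory reference
+   [BASE_REG + OFFSET], copying the attributes of OLDMEM so the alias
+   information is preserved.  LOAD_P selects the direction.  */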
+static inline rtx
+gen_reg_plus_imm_load_store (rtx reg, rtx base_reg,
+			     HOST_WIDE_INT offset, bool load_p, rtx oldmem)
+{
+  rtx addr = plus_constant (Pmode, base_reg, offset);
+  rtx mem = gen_rtx_MEM (SImode, addr);
+  MEM_COPY_ATTRIBUTES (mem, oldmem);
+  if (load_p)
+    return gen_movsi (reg, mem);
+  else
+    return gen_movsi (mem, reg);
+}
+
+#endif /* ! NDS32_LOAD_STORE_OPT_H */
diff --git a/gcc/config/nds32/nds32-md-auxiliary.c b/gcc/config/nds32/nds32-md-auxiliary.c
index def8eda..3881df7 100644
--- a/gcc/config/nds32/nds32-md-auxiliary.c
+++ b/gcc/config/nds32/nds32-md-auxiliary.c
@@ -25,17 +25,74 @@
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
-#include "target.h"
-#include "rtl.h"
 #include "tree.h"
-#include "tm_p.h"
-#include "optabs.h"		/* For GEN_FCN.  */
-#include "recog.h"
+#include "rtl.h"
+#include "df.h"
+#include "alias.h"
+#include "stor-layout.h"
+#include "varasm.h"
+#include "calls.h"
+#include "regs.h"
+#include "insn-config.h"	/* Required by recog.h.  */
+#include "conditions.h"
 #include "output.h"
+#include "insn-attr.h"		/* For DFA state_t.  */
+#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
+#include "reload.h"		/* For push_reload().  */
+#include "flags.h"
+#include "insn-config.h"
+#include "expmed.h"
+#include "dojump.h"
+#include "explow.h"
+#include "emit-rtl.h"
+#include "stmt.h"
+#include "expr.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "cfgbuild.h"
+#include "cfgcleanup.h"
+#include "tm_p.h"
 #include "tm-constrs.h"
+#include "optabs.h"		/* For GEN_FCN.  */
+#include "target.h"
+#include "langhooks.h"		/* For add_builtin_function().  */
+#include "builtins.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* This file is divided into three parts:
+
+     PART 1: Auxiliary static function definitions.
+
+     PART 2: Auxiliary function for expand RTL pattern.
+
+     PART 3: Auxiliary function for output asm template.  */
 
 /* ------------------------------------------------------------------------ */
 
+/* PART 1: Auxiliary static function definitions.  */
+
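+/* Return the bit of the Enable4 field that corresponds to REGNO
+   ($fp, $gp, $lp or $sp) in the lmw/smw instruction encoding.  */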
+static int
+nds32_regno_to_enable4 (unsigned regno)
+{
+  switch (regno)
+    {
+    case 28: /* $r28/fp */
+      return 0x8;
+    case 29: /* $r29/gp */
+      return 0x4;
+    case 30: /* $r30/lp */
+      return 0x2;
+    case 31: /* $r31/sp */
+      return 0x1;
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* A helper function to return character based on byte size.  */
 static char
 nds32_byte_to_size (int byte)
@@ -54,796 +111,3825 @@ nds32_byte_to_size (int byte)
     }
 }
 
-/* A helper function to return memory format.  */
-enum nds32_16bit_address_type
-nds32_mem_format (rtx op)
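+/* Return the RTX comparison code that is the logical inverse of CODE.  */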
+static int
+nds32_inverse_cond_code (int code)
 {
-  machine_mode mode_test;
-  int val;
-  int regno;
-
-  if (!TARGET_16_BIT)
-    return ADDRESS_NOT_16BIT_FORMAT;
-
-  mode_test = GET_MODE (op);
-
-  op = XEXP (op, 0);
+  switch (code)
+    {
+      case NE:
+	return EQ;
+      case EQ:
+	return NE;
+      case GT:
+	return LE;
+      case LE:
+	return GT;
+      case GE:
+	return LT;
+      case LT:
+	return GE;
+      default:
+	gcc_unreachable ();
+    }
+}
 
-  /* 45 format.  */
-  if (GET_CODE (op) == REG && (mode_test == SImode))
-    return ADDRESS_REG;
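+/* Return the condition-code suffix string (e.g. "eq", "ne") for CODE.  */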
+static const char *
+nds32_cond_code_str (int code)
+{
+  switch (code)
+    {
+      case NE:
+	return "ne";
+      case EQ:
+	return "eq";
+      case GT:
+	return "gt";
+      case LE:
+	return "le";
+      case GE:
+	return "ge";
+      case LT:
+	return "lt";
+      default:
+	gcc_unreachable ();
+    }
+}
 
-  /* 333 format for QI/HImode.  */
-  if (GET_CODE (op) == REG && (REGNO (op) < R8_REGNUM))
-    return ADDRESS_LO_REG_IMM3U;
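+/* Output a conditional branch.  CODE is the rtx comparison code, SUFFIX is
+   appended to the branch mnemonic, R5_P marks the 16-bit beqs38/bnes38
+   forms that implicitly compare against $r5, and LONG_JUMP_P emits an
+   inverted-condition branch over an unconditional jump when the target is
+   out of branch range.  */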
+static void
+output_cond_branch (int code, const char *suffix, bool r5_p,
+		    bool long_jump_p, rtx *operands)
+{
+  char pattern[256];
+  const char *cond_code;
+  bool align_p = NDS32_ALIGN_P ();
+  const char *align = align_p ? "\t.align\t2\n" : "";
 
-  /* post_inc 333 format.  */
-  if ((GET_CODE (op) == POST_INC) && (mode_test == SImode))
+  if (r5_p && REGNO (operands[2]) == 5 && TARGET_16_BIT)
     {
-      regno = REGNO(XEXP (op, 0));
-
-      if (regno < 8)
-	return ADDRESS_POST_INC_LO_REG_IMM3U;
+      /* This is a special case for beqs38 and bnes38:
+	 the second operand can't be $r5 and it's almost meaningless,
+	 however it may occur after copy propagation.  */
+      if (code == EQ)
+	{
+	  /* $r5 == $r5 is always taken! */
+	  if (long_jump_p)
+	    snprintf (pattern, sizeof (pattern),
+		      "j\t%%3");
+	  else
+	    snprintf (pattern, sizeof (pattern),
+		      "j8\t%%3");
+	}
+      else
+	/* Don't output anything since $r5 != $r5 is never taken! */
+	pattern[0] = '\0';
     }
-
-  /* post_inc 333 format.  */
-  if ((GET_CODE (op) == POST_MODIFY)
-      && (mode_test == SImode)
-      && (REG_P (XEXP (XEXP (op, 1), 0)))
-      && (CONST_INT_P (XEXP (XEXP (op, 1), 1))))
+  else if (long_jump_p)
     {
-      regno = REGNO (XEXP (XEXP (op, 1), 0));
-      val = INTVAL (XEXP (XEXP (op, 1), 1));
-      if (regno < 8 && val < 32)
-	return ADDRESS_POST_INC_LO_REG_IMM3U;
+      int inverse_code = nds32_inverse_cond_code (code);
+      cond_code = nds32_cond_code_str (inverse_code);
+
+      /*      b<cond>  $r0, $r1, .L0
+	    =>
+	      b<inverse_cond>  $r0, $r1, .LCB0
+	      j  .L0
+	    .LCB0:
+
+	    or
+
+	      b