diff -rNubwp gcc-8.3.0/gcc/DATESTAMP gcc-8.4.0/gcc/DATESTAMP --- gcc-8.3.0/gcc/DATESTAMP 2019-02-21 16:16:15.000000000 -0800 +++ gcc-8.4.0/gcc/DATESTAMP 2020-03-04 00:30:00.000000000 -0800 @@ -1 +1 @@ -20190222 +20200304 diff -rNubwp gcc-8.3.0/gcc/Makefile.in gcc-8.4.0/gcc/Makefile.in --- gcc-8.3.0/gcc/Makefile.in 2018-03-09 07:24:44.000000000 -0800 +++ gcc-8.4.0/gcc/Makefile.in 2020-03-04 00:30:00.000000000 -0800 @@ -1114,6 +1114,7 @@ endif # Support for additional languages (other than C). diff -rNubwp gcc-8.3.0/gcc/config/pa/pa-linux.h gcc-8.4.0/gcc/config/pa/pa-linux.h --- gcc-8.3.0/gcc/config/pa/pa-linux.h 2018-05-27 14:54:27.000000000 -0700 +++ gcc-8.4.0/gcc/config/pa/pa-linux.h 2020-03-04 00:30:00.000000000 -0800 @@ -101,7 +101,7 @@ along with GCC; see the file COPYING3. /* FIXME: Hacked from the one so that we avoid multiple labels in a function declaration (since pa.c seems determined to do - it differently) */ + it differently). */ #undef ASM_DECLARE_FUNCTION_NAME #define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ @@ -109,9 +109,14 @@ along with GCC; see the file COPYING3. { \ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ + pa_output_function_label (FILE); \ } \ while (0) +/* Output function prologue for linux. */ +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE pa_linux_output_function_prologue + /* As well as globalizing the label, we need to encode the label to ensure a plabel is generated in an indirect call. */ diff -rNubwp gcc-8.3.0/gcc/config/pa/pa-protos.h gcc-8.4.0/gcc/config/pa/pa-protos.h --- gcc-8.3.0/gcc/config/pa/pa-protos.h 2018-01-16 06:47:49.000000000 -0800 +++ gcc-8.4.0/gcc/config/pa/pa-protos.h 2020-03-04 00:30:00.000000000 -0800 @@ -108,5 +108,6 @@ extern void pa_asm_output_aligned_local extern void pa_hpux_asm_output_external (FILE *, tree, const char *); extern HOST_WIDE_INT pa_initial_elimination_offset (int, int); extern HOST_WIDE_INT pa_function_arg_size (machine_mode, const_tree); +extern void pa_output_function_label (FILE *); extern const int pa_magic_milli[]; diff -rNubwp gcc-8.3.0/gcc/config/pa/pa.c gcc-8.4.0/gcc/config/pa/pa.c --- gcc-8.3.0/gcc/config/pa/pa.c 2018-07-29 08:54:08.000000000 -0700 +++ gcc-8.4.0/gcc/config/pa/pa.c 2020-03-04 00:30:00.000000000 -0800 @@ -118,11 +118,11 @@ static void set_reg_plus_d (int, int, HO static rtx pa_function_value (const_tree, const_tree, bool); static rtx pa_libcall_value (machine_mode, const_rtx); static bool pa_function_value_regno_p (const unsigned int); -static void pa_output_function_prologue (FILE *); +static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED; +static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED; static void update_total_code_bytes (unsigned int); static void pa_output_function_epilogue (FILE *); static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int); -static int pa_adjust_priority (rtx_insn *, int); static int pa_issue_rate (void); static int pa_reloc_rw_mask (void); static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED; @@ -263,8 +263,6 @@ static size_t n_deferred_plabels = 0; #undef TARGET_ASM_INTEGER #define TARGET_ASM_INTEGER pa_assemble_integer -#undef TARGET_ASM_FUNCTION_PROLOGUE -#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue #undef TARGET_ASM_FUNCTION_EPILOGUE #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue @@ -280,8 +278,6 @@ static size_t n_deferred_plabels = 0; #undef TARGET_SCHED_ADJUST_COST #define TARGET_SCHED_ADJUST_COST pa_adjust_cost -#undef TARGET_SCHED_ADJUST_PRIORITY -#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority #undef TARGET_SCHED_ISSUE_RATE #define TARGET_SCHED_ISSUE_RATE pa_issue_rate @@ -3842,16 +3838,10 @@ pa_compute_frame_size (poly_int64 size, & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)); } -/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block - of memory. If any fpu reg is used in the function, we allocate - such a block here, at the bottom of the frame, just in case it's needed. - - If this function is a leaf procedure, then we may choose not - to do a "save" insn. The decision about whether or not - to do this is made in regclass.c. */ +/* Output function label, and associated .PROC and .CALLINFO statements. */ -static void -pa_output_function_prologue (FILE *file) +void +pa_output_function_label (FILE *file) { /* The function's label and associated .PROC must never be separated and must be output *after* any profiling declarations @@ -3897,7 +3887,22 @@ pa_output_function_prologue (FILE *file) fprintf (file, ",ENTRY_FR=%d", fr_saved + 11); fputs ("\n\t.ENTRY\n", file); +} + +/* Output function prologue. */ + +static void +pa_output_function_prologue (FILE *file) +{ + pa_output_function_label (file); + remove_useless_addtr_insns (0); +} + +/* The label is output by ASM_DECLARE_FUNCTION_NAME on linux. */ +static void +pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED) +{ remove_useless_addtr_insns (0); } @@ -4569,10 +4574,6 @@ output_deferred_profile_counters (void) void hppa_profile_hook (int label_no) { - /* We use SImode for the address of the function in both 32 and - 64-bit code to avoid having to provide DImode versions of the - lcla2 and load_offset_label_address insn patterns. */ - rtx reg = gen_reg_rtx (SImode); rtx_code_label *label_rtx = gen_label_rtx (); int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE); rtx arg_bytes, begin_label_rtx, mcount, sym; @@ -4604,18 +4605,13 @@ hppa_profile_hook (int label_no) if (!use_mcount_pcrel_call) { /* The address of the function is loaded into %r25 with an instruction- - relative sequence that avoids the use of relocations. The sequence - is split so that the load_offset_label_address instruction can - occupy the delay slot of the call to _mcount. */ + relative sequence that avoids the use of relocations. We use SImode + for the address of the function in both 32 and 64-bit code to avoid + having to provide DImode versions of the lcla2 pattern. */ if (TARGET_PA_20) - emit_insn (gen_lcla2 (reg, label_rtx)); + emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx)); else - emit_insn (gen_lcla1 (reg, label_rtx)); - - emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25), - reg, - begin_label_rtx, - label_rtx)); + emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx)); } if (!NO_DEFERRED_PROFILE_COUNTERS) @@ -4992,37 +4988,6 @@ pa_adjust_cost (rtx_insn *insn, int dep_ } } -/* Adjust scheduling priorities. We use this to try and keep addil - and the next use of %r1 close together. */ -static int -pa_adjust_priority (rtx_insn *insn, int priority) -{ - rtx set = single_set (insn); - rtx src, dest; - if (set) - { - src = SET_SRC (set); - dest = SET_DEST (set); - if (GET_CODE (src) == LO_SUM - && symbolic_operand (XEXP (src, 1), VOIDmode) - && ! read_only_operand (XEXP (src, 1), VOIDmode)) - priority >>= 3; - - else if (GET_CODE (src) == MEM - && GET_CODE (XEXP (src, 0)) == LO_SUM - && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode) - && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode)) - priority >>= 1; - - else if (GET_CODE (dest) == MEM - && GET_CODE (XEXP (dest, 0)) == LO_SUM - && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode) - && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)) - priority >>= 3; - } - return priority; -} - /* The 700 can only issue a single insn at a time. The 7XXX processors can issue two insns at a time. The 8000 can issue 4 insns at a time. */ @@ -7888,7 +7853,7 @@ pa_attr_length_call (rtx_insn *insn, int /* 64-bit plabel sequence. */ else if (TARGET_64BIT && !local_call) - length += sibcall ? 28 : 24; + length += 24; /* non-pic long absolute branch sequence. */ else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) @@ -7960,39 +7925,25 @@ pa_output_call (rtx_insn *insn, rtx call xoperands[0] = pa_get_deferred_plabel (call_dest); xoperands[1] = gen_label_rtx (); - /* If this isn't a sibcall, we put the load of %r27 into the - delay slot. We can't do this in a sibcall as we don't - have a second call-clobbered scratch register available. - We don't need to do anything when generating fast indirect - calls. */ - if (seq_length != 0 && !sibcall) + /* Put the load of %r27 into the delay slot. We don't need to + do anything when generating fast indirect calls. */ + if (seq_length != 0) { final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, NULL); /* Now delete the delay insn. */ SET_INSN_DELETED (NEXT_INSN (insn)); - seq_length = 0; } output_asm_insn ("addil LT'%0,%%r27", xoperands); output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands); output_asm_insn ("ldd 0(%%r1),%%r1", xoperands); - - if (sibcall) - { - output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); - output_asm_insn ("ldd 16(%%r1),%%r1", xoperands); - output_asm_insn ("bve (%%r1)", xoperands); - } - else - { output_asm_insn ("ldd 16(%%r1),%%r2", xoperands); output_asm_insn ("bve,l (%%r2),%%r2", xoperands); output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); seq_length = 1; } - } else { int indirect_call = 0; @@ -8084,20 +8035,22 @@ pa_output_call (rtx_insn *insn, rtx call { output_asm_insn ("addil LT'%0,%%r19", xoperands); output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands); - output_asm_insn ("ldw 0(%%r1),%%r1", xoperands); + output_asm_insn ("ldw 0(%%r1),%%r22", xoperands); } else { output_asm_insn ("addil LR'%0-$global$,%%r27", xoperands); - output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1", + output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22", xoperands); } - output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands); - output_asm_insn ("depi 0,31,2,%%r1", xoperands); - output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands); - output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands); + output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); + output_asm_insn ("depi 0,31,2,%%r22", xoperands); + /* Should this be an ordered load to ensure the target + address is loaded before the global pointer? */ + output_asm_insn ("ldw 0(%%r22),%%r1", xoperands); + output_asm_insn ("ldw 4(%%r22),%%r19", xoperands); if (!sibcall && !TARGET_PA_20) { @@ -8190,10 +8143,6 @@ pa_attr_length_indirect_call (rtx_insn * if (TARGET_PORTABLE_RUNTIME) return 16; - /* Inline version of $$dyncall. */ - if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size) - return 20; - if (!TARGET_LONG_CALLS && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000) || distance < MAX_PCREL17F_OFFSET)) @@ -8203,12 +8152,15 @@ pa_attr_length_indirect_call (rtx_insn * if (!flag_pic) return 12; - /* Inline version of $$dyncall. */ - if (TARGET_NO_SPACE_REGS || TARGET_PA_20) - return 20; - + /* Inline versions of $$dyncall. */ if (!optimize_size) - return 36; + { + if (TARGET_NO_SPACE_REGS) + return 28; + + if (TARGET_PA_20) + return 32; + } /* Long PIC pc-relative call. */ return 20; @@ -8246,22 +8198,6 @@ pa_output_indirect_call (rtx_insn *insn, return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)"; } - /* Maybe emit a fast inline version of $$dyncall. */ - if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size) - { - output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t" - "ldw 2(%%r22),%%r19\n\t" - "ldw -2(%%r22),%%r22", xoperands); - pa_output_arg_descriptor (insn); - if (TARGET_NO_SPACE_REGS) - { - if (TARGET_PA_20) - return "bve,l,n (%%r22),%%r2\n\tnop"; - return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2"; - } - return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)"; - } - /* Now the normal case -- we can reach $$dyncall directly or we're sure that we can get there via a long-branch stub. @@ -8290,33 +8226,38 @@ pa_output_indirect_call (rtx_insn *insn, return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2"; } - /* Maybe emit a fast inline version of $$dyncall. The long PIC - pc-relative call sequence is five instructions. The inline PA 2.0 - version of $$dyncall is also five instructions. The PA 1.X versions - are longer but still an overall win. */ - if (TARGET_NO_SPACE_REGS || TARGET_PA_20 || !optimize_size) - { - output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t" - "ldw 2(%%r22),%%r19\n\t" - "ldw -2(%%r22),%%r22", xoperands); + /* The long PIC pc-relative call sequence is five instructions. So, + let's use an inline version of $$dyncall when the calling sequence + has a roughly similar number of instructions and we are not optimizing + for size. We need two instructions to load the return pointer plus + the $$dyncall implementation. */ + if (!optimize_size) + { if (TARGET_NO_SPACE_REGS) { pa_output_arg_descriptor (insn); - if (TARGET_PA_20) - return "bve,l,n (%%r22),%%r2\n\tnop"; - return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2"; + output_asm_insn ("bl .+8,%%r2\n\t" + "ldo 20(%%r2),%%r2\n\t" + "extru,<> %%r22,30,1,%%r0\n\t" + "bv,n %%r0(%%r22)\n\t" + "ldw -2(%%r22),%%r21\n\t" + "bv %%r0(%%r21)\n\t" + "ldw 2(%%r22),%%r19", xoperands); + return ""; } if (TARGET_PA_20) { pa_output_arg_descriptor (insn); - return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)"; - } output_asm_insn ("bl .+8,%%r2\n\t" - "ldo 16(%%r2),%%r2\n\t" - "ldsid (%%r22),%%r1\n\t" - "mtsp %%r1,%%sr0", xoperands); - pa_output_arg_descriptor (insn); - return "be 0(%%sr0,%%r22)\n\tstw %%r2,-24(%%sp)"; + "ldo 24(%%r2),%%r2\n\t" + "stw %%r2,-24(%%sp)\n\t" + "extru,<> %r22,30,1,%%r0\n\t" + "bve,n (%%r22)\n\t" + "ldw -2(%%r22),%%r21\n\t" + "bve (%%r21)\n\t" + "ldw 2(%%r22),%%r19", xoperands); + return ""; + } } /* We need a long PIC call to $$dyncall. */ @@ -9398,7 +9339,7 @@ pa_function_value (const_tree valtype, HOST_WIDE_INT valsize = int_size_in_bytes (valtype); /* Handle aggregates that fit exactly in a word or double word. */ - if ((valsize & (UNITS_PER_WORD - 1)) == 0) + if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD) return gen_rtx_REG (TYPE_MODE (valtype), 28); if (TARGET_64BIT) @@ -10039,10 +9980,11 @@ pa_can_change_mode_class (machine_mode f /* There is no way to load QImode or HImode values directly from memory to a FP register. SImode loads to the FP registers are not zero extended. On the 64-bit target, this conflicts with the definition - of LOAD_EXTEND_OP. Thus, we can't allow changing between modes with - different sizes in the floating-point registers. */ + of LOAD_EXTEND_OP. Thus, we reject all mode changes in the FP registers + except for DImode to SImode on the 64-bit target. It is handled by + register renaming in pa_print_operand. */ if (MAYBE_FP_REG_CLASS_P (rclass)) - return false; + return TARGET_64BIT && from == DImode && to == SImode; /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word in specific sets of registers. Thus, we cannot allow changing @@ -10076,7 +10018,7 @@ pa_modes_tieable_p (machine_mode mode1, /* Length in units of the trampoline instruction code. */ -#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40)) +#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48)) /* Output assembler code for a block containing the constant parts @@ -10097,27 +10039,46 @@ pa_asm_trampoline_template (FILE *f) { if (!TARGET_64BIT) { - fputs ("\tldw 36(%r22),%r21\n", f); - fputs ("\tbb,>=,n %r21,30,.+16\n", f); - if (ASSEMBLER_DIALECT == 0) - fputs ("\tdepi 0,31,2,%r21\n", f); - else - fputs ("\tdepwi 0,31,2,%r21\n", f); - fputs ("\tldw 4(%r21),%r19\n", f); - fputs ("\tldw 0(%r21),%r21\n", f); if (TARGET_PA_20) { + fputs ("\tmfia %r20\n", f); + fputs ("\tldw 48(%r20),%r22\n", f); + fputs ("\tcopy %r22,%r21\n", f); + fputs ("\tbb,>=,n %r22,30,.+16\n", f); + fputs ("\tdepwi 0,31,2,%r22\n", f); + fputs ("\tldw 0(%r22),%r21\n", f); + fputs ("\tldw 4(%r22),%r19\n", f); fputs ("\tbve (%r21)\n", f); - fputs ("\tldw 40(%r22),%r29\n", f); + fputs ("\tldw 52(%r1),%r29\n", f); + fputs ("\t.word 0\n", f); fputs ("\t.word 0\n", f); fputs ("\t.word 0\n", f); } else { + if (ASSEMBLER_DIALECT == 0) + { + fputs ("\tbl .+8,%r20\n", f); + fputs ("\tdepi 0,31,2,%r20\n", f); + } + else + { + fputs ("\tb,l .+8,%r20\n", f); + fputs ("\tdepwi 0,31,2,%r20\n", f); + } + fputs ("\tldw 40(%r20),%r22\n", f); + fputs ("\tcopy %r22,%r21\n", f); + fputs ("\tbb,>=,n %r22,30,.+16\n", f); + if (ASSEMBLER_DIALECT == 0) + fputs ("\tdepi 0,31,2,%r22\n", f); + else + fputs ("\tdepwi 0,31,2,%r22\n", f); + fputs ("\tldw 0(%r22),%r21\n", f); + fputs ("\tldw 4(%r22),%r19\n", f); fputs ("\tldsid (%r21),%r1\n", f); fputs ("\tmtsp %r1,%sr0\n", f); fputs ("\tbe 0(%sr0,%r21)\n", f); - fputs ("\tldw 40(%r22),%r29\n", f); + fputs ("\tldw 44(%r20),%r29\n", f); } fputs ("\t.word 0\n", f); fputs ("\t.word 0\n", f); @@ -10131,11 +10092,11 @@ pa_asm_trampoline_template (FILE *f) fputs ("\t.dword 0\n", f); fputs ("\t.dword 0\n", f); fputs ("\tmfia %r31\n", f); - fputs ("\tldd 24(%r31),%r1\n", f); - fputs ("\tldd 24(%r1),%r27\n", f); - fputs ("\tldd 16(%r1),%r1\n", f); - fputs ("\tbve (%r1)\n", f); + fputs ("\tldd 24(%r31),%r27\n", f); fputs ("\tldd 32(%r31),%r31\n", f); + fputs ("\tldd 16(%r27),%r1\n", f); + fputs ("\tbve (%r1)\n", f); + fputs ("\tldd 24(%r27),%r27\n", f); fputs ("\t.dword 0 ; fptr\n", f); fputs ("\t.dword 0 ; static link\n", f); } @@ -10145,10 +10106,10 @@ pa_asm_trampoline_template (FILE *f) FNADDR is an RTX for the address of the function's pure code. CXT is an RTX for the static chain value for the function. - Move the function address to the trampoline template at offset 36. - Move the static chain value to trampoline template at offset 40. - Move the trampoline address to trampoline template at offset 44. - Move r19 to trampoline template at offset 48. The latter two + Move the function address to the trampoline template at offset 48. + Move the static chain value to trampoline template at offset 52. + Move the trampoline address to trampoline template at offset 56. + Move r19 to trampoline template at offset 60. The latter two words create a plabel for the indirect call to the trampoline. A similar sequence is used for the 64-bit port but the plabel is @@ -10174,15 +10135,15 @@ pa_trampoline_init (rtx m_tramp, tree fn if (!TARGET_64BIT) { - tmp = adjust_address (m_tramp, Pmode, 36); + tmp = adjust_address (m_tramp, Pmode, 48); emit_move_insn (tmp, fnaddr); - tmp = adjust_address (m_tramp, Pmode, 40); + tmp = adjust_address (m_tramp, Pmode, 52); emit_move_insn (tmp, chain_value); /* Create a fat pointer for the trampoline. */ - tmp = adjust_address (m_tramp, Pmode, 44); + tmp = adjust_address (m_tramp, Pmode, 56); emit_move_insn (tmp, r_tramp); - tmp = adjust_address (m_tramp, Pmode, 48); + tmp = adjust_address (m_tramp, Pmode, 60); emit_move_insn (tmp, gen_rtx_REG (Pmode, 19)); /* fdc and fic only use registers for the address to flush, @@ -10234,20 +10195,20 @@ pa_trampoline_init (rtx m_tramp, tree fn } #ifdef HAVE_ENABLE_EXECUTE_STACK -  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); #endif } /* Perform any machine-specific adjustment in the address of the trampoline. ADDR contains the address that was passed to pa_trampoline_init. - Adjust the trampoline address to point to the plabel at offset 44. */ + Adjust the trampoline address to point to the plabel at offset 56. */ static rtx pa_trampoline_adjust_address (rtx addr) { if (!TARGET_64BIT) - addr = memory_address (Pmode, plus_constant (Pmode, addr, 46)); + addr = memory_address (Pmode, plus_constant (Pmode, addr, 58)); return addr; } diff -rNubwp gcc-8.3.0/gcc/config/pa/pa.h gcc-8.4.0/gcc/config/pa/pa.h --- gcc-8.3.0/gcc/config/pa/pa.h 2018-07-29 08:54:08.000000000 -0700 +++ gcc-8.4.0/gcc/config/pa/pa.h 2020-03-04 00:30:00.000000000 -0800 @@ -689,7 +689,7 @@ extern int may_call_alloca; /* Length in units of the trampoline for entering a nested function. */ -#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 52) +#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 64) /* Alignment required by the trampoline. */ @@ -1293,12 +1293,14 @@ do { \ #endif /* The maximum offset in bytes for a PA 1.X pc-relative call to the - head of the preceding stub table. The selected offsets have been - chosen so that approximately one call stub is allocated for every - 86.7 instructions. A long branch stub is two instructions when - not generating PIC code. For HP-UX and ELF targets, PIC stubs are - seven and four instructions, respectively. */ -#define MAX_PCREL17F_OFFSET \ - (flag_pic ? (TARGET_HPUX ? 198164 : 221312) : 240000) + head of the preceding stub table. A long branch stub is two or three + instructions for non-PIC and PIC, respectively. Import stubs are + seven and five instructions for HP-UX and ELF targets, respectively. + The default stub group size for ELF targets is 217856 bytes. + FIXME: We need an option to set the maximum offset. */ +#define MAX_PCREL17F_OFFSET (TARGET_HPUX ? 198164 : 217856) #define NEED_INDICATE_EXEC_STACK 0 + +/* Output default function prologue for hpux. */ +#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue diff -rNubwp gcc-8.3.0/gcc/config/pa/pa.md gcc-8.4.0/gcc/config/pa/pa.md --- gcc-8.3.0/gcc/config/pa/pa.md 2018-09-19 18:07:42.000000000 -0700 +++ gcc-8.4.0/gcc/config/pa/pa.md 2020-03-04 00:30:00.000000000 -0800 @@ -765,7 +765,7 @@ (define_insn "scc" [(set (match_operand:SI 0 "register_operand" "=r") - (match_operator:SI 3 "comparison_operator" + (match_operator:SI 3 "ordered_comparison_operator" [(match_operand:SI 1 "reg_or_0_operand" "rM") (match_operand:SI 2 "arith11_operand" "rI")]))] "" @@ -775,7 +775,7 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r") - (match_operator:DI 3 "comparison_operator" + (match_operator:DI 3 "ordered_comparison_operator" [(match_operand:DI 1 "reg_or_0_operand" "rM") (match_operand:DI 2 "arith11_operand" "rI")]))] "TARGET_64BIT" @@ -785,10 +785,10 @@ (define_insn "iorscc" [(set (match_operand:SI 0 "register_operand" "=r") - (ior:SI (match_operator:SI 3 "comparison_operator" + (ior:SI (match_operator:SI 3 "ordered_comparison_operator" [(match_operand:SI 1 "reg_or_0_operand" "rM") (match_operand:SI 2 "arith11_operand" "rI")]) - (match_operator:SI 6 "comparison_operator" + (match_operator:SI 6 "ordered_comparison_operator" [(match_operand:SI 4 "reg_or_0_operand" "rM") (match_operand:SI 5 "arith11_operand" "rI")])))] "" @@ -798,10 +798,10 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r") - (ior:DI (match_operator:DI 3 "comparison_operator" + (ior:DI (match_operator:DI 3 "ordered_comparison_operator" [(match_operand:DI 1 "reg_or_0_operand" "rM") (match_operand:DI 2 "arith11_operand" "rI")]) - (match_operator:DI 6 "comparison_operator" + (match_operator:DI 6 "ordered_comparison_operator" [(match_operand:DI 4 "reg_or_0_operand" "rM") (match_operand:DI 5 "arith11_operand" "rI")])))] "TARGET_64BIT" @@ -813,7 +813,7 @@ ;; from an scc insn (negscc and incscc). (define_insn "negscc" [(set (match_operand:SI 0 "register_operand" "=r") - (neg:SI (match_operator:SI 3 "comparison_operator" + (neg:SI (match_operator:SI 3 "ordered_comparison_operator" [(match_operand:SI 1 "reg_or_0_operand" "rM") (match_operand:SI 2 "arith11_operand" "rI")])))] "" @@ -823,7 +823,7 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r") - (neg:DI (match_operator:DI 3 "comparison_operator" + (neg:DI (match_operator:DI 3 "ordered_comparison_operator" [(match_operand:DI 1 "reg_or_0_operand" "rM") (match_operand:DI 2 "arith11_operand" "rI")])))] "TARGET_64BIT" @@ -904,7 +904,7 @@ (define_insn "incscc" [(set (match_operand:SI 0 "register_operand" "=r,r") - (plus:SI (match_operator:SI 4 "comparison_operator" + (plus:SI (match_operator:SI 4 "ordered_comparison_operator" [(match_operand:SI 2 "register_operand" "r,r") (match_operand:SI 3 "arith11_operand" "rI,rI")]) (match_operand:SI 1 "register_operand" "0,?r")))] @@ -917,7 +917,7 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r,r") - (plus:DI (match_operator:DI 4 "comparison_operator" + (plus:DI (match_operator:DI 4 "ordered_comparison_operator" [(match_operand:DI 2 "register_operand" "r,r") (match_operand:DI 3 "arith11_operand" "rI,rI")]) (match_operand:DI 1 "register_operand" "0,?r")))] @@ -1062,7 +1062,7 @@ (define_insn "decscc" [(set (match_operand:SI 0 "register_operand" "=r,r") (minus:SI (match_operand:SI 1 "register_operand" "0,?r") - (match_operator:SI 4 "comparison_operator" + (match_operator:SI 4 "ordered_comparison_operator" [(match_operand:SI 2 "register_operand" "r,r") (match_operand:SI 3 "arith11_operand" "rI,rI")])))] "" @@ -1075,7 +1075,7 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r,r") (minus:DI (match_operand:DI 1 "register_operand" "0,?r") - (match_operator:DI 4 "comparison_operator" + (match_operator:DI 4 "ordered_comparison_operator" [(match_operand:DI 2 "register_operand" "r,r") (match_operand:DI 3 "arith11_operand" "rI,rI")])))] "TARGET_64BIT" @@ -1242,7 +1242,7 @@ (define_expand "movsicc" [(set (match_operand:SI 0 "register_operand" "") (if_then_else:SI - (match_operand 1 "comparison_operator" "") + (match_operand 1 "ordered_comparison_operator" "") (match_operand:SI 2 "reg_or_cint_move_operand" "") (match_operand:SI 3 "reg_or_cint_move_operand" "")))] "" @@ -1264,7 +1264,7 @@ (define_insn "" [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") (if_then_else:SI - (match_operator 2 "comparison_operator" + (match_operator 2 "ordered_comparison_operator" [(match_operand:SI 3 "register_operand" "r,r,r,r") (match_operand:SI 4 "arith11_operand" "rI,rI,rI,rI")]) (match_operand:SI 1 "reg_or_cint_move_operand" "0,J,N,K") @@ -1281,7 +1281,7 @@ (define_insn "" [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r") (if_then_else:SI - (match_operator 5 "comparison_operator" + (match_operator 5 "ordered_comparison_operator" [(match_operand:SI 3 "register_operand" "r,r,r,r,r,r,r,r") (match_operand:SI 4 "arith11_operand" "rI,rI,rI,rI,rI,rI,rI,rI")]) (match_operand:SI 1 "reg_or_cint_move_operand" "0,0,0,0,r,J,N,K") @@ -1302,7 +1302,7 @@ (define_expand "movdicc" [(set (match_operand:DI 0 "register_operand" "") (if_then_else:DI - (match_operand 1 "comparison_operator" "") + (match_operand 1 "ordered_comparison_operator" "") (match_operand:DI 2 "reg_or_cint_move_operand" "") (match_operand:DI 3 "reg_or_cint_move_operand" "")))] "TARGET_64BIT" @@ -1318,7 +1318,7 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r") (if_then_else:DI - (match_operator 2 "comparison_operator" + (match_operator 2 "ordered_comparison_operator" [(match_operand:DI 3 "register_operand" "r,r,r,r,r") (match_operand:DI 4 "arith11_operand" "rI,rI,rI,rI,rI")]) (match_operand:DI 1 "reg_or_cint_move_operand" "0,r,J,N,K") @@ -1336,7 +1336,7 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r,r,r") (if_then_else:DI - (match_operator 5 "comparison_operator" + (match_operator 5 "ordered_comparison_operator" [(match_operand:DI 3 "register_operand" "r,r,r,r,r,r,r,r") (match_operand:DI 4 "arith11_operand" "rI,rI,rI,rI,rI,rI,rI,rI")]) (match_operand:DI 1 "reg_or_cint_move_operand" "0,0,0,0,r,J,N,K") @@ -1413,7 +1413,7 @@ (define_insn "" [(set (pc) (if_then_else - (match_operator 3 "comparison_operator" + (match_operator 3 "ordered_comparison_operator" [(match_operand:SI 1 "reg_or_0_operand" "rM") (match_operand:SI 2 "arith5_operand" "rL")]) (label_ref (match_operand 0 "" "")) @@ -1442,7 +1442,7 @@ (define_insn "" [(set (pc) (if_then_else - (match_operator 3 "comparison_operator" + (match_operator 3 "ordered_comparison_operator" [(match_operand:SI 1 "reg_or_0_operand" "rM") (match_operand:SI 2 "arith5_operand" "rL")]) (pc) @@ -1469,7 +1469,7 @@ (define_insn "" [(set (pc) (if_then_else - (match_operator 3 "comparison_operator" + (match_operator 3 "ordered_comparison_operator" [(match_operand:DI 1 "reg_or_0_operand" "rM") (match_operand:DI 2 "reg_or_0_operand" "rM")]) (label_ref (match_operand 0 "" "")) @@ -1498,7 +1498,7 @@ (define_insn "" [(set (pc) (if_then_else - (match_operator 3 "comparison_operator" + (match_operator 3 "ordered_comparison_operator" [(match_operand:DI 1 "reg_or_0_operand" "rM") (match_operand:DI 2 "reg_or_0_operand" "rM")]) (pc) @@ -5319,8 +5319,8 @@ (define_insn "umulsidi3" [(set (match_operand:DI 0 "register_operand" "=f") - (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "f")) - (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "f"))))] + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "f")) + (zero_extend:DI (match_operand:SI 2 "register_operand" "f"))))] "TARGET_PA_11 && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT" "xmpyu %1,%2,%0" [(set_attr "type" "fpmuldbl") @@ -5328,7 +5328,7 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=f") - (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "f")) + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "f")) (match_operand:DI 2 "uint32_operand" "f")))] "TARGET_PA_11 && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT && !TARGET_64BIT" "xmpyu %1,%R2,%0" @@ -5337,7 +5337,7 @@ (define_insn "" [(set (match_operand:DI 0 "register_operand" "=f") - (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "f")) + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "f")) (match_operand:DI 2 "uint32_operand" "f")))] "TARGET_PA_11 && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT && TARGET_64BIT" "xmpyu %1,%2R,%0" @@ -6904,21 +6904,24 @@ rtx stack = operands[2]; rtx fp = operands[3]; - lab = copy_to_reg (lab); - emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode))); emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx)); - /* Restore the frame pointer. The virtual_stack_vars_rtx is saved - instead of the hard_frame_pointer_rtx in the save area. As a - result, an extra instruction is needed to adjust for the offset - of the virtual stack variables and the hard frame pointer. */ - if (GET_CODE (fp) != REG) - fp = force_reg (Pmode, fp); - emit_move_insn (hard_frame_pointer_rtx, plus_constant (Pmode, fp, -8)); + lab = copy_to_reg (lab); + /* Restore the stack and frame pointers. The virtual_stack_vars_rtx + is saved instead of the hard_frame_pointer_rtx in the save area. + As a result, an extra instruction is needed to adjust for the offset + of the virtual stack variables and the hard frame pointer. */ + fp = copy_to_reg (fp); emit_stack_restore (SAVE_NONLOCAL, stack); + /* Ensure the frame pointer move is not optimized. */ + emit_insn (gen_blockage ()); + emit_clobber (hard_frame_pointer_rtx); + emit_clobber (frame_pointer_rtx); + emit_move_insn (hard_frame_pointer_rtx, plus_constant (Pmode, fp, -8)); + emit_use (hard_frame_pointer_rtx); emit_use (stack_pointer_rtx); @@ -8695,23 +8698,26 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode))); emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx)); - /* Restore the frame pointer. The virtual_stack_vars_rtx is saved - instead of the hard_frame_pointer_rtx in the save area. We need - to adjust for the offset between these two values. */ - if (GET_CODE (fp) != REG) - fp = force_reg (Pmode, fp); - emit_move_insn (hard_frame_pointer_rtx, plus_constant (Pmode, fp, -8)); - - /* This bit is the same as expand_builtin_longjmp. */ - emit_stack_restore (SAVE_NONLOCAL, stack); - emit_use (hard_frame_pointer_rtx); - emit_use (stack_pointer_rtx); - /* Load the label we are jumping through into r1 so that we know where to look for it when we get back to setjmp's function for restoring the gp. */ emit_move_insn (pv, lab); + /* Restore the stack and frame pointers. The virtual_stack_vars_rtx + is saved instead of the hard_frame_pointer_rtx in the save area. + We need to adjust for the offset between these two values. */ + fp = copy_to_reg (fp); + emit_stack_restore (SAVE_NONLOCAL, stack); + + /* Ensure the frame pointer move is not optimized. */ + emit_insn (gen_blockage ()); + emit_clobber (hard_frame_pointer_rtx); + emit_clobber (frame_pointer_rtx); + emit_move_insn (hard_frame_pointer_rtx, plus_constant (Pmode, fp, -8)); + + emit_use (hard_frame_pointer_rtx); + emit_use (stack_pointer_rtx); + /* Prevent the insns above from being scheduled into the delay slot of the interspace jump because the space register could change. */ emit_insn (gen_blockage ()); @@ -9056,7 +9062,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" (define_insn "decrement_and_branch_until_zero" [(set (pc) (if_then_else - (match_operator 2 "comparison_operator" + (match_operator 2 "ordered_comparison_operator" [(plus:SI (match_operand:SI 0 "reg_before_reload_operand" "+!r,!*f,*Q") (match_operand:SI 1 "int5_operand" "L,L,L")) @@ -10085,23 +10091,55 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" (set_attr "length" "4,16")]) ;; PA 2.0 hardware supports out-of-order execution of loads and stores, so -;; we need a memory barrier to enforce program order for memory references. -;; Since we want PA 1.x code to be PA 2.0 compatible, we also need the -;; barrier when generating PA 1.x code. +;; we need memory barriers to enforce program order for memory references +;; when the TLB and PSW O bits are not set. We assume all PA 2.0 systems +;; are weakly ordered since neither HP-UX or Linux set the PSW O bit. Since +;; we want PA 1.x code to be PA 2.0 compatible, we also need barriers when +;; generating PA 1.x code even though all PA 1.x systems are strongly ordered. + +;; When barriers are needed, we use a strongly ordered ldcw instruction as +;; the barrier. Most PA 2.0 targets are cache coherent. In that case, we +;; can use the coherent cache control hint and avoid aligning the ldcw +;; address. In spite of its description, it is not clear that the sync +;; instruction works as a barrier. (define_expand "memory_barrier" - [(set (match_dup 0) - (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] + [(parallel + [(set (match_dup 0) (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) + (clobber (match_dup 1))])] "" { - operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + /* We don't need a barrier if the target uses ordered memory references. */ + if (TARGET_ORDERED) + FAIL; + operands[1] = gen_reg_rtx (Pmode); + operands[0] = gen_rtx_MEM (BLKmode, operands[1]); MEM_VOLATILE_P (operands[0]) = 1; }) -(define_insn "*memory_barrier" +(define_insn "*memory_barrier_coherent" [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] - "" - "sync" + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) + (clobber (match_operand 1 "pmode_register_operand" "=r"))] + "TARGET_PA_20 && TARGET_COHERENT_LDCW" + "ldcw,co 0(%%sp),%1" [(set_attr "type" "binary") (set_attr "length" "4")]) + +(define_insn "*memory_barrier_64" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) + (clobber (match_operand 1 "pmode_register_operand" "=&r"))] + "TARGET_64BIT" + "ldo 15(%%sp),%1\n\tdepd %%r0,63,3,%1\n\tldcw 0(%1),%1" + [(set_attr "type" "binary") + (set_attr "length" "12")]) + +(define_insn "*memory_barrier_32" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) + (clobber (match_operand 1 "pmode_register_operand" "=&r"))] + "" + "ldo 15(%%sp),%1\n\t{dep|depw} %%r0,31,3,%1\n\tldcw 0(%1),%1" + [(set_attr "type" "binary") + (set_attr "length" "12")]) diff -rNubwp gcc-8.3.0/gcc/config/pa/pa.opt gcc-8.4.0/gcc/config/pa/pa.opt --- gcc-8.3.0/gcc/config/pa/pa.opt 2018-01-03 02:03:58.000000000 -0800 +++ gcc-8.4.0/gcc/config/pa/pa.opt 2020-03-04 00:30:00.000000000 -0800 @@ -45,6 +45,10 @@ mcaller-copies Target Report Mask(CALLER_COPIES) Caller copies function arguments passed by hidden reference. +mcoherent-ldcw +Target Report Var(TARGET_COHERENT_LDCW) Init(1) +Use ldcw/ldcd coherent cache-control hint. + mdisable-fpregs Target Report Mask(DISABLE_FPREGS) Disable FP regs. @@ -90,6 +94,10 @@ mno-space-regs Target RejectNegative Report Mask(NO_SPACE_REGS) Disable space regs. +mordered +Target Report Var(TARGET_ORDERED) Init(0) +Assume memory references are ordered and barriers are not needed. + mpa-risc-1-0 Target RejectNegative Generate PA1.0 code. diff -rNubwp gcc-8.3.0/gcc/config/pa/predicates.md gcc-8.4.0/gcc/config/pa/predicates.md --- gcc-8.3.0/gcc/config/pa/predicates.md 2018-02-01 09:12:28.000000000 -0800 +++ gcc-8.4.0/gcc/config/pa/predicates.md 2020-03-04 00:30:00.000000000 -0800 @@ -662,8 +662,8 @@ (and (match_code "symbol_ref") (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_EXEC"))) -;; True iff this is a comparison operator. This allows the use of -;; MATCH_OPERATOR to recognize all the branch insns. +;; True iff OP is an operator suitable for use in a double-word cmpib +;; instruction. (define_predicate "cmpib_comparison_operator" (match_code "eq,ne,lt,le,leu,gt,gtu,ge")) diff -rNubwp gcc-8.3.0/gcc/config/pa/som.h gcc-8.4.0/gcc/config/pa/som.h --- gcc-8.3.0/gcc/config/pa/som.h 2018-01-16 16:09:44.000000000 -0800 +++ gcc-8.4.0/gcc/config/pa/som.h 2020-03-04 00:30:00.000000000 -0800 @@ -98,8 +98,8 @@ do { \ #define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ - do { tree fntype = TREE_TYPE (TREE_TYPE (DECL)); \ - tree tree_type = TREE_TYPE (DECL); \ + do { tree tree_type = TREE_TYPE (DECL); \ + tree fntype = TREE_TYPE (tree_type); \ tree parm; \ int i; \ if (TREE_PUBLIC (DECL) || TARGET_GAS) \ @@ -121,9 +121,11 @@ do { \ { \ tree type = DECL_ARG_TYPE (parm); \ machine_mode mode = TYPE_MODE (type); \ - if (mode == SFmode && ! TARGET_SOFT_FLOAT) \ + if (!AGGREGATE_TYPE_P (type) \ + && mode == SFmode && ! TARGET_SOFT_FLOAT) \ fprintf (FILE, ",ARGW%d=FR", i++); \ - else if (mode == DFmode && ! TARGET_SOFT_FLOAT) \ + else if (!AGGREGATE_TYPE_P (type) \ + && mode == DFmode && ! TARGET_SOFT_FLOAT) \ { \ if (i <= 2) \ { \ @@ -158,9 +160,13 @@ do { \ for (; i < 4; i++) \ fprintf (FILE, ",ARGW%d=GR", i); \ } \ - if (TYPE_MODE (fntype) == DFmode && ! TARGET_SOFT_FLOAT) \ + if (!AGGREGATE_TYPE_P (fntype) \ + && TYPE_MODE (fntype) == DFmode \ + && ! TARGET_SOFT_FLOAT) \ fputs (DFMODE_RETURN_STRING, FILE); \ - else if (TYPE_MODE (fntype) == SFmode && ! TARGET_SOFT_FLOAT) \ + else if (!AGGREGATE_TYPE_P (fntype) \ + && TYPE_MODE (fntype) == SFmode \ + && ! TARGET_SOFT_FLOAT) \ fputs (SFMODE_RETURN_STRING, FILE); \ else if (fntype != void_type_node) \ fputs (",RTNVAL=GR", FILE); \