diff --git a/bilby/core/result.py b/bilby/core/result.py
index dc56ffcc5d3a57ca92b6ce9d64d2f55d76f5f3d9..ede6bf64327d1fa97745979ab3fb7b1def57c96f 100644
--- a/bilby/core/result.py
+++ b/bilby/core/result.py
@@ -1443,7 +1443,7 @@ def plot_multiple(results, filename=None, labels=None, colours=None,
 
 
 def make_pp_plot(results, filename=None, save=True, confidence_interval=0.9,
-                 lines=None, legend_fontsize=9, keys=None, title=True,
+                 lines=None, legend_fontsize='x-small', keys=None, title=True,
                  **kwargs):
     """
     Make a P-P plot for a set of runs with injected signals.
@@ -1475,6 +1475,9 @@ def make_pp_plot(results, filename=None, save=True, confidence_interval=0.9,
         `pvalues`, and `names`.
     """
 
+    if keys is None:
+        keys = results[0].search_parameter_keys
+
     credible_levels = pd.DataFrame()
     for result in results:
         credible_levels = credible_levels.append(
@@ -1507,11 +1510,17 @@ def make_pp_plot(results, filename=None, save=True, confidence_interval=0.9,
     for ii, key in enumerate(credible_levels):
         pp = np.array([sum(credible_levels[key].values < xx) /
                        len(credible_levels) for xx in x_values])
-        plt.plot(x_values, pp, lines[ii], label=key, **kwargs)
         pvalue = scipy.stats.kstest(credible_levels[key], 'uniform').pvalue
         pvalues.append(pvalue)
         logger.info("{}: {}".format(key, pvalue))
 
+        try:
+            name = results[0].priors[key].latex_label
+        except AttributeError:
+            name = key
+        label = "{} ({:2.3f})".format(name, pvalue)
+        plt.plot(x_values, pp, lines[ii], label=label, **kwargs)
+
     Pvals = namedtuple('pvals', ['combined_pvalue', 'pvalues', 'names'])
     pvals = Pvals(combined_pvalue=scipy.stats.combine_pvalues(pvalues)[1],
                   pvalues=pvalues,
@@ -1520,8 +1529,11 @@ def make_pp_plot(results, filename=None, save=True, confidence_interval=0.9,
         "Combined p-value: {}".format(pvals.combined_pvalue))
 
     if title:
-        ax.set_title("p-value = {:2.4f}".format(pvals.combined_pvalue))
-    ax.legend(linewidth=1, labelspacing=0.25)
+        ax.set_title("N={}, p-value={:2.4f}".format(
+            len(results), pvals.combined_pvalue))
+    ax.set_xlabel("C.I.")
+    ax.set_ylabel("Fraction of events in C.I.")
+    ax.legend(linewidth=1, labelspacing=0.25, fontsize=legend_fontsize)
     ax.set_xlim(0, 1)
     ax.set_ylim(0, 1)
     fig.tight_layout()