
Update Dask backend #1411

Merged · 17 commits · Apr 12, 2023
Changes from 9 commits
38 changes: 20 additions & 18 deletions examples/parallel/distributed_backend_simple.py
@@ -1,32 +1,36 @@
"""
-Using dask distributed for single-machine parallel computing
-=============================================================
+Using Dask for single-machine parallel computing
+================================================

-This example shows the simplest usage of the dask `distributed
-<https://distributed.readthedocs.io>`__ backend, on the local computer.
+This example shows the simplest usage of the
+`Dask <https://docs.dask.org/en/stable/>`_
+backend on your local machine.

This is useful for prototyping a solution, to later be run on a truly
-distributed cluster, as the only change to be made is the address of the
-scheduler.
+`distributed Dask cluster
+<https://docs.dask.org/en/stable/deploying.html#distributed-computing>`_,
+as the only change needed is the cluster class.

Another realistic usage scenario: combining dask code with joblib code,
for instance using dask for preprocessing data, and scikit-learn for
machine learning. In such a setting, it may be interesting to use
distributed as a backend scheduler for both dask and joblib, to
-orchestrate well the computation.
+orchestrate the computation.

"""

###############################################################################
# Setup the distributed client
###############################################################################
-from dask.distributed import Client
+from dask.distributed import Client, LocalCluster

-# If you have a remote cluster running Dask
-# client = Client('tcp://scheduler-address:8786')
+# replace with whichever cluster class you're using
+cluster = LocalCluster()
+# connect client to your cluster
+client = Client(cluster)

-# If you want Dask to set itself up on your personal computer
-client = Client(processes=False)
+# Monitor your computation with the Dask dashboard
+print(client.dashboard_link)

###############################################################################
# Run parallel computation using dask.distributed
@@ -37,18 +41,16 @@


def long_running_function(i):
-    time.sleep(.1)
+    time.sleep(0.1)
    return i


###############################################################################
# The verbose messages below show that the backend is indeed the
# dask.distributed one
-with joblib.parallel_backend('dask'):
+with joblib.parallel_backend("dask"):
    joblib.Parallel(verbose=100)(
-        joblib.delayed(long_running_function)(i)
-        for i in range(10))
+        joblib.delayed(long_running_function)(i) for i in range(10)
+    )

###############################################################################
# Progress in computation can be followed on the distributed web
# interface, see https://dask.pydata.org/en/latest/diagnostics-distributed.html
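
As the updated docstring promises, moving this example from a LocalCluster to a real deployment only swaps the cluster object the client connects to. A minimal sketch of that swap, connecting to an already-running scheduler; the address below is a placeholder for your own deployment, not something this PR sets up:

import time

import joblib
from dask.distributed import Client


def long_running_function(i):
    time.sleep(0.1)
    return i


# Point the client at an existing scheduler instead of creating a
# LocalCluster; replace the placeholder address with your own.
client = Client("tcp://scheduler-address:8786")

with joblib.parallel_backend("dask"):
    joblib.Parallel(verbose=100)(
        joblib.delayed(long_running_function)(i) for i in range(10)
    )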
18 changes: 14 additions & 4 deletions joblib/parallel.py
@@ -152,10 +152,20 @@ class parallel_backend(object):
multiprocessing and loky may not be available, in which case joblib
defaults to threading.

-In addition, if the `dask` and `distributed` Python packages are installed,
-it is possible to use the 'dask' backend for better scheduling of nested
-parallel calls without over-subscription and potentially distribute
-parallel calls over a networked cluster of several hosts.
+You can also use the `Dask <https://docs.dask.org/en/stable/>`_ joblib
+backend to distribute work across machines. This works well with
+scikit-learn estimators with the ``n_jobs`` parameter, for example::

+>>> import joblib  # doctest: +SKIP
+>>> from sklearn.model_selection import GridSearchCV  # doctest: +SKIP
+>>> from dask.distributed import Client  # doctest: +SKIP
+
+>>> # create a local Dask cluster
+>>> client = Client()  # doctest: +SKIP
+>>> grid_search = GridSearchCV(estimator, param_grid, n_jobs=-1)
+... # doctest: +SKIP
+>>> with joblib.parallel_backend("dask", scatter=[X, y]):  # doctest: +SKIP
+...     grid_search.fit(X, y)

It is also possible to use the distributed 'ray' backend for distributing
the workload to a cluster of nodes. To use the 'ray' joblib backend add
the following lines:
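
A minimal sketch of that setup, assuming the `ray` package is installed (the `register_ray` helper lives in `ray.util.joblib`):

import joblib
from ray.util.joblib import register_ray

# Register the 'ray' backend with joblib before requesting it.
register_ray()

with joblib.parallel_backend("ray"):
    results = joblib.Parallel(n_jobs=-1)(
        joblib.delayed(abs)(-i) for i in range(5)
    )
print(results)  # [0, 1, 2, 3, 4]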