Merge pull request #68 from jgehrcke/jp/edgecase-64
analyze.py: handle case: no path/referrer data
jgehrcke committed Dec 16, 2022
Commit 306db38, 2 parents: f174916 + e623edb
Showing 4 changed files with 41 additions and 7 deletions.
.flake8 (3 changes: 1 addition & 2 deletions)
@@ -1,4 +1,3 @@
 [flake8]
 max-line-length = 130
-ignore = E203,W503 # conflict with black, also see https://github.com/psf/black/issues/315
-
+ignore = E203,W503
.github/workflows/prs.yml (2 changes: 1 addition & 1 deletion)
@@ -7,7 +7,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.10.0]
+        python-version: [3.10.8]
 
     steps:
       - uses: actions/checkout@v2
analyze.py (23 changes: 19 additions & 4 deletions)
@@ -24,7 +24,7 @@
 import sys
 import tempfile
 
-from typing import Iterable, Set, Any, Optional, Tuple
+from typing import Iterable, Set, Any, Optional, Tuple, Iterator, cast
 from datetime import datetime
 from io import StringIO

@@ -488,6 +488,8 @@ def _glob_csvpaths(basename_suffix):
 def analyse_top_x_snapshots(entity_type, date_axis_lim):
     assert entity_type in ["referrer", "path"]
 
+    heading = "Top referrers" if entity_type == "referrer" else "Top paths"
+
     log.info("read 'top %s' snapshots (CSV docs)", entity_type)
     basename_suffix = f"_top_{entity_type}s_snapshot.csv"
     csvpaths = _glob_csvpaths(basename_suffix)
@@ -516,6 +518,20 @@ def _get_uens(snapshot_dfs):
     # dataframes where each dataframe corresponds to a single referrer/path,
     # and contains information about multiple timestamps
 
+    if not len(snapshot_dfs):
+        MD_REPORT.write(
+            textwrap.dedent(
+                f"""
+    #### {heading}
+    No {entity_type} data available.
+    """
+            )
+        )
+        return
+
     # First, create a dataframe containing all information.
     dfa = pd.concat(snapshot_dfs)

@@ -737,8 +753,6 @@ def _get_uens(snapshot_dfs):
     # container may be a <div> element that has style width: 100%; height:
     # 300px.""
 
-    heading = "Top referrers" if entity_type == "referrer" else "Top paths"
-
     # Textual form: larger N, and no cutoff (arbitrary length and legend of
     # plot don't go well with each other).
     top_n = 15
@@ -831,7 +845,8 @@ def analyse_view_clones_ts_fragments() -> pd.DataFrame:
             log.error("columns in %s: %s", p, df.columns)
             sys.exit(1)
 
-        column_names_seen.update(df.columns)
+        # `.columns` is known to be only strings
+        column_names_seen.update(cast(Iterator[str], df.columns))
 
         df = df.sort_index()

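Side note on the cast() change above: cast() has no runtime effect, it only tells the type checker to treat the column iterable as yielding str, which the generic pandas Index annotation does not promise. A minimal standalone sketch of the same pattern (hypothetical column names, not code from this commit):

from typing import Iterator, Set, cast

import pandas as pd

column_names_seen: Set[str] = set()

# Hypothetical frame standing in for one parsed CSV fragment
# (column names are made up for illustration).
df = pd.DataFrame({"views_total": [3], "views_unique": [2]})

# Runtime no-op: cast() returns df.columns unchanged; it only lets the type
# checker accept updating a Set[str] with the column names.
column_names_seen.update(cast(Iterator[str], df.columns))
print(column_names_seen)  # {'views_total', 'views_unique'} (order may vary)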
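For context on the early-return hunk at @@ -516,6 +518,20 @@: without that guard, the subsequent pd.concat(snapshot_dfs) call raises when no snapshot CSV files were found, because pandas refuses to concatenate an empty sequence. A minimal standalone illustration of that failure mode (my own sketch, not repository code):

import pandas as pd

# Stand-in for the snapshot_dfs list when no *_top_referrers_snapshot.csv or
# *_top_paths_snapshot.csv files were found.
snapshot_dfs = []

try:
    pd.concat(snapshot_dfs)
except ValueError as exc:
    print(f"pd.concat failed: {exc}")  # "No objects to concatenate"
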
tests/analyze.bats (20 changes: 20 additions & 0 deletions)
@@ -46,6 +46,26 @@ setup() {
     [ "$status" -eq 0 ]
 }
 
+@test "analyze.py: snapshots: none, vcagg: yes, stars: none, forks: none" {
+    run python analyze.py owner/repo tests/data/A/snapshots/does-not-exist \
+        --resources-directory=resources \
+        --output-directory $BATS_TEST_TMPDIR/outdir \
+        --outfile-prefix "" \
+        --stargazer-ts-resampled-outpath $BATS_TEST_TMPDIR/stargazers-rs.csv \
+        --fork-ts-resampled-outpath $BATS_TEST_TMPDIR/forks-rs.csv \
+        --views-clones-aggregate-inpath tests/data/A/views_clones_aggregate.csv
+    [ "$status" -eq 0 ]
+
+    assert_exist $BATS_TEST_TMPDIR/outdir/report.html
+
+    run grep "No referrer data available" $BATS_TEST_TMPDIR/outdir/report.html
+    [ "$status" -eq 0 ]
+
+    run grep "No path data available" $BATS_TEST_TMPDIR/outdir/report.html
+    [ "$status" -eq 0 ]
+}
+
+
 @test "analyze.py: snapshots: some, vcagg: yes, stars: some, forks: some" {
     assert_not_exist $BATS_TEST_TMPDIR/forks-rs.csv
     run python analyze.py owner/repo tests/data/A/snapshots \
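The new bats case exercises exactly the no-data path: it points analyze.py at a snapshot directory that does not exist and then greps the generated report.html for the two placeholder strings. Assuming bats-core plus the helper libraries that provide assert_exist are installed, the suite can presumably be run locally with "bats tests/analyze.bats" (optionally narrowed to the new case via bats-core's --filter option).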
