Upset Plot

IMDB Top 1000 Movies

plot upset
from matplotlib import pyplot as plt

import marsilea as ma
from marsilea.upset import UpsetData


# Load the "imdb" dataset through marsilea's data loader.
imdb = ma.load_data("imdb")

# Columns carried along as per-movie attributes; "Title" is included only so
# it can serve as the index of the attribute table.
attr_columns = [
    "Title",
    "Year",
    "Runtime (Minutes)",
    "Rating",
    "Votes",
    "Revenue (Millions)",
    "Metascore",
]
items_attrs = imdb[attr_columns].set_index("Title")

# Every movie's "Genre" string is split on commas, so each movie maps to the
# list of genres it belongs to; movies are named by their title.
genre_memberships = imdb["Genre"].str.split(",")
upset_data = UpsetData.from_memberships(
    genre_memberships,
    items_names=imdb["Title"],
    items_attrs=items_attrs,
)

# Build the upset plot from the prepared membership data.
# NOTE(review): min_cardinality=15 presumably drops subsets with fewer than
# 15 movies, and orient="v" presumably selects a vertical layout -- confirm
# against the marsilea Upset documentation.
us = ma.upset.Upset(upset_data, orient="v", min_cardinality=15)

# Two highlight passes with different thresholds: the first sets a fill
# colour, the second sets a green edge.
us.highlight_subsets(
    min_cardinality=48,
    facecolor="#D0104C",
    label="Larger than 48",
)
us.highlight_subsets(
    min_cardinality=32,
    edgecolor="green",
    edgewidth=1.5,
    label="Larger than 32",
)

# Revenue attribute drawn as a strip plot on the left side.
revenue_strip_style = {
    "color": "#24936E",
    "size": 1.2,
    "label": "Revenue\n(Millions)",
}
us.add_items_attr(
    "left",
    "Revenue (Millions)",
    "strip",
    pad=0.2,
    size=0.5,
    plot_kws=revenue_strip_style,
)

# Rating attribute drawn as a box plot on the right side.
rating_box_style = {
    "color": "orange",
    "linewidth": 1,
    "fliersize": 1,
}
us.add_items_attr(
    "right",
    "Rating",
    "box",
    pad=0.2,
    plot_kws=rating_box_style,
)

# Attach legends, set the outer margin, then draw the figure.
us.add_legends(box_padding=0)
us.set_margin(0.3)
us.render()

Total running time of the script: (0 minutes 2.649 seconds)

Gallery generated by Sphinx-Gallery