Python altair.X Examples
The following are 23
code examples of altair.X().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module altair, or try the search function.
Example #1
Source File: BubbleDiachronicVisualization.py From scattertext with Apache License 2.0 | 8 votes |
def visualize(display_df):
    """Return a bubble chart of ``display_df``.

    Circles are placed at ``variable`` (x) / ``term`` (y), sized by
    ``frequency`` and coloured by ``trending``. The colour scale runs from
    the minimum of ``trending`` through 0 to its maximum (both computed on
    the rows left after ``dropna()``), mapped to the first, middle and last
    colours of the palette below.
    """
    import altair as alt

    palette = ['#440154', '#472c7a', '#3b518b', '#2c718e', '#21908d',
               '#27ad81', '#5cc863', '#aadc32', '#fde725']
    low_color = palette[0]
    mid_color = palette[len(palette) // 2]
    high_color = palette[-1]

    # Only rows without missing values contribute to the scale bounds.
    trending = display_df.dropna().trending
    scale = alt.Scale(
        domain=(trending.min(), 0, trending.max()),
        range=[low_color, mid_color, high_color],
    )

    bubbles = alt.Chart(display_df).mark_circle()
    return bubbles.encode(
        alt.X('variable'),
        alt.Y('term'),
        size='frequency',
        color=alt.Color('trending:Q', scale=scale),
    )
Example #2
Source File: core.py From starborn with BSD 3-Clause "New" or "Revised" License | 6 votes |
def violinplot(x=None, y=None, data=None, orient=None):
    """Return a violin-style area chart of ``data``.

    With ``orient`` of ``None`` or ``'v'`` the binned ``y`` values run along
    the y axis, the centre-stacked counts along the x axis, and the chart is
    split into columns and coloured by ``x``. Any other ``orient`` swaps the
    roles: ``x`` is binned, counts run along y, rows and colour come from
    ``y``.
    """
    # TODO: automatically infer orientation
    count_options = dict(
        axis=alt.Axis(grid=False, labels=False),
        stack='center',
        title='',
    )
    is_vertical = orient is None or orient == 'v'

    if is_vertical:
        group_field = '{x}:N'.format(x=x)
        encodings = {
            'x': alt.X('count(*):Q', **count_options),
            'y': alt.Y('{y}:Q'.format(y=y), bin=alt.Bin(maxbins=100)),
            'column': group_field,
            'color': group_field,
        }
    else:
        group_field = '{y}:N'.format(y=y)
        encodings = {
            'y': alt.Y('count(*):Q', **count_options),
            'x': alt.X('{x}:Q'.format(x=x), bin=alt.Bin(maxbins=100)),
            'row': group_field,
            'color': group_field,
        }

    return alt.Chart(data).mark_area().encode(**encodings)
Example #3
Source File: plot.py From retentioneering-tools with Mozilla Public License 2.0 | 6 votes |
def altair_step_matrix(diff, plot_name=None, title='', vmin=None, vmax=None, font_size=12, **kwargs):
    """Render ``diff`` as an annotated heatmap.

    ``diff`` is reshaped to long form (columns ``y``, ``x``, ``z``) and drawn
    as a layer of coloured rectangles with the ``z`` value printed on top.
    The chart size scales with ``font_size`` and the shape of ``diff``.

    ``title``, ``vmin``, ``vmax`` and ``**kwargs`` are accepted but not used
    by this function.

    Returns a 4-tuple: the layered chart, ``plot_name`` unchanged, ``None``,
    and ``diff.retention.retention_config``.
    """
    long_form = diff.reset_index().melt('index')
    long_form.columns = ['y', 'x', 'z']

    # Shared x/y encoding; sort=None keeps the order found in the data.
    base = alt.Chart(long_form).encode(
        x=alt.X('x:O', sort=None),
        y=alt.Y('y:O', sort=None),
    )

    cells = base.mark_rect().encode(
        color=alt.Color('z:Q', scale=alt.Scale(scheme='blues')),
    )

    # Black text where |z| < 0.8, white text otherwise.
    label_color = alt.condition(
        abs(alt.datum.z) < 0.8,
        alt.value('black'),
        alt.value('white'),
    )
    labels = base.mark_text(align='center', fontSize=font_size).encode(
        text='z',
        color=label_color,
    )

    chart_width = 3 * font_size * len(diff.columns)
    chart_height = 2 * font_size * diff.shape[0]
    layered = (cells + labels).properties(width=chart_width, height=chart_height)

    return layered, plot_name, None, diff.retention.retention_config
Example #4
Source File: core.py From starborn with BSD 3-Clause "New" or "Revised" License | 6 votes |
def pairplot(data, hue=None, vars=None):
    """Return a repeated scatter-plot grid over the columns in ``vars``.

    Parameters
    ----------
    data :
        Data passed straight to ``alt.Chart``; must expose ``.columns`` when
        ``vars`` is not given.
    hue : str, optional
        Name of a column used as a nominal colour encoding. When omitted,
        no colour channel is encoded at all.
    vars : list, optional
        Column names used for both the rows and the columns of the grid.
        Defaults to every column of ``data``.

    Returns
    -------
    A repeated Altair chart with 250x250 cells.
    """
    if vars is None:
        vars = list(data.columns)

    # Only encode colour when a hue column is given; formatting ``None``
    # would otherwise produce the non-existent field 'None:N'. This matches
    # how ``scatterplot`` treats ``hue``.
    color_args = {'color': '{hue}:N'.format(hue=hue)} if hue else {}

    chart = alt.Chart(data).mark_circle().encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'),
        **color_args
    ).properties(
        width=250,
        height=250
    ).repeat(
        row=vars,
        column=vars
    )
    return chart
Example #5
Source File: core.py From starborn with BSD 3-Clause "New" or "Revised" License | 6 votes |
def scatterplot(x, y, data, hue=None, xlim=None, ylim=None):
    """Return a circle-mark scatter plot of ``data[y]`` against ``data[x]``.

    ``xlim`` / ``ylim`` set the axis scale domains; when either is ``None``
    it is obtained from ``get_limit_tuple`` applied to the matching column.
    A truthy ``hue`` adds a nominal colour encoding on that column.
    """
    # TODO: refactor so it uses category_chart_kwargs?
    x_domain = get_limit_tuple(data[x]) if xlim is None else xlim
    y_domain = get_limit_tuple(data[y]) if ylim is None else ylim

    extra_encodings = {}
    if hue:
        extra_encodings['color'] = '{hue}:N'.format(hue=hue)

    circles = alt.Chart(data).mark_circle()
    return circles.encode(
        alt.X(x, scale=alt.Scale(domain=x_domain)),
        alt.Y(y, scale=alt.Scale(domain=y_domain)),
        **extra_encodings
    )
Example #6
Source File: _core.py From altair_pandas with BSD 3-Clause "New" or "Revised" License | 6 votes |
def hist(self, bins=None, orientation="vertical", **kwargs):
    """Return a bar-chart histogram of the first column of the data.

    Parameters
    ----------
    bins : int, optional
        An ``int`` becomes ``alt.Bin(maxbins=bins)``; ``None`` enables
        binning with ``bin=True``; any other value is passed through as the
        ``bin`` argument unchanged.
    orientation : {"vertical", "horizontal"}
        "vertical" puts the binned values on x and counts on y;
        "horizontal" swaps the two.
    **kwargs
        Forwarded to ``self._get_mark_def`` together with the bar mark
        definition.

    Raises
    ------
    ValueError
        If ``orientation`` is neither "vertical" nor "horizontal".
    """
    data = self._preprocess_data(with_index=False)
    column = data.columns[0]

    if bins is None:
        bins = True
    elif isinstance(bins, int):
        bins = alt.Bin(maxbins=bins)

    if orientation not in ("vertical", "horizontal"):
        raise ValueError("orientation must be 'horizontal' or 'vertical'.")
    # Indep carries the binned values, Dep carries the counts.
    Indep, Dep = (alt.X, alt.Y) if orientation == "vertical" else (alt.Y, alt.X)

    mark = self._get_mark_def({"type": "bar", "orient": orientation}, kwargs)
    chart = alt.Chart(data, mark=mark)
    return chart.encode(
        Indep(column, title=None, bin=bins),
        Dep("count()", title="Frequency"),
    )
Example #7
Source File: explore.py From gobbli with Apache License 2.0 | 5 votes |
def st_heatmap(
    heatmap_df: pd.DataFrame, x_col_name: str, y_col_name: str, color_col_name: str
):
    """Draw a 700x700 rectangle-mark heatmap of ``heatmap_df`` in Streamlit.

    ``x_col_name`` and ``y_col_name`` name the columns used for the axes and
    ``color_col_name`` the column used for the cell colour. The chart is
    handed to ``st.altair_chart``; nothing is returned.
    """
    canvas = alt.Chart(heatmap_df, height=700, width=700)
    chart = canvas.mark_rect().encode(
        alt.X(x_col_name),
        alt.Y(y_col_name),
        alt.Color(color_col_name),
    )
    st.altair_chart(chart)
Example #8
Source File: core.py From starborn with BSD 3-Clause "New" or "Revised" License | 5 votes |
def heatmap(data, vmin=None, vmax=None, annot=None, fmt='.2g'):
    """Return a rectangle-mark heatmap of a 2-D table, optionally annotated.

    Parameters
    ----------
    data : pandas.DataFrame or array-like
        Anything that is not a DataFrame is converted with ``np.asarray`` and
        wrapped in a DataFrame.
    vmin, vmax :
        Accepted but currently not used by this function.
    annot : bool, optional
        When truthy, each cell's value is overlaid as text.
    fmt : str
        Format string applied to the overlaid text.

    Returns
    -------
    The heatmap chart, or the heatmap layered with the text chart when
    ``annot`` is truthy.
    """
    # We always want to have a DataFrame with semantic information
    if not isinstance(data, pd.DataFrame):
        data = pd.DataFrame(np.asarray(data))

    melted = data.stack().reset_index(name='Value')

    # Read the field names from the melted frame rather than from
    # ``data.index.name`` / ``data.columns.name``: those are None for
    # unnamed axes (always the case on the array path above), which would
    # encode the non-existent field 'None'. ``reset_index`` has already
    # given such levels usable column names.
    y, x = melted.columns[0], melted.columns[1]
    x_field = '{x}:O'.format(x=x)
    y_field = '{y}:O'.format(y=y)

    heatmap = alt.Chart(melted).mark_rect().encode(
        alt.X(x_field, scale=alt.Scale(paddingInner=0)),
        alt.Y(y_field, scale=alt.Scale(paddingInner=0)),
        color='Value:Q'
    )

    if not annot:
        return heatmap

    # Overlay text. NOTE(review): the black/white switch at 70 is a
    # hard-coded threshold and is not derived from the data range.
    text = alt.Chart(melted).mark_text(baseline='middle').encode(
        x=x_field,
        y=y_field,
        text=alt.Text('Value', format=fmt),
        color=alt.condition(alt.expr.datum['Value'] > 70,
                            alt.value('black'),
                            alt.value('white'))
    )
    return heatmap + text
Example #9
Source File: core.py From starborn with BSD 3-Clause "New" or "Revised" License | 5 votes |
def jointplot(x, y, data, kind='scatter', hue=None, xlim=None, ylim=None):
    """Return a scatter plot of ``y`` against ``x`` with marginal histograms.

    Parameters
    ----------
    x, y : str
        Column names of ``data`` plotted on the two axes.
    data :
        Data indexed by column name and passed to ``alt.Chart``.
    kind :
        Accepted but currently not used by this function.
    hue : str, optional
        Column used as a nominal colour encoding. When omitted, no colour
        channel is encoded on the marginal histograms.
    xlim, ylim : optional
        Axis limits; when ``None`` they come from ``get_limit_tuple`` on the
        matching column. They are also used as the bin extents of the
        histograms.

    Returns
    -------
    The top histogram stacked above the scatter plot, which sits beside the
    right-hand histogram.
    """
    if xlim is None:
        xlim = get_limit_tuple(data[x])
    if ylim is None:
        ylim = get_limit_tuple(data[y])

    points = scatterplot(x, y, data, hue=hue, xlim=xlim, ylim=ylim)

    area_args = {'opacity': .3, 'interpolate': 'step'}
    blank_axis = alt.Axis(title='')

    # Only colour by hue when one is given; formatting ``None`` would encode
    # the non-existent field 'None:N'. ``scatterplot`` applies the same rule.
    color_args = {'color': alt.Color('{hue}:N'.format(hue=hue))} if hue else {}

    top_hist = alt.Chart(data).mark_area(**area_args).encode(
        alt.X('{x}:Q'.format(x=x),
              # when using bins, the axis scale is set through
              # the bin extent, so we do not specify the scale here
              # (which would be ignored anyway)
              bin=alt.Bin(maxbins=20, extent=xlim),
              stack=None,
              axis=blank_axis,
              ),
        alt.Y('count()', stack=None, axis=blank_axis),
        **color_args
    ).properties(height=60)

    right_hist = alt.Chart(data).mark_area(**area_args).encode(
        alt.Y('{y}:Q'.format(y=y),
              bin=alt.Bin(maxbins=20, extent=ylim),
              stack=None,
              axis=blank_axis,
              ),
        alt.X('count()', stack=None, axis=blank_axis),
        **color_args
    ).properties(width=60)

    return top_hist & (points | right_hist)
Example #10
Source File: _core.py From altair_pandas with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _xy(self, mark, **kwargs):
    """Return an interactive chart of the second data column against the first.

    The data is preprocessed with ``with_index=True``; its first column goes
    on x, its second on y (both without axis titles), and every column is
    listed in the tooltip. ``mark`` and ``kwargs`` are combined by
    ``self._get_mark_def`` into the chart's mark definition.
    """
    data = self._preprocess_data(with_index=True)
    chart = alt.Chart(data, mark=self._get_mark_def(mark, kwargs))

    x_channel = alt.X(data.columns[0], title=None)
    y_channel = alt.Y(data.columns[1], title=None)
    encoded = chart.encode(
        x=x_channel,
        y=y_channel,
        tooltip=list(data.columns),
    )
    return encoded.interactive()
Example #11
Source File: _core.py From altair_pandas with BSD 3-Clause "New" or "Revised" License | 5 votes |
def box(self, vert=True, **kwargs):
    """Return a box plot with one box per data column.

    All columns are folded into ``column`` / ``value`` pairs. With ``vert``
    truthy the column names are on x and the values on y; otherwise the two
    encodings are swapped. ``**kwargs`` is accepted but not used here.
    """
    data = self._preprocess_data(with_index=False)

    folded = alt.Chart(data).transform_fold(
        list(data.columns), as_=["column", "value"]
    )
    chart = folded.mark_boxplot().encode(
        x=alt.X("column:N", title=None),
        y="value:Q",
    )

    if vert:
        return chart

    # Horizontal boxes: exchange the x and y encodings in place.
    encoding = chart.encoding
    encoding.x, encoding.y = encoding.y, encoding.x
    return chart
Example #12
Source File: explore.py From gobbli with Apache License 2.0 | 5 votes |
def show_document_length_distribution(tokens: List[List[str]]):
    """Show a histogram of document lengths in the Streamlit app.

    The lengths come from ``get_document_lengths(tokens)`` and are drawn as
    a 700x500 bar chart of counts over "Token Count", binned into at most 30
    bins, under a "Document Length Distribution" header.
    """
    st.header("Document Length Distribution")

    lengths_df = pd.DataFrame({"Token Count": get_document_lengths(tokens)})

    histogram = alt.Chart(lengths_df, height=500, width=700).mark_bar()
    histogram = histogram.encode(
        alt.X("Token Count", bin=alt.Bin(maxbins=30)),
        alt.Y("count()", type="quantitative"),
    )

    st.altair_chart(histogram)
Example #13
Source File: _core.py From altair_pandas with BSD 3-Clause "New" or "Revised" License | 5 votes |
def hist(self, bins=None, stacked=None, orientation="vertical", **kwargs):
    """Return a histogram with one colour-coded series per data column.

    Parameters
    ----------
    bins : int, optional
        An ``int`` becomes ``alt.Bin(maxbins=bins)``; ``None`` enables
        binning with ``bin=True``; any other value is passed through as the
        ``bin`` argument unchanged.
    stacked : optional
        Passed as the ``stack`` argument of the count channel.
    orientation : {"vertical", "horizontal"}
        "vertical" puts the binned values on x and counts on y;
        "horizontal" swaps the two.
    **kwargs
        Forwarded to ``self._get_mark_def``. A truthy ``subplots`` entry
        facets the chart by column, with the number of facet columns taken
        from ``_get_layout`` applied to ``layout`` (default ``(-1, 1)``).

    Raises
    ------
    ValueError
        If ``orientation`` is neither "vertical" nor "horizontal".
    """
    data = self._preprocess_data(with_index=False)

    if bins is None:
        bins = True
    elif isinstance(bins, int):
        bins = alt.Bin(maxbins=bins)

    if orientation not in ("vertical", "horizontal"):
        raise ValueError("orientation must be 'horizontal' or 'vertical'.")
    # Indep carries the binned values, Dep carries the counts.
    Indep, Dep = (alt.X, alt.Y) if orientation == "vertical" else (alt.Y, alt.X)

    mark = self._get_mark_def({"type": "bar", "orient": orientation}, kwargs)
    folded = alt.Chart(data, mark=mark).transform_fold(
        list(data.columns), as_=["column", "value"]
    )
    chart = folded.encode(
        Indep("value:Q", title=None, bin=bins),
        Dep("count()", title="Frequency", stack=stacked),
        color="column:N",
    )

    if not kwargs.get("subplots"):
        return chart

    # Only the column count of the computed layout is used.
    _, ncols = _get_layout(data.shape[1], kwargs.get("layout", (-1, 1)))
    faceted = chart.encode(facet=alt.Facet("column:N", title=None))
    return faceted.properties(columns=ncols)
Example #14
Source File: _core.py From altair_pandas with BSD 3-Clause "New" or "Revised" License | 5 votes |
def hist_frame(self, column=None, layout=(-1, 2), **kwargs):
    """Return a grid of histograms, one per numeric column.

    Parameters
    ----------
    column : str or list, optional
        Column(s) to keep, passed as ``usecols`` to the preprocessing step;
        a single string is wrapped in a list. ``None`` is passed through.
    layout : tuple
        Passed to ``_get_layout``; only the resulting column count is used.
    **kwargs
        Forwarded to ``self._get_mark_def`` together with the "bar" mark.
    """
    # A lone column name is treated as a one-element list.
    if isinstance(column, str):
        column = [column]

    data = self._preprocess_data(with_index=False, usecols=column)
    data = data._get_numeric_data()
    _, ncols = _get_layout(data.shape[1], layout)

    single = alt.Chart(data, mark=self._get_mark_def("bar", kwargs)).encode(
        x=alt.X(alt.repeat("repeat"), type="quantitative", bin=True),
        y=alt.Y("count()", title="Frequency"),
    )
    return single.repeat(repeat=list(data.columns), columns=ncols)
Example #15
Source File: _core.py From pdvega with MIT License | 5 votes |
def _x(x, df, ordinal_threshold=6, **kwargs):
    """Build the ``alt.X`` channel for column ``x`` of ``df``.

    The channel type is whatever ``infer_vegalite_type`` returns for
    ``df[x]`` with the given ``ordinal_threshold``; remaining keyword
    arguments are passed on to ``alt.X``.
    """
    inferred_type = infer_vegalite_type(
        df[x], ordinal_threshold=ordinal_threshold
    )
    return alt.X(field=x, type=inferred_type, **kwargs)
Example #16
Source File: app.py From demo-self-driving with Apache License 2.0 | 5 votes |
def frame_selector_ui(summary):
    """Draw the frame-selection widgets in the sidebar and return the choice.

    Returns ``(selected_frame_index, selected_frame)``, or ``(None, None)``
    when ``get_selected_frames`` yields no frames for the chosen object type
    and count range.
    """
    sidebar = st.sidebar
    sidebar.markdown("# Frame")

    # The user can pick which type of object to search for.
    object_type = sidebar.selectbox("Search for which objects?", summary.columns, 2)

    # The user can select a range for how many of the selected object should be present.
    min_elts, max_elts = sidebar.slider(
        "How many %ss (select a range)?" % object_type, 0, 25, [10, 20]
    )
    selected_frames = get_selected_frames(summary, object_type, min_elts, max_elts)
    if len(selected_frames) < 1:
        return None, None

    # Choose a frame out of the selected frames.
    selected_frame_index = sidebar.slider(
        "Choose a frame (index)", 0, len(selected_frames) - 1, 0
    )

    # Draw an altair chart in the sidebar with information on the frame.
    objects_per_frame = (
        summary.loc[selected_frames, object_type]
        .reset_index(drop=True)
        .reset_index()
    )
    area = alt.Chart(objects_per_frame, height=120).mark_area().encode(
        alt.X("index:Q", scale=alt.Scale(nice=False)),
        alt.Y("%s:Q" % object_type),
    )
    # Red vertical rule marking the currently selected frame.
    marker_df = pd.DataFrame({"selected_frame": [selected_frame_index]})
    marker = alt.Chart(marker_df).mark_rule(color="red").encode(
        alt.X("selected_frame:Q", axis=None)
    )
    sidebar.altair_chart(alt.layer(area, marker))

    return selected_frame_index, selected_frames[selected_frame_index]

# Select frames based on the selection in the sidebar
Example #17
Source File: _misc.py From altair_pandas with BSD 3-Clause "New" or "Revised" License | 4 votes |
def scatter_matrix(
    df,
    color: Union[str, None] = None,
    alpha: float = 1.0,
    tooltip: Union[List[str], tooltipList, None] = None,
    **kwargs
) -> alt.Chart:
    """Plot a scatter matrix.

    At the moment neither histograms nor KDE are supported on the diagonal;
    f-f scatterplots are used instead. The chart is interactive and has a
    customizable tooltip.

    Parameters
    ----------
    df : DataFrame
        DataFrame to be used for the scatterplot. Only numeric columns will
        be included.
    color : string [optional]
        Can be a column name or a specific color value (hex, webcolors).
    alpha : float
        Opacity of the markers, within [0,1]
    tooltip : list [optional]
        List of specific column names or alt.Tooltip objects. If none
        (default), will show all columns.
    **kwargs
        Only ``colormap`` is read: when ``color`` names a column, it is used
        as the colour scale scheme.
    """
    dfc = _preprocess_data(df)
    tooltip = _process_tooltip(tooltip) or dfc.columns.tolist()
    cols = dfc._get_numeric_data().columns.tolist()

    chart = (
        alt.Chart(dfc)
        .mark_circle()
        .encode(
            x=alt.X(alt.repeat("column"), type="quantitative"),
            # The y channel must be built with alt.Y, not alt.X.
            y=alt.Y(alt.repeat("row"), type="quantitative"),
            opacity=alt.value(alpha),
            tooltip=tooltip,
        )
        .properties(width=150, height=150)
    )

    if color:
        color = str(color)
        if color in dfc:
            # A column name: encode it as a colour channel.
            color = alt.Color(color)
            if "colormap" in kwargs:
                color.scale = alt.Scale(scheme=kwargs.get("colormap"))
        else:
            # Anything else is treated as a literal colour value.
            color = alt.value(color)
        chart = chart.encode(color=color)

    return chart.repeat(row=cols, column=cols).interactive()
Example #18
Source File: rewrite.py From errudite with GNU General Public License v2.0 | 4 votes |
def visualize_delta_confidence_per_model(self,
    instance_hash: Dict[InstanceKey, Instance]={},
    instance_hash_rewritten: Dict[InstanceKey, Instance]={},
    filtered_instances: List[InstanceKey]=None,
    model: str=None):
    """
    Visualize the rewrite distribution in terms of model confidence, as a
    histogram of the delta confidences. The histogram differs from model
    to model.

    Parameters
    ----------
    instance_hash : Dict[InstanceKey, Instance]
        The *original* instances, keyed by instance key. By default {},
        which resolves to ``Instance.instance_hash``.
    instance_hash_rewritten : Dict[InstanceKey, Instance]
        The *rewritten* instances, keyed by instance key. By default {},
        which resolves to ``Instance.instance_hash_rewritten``.
    filtered_instances : List[InstanceKey], optional
        A selected list of instances. If given, only the question ids of
        these instances are passed on, by default None
    model : str, optional
        The selected model, by default ``None``, which is resolved through
        ``Instance.resolve_default_model``.

    Returns
    -------
    alt.Chart
        An altair chart object.
    """
    model = Instance.resolve_default_model(model)
    instance_hash = instance_hash or Instance.instance_hash
    instance_hash_rewritten = instance_hash_rewritten or Instance.instance_hash_rewritten

    # Restrict to the unique question ids of the selection, if there is one.
    qids = (
        list(np.unique([i.qid for i in filtered_instances]))
        if filtered_instances
        else None
    )

    performance = Rewrite.get_delta_performance(
        self, qids, instance_hash, instance_hash_rewritten, model)
    deltas = performance['delta_confidences']
    df = pd.DataFrame([{"delta_confidence": d} for d in deltas])

    bars = alt.Chart(df).mark_bar().encode(
        y=alt.Y('count()'),
        x=alt.X('delta_confidence:Q', bin=True)
    )
    return bars.properties(width=150, height=100, title=f'{self.rid} on {model}')
Example #19
Source File: rewrite.py From errudite with GNU General Public License v2.0 | 4 votes |
def visualize_models(self,
    instance_hash: Dict[InstanceKey, Instance]={},
    instance_hash_rewritten: Dict[InstanceKey, Instance]={},
    filtered_instances: List[InstanceKey]=None,
    models: List[str]=[]):
    """
    Visualize the rewrite distribution. It's a one-bar histogram per model
    that displays the count of instances rewritten, split into the
    proportions of "flip_to_correct", "flip_to_incorrect" and "unflip".
    Because of the flipping proportion, this histogram is different for
    each different model.

    Parameters
    ----------
    instance_hash : Dict[InstanceKey, Instance]
        A dict that saves all the *original* instances, by default {}.
        It denotes by the corresponding instance keys.
        If ``{}``, resolve to ``Instance.instance_hash``.
    instance_hash_rewritten : Dict[InstanceKey, Instance]
        A dict that saves all the *rewritten* instances, by default {}.
        It denotes by the corresponding instance keys.
        If ``{}``, resolve to ``Instance.instance_hash_rewritten``.
    filtered_instances : List[InstanceKey], optional
        A selected list of instances. If given, only display the
        distribution of the selected instances, by default None
    models : List[str], optional
        A list of models, with the bars for each model concatenated
        vertically. By default []. If [], resolve to the default model
        returned by ``Instance.resolve_default_model(None)``.

    Returns
    -------
    alt.Chart
        An altair chart object.
    """
    # The mutable defaults are kept for interface compatibility; they are
    # only read here, never mutated.
    instance_hash = instance_hash or Instance.instance_hash
    instance_hash_rewritten = instance_hash_rewritten or Instance.instance_hash_rewritten
    models = models or [ Instance.resolve_default_model(None) ]

    output = []
    for model in models:
        data = self.serialize(instance_hash, instance_hash_rewritten, filtered_instances, model)
        for flip, count in data["counts"].items():
            output.append({
                "flip": flip,
                "count": count,
                "model": model
            })

    df = pd.DataFrame(output)
    chart = alt.Chart(df).mark_bar().encode(
        y=alt.Y('model:N'),
        x=alt.X('count:Q', stack="zero"),
        color=alt.Color('flip:N', scale=alt.Scale(
            range=["#1f77b4", "#ff7f0e", "#c7c7c7"],
            domain=["flip_to_correct", "flip_to_incorrect", "unflip"])),
        # The rows carry 'flip', not 'correctness'; the tooltip must
        # reference a field that exists in the data.
        tooltip=['model:N', 'count:Q', 'flip:N']
    ).properties(width=100)
    return chart
Example #20
Source File: group.py From errudite with GNU General Public License v2.0 | 4 votes |
def visualize_models(self,
    instance_hash: Dict[InstanceKey, Instance]={},
    instance_hash_rewritten: Dict[InstanceKey, Instance]={},
    filtered_instances: List[InstanceKey]=None,
    models: List[str]=[]):
    """
    Visualize the group distribution as a one-bar histogram per model: the
    bar length is the count of instances in the group, split by
    correctness. Because the split depends on the predictions, the
    histogram differs from model to model.

    Parameters
    ----------
    instance_hash : Dict[InstanceKey, Instance]
        The *original* instances, keyed by instance key. By default {},
        which resolves to ``Instance.instance_hash``.
    instance_hash_rewritten : Dict[InstanceKey, Instance]
        The *rewritten* instances, keyed by instance key. By default {},
        which resolves to ``Instance.instance_hash_rewritten``.
    filtered_instances : List[InstanceKey], optional
        A selected list of instances, passed on to ``self.serialize``,
        by default None
    models : List[str], optional
        The models to draw, one bar each. By default [], which resolves to
        the model returned by ``Instance.resolve_default_model(None)``.

    Returns
    -------
    alt.Chart
        An altair chart object.
    """
    instance_hash = instance_hash or Instance.instance_hash
    instance_hash_rewritten = instance_hash_rewritten or Instance.instance_hash_rewritten
    models = models or [ Instance.resolve_default_model(None) ]

    rows = []
    for model in models:
        serialized = self.serialize(
            instance_hash, instance_hash_rewritten, filtered_instances, model)
        # One row per (model, correctness) pair.
        rows.extend(
            {"correctness": correctness, "count": count, "model": model}
            for correctness, count in serialized["counts"].items()
        )

    df = pd.DataFrame(rows)
    color_channel = alt.Color(
        'correctness:N', scale=alt.Scale(domain=["correct", "incorrect"]))
    bars = alt.Chart(df).mark_bar().encode(
        y=alt.Y('model:N'),
        x=alt.X('count:Q', stack="zero"),
        color=color_channel,
        tooltip=['model:N', 'count:Q', 'correctness:N']
    )
    return bars.properties(width=100)
Example #21
Source File: explore.py From gobbli with Apache License 2.0 | 4 votes |
def show_label_distribution(
    sample_labels: Union[List[str], List[List[str]]],
    all_labels: Optional[Union[List[str], List[List[str]]]] = None,
):
    """Show the label distribution of a sample in the Streamlit app.

    Nothing is drawn when ``sample_labels`` is ``None``. With only
    ``sample_labels`` a single bar chart of "Proportion" per "Label" is
    shown. When ``all_labels`` is also given, the sample and the full set
    are drawn side by side per label, tagged "Sample" and "All Documents".
    Counts come from ``_collect_label_counts``.
    """
    if sample_labels is None:
        return

    st.header("Label Distribution")
    label_counts = _collect_label_counts(sample_labels)

    if all_labels is not None:
        # Tag both tables so they can be told apart after concatenation.
        label_counts["Label Set"] = "Sample"
        all_label_counts = _collect_label_counts(all_labels)
        all_label_counts["Label Set"] = "All Documents"
        label_counts = pd.concat([label_counts, all_label_counts])

        bars = alt.Chart(label_counts, width=100).mark_bar()
        label_chart = bars.encode(
            alt.X(
                "Label Set",
                type="nominal",
                title=None,
                sort=["Sample", "All Documents"],
            ),
            alt.Y("Proportion", type="quantitative"),
            alt.Column(
                "Label", type="nominal", header=alt.Header(labelAngle=0)
            ),
            alt.Color("Label Set", type="nominal", legend=None),
        )
    else:
        bars = alt.Chart(label_counts, height=500, width=700).mark_bar()
        label_chart = bars.encode(
            alt.X("Label", type="nominal"),
            alt.Y("Proportion", type="quantitative"),
        )

    st.altair_chart(label_chart)
Example #22
Source File: evaluate.py From gobbli with Apache License 2.0 | 4 votes |
def errors_report(self, k: int = 10) -> str:
    """
    Args:
      k: The number of results to return for each of false positives and false negatives.

    Returns:
      A nicely-formatted human-readable report describing the biggest mistakes made by
      the classifier for each class.
    """
    errors = self.errors(k=k)
    sections = ["Errors Report\n" "------------\n\n"]

    for label, (false_positives, false_negatives) in errors.items():

        def make_errors_str(label_errors: List[ClassificationError]) -> str:
            if self.multilabel:
                return "\n".join(
                    (
                        f"Correct Value: {label in e.y_true}\n"
                        # Newline added so "Text:" starts its own line, as
                        # in the single-label branch below.
                        f"Predicted Probability: {e.y_pred_proba[label]}\n"
                        f"Text: {truncate_text(escape_line_delimited_text(e.X), 500)}\n"
                    )
                    for e in label_errors
                )
            else:
                return "\n".join(
                    (
                        f"True Class: {e.y_true}\n"
                        f"Predicted Class: {e.y_pred} (Probability: {e.y_pred_proba[e.y_pred]})\n"
                        f"Text: {truncate_text(escape_line_delimited_text(e.X), 500)}\n"
                    )
                    for e in label_errors
                )

        # An empty list of errors renders as "None".
        false_positives_str = make_errors_str(false_positives) or "None"
        false_negatives_str = make_errors_str(false_negatives) or "None"

        header_name = "CLASS" if self.multilabel else "LABEL"

        sections.append(
            " -------\n"
            f"| {header_name}: {label}\n"
            " -------\n\n"
            "False Positives\n"
            "***************\n\n"
            f"{false_positives_str}\n\n"
            "False Negatives\n"
            "***************\n\n"
            f"{false_negatives_str}\n\n"
        )

    return "".join(sections)
Example #23
Source File: evaluate.py From gobbli with Apache License 2.0 | 4 votes |
def errors_for_label(self, label: str, k: int = 10):
    """
    Output the biggest mistakes for the given class by the classifier.

    Args:
      label: The label to return errors for.
      k: The number of results to return for each of false positives and false negatives.

    Returns:
      A 2-tuple. The first element is a list of the top ``k`` false positives, and
      the second element is a list of the top ``k`` false negatives.
    """
    predicted = self.y_pred_multilabel[label].astype("bool")
    actual = self.y_true_multilabel[label].astype("bool")

    # Rank the errors by how wrong they are and keep the top `k` of each:
    # false positives with the highest predicted probability first,
    # false negatives with the lowest predicted probability first.
    is_false_positive = predicted & ~actual
    false_positives = (
        self.y_pred_proba.loc[is_false_positive]
        .sort_values(by=label, ascending=False)
        .iloc[:k]
    )
    is_false_negative = ~predicted & actual
    false_negatives = (
        self.y_pred_proba.loc[is_false_negative]
        .sort_values(by=label, ascending=True)
        .iloc[:k]
    )

    def create_classification_errors(
        y_pred_proba: pd.DataFrame,
    ) -> List[ClassificationError]:
        # One ClassificationError per row, looked up by the row's index.
        return [
            ClassificationError(
                X=self.X[ndx],
                y_true=self.y_true[ndx],
                y_pred_proba=row.to_dict(),
            )
            for ndx, row in y_pred_proba.iterrows()
        ]

    fp_errors = create_classification_errors(false_positives)
    fn_errors = create_classification_errors(false_negatives)
    return (fp_errors, fn_errors)