-
Notifications
You must be signed in to change notification settings - Fork 2.4k
/
aiplatform_v1.projects.locations.endpoints.html
2886 lines (2758 loc) 路 389 KB
/
aiplatform_v1.projects.locations.endpoints.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<html><body>
<style>
body, h1, h2, h3, div, span, p, pre, a {
margin: 0;
padding: 0;
border: 0;
font-weight: inherit;
font-style: inherit;
font-size: 100%;
font-family: inherit;
vertical-align: baseline;
}
body {
font-size: 13px;
padding: 1em;
}
h1 {
font-size: 26px;
margin-bottom: 1em;
}
h2 {
font-size: 24px;
margin-bottom: 1em;
}
h3 {
font-size: 20px;
margin-bottom: 1em;
margin-top: 1em;
}
pre, code {
line-height: 1.5;
font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}
pre {
margin-top: 0.5em;
}
h1, h2, h3, p {
font-family: Arial, sans serif;
}
h1, h2, h3 {
border-bottom: solid #CCC 1px;
}
.toc_element {
margin-top: 0.5em;
}
.firstline {
margin-left: 2 em;
}
.method {
margin-top: 1em;
border: solid 1px #CCC;
padding: 1em;
background: #EEE;
}
.details {
font-weight: bold;
font-size: 14px;
}
</style>
<h1><a href="aiplatform_v1.html">Vertex AI API</a> . <a href="aiplatform_v1.projects.html">projects</a> . <a href="aiplatform_v1.projects.locations.html">locations</a> . <a href="aiplatform_v1.projects.locations.endpoints.html">endpoints</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
<code><a href="aiplatform_v1.projects.locations.endpoints.operations.html">operations()</a></code>
</p>
<p class="firstline">Returns the operations Resource.</p>
<p class="toc_element">
<code><a href="#close">close()</a></code></p>
<p class="firstline">Close httplib2 connections.</p>
<p class="toc_element">
<code><a href="#computeTokens">computeTokens(endpoint, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Return a list of tokens based on the input text.</p>
<p class="toc_element">
<code><a href="#countTokens">countTokens(endpoint, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Perform a token counting.</p>
<p class="toc_element">
<code><a href="#create">create(parent, body=None, endpointId=None, x__xgafv=None)</a></code></p>
<p class="firstline">Creates an Endpoint.</p>
<p class="toc_element">
<code><a href="#delete">delete(name, x__xgafv=None)</a></code></p>
<p class="firstline">Deletes an Endpoint.</p>
<p class="toc_element">
<code><a href="#deployModel">deployModel(endpoint, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Deploys a Model into this Endpoint, creating a DeployedModel within it.</p>
<p class="toc_element">
<code><a href="#directPredict">directPredict(endpoint, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Perform an unary online prediction request to a gRPC model server for Vertex first-party products and frameworks.</p>
<p class="toc_element">
<code><a href="#directRawPredict">directRawPredict(endpoint, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Perform an unary online prediction request to a gRPC model server for custom containers.</p>
<p class="toc_element">
<code><a href="#explain">explain(endpoint, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Perform an online explanation. If deployed_model_id is specified, the corresponding DeployModel must have explanation_spec populated. If deployed_model_id is not specified, all DeployedModels must have explanation_spec populated.</p>
<p class="toc_element">
<code><a href="#generateContent">generateContent(model, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Generate content with multimodal inputs.</p>
<p class="toc_element">
<code><a href="#get">get(name, x__xgafv=None)</a></code></p>
<p class="firstline">Gets an Endpoint.</p>
<p class="toc_element">
<code><a href="#list">list(parent, filter=None, orderBy=None, pageSize=None, pageToken=None, readMask=None, x__xgafv=None)</a></code></p>
<p class="firstline">Lists Endpoints in a Location.</p>
<p class="toc_element">
<code><a href="#list_next">list_next()</a></code></p>
<p class="firstline">Retrieves the next page of results.</p>
<p class="toc_element">
<code><a href="#mutateDeployedModel">mutateDeployedModel(endpoint, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Updates an existing deployed model. Updatable fields include `min_replica_count`, `max_replica_count`, `autoscaling_metric_specs`, `disable_container_logging` (v1 only), and `enable_container_logging` (v1beta1 only).</p>
<p class="toc_element">
<code><a href="#patch">patch(name, body=None, updateMask=None, x__xgafv=None)</a></code></p>
<p class="firstline">Updates an Endpoint.</p>
<p class="toc_element">
<code><a href="#predict">predict(endpoint, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Perform an online prediction.</p>
<p class="toc_element">
<code><a href="#rawPredict">rawPredict(endpoint, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Perform an online prediction with an arbitrary HTTP payload. The response includes the following HTTP headers: * `X-Vertex-AI-Endpoint-Id`: ID of the Endpoint that served this prediction. * `X-Vertex-AI-Deployed-Model-Id`: ID of the Endpoint's DeployedModel that served this prediction.</p>
<p class="toc_element">
<code><a href="#serverStreamingPredict">serverStreamingPredict(endpoint, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Perform a server-side streaming online prediction request for Vertex LLM streaming.</p>
<p class="toc_element">
<code><a href="#streamGenerateContent">streamGenerateContent(model, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Generate content with multimodal inputs with streaming support.</p>
<p class="toc_element">
<code><a href="#streamRawPredict">streamRawPredict(endpoint, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Perform a streaming online prediction with an arbitrary HTTP payload.</p>
<p class="toc_element">
<code><a href="#undeployModel">undeployModel(endpoint, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Undeploys a Model from an Endpoint, removing a DeployedModel from it, and freeing all resources it's using.</p>
<h3>Method Details</h3>
<div class="method">
<code class="details" id="close">close()</code>
<pre>Close httplib2 connections.</pre>
</div>
<div class="method">
<code class="details" id="computeTokens">computeTokens(endpoint, body=None, x__xgafv=None)</code>
<pre>Return a list of tokens based on the input text.
Args:
endpoint: string, Required. The name of the Endpoint requested to get lists of tokens and token ids. (required)
body: object, The request body.
The object takes the form of:
{ # Request message for ComputeTokens RPC call.
"instances": [ # Required. The instances that are the input to token computing API call. Schema is identical to the prediction schema of the text model, even for the non-text models, like chat models, or Codey models.
"",
],
}
x__xgafv: string, V1 error format.
Allowed values
1 - v1 error format
2 - v2 error format
Returns:
An object of the form:
{ # Response message for ComputeTokens RPC call.
"tokensInfo": [ # Lists of tokens info from the input. A ComputeTokensRequest could have multiple instances with a prompt in each instance. We also need to return lists of tokens info for the request with multiple instances.
{ # Tokens info with a list of tokens and the corresponding list of token ids.
"tokenIds": [ # A list of token ids from the input.
"A String",
],
"tokens": [ # A list of tokens from the input.
"A String",
],
},
],
}</pre>
</div>
<div class="method">
<code class="details" id="countTokens">countTokens(endpoint, body=None, x__xgafv=None)</code>
<pre>Perform a token counting.
Args:
endpoint: string, Required. The name of the Endpoint requested to perform token counting. Format: `projects/{project}/locations/{location}/endpoints/{endpoint}` (required)
body: object, The request body.
The object takes the form of:
{ # Request message for PredictionService.CountTokens.
"contents": [ # Required. Input content.
{ # The base structured datatype containing multi-part content of a message. A `Content` includes a `role` field designating the producer of the `Content` and a `parts` field containing multi-part data that contains the content of the message turn.
"parts": [ # Required. Ordered `Parts` that constitute a single message. Parts may have different IANA MIME types.
{ # A datatype containing media that is part of a multi-part `Content` message. A `Part` consists of data which has an associated datatype. A `Part` can only contain one of the accepted types in `Part.data`. A `Part` must have a fixed IANA MIME type identifying the type and subtype of the media if `inline_data` or `file_data` field is filled with raw bytes.
"fileData": { # URI based data. # Optional. URI based data.
"fileUri": "A String", # Required. URI.
"mimeType": "A String", # Required. The IANA standard MIME type of the source data.
},
"functionCall": { # A predicted [FunctionCall] returned from the model that contains a string representing the [FunctionDeclaration.name] and a structured JSON object containing the parameters and their values. # Optional. A predicted [FunctionCall] returned from the model that contains a string representing the [FunctionDeclaration.name] with the parameters and their values.
"args": { # Optional. Required. The function parameters and values in JSON object format. See [FunctionDeclaration.parameters] for parameter details.
"a_key": "", # Properties of the object.
},
"name": "A String", # Required. The name of the function to call. Matches [FunctionDeclaration.name].
},
"functionResponse": { # The result output from a [FunctionCall] that contains a string representing the [FunctionDeclaration.name] and a structured JSON object containing any output from the function is used as context to the model. This should contain the result of a [FunctionCall] made based on model prediction. # Optional. The result output of a [FunctionCall] that contains a string representing the [FunctionDeclaration.name] and a structured JSON object containing any output from the function call. It is used as context to the model.
"name": "A String", # Required. The name of the function to call. Matches [FunctionDeclaration.name] and [FunctionCall.name].
"response": { # Required. The function response in JSON object format.
"a_key": "", # Properties of the object.
},
},
"inlineData": { # Content blob. It's preferred to send as text directly rather than raw bytes. # Optional. Inlined bytes data.
"data": "A String", # Required. Raw bytes.
"mimeType": "A String", # Required. The IANA standard MIME type of the source data.
},
"text": "A String", # Optional. Text part (can be code).
"videoMetadata": { # Metadata describes the input video content. # Optional. Video metadata. The metadata should only be specified while the video data is presented in inline_data or file_data.
"endOffset": "A String", # Optional. The end offset of the video.
"startOffset": "A String", # Optional. The start offset of the video.
},
},
],
"role": "A String", # Optional. The producer of the content. Must be either 'user' or 'model'. Useful to set for multi-turn conversations, otherwise can be left blank or unset.
},
],
"instances": [ # Required. The instances that are the input to token counting call. Schema is identical to the prediction schema of the underlying model.
"",
],
"model": "A String", # Required. The name of the publisher model requested to serve the prediction. Format: `projects/{project}/locations/{location}/publishers/*/models/*`
}
x__xgafv: string, V1 error format.
Allowed values
1 - v1 error format
2 - v2 error format
Returns:
An object of the form:
{ # Response message for PredictionService.CountTokens.
"totalBillableCharacters": 42, # The total number of billable characters counted across all instances from the request.
"totalTokens": 42, # The total number of tokens counted across all instances from the request.
}</pre>
</div>
<div class="method">
<code class="details" id="create">create(parent, body=None, endpointId=None, x__xgafv=None)</code>
<pre>Creates an Endpoint.
Args:
parent: string, Required. The resource name of the Location to create the Endpoint in. Format: `projects/{project}/locations/{location}` (required)
body: object, The request body.
The object takes the form of:
{ # Models are deployed into it, and afterwards Endpoint is called to obtain predictions and explanations.
"createTime": "A String", # Output only. Timestamp when this Endpoint was created.
"deployedModels": [ # Output only. The models deployed in this Endpoint. To add or remove DeployedModels use EndpointService.DeployModel and EndpointService.UndeployModel respectively.
{ # A deployment of a Model. Endpoints contain one or more DeployedModels.
"automaticResources": { # A description of resources that to large degree are decided by Vertex AI, and require only a modest additional configuration. Each Model supporting these resources documents its specific guidelines. # A description of resources that to large degree are decided by Vertex AI, and require only a modest additional configuration.
"maxReplicaCount": 42, # Immutable. The maximum number of replicas this DeployedModel may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the DeployedModel increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, a no upper bound for scaling under heavy traffic will be assume, though Vertex AI may be unable to scale beyond certain replica number.
"minReplicaCount": 42, # Immutable. The minimum number of replicas this DeployedModel will be always deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas up to max_replica_count, and as traffic decreases, some of these extra replicas may be freed. If the requested value is too large, the deployment will error.
},
"createTime": "A String", # Output only. Timestamp when the DeployedModel was created.
"dedicatedResources": { # A description of resources that are dedicated to a DeployedModel, and that need a higher degree of manual configuration. # A description of resources that are dedicated to the DeployedModel, and that need a higher degree of manual configuration.
"autoscalingMetricSpecs": [ # Immutable. The metric specifications that overrides a resource utilization metric (CPU utilization, accelerator's duty cycle, and so on) target value (default to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both CPU utilization and accelerator's duty cycle metrics and scale up when either metrics exceeds its target value while scale down if both metrics are under their target value. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on CPU utilization metric only with default target value 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to `aiplatform.googleapis.com/prediction/online/cpu/utilization` and autoscaling_metric_specs.target to `80`.
{ # The metric specification that defines the target resource utilization (CPU utilization, accelerator's duty cycle, and so on) for calculating the desired replica count.
"metricName": "A String", # Required. The resource metric name. Supported metrics: * For Online Prediction: * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle` * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
"target": 42, # The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
},
],
"machineSpec": { # Specification of a single machine. # Required. Immutable. The specification of a single machine used by the prediction.
"acceleratorCount": 42, # The number of accelerators to attach to the machine.
"acceleratorType": "A String", # Immutable. The type of accelerator(s) that may be attached to the machine as per accelerator_count.
"machineType": "A String", # Immutable. The type of the machine. See the [list of machine types supported for prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types) See the [list of machine types supported for custom training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). For DeployedModel this field is optional, and the default value is `n1-standard-2`. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
"tpuTopology": "A String", # Immutable. The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
},
"maxReplicaCount": 42, # Immutable. The maximum number of replicas this DeployedModel may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the DeployedModel increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, will use min_replica_count as the default value. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
"minReplicaCount": 42, # Required. Immutable. The minimum number of machine replicas this DeployedModel will be always deployed on. This value must be greater than or equal to 1. If traffic against the DeployedModel increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
},
"disableContainerLogging": True or False, # For custom-trained Models and AutoML Tabular Models, the container of the DeployedModel instances will send `stderr` and `stdout` streams to Cloud Logging by default. Please note that the logs incur cost, which are subject to [Cloud Logging pricing](https://cloud.google.com/logging/pricing). User can disable container logging by setting this flag to true.
"disableExplanations": True or False, # If true, deploy the model without explainable feature, regardless the existence of Model.explanation_spec or explanation_spec.
"displayName": "A String", # The display name of the DeployedModel. If not provided upon creation, the Model's display_name is used.
"enableAccessLogging": True or False, # If true, online prediction access logs are sent to Cloud Logging. These logs are like standard server access logs, containing information like timestamp and latency for each prediction request. Note that logs may incur a cost, especially if your project receives prediction requests at a high queries per second rate (QPS). Estimate your costs before enabling this option.
"explanationSpec": { # Specification of Model explanation. # Explanation configuration for this DeployedModel. When deploying a Model using EndpointService.DeployModel, this value overrides the value of Model.explanation_spec. All fields of explanation_spec are optional in the request. If a field of explanation_spec is not populated, the value of the same field of Model.explanation_spec is inherited. If the corresponding Model.explanation_spec is not populated, all fields of the explanation_spec will be used for the explanation configuration.
"metadata": { # Metadata describing the Model's input and output for explanation. # Optional. Metadata describing the Model's input and output for explanation.
"featureAttributionsSchemaUri": "A String", # Points to a YAML file stored on Google Cloud Storage describing the format of the feature attributions. The schema is defined as an OpenAPI 3.0.2 [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schemaObject). AutoML tabular Models always have this field populated by Vertex AI. Note: The URI given on output may be different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access.
"inputs": { # Required. Map from feature names to feature input metadata. Keys are the name of the features. Values are the specification of the feature. An empty InputMetadata is valid. It describes a text feature which has the name specified as the key in ExplanationMetadata.inputs. The baseline of the empty feature is chosen by Vertex AI. For Vertex AI-provided Tensorflow images, the key can be any friendly name of the feature. Once specified, featureAttributions are keyed by this key (if not grouped with another feature). For custom images, the key must match with the key in instance.
"a_key": { # Metadata of the input of a feature. Fields other than InputMetadata.input_baselines are applicable only for Models that are using Vertex AI-provided images for Tensorflow.
"denseShapeTensorName": "A String", # Specifies the shape of the values of the input if the input is a sparse representation. Refer to Tensorflow documentation for more details: https://www.tensorflow.org/api_docs/python/tf/sparse/SparseTensor.
"encodedBaselines": [ # A list of baselines for the encoded tensor. The shape of each baseline should match the shape of the encoded tensor. If a scalar is provided, Vertex AI broadcasts to the same shape as the encoded tensor.
"",
],
"encodedTensorName": "A String", # Encoded tensor is a transformation of the input tensor. Must be provided if choosing Integrated Gradients attribution or XRAI attribution and the input tensor is not differentiable. An encoded tensor is generated if the input tensor is encoded by a lookup table.
"encoding": "A String", # Defines how the feature is encoded into the input tensor. Defaults to IDENTITY.
"featureValueDomain": { # Domain details of the input feature value. Provides numeric information about the feature, such as its range (min, max). If the feature has been pre-processed, for example with z-scoring, then it provides information about how to recover the original feature. For example, if the input feature is an image and it has been pre-processed to obtain 0-mean and stddev = 1 values, then original_mean, and original_stddev refer to the mean and stddev of the original feature (e.g. image tensor) from which input feature (with mean = 0 and stddev = 1) was obtained. # The domain details of the input feature value. Like min/max, original mean or standard deviation if normalized.
"maxValue": 3.14, # The maximum permissible value for this feature.
"minValue": 3.14, # The minimum permissible value for this feature.
"originalMean": 3.14, # If this input feature has been normalized to a mean value of 0, the original_mean specifies the mean value of the domain prior to normalization.
"originalStddev": 3.14, # If this input feature has been normalized to a standard deviation of 1.0, the original_stddev specifies the standard deviation of the domain prior to normalization.
},
"groupName": "A String", # Name of the group that the input belongs to. Features with the same group name will be treated as one feature when computing attributions. Features grouped together can have different shapes in value. If provided, there will be one single attribution generated in Attribution.feature_attributions, keyed by the group name.
"indexFeatureMapping": [ # A list of feature names for each index in the input tensor. Required when the input InputMetadata.encoding is BAG_OF_FEATURES, BAG_OF_FEATURES_SPARSE, INDICATOR.
"A String",
],
"indicesTensorName": "A String", # Specifies the index of the values of the input tensor. Required when the input tensor is a sparse representation. Refer to Tensorflow documentation for more details: https://www.tensorflow.org/api_docs/python/tf/sparse/SparseTensor.
"inputBaselines": [ # Baseline inputs for this feature. If no baseline is specified, Vertex AI chooses the baseline for this feature. If multiple baselines are specified, Vertex AI returns the average attributions across them in Attribution.feature_attributions. For Vertex AI-provided Tensorflow images (both 1.x and 2.x), the shape of each baseline must match the shape of the input tensor. If a scalar is provided, we broadcast to the same shape as the input tensor. For custom images, the element of the baselines must be in the same format as the feature's input in the instance[]. The schema of any single instance may be specified via Endpoint's DeployedModels' Model's PredictSchemata's instance_schema_uri.
"",
],
"inputTensorName": "A String", # Name of the input tensor for this feature. Required and is only applicable to Vertex AI-provided images for Tensorflow.
"modality": "A String", # Modality of the feature. Valid values are: numeric, image. Defaults to numeric.
"visualization": { # Visualization configurations for image explanation. # Visualization configurations for image explanation.
"clipPercentLowerbound": 3.14, # Excludes attributions below the specified percentile, from the highlighted areas. Defaults to 62.
"clipPercentUpperbound": 3.14, # Excludes attributions above the specified percentile from the highlighted areas. Using the clip_percent_upperbound and clip_percent_lowerbound together can be useful for filtering out noise and making it easier to see areas of strong attribution. Defaults to 99.9.
"colorMap": "A String", # The color scheme used for the highlighted areas. Defaults to PINK_GREEN for Integrated Gradients attribution, which shows positive attributions in green and negative in pink. Defaults to VIRIDIS for XRAI attribution, which highlights the most influential regions in yellow and the least influential in blue.
"overlayType": "A String", # How the original image is displayed in the visualization. Adjusting the overlay can help increase visual clarity if the original image makes it difficult to view the visualization. Defaults to NONE.
"polarity": "A String", # Whether to only highlight pixels with positive contributions, negative or both. Defaults to POSITIVE.
"type": "A String", # Type of the image visualization. Only applicable to Integrated Gradients attribution. OUTLINES shows regions of attribution, while PIXELS shows per-pixel attribution. Defaults to OUTLINES.
},
},
},
"latentSpaceSource": "A String", # Name of the source to generate embeddings for example based explanations.
"outputs": { # Required. Map from output names to output metadata. For Vertex AI-provided Tensorflow images, keys can be any user defined string that consists of any UTF-8 characters. For custom images, keys are the name of the output field in the prediction to be explained. Currently only one key is allowed.
"a_key": { # Metadata of the prediction output to be explained.
"displayNameMappingKey": "A String", # Specify a field name in the prediction to look for the display name. Use this if the prediction contains the display names for the outputs. The display names in the prediction must have the same shape of the outputs, so that it can be located by Attribution.output_index for a specific output.
"indexDisplayNameMapping": "", # Static mapping between the index and display name. Use this if the outputs are a deterministic n-dimensional array, e.g. a list of scores of all the classes in a pre-defined order for a multi-classification Model. It's not feasible if the outputs are non-deterministic, e.g. the Model produces top-k classes or sort the outputs by their values. The shape of the value must be an n-dimensional array of strings. The number of dimensions must match that of the outputs to be explained. The Attribution.output_display_name is populated by locating in the mapping with Attribution.output_index.
"outputTensorName": "A String", # Name of the output tensor. Required and is only applicable to Vertex AI provided images for Tensorflow.
},
},
},
"parameters": { # Parameters to configure explaining for Model's predictions. # Required. Parameters that configure explaining of the Model's predictions.
"examples": { # Example-based explainability that returns the nearest neighbors from the provided dataset. # Example-based explanations that returns the nearest neighbors from the provided dataset.
"exampleGcsSource": { # The Cloud Storage input instances. # The Cloud Storage input instances.
"dataFormat": "A String", # The format in which instances are given, if not specified, assume it's JSONL format. Currently only JSONL format is supported.
"gcsSource": { # The Google Cloud Storage location for the input content. # The Cloud Storage location for the input instances.
"uris": [ # Required. Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.
"A String",
],
},
},
"nearestNeighborSearchConfig": "", # The full configuration for the generated index, the semantics are the same as metadata and should match [NearestNeighborSearchConfig](https://cloud.google.com/vertex-ai/docs/explainable-ai/configuring-explanations-example-based#nearest-neighbor-search-config).
"neighborCount": 42, # The number of neighbors to return when querying for examples.
"presets": { # Preset configuration for example-based explanations # Simplified preset configuration, which automatically sets configuration values based on the desired query speed-precision trade-off and modality.
"modality": "A String", # The modality of the uploaded model, which automatically configures the distance measurement and feature normalization for the underlying example index and queries. If your model does not precisely fit one of these types, it is okay to choose the closest type.
"query": "A String", # Preset option controlling parameters for speed-precision trade-off when querying for examples. If omitted, defaults to `PRECISE`.
},
},
"integratedGradientsAttribution": { # An attribution method that computes the Aumann-Shapley value taking advantage of the model's fully differentiable structure. Refer to this paper for more details: https://arxiv.org/abs/1703.01365 # An attribution method that computes Aumann-Shapley values taking advantage of the model's fully differentiable structure. Refer to this paper for more details: https://arxiv.org/abs/1703.01365
"blurBaselineConfig": { # Config for blur baseline. When enabled, a linear path from the maximally blurred image to the input image is created. Using a blurred baseline instead of zero (black image) is motivated by the BlurIG approach explained here: https://arxiv.org/abs/2004.03383 # Config for IG with blur baseline. When enabled, a linear path from the maximally blurred image to the input image is created. Using a blurred baseline instead of zero (black image) is motivated by the BlurIG approach explained here: https://arxiv.org/abs/2004.03383
"maxBlurSigma": 3.14, # The standard deviation of the blur kernel for the blurred baseline. The same blurring parameter is used for both the height and the width dimension. If not set, the method defaults to the zero (i.e. black for images) baseline.
},
"smoothGradConfig": { # Config for SmoothGrad approximation of gradients. When enabled, the gradients are approximated by averaging the gradients from noisy samples in the vicinity of the inputs. Adding noise can help improve the computed gradients. Refer to this paper for more details: https://arxiv.org/pdf/1706.03825.pdf # Config for SmoothGrad approximation of gradients. When enabled, the gradients are approximated by averaging the gradients from noisy samples in the vicinity of the inputs. Adding noise can help improve the computed gradients. Refer to this paper for more details: https://arxiv.org/pdf/1706.03825.pdf
"featureNoiseSigma": { # Noise sigma by features. Noise sigma represents the standard deviation of the gaussian kernel that will be used to add noise to interpolated inputs prior to computing gradients. # This is similar to noise_sigma, but provides additional flexibility. A separate noise sigma can be provided for each feature, which is useful if their distributions are different. No noise is added to features that are not set. If this field is unset, noise_sigma will be used for all features.
"noiseSigma": [ # Noise sigma per feature. No noise is added to features that are not set.
{ # Noise sigma for a single feature.
"name": "A String", # The name of the input feature for which noise sigma is provided. The features are defined in explanation metadata inputs.
"sigma": 3.14, # This represents the standard deviation of the Gaussian kernel that will be used to add noise to the feature prior to computing gradients. Similar to noise_sigma but represents the noise added to the current feature. Defaults to 0.1.
},
],
},
"noiseSigma": 3.14, # This is a single float value and will be used to add noise to all the features. Use this field when all features are normalized to have the same distribution: scale to range [0, 1], [-1, 1] or z-scoring, where features are normalized to have 0-mean and 1-variance. Learn more about [normalization](https://developers.google.com/machine-learning/data-prep/transform/normalization). For best results the recommended value is about 10% - 20% of the standard deviation of the input feature. Refer to section 3.2 of the SmoothGrad paper: https://arxiv.org/pdf/1706.03825.pdf. Defaults to 0.1. If the distribution is different per feature, set feature_noise_sigma instead for each feature.
"noisySampleCount": 42, # The number of gradient samples to use for approximation. The higher this number, the more accurate the gradient is, but the runtime complexity increases by this factor as well. Valid range of its value is [1, 50]. Defaults to 3.
},
"stepCount": 42, # Required. The number of steps for approximating the path integral. A good value to start is 50 and gradually increase until the sum to diff property is within the desired error range. Valid range of its value is [1, 100], inclusively.
},
"outputIndices": [ # If populated, only returns attributions that have output_index contained in output_indices. It must be an ndarray of integers, with the same shape of the output it's explaining. If not populated, returns attributions for top_k indices of outputs. If neither top_k nor output_indices is populated, returns the argmax index of the outputs. Only applicable to Models that predict multiple outputs (e,g, multi-class Models that predict multiple classes).
"",
],
"sampledShapleyAttribution": { # An attribution method that approximates Shapley values for features that contribute to the label being predicted. A sampling strategy is used to approximate the value rather than considering all subsets of features. # An attribution method that approximates Shapley values for features that contribute to the label being predicted. A sampling strategy is used to approximate the value rather than considering all subsets of features. Refer to this paper for model details: https://arxiv.org/abs/1306.4265.
"pathCount": 42, # Required. The number of feature permutations to consider when approximating the Shapley values. Valid range of its value is [1, 50], inclusively.
},
"topK": 42, # If populated, returns attributions for top K indices of outputs (defaults to 1). Only applies to Models that predicts more than one outputs (e,g, multi-class Models). When set to -1, returns explanations for all outputs.
"xraiAttribution": { # An explanation method that redistributes Integrated Gradients attributions to segmented regions, taking advantage of the model's fully differentiable structure. Refer to this paper for more details: https://arxiv.org/abs/1906.02825 Supported only by image Models. # An attribution method that redistributes Integrated Gradients attribution to segmented regions, taking advantage of the model's fully differentiable structure. Refer to this paper for more details: https://arxiv.org/abs/1906.02825 XRAI currently performs better on natural images, like a picture of a house or an animal. If the images are taken in artificial environments, like a lab or manufacturing line, or from diagnostic equipment, like x-rays or quality-control cameras, use Integrated Gradients instead.
"blurBaselineConfig": { # Config for blur baseline. When enabled, a linear path from the maximally blurred image to the input image is created. Using a blurred baseline instead of zero (black image) is motivated by the BlurIG approach explained here: https://arxiv.org/abs/2004.03383 # Config for XRAI with blur baseline. When enabled, a linear path from the maximally blurred image to the input image is created. Using a blurred baseline instead of zero (black image) is motivated by the BlurIG approach explained here: https://arxiv.org/abs/2004.03383
"maxBlurSigma": 3.14, # The standard deviation of the blur kernel for the blurred baseline. The same blurring parameter is used for both the height and the width dimension. If not set, the method defaults to the zero (i.e. black for images) baseline.
},
"smoothGradConfig": { # Config for SmoothGrad approximation of gradients. When enabled, the gradients are approximated by averaging the gradients from noisy samples in the vicinity of the inputs. Adding noise can help improve the computed gradients. Refer to this paper for more details: https://arxiv.org/pdf/1706.03825.pdf # Config for SmoothGrad approximation of gradients. When enabled, the gradients are approximated by averaging the gradients from noisy samples in the vicinity of the inputs. Adding noise can help improve the computed gradients. Refer to this paper for more details: https://arxiv.org/pdf/1706.03825.pdf
"featureNoiseSigma": { # Noise sigma by features. Noise sigma represents the standard deviation of the gaussian kernel that will be used to add noise to interpolated inputs prior to computing gradients. # This is similar to noise_sigma, but provides additional flexibility. A separate noise sigma can be provided for each feature, which is useful if their distributions are different. No noise is added to features that are not set. If this field is unset, noise_sigma will be used for all features.
"noiseSigma": [ # Noise sigma per feature. No noise is added to features that are not set.
{ # Noise sigma for a single feature.
"name": "A String", # The name of the input feature for which noise sigma is provided. The features are defined in explanation metadata inputs.
"sigma": 3.14, # This represents the standard deviation of the Gaussian kernel that will be used to add noise to the feature prior to computing gradients. Similar to noise_sigma but represents the noise added to the current feature. Defaults to 0.1.
},
],
},
"noiseSigma": 3.14, # This is a single float value and will be used to add noise to all the features. Use this field when all features are normalized to have the same distribution: scale to range [0, 1], [-1, 1] or z-scoring, where features are normalized to have 0-mean and 1-variance. Learn more about [normalization](https://developers.google.com/machine-learning/data-prep/transform/normalization). For best results the recommended value is about 10% - 20% of the standard deviation of the input feature. Refer to section 3.2 of the SmoothGrad paper: https://arxiv.org/pdf/1706.03825.pdf. Defaults to 0.1. If the distribution is different per feature, set feature_noise_sigma instead for each feature.
"noisySampleCount": 42, # The number of gradient samples to use for approximation. The higher this number, the more accurate the gradient is, but the runtime complexity increases by this factor as well. Valid range of its value is [1, 50]. Defaults to 3.
},
"stepCount": 42, # Required. The number of steps for approximating the path integral. A good value to start is 50 and gradually increase until the sum to diff property is met within the desired error range. Valid range of its value is [1, 100], inclusively.
},
},
},
"id": "A String", # Immutable. The ID of the DeployedModel. If not provided upon deployment, Vertex AI will generate a value for this ID. This value should be 1-10 characters, and valid characters are `/[0-9]/`.
"model": "A String", # Required. The resource name of the Model that this is the deployment of. Note that the Model may be in a different location than the DeployedModel's Endpoint. The resource name may contain version id or version alias to specify the version. Example: `projects/{project}/locations/{location}/models/{model}@2` or `projects/{project}/locations/{location}/models/{model}@golden` if no version is specified, the default version will be deployed.
"modelVersionId": "A String", # Output only. The version ID of the model that is deployed.
"privateEndpoints": { # PrivateEndpoints proto is used to provide paths for users to send requests privately. To send request via private service access, use predict_http_uri, explain_http_uri or health_http_uri. To send request via private service connect, use service_attachment. # Output only. Provide paths for users to send predict/explain/health requests directly to the deployed model services running on Cloud via private services access. This field is populated if network is configured.
"explainHttpUri": "A String", # Output only. Http(s) path to send explain requests.
"healthHttpUri": "A String", # Output only. Http(s) path to send health check requests.
"predictHttpUri": "A String", # Output only. Http(s) path to send prediction requests.
"serviceAttachment": "A String", # Output only. The name of the service attachment resource. Populated if private service connect is enabled.
},
"serviceAccount": "A String", # The service account that the DeployedModel's container runs as. Specify the email address of the service account. If this service account is not specified, the container runs as a service account that doesn't have access to the resource project. Users deploying the Model must have the `iam.serviceAccounts.actAs` permission on this service account.
"sharedResources": "A String", # The resource name of the shared DeploymentResourcePool to deploy on. Format: `projects/{project}/locations/{location}/deploymentResourcePools/{deployment_resource_pool}`
},
],
"description": "A String", # The description of the Endpoint.
"displayName": "A String", # Required. The display name of the Endpoint. The name can be up to 128 characters long and can consist of any UTF-8 characters.
"enablePrivateServiceConnect": True or False, # Deprecated: If true, expose the Endpoint via private service connect. Only one of the fields, network or enable_private_service_connect, can be set.
"encryptionSpec": { # Represents a customer-managed encryption key spec that can be applied to a top-level resource. # Customer-managed encryption key spec for an Endpoint. If set, this Endpoint and all sub-resources of this Endpoint will be secured by this key.
"kmsKeyName": "A String", # Required. The Cloud KMS resource identifier of the customer managed encryption key used to protect a resource. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created.
},
"etag": "A String", # Used to perform consistent read-modify-write updates. If not set, a blind "overwrite" update happens.
"labels": { # The labels with user-defined metadata to organize your Endpoints. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels.
"a_key": "A String",
},
"modelDeploymentMonitoringJob": "A String", # Output only. Resource name of the Model Monitoring job associated with this Endpoint if monitoring is enabled by JobService.CreateModelDeploymentMonitoringJob. Format: `projects/{project}/locations/{location}/modelDeploymentMonitoringJobs/{model_deployment_monitoring_job}`
"name": "A String", # Output only. The resource name of the Endpoint.
"network": "A String", # Optional. The full name of the Google Compute Engine [network](https://cloud.google.com//compute/docs/networks-and-firewalls#networks) to which the Endpoint should be peered. Private services access must already be configured for the network. If left unspecified, the Endpoint is not peered with any network. Only one of the fields, network or enable_private_service_connect, can be set. [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert): `projects/{project}/global/networks/{network}`. Where `{project}` is a project number, as in `12345`, and `{network}` is network name.
"predictRequestResponseLoggingConfig": { # Configuration for logging request-response to a BigQuery table. # Configures the request-response logging for online prediction.
"bigqueryDestination": { # The BigQuery location for the output content. # BigQuery table for logging. If only given a project, a new dataset will be created with name `logging__` where will be made BigQuery-dataset-name compatible (e.g. most special characters will become underscores). If no table name is given, a new table will be created with name `request_response_logging`
"outputUri": "A String", # Required. BigQuery URI to a project or table, up to 2000 characters long. When only the project is specified, the Dataset and Table is created. When the full table reference is specified, the Dataset must exist and table must not exist. Accepted forms: * BigQuery path. For example: `bq://projectId` or `bq://projectId.bqDatasetId` or `bq://projectId.bqDatasetId.bqTableId`.
},
"enabled": True or False, # If logging is enabled or not.
"samplingRate": 3.14, # Percentage of requests to be logged, expressed as a fraction in range(0,1].
},
"trafficSplit": { # A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If a DeployedModel's ID is not listed in this map, then it receives no traffic. The traffic percentage values must add up to 100, or map must be empty if the Endpoint is to not accept any traffic at a moment.
"a_key": 42,
},
"updateTime": "A String", # Output only. Timestamp when this Endpoint was last updated.
}
endpointId: string, Immutable. The ID to use for endpoint, which will become the final component of the endpoint resource name. If not provided, Vertex AI will generate a value for this ID. If the first character is a letter, this value may be up to 63 characters, and valid characters are `[a-z0-9-]`. The last character must be a letter or number. If the first character is a number, this value may be up to 9 characters, and valid characters are `[0-9]` with no leading zeros. When using HTTP/JSON, this field is populated based on a query string argument, such as `?endpoint_id=12345`. This is the fallback for fields that are not included in either the URI or the body.
x__xgafv: string, V1 error format.
Allowed values
1 - v1 error format
2 - v2 error format
Returns:
An object of the form:
{ # This resource represents a long-running operation that is the result of a network API call.
"done": True or False, # If the value is `false`, it means the operation is still in progress. If `true`, the operation is completed, and either `error` or `response` is available.
"error": { # The `Status` type defines a logical error model that is suitable for different programming environments, including REST APIs and RPC APIs. It is used by [gRPC](https://github.com/grpc). Each `Status` message contains three pieces of data: error code, error message, and error details. You can find out more about this error model and how to work with it in the [API Design Guide](https://cloud.google.com/apis/design/errors). # The error result of the operation in case of failure or cancellation.
"code": 42, # The status code, which should be an enum value of google.rpc.Code.
"details": [ # A list of messages that carry the error details. There is a common set of message types for APIs to use.
{
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
],
"message": "A String", # A developer-facing error message, which should be in English. Any user-facing error message should be localized and sent in the google.rpc.Status.details field, or localized by the client.
},
"metadata": { # Service-specific metadata associated with the operation. It typically contains progress information and common metadata such as create time. Some services might not provide such metadata. Any method that returns a long-running operation should document the metadata type, if any.
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
"name": "A String", # The server-assigned name, which is only unique within the same service that originally returns it. If you use the default HTTP mapping, the `name` should be a resource name ending with `operations/{unique_id}`.
"response": { # The normal, successful response of the operation. If the original method returns no data on success, such as `Delete`, the response is `google.protobuf.Empty`. If the original method is standard `Get`/`Create`/`Update`, the response should be the resource. For other methods, the response should have the type `XxxResponse`, where `Xxx` is the original method name. For example, if the original method name is `TakeSnapshot()`, the inferred response type is `TakeSnapshotResponse`.
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
}</pre>
</div>
<div class="method">
<code class="details" id="delete">delete(name, x__xgafv=None)</code>
<pre>Deletes an Endpoint.
Args:
name: string, Required. The name of the Endpoint resource to be deleted. Format: `projects/{project}/locations/{location}/endpoints/{endpoint}` (required)
x__xgafv: string, V1 error format.
Allowed values
1 - v1 error format
2 - v2 error format
Returns:
An object of the form:
{ # This resource represents a long-running operation that is the result of a network API call.
"done": True or False, # If the value is `false`, it means the operation is still in progress. If `true`, the operation is completed, and either `error` or `response` is available.
"error": { # The `Status` type defines a logical error model that is suitable for different programming environments, including REST APIs and RPC APIs. It is used by [gRPC](https://github.com/grpc). Each `Status` message contains three pieces of data: error code, error message, and error details. You can find out more about this error model and how to work with it in the [API Design Guide](https://cloud.google.com/apis/design/errors). # The error result of the operation in case of failure or cancellation.
"code": 42, # The status code, which should be an enum value of google.rpc.Code.
"details": [ # A list of messages that carry the error details. There is a common set of message types for APIs to use.
{
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
],
"message": "A String", # A developer-facing error message, which should be in English. Any user-facing error message should be localized and sent in the google.rpc.Status.details field, or localized by the client.
},
"metadata": { # Service-specific metadata associated with the operation. It typically contains progress information and common metadata such as create time. Some services might not provide such metadata. Any method that returns a long-running operation should document the metadata type, if any.
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
"name": "A String", # The server-assigned name, which is only unique within the same service that originally returns it. If you use the default HTTP mapping, the `name` should be a resource name ending with `operations/{unique_id}`.
"response": { # The normal, successful response of the operation. If the original method returns no data on success, such as `Delete`, the response is `google.protobuf.Empty`. If the original method is standard `Get`/`Create`/`Update`, the response should be the resource. For other methods, the response should have the type `XxxResponse`, where `Xxx` is the original method name. For example, if the original method name is `TakeSnapshot()`, the inferred response type is `TakeSnapshotResponse`.
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
}</pre>
</div>
<div class="method">
<code class="details" id="deployModel">deployModel(endpoint, body=None, x__xgafv=None)</code>
<pre>Deploys a Model into this Endpoint, creating a DeployedModel within it.
Args:
endpoint: string, Required. The name of the Endpoint resource into which to deploy a Model. Format: `projects/{project}/locations/{location}/endpoints/{endpoint}` (required)
body: object, The request body.
The object takes the form of:
{ # Request message for EndpointService.DeployModel.
"deployedModel": { # A deployment of a Model. Endpoints contain one or more DeployedModels. # Required. The DeployedModel to be created within the Endpoint. Note that Endpoint.traffic_split must be updated for the DeployedModel to start receiving traffic, either as part of this call, or via EndpointService.UpdateEndpoint.
"automaticResources": { # A description of resources that to large degree are decided by Vertex AI, and require only a modest additional configuration. Each Model supporting these resources documents its specific guidelines. # A description of resources that to large degree are decided by Vertex AI, and require only a modest additional configuration.
"maxReplicaCount": 42, # Immutable. The maximum number of replicas this DeployedModel may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the DeployedModel increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, a no upper bound for scaling under heavy traffic will be assume, though Vertex AI may be unable to scale beyond certain replica number.
"minReplicaCount": 42, # Immutable. The minimum number of replicas this DeployedModel will be always deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas up to max_replica_count, and as traffic decreases, some of these extra replicas may be freed. If the requested value is too large, the deployment will error.
},
"createTime": "A String", # Output only. Timestamp when the DeployedModel was created.
"dedicatedResources": { # A description of resources that are dedicated to a DeployedModel, and that need a higher degree of manual configuration. # A description of resources that are dedicated to the DeployedModel, and that need a higher degree of manual configuration.
"autoscalingMetricSpecs": [ # Immutable. The metric specifications that overrides a resource utilization metric (CPU utilization, accelerator's duty cycle, and so on) target value (default to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both CPU utilization and accelerator's duty cycle metrics and scale up when either metrics exceeds its target value while scale down if both metrics are under their target value. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on CPU utilization metric only with default target value 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to `aiplatform.googleapis.com/prediction/online/cpu/utilization` and autoscaling_metric_specs.target to `80`.
{ # The metric specification that defines the target resource utilization (CPU utilization, accelerator's duty cycle, and so on) for calculating the desired replica count.
"metricName": "A String", # Required. The resource metric name. Supported metrics: * For Online Prediction: * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle` * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
"target": 42, # The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
},
],
"machineSpec": { # Specification of a single machine. # Required. Immutable. The specification of a single machine used by the prediction.
"acceleratorCount": 42, # The number of accelerators to attach to the machine.
"acceleratorType": "A String", # Immutable. The type of accelerator(s) that may be attached to the machine as per accelerator_count.
"machineType": "A String", # Immutable. The type of the machine. See the [list of machine types supported for prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types) See the [list of machine types supported for custom training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). For DeployedModel this field is optional, and the default value is `n1-standard-2`. For BatchPredictionJob or as part of WorkerPoolSpec this field is required.
"tpuTopology": "A String", # Immutable. The topology of the TPUs. Corresponds to the TPU topologies available from GKE. (Example: tpu_topology: "2x2x1").
},
"maxReplicaCount": 42, # Immutable. The maximum number of replicas this DeployedModel may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the DeployedModel increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, will use min_replica_count as the default value. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
"minReplicaCount": 42, # Required. Immutable. The minimum number of machine replicas this DeployedModel will be always deployed on. This value must be greater than or equal to 1. If traffic against the DeployedModel increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
},
"disableContainerLogging": True or False, # For custom-trained Models and AutoML Tabular Models, the container of the DeployedModel instances will send `stderr` and `stdout` streams to Cloud Logging by default. Please note that the logs incur cost, which are subject to [Cloud Logging pricing](https://cloud.google.com/logging/pricing). User can disable container logging by setting this flag to true.
"disableExplanations": True or False, # If true, deploy the model without explainable feature, regardless the existence of Model.explanation_spec or explanation_spec.
"displayName": "A String", # The display name of the DeployedModel. If not provided upon creation, the Model's display_name is used.
"enableAccessLogging": True or False, # If true, online prediction access logs are sent to Cloud Logging. These logs are like standard server access logs, containing information like timestamp and latency for each prediction request. Note that logs may incur a cost, especially if your project receives prediction requests at a high queries per second rate (QPS). Estimate your costs before enabling this option.
"explanationSpec": { # Specification of Model explanation. # Explanation configuration for this DeployedModel. When deploying a Model using EndpointService.DeployModel, this value overrides the value of Model.explanation_spec. All fields of explanation_spec are optional in the request. If a field of explanation_spec is not populated, the value of the same field of Model.explanation_spec is inherited. If the corresponding Model.explanation_spec is not populated, all fields of the explanation_spec will be used for the explanation configuration.
"metadata": { # Metadata describing the Model's input and output for explanation. # Optional. Metadata describing the Model's input and output for explanation.
"featureAttributionsSchemaUri": "A String", # Points to a YAML file stored on Google Cloud Storage describing the format of the feature attributions. The schema is defined as an OpenAPI 3.0.2 [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schemaObject). AutoML tabular Models always have this field populated by Vertex AI. Note: The URI given on output may be different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access.
"inputs": { # Required. Map from feature names to feature input metadata. Keys are the name of the features. Values are the specification of the feature. An empty InputMetadata is valid. It describes a text feature which has the name specified as the key in ExplanationMetadata.inputs. The baseline of the empty feature is chosen by Vertex AI. For Vertex AI-provided Tensorflow images, the key can be any friendly name of the feature. Once specified, featureAttributions are keyed by this key (if not grouped with another feature). For custom images, the key must match with the key in instance.
"a_key": { # Metadata of the input of a feature. Fields other than InputMetadata.input_baselines are applicable only for Models that are using Vertex AI-provided images for Tensorflow.
"denseShapeTensorName": "A String", # Specifies the shape of the values of the input if the input is a sparse representation. Refer to Tensorflow documentation for more details: https://www.tensorflow.org/api_docs/python/tf/sparse/SparseTensor.
"encodedBaselines": [ # A list of baselines for the encoded tensor. The shape of each baseline should match the shape of the encoded tensor. If a scalar is provided, Vertex AI broadcasts to the same shape as the encoded tensor.
"",
],
"encodedTensorName": "A String", # Encoded tensor is a transformation of the input tensor. Must be provided if choosing Integrated Gradients attribution or XRAI attribution and the input tensor is not differentiable. An encoded tensor is generated if the input tensor is encoded by a lookup table.
"encoding": "A String", # Defines how the feature is encoded into the input tensor. Defaults to IDENTITY.
"featureValueDomain": { # Domain details of the input feature value. Provides numeric information about the feature, such as its range (min, max). If the feature has been pre-processed, for example with z-scoring, then it provides information about how to recover the original feature. For example, if the input feature is an image and it has been pre-processed to obtain 0-mean and stddev = 1 values, then original_mean, and original_stddev refer to the mean and stddev of the original feature (e.g. image tensor) from which input feature (with mean = 0 and stddev = 1) was obtained. # The domain details of the input feature value. Like min/max, original mean or standard deviation if normalized.
"maxValue": 3.14, # The maximum permissible value for this feature.
"minValue": 3.14, # The minimum permissible value for this feature.
"originalMean": 3.14, # If this input feature has been normalized to a mean value of 0, the original_mean specifies the mean value of the domain prior to normalization.
"originalStddev": 3.14, # If this input feature has been normalized to a standard deviation of 1.0, the original_stddev specifies the standard deviation of the domain prior to normalization.
},
"groupName": "A String", # Name of the group that the input belongs to. Features with the same group name will be treated as one feature when computing attributions. Features grouped together can have different shapes in value. If provided, there will be one single attribution generated in Attribution.feature_attributions, keyed by the group name.
"indexFeatureMapping": [ # A list of feature names for each index in the input tensor. Required when the input InputMetadata.encoding is BAG_OF_FEATURES, BAG_OF_FEATURES_SPARSE, INDICATOR.
"A String",
],
"indicesTensorName": "A String", # Specifies the index of the values of the input tensor. Required when the input tensor is a sparse representation. Refer to Tensorflow documentation for more details: https://www.tensorflow.org/api_docs/python/tf/sparse/SparseTensor.
"inputBaselines": [ # Baseline inputs for this feature. If no baseline is specified, Vertex AI chooses the baseline for this feature. If multiple baselines are specified, Vertex AI returns the average attributions across them in Attribution.feature_attributions. For Vertex AI-provided Tensorflow images (both 1.x and 2.x), the shape of each baseline must match the shape of the input tensor. If a scalar is provided, we broadcast to the same shape as the input tensor. For custom images, the element of the baselines must be in the same format as the feature's input in the instance[]. The schema of any single instance may be specified via Endpoint's DeployedModels' Model's PredictSchemata's instance_schema_uri.
"",
],
"inputTensorName": "A String", # Name of the input tensor for this feature. Required and is only applicable to Vertex AI-provided images for Tensorflow.
"modality": "A String", # Modality of the feature. Valid values are: numeric, image. Defaults to numeric.
"visualization": { # Visualization configurations for image explanation. # Visualization configurations for image explanation.
"clipPercentLowerbound": 3.14, # Excludes attributions below the specified percentile, from the highlighted areas. Defaults to 62.
"clipPercentUpperbound": 3.14, # Excludes attributions above the specified percentile from the highlighted areas. Using the clip_percent_upperbound and clip_percent_lowerbound together can be useful for filtering out noise and making it easier to see areas of strong attribution. Defaults to 99.9.
"colorMap": "A String", # The color scheme used for the highlighted areas. Defaults to PINK_GREEN for Integrated Gradients attribution, which shows positive attributions in green and negative in pink. Defaults to VIRIDIS for XRAI attribution, which highlights the most influential regions in yellow and the least influential in blue.
"overlayType": "A String", # How the original image is displayed in the visualization. Adjusting the overlay can help increase visual clarity if the original image makes it difficult to view the visualization. Defaults to NONE.
"polarity": "A String", # Whether to only highlight pixels with positive contributions, negative or both. Defaults to POSITIVE.
"type": "A String", # Type of the image visualization. Only applicable to Integrated Gradients attribution. OUTLINES shows regions of attribution, while PIXELS shows per-pixel attribution. Defaults to OUTLINES.
},
},
},
"latentSpaceSource": "A String", # Name of the source to generate embeddings for example based explanations.
"outputs": { # Required. Map from output names to output metadata. For Vertex AI-provided Tensorflow images, keys can be any user defined string that consists of any UTF-8 characters. For custom images, keys are the name of the output field in the prediction to be explained. Currently only one key is allowed.
"a_key": { # Metadata of the prediction output to be explained.
"displayNameMappingKey": "A String", # Specify a field name in the prediction to look for the display name. Use this if the prediction contains the display names for the outputs. The display names in the prediction must have the same shape of the outputs, so that it can be located by Attribution.output_index for a specific output.
"indexDisplayNameMapping": "", # Static mapping between the index and display name. Use this if the outputs are a deterministic n-dimensional array, e.g. a list of scores of all the classes in a pre-defined order for a multi-classification Model. It's not feasible if the outputs are non-deterministic, e.g. the Model produces top-k classes or sort the outputs by their values. The shape of the value must be an n-dimensional array of strings. The number of dimensions must match that of the outputs to be explained. The Attribution.output_display_name is populated by locating in the mapping with Attribution.output_index.
"outputTensorName": "A String", # Name of the output tensor. Required and is only applicable to Vertex AI provided images for Tensorflow.
},
},
},
"parameters": { # Parameters to configure explaining for Model's predictions. # Required. Parameters that configure explaining of the Model's predictions.
"examples": { # Example-based explainability that returns the nearest neighbors from the provided dataset. # Example-based explanations that returns the nearest neighbors from the provided dataset.
"exampleGcsSource": { # The Cloud Storage input instances. # The Cloud Storage input instances.
"dataFormat": "A String", # The format in which instances are given, if not specified, assume it's JSONL format. Currently only JSONL format is supported.
"gcsSource": { # The Google Cloud Storage location for the input content. # The Cloud Storage location for the input instances.
"uris": [ # Required. Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.
"A String",
],
},
},
"nearestNeighborSearchConfig": "", # The full configuration for the generated index, the semantics are the same as metadata and should match [NearestNeighborSearchConfig](https://cloud.google.com/vertex-ai/docs/explainable-ai/configuring-explanations-example-based#nearest-neighbor-search-config).
"neighborCount": 42, # The number of neighbors to return when querying for examples.
"presets": { # Preset configuration for example-based explanations # Simplified preset configuration, which automatically sets configuration values based on the desired query speed-precision trade-off and modality.
"modality": "A String", # The modality of the uploaded model, which automatically configures the distance measurement and feature normalization for the underlying example index and queries. If your model does not precisely fit one of these types, it is okay to choose the closest type.
"query": "A String", # Preset option controlling parameters for speed-precision trade-off when querying for examples. If omitted, defaults to `PRECISE`.
},
},
"integratedGradientsAttribution": { # An attribution method that computes the Aumann-Shapley value taking advantage of the model's fully differentiable structure. Refer to this paper for more details: https://arxiv.org/abs/1703.01365 # An attribution method that computes Aumann-Shapley values taking advantage of the model's fully differentiable structure. Refer to this paper for more details: https://arxiv.org/abs/1703.01365
"blurBaselineConfig": { # Config for blur baseline. When enabled, a linear path from the maximally blurred image to the input image is created. Using a blurred baseline instead of zero (black image) is motivated by the BlurIG approach explained here: https://arxiv.org/abs/2004.03383 # Config for IG with blur baseline. When enabled, a linear path from the maximally blurred image to the input image is created. Using a blurred baseline instead of zero (black image) is motivated by the BlurIG approach explained here: https://arxiv.org/abs/2004.03383
"maxBlurSigma": 3.14, # The standard deviation of the blur kernel for the blurred baseline. The same blurring parameter is used for both the height and the width dimension. If not set, the method defaults to the zero (i.e. black for images) baseline.
},
"smoothGradConfig": { # Config for SmoothGrad approximation of gradients. When enabled, the gradients are approximated by averaging the gradients from noisy samples in the vicinity of the inputs. Adding noise can help improve the computed gradients. Refer to this paper for more details: https://arxiv.org/pdf/1706.03825.pdf # Config for SmoothGrad approximation of gradients. When enabled, the gradients are approximated by averaging the gradients from noisy samples in the vicinity of the inputs. Adding noise can help improve the computed gradients. Refer to this paper for more details: https://arxiv.org/pdf/1706.03825.pdf
"featureNoiseSigma": { # Noise sigma by features. Noise sigma represents the standard deviation of the gaussian kernel that will be used to add noise to interpolated inputs prior to computing gradients. # This is similar to noise_sigma, but provides additional flexibility. A separate noise sigma can be provided for each feature, which is useful if their distributions are different. No noise is added to features that are not set. If this field is unset, noise_sigma will be used for all features.
"noiseSigma": [ # Noise sigma per feature. No noise is added to features that are not set.
{ # Noise sigma for a single feature.
"name": "A String", # The name of the input feature for which noise sigma is provided. The features are defined in explanation metadata inputs.
"sigma": 3.14, # This represents the standard deviation of the Gaussian kernel that will be used to add noise to the feature prior to computing gradients. Similar to noise_sigma but represents the noise added to the current feature. Defaults to 0.1.
},
],
},
"noiseSigma": 3.14, # This is a single float value and will be used to add noise to all the features. Use this field when all features are normalized to have the same distribution: scale to range [0, 1], [-1, 1] or z-scoring, where features are normalized to have 0-mean and 1-variance. Learn more about [normalization](https://developers.google.com/machine-learning/data-prep/transform/normalization). For best results the recommended value is about 10% - 20% of the standard deviation of the input feature. Refer to section 3.2 of the SmoothGrad paper: https://arxiv.org/pdf/1706.03825.pdf. Defaults to 0.1. If the distribution is different per feature, set feature_noise_sigma instead for each feature.
"noisySampleCount": 42, # The number of gradient samples to use for approximation. The higher this number, the more accurate the gradient is, but the runtime complexity increases by this factor as well. Valid range of its value is [1, 50]. Defaults to 3.
},
"stepCount": 42, # Required. The number of steps for approximating the path integral. A good value to start is 50 and gradually increase until the sum to diff property is within the desired error range. Valid range of its value is [1, 100], inclusively.
},
"outputIndices": [ # If populated, only returns attributions that have output_index contained in output_indices. It must be an ndarray of integers, with the same shape of the output it's explaining. If not populated, returns attributions for top_k indices of outputs. If neither top_k nor output_indices is populated, returns the argmax index of the outputs. Only applicable to Models that predict multiple outputs (e,g, multi-class Models that predict multiple classes).
"",
],
"sampledShapleyAttribution": { # An attribution method that approximates Shapley values for features that contribute to the label being predicted. A sampling strategy is used to approximate the value rather than considering all subsets of features. # An attribution method that approximates Shapley values for features that contribute to the label being predicted. A sampling strategy is used to approximate the value rather than considering all subsets of features. Refer to this paper for model details: https://arxiv.org/abs/1306.4265.
"pathCount": 42, # Required. The number of feature permutations to consider when approximating the Shapley values. Valid range of its value is [1, 50], inclusively.
},
"topK": 42, # If populated, returns attributions for top K indices of outputs (defaults to 1). Only applies to Models that predicts more than one outputs (e,g, multi-class Models). When set to -1, returns explanations for all outputs.
"xraiAttribution": { # An explanation method that redistributes Integrated Gradients attributions to segmented regions, taking advantage of the model's fully differentiable structure. Refer to this paper for more details: https://arxiv.org/abs/1906.02825 Supported only by image Models. # An attribution method that redistributes Integrated Gradients attribution to segmented regions, taking advantage of the model's fully differentiable structure. Refer to this paper for more details: https://arxiv.org/abs/1906.02825 XRAI currently performs better on natural images, like a picture of a house or an animal. If the images are taken in artificial environments, like a lab or manufacturing line, or from diagnostic equipment, like x-rays or quality-control cameras, use Integrated Gradients instead.
"blurBaselineConfig": { # Config for blur baseline. When enabled, a linear path from the maximally blurred image to the input image is created. Using a blurred baseline instead of zero (black image) is motivated by the BlurIG approach explained here: https://arxiv.org/abs/2004.03383 # Config for XRAI with blur baseline. When enabled, a linear path from the maximally blurred image to the input image is created. Using a blurred baseline instead of zero (black image) is motivated by the BlurIG approach explained here: https://arxiv.org/abs/2004.03383
"maxBlurSigma": 3.14, # The standard deviation of the blur kernel for the blurred baseline. The same blurring parameter is used for both the height and the width dimension. If not set, the method defaults to the zero (i.e. black for images) baseline.
},
"smoothGradConfig": { # Config for SmoothGrad approximation of gradients. When enabled, the gradients are approximated by averaging the gradients from noisy samples in the vicinity of the inputs. Adding noise can help improve the computed gradients. Refer to this paper for more details: https://arxiv.org/pdf/1706.03825.pdf # Config for SmoothGrad approximation of gradients. When enabled, the gradients are approximated by averaging the gradients from noisy samples in the vicinity of the inputs. Adding noise can help improve the computed gradients. Refer to this paper for more details: https://arxiv.org/pdf/1706.03825.pdf
"featureNoiseSigma": { # Noise sigma by features. Noise sigma represents the standard deviation of the gaussian kernel that will be used to add noise to interpolated inputs prior to computing gradients. # This is similar to noise_sigma, but provides additional flexibility. A separate noise sigma can be provided for each feature, which is useful if their distributions are different. No noise is added to features that are not set. If this field is unset, noise_sigma will be used for all features.
"noiseSigma": [ # Noise sigma per feature. No noise is added to features that are not set.
{ # Noise sigma for a single feature.
"name": "A String", # The name of the input feature for which noise sigma is provided. The features are defined in explanation metadata inputs.
"sigma": 3.14, # This represents the standard deviation of the Gaussian kernel that will be used to add noise to the feature prior to computing gradients. Similar to noise_sigma but represents the noise added to the current feature. Defaults to 0.1.
},
],
},
"noiseSigma": 3.14, # This is a single float value and will be used to add noise to all the features. Use this field when all features are normalized to have the same distribution: scale to range [0, 1], [-1, 1] or z-scoring, where features are normalized to have 0-mean and 1-variance. Learn more about [normalization](https://developers.google.com/machine-learning/data-prep/transform/normalization). For best results the recommended value is about 10% - 20% of the standard deviation of the input feature. Refer to section 3.2 of the SmoothGrad paper: https://arxiv.org/pdf/1706.03825.pdf. Defaults to 0.1. If the distribution is different per feature, set feature_noise_sigma instead for each feature.
"noisySampleCount": 42, # The number of gradient samples to use for approximation. The higher this number, the more accurate the gradient is, but the runtime complexity increases by this factor as well. Valid range of its value is [1, 50]. Defaults to 3.
},
"stepCount": 42, # Required. The number of steps for approximating the path integral. A good value to start is 50 and gradually increase until the sum to diff property is met within the desired error range. Valid range of its value is [1, 100], inclusively.
},
},
},
"id": "A String", # Immutable. The ID of the DeployedModel. If not provided upon deployment, Vertex AI will generate a value for this ID. This value should be 1-10 characters, and valid characters are `/[0-9]/`.
"model": "A String", # Required. The resource name of the Model that this is the deployment of. Note that the Model may be in a different location than the DeployedModel's Endpoint. The resource name may contain version id or version alias to specify the version. Example: `projects/{project}/locations/{location}/models/{model}@2` or `projects/{project}/locations/{location}/models/{model}@golden` if no version is specified, the default version will be deployed.
"modelVersionId": "A String", # Output only. The version ID of the model that is deployed.
"privateEndpoints": { # PrivateEndpoints proto is used to provide paths for users to send requests privately. To send request via private service access, use predict_http_uri, explain_http_uri or health_http_uri. To send request via private service connect, use service_attachment. # Output only. Provide paths for users to send predict/explain/health requests directly to the deployed model services running on Cloud via private services access. This field is populated if network is configured.
"explainHttpUri": "A String", # Output only. Http(s) path to send explain requests.
"healthHttpUri": "A String", # Output only. Http(s) path to send health check requests.
"predictHttpUri": "A String", # Output only. Http(s) path to send prediction requests.
"serviceAttachment": "A String", # Output only. The name of the service attachment resource. Populated if private service connect is enabled.
},
"serviceAccount": "A String", # The service account that the DeployedModel's container runs as. Specify the email address of the service account. If this service account is not specified, the container runs as a service account that doesn't have access to the resource project. Users deploying the Model must have the `iam.serviceAccounts.actAs` permission on this service account.
"sharedResources": "A String", # The resource name of the shared DeploymentResourcePool to deploy on. Format: `projects/{project}/locations/{location}/deploymentResourcePools/{deployment_resource_pool}`
},
"trafficSplit": { # A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If this field is non-empty, then the Endpoint's traffic_split will be overwritten with it. To refer to the ID of the just being deployed Model, a "0" should be used, and the actual ID of the new DeployedModel will be filled in its place by this method. The traffic percentage values must add up to 100. If this field is empty, then the Endpoint's traffic_split is not updated.
"a_key": 42,
},
}
x__xgafv: string, V1 error format.
Allowed values
1 - v1 error format
2 - v2 error format
Returns:
An object of the form:
{ # This resource represents a long-running operation that is the result of a network API call.
"done": True or False, # If the value is `false`, it means the operation is still in progress. If `true`, the operation is completed, and either `error` or `response` is available.
"error": { # The `Status` type defines a logical error model that is suitable for different programming environments, including REST APIs and RPC APIs. It is used by [gRPC](https://github.com/grpc). Each `Status` message contains three pieces of data: error code, error message, and error details. You can find out more about this error model and how to work with it in the [API Design Guide](https://cloud.google.com/apis/design/errors). # The error result of the operation in case of failure or cancellation.
"code": 42, # The status code, which should be an enum value of google.rpc.Code.
"details": [ # A list of messages that carry the error details. There is a common set of message types for APIs to use.
{
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
],
"message": "A String", # A developer-facing error message, which should be in English. Any user-facing error message should be localized and sent in the google.rpc.Status.details field, or localized by the client.
},
"metadata": { # Service-specific metadata associated with the operation. It typically contains progress information and common metadata such as create time. Some services might not provide such metadata. Any method that returns a long-running operation should document the metadata type, if any.
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
"name": "A String", # The server-assigned name, which is only unique within the same service that originally returns it. If you use the default HTTP mapping, the `name` should be a resource name ending with `operations/{unique_id}`.
"response": { # The normal, successful response of the operation. If the original method returns no data on success, such as `Delete`, the response is `google.protobuf.Empty`. If the original method is standard `Get`/`Create`/`Update`, the response should be the resource. For other methods, the response should have the type `XxxResponse`, where `Xxx` is the original method name. For example, if the original method name is `TakeSnapshot()`, the inferred response type is `TakeSnapshotResponse`.
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
}</pre>
</div>
<div class="method">
<code class="details" id="directPredict">directPredict(endpoint, body=None, x__xgafv=None)</code>
<pre>Perform an unary online prediction request to a gRPC model server for Vertex first-party products and frameworks.
Args:
endpoint: string, Required. The name of the Endpoint requested to serve the prediction. Format: `projects/{project}/locations/{location}/endpoints/{endpoint}` (required)
body: object, The request body.
The object takes the form of:
{ # Request message for PredictionService.DirectPredict.
"inputs": [ # The prediction input.
{ # A tensor value type.
"boolVal": [ # Type specific representations that make it easy to create tensor protos in all languages. Only the representation corresponding to "dtype" can be set. The values hold the flattened representation of the tensor in row major order. BOOL
True or False,
],
"bytesVal": [ # STRING
"A String",
],
"doubleVal": [ # DOUBLE
3.14,
],
"dtype": "A String", # The data type of tensor.
"floatVal": [ # FLOAT
3.14,
],
"int64Val": [ # INT64
"A String",
],
"intVal": [ # INT_8 INT_16 INT_32
42,
],
"listVal": [ # A list of tensor values.
# Object with schema name: GoogleCloudAiplatformV1Tensor
],
"shape": [ # Shape of the tensor.
"A String",
],
"stringVal": [ # STRING
"A String",
],
"structVal": { # A map of string to tensor.
"a_key": # Object with schema name: GoogleCloudAiplatformV1Tensor
},
"tensorVal": "A String", # Serialized raw tensor content.
"uint64Val": [ # UINT64
"A String",
],
"uintVal": [ # UINT8 UINT16 UINT32
42,
],
},
],
"parameters": { # A tensor value type. # The parameters that govern the prediction.
"boolVal": [ # Type specific representations that make it easy to create tensor protos in all languages. Only the representation corresponding to "dtype" can be set. The values hold the flattened representation of the tensor in row major order. BOOL
True or False,
],
"bytesVal": [ # STRING
"A String",
],
"doubleVal": [ # DOUBLE
3.14,
],
"dtype": "A String", # The data type of tensor.
"floatVal": [ # FLOAT
3.14,
],
"int64Val": [ # INT64
"A String",
],
"intVal": [ # INT_8 INT_16 INT_32
42,
],
"listVal": [ # A list of tensor values.
# Object with schema name: GoogleCloudAiplatformV1Tensor
],
"shape": [ # Shape of the tensor.
"A String",
],
"stringVal": [ # STRING
"A String",
],
"structVal": { # A map of string to tensor.
"a_key": # Object with schema name: GoogleCloudAiplatformV1Tensor
},
"tensorVal": "A String", # Serialized raw tensor content.
"uint64Val": [ # UINT64
"A String",
],
"uintVal": [ # UINT8 UINT16 UINT32
42,
],
},
}
x__xgafv: string, V1 error format.
Allowed values
1 - v1 error format
2 - v2 error format
Returns:
An object of the form:
{ # Response message for PredictionService.DirectPredict.
"outputs": [ # The prediction output.
{ # A tensor value type.
"boolVal": [ # Type specific representations that make it easy to create tensor protos in all languages. Only the representation corresponding to "dtype" can be set. The values hold the flattened representation of the tensor in row major order. BOOL
True or False,
],
"bytesVal": [ # STRING
"A String",
],
"doubleVal": [ # DOUBLE
3.14,
],
"dtype": "A String", # The data type of tensor.
"floatVal": [ # FLOAT
3.14,
],
"int64Val": [ # INT64
"A String",
],
"intVal": [ # INT_8 INT_16 INT_32
42,
],
"listVal": [ # A list of tensor values.
# Object with schema name: GoogleCloudAiplatformV1Tensor
],
"shape": [ # Shape of the tensor.
"A String",
],
"stringVal": [ # STRING
"A String",
],
"structVal": { # A map of string to tensor.
"a_key": # Object with schema name: GoogleCloudAiplatformV1Tensor
},
"tensorVal": "A String", # Serialized raw tensor content.
"uint64Val": [ # UINT64
"A String",
],
"uintVal": [ # UINT8 UINT16 UINT32
42,
],
},
],
"parameters": { # A tensor value type. # The parameters that govern the prediction.
"boolVal": [ # Type specific representations that make it easy to create tensor protos in all languages. Only the representation corresponding to "dtype" can be set. The values hold the flattened representation of the tensor in row major order. BOOL
True or False,
],
"bytesVal": [ # STRING
"A String",
],
"doubleVal": [ # DOUBLE
3.14,
],
"dtype": "A String", # The data type of tensor.
"floatVal": [ # FLOAT
3.14,
],
"int64Val": [ # INT64
"A String",
],
"intVal": [ # INT_8 INT_16 INT_32
42,
],
"listVal": [ # A list of tensor values.
# Object with schema name: GoogleCloudAiplatformV1Tensor
],
"shape": [ # Shape of the tensor.
"A String",
],
"stringVal": [ # STRING
"A String",
],
"structVal": { # A map of string to tensor.
"a_key": # Object with schema name: GoogleCloudAiplatformV1Tensor
},
"tensorVal": "A String", # Serialized raw tensor content.
"uint64Val": [ # UINT64
"A String",
],
"uintVal": [ # UINT8 UINT16 UINT32
42,
],
},
}</pre>
</div>
<div class="method">
<code class="details" id="directRawPredict">directRawPredict(endpoint, body=None, x__xgafv=None)</code>
<pre>Perform an unary online prediction request to a gRPC model server for custom containers.
Args:
endpoint: string, Required. The name of the Endpoint requested to serve the prediction. Format: `projects/{project}/locations/{location}/endpoints/{endpoint}` (required)
body: object, The request body.
The object takes the form of:
{ # Request message for PredictionService.DirectRawPredict.
"input": "A String", # The prediction input.
"methodName": "A String", # Fully qualified name of the API method being invoked to perform predictions. Format: `/namespace.Service/Method/` Example: `/tensorflow.serving.PredictionService/Predict`
}
x__xgafv: string, V1 error format.
Allowed values
1 - v1 error format
2 - v2 error format
Returns:
An object of the form:
{ # Response message for PredictionService.DirectRawPredict.
"output": "A String", # The prediction output.
}</pre>
</div>
<div class="method">
<code class="details" id="explain">explain(endpoint, body=None, x__xgafv=None)</code>
<pre>Perform an online explanation. If deployed_model_id is specified, the corresponding DeployModel must have explanation_spec populated. If deployed_model_id is not specified, all DeployedModels must have explanation_spec populated.
Args:
endpoint: string, Required. The name of the Endpoint requested to serve the explanation. Format: `projects/{project}/locations/{location}/endpoints/{endpoint}` (required)
body: object, The request body.
The object takes the form of:
{ # Request message for PredictionService.Explain.
"deployedModelId": "A String", # If specified, this ExplainRequest will be served by the chosen DeployedModel, overriding Endpoint.traffic_split.
"explanationSpecOverride": { # The ExplanationSpec entries that can be overridden at online explanation time. # If specified, overrides the explanation_spec of the DeployedModel. Can be used for explaining prediction results with different configurations, such as: - Explaining top-5 predictions results as opposed to top-1; - Increasing path count or step count of the attribution methods to reduce approximate errors; - Using different baselines for explaining the prediction results.
"examplesOverride": { # Overrides for example-based explanations. # The example-based explanations parameter overrides.
"crowdingCount": 42, # The number of neighbors to return that have the same crowding tag.
"dataFormat": "A String", # The format of the data being provided with each call.
"neighborCount": 42, # The number of neighbors to return.
"restrictions": [ # Restrict the resulting nearest neighbors to respect these constraints.
{ # Restrictions namespace for example-based explanations overrides.
"allow": [ # The list of allowed tags.
"A String",
],
"deny": [ # The list of deny tags.
"A String",
],
"namespaceName": "A String", # The namespace name.
},
],
"returnEmbeddings": True or False, # If true, return the embeddings instead of neighbors.
},
"metadata": { # The ExplanationMetadata entries that can be overridden at online explanation time. # The metadata to be overridden. If not specified, no metadata is overridden.
"inputs": { # Required. Overrides the input metadata of the features. The key is the name of the feature to be overridden. The keys specified here must exist in the input metadata to be overridden. If a feature is not specified here, the corresponding feature's input metadata is not overridden.
"a_key": { # The input metadata entries to be overridden.
"inputBaselines": [ # Baseline inputs for this feature. This overrides the `input_baseline` field of the ExplanationMetadata.InputMetadata object of the corresponding feature's input metadata. If it's not specified, the original baselines are not overridden.
"",
],
},
},
},
"parameters": { # Parameters to configure explaining for Model's predictions. # The parameters to be overridden. Note that the attribution method cannot be changed. If not specified, no parameter is overridden.
"examples": { # Example-based explainability that returns the nearest neighbors from the provided dataset. # Example-based explanations that returns the nearest neighbors from the provided dataset.
"exampleGcsSource": { # The Cloud Storage input instances. # The Cloud Storage input instances.
"dataFormat": "A String", # The format in which instances are given, if not specified, assume it's JSONL format. Currently only JSONL format is supported.
"gcsSource": { # The Google Cloud Storage location for the input content. # The Cloud Storage location for the input instances.
"uris": [ # Required. Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.
"A String",
],
},
},
"nearestNeighborSearchConfig": "", # The full configuration for the generated index, the semantics are the same as metadata and should match [NearestNeighborSearchConfig](https://cloud.google.com/vertex-ai/docs/explainable-ai/configuring-explanations-example-based#nearest-neighbor-search-config).
"neighborCount": 42, # The number of neighbors to return when querying for examples.
"presets": { # Preset configuration for example-based explanations # Simplified preset configuration, which automatically sets configuration values based on the desired query speed-precision trade-off and modality.
"modality": "A String", # The modality of the uploaded model, which automatically configures the distance measurement and feature normalization for the underlying example index and queries. If your model does not precisely fit one of these types, it is okay to choose the closest type.
"query": "A String", # Preset option controlling parameters for speed-precision trade-off when querying for examples. If omitted, defaults to `PRECISE`.
},
},
"integratedGradientsAttribution": { # An attribution method that computes the Aumann-Shapley value taking advantage of the model's fully differentiable structure. Refer to this paper for more details: https://arxiv.org/abs/1703.01365 # An attribution method that computes Aumann-Shapley values taking advantage of the model's fully differentiable structure. Refer to this paper for more details: https://arxiv.org/abs/1703.01365
"blurBaselineConfig": { # Config for blur baseline. When enabled, a linear path from the maximally blurred image to the input image is created. Using a blurred baseline instead of zero (black image) is motivated by the BlurIG approach explained here: https://arxiv.org/abs/2004.03383 # Config for IG with blur baseline. When enabled, a linear path from the maximally blurred image to the input image is created. Using a blurred baseline instead of zero (black image) is motivated by the BlurIG approach explained here: https://arxiv.org/abs/2004.03383
"maxBlurSigma": 3.14, # The standard deviation of the blur kernel for the blurred baseline. The same blurring parameter is used for both the height and the width dimension. If not set, the method defaults to the zero (i.e. black for images) baseline.
},
"smoothGradConfig": { # Config for SmoothGrad approximation of gradients. When enabled, the gradients are approximated by averaging the gradients from noisy samples in the vicinity of the inputs. Adding noise can help improve the computed gradients. Refer to this paper for more details: https://arxiv.org/pdf/1706.03825.pdf # Config for SmoothGrad approximation of gradients. When enabled, the gradients are approximated by averaging the gradients from noisy samples in the vicinity of the inputs. Adding noise can help improve the computed gradients. Refer to this paper for more details: https://arxiv.org/pdf/1706.03825.pdf
"featureNoiseSigma": { # Noise sigma by features. Noise sigma represents the standard deviation of the gaussian kernel that will be used to add noise to interpolated inputs prior to computing gradients. # This is similar to noise_sigma, but provides additional flexibility. A separate noise sigma can be provided for each feature, which is useful if their distributions are different. No noise is added to features that are not set. If this field is unset, noise_sigma will be used for all features.
"noiseSigma": [ # Noise sigma per feature. No noise is added to features that are not set.
{ # Noise sigma for a single feature.
"name": "A String", # The name of the input feature for which noise sigma is provided. The features are defined in explanation metadata inputs.
"sigma": 3.14, # This represents the standard deviation of the Gaussian kernel that will be used to add noise to the feature prior to computing gradients. Similar to noise_sigma but represents the noise added to the current feature. Defaults to 0.1.
},
],
},
"noiseSigma": 3.14, # This is a single float value and will be used to add noise to all the features. Use this field when all features are normalized to have the same distribution: scale to range [0, 1], [-1, 1] or z-scoring, where features are normalized to have 0-mean and 1-variance. Learn more about [normalization](https://developers.google.com/machine-learning/data-prep/transform/normalization). For best results the recommended value is about 10% - 20% of the standard deviation of the input feature. Refer to section 3.2 of the SmoothGrad paper: https://arxiv.org/pdf/1706.03825.pdf. Defaults to 0.1. If the distribution is different per feature, set feature_noise_sigma instead for each feature.
"noisySampleCount": 42, # The number of gradient samples to use for approximation. The higher this number, the more accurate the gradient is, but the runtime complexity increases by this factor as well. Valid range of its value is [1, 50]. Defaults to 3.
},
"stepCount": 42, # Required. The number of steps for approximating the path integral. A good value to start is 50 and gradually increase until the sum to diff property is within the desired error range. Valid range of its value is [1, 100], inclusively.
},
"outputIndices": [ # If populated, only returns attributions that have output_index contained in output_indices. It must be an ndarray of integers, with the same shape of the output it's explaining. If not populated, returns attributions for top_k indices of outputs. If neither top_k nor output_indices is populated, returns the argmax index of the outputs. Only applicable to Models that predict multiple outputs (e,g, multi-class Models that predict multiple classes).
"",
],
"sampledShapleyAttribution": { # An attribution method that approximates Shapley values for features that contribute to the label being predicted. A sampling strategy is used to approximate the value rather than considering all subsets of features. # An attribution method that approximates Shapley values for features that contribute to the label being predicted. A sampling strategy is used to approximate the value rather than considering all subsets of features. Refer to this paper for model details: https://arxiv.org/abs/1306.4265.
"pathCount": 42, # Required. The number of feature permutations to consider when approximating the Shapley values. Valid range of its value is [1, 50], inclusively.
},
"topK": 42, # If populated, returns attributions for top K indices of outputs (defaults to 1). Only applies to Models that predicts more than one outputs (e,g, multi-class Models). When set to -1, returns explanations for all outputs.
"xraiAttribution": { # An explanation method that redistributes Integrated Gradients attributions to segmented regions, taking advantage of the model's fully differentiable structure. Refer to this paper for more details: https://arxiv.org/abs/1906.02825 Supported only by image Models. # An attribution method that redistributes Integrated Gradients attribution to segmented regions, taking advantage of the model's fully differentiable structure. Refer to this paper for more details: https://arxiv.org/abs/1906.02825 XRAI currently performs better on natural images, like a picture of a house or an animal. If the images are taken in artificial environments, like a lab or manufacturing line, or from diagnostic equipment, like x-rays or quality-control cameras, use Integrated Gradients instead.
"blurBaselineConfig": { # Config for blur baseline. When enabled, a linear path from the maximally blurred image to the input image is created. Using a blurred baseline instead of zero (black image) is motivated by the BlurIG approach explained here: https://arxiv.org/abs/2004.03383 # Config for XRAI with blur baseline. When enabled, a linear path from the maximally blurred image to the input image is created. Using a blurred baseline instead of zero (black image) is motivated by the BlurIG approach explained here: https://arxiv.org/abs/2004.03383
"maxBlurSigma": 3.14, # The standard deviation of the blur kernel for the blurred baseline. The same blurring parameter is used for both the height and the width dimension. If not set, the method defaults to the zero (i.e. black for images) baseline.
},
"smoothGradConfig": { # Config for SmoothGrad approximation of gradients. When enabled, the gradients are approximated by averaging the gradients from noisy samples in the vicinity of the inputs. Adding noise can help improve the computed gradients. Refer to this paper for more details: https://arxiv.org/pdf/1706.03825.pdf # Config for SmoothGrad approximation of gradients. When enabled, the gradients are approximated by averaging the gradients from noisy samples in the vicinity of the inputs. Adding noise can help improve the computed gradients. Refer to this paper for more details: https://arxiv.org/pdf/1706.03825.pdf
"featureNoiseSigma": { # Noise sigma by features. Noise sigma represents the standard deviation of the gaussian kernel that will be used to add noise to interpolated inputs prior to computing gradients. # This is similar to noise_sigma, but provides additional flexibility. A separate noise sigma can be provided for each feature, which is useful if their distributions are different. No noise is added to features that are not set. If this field is unset, noise_sigma will be used for all features.
"noiseSigma": [ # Noise sigma per feature. No noise is added to features that are not set.
{ # Noise sigma for a single feature.
"name": "A String", # The name of the input feature for which noise sigma is provided. The features are defined in explanation metadata inputs.
"sigma": 3.14, # This represents the standard deviation of the Gaussian kernel that will be used to add noise to the feature prior to computing gradients. Similar to noise_sigma but represents the noise added to the current feature. Defaults to 0.1.
},
],
},
"noiseSigma": 3.14, # This is a single float value and will be used to add noise to all the features. Use this field when all features are normalized to have the same distribution: scale to range [0, 1], [-1, 1] or z-scoring, where features are normalized to have 0-mean and 1-variance. Learn more about [normalization](https://developers.google.com/machine-learning/data-prep/transform/normalization). For best results the recommended value is about 10% - 20% of the standard deviation of the input feature. Refer to section 3.2 of the SmoothGrad paper: https://arxiv.org/pdf/1706.03825.pdf. Defaults to 0.1. If the distribution is different per feature, set feature_noise_sigma instead for each feature.