Python Examples of tensorflow_serving.apis.predict

Source File: serving_utils.py From tensor2tensor with Apache License 2.0

6 votes

def make_grpc_request_fn(servable_name, server, timeout_secs):
  """Builds a callable that queries `servable_name` on `server` via gRPC.

  Args:
    servable_name: Name written to `model_spec.name` of every request.
    server: Passed to `_create_stub` to obtain the prediction stub.
    timeout_secs: Timeout handed to `stub.Predict` for each call.

  Returns:
    A function taking a sized collection of examples and returning one
    `{"outputs": ..., "scores": ...}` dict per row of the response.
  """
  stub = _create_stub(server)

  def _make_grpc_request(examples):
    """Serializes `examples`, sends one PredictRequest, unpacks the reply."""
    request = predict_pb2.PredictRequest()
    request.model_spec.name = servable_name
    serialized_examples = [ex.SerializeToString() for ex in examples]
    input_proto = tf.make_tensor_proto(
        serialized_examples, shape=[len(examples)])
    request.inputs["input"].CopyFrom(input_proto)

    response = stub.Predict(request, timeout_secs)
    outputs = tf.make_ndarray(response.outputs["outputs"])
    scores = tf.make_ndarray(response.outputs["scores"])
    assert len(outputs) == len(scores)

    results = []
    for output, score in zip(outputs, scores):
      results.append({"outputs": output, "scores": score})
    return results

  return _make_grpc_request

Source File: grpc.py From jetson with MIT License

6 votes

def gRPCPredict(request: model.Request):
    """Run a Predict RPC against the serving host and package the result.

    Args:
        request: Incoming request; its ``instances`` are sent as input ``x``
            with shape ``[len(instances), 1]``.

    Returns:
        A dict with ``predictions`` (the ``float_val`` entries of output
        ``y``) and ``meta`` (model name, duration as computed by
        ``util.millis_interval``, completion timestamp, jetson model).
    """
    start = datetime.datetime.now()
    predictRequest = predict_pb2.PredictRequest()
    predictRequest.model_spec.name = model_name
    predictRequest.inputs['x'].CopyFrom(
        make_tensor_proto(
            request.instances,
            shape=[len(request.instances), 1]
        )
    )
    # Use the channel as a context manager so it is closed after the call;
    # otherwise every invocation leaks an open channel.
    with grpc.insecure_channel(f"{SERVING_HOST}:{SERVING_GRPC_PORT}") as channel:
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
        predictResult = stub.Predict(predictRequest, PREDICT_TIMEOUT)
    # Sample the clock once so duration and timestamp describe the same instant.
    finished = datetime.datetime.now()
    return {
        'predictions': list(predictResult.outputs['y'].float_val),
        'meta': {
            'model_name': model_name,
            'duration': util.millis_interval(start, finished),
            'timestamp': finished.timestamp(),
            'jetson_model': jetson_model
        }
    }

Source File: serving_grpc_client.py From deep_learning with MIT License

6 votes

def grpc_predict_raw(data):
    """Send ``data['sentence']`` to the ``textcnn_model`` servable and print the reply.

    Args:
        data: Mapping whose ``'sentence'`` entry is converted to an int64
            tensor of shape ``[1, 55]``.
    """
    port = 8500
    target = '{}:{}'.format(host, port)
    channel = grpc.insecure_channel(target)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'textcnn_model'
    request.model_spec.signature_name = "serving_default"

    # Requests are sent one sample at a time.
    named_inputs = {
        'sentence': tf.make_tensor_proto(
            data['sentence'], dtype=tf.int64, shape=[1, 55]),
    }
    for input_name, tensor_proto in named_inputs.items():
        request.inputs[input_name].CopyFrom(tensor_proto)

    response = stub.Predict(request, 5.0)
    print(response)

Source File: chicago_taxi_client.py From code-snippets with Apache License 2.0

6 votes

def _do_local_inference(host, port, serialized_examples, model_name):
  """Sends serialized examples to `model_name` at host:port and prints the reply.

  Args:
    host: Server host passed to the insecure channel.
    port: Server port; converted with `int()` before use.
    serialized_examples: Sized collection sent as the 'examples' input.
    model_name: Name written to `request.model_spec.name`.
  """
  stub = prediction_service_pb2.beta_create_PredictionService_stub(
      implementations.insecure_channel(host, int(port)))

  request = predict_pb2.PredictRequest()
  request.model_spec.name = model_name
  request.model_spec.signature_name = 'predict'

  examples_proto = tf.contrib.util.make_tensor_proto(
      [serialized_examples],
      shape=[len(serialized_examples)],
      dtype=tf.string)
  # The name of the input tensor is 'examples' based on
  # https://github.com/tensorflow/tensorflow/blob/r1.9/tensorflow/python/estimator/export/export.py#L290
  request.inputs['examples'].CopyFrom(examples_proto)

  response = stub.Predict(request, _LOCAL_INFERENCE_TIMEOUT_SECONDS)
  print(response)

Source File: predict_client.py From tensorflow_template_application with Apache License 2.0

6 votes

def main():
  """Sends a fixed sample batch to the model server and prints the response."""
  # Sample inference data: four keys with nine features each.
  sample_keys = numpy.asarray([1, 2, 3, 4])
  sample_features = numpy.asarray([
      [1, 2, 3, 4, 5, 6, 7, 8, 9],
      [1, 1, 1, 1, 1, 1, 1, 1, 1],
      [9, 8, 7, 6, 5, 4, 3, 2, 1],
      [9, 9, 9, 9, 9, 9, 9, 9, 9],
  ])
  keys_proto = tf.contrib.util.make_tensor_proto(sample_keys, dtype=tf.int32)
  features_proto = tf.contrib.util.make_tensor_proto(
      sample_features, dtype=tf.float32)

  # gRPC client.
  stub = prediction_service_pb2.beta_create_PredictionService_stub(
      implementations.insecure_channel(FLAGS.host, FLAGS.port))

  # Request: version and signature are only set when the flags provide them.
  request = predict_pb2.PredictRequest()
  request.model_spec.name = FLAGS.model_name
  if FLAGS.model_version > 0:
    request.model_spec.version.value = FLAGS.model_version
  if FLAGS.signature_name != "":
    request.model_spec.signature_name = FLAGS.signature_name
  request.inputs["keys"].CopyFrom(keys_proto)
  request.inputs["features"].CopyFrom(features_proto)

  # Send the request and show the raw response.
  response = stub.Predict(request, FLAGS.request_timeout)
  print(response)

Source File: request_builder_test.py From tfx with Apache License 2.0

6 votes

def testBuildRequests_EstimatorModel_Predict(self):
    """Checks the single PredictRequest built for an estimator 'predict' signature."""
    signatures = self._GetEstimatorModelSignature(signature_names=['predict'])
    builder = request_builder._TFServingRpcRequestBuilder(
        model_name='foo', signatures=signatures)
    builder.ReadExamplesArtifact(self._examples, num_examples=1)

    requests = builder.BuildRequests()

    self.assertEqual(len(requests), 1)
    request = requests[0]
    self.assertIsInstance(request, predict_pb2.PredictRequest)
    self.assertEqual(request.model_spec.name, 'foo')
    self.assertEqual(request.model_spec.signature_name, 'predict')
    # Exactly one input is expected, and it must be a string tensor.
    self.assertEqual(len(request.inputs), 1)
    input_key = list(request.inputs.keys())[0]
    self.assertEqual(request.inputs[input_key].dtype,
                     tf.dtypes.string.as_datatype_enum)

Source File: grpc_client_utils.py From inference-model-manager with Apache License 2.0

6 votes

def prepare_stub_and_request(address, model_name, model_version=None, creds=None, opts=None,
                             request_type=INFERENCE_REQUEST):
    """Create a gRPC stub and a matching, pre-filled request message.

    :param address: target passed to the gRPC channel constructor
    :param model_name: value stored in ``request.model_spec.name``
    :param model_version: optional value for ``request.model_spec.version.value``
    :param creds: channel credentials; when None an insecure channel is used
    :param opts: optional value for the ``grpc.ssl_target_name_override``
        channel option
    :param request_type: MODEL_STATUS_REQUEST or INFERENCE_REQUEST
    :return: tuple ``(stub, request)``
    :raises ValueError: if ``request_type`` is not one of the supported values
    """
    # Validate before opening a channel: an unknown type used to leave
    # ``request`` as None and fail later with an opaque AttributeError.
    if request_type not in (MODEL_STATUS_REQUEST, INFERENCE_REQUEST):
        raise ValueError('Unsupported request type: {}'.format(request_type))

    if opts is not None:
        opts = (('grpc.ssl_target_name_override', opts),)
    if creds is not None:
        channel = grpc.secure_channel(address, creds, options=opts)
    else:
        channel = grpc.insecure_channel(address, options=opts)

    if request_type == MODEL_STATUS_REQUEST:
        request = get_model_status_pb2.GetModelStatusRequest()
        stub = model_service_pb2_grpc.ModelServiceStub(channel)
    else:
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
        request = predict_pb2.PredictRequest()

    request.model_spec.name = model_name
    if model_version is not None:
        request.model_spec.version.value = model_version
    return stub, request

Source File: serving_utils.py From fine-lm with MIT License

6 votes

def make_grpc_request_fn(servable_name, server, timeout_secs):
  """Builds a callable that queries `servable_name` on `server` via gRPC.

  Args:
    servable_name: Name written to `model_spec.name` of every request.
    server: Passed to `_create_stub` to obtain the prediction stub.
    timeout_secs: Timeout handed to `stub.Predict` for each call.

  Returns:
    A function taking a sized collection of examples and returning one
    `{"outputs": ..., "scores": ...}` dict per row of the response.
  """
  stub = _create_stub(server)

  def _make_grpc_request(examples):
    """Serializes `examples`, sends one PredictRequest, unpacks the reply."""
    request = predict_pb2.PredictRequest()
    request.model_spec.name = servable_name
    serialized_examples = [ex.SerializeToString() for ex in examples]
    input_proto = tf.contrib.util.make_tensor_proto(
        serialized_examples, shape=[len(examples)])
    request.inputs["input"].CopyFrom(input_proto)

    response = stub.Predict(request, timeout_secs)
    outputs = tf.make_ndarray(response.outputs["outputs"])
    scores = tf.make_ndarray(response.outputs["scores"])
    assert len(outputs) == len(scores)

    results = []
    for row, output in enumerate(outputs):
      results.append({"outputs": output, "scores": scores[row]})
    return results

  return _make_grpc_request

Source File: serving_utils.py From BERT with Apache License 2.0

6 votes

def make_grpc_request_fn(servable_name, server, timeout_secs):
  """Returns a closure that sends Predict requests for `servable_name`.

  Args:
    servable_name: Name written to `model_spec.name` of every request.
    server: Passed to `_create_stub` to obtain the prediction stub.
    timeout_secs: Timeout handed to `stub.Predict` for each call.

  Returns:
    A function mapping a sized collection of examples to a list of
    `{"outputs": ..., "scores": ...}` dicts, one per response row.
  """
  stub = _create_stub(server)

  def _make_grpc_request(examples):
    """Packs `examples` into one request and splits the reply per example."""
    request = predict_pb2.PredictRequest()
    request.model_spec.name = servable_name
    payload = [ex.SerializeToString() for ex in examples]
    request.inputs["input"].CopyFrom(
        tf.make_tensor_proto(payload, shape=[len(examples)]))

    response = stub.Predict(request, timeout_secs)
    outputs = tf.make_ndarray(response.outputs["outputs"])
    scores = tf.make_ndarray(response.outputs["scores"])
    assert len(outputs) == len(scores)

    per_example = []
    for output, score in zip(outputs, scores):
      per_example.append({"outputs": output, "scores": score})
    return per_example

  return _make_grpc_request

Source File: serving_utils.py From training_results_v0.5 with Apache License 2.0

6 votes

def make_grpc_request_fn(servable_name, server, timeout_secs):
  """Returns a closure that sends Predict requests for `servable_name`.

  Args:
    servable_name: Name written to `model_spec.name` of every request.
    server: Passed to `_create_stub` to obtain the prediction stub.
    timeout_secs: Timeout handed to `stub.Predict` for each call.

  Returns:
    A function mapping a sized collection of examples to a list of
    `{"outputs": ..., "scores": ...}` dicts, one per response row.
  """
  stub = _create_stub(server)

  def _make_grpc_request(examples):
    """Packs `examples` into one request and splits the reply per example."""
    request = predict_pb2.PredictRequest()
    request.model_spec.name = servable_name
    payload = [ex.SerializeToString() for ex in examples]
    request.inputs["input"].CopyFrom(
        tf.contrib.util.make_tensor_proto(payload, shape=[len(examples)]))

    response = stub.Predict(request, timeout_secs)
    outputs = tf.make_ndarray(response.outputs["outputs"])
    scores = tf.make_ndarray(response.outputs["scores"])
    assert len(outputs) == len(scores)

    per_example = []
    for row, output in enumerate(outputs):
      per_example.append({"outputs": output, "scores": scores[row]})
    return per_example

  return _make_grpc_request

Source File: ende_client.py From OpenNMT-tf with MIT License

6 votes

def send_request(stub, model_name, batch_tokens, timeout=5.0):
  """Sends a translation request.

  Args:
    stub: The prediction service stub.
    model_name: The model to request.
    batch_tokens: The batch of token sequences; padded with `pad_batch`
      before being sent.
    timeout: Timeout after this many seconds.

  Returns:
    A future.
  """
  padded_tokens, lengths, max_length = pad_batch(batch_tokens)
  batch_size = len(lengths)

  tokens_proto = tf.make_tensor_proto(
      padded_tokens, dtype=tf.string, shape=(batch_size, max_length))
  lengths_proto = tf.make_tensor_proto(
      lengths, dtype=tf.int32, shape=(batch_size,))

  request = predict_pb2.PredictRequest()
  request.model_spec.name = model_name
  request.inputs["tokens"].CopyFrom(tokens_proto)
  request.inputs["length"].CopyFrom(lengths_proto)
  return stub.Predict.future(request, timeout)

Source File: tfs_sample_client.py From image-quality-assessment with Apache License 2.0

6 votes

def get_image_quality_predictions(image_path, model_name):
    """Score one image with the served model and print the mean score as JSON.

    Args:
        image_path: Path handed to ``utils.load_image`` (target size 224x224).
        model_name: Name written to ``request.model_spec.name``.
    """
    # Load and preprocess the image, then add a leading batch dimension.
    raw_image = utils.load_image(image_path, target_size=(224, 224))
    preprocessed = keras.applications.mobilenet.preprocess_input(raw_image)
    batch = np.expand_dims(preprocessed, 0)

    # Build the stub and the request for the 'image_quality' signature.
    channel = grpc.insecure_channel(f'{TFS_HOST}:{TFS_PORT}')
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = 'image_quality'
    request.inputs['input_image'].CopyFrom(
        tf.contrib.util.make_tensor_proto(batch))

    response = stub.Predict(request, 10.0)
    quality_values = response.outputs['quality_prediction'].float_val
    result = round(calc_mean_score(quality_values), 2)

    report = {'mean_score_prediction': np.round(result, 3)}
    print(json.dumps(report, indent=2))

Source File: request_builder_test.py From tfx with Apache License 2.0

5 votes

def testBuildRequests_KerasModel(self):
    """Checks the single PredictRequest built for a Keras model signature."""
    signatures = self._GetKerasModelSignature()
    builder = request_builder._TFServingRpcRequestBuilder(
        model_name='foo', signatures=signatures)
    builder.ReadExamplesArtifact(self._examples, num_examples=1)

    requests = builder.BuildRequests()

    self.assertEqual(len(requests), 1)
    request = requests[0]
    self.assertIsInstance(request, predict_pb2.PredictRequest)
    self.assertEqual(request.model_spec.name, 'foo')
    self.assertEqual(request.model_spec.signature_name, 'serving_default')

Source File: tensorflow_serving_client_workload.py From PerfKitBenchmarker with Apache License 2.0

5 votes

def classify_random_image(self):
    """Chooses a random image and sends a prediction request to the server.

    If the call returns, its latency is saved and the request is counted as
    completed. If the call raises ExpirationError, no latency is recorded
    and the request is counted as failed. Other exceptions are not caught
    here.
    """
    image_path = self.get_random_image()
    with open(image_path, 'rb') as image_file:
      image_bytes = image_file.read()

    request = predict_pb2.PredictRequest()
    request.model_spec.name = MODEL_NAME
    request.model_spec.signature_name = 'serving_default'
    request.inputs['image_bytes'].CopyFrom(
        tf.make_tensor_proto(image_bytes, shape=[1]))

    try:
      started = time.time()
      self.stub.Predict(request, FLAGS.rpc_timeout)
      finished = time.time()
      # Counters are shared state, so they are only touched under the lock.
      with self.thread_lock:
        self.num_completed_requests += 1
        self.latencies.append(finished - started)
    except ExpirationError:
      with self.thread_lock:
        self.num_failed_requests += 1

Source File: mnist_client.py From kubeflow-introduction with Apache License 2.0

5 votes

def get_prediction(image, server_host='127.0.0.1', server_port=9000,
                   server_name="server", timeout=10.0):
    """
    Retrieve a prediction from a TensorFlow model server

    :param image:       a MNIST image represented as a 1x784 array
    :param server_host: the address of the TensorFlow server
    :param server_port: the port used by the server
    :param server_name: the name of the server
    :param timeout:     the amount of time to wait for a prediction to complete
    :return 0:          the integer predicted in the MNIST image
    :return 1:          the confidence scores for all classes
    :return 2:          the version number of the model handling the request
    """

    print("connecting to:%s:%i" % (server_host, server_port))
    # open the connection and create the prediction stub
    stub = prediction_service_pb2.beta_create_PredictionService_stub(
        implementations.insecure_channel(server_host, server_port))

    # assemble the request for the 'predict_images' signature
    image_proto = tf.contrib.util.make_tensor_proto(image, shape=image.shape)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = server_name
    request.model_spec.signature_name = 'predict_images'
    request.inputs['images'].CopyFrom(image_proto)

    # call the server and unpack the three outputs
    response_outputs = stub.Predict(request, timeout).outputs
    predicted = response_outputs['prediction'].int64_val
    scores = response_outputs['scores'].float_val
    version = response_outputs['model-version'].string_val
    return predicted[0], scores, version[0]

Source File: inception_client.py From models with Apache License 2.0

5 votes

def main(_):
  """Sends the image at FLAGS.image to the 'inception' model and prints the reply."""
  host, port = FLAGS.server.split(':')
  stub = prediction_service_pb2.beta_create_PredictionService_stub(
      implementations.insecure_channel(host, int(port)))

  with open(FLAGS.image, 'rb') as image_file:
    image_bytes = image_file.read()

  # See prediction_service.proto for gRPC request/response details.
  request = predict_pb2.PredictRequest()
  request.model_spec.name = 'inception'
  request.model_spec.signature_name = 'predict_images'
  request.inputs['images'].CopyFrom(
      tf.contrib.util.make_tensor_proto(image_bytes, shape=[1]))

  response = stub.Predict(request, 10.0)  # 10 secs timeout
  print(response)

Source File: test_grpc_serving.py From BERT with Apache License 2.0

5 votes

def prepare_grpc_request(model_name, signature_name, input_dict):
    """Build a PredictRequest with one input tensor per entry of ``input_dict``.

    Each value is wrapped in a one-element list before conversion to a
    tensor proto.
    """
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = signature_name
    for input_name, value in input_dict.items():
        tensor_proto = make_tensor_proto([value])
        request.inputs[input_name].CopyFrom(tensor_proto)
    return request

Source File: predict.py From -Learn-Artificial-Intelligence-with-TensorFlow with MIT License

5 votes

def get_predict_request(x):
    """Wrap ``x`` as input 'x' of a PredictRequest for the 'default' model.

    The request targets the 'export_outputs' signature; the tensor proto is
    built with ``x.shape``.
    """
    spec = model_pb2.ModelSpec(name='default', signature_name='export_outputs')
    predict_request = predict_pb2.PredictRequest(model_spec=spec)
    x_proto = tf.contrib.util.make_tensor_proto(x, shape=x.shape)
    predict_request.inputs['x'].CopyFrom(x_proto)
    return predict_request

Source File: predict.py From -Learn-Artificial-Intelligence-with-TensorFlow with MIT License

5 votes

def predict_category(stub, X):
    """Return the category predicted for ``X`` by the model server.

    ``X`` is wrapped with ``get_predict_request``, sent through
    ``stub.Predict`` with a 20 second timeout, and the response is decoded
    by ``get_predicted_category``.
    """
    response = stub.Predict(get_predict_request(X), timeout=20.0)
    return get_predicted_category(response)

Source File: client.py From Machine-Learning-with-TensorFlow-1.x with MIT License

5 votes

def process_image(path, label_data, top_k=3):
    """Classify the image at ``path`` and return the ``top_k`` best labels.

    Args:
        path: Image path read with ``imread``.
        label_data: Indexable collection mapping a class index to its label.
        top_k: Maximum number of results to return. Fewer are returned when
            the model produces fewer scores than ``top_k``.

    Returns:
        A list of ``Output`` items ordered by descending probability.
    """
    start_time = datetime.now()
    img = imread(path)

    host, port = "0.0.0.0:9000".split(":")
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = "pet-model"
    request.model_spec.signature_name = "predict_images"

    request.inputs["images"].CopyFrom(
        tf.contrib.util.make_tensor_proto(
            img.astype(dtype=float),
            shape=img.shape, dtype=tf.float32
        )
    )

    result = stub.Predict(request, 20.)
    scores = tf.contrib.util.make_ndarray(result.outputs["scores"])[0]
    probs = softmax(scores)
    index = sorted(range(len(probs)), key=lambda x: probs[x], reverse=True)

    # Slice instead of indexing with range(top_k): the old loop raised
    # IndexError whenever top_k exceeded the number of scores. max() keeps
    # the previous "empty result" behavior for non-positive top_k.
    outputs = [
        Output(score=float(probs[i]), label=label_data[i])
        for i in index[:max(top_k, 0)]
    ]

    print(outputs)
    print("total time", (datetime.now() - start_time).total_seconds())
    return outputs

Source File: yolo.py From mobilenetv2-yolov3 with MIT License

5 votes

def export_serving_model(yolo, path, warmup_path=None,with_tensorrt=False):
    """Export ``yolo.yolo_model`` as a SavedModel with a warm-up request file.

    Args:
        yolo: Object whose ``yolo_model`` attribute is saved.
        path: Export directory; it is passed to ``overwrite_path`` first.
        warmup_path: Path of the image embedded in the warm-up request. When
            None, the path is requested interactively via ``input``.
        with_tensorrt: When True, the saved model is converted with
            ``trt.TrtGraphConverterV2`` (FP16) and re-saved at ``path``.
    """
    overwrite_path(path)
    tf.saved_model.save(yolo.yolo_model, path)
    if with_tensorrt:
        params=trt.TrtConversionParams(
            rewriter_config_template=None,
            max_workspace_size_bytes=trt.DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
            precision_mode=trt.TrtPrecisionMode.FP16,
            minimum_segment_size=3,
            is_dynamic_op=True,
            maximum_cached_engines=1,
            use_calibration=True,
            max_batch_size=1)
        converter = trt.TrtGraphConverterV2(input_saved_model_dir=path,conversion_params=params)
        converter.convert()
        # Replace the plain export with the converted model.
        tf.io.gfile.rmtree(path)
        converter.save(path)
    asset_extra = os.path.join(path, "assets.extra")
    tf.io.gfile.mkdir(asset_extra)
    with tf.io.TFRecordWriter(
            os.path.join(asset_extra, "tf_serving_warmup_requests")) as writer:
        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'detection'
        request.model_spec.signature_name = 'serving_default'
        if warmup_path is None:
            warmup_path = input('Please enter warm up image path:')
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to garbage collection.
        with open(warmup_path, 'rb') as image_file:
            image = image_file.read()
        image_data = np.expand_dims(image, 0)
        request.inputs['predict_image'].CopyFrom(
            tf.compat.v1.make_tensor_proto(image_data))
        log = prediction_log_pb2.PredictionLog(
            predict_log=prediction_log_pb2.PredictLog(request=request))
        writer.write(log.SerializeToString())

Source File: crnn_python_client_via_grpc.py From CRNN_Tensorflow with MIT License

5 votes

def make_request(image_path, server):
    """
    Send one image to the 'crnn' model and return the raw Predict response.

    :param image_path: path of the image file, read with cv2.imread
    :param server: target address passed to grpc.insecure_channel
    :return: the result of stub.Predict, or None if the call raised
    """
    stub = prediction_service_pb2_grpc.PredictionServiceStub(
        grpc.insecure_channel(server))

    width, height = CFG.ARCH.INPUT_SIZE[0], CFG.ARCH.INPUT_SIZE[1]

    # Resize to the configured input size and rescale pixel values
    # with x / 127.5 - 1.0.
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (width, height), interpolation=cv2.INTER_LINEAR)
    image = np.array(image, np.float32) / 127.5 - 1.0
    image_batch = np.array([image], dtype=np.float32)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'crnn'
    request.model_spec.signature_name = sm.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
    request.inputs['input_tensor'].CopyFrom(
        make_tensor_proto(image_batch, shape=[1, height, width, 3]))

    try:
        return stub.Predict(request, 10.0)
    except Exception as err:
        print(err)
        return None

Source File: client.py From voice-vector with MIT License

5 votes

def do_inference(num_tests, concurrency=1):
  """Sends `num_tests` asynchronous Predict requests built from dummy audio.

  Args:
    num_tests: Number of requests to send.
    concurrency: Passed to `_Coordinator` together with `num_tests`;
      `coord.throttle()` is called before each request is sent.
  """
  channel = implementations.insecure_channel(host, int(port))
  stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

  coord = _Coordinator(num_tests, concurrency)

  for _ in range(num_tests):
    # dummy audio
    duration, sr, n_fft, win_length, hop_length, n_mels, max_db, min_db = 4, 16000, 512, 512, 128, 80, 35, -55
    filename = librosa.util.example_audio_file()
    wav = read_wav(filename, sr=sr, duration=duration)
    mel = wav2melspec_db(wav, sr, n_fft, win_length, hop_length, n_mels)
    mel = normalize_db(mel, max_db=max_db, min_db=min_db)
    mel = mel.astype(np.float32)
    mel = np.expand_dims(mel, axis=0)  # single batch
    # Floor division keeps this an int on Python 3 as well; true division
    # produced 501.0, which is not a valid tensor dimension.
    n_timesteps = sr // hop_length * duration + 1

    # build request
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'voice_vector'
    request.model_spec.signature_name = 'predict'
    request.inputs['x'].CopyFrom(tf.contrib.util.make_tensor_proto(mel, shape=[1, n_timesteps, n_mels]))

    coord.throttle()

    # send asynchronous response (recommended. use this.)
    result_future = stub.Predict.future(request, 10.0)  # timeout
    result_future.add_done_callback(_create_rpc_callback(coord))

    # send synchronous response (NOT recommended)
    # result = stub.Predict(request, 5.0)

  coord.wait_all_done()

Source File: request_builder.py From tfx with Apache License 2.0

5 votes

def _BuildPredictRequests(self, signature_name: Text,
                            serialized_input_key: Text):
    """Yields one PredictRequest per record in `self._records`.

    Args:
      signature_name: Value written to `model_spec.signature_name`.
      serialized_input_key: Input key under which the record is stored,
        wrapped in a one-element list.
    """
    for serialized_record in self._records:
      predict_request = predict_pb2.PredictRequest()
      predict_request.model_spec.name = self._model_name
      predict_request.model_spec.signature_name = signature_name
      record_proto = tf.make_tensor_proto([serialized_record])
      predict_request.inputs[serialized_input_key].CopyFrom(record_proto)
      yield predict_request

Source File: request_builder.py From tfx with Apache License 2.0

5 votes

def _GetSerializedInputKey(self, signature_def: _SignatureDef):
    """Returns the SignatureDef input key that takes serialized records.

    A PredictRequest can only be built when the signature has exactly one
    input, and that input has dtype=DT_STRING and shape=TensorShape([None]).

    Args:
      signature_def: A SignatureDef proto message.

    Returns:
      The key of the single serialized-record input.

    Raises:
      ValueError: If the signature does not have exactly one input of the
        expected dtype and shape.
    """
    candidate_keys = list(signature_def.inputs.keys())
    if len(candidate_keys) == 1:
      only_key = candidate_keys[0]
      spec = signature_def.inputs[only_key]
      is_string = spec.dtype == tf.dtypes.string.as_datatype_enum
      if is_string and (
          spec.tensor_shape == tf.TensorShape([None]).as_proto()):
        return only_key
    # TODO(b/151697719): General Predict method signature support.
    raise ValueError(
        'Unable to find valid input key from SignatureDef. In order to make '
        'PredictRequest, model should define signature that accepts serialized '
        'record inputs, i.e. signature with single input whose dtype=DT_STRING '
        'and shape=TensorShape([None]).')

Source File: tensorflow_serving_client.py From tfx with Apache License 2.0

5 votes

def _SendRequest(self, request: types.Request) -> None:
    """Sends `request` through the RPC that matches its message type.

    Raises:
      NotImplementedError: If `request` is not a classification, regression
        or predict request.
    """
    if isinstance(request, classification_pb2.ClassificationRequest):
      rpc = self._prediction_service.Classify
    elif isinstance(request, regression_pb2.RegressionRequest):
      rpc = self._prediction_service.Regress
    elif isinstance(request, predict_pb2.PredictRequest):
      rpc = self._prediction_service.Predict
    else:
      raise NotImplementedError('Unsupported request type {}'.format(
          type(request).__name__))
    rpc(request)

Source File: tfserver.py From tf_classification with MIT License

5 votes

def predict(image_data,
            model_name='inception',
            host='localhost',
            port=9000,
            timeout=10):
  """Sends a batch of images to the model server and returns its response.

  The signature is chosen from the type of the first element: encoded image
  bytes (`str` or `bytes`) use 'predict_image_bytes', anything else uses
  'predict_image_array'.

  Arguments:
    image_data (list): A list of image data. The image data should either be the image bytes or
      float arrays.
    model_name (str): The name of the model to query (specified when you started the Server)
    host (str): The machine host identifier that the classifier is running on.
    port (int): The port that the classifier is listening on.
    timeout (int): Time in seconds before timing out.

  Returns:
    PredictResponse protocol buffer, or None when `image_data` is empty.
    See here: https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/predict.proto
  """

  if len(image_data) <= 0:
    return None

  channel = implementations.insecure_channel(host, int(port))
  stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
  request = predict_pb2.PredictRequest()
  request.model_spec.name = model_name

  first_image = image_data[0]
  # Accept `bytes` as well: on Python 3 encoded images are bytes, not str.
  if isinstance(first_image, (str, bytes)):
    request.model_spec.signature_name = 'predict_image_bytes'
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(image_data, shape=[len(image_data)]))
  else:
    request.model_spec.signature_name = 'predict_image_array'
    # Take the row length from the first image; indexing element 1 raised
    # IndexError for a single-image batch.
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(
            image_data, shape=[len(image_data), len(first_image)]))

  result = stub.Predict(request, timeout)
  return result

Source File: abstract_export_generator.py From tensor2robot with Apache License 2.0

5 votes

def create_warmup_requests_numpy(self, batch_sizes,
                                   export_dir):
    """Creates warm-up requests for a given feature specification.

    One PredictionLog record is written per batch size to
    `export_dir/tf_serving_warmup_requests`. Every input is a constant-zero
    numpy value produced by `tensorspec_utils.make_constant_numpy`.
    NOTE(review): callers presumably pass the model's `assets.extra`
    directory as `export_dir` -- confirm.

    Args:
      batch_sizes: Batch sizes of warm-up requests to write.
      export_dir: Directory the request file is written to; created if
        missing.

    Returns:
      The filename written.
    """
    receiver_features = self._get_input_features_for_receiver_fn()
    flat_spec = tensorspec_utils.flatten_spec_structure(receiver_features)

    tf.io.gfile.makedirs(export_dir)
    request_filename = os.path.join(export_dir, 'tf_serving_warmup_requests')
    with tf.python_io.TFRecordWriter(request_filename) as record_writer:
      for batch_size in batch_sizes:
        warmup_request = predict_pb2.PredictRequest()
        warmup_request.model_spec.name = self._model_name
        constant_inputs = tensorspec_utils.make_constant_numpy(
            flat_spec, constant_value=0, batch_size=batch_size)

        for input_key, numpy_value in constant_inputs.items():
          warmup_request.inputs[input_key].CopyFrom(
              contrib_util.make_tensor_proto(numpy_value))

        predict_log = prediction_log_pb2.PredictLog(request=warmup_request)
        log_entry = prediction_log_pb2.PredictionLog(predict_log=predict_log)
        record_writer.write(log_entry.SerializeToString())
    return request_filename

Source File: client.py From wide_deep with MIT License

5 votes

def main(_):
    """Send one serialized tf.train.Example to the server and print the argmax score."""
    host, port = FLAGS.server.split(':')
    stub = prediction_service_pb2.beta_create_PredictionService_stub(
        implementations.insecure_channel(host, int(port)))

    # The feature map comes from the test input; it is passed as the
    # `feature` field of tf.train.Features.
    test_input = _read_test_input()
    feature_dict = pred_input_fn(test_input)
    features = tf.train.Features(feature=feature_dict)
    serialized = tf.train.Example(features=features).SerializeToString()

    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model
    request.model_spec.signature_name = 'serving_default'
    request.inputs['inputs'].CopyFrom(
        tf.contrib.util.make_tensor_proto(serialized, shape=[1]))

    # Issue the call asynchronously, then block on the result.
    pending = stub.Predict.future(request, 5.0)
    prediction = pending.result().outputs['scores']

    print('Prediction: ' + str(np.argmax(prediction.float_val)))

Source File: client.py From wide_deep with MIT License

5 votes

def do_inference(hostport, work_dir, concurrency, num_tests):
    """Tests PredictionService with concurrent requests.

    One asynchronous Predict call is issued per test image; the result
    counter throttles the calls and collects the outcomes through callbacks.

    Args:
        hostport: Host:port address of the PredictionService.
        work_dir: The full path of working directory for test data set.
        concurrency: Maximum number of concurrent requests.
        num_tests: Number of test images to use.
    Returns:
        The classification error rate.
    Raises:
        IOError: An error occurred processing test data set.
    """
    test_data_set = mnist_input_data.read_data_sets(work_dir).test
    host, port = hostport.split(':')
    stub = prediction_service_pb2.beta_create_PredictionService_stub(
        implementations.insecure_channel(host, int(port)))
    result_counter = _ResultCounter(num_tests, concurrency)

    for _ in range(num_tests):
        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'mnist'
        request.model_spec.signature_name = 'predict_images'
        images, labels = test_data_set.next_batch(1)
        image_proto = tf.contrib.util.make_tensor_proto(
            images[0], shape=[1, images[0].size])
        request.inputs['images'].CopyFrom(image_proto)

        result_counter.throttle()
        pending = stub.Predict.future(request, 5.0)  # 5 seconds
        pending.add_done_callback(
            _create_rpc_callback(labels[0], result_counter))

    return result_counter.get_error_rate()

Python tensorflow_serving.apis.predict_pb2.PredictRequest() Examples