org.apache.http.HttpHost Scala Examples

The following examples show how to use org.apache.http.HttpHost, collected from several open-source Scala projects; the project and source file are named above each example.
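
For orientation, here is a minimal sketch (hosts, ports, and the proxy address are placeholders) of the three ways HttpHost instances are constructed in the examples below:

import org.apache.http.HttpHost

// explicit hostname, port, and scheme (the pattern most Elasticsearch examples use)
val es = new HttpHost("localhost", 9200, "http")

// hostname and port only; the scheme defaults to plain HTTP
val plain = new HttpHost("localhost", 9200)

// parsed from a URI-style string, as the Azure DocumentDB proxy example does
val proxy = HttpHost.create("http://proxy.example.com:3128")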
Example 1
Source File: ElasticClient.scala    From fs2-elastic   with MIT License
package com.alessandromarrella.fs2_elastic
import org.elasticsearch.client.{
  RestClient,
  RestClientBuilder,
  RestHighLevelClient
}
import fs2._
import cats.effect.Sync
import org.apache.http.HttpHost

object Client {

  def fromHosts[F[_]](hosts: HttpHost*)(
      implicit F: Sync[F]): Stream[F, RestHighLevelClient] =
    Stream.bracket(
      F.delay(new RestHighLevelClient(RestClient.builder(hosts: _*))))(
      c => F.delay(c.close())
    )

  def fromClientBuilder[F[_]](restClientBuilder: RestClientBuilder)(
      implicit F: Sync[F]): Stream[F, RestHighLevelClient] =
    Stream.bracket(F.delay(new RestHighLevelClient(restClientBuilder)))(
      c => F.delay(c.close())
    )
} 
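
A minimal usage sketch for Client.fromHosts, assuming cats-effect IO as the effect type and a placeholder local node:

import cats.effect.IO

val clients = Client.fromHosts[IO](new HttpHost("localhost", 9200, "http"))
// the bracket ensures each acquired RestHighLevelClient is closed when the stream terminates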
Example 2
Source File: Http.scala    From seed   with Apache License 2.0
package seed.publish.util

import java.net.URI

import org.apache.commons.io.IOUtils
import org.apache.http.{HttpHost, HttpRequest, HttpRequestInterceptor}
import org.apache.http.entity.ContentType
import seed.util.ZioHelpers._
import zio.Task
import org.apache.http.auth.AuthScope
import org.apache.http.auth.UsernamePasswordCredentials
import org.apache.http.client.protocol.HttpClientContext
import org.apache.http.impl.auth.BasicScheme
import org.apache.http.impl.client.{BasicAuthCache, BasicCredentialsProvider}
import org.apache.http.impl.nio.client.CloseableHttpAsyncClient
import org.apache.http.impl.nio.client.HttpAsyncClients
import org.apache.http.nio.client.methods.HttpAsyncMethods
import org.apache.http.nio.protocol.HttpAsyncRequestProducer
import org.apache.http.protocol.HttpContext

class Http(httpClient: CloseableHttpAsyncClient) {
  def put(url: String, bytes: Array[Byte]): Task[String] = {
    val producer =
      HttpAsyncMethods.createPut(url, bytes, ContentType.DEFAULT_BINARY)
    send(url, producer)
  }

  def post(url: String, bytes: Array[Byte]): Task[String] = {
    val producer =
      HttpAsyncMethods.createPost(url, bytes, ContentType.DEFAULT_BINARY)
    send(url, producer)
  }

  def destroy(): Unit = httpClient.close()

  private def send(url: String, producer: HttpAsyncRequestProducer) = {
    val client = new CompletableHttpAsyncClient(httpClient)

    val uri        = URI.create(url)
    val targetHost = new HttpHost(uri.getHost, uri.getPort, uri.getScheme)
    val authCache  = new BasicAuthCache()
    authCache.put(targetHost, new BasicScheme())

    val clientContext = HttpClientContext.create()
    clientContext.setAuthCache(authCache)

    val future =
      client.execute(producer, HttpAsyncMethods.createConsumer(), clientContext)

    fromCompletableFuture(future)
      .map(r => IOUtils.toString(r.getEntity.getContent, "UTF-8"))
  }
}

class CustomRequestInterceptor(log: seed.Log) extends HttpRequestInterceptor {
  override def process(request: HttpRequest, context: HttpContext): Unit =
    log.debug("Sending HTTP request " + request + "...")
}

object Http {
  def create(log: seed.Log, authHost: String, auth: (String, String)): Http = {
    val credsProvider = new BasicCredentialsProvider()
    credsProvider.setCredentials(
      new AuthScope(authHost, 443),
      new UsernamePasswordCredentials(auth._1, auth._2)
    )

    val c = HttpAsyncClients
      .custom()
      .setDefaultCredentialsProvider(credsProvider)
      .addInterceptorFirst(new CustomRequestInterceptor(log))
      .build()
    c.start()

    new Http(c)
  }
} 
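
A hedged usage sketch; the repository host, the credentials, and the seed.Log instance `log` are placeholders:

val http = Http.create(log, "repo.example.com", ("ci-user", "secret"))
val upload: zio.Task[String] = http.put("https://repo.example.com/artifact.jar", "payload".getBytes("UTF-8"))
// run `upload` with a ZIO runtime, then release the underlying async client
http.destroy()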
Example 3
Source File: HttpProxyChecker.scala    From ProxyCrawler   with Apache License 2.0
package org.crowdcrawler.proxycrawler.checker

import java.net.URI
import java.nio.charset.StandardCharsets

import org.apache.http.annotation.ThreadSafe
import org.apache.http.HttpHost
import org.apache.http.client.methods.HttpGet
import org.apache.http.impl.client.HttpClients
import org.apache.http.util.EntityUtils


@ThreadSafe
private[checker] object HttpProxyChecker extends AbstractProxyChecker {
  private val CLIENT  = HttpClients.custom().setMaxConnTotal(AbstractProxyChecker.MAX_CONN)
    .disableRedirectHandling().build()
  private val TARGET_URL = new URI("http://www.baidu.com")


  def check(host: String, port: Int): (Int, Int) = {
    val request = new HttpGet(TARGET_URL)
    AbstractProxyChecker.configureRequest(request, Some(new HttpHost(host, port, "http")))

    val response = CLIENT.execute(request)

    val statusCode = response.getStatusLine.getStatusCode
    val html = EntityUtils.toString(response.getEntity, StandardCharsets.UTF_8)
    if (statusCode == 200 && html.contains("<title>百度一下")) (statusCode, html.getBytes.length) else (statusCode, -1)
  }
} 
Example 4
Source File: AbstractProxyChecker.scala    From ProxyCrawler   with Apache License 2.0
package org.crowdcrawler.proxycrawler.checker

import java.io.IOException

import org.apache.http.{HttpHeaders, HttpHost}
import org.apache.http.client.config.RequestConfig
import org.apache.http.client.methods.HttpGet
import org.crowdcrawler.proxycrawler.ProxyCrawler


private[checker] trait AbstractProxyChecker {

  // Checks the proxy at host:port and returns (HTTP status code, response size in bytes),
  // where the size is -1 if the expected page content was not returned.
  @throws(classOf[IOException])
  def check(host: String, port: Int): (Int, Int)
}


private[checker] object AbstractProxyChecker {
  val TIMEOUT = 30000  // connect/socket timeout in milliseconds
  val MAX_CONN = 100000
  val REQUEST_CONFIG = RequestConfig.custom.setConnectTimeout(TIMEOUT).setSocketTimeout(TIMEOUT)
    .setRedirectsEnabled(false).setRelativeRedirectsAllowed(false).setCircularRedirectsAllowed(false)
    .build()


  def configureRequest(request: HttpGet, proxy: Option[HttpHost] = None): Unit = {
    ProxyCrawler.DEFAULT_HEADERS.foreach { case (key, value) =>
      request.setHeader(key, value)
    }
    // disable keep-alive
    request.setHeader(HttpHeaders.CONNECTION, "close")
    val requestConfig = if (proxy.isDefined) {
      RequestConfig.copy(AbstractProxyChecker.REQUEST_CONFIG).setProxy(proxy.get).build()
    } else {
      REQUEST_CONFIG
    }
    request.setConfig(requestConfig)
  }
} 
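
A usage sketch for configureRequest with a placeholder proxy address:

val request = new HttpGet("http://example.com/")
// without a proxy the shared REQUEST_CONFIG is reused; with one, a copy gets the proxy set on it
AbstractProxyChecker.configureRequest(request, Some(new HttpHost("10.0.0.1", 8080, "http")))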
Example 5
Source File: HttpsProxyChecker.scala    From ProxyCrawler   with Apache License 2.0
package org.crowdcrawler.proxycrawler.checker

import java.net.URI
import java.nio.charset.StandardCharsets
import java.security.cert.X509Certificate

import org.apache.http.HttpHost
import org.apache.http.annotation.ThreadSafe
import org.apache.http.client.methods.HttpGet
import org.apache.http.conn.ssl.{NoopHostnameVerifier, SSLConnectionSocketFactory}
import org.apache.http.impl.client.HttpClients
import org.apache.http.ssl.{TrustStrategy, SSLContexts}
import org.apache.http.util.EntityUtils


@ThreadSafe
private[checker] object HttpsProxyChecker extends AbstractProxyChecker {
  // trust all certificates including self-signed certificates
  private[checker] val SSL_CONTEXT = SSLContexts.custom().loadTrustMaterial(null, new TrustStrategy() {
    def isTrusted(chain: Array[X509Certificate], authType: String) = true
  }).build()
  private val CLIENT = {
    val connectionFactory = new SSLConnectionSocketFactory(SSL_CONTEXT, NoopHostnameVerifier.INSTANCE)
    HttpClients.custom().setSSLSocketFactory(connectionFactory).setMaxConnTotal(AbstractProxyChecker.MAX_CONN)
      .disableRedirectHandling().build()
  }
  private val TARGET_URL = new URI("https://www.google.com")


  def check(host: String, port: Int): (Int, Int) = {
    val request = new HttpGet(TARGET_URL)
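    // the proxy itself is addressed over plain HTTP; the HTTPS request is tunneled through it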
    AbstractProxyChecker.configureRequest(request, Some(new HttpHost(host, port, "http")))

    val response = CLIENT.execute(request)

    val statusCode = response.getStatusLine.getStatusCode
    val html = EntityUtils.toString(response.getEntity, StandardCharsets.UTF_8)
    if (statusCode == 200 && html.contains("<title>Google</title>")) (statusCode, html.getBytes.length)
    else (statusCode, -1)
  }
} 
Example 6
Source File: ElasticWriteConfig.scala    From elastic-indexer4s   with MIT License
package com.yannick_cw.elastic_indexer4s.elasticsearch.elasic_config

import com.sksamuel.elastic4s.http.{ElasticClient, ElasticNodeEndpoint}
import org.apache.http.HttpHost
import org.elasticsearch.client.RestClient
import org.elasticsearch.client.sniff.Sniffer
import org.joda.time.DateTime

import scala.concurrent.duration.{FiniteDuration, _}

case class ElasticWriteConfig(
    elasticNodeEndpoints: List[ElasticNodeEndpoint],
    indexPrefix: String,
    docType: String,
    mappingSetting: MappingSetting = TypedMappingSetting(),
    writeBatchSize: Int = 50,
    writeConcurrentRequest: Int = 10,
    writeMaxAttempts: Int = 5,
    logWriteSpeedEvery: FiniteDuration = 1.minute,
    waitForElasticTimeout: FiniteDuration = 5.seconds,
    sniffCluster: Boolean = false
) {
  val indexName: String = indexPrefix + "_" + new DateTime().toString("yyyy-MM-dd't'HH:mm:ss")

  lazy val restClient: RestClient =
    RestClient
      .builder(elasticNodeEndpoints.map(e => new HttpHost(e.host, e.port, "http")): _*)
      .build()

  lazy val client: ElasticClient = {
    if (sniffCluster) {
      // building the Sniffer starts a background task that sniffs the cluster
      // every 5 minutes for the best hosts to connect to
      Sniffer.builder(restClient).build()
    }
    ElasticClient.fromRestClient(restClient)
  }
}

object ElasticWriteConfig {
  def apply(
      esNodeEndpoints: List[ElasticNodeEndpoint],
      esTargetIndexPrefix: String,
      esTargetType: String
  ): ElasticWriteConfig =
    new ElasticWriteConfig(esNodeEndpoints, esTargetIndexPrefix, esTargetType)
} 
Example 7
Source File: DocumentClientProvider.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.azure.documentdb

import com.datamountaineer.streamreactor.connect.azure.documentdb.config.DocumentDbSinkSettings
import com.microsoft.azure.documentdb.{ConnectionPolicy, DocumentClient}
import org.apache.http.HttpHost


object DocumentClientProvider {
  def get(settings: DocumentDbSinkSettings): DocumentClient = {
    val policy = ConnectionPolicy.GetDefault()
    settings.proxy.map(HttpHost.create).foreach(policy.setProxy)

    new DocumentClient(settings.endpoint,
      settings.masterKey,
      policy,
      settings.consistency)
  }
} 
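
Note that HttpHost.create parses a URI-style string, so settings.proxy can carry scheme, host, and port in a single value (the address below is a placeholder):

val proxy = HttpHost.create("http://10.0.0.1:3128")
// equivalent to new HttpHost("10.0.0.1", 3128, "http")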
Example 8
Source File: ContentRecommender.scala    From albedo   with MIT License
package ws.vinta.albedo.recommenders

import org.apache.http.HttpHost
import org.apache.spark.ml.param.Param
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, Dataset}
import org.elasticsearch.action.search.SearchRequest
import org.elasticsearch.client.{RestClient, RestHighLevelClient}
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item
import org.elasticsearch.index.query.QueryBuilders._
import org.elasticsearch.search.SearchHit
import org.elasticsearch.search.builder.SearchSourceBuilder
import ws.vinta.albedo.closures.DBFunctions._

class ContentRecommender(override val uid: String) extends Recommender {

  def this() = {
    this(Identifiable.randomUID("contentRecommender"))
  }

  val enableEvaluationMode = new Param[Boolean](this, "enableEvaluationMode", "Should be enabled for evaluation only")

  def getEnableEvaluationMode: Boolean = $(enableEvaluationMode)

  def setEnableEvaluationMode(value: Boolean): this.type = set(enableEvaluationMode, value)
  setDefault(enableEvaluationMode -> false)

  override def source = "content"

  override def recommendForUsers(userDF: Dataset[_]): DataFrame = {
    transformSchema(userDF.schema)

    import userDF.sparkSession.implicits._

    val userRecommendedItemDF = userDF
      .as[Int]
      .flatMap {
        userId => {
          // When a More Like This query is issued with document ids,
          // the results exclude the documents used as the query condition.
          // That is unsuitable during evaluation,
          // so we use the last k starred repos as the query instead.
          val limit = $(topK)
          val offset = if ($(enableEvaluationMode)) $(topK) else 0
          val repoIds = selectUserStarredRepos(userId, limit, offset)

          val lowClient = RestClient.builder(new HttpHost("127.0.0.1", 9200, "http")).build()
          val highClient = new RestHighLevelClient(lowClient)

          val fields = Array("description", "full_name", "language", "topics")
          val texts = Array("")
          val items = repoIds.map((itemId: Int) => new Item("repo", "repo_info_doc", itemId.toString))
          val queryBuilder = moreLikeThisQuery(fields, texts, items)
            .minTermFreq(2)
            .maxQueryTerms(50)

          val searchSourceBuilder = new SearchSourceBuilder()
          searchSourceBuilder.query(queryBuilder)
          searchSourceBuilder.size($(topK))
          searchSourceBuilder.from(0)

          val searchRequest = new SearchRequest()
          searchRequest.indices("repo")
          searchRequest.types("repo_info_doc")
          searchRequest.source(searchSourceBuilder)

          val searchResponse = highClient.search(searchRequest)
          val hits = searchResponse.getHits
          val searchHits = hits.getHits

          val userItemScoreTuples = searchHits.map((searchHit: SearchHit) => {
            val itemId = searchHit.getId.toInt
            val score = searchHit.getScore
            (userId, itemId, score)
          })

          lowClient.close()

          userItemScoreTuples
        }
      }
      .toDF($(userCol), $(itemCol), $(scoreCol))
      .withColumn($(sourceCol), lit(source))

    userRecommendedItemDF
  }
} 
Example 9
Source File: IndexAdmin.scala    From scio   with Apache License 2.0
package com.spotify.scio.elasticsearch

import org.apache.http.HttpHost
import org.elasticsearch.client._
import org.elasticsearch.client.indices.{CreateIndexRequest, CreateIndexResponse}
import org.elasticsearch.common.xcontent.XContentType

import scala.util.Try

object IndexAdmin {
  private def indicesClient[A](esOptions: ElasticsearchOptions)(f: IndicesClient => A): Try[A] = {
    val client = new RestHighLevelClient(RestClient.builder(esOptions.nodes: _*))

    val result = Try(f(client.indices()))
    client.close()
    result
  }

  // Creates `index` with the given JSON mapping source.
  private def ensureIndex(
    index: String,
    mappingSource: String,
    client: IndicesClient
  ): CreateIndexResponse =
    client.create(
      new CreateIndexRequest(index).source(mappingSource, XContentType.JSON),
      RequestOptions.DEFAULT
    )
} 
Example 10
Source File: package.scala    From scio   with Apache License 2.0
package com.spotify.scio

import com.spotify.scio.coders.Coder
import com.spotify.scio.elasticsearch.ElasticsearchIO.{RetryConfig, WriteParam}
import com.spotify.scio.io.ClosedTap
import com.spotify.scio.values.SCollection
import org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write.BulkExecutionException
import org.apache.http.HttpHost
import org.elasticsearch.action.DocWriteRequest
import org.joda.time.Duration


package object elasticsearch {

  // Implicit class (reconstructed wrapper) exposing saveAsElasticsearch on any SCollection[T].
  implicit class ElasticsearchSCollection[T](@transient private val self: SCollection[T])
      extends Serializable {

    def saveAsElasticsearch(
      esOptions: ElasticsearchOptions,
      flushInterval: Duration = WriteParam.DefaultFlushInterval,
      numOfShards: Long = WriteParam.DefaultNumShards,
      maxBulkRequestSize: Int = WriteParam.DefaultMaxBulkRequestSize,
      errorFn: BulkExecutionException => Unit = WriteParam.DefaultErrorFn,
      retry: RetryConfig = WriteParam.DefaultRetryConfig
    )(f: T => Iterable[DocWriteRequest[_]])(implicit coder: Coder[T]): ClosedTap[Nothing] = {
      val param = WriteParam(f, errorFn, flushInterval, numOfShards, maxBulkRequestSize, retry)
      self.write(ElasticsearchIO[T](esOptions))(param)
    }
  }
} 
Example 11
Source File: ElasticSearchClient.scala    From pipelines-examples   with Apache License 2.0
package warez

import akka.NotUsed
import akka.kafka.ConsumerMessage.CommittableOffset
import akka.stream.alpakka.elasticsearch.{ ReadResult, WriteMessage, WriteResult }
import akka.stream.alpakka.elasticsearch.scaladsl.{ ElasticsearchFlow, ElasticsearchSource }
import akka.stream.scaladsl.Source

import org.apache.http.HttpHost
import org.elasticsearch.client.RestClient
import spray.json.{ JsObject, JsonFormat }

import pipelines.akkastream.scaladsl.FlowWithOffsetContext


// Class declaration reconstructed from usage: it takes the Elasticsearch coordinates
// and index/type names, plus a spray-json format for the document type T.
class ElasticSearchClient[T](hostname: String, port: Int, indexName: String, typeName: String)(
    implicit format: JsonFormat[T]
) {

  implicit val esClient: RestClient = RestClient.builder(new HttpHost(hostname, port)).build()

  def indexFlow(): FlowWithOffsetContext[WriteMessage[T, NotUsed], WriteResult[T, CommittableOffset]] =
    ElasticsearchFlow.createWithContext[T, CommittableOffset](indexName, typeName)

  def querySource(searchCriteria: String): Source[ReadResult[JsObject], NotUsed] =
    ElasticsearchSource
      .create(indexName, typeName, query = s"""{
          "bool": {
            "must": {
              "query_string": {
                "query": "$searchCriteria"
              }
            }
          }
        }""")
}