1
0

clientconn.go 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398
  1. /*
  2. *
  3. * Copyright 2014 gRPC authors.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. */
  18. package grpc
  19. import (
  20. "errors"
  21. "fmt"
  22. "math"
  23. "net"
  24. "reflect"
  25. "strings"
  26. "sync"
  27. "sync/atomic"
  28. "time"
  29. "golang.org/x/net/context"
  30. "google.golang.org/grpc/balancer"
  31. _ "google.golang.org/grpc/balancer/roundrobin" // To register roundrobin.
  32. "google.golang.org/grpc/codes"
  33. "google.golang.org/grpc/connectivity"
  34. "google.golang.org/grpc/credentials"
  35. "google.golang.org/grpc/grpclog"
  36. "google.golang.org/grpc/internal/backoff"
  37. "google.golang.org/grpc/internal/channelz"
  38. "google.golang.org/grpc/internal/transport"
  39. "google.golang.org/grpc/keepalive"
  40. "google.golang.org/grpc/metadata"
  41. "google.golang.org/grpc/resolver"
  42. _ "google.golang.org/grpc/resolver/dns" // To register dns resolver.
  43. _ "google.golang.org/grpc/resolver/passthrough" // To register passthrough resolver.
  44. "google.golang.org/grpc/status"
  45. )
const (
	// minConnectTimeout is the minimum time to give a connection to complete.
	minConnectTimeout = 20 * time.Second
	// grpclbName is the balancer name selected when the resolved address list
	// contains a grpclb address. It must match grpclbName in grpclb/grpclb.go.
	grpclbName = "grpclb"
)
var (
	// ErrClientConnClosing indicates that the operation is illegal because
	// the ClientConn is closing. It is a status error with code Canceled.
	//
	// Deprecated: this error should not be relied upon by users; use the status
	// code of Canceled instead.
	ErrClientConnClosing = status.Error(codes.Canceled, "grpc: the client connection is closing")
	// errConnDrain indicates that the connection starts to be drained and does
	// not accept any new RPCs.
	errConnDrain = errors.New("grpc: the connection is drained")
	// errConnClosing indicates that the connection is closing.
	errConnClosing = errors.New("grpc: the connection is closing")
	// errBalancerClosed indicates that the balancer is closed.
	errBalancerClosed = errors.New("grpc: balancer is closed")
	// getMinConnectTimeout returns minConnectTimeout. It is a function-valued
	// variable (an accessor) so that the timeout can be atomically read and
	// updated while testing.
	getMinConnectTimeout = func() time.Duration {
		return minConnectTimeout
	}
)
// The following errors are returned from Dial and DialContext when the
// supplied credential options are missing or contradict each other.
var (
	// errNoTransportSecurity indicates that there is no transport security
	// being set for ClientConn. Users should either set one or explicitly
	// call WithInsecure DialOption to disable security.
	errNoTransportSecurity = errors.New("grpc: no transport security set (use grpc.WithInsecure() explicitly or set credentials)")
	// errTransportCredsAndBundle indicates that a creds bundle is used together
	// with other individual Transport Credentials.
	errTransportCredsAndBundle = errors.New("grpc: credentials.Bundle may not be used with individual TransportCredentials")
	// errTransportCredentialsMissing indicates that users want to transmit
	// security information (e.g., an oauth2 token) that requires a secure
	// connection, but the connection is configured as insecure.
	errTransportCredentialsMissing = errors.New("grpc: the credentials require transport level security (use grpc.WithTransportCredentials() to set)")
	// errCredentialsConflict indicates that grpc.WithTransportCredentials()
	// and grpc.WithInsecure() are both called for a connection.
	errCredentialsConflict = errors.New("grpc: transport credentials are set for an insecure connection (grpc.WithTransportCredentials() and grpc.WithInsecure() are both called)")
)
const (
	// defaultClientMaxReceiveMessageSize is 4 MiB (1024 * 1024 * 4 bytes).
	defaultClientMaxReceiveMessageSize = 1024 * 1024 * 4
	// defaultClientMaxSendMessageSize is math.MaxInt32.
	defaultClientMaxSendMessageSize = math.MaxInt32
	// defaultWriteBufSize and defaultReadBufSize are both 32 KiB.
	// NOTE(review): the previous comment here described a constant named
	// http2IOBufSize ("the buffer size for sending frames"), which is not
	// declared in this block; presumably these are the default transport
	// write/read buffer sizes — confirm against their users.
	defaultWriteBufSize = 32 * 1024
	defaultReadBufSize  = 32 * 1024
)
  95. // Dial creates a client connection to the given target.
  96. func Dial(target string, opts ...DialOption) (*ClientConn, error) {
  97. return DialContext(context.Background(), target, opts...)
  98. }
  99. // DialContext creates a client connection to the given target. By default, it's
  100. // a non-blocking dial (the function won't wait for connections to be
  101. // established, and connecting happens in the background). To make it a blocking
  102. // dial, use WithBlock() dial option.
  103. //
  104. // In the non-blocking case, the ctx does not act against the connection. It
  105. // only controls the setup steps.
  106. //
  107. // In the blocking case, ctx can be used to cancel or expire the pending
  108. // connection. Once this function returns, the cancellation and expiration of
  109. // ctx will be noop. Users should call ClientConn.Close to terminate all the
  110. // pending operations after this function returns.
  111. //
  112. // The target name syntax is defined in
  113. // https://github.com/grpc/grpc/blob/master/doc/naming.md.
  114. // e.g. to use dns resolver, a "dns:///" prefix should be applied to the target.
  115. func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) {
  116. cc := &ClientConn{
  117. target: target,
  118. csMgr: &connectivityStateManager{},
  119. conns: make(map[*addrConn]struct{}),
  120. dopts: defaultDialOptions(),
  121. blockingpicker: newPickerWrapper(),
  122. czData: new(channelzData),
  123. }
  124. cc.retryThrottler.Store((*retryThrottler)(nil))
  125. cc.ctx, cc.cancel = context.WithCancel(context.Background())
  126. for _, opt := range opts {
  127. opt.apply(&cc.dopts)
  128. }
  129. if channelz.IsOn() {
  130. if cc.dopts.channelzParentID != 0 {
  131. cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, cc.dopts.channelzParentID, target)
  132. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  133. Desc: "Channel Created",
  134. Severity: channelz.CtINFO,
  135. Parent: &channelz.TraceEventDesc{
  136. Desc: fmt.Sprintf("Nested Channel(id:%d) created", cc.channelzID),
  137. Severity: channelz.CtINFO,
  138. },
  139. })
  140. } else {
  141. cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, 0, target)
  142. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  143. Desc: "Channel Created",
  144. Severity: channelz.CtINFO,
  145. })
  146. }
  147. cc.csMgr.channelzID = cc.channelzID
  148. }
  149. if !cc.dopts.insecure {
  150. if cc.dopts.copts.TransportCredentials == nil && cc.dopts.copts.CredsBundle == nil {
  151. return nil, errNoTransportSecurity
  152. }
  153. if cc.dopts.copts.TransportCredentials != nil && cc.dopts.copts.CredsBundle != nil {
  154. return nil, errTransportCredsAndBundle
  155. }
  156. } else {
  157. if cc.dopts.copts.TransportCredentials != nil || cc.dopts.copts.CredsBundle != nil {
  158. return nil, errCredentialsConflict
  159. }
  160. for _, cd := range cc.dopts.copts.PerRPCCredentials {
  161. if cd.RequireTransportSecurity() {
  162. return nil, errTransportCredentialsMissing
  163. }
  164. }
  165. }
  166. cc.mkp = cc.dopts.copts.KeepaliveParams
  167. if cc.dopts.copts.Dialer == nil {
  168. cc.dopts.copts.Dialer = newProxyDialer(
  169. func(ctx context.Context, addr string) (net.Conn, error) {
  170. network, addr := parseDialTarget(addr)
  171. return dialContext(ctx, network, addr)
  172. },
  173. )
  174. }
  175. if cc.dopts.copts.UserAgent != "" {
  176. cc.dopts.copts.UserAgent += " " + grpcUA
  177. } else {
  178. cc.dopts.copts.UserAgent = grpcUA
  179. }
  180. if cc.dopts.timeout > 0 {
  181. var cancel context.CancelFunc
  182. ctx, cancel = context.WithTimeout(ctx, cc.dopts.timeout)
  183. defer cancel()
  184. }
  185. defer func() {
  186. select {
  187. case <-ctx.Done():
  188. conn, err = nil, ctx.Err()
  189. default:
  190. }
  191. if err != nil {
  192. cc.Close()
  193. }
  194. }()
  195. scSet := false
  196. if cc.dopts.scChan != nil {
  197. // Try to get an initial service config.
  198. select {
  199. case sc, ok := <-cc.dopts.scChan:
  200. if ok {
  201. cc.sc = sc
  202. scSet = true
  203. }
  204. default:
  205. }
  206. }
  207. if cc.dopts.bs == nil {
  208. cc.dopts.bs = backoff.Exponential{
  209. MaxDelay: DefaultBackoffConfig.MaxDelay,
  210. }
  211. }
  212. if cc.dopts.resolverBuilder == nil {
  213. // Only try to parse target when resolver builder is not already set.
  214. cc.parsedTarget = parseTarget(cc.target)
  215. grpclog.Infof("parsed scheme: %q", cc.parsedTarget.Scheme)
  216. cc.dopts.resolverBuilder = resolver.Get(cc.parsedTarget.Scheme)
  217. if cc.dopts.resolverBuilder == nil {
  218. // If resolver builder is still nil, the parse target's scheme is
  219. // not registered. Fallback to default resolver and set Endpoint to
  220. // the original unparsed target.
  221. grpclog.Infof("scheme %q not registered, fallback to default scheme", cc.parsedTarget.Scheme)
  222. cc.parsedTarget = resolver.Target{
  223. Scheme: resolver.GetDefaultScheme(),
  224. Endpoint: target,
  225. }
  226. cc.dopts.resolverBuilder = resolver.Get(cc.parsedTarget.Scheme)
  227. }
  228. } else {
  229. cc.parsedTarget = resolver.Target{Endpoint: target}
  230. }
  231. creds := cc.dopts.copts.TransportCredentials
  232. if creds != nil && creds.Info().ServerName != "" {
  233. cc.authority = creds.Info().ServerName
  234. } else if cc.dopts.insecure && cc.dopts.authority != "" {
  235. cc.authority = cc.dopts.authority
  236. } else {
  237. // Use endpoint from "scheme://authority/endpoint" as the default
  238. // authority for ClientConn.
  239. cc.authority = cc.parsedTarget.Endpoint
  240. }
  241. if cc.dopts.scChan != nil && !scSet {
  242. // Blocking wait for the initial service config.
  243. select {
  244. case sc, ok := <-cc.dopts.scChan:
  245. if ok {
  246. cc.sc = sc
  247. }
  248. case <-ctx.Done():
  249. return nil, ctx.Err()
  250. }
  251. }
  252. if cc.dopts.scChan != nil {
  253. go cc.scWatcher()
  254. }
  255. var credsClone credentials.TransportCredentials
  256. if creds := cc.dopts.copts.TransportCredentials; creds != nil {
  257. credsClone = creds.Clone()
  258. }
  259. cc.balancerBuildOpts = balancer.BuildOptions{
  260. DialCreds: credsClone,
  261. CredsBundle: cc.dopts.copts.CredsBundle,
  262. Dialer: cc.dopts.copts.Dialer,
  263. ChannelzParentID: cc.channelzID,
  264. }
  265. // Build the resolver.
  266. cc.resolverWrapper, err = newCCResolverWrapper(cc)
  267. if err != nil {
  268. return nil, fmt.Errorf("failed to build resolver: %v", err)
  269. }
  270. // Start the resolver wrapper goroutine after resolverWrapper is created.
  271. //
  272. // If the goroutine is started before resolverWrapper is ready, the
  273. // following may happen: The goroutine sends updates to cc. cc forwards
  274. // those to balancer. Balancer creates new addrConn. addrConn fails to
  275. // connect, and calls resolveNow(). resolveNow() tries to use the non-ready
  276. // resolverWrapper.
  277. cc.resolverWrapper.start()
  278. // A blocking dial blocks until the clientConn is ready.
  279. if cc.dopts.block {
  280. for {
  281. s := cc.GetState()
  282. if s == connectivity.Ready {
  283. break
  284. } else if cc.dopts.copts.FailOnNonTempDialError && s == connectivity.TransientFailure {
  285. if err = cc.blockingpicker.connectionError(); err != nil {
  286. terr, ok := err.(interface{ Temporary() bool })
  287. if ok && !terr.Temporary() {
  288. return nil, err
  289. }
  290. }
  291. }
  292. if !cc.WaitForStateChange(ctx, s) {
  293. // ctx got timeout or canceled.
  294. return nil, ctx.Err()
  295. }
  296. }
  297. }
  298. return cc, nil
  299. }
// connectivityStateManager keeps the connectivity.State of ClientConn.
// This struct will eventually be exported so the balancers can access it.
type connectivityStateManager struct {
	// mu is held by updateState, getState and getNotifyChan while they touch
	// state and notifyChan.
	mu sync.Mutex
	// state is the current connectivity state. Once it is Shutdown,
	// updateState ignores further updates.
	state connectivity.State
	// notifyChan is created lazily by getNotifyChan and is closed and reset to
	// nil by updateState when the state changes.
	notifyChan chan struct{}
	// channelzID is the channelz id used for state-change trace events; it is
	// assigned in DialContext when channelz is on.
	channelzID int64
}
  308. // updateState updates the connectivity.State of ClientConn.
  309. // If there's a change it notifies goroutines waiting on state change to
  310. // happen.
  311. func (csm *connectivityStateManager) updateState(state connectivity.State) {
  312. csm.mu.Lock()
  313. defer csm.mu.Unlock()
  314. if csm.state == connectivity.Shutdown {
  315. return
  316. }
  317. if csm.state == state {
  318. return
  319. }
  320. csm.state = state
  321. if channelz.IsOn() {
  322. channelz.AddTraceEvent(csm.channelzID, &channelz.TraceEventDesc{
  323. Desc: fmt.Sprintf("Channel Connectivity change to %v", state),
  324. Severity: channelz.CtINFO,
  325. })
  326. }
  327. if csm.notifyChan != nil {
  328. // There are other goroutines waiting on this channel.
  329. close(csm.notifyChan)
  330. csm.notifyChan = nil
  331. }
  332. }
  333. func (csm *connectivityStateManager) getState() connectivity.State {
  334. csm.mu.Lock()
  335. defer csm.mu.Unlock()
  336. return csm.state
  337. }
  338. func (csm *connectivityStateManager) getNotifyChan() <-chan struct{} {
  339. csm.mu.Lock()
  340. defer csm.mu.Unlock()
  341. if csm.notifyChan == nil {
  342. csm.notifyChan = make(chan struct{})
  343. }
  344. return csm.notifyChan
  345. }
// ClientConn represents a client connection to an RPC server.
type ClientConn struct {
	// ctx and cancel are created in DialContext from context.Background();
	// the contexts of addrConns are derived from ctx (see newAddrConn).
	ctx    context.Context
	cancel context.CancelFunc

	// target is the target string passed to Dial/DialContext; Target returns it.
	target string
	// parsedTarget and authority are computed once in DialContext.
	parsedTarget resolver.Target
	authority    string
	// dopts holds the defaults with the caller's DialOptions applied.
	dopts dialOptions
	// csMgr tracks the connectivity state reported by GetState.
	csMgr *connectivityStateManager

	balancerBuildOpts balancer.BuildOptions
	resolverWrapper   *ccResolverWrapper
	blockingpicker    *pickerWrapper

	// NOTE(review): the methods in this file take mu around accesses to sc,
	// conns, the balancer names, curAddresses, balancerWrapper and (when
	// reading) resolverWrapper; presumably mu guards the fields below —
	// confirm.
	mu sync.RWMutex
	// sc is the current service config; scRaw is the JSON it was parsed from
	// (reset to "" when sc arrives through dopts.scChan instead).
	sc    ServiceConfig
	scRaw string
	// conns is the set of live addrConns. A nil map is used throughout this
	// file as the signal that the ClientConn was closed.
	conns map[*addrConn]struct{}
	// Keepalive parameter can be updated if a GoAway is received.
	mkp             keepalive.ClientParameters
	curBalancerName string
	preBalancerName string // previous balancer name.
	// curAddresses is the address list most recently handled by
	// handleResolvedAddrs.
	curAddresses    []resolver.Address
	balancerWrapper *ccBalancerWrapper
	// retryThrottler stores a *retryThrottler, which may be a nil pointer
	// when no retry throttling is configured.
	retryThrottler atomic.Value

	channelzID int64 // channelz unique identification number
	// czData holds the call counters updated atomically by the incrCalls*
	// methods and read by channelzMetric.
	czData *channelzData
}
  372. // WaitForStateChange waits until the connectivity.State of ClientConn changes from sourceState or
  373. // ctx expires. A true value is returned in former case and false in latter.
  374. // This is an EXPERIMENTAL API.
  375. func (cc *ClientConn) WaitForStateChange(ctx context.Context, sourceState connectivity.State) bool {
  376. ch := cc.csMgr.getNotifyChan()
  377. if cc.csMgr.getState() != sourceState {
  378. return true
  379. }
  380. select {
  381. case <-ctx.Done():
  382. return false
  383. case <-ch:
  384. return true
  385. }
  386. }
  387. // GetState returns the connectivity.State of ClientConn.
  388. // This is an EXPERIMENTAL API.
  389. func (cc *ClientConn) GetState() connectivity.State {
  390. return cc.csMgr.getState()
  391. }
  392. func (cc *ClientConn) scWatcher() {
  393. for {
  394. select {
  395. case sc, ok := <-cc.dopts.scChan:
  396. if !ok {
  397. return
  398. }
  399. cc.mu.Lock()
  400. // TODO: load balance policy runtime change is ignored.
  401. // We may revist this decision in the future.
  402. cc.sc = sc
  403. cc.scRaw = ""
  404. cc.mu.Unlock()
  405. case <-cc.ctx.Done():
  406. return
  407. }
  408. }
  409. }
  410. func (cc *ClientConn) handleResolvedAddrs(addrs []resolver.Address, err error) {
  411. cc.mu.Lock()
  412. defer cc.mu.Unlock()
  413. if cc.conns == nil {
  414. // cc was closed.
  415. return
  416. }
  417. if reflect.DeepEqual(cc.curAddresses, addrs) {
  418. return
  419. }
  420. cc.curAddresses = addrs
  421. if cc.dopts.balancerBuilder == nil {
  422. // Only look at balancer types and switch balancer if balancer dial
  423. // option is not set.
  424. var isGRPCLB bool
  425. for _, a := range addrs {
  426. if a.Type == resolver.GRPCLB {
  427. isGRPCLB = true
  428. break
  429. }
  430. }
  431. var newBalancerName string
  432. if isGRPCLB {
  433. newBalancerName = grpclbName
  434. } else {
  435. // Address list doesn't contain grpclb address. Try to pick a
  436. // non-grpclb balancer.
  437. newBalancerName = cc.curBalancerName
  438. // If current balancer is grpclb, switch to the previous one.
  439. if newBalancerName == grpclbName {
  440. newBalancerName = cc.preBalancerName
  441. }
  442. // The following could be true in two cases:
  443. // - the first time handling resolved addresses
  444. // (curBalancerName="")
  445. // - the first time handling non-grpclb addresses
  446. // (curBalancerName="grpclb", preBalancerName="")
  447. if newBalancerName == "" {
  448. newBalancerName = PickFirstBalancerName
  449. }
  450. }
  451. cc.switchBalancer(newBalancerName)
  452. } else if cc.balancerWrapper == nil {
  453. // Balancer dial option was set, and this is the first time handling
  454. // resolved addresses. Build a balancer with dopts.balancerBuilder.
  455. cc.balancerWrapper = newCCBalancerWrapper(cc, cc.dopts.balancerBuilder, cc.balancerBuildOpts)
  456. }
  457. cc.balancerWrapper.handleResolvedAddrs(addrs, nil)
  458. }
  459. // switchBalancer starts the switching from current balancer to the balancer
  460. // with the given name.
  461. //
  462. // It will NOT send the current address list to the new balancer. If needed,
  463. // caller of this function should send address list to the new balancer after
  464. // this function returns.
  465. //
  466. // Caller must hold cc.mu.
  467. func (cc *ClientConn) switchBalancer(name string) {
  468. if cc.conns == nil {
  469. return
  470. }
  471. if strings.ToLower(cc.curBalancerName) == strings.ToLower(name) {
  472. return
  473. }
  474. grpclog.Infof("ClientConn switching balancer to %q", name)
  475. if cc.dopts.balancerBuilder != nil {
  476. grpclog.Infoln("ignoring balancer switching: Balancer DialOption used instead")
  477. return
  478. }
  479. // TODO(bar switching) change this to two steps: drain and close.
  480. // Keep track of sc in wrapper.
  481. if cc.balancerWrapper != nil {
  482. cc.balancerWrapper.close()
  483. }
  484. builder := balancer.Get(name)
  485. // TODO(yuxuanli): If user send a service config that does not contain a valid balancer name, should
  486. // we reuse previous one?
  487. if channelz.IsOn() {
  488. if builder == nil {
  489. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  490. Desc: fmt.Sprintf("Channel switches to new LB policy %q due to fallback from invalid balancer name", PickFirstBalancerName),
  491. Severity: channelz.CtWarning,
  492. })
  493. } else {
  494. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  495. Desc: fmt.Sprintf("Channel switches to new LB policy %q", name),
  496. Severity: channelz.CtINFO,
  497. })
  498. }
  499. }
  500. if builder == nil {
  501. grpclog.Infof("failed to get balancer builder for: %v, using pick_first instead", name)
  502. builder = newPickfirstBuilder()
  503. }
  504. cc.preBalancerName = cc.curBalancerName
  505. cc.curBalancerName = builder.Name()
  506. cc.balancerWrapper = newCCBalancerWrapper(cc, builder, cc.balancerBuildOpts)
  507. }
  508. func (cc *ClientConn) handleSubConnStateChange(sc balancer.SubConn, s connectivity.State) {
  509. cc.mu.Lock()
  510. if cc.conns == nil {
  511. cc.mu.Unlock()
  512. return
  513. }
  514. // TODO(bar switching) send updates to all balancer wrappers when balancer
  515. // gracefully switching is supported.
  516. cc.balancerWrapper.handleSubConnStateChange(sc, s)
  517. cc.mu.Unlock()
  518. }
  519. // newAddrConn creates an addrConn for addrs and adds it to cc.conns.
  520. //
  521. // Caller needs to make sure len(addrs) > 0.
  522. func (cc *ClientConn) newAddrConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (*addrConn, error) {
  523. ac := &addrConn{
  524. cc: cc,
  525. addrs: addrs,
  526. scopts: opts,
  527. dopts: cc.dopts,
  528. czData: new(channelzData),
  529. successfulHandshake: true, // make the first nextAddr() call _not_ move addrIdx up by 1
  530. resetBackoff: make(chan struct{}),
  531. }
  532. ac.ctx, ac.cancel = context.WithCancel(cc.ctx)
  533. // Track ac in cc. This needs to be done before any getTransport(...) is called.
  534. cc.mu.Lock()
  535. if cc.conns == nil {
  536. cc.mu.Unlock()
  537. return nil, ErrClientConnClosing
  538. }
  539. if channelz.IsOn() {
  540. ac.channelzID = channelz.RegisterSubChannel(ac, cc.channelzID, "")
  541. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  542. Desc: "Subchannel Created",
  543. Severity: channelz.CtINFO,
  544. Parent: &channelz.TraceEventDesc{
  545. Desc: fmt.Sprintf("Subchannel(id:%d) created", ac.channelzID),
  546. Severity: channelz.CtINFO,
  547. },
  548. })
  549. }
  550. cc.conns[ac] = struct{}{}
  551. cc.mu.Unlock()
  552. return ac, nil
  553. }
  554. // removeAddrConn removes the addrConn in the subConn from clientConn.
  555. // It also tears down the ac with the given error.
  556. func (cc *ClientConn) removeAddrConn(ac *addrConn, err error) {
  557. cc.mu.Lock()
  558. if cc.conns == nil {
  559. cc.mu.Unlock()
  560. return
  561. }
  562. delete(cc.conns, ac)
  563. cc.mu.Unlock()
  564. ac.tearDown(err)
  565. }
  566. func (cc *ClientConn) channelzMetric() *channelz.ChannelInternalMetric {
  567. return &channelz.ChannelInternalMetric{
  568. State: cc.GetState(),
  569. Target: cc.target,
  570. CallsStarted: atomic.LoadInt64(&cc.czData.callsStarted),
  571. CallsSucceeded: atomic.LoadInt64(&cc.czData.callsSucceeded),
  572. CallsFailed: atomic.LoadInt64(&cc.czData.callsFailed),
  573. LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&cc.czData.lastCallStartedTime)),
  574. }
  575. }
  576. // Target returns the target string of the ClientConn.
  577. // This is an EXPERIMENTAL API.
  578. func (cc *ClientConn) Target() string {
  579. return cc.target
  580. }
  581. func (cc *ClientConn) incrCallsStarted() {
  582. atomic.AddInt64(&cc.czData.callsStarted, 1)
  583. atomic.StoreInt64(&cc.czData.lastCallStartedTime, time.Now().UnixNano())
  584. }
  585. func (cc *ClientConn) incrCallsSucceeded() {
  586. atomic.AddInt64(&cc.czData.callsSucceeded, 1)
  587. }
  588. func (cc *ClientConn) incrCallsFailed() {
  589. atomic.AddInt64(&cc.czData.callsFailed, 1)
  590. }
  591. // connect starts creating a transport.
  592. // It does nothing if the ac is not IDLE.
  593. // TODO(bar) Move this to the addrConn section.
  594. func (ac *addrConn) connect() error {
  595. ac.mu.Lock()
  596. if ac.state == connectivity.Shutdown {
  597. ac.mu.Unlock()
  598. return errConnClosing
  599. }
  600. if ac.state != connectivity.Idle {
  601. ac.mu.Unlock()
  602. return nil
  603. }
  604. ac.updateConnectivityState(connectivity.Connecting)
  605. ac.cc.handleSubConnStateChange(ac.acbw, ac.state)
  606. ac.mu.Unlock()
  607. // Start a goroutine connecting to the server asynchronously.
  608. go ac.resetTransport(false)
  609. return nil
  610. }
  611. // tryUpdateAddrs tries to update ac.addrs with the new addresses list.
  612. //
  613. // It checks whether current connected address of ac is in the new addrs list.
  614. // - If true, it updates ac.addrs and returns true. The ac will keep using
  615. // the existing connection.
  616. // - If false, it does nothing and returns false.
  617. func (ac *addrConn) tryUpdateAddrs(addrs []resolver.Address) bool {
  618. ac.mu.Lock()
  619. defer ac.mu.Unlock()
  620. grpclog.Infof("addrConn: tryUpdateAddrs curAddr: %v, addrs: %v", ac.curAddr, addrs)
  621. if ac.state == connectivity.Shutdown {
  622. ac.addrs = addrs
  623. return true
  624. }
  625. var curAddrFound bool
  626. for _, a := range addrs {
  627. if reflect.DeepEqual(ac.curAddr, a) {
  628. curAddrFound = true
  629. break
  630. }
  631. }
  632. grpclog.Infof("addrConn: tryUpdateAddrs curAddrFound: %v", curAddrFound)
  633. if curAddrFound {
  634. ac.addrs = addrs
  635. ac.addrIdx = 0 // Start reconnecting from beginning in the new list.
  636. }
  637. return curAddrFound
  638. }
  639. // GetMethodConfig gets the method config of the input method.
  640. // If there's an exact match for input method (i.e. /service/method), we return
  641. // the corresponding MethodConfig.
  642. // If there isn't an exact match for the input method, we look for the default config
  643. // under the service (i.e /service/). If there is a default MethodConfig for
  644. // the service, we return it.
  645. // Otherwise, we return an empty MethodConfig.
  646. func (cc *ClientConn) GetMethodConfig(method string) MethodConfig {
  647. // TODO: Avoid the locking here.
  648. cc.mu.RLock()
  649. defer cc.mu.RUnlock()
  650. m, ok := cc.sc.Methods[method]
  651. if !ok {
  652. i := strings.LastIndex(method, "/")
  653. m = cc.sc.Methods[method[:i+1]]
  654. }
  655. return m
  656. }
  657. func (cc *ClientConn) getTransport(ctx context.Context, failfast bool, method string) (transport.ClientTransport, func(balancer.DoneInfo), error) {
  658. hdr, _ := metadata.FromOutgoingContext(ctx)
  659. t, done, err := cc.blockingpicker.pick(ctx, failfast, balancer.PickOptions{
  660. FullMethodName: method,
  661. Header: hdr,
  662. })
  663. if err != nil {
  664. return nil, nil, toRPCErr(err)
  665. }
  666. return t, done, nil
  667. }
  668. // handleServiceConfig parses the service config string in JSON format to Go native
  669. // struct ServiceConfig, and store both the struct and the JSON string in ClientConn.
  670. func (cc *ClientConn) handleServiceConfig(js string) error {
  671. if cc.dopts.disableServiceConfig {
  672. return nil
  673. }
  674. if cc.scRaw == js {
  675. return nil
  676. }
  677. if channelz.IsOn() {
  678. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  679. // The special formatting of \"%s\" instead of %q is to provide nice printing of service config
  680. // for human consumption.
  681. Desc: fmt.Sprintf("Channel has a new service config \"%s\"", js),
  682. Severity: channelz.CtINFO,
  683. })
  684. }
  685. sc, err := parseServiceConfig(js)
  686. if err != nil {
  687. return err
  688. }
  689. cc.mu.Lock()
  690. // Check if the ClientConn is already closed. Some fields (e.g.
  691. // balancerWrapper) are set to nil when closing the ClientConn, and could
  692. // cause nil pointer panic if we don't have this check.
  693. if cc.conns == nil {
  694. cc.mu.Unlock()
  695. return nil
  696. }
  697. cc.scRaw = js
  698. cc.sc = sc
  699. if sc.retryThrottling != nil {
  700. newThrottler := &retryThrottler{
  701. tokens: sc.retryThrottling.MaxTokens,
  702. max: sc.retryThrottling.MaxTokens,
  703. thresh: sc.retryThrottling.MaxTokens / 2,
  704. ratio: sc.retryThrottling.TokenRatio,
  705. }
  706. cc.retryThrottler.Store(newThrottler)
  707. } else {
  708. cc.retryThrottler.Store((*retryThrottler)(nil))
  709. }
  710. if sc.LB != nil && *sc.LB != grpclbName { // "grpclb" is not a valid balancer option in service config.
  711. if cc.curBalancerName == grpclbName {
  712. // If current balancer is grpclb, there's at least one grpclb
  713. // balancer address in the resolved list. Don't switch the balancer,
  714. // but change the previous balancer name, so if a new resolved
  715. // address list doesn't contain grpclb address, balancer will be
  716. // switched to *sc.LB.
  717. cc.preBalancerName = *sc.LB
  718. } else {
  719. cc.switchBalancer(*sc.LB)
  720. cc.balancerWrapper.handleResolvedAddrs(cc.curAddresses, nil)
  721. }
  722. }
  723. cc.mu.Unlock()
  724. return nil
  725. }
  726. func (cc *ClientConn) resolveNow(o resolver.ResolveNowOption) {
  727. cc.mu.RLock()
  728. r := cc.resolverWrapper
  729. cc.mu.RUnlock()
  730. if r == nil {
  731. return
  732. }
  733. go r.resolveNow(o)
  734. }
  735. // ResetConnectBackoff wakes up all subchannels in transient failure and causes
  736. // them to attempt another connection immediately. It also resets the backoff
  737. // times used for subsequent attempts regardless of the current state.
  738. //
  739. // In general, this function should not be used. Typical service or network
  740. // outages result in a reasonable client reconnection strategy by default.
  741. // However, if a previously unavailable network becomes available, this may be
  742. // used to trigger an immediate reconnect.
  743. //
  744. // This API is EXPERIMENTAL.
  745. func (cc *ClientConn) ResetConnectBackoff() {
  746. cc.mu.Lock()
  747. defer cc.mu.Unlock()
  748. for ac := range cc.conns {
  749. ac.resetConnectBackoff()
  750. }
  751. }
  752. // Close tears down the ClientConn and all underlying connections.
  753. func (cc *ClientConn) Close() error {
  754. defer cc.cancel()
  755. cc.mu.Lock()
  756. if cc.conns == nil {
  757. cc.mu.Unlock()
  758. return ErrClientConnClosing
  759. }
  760. conns := cc.conns
  761. cc.conns = nil
  762. cc.csMgr.updateState(connectivity.Shutdown)
  763. rWrapper := cc.resolverWrapper
  764. cc.resolverWrapper = nil
  765. bWrapper := cc.balancerWrapper
  766. cc.balancerWrapper = nil
  767. cc.mu.Unlock()
  768. cc.blockingpicker.close()
  769. if rWrapper != nil {
  770. rWrapper.close()
  771. }
  772. if bWrapper != nil {
  773. bWrapper.close()
  774. }
  775. for ac := range conns {
  776. ac.tearDown(ErrClientConnClosing)
  777. }
  778. if channelz.IsOn() {
  779. ted := &channelz.TraceEventDesc{
  780. Desc: "Channel Deleted",
  781. Severity: channelz.CtINFO,
  782. }
  783. if cc.dopts.channelzParentID != 0 {
  784. ted.Parent = &channelz.TraceEventDesc{
  785. Desc: fmt.Sprintf("Nested channel(id:%d) deleted", cc.channelzID),
  786. Severity: channelz.CtINFO,
  787. }
  788. }
  789. channelz.AddTraceEvent(cc.channelzID, ted)
  790. // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to
  791. // the entity beng deleted, and thus prevent it from being deleted right away.
  792. channelz.RemoveEntry(cc.channelzID)
  793. }
  794. return nil
  795. }
// addrConn is a network connection to a given address.
//
// It tracks the list of candidate addresses, the transport currently in use,
// the connectivity state, and the backoff bookkeeping used while reconnecting.
type addrConn struct {
	// ctx is the parent of every connect context and is watched while
	// waiting out a backoff; cancel is invoked by tearDown.
	ctx    context.Context
	cancel context.CancelFunc

	cc    *ClientConn // The ClientConn this addrConn reports state changes to.
	dopts dialOptions // Dial options; copts.KeepaliveParams is refreshed from cc.mkp before each connection attempt.
	// acbw is the balancer.SubConn handle passed to
	// cc.handleSubConnStateChange on every state change.
	acbw balancer.SubConn
	// scopts holds the sub-connection options; a non-nil CredsBundle
	// overrides the one in dopts.copts when connecting.
	scopts balancer.NewSubConnOptions

	transport transport.ClientTransport // The current transport.

	// mu is held by the methods of addrConn while they read or write the
	// fields below. NOTE(review): the exact set of guarded fields is not
	// spelled out in the code; confirm before relying on it.
	mu      sync.Mutex
	addrIdx int                // The index in addrs list to start reconnecting from.
	curAddr resolver.Address   // The current address.
	addrs   []resolver.Address // All addresses that the resolver resolved to.

	// Use updateConnectivityState for updating addrConn's connectivity state.
	state connectivity.State

	tearDownErr error // The reason this addrConn is torn down.

	// backoffIdx is the retry count handed to the backoff strategy; it is
	// incremented each time the address list is exhausted and reset to 0 on
	// a successful handshake or by resetConnectBackoff.
	backoffIdx int
	// backoffDeadline is the time until which resetTransport needs to
	// wait before increasing backoffIdx count.
	backoffDeadline time.Time
	// connectDeadline is the time by which all connection
	// negotiations must complete.
	connectDeadline time.Time

	// resetBackoff is closed (and then replaced) by resetConnectBackoff to
	// wake a goroutine that is waiting out a backoff in nextAddr.
	resetBackoff chan struct{}

	channelzID int64 // channelz unique identification number
	czData     *channelzData // Call counters, updated with atomic operations.

	// successfulHandshake is set once the client preface has been written
	// and the server preface received; nextAddr clears it again.
	successfulHandshake bool
}
  824. // Note: this requires a lock on ac.mu.
  825. func (ac *addrConn) updateConnectivityState(s connectivity.State) {
  826. ac.state = s
  827. if channelz.IsOn() {
  828. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  829. Desc: fmt.Sprintf("Subchannel Connectivity change to %v", s),
  830. Severity: channelz.CtINFO,
  831. })
  832. }
  833. }
  834. // adjustParams updates parameters used to create transports upon
  835. // receiving a GoAway.
  836. func (ac *addrConn) adjustParams(r transport.GoAwayReason) {
  837. switch r {
  838. case transport.GoAwayTooManyPings:
  839. v := 2 * ac.dopts.copts.KeepaliveParams.Time
  840. ac.cc.mu.Lock()
  841. if v > ac.cc.mkp.Time {
  842. ac.cc.mkp.Time = v
  843. }
  844. ac.cc.mu.Unlock()
  845. }
  846. }
// resetTransport makes sure that a healthy ac.transport exists.
//
// The transport will close itself when it encounters an error, or on GOAWAY, or on deadline waiting for handshake, or
// when the clientconn is closed. Each iteration creating a new transport will try a different address that the balancer
// assigned to the addrConn, until it has tried all addresses. Once it has tried all addresses, it will re-resolve to
// get a new address list. If an error is received, the list is re-resolved and the next reset attempt will try from the
// beginning. This method has backoff built in. The backoff amount starts at 0 and increases each time resolution occurs
// (addresses are exhausted). The backoff amount is reset to 0 each time a handshake is received.
//
// If the DialOption WithWaitForHandshake was set, resetTransport returns successfully only after handshake is received.
//
// The loop ends when the addrConn is found in the Shutdown state, when
// nextAddr reports an error, or when createTransport succeeds.
func (ac *addrConn) resetTransport(resolveNow bool) {
	for {
		// If this is the first in a line of resets, we want to resolve immediately. The only other time we
		// want to reset is if we have tried all the addresses handed to us.
		// NOTE(review): resolveNow is never cleared inside the loop, so when it
		// is true every iteration (not just the first) requests a re-resolution.
		if resolveNow {
			ac.mu.Lock()
			ac.cc.resolveNow(resolver.ResolveNowOption{})
			ac.mu.Unlock()
		}

		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			ac.mu.Unlock()
			return
		}

		// If the connection is READY, a failure must have occurred.
		// Otherwise, we'll consider this is a transient failure when:
		//   We've exhausted all addresses
		//   We're in CONNECTING
		//   And it's not the very first addr to try TODO(deklerk) find a better way to do this than checking ac.successfulHandshake
		if ac.state == connectivity.Ready || (ac.addrIdx == len(ac.addrs)-1 && ac.state == connectivity.Connecting && !ac.successfulHandshake) {
			ac.updateConnectivityState(connectivity.TransientFailure)
			ac.cc.handleSubConnStateChange(ac.acbw, ac.state)
		}
		// Drop the reference to the old transport before picking the next address.
		ac.transport = nil
		ac.mu.Unlock()

		// nextAddr takes ac.mu itself and may block for the backoff period,
		// so it must be called with the lock released.
		if err := ac.nextAddr(); err != nil {
			return
		}

		ac.mu.Lock()
		// Re-check for shutdown: the state may have changed while the lock
		// was released.
		if ac.state == connectivity.Shutdown {
			ac.mu.Unlock()
			return
		}

		backoffIdx := ac.backoffIdx
		backoffFor := ac.dopts.bs.Backoff(backoffIdx)

		// This will be the duration that dial gets to finish.
		dialDuration := getMinConnectTimeout()
		if backoffFor > dialDuration {
			// Give dial more time as we keep failing to connect.
			dialDuration = backoffFor
		}
		// Both deadlines are measured from the same starting instant.
		start := time.Now()
		connectDeadline := start.Add(dialDuration)
		ac.backoffDeadline = start.Add(backoffFor)
		ac.connectDeadline = connectDeadline

		ac.mu.Unlock()

		// Pick up the ClientConn-wide keepalive parameters (adjustParams may
		// have raised them) under cc.mu, without holding ac.mu.
		ac.cc.mu.RLock()
		ac.dopts.copts.KeepaliveParams = ac.cc.mkp
		ac.cc.mu.RUnlock()

		ac.mu.Lock()

		if ac.state == connectivity.Shutdown {
			ac.mu.Unlock()
			return
		}

		// Report CONNECTING only on an actual transition into that state.
		if ac.state != connectivity.Connecting {
			ac.updateConnectivityState(connectivity.Connecting)
			ac.cc.handleSubConnStateChange(ac.acbw, ac.state)
		}

		addr := ac.addrs[ac.addrIdx]
		// copts is a local copy so that the per-SubConn credentials bundle
		// can override the dial-option one for this attempt.
		copts := ac.dopts.copts
		if ac.scopts.CredsBundle != nil {
			copts.CredsBundle = ac.scopts.CredsBundle
		}
		ac.mu.Unlock()

		if channelz.IsOn() {
			channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
				Desc:     fmt.Sprintf("Subchannel picks a new address %q to connect", addr.Addr),
				Severity: channelz.CtINFO,
			})
		}

		// On failure, loop around to try the next address.
		if err := ac.createTransport(backoffIdx, addr, copts, connectDeadline); err != nil {
			continue
		}

		return
	}
}
// createTransport creates a connection to one of the backends in addrs.
//
// It dials addr with copts, giving the attempt until connectDeadline. On
// success the addrConn is moved to READY, the new transport and address are
// recorded, and nil is returned. It returns errConnClosing when the addrConn
// is found in the Shutdown state, and otherwise the dial error or a handshake
// timeout error. nil is also returned when, while waiting for the handshake,
// the transport reports that it has already closed.
//
// Reconnection after this transport dies is coordinated through two channels:
// closing skipReset tells the transport callbacks that the caller's loop will
// reconnect; closing allowedToReset lets the callbacks start a reset
// themselves. oneReset ensures the callbacks trigger at most one reset.
//
// NOTE(review): backoffNum is not referenced in the body.
func (ac *addrConn) createTransport(backoffNum int, addr resolver.Address, copts transport.ConnectOptions, connectDeadline time.Time) error {
	oneReset := sync.Once{}
	skipReset := make(chan struct{})
	allowedToReset := make(chan struct{})
	prefaceReceived := make(chan struct{}) // Closed by onPrefaceReceipt.
	onCloseCalled := make(chan struct{})   // Closed by onClose.

	// prefaceMu guards the two preface flags below, which are set from
	// different goroutines (this one and the onPrefaceReceipt callback).
	var prefaceMu sync.Mutex
	var serverPrefaceReceived bool
	var clientPrefaceWrote bool

	// onGoAway adjusts the keepalive parameters for the GoAway reason and
	// then, once permitted, starts a reset on a new goroutine.
	onGoAway := func(r transport.GoAwayReason) {
		ac.mu.Lock()
		ac.adjustParams(r)
		ac.mu.Unlock()
		// Blocks until this function has decided who is responsible for
		// reconnecting.
		select {
		case <-skipReset: // The outer resetTransport loop will handle reconnection.
			return
		case <-allowedToReset: // We're in the clear to reset.
			go oneReset.Do(func() { ac.resetTransport(false) })
		}
	}

	// prefaceTimer fires at connectDeadline if the server preface has not
	// arrived (or the transport closed) by then.
	prefaceTimer := time.NewTimer(connectDeadline.Sub(time.Now()))

	// onClose signals that the transport has closed and then, once
	// permitted, runs the reset on the calling goroutine.
	onClose := func() {
		close(onCloseCalled)
		prefaceTimer.Stop()

		select {
		case <-skipReset: // The outer resetTransport loop will handle reconnection.
			return
		case <-allowedToReset: // We're in the clear to reset.
			oneReset.Do(func() { ac.resetTransport(false) })
		}
	}

	target := transport.TargetInfo{
		Addr:      addr.Addr,
		Metadata:  addr.Metadata,
		Authority: ac.cc.authority,
	}

	// onPrefaceReceipt records the server preface. If the client preface has
	// also been written, the handshake is complete and the backoff/address
	// bookkeeping is reset.
	onPrefaceReceipt := func() {
		close(prefaceReceived)
		prefaceTimer.Stop()

		// TODO(deklerk): optimization; does anyone else actually use this lock? maybe we can just remove it for this scope
		ac.mu.Lock()

		prefaceMu.Lock()
		serverPrefaceReceived = true
		if clientPrefaceWrote {
			ac.successfulHandshake = true
			ac.backoffDeadline = time.Time{}
			ac.connectDeadline = time.Time{}
			ac.addrIdx = 0
			ac.backoffIdx = 0
		}
		prefaceMu.Unlock()

		ac.mu.Unlock()
	}

	// Do not cancel in the success path because of this issue in Go1.6: https://github.com/golang/go/issues/15078.
	connectCtx, cancel := context.WithDeadline(ac.ctx, connectDeadline)
	if channelz.IsOn() {
		copts.ChannelzParentID = ac.channelzID
	}

	newTr, err := transport.NewClientTransport(connectCtx, ac.cc.ctx, target, copts, onPrefaceReceipt, onGoAway, onClose)

	if err == nil {
		// The transport was created, so the client side of the handshake is
		// done; the handshake is complete if the server preface has already
		// been seen.
		prefaceMu.Lock()
		clientPrefaceWrote = true
		if serverPrefaceReceived {
			ac.successfulHandshake = true
		}
		prefaceMu.Unlock()

		if ac.dopts.waitForHandshake {
			// Block until the handshake outcome is known.
			select {
			case <-prefaceTimer.C:
				// We didn't get the preface in time.
				newTr.Close()
				err = errors.New("timed out waiting for server handshake")
			case <-prefaceReceived:
				// We got the preface - huzzah! things are good.
			case <-onCloseCalled:
				// The transport has already closed - noop.
				// Unblock onClose so that it performs the reset itself.
				close(allowedToReset)
				return nil
			}
		} else {
			// Watch for a handshake timeout in the background instead of
			// blocking the caller.
			go func() {
				select {
				case <-prefaceTimer.C:
					// We didn't get the preface in time.
					newTr.Close()
				case <-prefaceReceived:
					// We got the preface just in the nick of time - huzzah!
				case <-onCloseCalled:
					// The transport has already closed - noop.
				}
			}()
		}
	}

	if err != nil {
		// newTr is either nil, or closed.
		cancel()
		ac.cc.blockingpicker.updateConnectionError(err)
		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			// ac.tearDown(...) has been invoked.
			ac.mu.Unlock()

			// We don't want to reset during this close because we prefer to kick out of this function and let the loop
			// in resetTransport take care of reconnecting.
			close(skipReset)

			return errConnClosing
		}
		ac.mu.Unlock()
		grpclog.Warningf("grpc: addrConn.createTransport failed to connect to %v. Err :%v. Reconnecting...", addr, err)

		// We don't want to reset during this close because we prefer to kick out of this function and let the loop
		// in resetTransport take care of reconnecting.
		close(skipReset)

		return err
	}

	ac.mu.Lock()

	if ac.state == connectivity.Shutdown {
		ac.mu.Unlock()

		// We don't want to reset during this close because we prefer to kick out of this function and let the loop
		// in resetTransport take care of reconnecting.
		close(skipReset)

		newTr.Close()
		return errConnClosing
	}

	// Publish the new transport: report READY, then record the transport and
	// the address it is connected to.
	ac.updateConnectivityState(connectivity.Ready)
	ac.cc.handleSubConnStateChange(ac.acbw, ac.state)
	ac.transport = newTr
	ac.curAddr = addr

	ac.mu.Unlock()

	// Ok, _now_ we will finally let the transport reset if it encounters a closable error. Without this, the reader
	// goroutine failing races with all the code in this method that sets the connection to "ready".
	close(allowedToReset)
	return nil
}
  1066. // nextAddr increments the addrIdx if there are more addresses to try. If
  1067. // there are no more addrs to try it will re-resolve, set addrIdx to 0, and
  1068. // increment the backoffIdx.
  1069. //
  1070. // nextAddr must be called without ac.mu being held.
  1071. func (ac *addrConn) nextAddr() error {
  1072. ac.mu.Lock()
  1073. // If a handshake has been observed, we expect the counters to have manually
  1074. // been reset so we'll just return, since we want the next usage to start
  1075. // at index 0.
  1076. if ac.successfulHandshake {
  1077. ac.successfulHandshake = false
  1078. ac.mu.Unlock()
  1079. return nil
  1080. }
  1081. if ac.addrIdx < len(ac.addrs)-1 {
  1082. ac.addrIdx++
  1083. ac.mu.Unlock()
  1084. return nil
  1085. }
  1086. ac.addrIdx = 0
  1087. ac.backoffIdx++
  1088. if ac.state == connectivity.Shutdown {
  1089. ac.mu.Unlock()
  1090. return errConnClosing
  1091. }
  1092. ac.cc.resolveNow(resolver.ResolveNowOption{})
  1093. backoffDeadline := ac.backoffDeadline
  1094. b := ac.resetBackoff
  1095. ac.mu.Unlock()
  1096. timer := time.NewTimer(backoffDeadline.Sub(time.Now()))
  1097. select {
  1098. case <-timer.C:
  1099. case <-b:
  1100. timer.Stop()
  1101. case <-ac.ctx.Done():
  1102. timer.Stop()
  1103. return ac.ctx.Err()
  1104. }
  1105. return nil
  1106. }
  1107. func (ac *addrConn) resetConnectBackoff() {
  1108. ac.mu.Lock()
  1109. close(ac.resetBackoff)
  1110. ac.backoffIdx = 0
  1111. ac.resetBackoff = make(chan struct{})
  1112. ac.mu.Unlock()
  1113. }
  1114. // getReadyTransport returns the transport if ac's state is READY.
  1115. // Otherwise it returns nil, false.
  1116. // If ac's state is IDLE, it will trigger ac to connect.
  1117. func (ac *addrConn) getReadyTransport() (transport.ClientTransport, bool) {
  1118. ac.mu.Lock()
  1119. if ac.state == connectivity.Ready && ac.transport != nil {
  1120. t := ac.transport
  1121. ac.mu.Unlock()
  1122. return t, true
  1123. }
  1124. var idle bool
  1125. if ac.state == connectivity.Idle {
  1126. idle = true
  1127. }
  1128. ac.mu.Unlock()
  1129. // Trigger idle ac to connect.
  1130. if idle {
  1131. ac.connect()
  1132. }
  1133. return nil, false
  1134. }
  1135. // tearDown starts to tear down the addrConn.
  1136. // TODO(zhaoq): Make this synchronous to avoid unbounded memory consumption in
  1137. // some edge cases (e.g., the caller opens and closes many addrConn's in a
  1138. // tight loop.
  1139. // tearDown doesn't remove ac from ac.cc.conns.
  1140. func (ac *addrConn) tearDown(err error) {
  1141. ac.mu.Lock()
  1142. if ac.state == connectivity.Shutdown {
  1143. ac.mu.Unlock()
  1144. return
  1145. }
  1146. // We have to set the state to Shutdown before anything else to prevent races
  1147. // between setting the state and logic that waits on context cancelation / etc.
  1148. ac.updateConnectivityState(connectivity.Shutdown)
  1149. ac.cancel()
  1150. ac.tearDownErr = err
  1151. ac.cc.handleSubConnStateChange(ac.acbw, ac.state)
  1152. ac.curAddr = resolver.Address{}
  1153. if err == errConnDrain && ac.transport != nil {
  1154. // GracefulClose(...) may be executed multiple times when
  1155. // i) receiving multiple GoAway frames from the server; or
  1156. // ii) there are concurrent name resolver/Balancer triggered
  1157. // address removal and GoAway.
  1158. // We have to unlock and re-lock here because GracefulClose => Close => onClose, which requires locking ac.mu.
  1159. ac.mu.Unlock()
  1160. ac.transport.GracefulClose()
  1161. ac.mu.Lock()
  1162. }
  1163. if channelz.IsOn() {
  1164. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  1165. Desc: "Subchannel Deleted",
  1166. Severity: channelz.CtINFO,
  1167. Parent: &channelz.TraceEventDesc{
  1168. Desc: fmt.Sprintf("Subchanel(id:%d) deleted", ac.channelzID),
  1169. Severity: channelz.CtINFO,
  1170. },
  1171. })
  1172. // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to
  1173. // the entity beng deleted, and thus prevent it from being deleted right away.
  1174. channelz.RemoveEntry(ac.channelzID)
  1175. }
  1176. ac.mu.Unlock()
  1177. }
  1178. func (ac *addrConn) getState() connectivity.State {
  1179. ac.mu.Lock()
  1180. defer ac.mu.Unlock()
  1181. return ac.state
  1182. }
  1183. func (ac *addrConn) ChannelzMetric() *channelz.ChannelInternalMetric {
  1184. ac.mu.Lock()
  1185. addr := ac.curAddr.Addr
  1186. ac.mu.Unlock()
  1187. return &channelz.ChannelInternalMetric{
  1188. State: ac.getState(),
  1189. Target: addr,
  1190. CallsStarted: atomic.LoadInt64(&ac.czData.callsStarted),
  1191. CallsSucceeded: atomic.LoadInt64(&ac.czData.callsSucceeded),
  1192. CallsFailed: atomic.LoadInt64(&ac.czData.callsFailed),
  1193. LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&ac.czData.lastCallStartedTime)),
  1194. }
  1195. }
  1196. func (ac *addrConn) incrCallsStarted() {
  1197. atomic.AddInt64(&ac.czData.callsStarted, 1)
  1198. atomic.StoreInt64(&ac.czData.lastCallStartedTime, time.Now().UnixNano())
  1199. }
  1200. func (ac *addrConn) incrCallsSucceeded() {
  1201. atomic.AddInt64(&ac.czData.callsSucceeded, 1)
  1202. }
  1203. func (ac *addrConn) incrCallsFailed() {
  1204. atomic.AddInt64(&ac.czData.callsFailed, 1)
  1205. }
  1206. type retryThrottler struct {
  1207. max float64
  1208. thresh float64
  1209. ratio float64
  1210. mu sync.Mutex
  1211. tokens float64 // TODO(dfawley): replace with atomic and remove lock.
  1212. }
  1213. // throttle subtracts a retry token from the pool and returns whether a retry
  1214. // should be throttled (disallowed) based upon the retry throttling policy in
  1215. // the service config.
  1216. func (rt *retryThrottler) throttle() bool {
  1217. if rt == nil {
  1218. return false
  1219. }
  1220. rt.mu.Lock()
  1221. defer rt.mu.Unlock()
  1222. rt.tokens--
  1223. if rt.tokens < 0 {
  1224. rt.tokens = 0
  1225. }
  1226. return rt.tokens <= rt.thresh
  1227. }
  1228. func (rt *retryThrottler) successfulRPC() {
  1229. if rt == nil {
  1230. return
  1231. }
  1232. rt.mu.Lock()
  1233. defer rt.mu.Unlock()
  1234. rt.tokens += rt.ratio
  1235. if rt.tokens > rt.max {
  1236. rt.tokens = rt.max
  1237. }
  1238. }
  1239. type channelzChannel struct {
  1240. cc *ClientConn
  1241. }
  1242. func (c *channelzChannel) ChannelzMetric() *channelz.ChannelInternalMetric {
  1243. return c.cc.channelzMetric()
  1244. }
  1245. // ErrClientConnTimeout indicates that the ClientConn cannot establish the
  1246. // underlying connections within the specified timeout.
  1247. //
  1248. // Deprecated: This error is never returned by grpc and should not be
  1249. // referenced by users.
  1250. var ErrClientConnTimeout = errors.New("grpc: timed out when dialing")