Skip to content
代码片段 群组 项目
提交 3009caa6 编辑于 作者: Heejae Chang's avatar Heejae Chang

added retry to InvokeAsync as well for OOP start up.

上级 b7354608
No related branches found
No related tags found
......@@ -36,23 +36,14 @@ public static async Task<RemoteHostClient> CreateAsync(
using (Logger.LogBlock(FunctionId.ServiceHubRemoteHostClient_CreateAsync, cancellationToken))
// give each client a unique id so that we can distinguish different clients when the service is restarted
var currentInstanceId = Interlocked.Add(ref s_instanceId, 1);
var primary = new HubClient("ManagedLanguage.IDE.RemoteHostClient");
var current = $"VS ({Process.GetCurrentProcess().Id}) ({currentInstanceId})";
var hostGroup = new HostGroup(current);
var timeout = TimeSpan.FromMilliseconds(workspace.Options.GetOption(RemoteHostOptions.RequestServiceTimeoutInMS));
var remoteHostStream = await RequestServiceAsync(primary, WellKnownRemoteHostServices.RemoteHostService, hostGroup, timeout, cancellationToken).ConfigureAwait(false);
var instance = new ServiceHubRemoteHostClient(workspace, primary, hostGroup, remoteHostStream);
// make sure connection is done right
var host = await instance._rpc.InvokeAsync<string>(nameof(IRemoteHostService.Connect), current, TelemetryService.DefaultSession.SerializeSettings()).ConfigureAwait(false);
// TODO: change this to non fatal watson and make VS to use inproc implementation
Contract.ThrowIfFalse(host == current.ToString());
// Retry (with timeout) until we can connect to RemoteHost (service hub process).
// we are seeing cases where we fail to connect to the service hub process when the machine is under heavy load.
// (see as one of example)
var instance = await RetryRemoteCallAsync<IOException, ServiceHubRemoteHostClient>(
() => CreateWorkerAsync(workspace, primary, timeout, cancellationToken), timeout, cancellationToken).ConfigureAwait(false);
......@@ -65,6 +56,43 @@ public static async Task<RemoteHostClient> CreateAsync(
public static async Task<ServiceHubRemoteHostClient> CreateWorkerAsync(Workspace workspace, HubClient primary, TimeSpan timeout, CancellationToken cancellationToken)
ServiceHubRemoteHostClient client = null;
// give each client a unique id so that we can distinguish different clients when the service is restarted
var currentInstanceId = Interlocked.Add(ref s_instanceId, 1);
var current = $"VS ({Process.GetCurrentProcess().Id}) ({currentInstanceId})";
var hostGroup = new HostGroup(current);
var remoteHostStream = await RequestServiceAsync(
primary, WellKnownRemoteHostServices.RemoteHostService, hostGroup, timeout, cancellationToken).ConfigureAwait(false);
client = new ServiceHubRemoteHostClient(workspace, primary, hostGroup, remoteHostStream);
await client._rpc.InvokeWithCancellationAsync<string>(
new object[] { current, TelemetryService.DefaultSession.SerializeSettings() },
return client;
catch (Exception ex)
// make sure we shut down the client if initializing it has failed.
// translate to our own cancellation if it is raised.
// otherwise, report watson and throw original exception
WatsonReporter.Report("ServiceHub creation failed", ex, ReportDetailInfo);
private static async Task RegisterWorkspaceHostAsync(Workspace workspace, RemoteHostClient client)
var vsWorkspace = workspace as VisualStudioWorkspaceImpl;
......@@ -88,7 +116,7 @@ await Task.Factory.SafeStartNew(() =>
private ServiceHubRemoteHostClient(
Workspace workspace, HubClient hubClient, HostGroup hostGroup, Stream stream) :
_hubClient = hubClient;
_hostGroup = hostGroup;
......@@ -136,6 +164,40 @@ private void OnRpcDisconnected(object sender, JsonRpcDisconnectedEventArgs e)
/// <summary>
/// Calls <paramref name="funcAsync"/> and keeps retrying until <paramref name="timeout"/> has elapsed if the call throws
/// <typeparamref name="TException"/>. Any other exception from the call is not handled here.
/// </summary>
private static async Task<TResult> RetryRemoteCallAsync<TException, TResult>(
Func<Task<TResult>> funcAsync,
TimeSpan timeout,
CancellationToken cancellationToken) where TException : Exception
const int retry_delayInMS = 50;
var start = DateTime.UtcNow;
while (DateTime.UtcNow - start < timeout)
return await funcAsync().ConfigureAwait(false);
catch (TException)
// throw cancellation token if operation is cancelled
// wait for retry_delayInMS before next try
await Task.Delay(retry_delayInMS, cancellationToken).ConfigureAwait(false);
// operation timed out, more than we are willing to wait
throw new TimeoutException("RequestServiceAsync timed out");
private static async Task<Stream> RequestServiceAsync(
HubClient client,
string serviceName,
......@@ -156,7 +218,17 @@ private static async Task<Stream> RequestServiceAsync(
return await RequestServiceAsync(client, descriptor, timeout, cancellationToken).ConfigureAwait(false);
// we wrap HubClient.RequestServiceAsync since we can't control its internal timeout value ourselves.
// a bug has been opened to track the issue.
// retry on cancellation since HubClient will throw its own cancellation exception
// when it can't connect to the service hub service for some reason
// (e.g., the OOP process is blocked by GC and not responding to requests)
return await RetryRemoteCallAsync<OperationCanceledException, Stream>(
() => client.RequestServiceAsync(descriptor, cancellationToken),
catch (RemoteInvocationException ex)
......@@ -184,41 +256,6 @@ private static async Task<Stream> RequestServiceAsync(
throw ExceptionUtilities.Unreachable;
private static async Task<Stream> RequestServiceAsync(HubClient client, ServiceDescriptor descriptor, TimeSpan timeout, CancellationToken cancellationToken = default(CancellationToken))
// we are wrapping HubClient.RequestServiceAsync since we can't control its internal timeout value ourselves.
// we have bug opened to track the issue.
const int retry_delayInMS = 50;
var start = DateTime.UtcNow;
while (start - DateTime.UtcNow < timeout)
return await client.RequestServiceAsync(descriptor, cancellationToken).ConfigureAwait(false);
catch (OperationCanceledException)
// if it is our own cancellation token, then rethrow
// otherwise, let us retry.
// we do this since HubClient itself can throw its own cancellation token
// when it couldn't connect to service hub service for some reasons
// (ex, OOP process GC blocked and not responding to request)
// wait for retry_delayInMS before next try
await Task.Delay(retry_delayInMS, cancellationToken).ConfigureAwait(false);
// request service to HubClient timed out, more than we are willing to wait
throw new TimeoutException("RequestServiceAsync timed out");
private static int ReportDetailInfo(IFaultUtility faultUtility)
// 0 means send watson, otherwise, cancel watson
......@@ -37,7 +37,7 @@ public void TestRemoteHostConnect()
var remoteHostService = CreateService();
var input = "Test";
var output = remoteHostService.Connect(input, serializedSession: null);
var output = remoteHostService.Connect(input, serializedSession: null, cancellationToken: CancellationToken.None);
Assert.Equal(input, output);
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System.Threading;
using System.Threading.Tasks;
namespace Microsoft.CodeAnalysis.Remote
internal interface IRemoteHostService
string Connect(string host, string serializedSession);
string Connect(string host, string serializedSession, CancellationToken cancellationToken);
Task SynchronizePrimaryWorkspaceAsync(Checksum checksum);
Task SynchronizeGlobalAssetsAsync(Checksum[] checksums);
......@@ -41,11 +41,6 @@ static RemoteHostService()
// we set up logger here
RoslynLogger.SetLogger(new EtwLogger(GetLoggingChecker()));
// Set this process's priority BelowNormal.
// this should let us to freely try to use all resources possible without worrying about affecting
// host's work such as responsiveness or build.
Process.GetCurrentProcess().PriorityClass = ProcessPriorityClass.BelowNormal;
......@@ -56,8 +51,11 @@ public RemoteHostService(Stream stream, IServiceProvider serviceProvider) :
public string Connect(string host, string serializedSession)
public string Connect(string host, string serializedSession, CancellationToken cancellationToken)
// this is called only once when Host (VS) started RemoteHost (OOP)
_primaryInstance = InstanceId;
var existing = Interlocked.CompareExchange(ref _host, host, null);
......@@ -72,6 +70,15 @@ public string Connect(string host, string serializedSession)
// log telemetry that service hub started
RoslynLogger.Log(FunctionId.RemoteHost_Connect, KeyValueLogMessage.Create(SetSessionInfo));
// serializedSession will be null for testing
if (serializedSession != null)
// Set this process's priority BelowNormal.
// this should let us freely use all available resources without worrying about affecting
// the host's work, such as responsiveness or build.
Process.GetCurrentProcess().PriorityClass = ProcessPriorityClass.BelowNormal;
return _host;
0% 加载中 .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册