Databricks-connect unable to load data

Posted 2025-02-07 11:30:37

After a lot of struggling I finally managed to connect to Databricks remotely, only to find that for some reason I'm not allowed to load data.

I'm posting here in the hope that someone out there has the answer.

What I'm trying to do is use databricks-connect to read .avro files from an Azure storage container.

from pyspark.sql import SparkSession
# With databricks-connect, this attaches to the remote cluster configured
# via `databricks-connect configure` rather than starting a local Spark.
spark = SparkSession.builder.getOrCreate()
# Read the Avro files from the Azure storage container (placeholder path).
df = spark.read.format("avro").load("azure/storage/containerlocation")
df.show()
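
For context, here is a minimal sketch of how this kind of read is typically configured when the storage account is addressed with an explicit abfss:// URI and a service-principal (OAuth) credential instead of a passed-through user token. Every account, container, and credential value below is a placeholder, not the real configuration:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Placeholders -- substitute your own storage account, container,
# and Azure AD service-principal details.
storage_account = "mystorageaccount"
container = "mycontainer"
suffix = "dfs.core.windows.net"

# Authenticate to ADLS Gen2 directly with a service principal
# (standard Hadoop ABFS OAuth configuration keys).
spark.conf.set(f"fs.azure.account.auth.type.{storage_account}.{suffix}", "OAuth")
spark.conf.set(f"fs.azure.account.oauth.provider.type.{storage_account}.{suffix}",
               "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
spark.conf.set(f"fs.azure.account.oauth2.client.id.{storage_account}.{suffix}",
               "<application-id>")
spark.conf.set(f"fs.azure.account.oauth2.client.secret.{storage_account}.{suffix}",
               "<client-secret>")
spark.conf.set(f"fs.azure.account.oauth2.client.endpoint.{storage_account}.{suffix}",
               f"https://login.microsoftonline.com/<tenant-id>/oauth2/token")

# Read the Avro files with a fully qualified path.
df = spark.read.format("avro").load(
    f"abfss://{container}@{storage_account}.{suffix}/path/to/avro")
df.show()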

Error traceback:

---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
c:\Users\Desktop\local_projects\rpi-bdc-rec-eng\training_service\tmp.py in <cell line: 92>()
---> 3 spark.read.format("avro").load("azure/storage/containerlocation")

File ~\AppData\Local\pypoetry\Cache\virtualenvs\rec-eng-training-service-vcboY0OA-py3.8\lib\site-packages\pyspark\sql\readwriter.py:204, in DataFrameReader.load(self, path, format, schema, **options)
    202 self.options(**options)
    203 if isinstance(path, str):
--> 204     return self._df(self._jreader.load(path))
    205 elif path is not None:
    206     if type(path) != list:

File ~\AppData\Local\pypoetry\Cache\virtualenvs\rec-eng-training-service-vcboY0OA-py3.8\lib\site-packages\py4j\java_gateway.py:1304, in JavaMember.__call__(self, *args)
   1298 command = proto.CALL_COMMAND_NAME +\
   1299     self.command_header +\
   1300     args_command +\
   1301     proto.END_COMMAND_PART
   1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
   1305     answer, self.gateway_client, self.target_id, self.name)
   1307 for temp_arg in temp_args:
   1308     temp_arg._detach()

File ~\AppData\Local\pypoetry\Cache\virtualenvs\rec-eng-training-service-vcboY0OA-py3.8\lib\site-packages\pyspark\sql\utils.py:117, in capture_sql_exception.<locals>.deco(*a, **kw)
    115 def deco(*a, **kw):
    116     try:
--> 117         return f(*a, **kw)
    118     except py4j.protocol.Py4JJavaError as e:
    119         converted = convert_exception(e.java_exception)

File ~\AppData\Local\pypoetry\Cache\virtualenvs\rec-eng-training-service-vcboY0OA-py3.8\lib\site-packages\py4j\protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)
    324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
    325 if answer[1] == REFERENCE_TYPE:
--> 326     raise Py4JJavaError(
    327         "An error occurred while calling {0}{1}{2}.\n".
    328         format(target_id, ".", name), value)
    329 else:
    330     raise Py4JError(
    331         "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
    332         format(target_id, ".", name, value))

Py4JJavaError: An error occurred while calling o27.load.
: com.databricks.service.SparkServiceRemoteException: com.google.common.util.concurrent.UncheckedExecutionException: com.databricks.common.client.DatabricksServiceHttpClientException: 403: Your token is missing the required scopes for this endpoint.
    at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2199)
    at com.google.common.cache.LocalCache.get(LocalCache.java:3932)
    at com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4721)
    at com.databricks.backend.daemon.driver.credentials.CachingCredentialStore.get(CachingCredentialStore.scala:60)
    at com.databricks.backend.daemon.driver.credentials.OAuthTokenRefresherClient.refreshToken(OAuthTokenRefresherClient.scala:83)
    at com.databricks.backend.daemon.driver.credentials.OAuthTokenRefresherClient.newToken(OAuthTokenRefresherClient.scala:132)
    at org.apache.spark.credentials.RuntimeCredential.getOrRefresh(CredentialContext.scala:51)
    at org.apache.spark.credentials.CredentialContext$.$anonfun$getCredentialFromStore$2(CredentialContext.scala:221)
    at scala.Option.map(Option.scala:230)
    at org.apache.spark.credentials.CredentialContext$.$anonfun$getCredentialFromStore$1(CredentialContext.scala:220)
    at scala.Option.map(Option.scala:230)
    at org.apache.spark.credentials.CredentialContext$.getCredentialFromStore(CredentialContext.scala:218)
    at org.apache.spark.credentials.CredentialContext$.$anonfun$getCredential$6(CredentialContext.scala:213)
    at scala.Option.flatMap(Option.scala:271)
    at org.apache.spark.credentials.CredentialContext$.$anonfun$getCredential$3(CredentialContext.scala:213)
    at scala.Option.orElse(Option.scala:447)
    at org.apache.spark.credentials.CredentialContext$.getCredential(CredentialContext.scala:213)
    at com.databricks.backend.daemon.data.client.adl.AdlGen2UpgradeCredentialContextTokenProvider.getToken(AdlGen2UpgradeCredentialContextTokenProvider.scala:26)
    at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.services.AbfsClient.getAccessToken(AbfsClient.java:912)
    at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.executeHttpOperation(AbfsRestOperation.java:269)
    at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.execute(AbfsRestOperation.java:232)
    at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.services.AbfsClient.getAclStatus(AbfsClient.java:768)
    at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.services.AbfsClient.getAclStatus(AbfsClient.java:750)
    at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.getIsNamespaceEnabled(AzureBlobFileSystemStore.java:313)
    at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.getFileStatus(AzureBlobFileSystemStore.java:821)
    at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.getFileStatus(AzureBlobFileSystem.java:629)
    at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.tryGetFileStatus(AzureBlobFileSystem.java:1146)
    at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.createFileSystemIfNotExist(AzureBlobFileSystem.java:1461)
    at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.initialize(AzureBlobFileSystem.java:142)
    at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2669)
    at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:370)
    at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
    at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$1(DataSource.scala:845)
    at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:245)
    at scala.collection.immutable.List.foreach(List.scala:392)
    at scala.collection.TraversableLike.flatMap(TraversableLike.scala:245)
    at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:242)
    at scala.collection.immutable.List.flatMap(List.scala:355)
    at org.apache.spark.sql.execution.datasources.DataSource$.checkAndGlobPathIfNecessary(DataSource.scala:843)
    at org.apache.spark.sql.execution.datasources.DataSource.checkAndGlobPathIfNecessary(DataSource.scala:646)
    at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:461)
    at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:444)
    at org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:400)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:400)
    at com.databricks.service.SparkServiceRPCHandler$$anon$1.call(SparkServiceRPCHandler.scala:101)
    at com.databricks.service.SparkServiceRPCHandler$$anon$1.call(SparkServiceRPCHandler.scala:80)
    at com.google.common.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4724)
    at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3522)
    at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2315)
    at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2278)
    at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2193)
    at com.google.common.cache.LocalCache.get(LocalCache.java:3932)
    at com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4721)
    at com.databricks.service.SparkServiceRPCHandler$.getOrLoadAnonymousRelation(SparkServiceRPCHandler.scala:80)
    at com.databricks.service.SparkServiceRPCHandler.execute0(SparkServiceRPCHandler.scala:715)
    at com.databricks.service.SparkServiceRPCHandler.$anonfun$executeRPC0$1(SparkServiceRPCHandler.scala:477)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
    at com.databricks.service.SparkServiceRPCHandler.executeRPC0(SparkServiceRPCHandler.scala:372)
    at com.databricks.service.SparkServiceRPCHandler$$anon$2.call(SparkServiceRPCHandler.scala:323)
    at com.databricks.service.SparkServiceRPCHandler$$anon$2.call(SparkServiceRPCHandler.scala:309)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at com.databricks.service.SparkServiceRPCHandler.$anonfun$executeRPC$1(SparkServiceRPCHandler.scala:359)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
    at com.databricks.service.SparkServiceRPCHandler.executeRPC(SparkServiceRPCHandler.scala:336)
    at com.databricks.service.SparkServiceRPCServlet.doPost(SparkServiceRPCServer.scala:167)
    at javax.servlet.http.HttpServlet.service(HttpServlet.java:707)
    at javax.servlet.http.HttpServlet.service(HttpServlet.java:790)
    at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:799)
    at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:550)
    at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:190)
    at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:501)
    at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
    at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)
    at org.eclipse.jetty.server.Server.handle(Server.java:516)
    at org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:388)
    at org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:633)
    at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:380)
    at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277)
    at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311)
    at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105)
    at org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104)
    at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:882)
    at org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1036)
    at java.lang.Thread.run(Thread.java:748)
Caused by: com.databricks.common.client.DatabricksServiceHttpClientException: 403: Your token is missing the required scopes for this endpoint.
    at com.databricks.common.client.DatabricksServiceHttpClientException.copy(DBHttpClient.scala:802)
    at com.databricks.common.client.RawDBHttpClient.getResponseBody(DBHttpClient.scala:709)
    at com.databricks.common.client.RawDBHttpClient.$anonfun$httpRequestInternal$1(DBHttpClient.scala:654)
    at com.databricks.logging.UsageLogging.$anonfun$recordOperation$1(UsageLogging.scala:395)
    at com.databricks.logging.UsageLogging.executeThunkAndCaptureResultTags$1(UsageLogging.scala:484)
    at com.databricks.logging.UsageLogging.$anonfun$recordOperationWithResultTags$4(UsageLogging.scala:504)
    at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:266)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
    at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:261)
    at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:258)
    at com.databricks.common.client.RawDBHttpClient.withAttributionContext(DBHttpClient.scala:256)
    at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:305)
    at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:297)
    at com.databricks.common.client.RawDBHttpClient.withAttributionTags(DBHttpClient.scala:256)
    at com.databricks.logging.UsageLogging.recordOperationWithResultTags(UsageLogging.scala:479)
    at com.databricks.logging.UsageLogging.recordOperationWithResultTags$(UsageLogging.scala:404)
    at com.databricks.common.client.RawDBHttpClient.recordOperationWithResultTags(DBHttpClient.scala:256)
    at com.databricks.logging.UsageLogging.recordOperation(UsageLogging.scala:395)
    at com.databricks.logging.UsageLogging.recordOperation$(UsageLogging.scala:367)
    at com.databricks.common.client.RawDBHttpClient.recordOperation(DBHttpClient.scala:256)
    at com.databricks.common.client.RawDBHttpClient.httpRequestInternal(DBHttpClient.scala:642)
    at com.databricks.common.client.RawDBHttpClient.entityEnclosingRequestInternal(DBHttpClient.scala:632)
    at com.databricks.common.client.RawDBHttpClient.getInternal(DBHttpClient.scala:587)
    at com.databricks.common.client.RawDBHttpClient.getWithHeaders(DBHttpClient.scala:332)
    at com.databricks.common.client.RawDBHttpClient.get(DBHttpClient.scala:304)
    at com.databricks.backend.daemon.driver.credentials.OAuthTokenRefresherClient.$anonfun$refreshToken$2(OAuthTokenRefresherClient.scala:92)
    at com.databricks.common.client.DBHttpClient$.retryWithDeadline(DBHttpClient.scala:173)
    at com.databricks.backend.daemon.driver.credentials.OAuthTokenRefresherClient.reliably(OAuthTokenRefresherClient.scala:53)
    at com.databricks.backend.daemon.driver.credentials.OAuthTokenRefresherClient.$anonfun$refreshToken$1(OAuthTokenRefresherClient.scala:92)
    at com.google.common.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4724)
    at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3522)
    at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2315)
    at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2278)
    at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2193)
    ... 84 more

    at org.apache.spark.sql.util.ProtoSerializer.deserializeException(ProtoSerializer.scala:6640)
    at com.databricks.service.SparkServiceRemoteFuncRunner.executeRPC(SparkServiceRemoteFuncRunner.scala:188)
    at com.databricks.service.SparkServiceRemoteFuncRunner.executeRPCHandleCancels(SparkServiceRemoteFuncRunner.scala:287)
    at com.databricks.service.SparkServiceRemoteFuncRunner.$anonfun$execute0$1(SparkServiceRemoteFuncRunner.scala:118)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
    at com.databricks.service.SparkServiceRemoteFuncRunner.withRetry(SparkServiceRemoteFuncRunner.scala:135)
    at com.databricks.service.SparkServiceRemoteFuncRunner.execute0(SparkServiceRemoteFuncRunner.scala:113)
    at com.databricks.service.SparkServiceRemoteFuncRunner.$anonfun$execute$1(SparkServiceRemoteFuncRunner.scala:86)
    at com.databricks.spark.util.Log4jUsageLogger.recordOperation(UsageLogger.scala:247)
    at com.databricks.spark.util.UsageLogging.recordOperation(UsageLogger.scala:429)
    at com.databricks.spark.util.UsageLogging.recordOperation$(UsageLogger.scala:408)
    at com.databricks.service.SparkServiceRPCClientStub.recordOperation(SparkServiceRPCClientStub.scala:58)
    at com.databricks.service.SparkServiceRemoteFuncRunner.execute(SparkServiceRemoteFuncRunner.scala:78)
    at com.databricks.service.SparkServiceRemoteFuncRunner.execute$(SparkServiceRemoteFuncRunner.scala:67)
    at com.databricks.service.SparkServiceRPCClientStub.execute(SparkServiceRPCClientStub.scala:58)
    at com.databricks.service.SparkServiceRPCClientStub.readDataFrame(SparkServiceRPCClientStub.scala:355)
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:338)
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:287)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
    at java.lang.reflect.Method.invoke(Unknown Source)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
    at py4j.Gateway.invoke(Gateway.java:295)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:251)
    at java.lang.Thread.run(Unknown Source)
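
For completeness, the databricks-connect client itself can be sanity-checked with the standard CLI (assuming databricks-connect is installed in the active environment); this separates basic connectivity problems from the storage-permission error above:

databricks-connect configure   # prompts for workspace URL, access token, cluster ID
databricks-connect test        # runs a short test job against the remote cluster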

