Databricks-connect can't load data
After a lot of struggling I finally managed to connect to Databricks remotely, only to find that for some reason I'm not allowed to load data.
I'm posting here in the hope that someone out there has the answer.
What I'm trying to do is use databricks-connect to read .avro files from an Azure Storage container.
from pyspark.sql import SparkSession

# With databricks-connect configured, this session is routed to the remote cluster.
spark = SparkSession.builder.getOrCreate()

# "azure/storage/containerlocation" is a placeholder for the real container path.
df = spark.read.format("avro").load("azure/storage/containerlocation")
df.show()
Error traceback:
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
c:\Users\Desktop\local_projects\rpi-bdc-rec-eng\training_service\tmp.py in <cell line: 92>()
---> 3 spark.read.format("avro").load("azure/storage/containerlocation")
File ~\AppData\Local\pypoetry\Cache\virtualenvs\rec-eng-training-service-vcboY0OA-py3.8\lib\site-packages\pyspark\sql\readwriter.py:204, in DataFrameReader.load(self, path, format, schema, **options)
202 self.options(**options)
203 if isinstance(path, str):
--> 204 return self._df(self._jreader.load(path))
205 elif path is not None:
206 if type(path) != list:
File ~\AppData\Local\pypoetry\Cache\virtualenvs\rec-eng-training-service-vcboY0OA-py3.8\lib\site-packages\py4j\java_gateway.py:1304, in JavaMember.__call__(self, *args)
1298 command = proto.CALL_COMMAND_NAME +\
1299 self.command_header +\
1300 args_command +\
1301 proto.END_COMMAND_PART
1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
1305 answer, self.gateway_client, self.target_id, self.name)
1307 for temp_arg in temp_args:
1308 temp_arg._detach()
File ~\AppData\Local\pypoetry\Cache\virtualenvs\rec-eng-training-service-vcboY0OA-py3.8\lib\site-packages\pyspark\sql\utils.py:117, in capture_sql_exception.<locals>.deco(*a, **kw)
115 def deco(*a, **kw):
116 try:
--> 117 return f(*a, **kw)
118 except py4j.protocol.Py4JJavaError as e:
119 converted = convert_exception(e.java_exception)
File ~\AppData\Local\pypoetry\Cache\virtualenvs\rec-eng-training-service-vcboY0OA-py3.8\lib\site-packages\py4j\protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
332 format(target_id, ".", name, value))
Py4JJavaError: An error occurred while calling o27.load.
: com.databricks.service.SparkServiceRemoteException: com.google.common.util.concurrent.UncheckedExecutionException: com.databricks.common.client.DatabricksServiceHttpClientException: 403: Your token is missing the required scopes for this endpoint.
at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2199)
at com.google.common.cache.LocalCache.get(LocalCache.java:3932)
at com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4721)
at com.databricks.backend.daemon.driver.credentials.CachingCredentialStore.get(CachingCredentialStore.scala:60)
at com.databricks.backend.daemon.driver.credentials.OAuthTokenRefresherClient.refreshToken(OAuthTokenRefresherClient.scala:83)
at com.databricks.backend.daemon.driver.credentials.OAuthTokenRefresherClient.newToken(OAuthTokenRefresherClient.scala:132)
at org.apache.spark.credentials.RuntimeCredential.getOrRefresh(CredentialContext.scala:51)
at org.apache.spark.credentials.CredentialContext$.$anonfun$getCredentialFromStore$2(CredentialContext.scala:221)
at scala.Option.map(Option.scala:230)
at org.apache.spark.credentials.CredentialContext$.$anonfun$getCredentialFromStore$1(CredentialContext.scala:220)
at scala.Option.map(Option.scala:230)
at org.apache.spark.credentials.CredentialContext$.getCredentialFromStore(CredentialContext.scala:218)
at org.apache.spark.credentials.CredentialContext$.$anonfun$getCredential$6(CredentialContext.scala:213)
at scala.Option.flatMap(Option.scala:271)
at org.apache.spark.credentials.CredentialContext$.$anonfun$getCredential$3(CredentialContext.scala:213)
at scala.Option.orElse(Option.scala:447)
at org.apache.spark.credentials.CredentialContext$.getCredential(CredentialContext.scala:213)
at com.databricks.backend.daemon.data.client.adl.AdlGen2UpgradeCredentialContextTokenProvider.getToken(AdlGen2UpgradeCredentialContextTokenProvider.scala:26)
at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.services.AbfsClient.getAccessToken(AbfsClient.java:912)
at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.executeHttpOperation(AbfsRestOperation.java:269)
at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.execute(AbfsRestOperation.java:232)
at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.services.AbfsClient.getAclStatus(AbfsClient.java:768)
at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.services.AbfsClient.getAclStatus(AbfsClient.java:750)
at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.getIsNamespaceEnabled(AzureBlobFileSystemStore.java:313)
at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.getFileStatus(AzureBlobFileSystemStore.java:821)
at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.getFileStatus(AzureBlobFileSystem.java:629)
at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.tryGetFileStatus(AzureBlobFileSystem.java:1146)
at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.createFileSystemIfNotExist(AzureBlobFileSystem.java:1461)
at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.initialize(AzureBlobFileSystem.java:142)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2669)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:370)
at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$1(DataSource.scala:845)
at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:245)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike.flatMap(TraversableLike.scala:245)
at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:242)
at scala.collection.immutable.List.flatMap(List.scala:355)
at org.apache.spark.sql.execution.datasources.DataSource$.checkAndGlobPathIfNecessary(DataSource.scala:843)
at org.apache.spark.sql.execution.datasources.DataSource.checkAndGlobPathIfNecessary(DataSource.scala:646)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:461)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:444)
at org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:400)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:400)
at com.databricks.service.SparkServiceRPCHandler$$anon$1.call(SparkServiceRPCHandler.scala:101)
at com.databricks.service.SparkServiceRPCHandler$$anon$1.call(SparkServiceRPCHandler.scala:80)
at com.google.common.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4724)
at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3522)
at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2315)
at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2278)
at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2193)
at com.google.common.cache.LocalCache.get(LocalCache.java:3932)
at com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4721)
at com.databricks.service.SparkServiceRPCHandler$.getOrLoadAnonymousRelation(SparkServiceRPCHandler.scala:80)
at com.databricks.service.SparkServiceRPCHandler.execute0(SparkServiceRPCHandler.scala:715)
at com.databricks.service.SparkServiceRPCHandler.$anonfun$executeRPC0$1(SparkServiceRPCHandler.scala:477)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.service.SparkServiceRPCHandler.executeRPC0(SparkServiceRPCHandler.scala:372)
at com.databricks.service.SparkServiceRPCHandler$$anon$2.call(SparkServiceRPCHandler.scala:323)
at com.databricks.service.SparkServiceRPCHandler$$anon$2.call(SparkServiceRPCHandler.scala:309)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at com.databricks.service.SparkServiceRPCHandler.$anonfun$executeRPC$1(SparkServiceRPCHandler.scala:359)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.service.SparkServiceRPCHandler.executeRPC(SparkServiceRPCHandler.scala:336)
at com.databricks.service.SparkServiceRPCServlet.doPost(SparkServiceRPCServer.scala:167)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:707)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:790)
at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:799)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:550)
at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:190)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:501)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)
at org.eclipse.jetty.server.Server.handle(Server.java:516)
at org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:388)
at org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:633)
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:380)
at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277)
at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311)
at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105)
at org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104)
at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:882)
at org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1036)
at java.lang.Thread.run(Thread.java:748)
Caused by: com.databricks.common.client.DatabricksServiceHttpClientException: 403: Your token is missing the required scopes for this endpoint.
at com.databricks.common.client.DatabricksServiceHttpClientException.copy(DBHttpClient.scala:802)
at com.databricks.common.client.RawDBHttpClient.getResponseBody(DBHttpClient.scala:709)
at com.databricks.common.client.RawDBHttpClient.$anonfun$httpRequestInternal$1(DBHttpClient.scala:654)
at com.databricks.logging.UsageLogging.$anonfun$recordOperation$1(UsageLogging.scala:395)
at com.databricks.logging.UsageLogging.executeThunkAndCaptureResultTags$1(UsageLogging.scala:484)
at com.databricks.logging.UsageLogging.$anonfun$recordOperationWithResultTags$4(UsageLogging.scala:504)
at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:266)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:261)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:258)
at com.databricks.common.client.RawDBHttpClient.withAttributionContext(DBHttpClient.scala:256)
at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:305)
at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:297)
at com.databricks.common.client.RawDBHttpClient.withAttributionTags(DBHttpClient.scala:256)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags(UsageLogging.scala:479)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags$(UsageLogging.scala:404)
at com.databricks.common.client.RawDBHttpClient.recordOperationWithResultTags(DBHttpClient.scala:256)
at com.databricks.logging.UsageLogging.recordOperation(UsageLogging.scala:395)
at com.databricks.logging.UsageLogging.recordOperation$(UsageLogging.scala:367)
at com.databricks.common.client.RawDBHttpClient.recordOperation(DBHttpClient.scala:256)
at com.databricks.common.client.RawDBHttpClient.httpRequestInternal(DBHttpClient.scala:642)
at com.databricks.common.client.RawDBHttpClient.entityEnclosingRequestInternal(DBHttpClient.scala:632)
at com.databricks.common.client.RawDBHttpClient.getInternal(DBHttpClient.scala:587)
at com.databricks.common.client.RawDBHttpClient.getWithHeaders(DBHttpClient.scala:332)
at com.databricks.common.client.RawDBHttpClient.get(DBHttpClient.scala:304)
at com.databricks.backend.daemon.driver.credentials.OAuthTokenRefresherClient.$anonfun$refreshToken$2(OAuthTokenRefresherClient.scala:92)
at com.databricks.common.client.DBHttpClient$.retryWithDeadline(DBHttpClient.scala:173)
at com.databricks.backend.daemon.driver.credentials.OAuthTokenRefresherClient.reliably(OAuthTokenRefresherClient.scala:53)
at com.databricks.backend.daemon.driver.credentials.OAuthTokenRefresherClient.$anonfun$refreshToken$1(OAuthTokenRefresherClient.scala:92)
at com.google.common.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4724)
at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3522)
at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2315)
at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2278)
at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2193)
... 84 more
at org.apache.spark.sql.util.ProtoSerializer.deserializeException(ProtoSerializer.scala:6640)
at com.databricks.service.SparkServiceRemoteFuncRunner.executeRPC(SparkServiceRemoteFuncRunner.scala:188)
at com.databricks.service.SparkServiceRemoteFuncRunner.executeRPCHandleCancels(SparkServiceRemoteFuncRunner.scala:287)
at com.databricks.service.SparkServiceRemoteFuncRunner.$anonfun$execute0$1(SparkServiceRemoteFuncRunner.scala:118)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.service.SparkServiceRemoteFuncRunner.withRetry(SparkServiceRemoteFuncRunner.scala:135)
at com.databricks.service.SparkServiceRemoteFuncRunner.execute0(SparkServiceRemoteFuncRunner.scala:113)
at com.databricks.service.SparkServiceRemoteFuncRunner.$anonfun$execute$1(SparkServiceRemoteFuncRunner.scala:86)
at com.databricks.spark.util.Log4jUsageLogger.recordOperation(UsageLogger.scala:247)
at com.databricks.spark.util.UsageLogging.recordOperation(UsageLogger.scala:429)
at com.databricks.spark.util.UsageLogging.recordOperation$(UsageLogger.scala:408)
at com.databricks.service.SparkServiceRPCClientStub.recordOperation(SparkServiceRPCClientStub.scala:58)
at com.databricks.service.SparkServiceRemoteFuncRunner.execute(SparkServiceRemoteFuncRunner.scala:78)
at com.databricks.service.SparkServiceRemoteFuncRunner.execute$(SparkServiceRemoteFuncRunner.scala:67)
at com.databricks.service.SparkServiceRPCClientStub.execute(SparkServiceRPCClientStub.scala:58)
at com.databricks.service.SparkServiceRPCClientStub.readDataFrame(SparkServiceRPCClientStub.scala:355)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:338)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:287)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
at py4j.Gateway.invoke(Gateway.java:295)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:251)
at java.lang.Thread.run(Unknown Source)
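From the trace, the 403 seems to happen while the cluster refreshes an OAuth token on my behalf (OAuthTokenRefresherClient and AdlGen2UpgradeCredentialContextTokenProvider), so my guess is that the token databricks-connect sends is missing the scopes that credential passthrough needs. For reference, below is a minimal sketch of what I understand a direct ABFS read with explicit service-principal credentials is supposed to look like, using the standard Hadoop ABFS configuration keys. Every account, container, tenant, and credential value here is a placeholder, not our real configuration:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Placeholder storage account; substitute real values before running.
account = "<storage-account>"

# Standard Hadoop ABFS OAuth settings for a service principal (client credentials flow).
spark.conf.set(f"fs.azure.account.auth.type.{account}.dfs.core.windows.net", "OAuth")
spark.conf.set(
    f"fs.azure.account.oauth.provider.type.{account}.dfs.core.windows.net",
    "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
)
spark.conf.set(
    f"fs.azure.account.oauth2.client.id.{account}.dfs.core.windows.net",
    "<client-id>",
)
spark.conf.set(
    f"fs.azure.account.oauth2.client.secret.{account}.dfs.core.windows.net",
    "<client-secret>",
)
spark.conf.set(
    f"fs.azure.account.oauth2.client.endpoint.{account}.dfs.core.windows.net",
    "https://login.microsoftonline.com/<tenant-id>/oauth2/token",
)

# Fully qualified abfss:// URI rather than a bare relative path.
df = spark.read.format("avro").load(
    f"abfss://<container>@{account}.dfs.core.windows.net/path/to/avro/files"
)
df.show()

If anyone can confirm whether the fix is to regenerate my token with the right scopes, or to configure the storage credentials explicitly like this, I'd appreciate it.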