BlackBerry 使用 SAX 解析器解析 UTF-8 XML 文件
我正在尝试使用 SAX 解析器解析一个 UTF-8 编码的 XML 文件,但解析时抛出了异常,异常消息为“Expecting an element”(期望一个元素)。
<?xml version='1.0' encoding='UTF-8' standalone='yes' ?>
<config>
<filepath>/mnt/sdcard/Audio_Recorder/anonymous22242.3gp</filepath>
<filename>anonymous22242.3gp</filename>
<annotation>
<file>anonymous22242.3gp</file>
<timestamp>0:06</timestamp>
<note>test1</note>
</annotation>
<annotation>
<file>anonymous22242.3gp</file>
<timestamp>0:09</timestamp>
<note>لول</note>
</annotation>
<annotation>
<file>anonymous22242.3gp</file>
<timestamp>0:09</timestamp>
<note>لولو</note>
</annotation>
</config>
// Text of the <filepath> element, assigned by the SAX handler in main().
private static String fileDirectory;

// Text of every <filename> element, in document order.
private static final ArrayList<String> allFileNames = new ArrayList<String>();

// One String[3] per annotation, appended by the SAX handler in main().
private static final ArrayList<String[]> allAnnotations = new ArrayList<String[]>();

// Annotation being assembled: [0] = file, [1] = timestamp, [2] = note.
private static String[] currentAnnotation = new String[3];
/**
 * Fetches the annotation XML over HTTP, parses it with SAX and prints the
 * collected results.
 *
 * <p>The handler stores the text of {@code <filepath>} in {@code fileDirectory},
 * every {@code <filename>} in {@code allFileNames}, and one
 * {@code {file, timestamp, note}} array per {@code <note>} in
 * {@code allAnnotations}.
 *
 * @param args unused
 */
public static void main(String[] args) {
    InputStream inputStream = null;
    try {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser playbackParser = factory.newSAXParser();

        DefaultHandler handler = new DefaultHandler() {
            // Text of the element currently being read. SAX is allowed to
            // deliver one element's text through several characters() calls,
            // so it is accumulated here and consumed in endElement().
            private final StringBuffer text = new StringBuffer();

            public void startElement(String uri, String localName,
                    String qName, Attributes attributes)
                    throws SAXException {
                System.out.println("Start Element :" + qName);
                // Discard whitespace seen between the previous tag and this one.
                text.setLength(0);
            }

            public void endElement(String uri, String localName,
                    String qName) throws SAXException {
                String value = text.toString();
                text.setLength(0);

                if (qName.equalsIgnoreCase("filepath")) {
                    System.out.println("Full Path : " + value);
                    fileDirectory = value;
                } else if (qName.equalsIgnoreCase("filename")) {
                    System.out.println("File Name : " + value);
                    allFileNames.add(value);
                } else if (qName.equalsIgnoreCase("file")) {
                    currentAnnotation[0] = value;
                } else if (qName.equalsIgnoreCase("timestamp")) {
                    currentAnnotation[1] = value;
                } else if (qName.equalsIgnoreCase("note")) {
                    currentAnnotation[2] = value;
                    allAnnotations.add(currentAnnotation);
                    // Start a fresh array: reusing the stored one would make
                    // every list entry alias the most recent annotation.
                    currentAnnotation = new String[3];
                }
                System.out.println("End Element :" + qName);
            }

            public void characters(char ch[], int start, int length)
                    throws SAXException {
                text.append(ch, start, length);
            }
        };

        inputStream = getStream("http://www.example.com/example.xml");
        if (inputStream == null) {
            // getStream() reports its own failure and returns null.
            System.out.println("Unable to open the XML stream");
            return;
        }

        Reader xmlReader = new InputStreamReader(inputStream, "UTF-8");
        InputSource xmlSource = new InputSource(xmlReader);
        xmlSource.setEncoding("UTF-8");
        playbackParser.parse(xmlSource, handler);

        System.out.println(fileDirectory);
        System.out.println(allFileNames);
        System.out.println(allAnnotations);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (Exception ignored) {
                // Nothing useful can be done if closing the stream fails.
            }
        }
    }
}
}
public Static InputStream getStream(String url)
{
try
{
connection = getConnection(url);
connection.setRequestProperty("User-Agent",System.getProperty("microedition.profiles"));
connection.setRequestProperty("Connection", "Keep-Alive");
connection.setRequestProperty("Content-Type", "text/xml; charset=UTF-8");
inputStream = connection.openInputStream();
return inputStream;
}
catch(Exception e)
{
System.out.println("NNNNNNN "+e.getMessage());
return null;
}
}
/**
 * Opens an {@code HttpConnection} for {@code url} with the suffix returned by
 * {@code getConnectionString()} appended, stores it in {@code connection}
 * (declared outside this method) and returns it.
 *
 * <p>If opening fails, the failure is reported on standard output and
 * {@code connection} is returned with whatever value it already held.
 *
 * @param url base address to connect to
 * @return the value of {@code connection} after the attempt
 */
public HttpConnection getConnection(String url)
{
    try
    {
        connection = (HttpConnection) Connector.open(url+getConnectionString());
    }
    catch(Exception e)
    {
        // Report the failure so that a later error in the caller can be
        // traced back to the connection that never opened.
        System.out.println("getConnection failed for " + url + ": " + e);
    }
    return connection;
}
但是,当我把 inputStream(而不是 InputSource)传给 parse 方法时,文件可以被解析,但其中的阿拉伯字符仍然有问题:
// Alternative attempt: hand the raw byte stream to the parser directly,
// instead of the Reader-backed InputSource built in main().
playbackParser.parse(inputStream, handler);
I am trying to parse a UTF-8 XML file using a SAX parser, but parsing throws an exception whose message is "Expecting an element".
<?xml version='1.0' encoding='UTF-8' standalone='yes' ?>
<config>
<filepath>/mnt/sdcard/Audio_Recorder/anonymous22242.3gp</filepath>
<filename>anonymous22242.3gp</filename>
<annotation>
<file>anonymous22242.3gp</file>
<timestamp>0:06</timestamp>
<note>test1</note>
</annotation>
<annotation>
<file>anonymous22242.3gp</file>
<timestamp>0:09</timestamp>
<note>لول</note>
</annotation>
<annotation>
<file>anonymous22242.3gp</file>
<timestamp>0:09</timestamp>
<note>لولو</note>
</annotation>
</config>
// Text of the <filepath> element, assigned by the SAX handler in main().
private static String fileDirectory;

// Text of every <filename> element, in document order.
private static final ArrayList<String> allFileNames = new ArrayList<String>();

// One String[3] per annotation, appended by the SAX handler in main().
private static final ArrayList<String[]> allAnnotations = new ArrayList<String[]>();

// Annotation being assembled: [0] = file, [1] = timestamp, [2] = note.
private static String[] currentAnnotation = new String[3];
/**
 * Fetches the annotation XML over HTTP, parses it with SAX and prints the
 * collected results.
 *
 * <p>The handler stores the text of {@code <filepath>} in {@code fileDirectory},
 * every {@code <filename>} in {@code allFileNames}, and one
 * {@code {file, timestamp, note}} array per {@code <note>} in
 * {@code allAnnotations}.
 *
 * @param args unused
 */
public static void main(String[] args) {
    InputStream inputStream = null;
    try {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser playbackParser = factory.newSAXParser();

        DefaultHandler handler = new DefaultHandler() {
            // Text of the element currently being read. SAX is allowed to
            // deliver one element's text through several characters() calls,
            // so it is accumulated here and consumed in endElement().
            private final StringBuffer text = new StringBuffer();

            public void startElement(String uri, String localName,
                    String qName, Attributes attributes)
                    throws SAXException {
                System.out.println("Start Element :" + qName);
                // Discard whitespace seen between the previous tag and this one.
                text.setLength(0);
            }

            public void endElement(String uri, String localName,
                    String qName) throws SAXException {
                String value = text.toString();
                text.setLength(0);

                if (qName.equalsIgnoreCase("filepath")) {
                    System.out.println("Full Path : " + value);
                    fileDirectory = value;
                } else if (qName.equalsIgnoreCase("filename")) {
                    System.out.println("File Name : " + value);
                    allFileNames.add(value);
                } else if (qName.equalsIgnoreCase("file")) {
                    currentAnnotation[0] = value;
                } else if (qName.equalsIgnoreCase("timestamp")) {
                    currentAnnotation[1] = value;
                } else if (qName.equalsIgnoreCase("note")) {
                    currentAnnotation[2] = value;
                    allAnnotations.add(currentAnnotation);
                    // Start a fresh array: reusing the stored one would make
                    // every list entry alias the most recent annotation.
                    currentAnnotation = new String[3];
                }
                System.out.println("End Element :" + qName);
            }

            public void characters(char ch[], int start, int length)
                    throws SAXException {
                text.append(ch, start, length);
            }
        };

        inputStream = getStream("http://www.example.com/example.xml");
        if (inputStream == null) {
            // getStream() reports its own failure and returns null.
            System.out.println("Unable to open the XML stream");
            return;
        }

        Reader xmlReader = new InputStreamReader(inputStream, "UTF-8");
        InputSource xmlSource = new InputSource(xmlReader);
        xmlSource.setEncoding("UTF-8");
        playbackParser.parse(xmlSource, handler);

        System.out.println(fileDirectory);
        System.out.println(allFileNames);
        System.out.println(allAnnotations);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (Exception ignored) {
                // Nothing useful can be done if closing the stream fails.
            }
        }
    }
}
}
public Static InputStream getStream(String url)
{
try
{
connection = getConnection(url);
connection.setRequestProperty("User-Agent",System.getProperty("microedition.profiles"));
connection.setRequestProperty("Connection", "Keep-Alive");
connection.setRequestProperty("Content-Type", "text/xml; charset=UTF-8");
inputStream = connection.openInputStream();
return inputStream;
}
catch(Exception e)
{
System.out.println("NNNNNNN "+e.getMessage());
return null;
}
}
/**
 * Opens an {@code HttpConnection} for {@code url} with the suffix returned by
 * {@code getConnectionString()} appended, stores it in {@code connection}
 * (declared outside this method) and returns it.
 *
 * <p>If opening fails, the failure is reported on standard output and
 * {@code connection} is returned with whatever value it already held.
 *
 * @param url base address to connect to
 * @return the value of {@code connection} after the attempt
 */
public HttpConnection getConnection(String url)
{
    try
    {
        connection = (HttpConnection) Connector.open(url+getConnectionString());
    }
    catch(Exception e)
    {
        // Report the failure so that a later error in the caller can be
        // traced back to the connection that never opened.
        System.out.println("getConnection failed for " + url + ": " + e);
    }
    return connection;
}
But when I pass the inputStream to the parse method instead of the InputSource, it parses the file, but there is still a problem with the Arabic characters in between:
// Alternative attempt: hand the raw byte stream to the parser directly,
// instead of the Reader-backed InputSource built in main().
playbackParser.parse(inputStream, handler);
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
您显示的 XML 中包含未编码的阿拉伯字符。这违反了 XML 声明的编码,这意味着 XML 格式错误。 SAX 解析器按顺序逐段处理数据,为每个片段触发事件。在到达包含这些错误字符的片段之前,它不会检测到此类编码错误。你对此无能为力。 XML 需要由其原作者修复。
The XML you showed has unencoded Arabic characters in it. That is in violation of the XML's declared Encoding, which means the XML is malformed. A SAX parser processes data piece by piece sequentially, triggering events for each piece. It will not detect such an encoding error until it reaches the piece that contains those erroneous characters. There is nothing you can do about that. The XML needs to be fixed by its original author.