quick and dirty solution for download/audio bug

YouTube has changed (again). This time it was a bit harder,
therefore the changes are deeper. It may still be a bit unstable.
This commit is contained in:
Christian Schabesberger 2016-02-12 01:29:14 +01:00
parent 60d636940d
commit 751ffb9de9
15 changed files with 308 additions and 130 deletions

View file

@ -37,6 +37,7 @@ public class YoutubeSearchEngineTest extends AndroidTestCase {
public void setUp() throws Exception{ public void setUp() throws Exception{
super.setUp(); super.setUp();
SearchEngine engine = new YoutubeSearchEngine(); SearchEngine engine = new YoutubeSearchEngine();
result = engine.search("https://www.youtube.com/results?search_query=bla", result = engine.search("https://www.youtube.com/results?search_query=bla",
0, "de", new Downloader()); 0, "de", new Downloader());
suggestionReply = engine.suggestionList("hello", new Downloader()); suggestionReply = engine.suggestionList("hello", new Downloader());

View file

@ -1,12 +1,11 @@
package org.schabi.newpipe.services.youtube; package org.schabi.newpipe.services.youtube;
import android.test.AndroidTestCase; import android.test.AndroidTestCase;
import android.util.Log;
import org.schabi.newpipe.Downloader; import org.schabi.newpipe.Downloader;
import org.schabi.newpipe.crawler.CrawlingException; import org.schabi.newpipe.crawler.CrawlingException;
import org.schabi.newpipe.crawler.ParsingException; import org.schabi.newpipe.crawler.ParsingException;
import org.schabi.newpipe.crawler.services.youtube.YoutubeVideoExtractor; import org.schabi.newpipe.crawler.services.youtube.YoutubeStreamExtractor;
import org.schabi.newpipe.crawler.VideoInfo; import org.schabi.newpipe.crawler.VideoInfo;
import java.io.IOException; import java.io.IOException;
@ -31,11 +30,15 @@ import java.io.IOException;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>. * along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/ */
public class YoutubeVideoExtractorDefaultTest extends AndroidTestCase { public class YoutubeStreamExtractorDefaultTest extends AndroidTestCase {
private YoutubeVideoExtractor extractor; private YoutubeStreamExtractor extractor;
public void setUp() throws IOException, CrawlingException { public void setUp() throws IOException, CrawlingException {
extractor = new YoutubeVideoExtractor("https://www.youtube.com/watch?v=FmG385_uUys", /* some anonymus video test
extractor = new YoutubeStreamExtractor("https://www.youtube.com/watch?v=FmG385_uUys",
new Downloader()); */
/* some vevo video (suggested to test against) */
extractor = new YoutubeStreamExtractor("https://www.youtube.com/watch?v=YQHsXMglC9A",
new Downloader()); new Downloader());
} }
@ -45,8 +48,8 @@ public class YoutubeVideoExtractorDefaultTest extends AndroidTestCase {
} }
public void testGetValidTimeStamp() throws CrawlingException, IOException { public void testGetValidTimeStamp() throws CrawlingException, IOException {
YoutubeVideoExtractor extractor = YoutubeStreamExtractor extractor =
new YoutubeVideoExtractor("https://youtu.be/FmG385_uUys?t=174", new Downloader()); new YoutubeStreamExtractor("https://youtu.be/FmG385_uUys?t=174", new Downloader());
assertTrue(Integer.toString(extractor.getTimeStamp()), assertTrue(Integer.toString(extractor.getTimeStamp()),
extractor.getTimeStamp() == 174); extractor.getTimeStamp() == 174);
} }
@ -86,7 +89,7 @@ public class YoutubeVideoExtractorDefaultTest extends AndroidTestCase {
} }
public void testGetAudioStreams() throws ParsingException { public void testGetAudioStreams() throws ParsingException {
assertTrue(extractor.getAudioStreams() == null); assertTrue(!extractor.getAudioStreams().isEmpty());
} }
public void testGetVideoStreams() throws ParsingException { public void testGetVideoStreams() throws ParsingException {
@ -98,4 +101,9 @@ public class YoutubeVideoExtractorDefaultTest extends AndroidTestCase {
0 <= s.format && s.format <= 4); 0 <= s.format && s.format <= 4);
} }
} }
/** Checks that the extractor reports a non-empty DASH MPD url; the url itself is the failure message. */
public void testGetDashMpd() throws ParsingException {
    final String failureMessage = extractor.getDashMpdUrl();
    final boolean urlPresent = !extractor.getDashMpdUrl().isEmpty();
    assertTrue(failureMessage, urlPresent);
}
} }

View file

@ -4,9 +4,7 @@ import android.test.AndroidTestCase;
import org.schabi.newpipe.Downloader; import org.schabi.newpipe.Downloader;
import org.schabi.newpipe.crawler.CrawlingException; import org.schabi.newpipe.crawler.CrawlingException;
import org.schabi.newpipe.crawler.services.youtube.YoutubeVideoExtractor; import org.schabi.newpipe.crawler.services.youtube.YoutubeStreamExtractor;
import org.schabi.newpipe.crawler.VideoInfo;
import org.schabi.newpipe.Downloader;
import java.io.IOException; import java.io.IOException;
@ -32,7 +30,7 @@ import java.io.IOException;
// This class only works in Germany. // This class only works in Germany.
public class YoutubeVideoExtractorGemaTest extends AndroidTestCase { public class YoutubeStreamExtractorGemaTest extends AndroidTestCase {
// Deaktivate this Test Case bevore uploading it githup, otherwise CI will fail. // Deaktivate this Test Case bevore uploading it githup, otherwise CI will fail.
private static final boolean testActive = false; private static final boolean testActive = false;
@ -40,10 +38,10 @@ public class YoutubeVideoExtractorGemaTest extends AndroidTestCase {
public void testGemaError() throws IOException, CrawlingException { public void testGemaError() throws IOException, CrawlingException {
if(testActive) { if(testActive) {
try { try {
new YoutubeVideoExtractor("https://www.youtube.com/watch?v=3O1_3zBUKM8", new YoutubeStreamExtractor("https://www.youtube.com/watch?v=3O1_3zBUKM8",
new Downloader()); new Downloader());
assertTrue("Gema exception not thrown", false); assertTrue("Gema exception not thrown", false);
} catch(YoutubeVideoExtractor.GemaException ge) { } catch(YoutubeStreamExtractor.GemaException ge) {
assertTrue(true); assertTrue(true);
} }
} }

View file

@ -71,7 +71,7 @@ public class VideoItemDetailActivity extends AppCompatActivity {
if (getIntent().getData() != null) { if (getIntent().getData() != null) {
videoUrl = getIntent().getData().toString(); videoUrl = getIntent().getData().toString();
StreamingService[] serviceList = ServiceList.getServices(); StreamingService[] serviceList = ServiceList.getServices();
//VideoExtractor videoExtractor = null; //StreamExtractor videoExtractor = null;
for (int i = 0; i < serviceList.length; i++) { for (int i = 0; i < serviceList.length; i++) {
if (serviceList[i].getUrlIdHandler().acceptUrl(videoUrl)) { if (serviceList[i].getUrlIdHandler().acceptUrl(videoUrl)) {
arguments.putInt(VideoItemDetailFragment.STREAMING_SERVICE, i); arguments.putInt(VideoItemDetailFragment.STREAMING_SERVICE, i);

View file

@ -49,11 +49,11 @@ import java.util.Vector;
import org.schabi.newpipe.crawler.MediaFormat; import org.schabi.newpipe.crawler.MediaFormat;
import org.schabi.newpipe.crawler.ParsingException; import org.schabi.newpipe.crawler.ParsingException;
import org.schabi.newpipe.crawler.ServiceList; import org.schabi.newpipe.crawler.ServiceList;
import org.schabi.newpipe.crawler.StreamExtractor;
import org.schabi.newpipe.crawler.VideoPreviewInfo; import org.schabi.newpipe.crawler.VideoPreviewInfo;
import org.schabi.newpipe.crawler.VideoExtractor;
import org.schabi.newpipe.crawler.StreamingService; import org.schabi.newpipe.crawler.StreamingService;
import org.schabi.newpipe.crawler.VideoInfo; import org.schabi.newpipe.crawler.VideoInfo;
import org.schabi.newpipe.crawler.services.youtube.YoutubeVideoExtractor; import org.schabi.newpipe.crawler.services.youtube.YoutubeStreamExtractor;
/** /**
@ -115,7 +115,7 @@ public class VideoItemDetailFragment extends Fragment {
private class VideoExtractorRunnable implements Runnable { private class VideoExtractorRunnable implements Runnable {
private final Handler h = new Handler(); private final Handler h = new Handler();
private VideoExtractor videoExtractor; private StreamExtractor streamExtractor;
private final StreamingService service; private final StreamingService service;
private final String videoUrl; private final String videoUrl;
@ -127,8 +127,8 @@ public class VideoItemDetailFragment extends Fragment {
@Override @Override
public void run() { public void run() {
try { try {
videoExtractor = service.getExtractorInstance(videoUrl, new Downloader()); streamExtractor = service.getExtractorInstance(videoUrl, new Downloader());
VideoInfo videoInfo = VideoInfo.getVideoInfo(videoExtractor, new Downloader()); VideoInfo videoInfo = VideoInfo.getVideoInfo(streamExtractor, new Downloader());
h.post(new VideoResultReturnedRunnable(videoInfo)); h.post(new VideoResultReturnedRunnable(videoInfo));
} catch (IOException e) { } catch (IOException e) {
@ -136,10 +136,10 @@ public class VideoItemDetailFragment extends Fragment {
e.printStackTrace(); e.printStackTrace();
} }
// custom service related exceptions // custom service related exceptions
catch (YoutubeVideoExtractor.DecryptException de) { catch (YoutubeStreamExtractor.DecryptException de) {
postNewErrorToast(h, R.string.youtube_signature_decryption_error); postNewErrorToast(h, R.string.youtube_signature_decryption_error);
de.printStackTrace(); de.printStackTrace();
} catch (YoutubeVideoExtractor.GemaException ge) { } catch (YoutubeStreamExtractor.GemaException ge) {
h.post(new Runnable() { h.post(new Runnable() {
@Override @Override
public void run() { public void run() {
@ -148,7 +148,7 @@ public class VideoItemDetailFragment extends Fragment {
}); });
} }
// ---------------------------------------- // ----------------------------------------
catch(VideoExtractor.ContentNotAvailableException e) { catch(StreamExtractor.ContentNotAvailableException e) {
h.post(new Runnable() { h.post(new Runnable() {
@Override @Override
public void run() { public void run() {

View file

@ -74,6 +74,7 @@ public class DashMpdParser {
break; break;
case XmlPullParser.TEXT: case XmlPullParser.TEXT:
// actual stream tag
if(currentTagIsBaseUrl && if(currentTagIsBaseUrl &&
(currentMimeType.contains("audio"))) { (currentMimeType.contains("audio"))) {
int format = -1; int format = -1;
@ -86,12 +87,14 @@ public class DashMpdParser {
format, currentBandwidth, currentSamplingRate)); format, currentBandwidth, currentSamplingRate));
} }
break; break;
case XmlPullParser.END_TAG: case XmlPullParser.END_TAG:
if(tagName.equals("AdaptationSet")) { if(tagName.equals("AdaptationSet")) {
currentMimeType = ""; currentMimeType = "";
} else if(tagName.equals("BaseURL")) { } else if(tagName.equals("BaseURL")) {
currentTagIsBaseUrl = false; currentTagIsBaseUrl = false;
}//no break needed here }
break;
} }
} }
} catch(Exception e) { } catch(Exception e) {

View file

@ -25,10 +25,12 @@ package org.schabi.newpipe.crawler;
/**Static data about various media formats support by Newpipe, eg mime type, extension*/ /**Static data about various media formats support by Newpipe, eg mime type, extension*/
public enum MediaFormat { public enum MediaFormat {
//video and audio combined formats
// id name suffix mime type // id name suffix mime type
MPEG_4 (0x0, "MPEG-4", "mp4", "video/mp4"), MPEG_4 (0x0, "MPEG-4", "mp4", "video/mp4"),
v3GPP (0x1, "3GPP", "3gp", "video/3gpp"), v3GPP (0x1, "3GPP", "3gp", "video/3gpp"),
WEBM (0x2, "WebM", "webm", "video/webm"), WEBM (0x2, "WebM", "webm", "video/webm"),
// audio formats
M4A (0x3, "m4a", "m4a", "audio/mp4"), M4A (0x3, "m4a", "m4a", "audio/mp4"),
WEBMA (0x4, "WebM", "webm", "audio/webm"); WEBMA (0x4, "WebM", "webm", "audio/webm");

View file

@ -1,5 +1,9 @@
package org.schabi.newpipe.crawler; package org.schabi.newpipe.crawler;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -7,7 +11,7 @@ import java.util.regex.Pattern;
* Created by Christian Schabesberger on 02.02.16. * Created by Christian Schabesberger on 02.02.16.
* *
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org> * Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* RegexHelper.java is part of NewPipe. * Parser.java is part of NewPipe.
* *
* NewPipe is free software: you can redistribute it and/or modify * NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -24,7 +28,7 @@ import java.util.regex.Pattern;
*/ */
/** avoid using regex !!! */ /** avoid using regex !!! */
public class RegexHelper { public class Parser {
public static class RegexException extends ParsingException { public static class RegexException extends ParsingException {
public RegexException(String message) { public RegexException(String message) {
@ -44,4 +48,13 @@ public class RegexHelper {
throw new RegexException("failed to find pattern \""+pattern+" inside of "+input+"\""); throw new RegexException("failed to find pattern \""+pattern+" inside of "+input+"\"");
} }
} }
/**
 * Parses a URL-encoded parameter string of the form {@code key1=value1&key2=value2}
 * into a map from key to URL-decoded value.
 *
 * Keys are stored as they appear in the input (they are not decoded). A parameter
 * without a value ({@code "key="} or just {@code "key"}) is mapped to the empty string,
 * and empty segments (e.g. from {@code "a=1&&b=2"} or an empty input) are skipped.
 *
 * @param input the raw parameter string; must not be null
 * @return a new mutable map holding one entry per non-empty parameter
 * @throws UnsupportedEncodingException if UTF-8 is not supported by the platform
 */
public static Map<String, String> compatParseMap(final String input) throws UnsupportedEncodingException {
    Map<String, String> map = new HashMap<>();
    for(String arg : input.split("&")) {
        if(arg.isEmpty()) {
            continue;
        }
        // Limit of 2: only the first '=' separates key from value, so a value that
        // itself contains '=' is kept intact instead of being truncated.
        String[] splitArg = arg.split("=", 2);
        // A parameter without '=' yields a single element; treat its value as empty
        // instead of failing with an ArrayIndexOutOfBoundsException.
        String rawValue = splitArg.length > 1 ? splitArg[1] : "";
        map.put(splitArg[0], URLDecoder.decode(rawValue, "UTF-8"));
    }
    return map;
}
} }

View file

@ -4,7 +4,7 @@ package org.schabi.newpipe.crawler;
* Created by Christian Schabesberger on 10.08.15. * Created by Christian Schabesberger on 10.08.15.
* *
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org> * Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
* VideoExtractor.java is part of NewPipe. * StreamExtractor.java is part of NewPipe.
* *
* NewPipe is free software: you can redistribute it and/or modify * NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -26,7 +26,7 @@ import java.util.List;
@SuppressWarnings("ALL") @SuppressWarnings("ALL")
public interface VideoExtractor { public interface StreamExtractor {
public class ExctractorInitException extends CrawlingException { public class ExctractorInitException extends CrawlingException {
public ExctractorInitException() {} public ExctractorInitException() {}
@ -65,6 +65,7 @@ public interface VideoExtractor {
public abstract String getUploaderThumbnailUrl() throws ParsingException; public abstract String getUploaderThumbnailUrl() throws ParsingException;
public abstract List<VideoInfo.AudioStream> getAudioStreams() throws ParsingException; public abstract List<VideoInfo.AudioStream> getAudioStreams() throws ParsingException;
public abstract List<VideoInfo.VideoStream> getVideoStreams() throws ParsingException; public abstract List<VideoInfo.VideoStream> getVideoStreams() throws ParsingException;
public abstract List<VideoInfo.VideoStream> getVideoOnlyStreams() throws ParsingException;
public abstract String getDashMpdUrl() throws ParsingException; public abstract String getDashMpdUrl() throws ParsingException;
public abstract int getAgeLimit() throws ParsingException; public abstract int getAgeLimit() throws ParsingException;
public abstract String getAverageRating() throws ParsingException; public abstract String getAverageRating() throws ParsingException;

View file

@ -27,7 +27,7 @@ public interface StreamingService {
public String name = ""; public String name = "";
} }
ServiceInfo getServiceInfo(); ServiceInfo getServiceInfo();
VideoExtractor getExtractorInstance(String url, Downloader downloader) StreamExtractor getExtractorInstance(String url, Downloader downloader)
throws IOException, CrawlingException; throws IOException, CrawlingException;
SearchEngine getSearchEngineInstance(); SearchEngine getSearchEngineInstance();

View file

@ -30,7 +30,7 @@ public class VideoInfo extends AbstractVideoInfo {
/**Fills out the video info fields which are common to all services. /**Fills out the video info fields which are common to all services.
* Probably needs to be overridden by subclasses*/ * Probably needs to be overridden by subclasses*/
public static VideoInfo getVideoInfo(VideoExtractor extractor, Downloader downloader) public static VideoInfo getVideoInfo(StreamExtractor extractor, Downloader downloader)
throws CrawlingException, IOException { throws CrawlingException, IOException {
VideoInfo videoInfo = new VideoInfo(); VideoInfo videoInfo = new VideoInfo();
@ -46,18 +46,34 @@ public class VideoInfo extends AbstractVideoInfo {
videoInfo.upload_date = extractor.getUploadDate(); videoInfo.upload_date = extractor.getUploadDate();
videoInfo.thumbnail_url = extractor.getThumbnailUrl(); videoInfo.thumbnail_url = extractor.getThumbnailUrl();
videoInfo.id = uiconv.getVideoId(extractor.getPageUrl()); videoInfo.id = uiconv.getVideoId(extractor.getPageUrl());
videoInfo.dashMpdUrl = extractor.getDashMpdUrl(); //todo: make this quick and dirty solution a real fallback
// The front end should be notified that the dash mpd could not be downloaded,
// although not getting the dash mpd is not the end of the world; therefore
// we continue.
try {
videoInfo.dashMpdUrl = extractor.getDashMpdUrl();
} catch(Exception e) {
e.printStackTrace();
}
/** Load and extract audio*/ /** Load and extract audio*/
videoInfo.audio_streams = extractor.getAudioStreams(); videoInfo.audio_streams = extractor.getAudioStreams();
if(videoInfo.dashMpdUrl != null && !videoInfo.dashMpdUrl.isEmpty()) { if(videoInfo.dashMpdUrl != null && !videoInfo.dashMpdUrl.isEmpty()) {
if(videoInfo.audio_streams == null) { if(videoInfo.audio_streams == null) {
videoInfo.audio_streams = new Vector<AudioStream>(); videoInfo.audio_streams = new Vector<AudioStream>();
} }
videoInfo.audio_streams.addAll( //todo: make this quick and dirty solution a real fallback
DashMpdParser.getAudioStreams(videoInfo.dashMpdUrl, downloader)); // same as the quick and dirty aboth
try {
videoInfo.audio_streams.addAll(
DashMpdParser.getAudioStreams(videoInfo.dashMpdUrl, downloader));
} catch(Exception e) {
e.printStackTrace();
}
} }
/** Extract video stream url*/ /** Extract video stream url*/
videoInfo.video_streams = extractor.getVideoStreams(); videoInfo.video_streams = extractor.getVideoStreams();
/** Extract video only stream url*/
videoInfo.video_only_streams = extractor.getVideoOnlyStreams();
videoInfo.uploader_thumbnail_url = extractor.getUploaderThumbnailUrl(); videoInfo.uploader_thumbnail_url = extractor.getUploaderThumbnailUrl();
videoInfo.start_position = extractor.getTimeStamp(); videoInfo.start_position = extractor.getTimeStamp();
videoInfo.average_rating = extractor.getAverageRating(); videoInfo.average_rating = extractor.getAverageRating();
@ -78,6 +94,7 @@ public class VideoInfo extends AbstractVideoInfo {
/*todo: make this lists over vectors*/ /*todo: make this lists over vectors*/
public List<VideoStream> video_streams = null; public List<VideoStream> video_streams = null;
public List<AudioStream> audio_streams = null; public List<AudioStream> audio_streams = null;
public List<VideoStream> video_only_streams = null;
// video streams provided by the dash mpd do not need to be provided as VideoStream. // video streams provided by the dash mpd do not need to be provided as VideoStream.
// Later on this will also aplly to audio streams. Since dash mpd is standarized, // Later on this will also aplly to audio streams. Since dash mpd is standarized,
// crawling such a file is not service dependent. Therefore getting audio only streams by yust // crawling such a file is not service dependent. Therefore getting audio only streams by yust
@ -132,6 +149,18 @@ public class VideoInfo extends AbstractVideoInfo {
public VideoStream(String url, int format, String res) { public VideoStream(String url, int format, String res) {
this.url = url; this.format = format; resolution = res; this.url = url; this.format = format; resolution = res;
} }
/**
 * Reveals whether two streams have the same format and resolution,
 * regardless of whether their urls differ.
 *
 * @param cmp the stream to compare against; may be null
 * @return true if format and resolution match; false otherwise, including when cmp is null
 */
public boolean equalStats(VideoStream cmp) {
    if(cmp == null) {
        return false;
    }
    // resolution is a String: compare its content, not the object reference
    return format == cmp.format
            && (resolution == null ? cmp.resolution == null : resolution.equals(cmp.resolution));
}
/**
 * Reveals whether two streams are equal, i.e. have equal stats and the same url.
 * Note that this is an overload taking a VideoStream, not an override of
 * Object.equals(Object).
 *
 * @param cmp the stream to compare against; may be null
 * @return true if the stats and the url match; false otherwise, including when cmp is null
 */
public boolean equals(VideoStream cmp) {
    // url is a String: compare its content, not the object reference
    return cmp != null
            && equalStats(cmp)
            && (url == null ? cmp.url == null : url.equals(cmp.url));
}
} }
@SuppressWarnings("unused") @SuppressWarnings("unused")
@ -145,5 +174,18 @@ public class VideoInfo extends AbstractVideoInfo {
this.url = url; this.format = format; this.url = url; this.format = format;
this.bandwidth = bandwidth; this.sampling_rate = samplingRate; this.bandwidth = bandwidth; this.sampling_rate = samplingRate;
} }
/**
 * Reveals whether two streams share format, bandwidth and sampling rate,
 * regardless of whether their urls differ.
 */
public boolean equalStats(AudioStream cmp) {
    if(format != cmp.format) {
        return false;
    }
    if(bandwidth != cmp.bandwidth) {
        return false;
    }
    return sampling_rate == cmp.sampling_rate;
}
/**
 * Reveals whether two streams are equal, i.e. have equal stats and the same url.
 * Note that this is an overload taking an AudioStream, not an override of
 * Object.equals(Object).
 *
 * @param cmp the stream to compare against; may be null
 * @return true if the stats and the url match; false otherwise, including when cmp is null
 */
public boolean equals(AudioStream cmp) {
    // compare the url by content rather than by object reference
    return cmp != null
            && equalStats(cmp)
            && (url == null ? cmp.url == null : url.equals(cmp.url));
}
} }
} }

View file

@ -6,11 +6,9 @@ import android.util.Log;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.schabi.newpipe.crawler.CrawlingException;
import org.schabi.newpipe.crawler.Downloader; import org.schabi.newpipe.crawler.Downloader;
import org.schabi.newpipe.crawler.ParsingException; import org.schabi.newpipe.crawler.ParsingException;
import org.schabi.newpipe.crawler.SearchEngine; import org.schabi.newpipe.crawler.SearchEngine;
import org.schabi.newpipe.crawler.VideoExtractor;
import org.schabi.newpipe.crawler.VideoPreviewInfo; import org.schabi.newpipe.crawler.VideoPreviewInfo;
import org.w3c.dom.Node; import org.w3c.dom.Node;
import org.w3c.dom.NodeList; import org.w3c.dom.NodeList;

View file

@ -2,9 +2,9 @@ package org.schabi.newpipe.crawler.services.youtube;
import org.schabi.newpipe.crawler.CrawlingException; import org.schabi.newpipe.crawler.CrawlingException;
import org.schabi.newpipe.crawler.Downloader; import org.schabi.newpipe.crawler.Downloader;
import org.schabi.newpipe.crawler.StreamExtractor;
import org.schabi.newpipe.crawler.StreamingService; import org.schabi.newpipe.crawler.StreamingService;
import org.schabi.newpipe.crawler.VideoUrlIdHandler; import org.schabi.newpipe.crawler.VideoUrlIdHandler;
import org.schabi.newpipe.crawler.VideoExtractor;
import org.schabi.newpipe.crawler.SearchEngine; import org.schabi.newpipe.crawler.SearchEngine;
import java.io.IOException; import java.io.IOException;
@ -38,11 +38,11 @@ public class YoutubeService implements StreamingService {
return serviceInfo; return serviceInfo;
} }
@Override @Override
public VideoExtractor getExtractorInstance(String url, Downloader downloader) public StreamExtractor getExtractorInstance(String url, Downloader downloader)
throws CrawlingException, IOException { throws CrawlingException, IOException {
VideoUrlIdHandler urlIdHandler = new YoutubeVideoUrlIdHandler(); VideoUrlIdHandler urlIdHandler = new YoutubeVideoUrlIdHandler();
if(urlIdHandler.acceptUrl(url)) { if(urlIdHandler.acceptUrl(url)) {
return new YoutubeVideoExtractor(url, downloader) ; return new YoutubeStreamExtractor(url, downloader) ;
} }
else { else {
throw new IllegalArgumentException("supplied String is not a valid Youtube URL"); throw new IllegalArgumentException("supplied String is not a valid Youtube URL");

View file

@ -1,5 +1,6 @@
package org.schabi.newpipe.crawler.services.youtube; package org.schabi.newpipe.crawler.services.youtube;
import android.provider.MediaStore;
import android.util.Log; import android.util.Log;
import org.json.JSONException; import org.json.JSONException;
@ -7,16 +8,15 @@ import org.json.JSONObject;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.parser.Parser;
import org.mozilla.javascript.Context; import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function; import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject; import org.mozilla.javascript.ScriptableObject;
import org.schabi.newpipe.crawler.CrawlingException; import org.schabi.newpipe.crawler.CrawlingException;
import org.schabi.newpipe.crawler.Downloader; import org.schabi.newpipe.crawler.Downloader;
import org.schabi.newpipe.crawler.Parser;
import org.schabi.newpipe.crawler.ParsingException; import org.schabi.newpipe.crawler.ParsingException;
import org.schabi.newpipe.crawler.RegexHelper;
import org.schabi.newpipe.crawler.VideoUrlIdHandler; import org.schabi.newpipe.crawler.VideoUrlIdHandler;
import org.schabi.newpipe.crawler.VideoExtractor; import org.schabi.newpipe.crawler.StreamExtractor;
import org.schabi.newpipe.crawler.MediaFormat; import org.schabi.newpipe.crawler.MediaFormat;
import org.schabi.newpipe.crawler.VideoInfo; import org.schabi.newpipe.crawler.VideoInfo;
import org.schabi.newpipe.crawler.VideoPreviewInfo; import org.schabi.newpipe.crawler.VideoPreviewInfo;
@ -32,7 +32,7 @@ import java.util.Vector;
* Created by Christian Schabesberger on 06.08.15. * Created by Christian Schabesberger on 06.08.15.
* *
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org> * Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
* YoutubeVideoExtractor.java is part of NewPipe. * YoutubeStreamExtractor.java is part of NewPipe.
* *
* NewPipe is free software: you can redistribute it and/or modify * NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -48,7 +48,98 @@ import java.util.Vector;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>. * along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/ */
public class YoutubeVideoExtractor implements VideoExtractor { public class YoutubeStreamExtractor implements StreamExtractor {
/** Category of stream that an itag entry describes. */
public enum ItagType {
// audio stream
AUDIO,
// video stream (presumably video and audio combined; confirm against the itag table)
VIDEO,
// video stream without audio
VIDEO_ONLY
}
/**
 * Describes one itag: its numeric id, the kind of stream it stands for, the id of its
 * media format and, depending on the constructor used, either resolution/fps or
 * sampling rate/bandwidth. Values that are not set keep their defaults (null or -1).
 */
private static class ItagItem {
    public int id;
    public ItagType itagType;
    public int mediaFormatId;
    public String resolutionString = null;
    public int fps = -1;
    public int samplingRate = -1;
    public int bandWidth = -1;

    /** Sets the fields shared by every kind of itag entry. */
    private ItagItem(int id, ItagType type, MediaFormat format) {
        this.id = id;
        this.itagType = type;
        this.mediaFormatId = format.id;
    }

    /** Creates an entry described by a resolution string and a frame rate. */
    public ItagItem(int id, ItagType type, MediaFormat format, String res, int fps) {
        this(id, type, format);
        this.resolutionString = res;
        this.fps = fps;
    }

    /** Creates an entry described by a sampling rate and a bandwidth. */
    public ItagItem(int id, ItagType type, MediaFormat format, int samplingRate, int bandWidth) {
        this(id, type, format);
        this.samplingRate = samplingRate;
        this.bandWidth = bandWidth;
    }
}
// Table of all itags known to this extractor; itagIsSupported() and getItagItem()
// look entries up by id.
private static final ItagItem[] itagList = {
// video streams
// id, ItagType, MediaFormat, Resolution, fps
new ItagItem(17, ItagType.VIDEO, MediaFormat.v3GPP, "144p", 12),
new ItagItem(18, ItagType.VIDEO, MediaFormat.MPEG_4, "360p", 24),
new ItagItem(22, ItagType.VIDEO, MediaFormat.MPEG_4, "720p", 24),
new ItagItem(36, ItagType.VIDEO, MediaFormat.v3GPP, "240p", 24),
new ItagItem(37, ItagType.VIDEO, MediaFormat.MPEG_4, "1080p", 24),
// NOTE(review): itag 38 carries the same "1080p" label as itag 37 - confirm the resolution
new ItagItem(38, ItagType.VIDEO, MediaFormat.MPEG_4, "1080p", 24),
new ItagItem(43, ItagType.VIDEO, MediaFormat.WEBM, "360p", 24),
new ItagItem(44, ItagType.VIDEO, MediaFormat.WEBM, "480p", 24),
new ItagItem(45, ItagType.VIDEO, MediaFormat.WEBM, "720p", 24),
new ItagItem(46, ItagType.VIDEO, MediaFormat.WEBM, "1080p", 24),
// audio streams
// id, ItagType, MediaFormat, samplingR, bandwidth
new ItagItem(249, ItagType.AUDIO, MediaFormat.WEBMA, 0, 0), // sampling rate and bandwidth are 0 because they are not known
new ItagItem(250, ItagType.AUDIO, MediaFormat.WEBMA, 0, 0),
new ItagItem(171, ItagType.AUDIO, MediaFormat.WEBMA, 0, 0),
new ItagItem(140, ItagType.AUDIO, MediaFormat.M4A, 0, 0),
new ItagItem(251, ItagType.AUDIO, MediaFormat.WEBMA, 0, 0),
// video only streams
// id, ItagType, MediaFormat, Resolution, fps
new ItagItem(160, ItagType.VIDEO_ONLY, MediaFormat.MPEG_4, "144p", 24),
new ItagItem(133, ItagType.VIDEO_ONLY, MediaFormat.MPEG_4, "240p", 24),
new ItagItem(134, ItagType.VIDEO_ONLY, MediaFormat.MPEG_4, "360p", 24),
new ItagItem(135, ItagType.VIDEO_ONLY, MediaFormat.MPEG_4, "480p", 24),
new ItagItem(136, ItagType.VIDEO_ONLY, MediaFormat.MPEG_4, "720p", 24),
new ItagItem(137, ItagType.VIDEO_ONLY, MediaFormat.MPEG_4, "1080p", 24),
};
/**
 * Tells whether the given itag has an entry in the itag list.
 * The list only contains itag formats that are supported by the common Android Video player.
 * However if you are looking for a list showing all itag formats, look at
 * https://github.com/rg3/youtube-dl/issues/1687
 */
public static boolean itagIsSupported(int itag) {
    for(int i = 0; i < itagList.length; i++) {
        if(itagList[i].id == itag) {
            return true;
        }
    }
    return false;
}
/**
 * Looks up the itag list entry with the given id.
 *
 * @param itag the itag id to look up
 * @return the first entry whose id equals itag
 * @throws ParsingException if the itag list has no entry with that id
 */
public static ItagItem getItagItem(int itag) throws ParsingException {
    ItagItem match = null;
    for(int i = 0; i < itagList.length && match == null; i++) {
        if(itagList[i].id == itag) {
            match = itagList[i];
        }
    }
    if(match == null) {
        throw new ParsingException("itag=" + itag + " not supported");
    }
    return match;
}
// Sometimes if the html page of youtube is already downloaded, the youtube web page will internally
// download the /get_video_info page. Since a certain date the dashmpd url is only available over
// this /get_video_info page, so we always need to download this one too.
// %%video_id%% will be replaced by the actual video id
// $$el_type$$ will be replaced by the actual el_type (see the declarations below)
private static final String GET_VIDEO_INFO_URL =
"https://www.youtube.com/get_video_info?video_id=%%video_id%%$$el_type$$&ps=default&eurl=&gl=US&hl=en";
// el_type is necessary for the url above
private static final String EL_INFO = "el=info";
public class DecryptException extends ParsingException { public class DecryptException extends ParsingException {
DecryptException(Throwable cause) { DecryptException(Throwable cause) {
@ -69,9 +160,10 @@ public class YoutubeVideoExtractor implements VideoExtractor {
// ---------------- // ----------------
private static final String TAG = YoutubeVideoExtractor.class.toString(); private static final String TAG = YoutubeStreamExtractor.class.toString();
private final Document doc; private final Document doc;
private JSONObject playerArgs; private JSONObject playerArgs;
private Map<String, String> videoInfoPage;
// static values // static values
private static final String DECRYPTION_FUNC_NAME="decrypt"; private static final String DECRYPTION_FUNC_NAME="decrypt";
@ -84,7 +176,7 @@ public class YoutubeVideoExtractor implements VideoExtractor {
private Downloader downloader; private Downloader downloader;
public YoutubeVideoExtractor(String pageUrl, Downloader dl) throws CrawlingException, IOException { public YoutubeStreamExtractor(String pageUrl, Downloader dl) throws CrawlingException, IOException {
//most common videoInfo fields are now set in our superclass, for all services //most common videoInfo fields are now set in our superclass, for all services
downloader = dl; downloader = dl;
this.pageUrl = pageUrl; this.pageUrl = pageUrl;
@ -96,10 +188,10 @@ public class YoutubeVideoExtractor implements VideoExtractor {
//attempt to load the youtube js player JSON arguments //attempt to load the youtube js player JSON arguments
try { try {
ytPlayerConfigRaw = ytPlayerConfigRaw =
RegexHelper.matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent); Parser.matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent);
ytPlayerConfig = new JSONObject(ytPlayerConfigRaw); ytPlayerConfig = new JSONObject(ytPlayerConfigRaw);
playerArgs = ytPlayerConfig.getJSONObject("args"); playerArgs = ytPlayerConfig.getJSONObject("args");
} catch (RegexHelper.RegexException e) { } catch (Parser.RegexException e) {
String errorReason = findErrorReason(doc); String errorReason = findErrorReason(doc);
switch(errorReason) { switch(errorReason) {
case "GEMA": case "GEMA":
@ -113,6 +205,17 @@ public class YoutubeVideoExtractor implements VideoExtractor {
throw new ParsingException("Could not parse yt player config"); throw new ParsingException("Could not parse yt player config");
} }
// get videoInfo page
try {
//Parser.unescapeEntities(url_data_str, true).split("&")
String getVideoInfoUrl = GET_VIDEO_INFO_URL.replace("%%video_id%%",
urlidhandler.getVideoId(pageUrl)).replace("$$el_type$$", "&" + EL_INFO);
videoInfoPage = Parser.compatParseMap(downloader.download(getVideoInfoUrl));
} catch(Exception e) {
throw new ParsingException("Could not load video info page.", e);
}
//---------------------------------- //----------------------------------
// load and parse description code, if it isn't already initialised // load and parse description code, if it isn't already initialised
//---------------------------------- //----------------------------------
@ -211,7 +314,8 @@ public class YoutubeVideoExtractor implements VideoExtractor {
return doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href"); return doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
} catch(Exception e) { } catch(Exception e) {
Log.w(TAG, "Could not find high res Thumbnail. Using low res instead"); Log.w(TAG, "Could not find high res Thumbnail. Using low res instead");
} try { //fall through to fallback }
try { //fall through to fallback
return playerArgs.getString("thumbnail_url"); return playerArgs.getString("thumbnail_url");
} catch (JSONException je) { } catch (JSONException je) {
throw new ParsingException( throw new ParsingException(
@ -232,10 +336,11 @@ public class YoutubeVideoExtractor implements VideoExtractor {
@Override @Override
public String getDashMpdUrl() throws ParsingException { public String getDashMpdUrl() throws ParsingException {
/*
try { try {
String dashManifest = playerArgs.getString("dashmpd"); String dashManifest = playerArgs.getString("dashmpd");
if(!dashManifest.contains("/signature/")) { if(!dashManifest.contains("/signature/")) {
String encryptedSig = RegexHelper.matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest); String encryptedSig = Parser.matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest);
String decryptedSig; String decryptedSig;
decryptedSig = decryptSignature(encryptedSig, decryptionCode); decryptedSig = decryptSignature(encryptedSig, decryptionCode);
@ -243,47 +348,95 @@ public class YoutubeVideoExtractor implements VideoExtractor {
} }
return dashManifest; return dashManifest;
} catch(NullPointerException e) { } catch(JSONException je) {
throw new ParsingException( throw new ParsingException(
"Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).", e); "Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).", je);
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException(e); throw new ParsingException(e);
} }
*/
try {
String dashManifestUrl = videoInfoPage.get("dashmpd");
if(!dashManifestUrl.contains("/signature/")) {
String encryptedSig = Parser.matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifestUrl);
String decryptedSig;
decryptedSig = decryptSignature(encryptedSig, decryptionCode);
dashManifestUrl = dashManifestUrl.replace("/s/" + encryptedSig, "/signature/" + decryptedSig);
}
return dashManifestUrl;
} catch (Exception e) {
throw new ParsingException(
"Could not get \"dashmpd\" maybe VideoInfoPage is broken.", e);
}
} }
@Override @Override
public List<VideoInfo.AudioStream> getAudioStreams() throws ParsingException { public List<VideoInfo.AudioStream> getAudioStreams() throws ParsingException {
/* If we provide a valid dash manifest, we don't need to provide audio streams extra */ Vector<VideoInfo.AudioStream> audioStreams = new Vector<>();
return null; try{
String encoded_url_map = playerArgs.getString("adaptive_fmts");
for(String url_data_str : encoded_url_map.split(",")) {
// This loop iterates through multiple streams, therefor tags
// is related to one and the same stream at a time.
Map<String, String> tags = Parser.compatParseMap(
org.jsoup.parser.Parser.unescapeEntities(url_data_str, true));
int itag = Integer.parseInt(tags.get("itag"));
if (itagIsSupported(itag)) {
ItagItem itagItem = getItagItem(itag);
if (itagItem.itagType == ItagType.AUDIO) {
String streamUrl = tags.get("url");
// if video has a signature: decrypt it and add it to the url
if (tags.get("s") != null) {
streamUrl = streamUrl + "&signature="
+ decryptSignature(tags.get("s"), decryptionCode);
}
audioStreams.add(new VideoInfo.AudioStream(streamUrl,
itagItem.mediaFormatId,
itagItem.bandWidth,
itagItem.samplingRate));
}
}
}
} catch (Exception e) {
throw new ParsingException("Could not get audiostreams", e);
}
return audioStreams;
} }
@Override @Override
public List<VideoInfo.VideoStream> getVideoStreams() throws ParsingException { public List<VideoInfo.VideoStream> getVideoStreams() throws ParsingException {
Vector<VideoInfo.VideoStream> videoStreams = new Vector<>(); Vector<VideoInfo.VideoStream> videoStreams = new Vector<>();
try{ try{
String encoded_url_map = playerArgs.getString("url_encoded_fmt_stream_map"); String encoded_url_map = playerArgs.getString("url_encoded_fmt_stream_map");
for(String url_data_str : encoded_url_map.split(",")) { for(String url_data_str : encoded_url_map.split(",")) {
try { try {
Map<String, String> tags = new HashMap<>(); // This loop iterates through multiple streams, therefor tags
for (String raw_tag : Parser.unescapeEntities(url_data_str, true).split("&")) { // is related to one and the same stream at a time.
String[] split_tag = raw_tag.split("="); Map<String, String> tags = Parser.compatParseMap(
tags.put(split_tag[0], split_tag[1]); org.jsoup.parser.Parser.unescapeEntities(url_data_str, true));
}
int itag = Integer.parseInt(tags.get("itag")); int itag = Integer.parseInt(tags.get("itag"));
String streamUrl = URLDecoder.decode(tags.get("url"), "UTF-8");
// if video has a signature: decrypt it and add it to the url if (itagIsSupported(itag)) {
if (tags.get("s") != null) { ItagItem itagItem = getItagItem(itag);
streamUrl = streamUrl + "&signature=" if(itagItem.itagType == ItagType.VIDEO) {
+ decryptSignature(tags.get("s"), decryptionCode); String streamUrl = tags.get("url");
} // if video has a signature: decrypt it and add it to the url
if (tags.get("s") != null) {
if (resolveFormat(itag) != -1) { streamUrl = streamUrl + "&signature="
videoStreams.add(new VideoInfo.VideoStream( + decryptSignature(tags.get("s"), decryptionCode);
streamUrl, }
resolveFormat(itag), videoStreams.add(new VideoInfo.VideoStream(
resolveResolutionString(itag))); streamUrl,
itagItem.mediaFormatId,
itagItem.resolutionString));
}
} }
} catch (Exception e) { } catch (Exception e) {
Log.w(TAG, "Could not get Video stream."); Log.w(TAG, "Could not get Video stream.");
@ -298,19 +451,23 @@ public class YoutubeVideoExtractor implements VideoExtractor {
if(videoStreams.isEmpty()) { if(videoStreams.isEmpty()) {
throw new ParsingException("Failed to get any video stream"); throw new ParsingException("Failed to get any video stream");
} }
return videoStreams; return videoStreams;
} }
/**
 * Placeholder implementation: no streams are extracted here yet.
 * NOTE(review): presumably meant for streams that carry video without an audio
 * track - confirm against the interface this method overrides.
 *
 * @return always null, so callers have to check for null before using the result
 * @throws ParsingException declared by the signature, never thrown by this implementation
 */
@Override
public List<VideoInfo.VideoStream> getVideoOnlyStreams() throws ParsingException {
// not implemented, null instead of a list
return null;
}
/**Attempts to parse (and return) the offset to start playing the video from. /**Attempts to parse (and return) the offset to start playing the video from.
* @return the offset (in seconds), or 0 if no timestamp is found.*/ * @return the offset (in seconds), or 0 if no timestamp is found.*/
@Override @Override
public int getTimeStamp() throws ParsingException { public int getTimeStamp() throws ParsingException {
//todo: add unit test for timestamp //todo: use video_info for getting timestamp
String timeStamp; String timeStamp;
try { try {
timeStamp = RegexHelper.matchGroup1("((#|&|\\?)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl); timeStamp = Parser.matchGroup1("((#|&|\\?)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl);
} catch (RegexHelper.RegexException e) { } catch (Parser.RegexException e) {
// catch this instantly since an url does not necessarily have to have a time stamp // catch this instantly since an url does not necessarily have to have a time stamp
// -2 because well the testing system will then know its the regex that failed :/ // -2 because well the testing system will then know its the regex that failed :/
@ -318,22 +475,21 @@ public class YoutubeVideoExtractor implements VideoExtractor {
return -2; return -2;
} }
//TODO: test this
if(!timeStamp.isEmpty()) { if(!timeStamp.isEmpty()) {
try { try {
String secondsString = ""; String secondsString = "";
String minutesString = ""; String minutesString = "";
String hoursString = ""; String hoursString = "";
try { try {
secondsString = RegexHelper.matchGroup1("(\\d{1,3})s", timeStamp); secondsString = Parser.matchGroup1("(\\d{1,3})s", timeStamp);
minutesString = RegexHelper.matchGroup1("(\\d{1,3})m", timeStamp); minutesString = Parser.matchGroup1("(\\d{1,3})m", timeStamp);
hoursString = RegexHelper.matchGroup1("(\\d{1,3})h", timeStamp); hoursString = Parser.matchGroup1("(\\d{1,3})h", timeStamp);
} catch (Exception e) { } catch (Exception e) {
//it could be that time is given in another method //it could be that time is given in another method
if (secondsString.isEmpty() //if nothing was got, if (secondsString.isEmpty() //if nothing was got,
&& minutesString.isEmpty()//treat as unlabelled seconds && minutesString.isEmpty()//treat as unlabelled seconds
&& hoursString.isEmpty()) { && hoursString.isEmpty()) {
secondsString = RegexHelper.matchGroup1("t=(\\d{1,3})", timeStamp); secondsString = Parser.matchGroup1("t=(\\d{1,3})", timeStamp);
} }
} }
@ -446,7 +602,7 @@ public class YoutubeVideoExtractor implements VideoExtractor {
info.webpage_url = li.select("a.content-link").first() info.webpage_url = li.select("a.content-link").first()
.attr("abs:href"); .attr("abs:href");
info.id = RegexHelper.matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url); info.id = Parser.matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url);
//todo: check NullPointerException causing //todo: check NullPointerException causing
info.title = li.select("span.title").first().text(); info.title = li.select("span.title").first().text();
@ -498,19 +654,19 @@ public class YoutubeVideoExtractor implements VideoExtractor {
String playerCode = downloader.download(playerUrl); String playerCode = downloader.download(playerUrl);
decryptionFuncName = decryptionFuncName =
RegexHelper.matchGroup1("\\.sig\\|\\|([a-zA-Z0-9$]+)\\(", playerCode); Parser.matchGroup1("\\.sig\\|\\|([a-zA-Z0-9$]+)\\(", playerCode);
String functionPattern = "(" String functionPattern = "("
+ decryptionFuncName.replace("$", "\\$") + decryptionFuncName.replace("$", "\\$")
+ "=function\\([a-zA-Z0-9_]*\\)\\{.+?\\})"; + "=function\\([a-zA-Z0-9_]*\\)\\{.+?\\})";
decryptionFunc = "var " + RegexHelper.matchGroup1(functionPattern, playerCode) + ";"; decryptionFunc = "var " + Parser.matchGroup1(functionPattern, playerCode) + ";";
helperObjectName = RegexHelper helperObjectName = Parser
.matchGroup1(";([A-Za-z0-9_\\$]{2})\\...\\(", decryptionFunc); .matchGroup1(";([A-Za-z0-9_\\$]{2})\\...\\(", decryptionFunc);
String helperPattern = "(var " String helperPattern = "(var "
+ helperObjectName.replace("$", "\\$") + "=\\{.+?\\}\\};)"; + helperObjectName.replace("$", "\\$") + "=\\{.+?\\}\\};)";
helperObject = RegexHelper.matchGroup1(helperPattern, playerCode); helperObject = Parser.matchGroup1(helperPattern, playerCode);
callerFunc = callerFunc.replace("%%", decryptionFuncName); callerFunc = callerFunc.replace("%%", decryptionFuncName);
@ -553,48 +709,4 @@ public class YoutubeVideoExtractor implements VideoExtractor {
} }
return ""; return "";
} }
/**
 * Maps a YouTube itag to the id of the media format its stream is delivered in.
 *
 * Only itag formats that are supported by the common Android Video player are listed.
 * For a list showing all itag formats, look at
 * https://github.com/rg3/youtube-dl/issues/1687
 *
 * @param itag the itag of a stream
 * @return the id of the matching MediaFormat, or -1 if the itag is not listed here
 */
@SuppressWarnings("WeakerAccess")
public static int resolveFormat(int itag) {
    // !!! lists only supported formats !!!
    switch (itag) {
        // video: 3GPP
        case 17:
        case 36:
            return MediaFormat.v3GPP.id;

        // video: MPEG-4
        case 18:
        case 22:
        case 37:
        case 38:
            return MediaFormat.MPEG_4.id;

        // video: WebM
        case 43:
        case 44:
        case 45:
        case 46:
            return MediaFormat.WEBM.id;

        default:
            // itag not known or not supported
            return -1;
    }
}
/**
 * Maps a YouTube itag to a human readable resolution label such as "720p".
 *
 * @param itag the itag of a stream
 * @return the resolution label, or null if the itag is not listed here
 */
@SuppressWarnings("WeakerAccess")
public static String resolveResolutionString(int itag) {
    switch (itag) {
        case 17:
            return "144p";

        case 36:
            return "240p";

        case 18:
        case 43:
            return "360p";

        case 44:
            return "480p";

        case 22:
        case 45:
            return "720p";

        case 37:
        case 38:
        case 46:
            return "1080p";

        default:
            // itag not known or not supported
            return null;
    }
}
} }

View file

@ -1,7 +1,7 @@
package org.schabi.newpipe.crawler.services.youtube; package org.schabi.newpipe.crawler.services.youtube;
import org.schabi.newpipe.crawler.Parser;
import org.schabi.newpipe.crawler.ParsingException; import org.schabi.newpipe.crawler.ParsingException;
import org.schabi.newpipe.crawler.RegexHelper;
import org.schabi.newpipe.crawler.VideoUrlIdHandler; import org.schabi.newpipe.crawler.VideoUrlIdHandler;
/** /**
@ -47,7 +47,7 @@ public class YoutubeVideoUrlIdHandler implements VideoUrlIdHandler {
throw new ParsingException("Error no suitable url: " + url); throw new ParsingException("Error no suitable url: " + url);
} }
id = RegexHelper.matchGroup1(pat, url); id = Parser.matchGroup1(pat, url);
if(!id.isEmpty()){ if(!id.isEmpty()){
//Log.i(TAG, "string \""+url+"\" matches!"); //Log.i(TAG, "string \""+url+"\" matches!");
return id; return id;